findNames
This commit is contained in:
@@ -5,7 +5,8 @@
|
||||
"classPath": "src/",
|
||||
"dependencies": {
|
||||
"kiss": "",
|
||||
"yaml": ""
|
||||
"yaml": "",
|
||||
"haxe-strings": ""
|
||||
},
|
||||
"url": "https://github.com/NQNStudios/kisslang",
|
||||
"contributors": [
|
||||
|
@@ -10,4 +10,6 @@
|
||||
"Valeria"
|
||||
"Vanessa"
|
||||
]
|
||||
(Names.isName name))
|
||||
(assert (Names.isName name)))
|
||||
|
||||
(assertEquals 5 .length (Names.findNames "Vanessa, Finn, and Tracy--and George--go to Troy's..."))
|
||||
|
@@ -7,6 +7,8 @@ import yaml.Parser;
|
||||
import yaml.util.ObjectMap;
|
||||
import sys.FileSystem;
|
||||
import bad_nlp.Names;
|
||||
using StringTools;
|
||||
using hx.strings.Strings;
|
||||
|
||||
@:build(kiss.Kiss.build())
|
||||
class Names {}
|
||||
|
@@ -21,7 +21,69 @@
|
||||
(dictSet loadedNames (.next (map.keys)) true))))))
|
||||
(dictSet loadedNameFiles file true))
|
||||
|
||||
// Single characters that can wrap a token; normalize strips them from either end.
(var quotesAndThings ["\"" "'" "`" "[" "]" "(" ")"])
|
||||
|
||||
// Suffixes that normalize strips from the end of a token.
(var punctuation [
    "," "." ";" ":" "-" "!" "?"
    "'s" // possessive
])
|
||||
|
||||
// TODO this isn't specific to Names
|
||||
// Split text on every delimiter in delims, applied in order, and return the
// resulting pieces as one flat array. With no delimiters left, the text itself
// is returned as the only piece.
// The delims array passed in is never modified.
(function :Array<String> splitByAll [:String text :Array<String> delims]
    (if delims
            // shift removes the first element in place, so work on a private copy.
            // Callers pass shared lists (e.g. the static delimiters var), and shifting
            // those directly would drop one delimiter for good on every call.
            (let [remaining (delims.copy)
                    next (remaining.shift)
                    tokens (text.split next)]
                // Each recursive call makes its own copy, so sharing remaining is safe
                (flatten (for token tokens (splitByAll token remaining))))
        [text]))
|
||||
|
||||
// TODO this isn't specific to Names
|
||||
// Strip wrapping characters and trailing punctuation from a token.
// Repeatedly removes any quotesAndThings entry from the start or end of the token
// and any punctuation entry from the end, until none match.
// Returns the remainder, lower-cased when toLower is true.
(function :String normalize [:String token :Bool toLower]
    // Remove quotes and things before
    (doFor quote quotesAndThings
        (when (token.startsWith quote)
            (return (normalize (token.substr quote.length) toLower))))
    // Remove quotes and things after
    (doFor quote quotesAndThings
        (when (token.endsWith quote)
            (return (normalize (token.substr 0 (- token.length quote.length)) toLower))))
    // Remove punctuation after. Strip exactly as many characters as the matched
    // entry has: "'s" is two characters long, and removing a fixed single character
    // only worked because "'" also appears in quotesAndThings.
    (doFor punct punctuation
        (when (token.endsWith punct)
            (return (normalize (token.substr 0 (- token.length punct.length)) toLower))))
    // Nothing left to strip
    (if toLower (token.toLowerCase) token))
|
||||
|
||||
// Case-insensitive lookup: true when the lower-cased token is a key of loadedNames.
// loadFilesForToken is called first; it is defined outside this view and
// presumably fills loadedNames on demand -- TODO confirm.
(function isName [:String token]
    (let [lowered (token.toLowerCase)]
        (loadFilesForToken lowered)
        (loadedNames.exists lowered)))
|
||||
|
||||
// Separators on which containsName and findNames split text into tokens.
(var delimiters [" " "\n" "--" "/"])
|
||||
|
||||
// True when at least one token of text, once normalized (case kept), begins with
// an upper-case character and passes isName. Stops at the first such token.
(function containsName [:String text]
    (doFor piece (splitByAll text delimiters)
        (let [cleaned (normalize piece false)
                initial (cleaned.substr 0 1)]
            // Check capitalization first so isName only runs for capitalized tokens
            (when (.isUpperCase initial)
                (when (isName cleaned)
                    (return true)))))
    false)
|
||||
|
||||
// Split text on delimiters, keep the tokens for which containsName is true, and
// return them normalized with their original letter case.
(function findNames [:String text]
    (let [tokens (splitByAll text delimiters)
            nameTokens (filter tokens ->candidate (containsName candidate))]
        (for nameToken nameTokens (normalize nameToken false))))
|
Reference in New Issue
Block a user