48 lines
1.8 KiB
Plaintext
48 lines
1.8 KiB
Plaintext
// Paths that loadNameFile has already been asked to load.
(var :Map<String,Bool> loadedNameFiles (new Map))
// Names collected by loadNameFile from the YAML files it has read.
(var :Map<String,Bool> loadedNames (new Map))

// Location of the name database: the "name-database" entry under the
// path that Prelude.libPath reports for "bad-nlp".
(var libPath (Prelude.libPath "bad-nlp"))
(var namesPath (joinPath libPath "name-database"))

// Tokens that isName rejects even when they appear in loadedNames.
(var :Map<String,Bool> notNames
    (for excluded [
            "he" // This is in the name database.
            "she"
        ]
        =>excluded true))

// Tokens that isName accepts regardless of loadedNames and notNames.
(var :Map<String,Bool> yesNames (new Map))
|
|
|
|
// Load the family-name and given-name database files that could contain
// `token`. Files are laid out as
//   <namesPath>/<family_name|given_name>/<first char>/<first two chars>.yml
// The family-name file is loaded first, then the given-name file.
(function loadFilesForToken [:String token]
    (let [initial (token.charAt 0)
            prefix (token.substr 0 2)
            yamlName "${prefix}.yml"]
        (loadNameFile (joinPath namesPath "family_name" initial yamlName))
        (loadNameFile (joinPath namesPath "given_name" initial yamlName))))
|
|
|
|
// Read one name-database YAML file into loadedNames, parsing it at most once.
//
// file: path of the YAML file to load.
//
// The file is parsed only if it has not been seen before and exists on disk.
// Each entry of the parsed array is handled by type:
//   - a String is recorded in loadedNames as-is;
//   - an AnyObjectMap contributes the first key its keys iterator yields.
// Whether or not anything was parsed, `file` is then recorded in
// loadedNameFiles, so missing files are remembered too.
(function loadNameFile [:String file]
    // Check the in-memory cache before the disk. Every path that reaches this
    // function ends up in loadedNameFiles (see the last line), so testing the
    // cache first skips a filesystem lookup on every repeat call.
    (unless (or (loadedNameFiles.exists file) !(FileSystem.exists file))
        (let [:Array<Dynamic> data (Yaml.read file)]
            (doFor name (data.iterator)
                (typeCase [name]
                    ([:String name] (dictSet loadedNames name true))
                    ([:AnyObjectMap map]
                        (dictSet loadedNames (.next (map.keys)) true))))))
    // Deliberately outside the `unless`: nonexistent paths are cached as well.
    (dictSet loadedNameFiles file true))
|
|
|
|
// Decide whether `token` is a known name, ignoring case.
//
// The token is lower-cased, the database files matching it are loaded, and
// the result is true when either:
//   - the lower-cased token is in yesNames, or
//   - it is in loadedNames and not in notNames.
(function isName [:String token]
    (let [lowered (token.toLowerCase)]
        // Loading happens before any lookup, exactly once per call.
        (loadFilesForToken lowered)
        (if (yesNames.exists lowered)
                true
            (and
                (loadedNames.exists lowered)
                !(notNames.exists lowered)))))
|
|
|
|
// Report whether any token of `text` is a capitalized name.
//
// Each token from Util.splitTokens is normalized with
// (Util.normalize token false). The function returns true at the first
// normalized token whose first character passes isUpperCase and which
// isName accepts; it returns false if no token qualifies.
(function containsName [:String text]
    (doFor rawToken (Util.splitTokens text)
        (let [normalized (Util.normalize rawToken false)
                firstChar (normalized.substr 0 1)]
            // isName is only consulted for tokens that pass the case check.
            (when (.isUpperCase firstChar)
                (when (isName normalized)
                    (return true)))))
    false)
|
|
|
|
// Collect the names found in `text`.
//
// Keeps the tokens of `text` (as produced by Util.splitTokens) for which
// containsName is true, and returns each of them passed through
// (Util.normalize token false), in their original order.
(function findNames [:String text]
    (let [nameTokens (filter (Util.splitTokens text) ->token (containsName token))]
        (nameTokens.map ->token (Util.normalize token false))))