Files
kiss-vscode/projects/bad-nlp/src/bad_nlp/Names.kiss

48 lines
1.8 KiB
Plaintext

// Paths that have already been passed to loadNameFile (recorded whether or not
// the file existed), so that no file is parsed more than once.
(var :Map<String,Bool> loadedNameFiles (new Map))
// Set of every name read so far from the name database files.
(var :Map<String,Bool> loadedNames (new Map))
// Location of the "bad-nlp" library, as returned by Prelude.libPath.
(var libPath (Prelude.libPath "bad-nlp"))
// The "name-database" directory inside the library; name files are looked up under it.
(var namesPath (joinPath libPath "name-database"))
// Deny-list: tokens isName rejects even when they are present in loadedNames.
(var :Map<String,Bool> notNames (for notName [
"he" // This is in the name database.
"she"
] =>notName true))
// Allow-list: tokens isName accepts regardless of the database. Empty as declared here.
(var :Map<String,Bool> yesNames (new Map))
// Load the family-name and given-name database files that could contain `token`.
// Both files live at <namesPath>/<category>/<first letter>/<first two letters>.yml;
// the family-name file is loaded first, then the given-name file.
(function loadFilesForToken [:String token]
    (let [initial (token.charAt 0)
            prefix (token.substr 0 2)
            // Both categories use the same file name, so build it once.
            yamlName "${prefix}.yml"]
        (loadNameFile (joinPath namesPath "family_name" initial yamlName))
        (loadNameFile (joinPath namesPath "given_name" initial yamlName))))
// Read the YAML name list at `file` and add each entry to loadedNames.
//
// `file` is the path of a name database file. The path is always recorded in
// loadedNameFiles afterwards -- even when the file does not exist -- so a given
// path is examined on disk at most once.
//
// Each entry of the parsed array is either a plain string (stored as the name)
// or a map, in which case the first key produced by its keys iterator is stored.
// NOTE(review): entries of any other type have no branch here, and the result of
// Yaml.read is assumed to be a non-null array -- confirm against the database files.
(function loadNameFile [:String file]
    // Check the in-memory cache before touching the filesystem: every path that
    // reaches this function (missing or not) ends up in loadedNameFiles, so
    // repeated calls for the same path can skip the FileSystem.exists stat.
    (unless (or (loadedNameFiles.exists file) !(FileSystem.exists file))
        (let [:Array<Dynamic> data (Yaml.read file)]
            (doFor name (data.iterator)
                (typeCase [name]
                    ([:String name] (dictSet loadedNames name true))
                    ([:AnyObjectMap map]
                        (dictSet loadedNames (.next (map.keys)) true))))))
    (dictSet loadedNameFiles file true))
// Decide whether `token` is a known name, ignoring case.
// The token is lower-cased, the database files that could contain it are loaded,
// and the result is true when the token is in yesNames, or when it is in
// loadedNames and not in notNames.
(function isName [:String token]
    (let [lowered (token.toLowerCase)]
        // Loading happens first, before any of the lookups below.
        (loadFilesForToken lowered)
        (if (yesNames.exists lowered)
                // The allow-list wins over both the database and the deny-list.
                true
            (and
                (loadedNames.exists lowered)
                !(notNames.exists lowered)))))
// Return true as soon as any token of `text` is a capitalized name; false otherwise.
// Each token from Util.splitTokens is passed through (Util.normalize token false);
// it counts when its first character reports isUpperCase and isName accepts it.
(function containsName [:String text]
    (doFor rawToken (Util.splitTokens text)
        (let [normalized (Util.normalize rawToken false)
                initial (normalized.substr 0 1)]
            // Only consult isName (which may load files) for capitalized tokens.
            (when (initial.isUpperCase)
                (when (isName normalized)
                    (return true)))))
    false)
// Collect the names found in `text`.
// Tokens from Util.splitTokens are kept when containsName accepts them, and each
// kept token is returned in its (Util.normalize token false) form.
(function findNames [:String text]
    (let [nameTokens (filter (Util.splitTokens text) ->candidate (containsName candidate))]
        (nameTokens.map ->candidate (Util.normalize candidate false))))