// Paths of name-database files that have already been handled by loadNameFile
// (whether or not they existed), so each file is processed at most once.
(var :Map loadedNameFiles (new Map))
// Every name read from the database so far. Keys are names; values are always true.
(var :Map loadedNames (new Map))
(var libPath (Prelude.libPath "bad-nlp"))
(var namesPath (joinPath libPath "name-database"))
// Words that isName rejects even when they were loaded from the database.
(var :Map notNames (for notName [
    "he" // This is in the name database.
    "she"
] =>notName true))
// Words that isName accepts regardless of the database. Starts out empty.
(var :Map yesNames (new Map))

// Load the family-name and given-name files that could contain `token`.
// Files are located at <namesPath>/<kind>/<first letter>/<first two letters>.yml.
(function loadFilesForToken [:String token]
    (let [firstLetter (token.charAt 0)
            firstTwoLetters (token.substr 0 2)
            familyNameFile (joinPath namesPath "family_name" firstLetter "${firstTwoLetters}.yml")
            givenNameFile (joinPath namesPath "given_name" firstLetter "${firstTwoLetters}.yml")]
        (loadNameFile familyNameFile)
        (loadNameFile givenNameFile)))

// Read the YAML file at `file` and record every entry in loadedNames.
// An entry is either a plain string (used as the name) or a map, in which case
// the first key returned by its key iterator is used as the name.
// The path is recorded in loadedNameFiles afterwards even if the file does not
// exist, so missing files are not probed again.
(function loadNameFile [:String file]
    // Check the in-memory cache before touching the file system: this function
    // runs twice per isName call, and almost always for an already-handled path.
    (unless (or (loadedNameFiles.exists file) !(FileSystem.exists file))
        (let [:Array data (Yaml.read file)]
            (doFor name (data.iterator)
                (typeCase [name]
                    ([:String name] (dictSet loadedNames name true))
                    // NOTE(review): assumes every map entry has at least one key -- confirm against the database format.
                    ([:AnyObjectMap map] (dictSet loadedNames (.next (map.keys)) true))))))
    (dictSet loadedNameFiles file true))

// Whether `token` is considered a name. The token is lowercased first, and the
// database files matching its first letters are loaded on demand.
// True when the lowercased token is in yesNames, or when it is in loadedNames
// and not in notNames.
// NOTE(review): names are stored exactly as they appear in the YAML files, so
// this presumably relies on the database being lowercase -- confirm.
(function isName [:String token]
    (let [token (token.toLowerCase)]
        (loadFilesForToken token)
        (or
            (yesNames.exists token)
            (and (loadedNames.exists token) !(notNames.exists token)))))

// Whether any token of `text` (as split by Util.splitTokens and normalized by
// Util.normalize) starts with an uppercase character and passes isName.
(function containsName [:String text]
    (doFor token (Util.splitTokens text)
        (let [t (Util.normalize token false)]
            // The capitalization check comes first so that isName, which may
            // read files, is only reached for capitalized tokens.
            (when (and (.isUpperCase (t.substr 0 1)) (isName t))
                (return true))))
    false)

// The normalized form of every token in `text` for which containsName is true,
// in the order the tokens appear.
(function findNames [:String text]
    (.map
        (filter (Util.splitTokens text) ->t (containsName t))
        ->t (Util.normalize t false)))