From 2cdbf35e3e7448d1c533cf107e0dcc7f5c4d8fb6 Mon Sep 17 00:00:00 2001 From: Nat Quayle Nelson Date: Mon, 20 Mar 2023 19:40:33 -0600 Subject: [PATCH] findNames --- projects/bad-nlp/haxelib.json | 3 +- projects/bad-nlp/src/bad_nlp/Main_.kiss | 4 +- projects/bad-nlp/src/bad_nlp/Names.hx | 2 + projects/bad-nlp/src/bad_nlp/Names.kiss | 64 ++++++++++++++++++++++++- 4 files changed, 70 insertions(+), 3 deletions(-) diff --git a/projects/bad-nlp/haxelib.json b/projects/bad-nlp/haxelib.json index fffc7344..d6b693e4 100644 --- a/projects/bad-nlp/haxelib.json +++ b/projects/bad-nlp/haxelib.json @@ -5,7 +5,8 @@ "classPath": "src/", "dependencies": { "kiss": "", - "yaml": "" + "yaml": "", + "haxe-strings": "" }, "url": "https://github.com/NQNStudios/kisslang", "contributors": [ diff --git a/projects/bad-nlp/src/bad_nlp/Main_.kiss b/projects/bad-nlp/src/bad_nlp/Main_.kiss index ff1ff326..dd5a6353 100644 --- a/projects/bad-nlp/src/bad_nlp/Main_.kiss +++ b/projects/bad-nlp/src/bad_nlp/Main_.kiss @@ -10,4 +10,6 @@ "Valeria" "Vanessa" ] - (Names.isName name)) + (assert (Names.isName name))) + +(assertEquals 5 .length (Names.findNames "Vanessa, Finn, and Tracy--and George--go to Troy's...")) diff --git a/projects/bad-nlp/src/bad_nlp/Names.hx b/projects/bad-nlp/src/bad_nlp/Names.hx index 69ab5a86..69165828 100644 --- a/projects/bad-nlp/src/bad_nlp/Names.hx +++ b/projects/bad-nlp/src/bad_nlp/Names.hx @@ -7,6 +7,8 @@ import yaml.Parser; import yaml.util.ObjectMap; import sys.FileSystem; import bad_nlp.Names; +using StringTools; +using hx.strings.Strings; @:build(kiss.Kiss.build()) class Names {} diff --git a/projects/bad-nlp/src/bad_nlp/Names.kiss b/projects/bad-nlp/src/bad_nlp/Names.kiss index fb84ff0b..0302bbd1 100644 --- a/projects/bad-nlp/src/bad_nlp/Names.kiss +++ b/projects/bad-nlp/src/bad_nlp/Names.kiss @@ -21,7 +21,69 @@ (dictSet loadedNames (.next (map.keys)) true)))))) (dictSet loadedNameFiles file true)) +(var quotesAndThings [ + "\"" + "'" + "`" + "[" + "]" + "(" + ")" +]) 
+
+// Trailing punctuation that `normalize` strips from the end of a token.
+(var punctuation [
+    ","
+    "."
+    ";"
+    ":"
+    "-"
+    "!"
+    "?"
+    "'s" // possessive
+])
+
+// Split `text` on every delimiter in `delims`, applied in order, and return the
+// flattened list of tokens. With no delimiters left, the result is just [text].
+//
+// `delims` is never modified: the head delimiter is shifted off a private copy.
+// Callers pass the shared module-level `delimiters` list, so shifting the
+// argument itself would remove one delimiter from that list on every call.
+// TODO this isn't specific to Names
+(function :Array<String> splitByAll [:String text :Array<String> delims]
+    (if delims
+            (let [remaining (delims.copy)
+                    next (remaining.shift)
+                    tokens (text.split next)]
+                // `remaining` can be shared by the recursive calls because
+                // splitByAll only ever shifts from its own copy.
+                (flatten (for token tokens (splitByAll token remaining))))
+        [text]))
+
+// Strip surrounding quote/bracket characters and trailing punctuation from
+// `token`, one character per recursive step, until none of the rules match.
+// When `toLower` is true the stripped token is returned lower-cased; otherwise
+// its case is left alone.
+// TODO this isn't specific to Names
+(function normalize [:String token :Bool toLower]
+    (cond
+        // Remove quotes and things around
+        ((apply or (for quote quotesAndThings (token.startsWith quote)))
+            (normalize (token.substr 1) toLower))
+        ((apply or (for quote quotesAndThings (token.endsWith quote)))
+            (normalize (substr token 0 -1) toLower))
+        // Remove punctuation after. Only one character is dropped per step, so
+        // for the possessive "'s" the leftover "'" is removed by the quote rule
+        // above on the next recursion.
+        ((apply or (for punct punctuation (token.endsWith punct)))
+            (normalize (substr token 0 -1) toLower))
+        // Lower-case
+        (toLower (token.toLowerCase))
+        (true token)))
+
 (function isName [:String token]
     (let [token (token.toLowerCase)]
         (loadFilesForToken token)
-        (loadedNames.exists token)))
\ No newline at end of file
+        (loadedNames.exists token)))
+
+// Strings that separate tokens when scanning text for names.
+(var delimiters [
+    " "
+    "\n"
+    "--"
+    "/"
+])
+
+// Return true as soon as one token of `text` (split on `delimiters`, then
+// normalized with its case preserved) begins with an upper-case character and
+// passes `isName`; return false if no token does.
+(function containsName [:String text]
+    (doFor token (splitByAll text delimiters)
+        (let [t (normalize token false)]
+            (when (and (.isUpperCase (t.substr 0 1)) (isName t))
+                (return true))))
+    false)
+
+// Split `text` on `delimiters`, keep the tokens for which `containsName` holds,
+// and return them normalized (quotes/punctuation stripped, case preserved).
+(function findNames [:String text]
+    (.map (filter (splitByAll text delimiters) ->t (containsName t))
+        ->t (normalize t false)))
\ No newline at end of file