From 122aa03d37a64c45f3f8c75e15dbfc2b6285de01 Mon Sep 17 00:00:00 2001
From: Nat Quayle Nelson
Date: Mon, 20 Mar 2023 19:40:33 -0600
Subject: [PATCH] findNames

---
 haxelib.json           |  3 +-
 src/bad_nlp/Main_.kiss |  4 ++-
 src/bad_nlp/Names.hx   |  2 ++
 src/bad_nlp/Names.kiss | 64 +++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/haxelib.json b/haxelib.json
index fffc734..d6b693e 100644
--- a/haxelib.json
+++ b/haxelib.json
@@ -5,7 +5,8 @@
     "classPath": "src/",
     "dependencies": {
         "kiss": "",
-        "yaml": ""
+        "yaml": "",
+        "haxe-strings": ""
     },
     "url": "https://github.com/NQNStudios/kisslang",
     "contributors": [
diff --git a/src/bad_nlp/Main_.kiss b/src/bad_nlp/Main_.kiss
index ff1ff32..dd5a635 100644
--- a/src/bad_nlp/Main_.kiss
+++ b/src/bad_nlp/Main_.kiss
@@ -10,4 +10,6 @@
         "Valeria"
         "Vanessa"
     ]
-    (Names.isName name))
+    (assert (Names.isName name)))
+
+(assertEquals 5 .length (Names.findNames "Vanessa, Finn, and Tracy--and George--go to Troy's..."))
diff --git a/src/bad_nlp/Names.hx b/src/bad_nlp/Names.hx
index 69ab5a8..6916582 100644
--- a/src/bad_nlp/Names.hx
+++ b/src/bad_nlp/Names.hx
@@ -7,6 +7,8 @@ import yaml.Parser;
 import yaml.util.ObjectMap;
 import sys.FileSystem;
 import bad_nlp.Names;
+using StringTools;
+using hx.strings.Strings;
 
 @:build(kiss.Kiss.build())
 class Names {}
diff --git a/src/bad_nlp/Names.kiss b/src/bad_nlp/Names.kiss
index fb84ff0..0302bbd 100644
--- a/src/bad_nlp/Names.kiss
+++ b/src/bad_nlp/Names.kiss
@@ -21,7 +21,69 @@
                         (dictSet loadedNames (.next (map.keys)) true))))))
         (dictSet loadedNameFiles file true))
 
+(var quotesAndThings [ // characters normalize strips from the start or the end of a token
+    "\""
+    "'"
+    "`"
+    "["
+    "]"
+    "("
+    ")"
+])
+
+(var punctuation [ // suffixes normalize strips from the end of a token only
+    ","
+    "."
+    ";"
+    ":"
+    "-"
+    "!"
+    "?"
+    "'s" // possessive
+])
+
+// TODO this isn't specific to Names. Splits text on every delimiter in turn; the caller's delims array is not modified.
+(function :Array<String> splitByAll [:String text :Array<String> delims]
+    (if delims
+        (let [remaining (delims.copy) // copy first: shift must not drain the caller's array
+                tokens (text.split (remaining.shift))]
+            (flatten (for token tokens (splitByAll token remaining))))
+        [text]))
+
+// TODO this isn't specific to Names. Recursively strips surrounding quotes/brackets and trailing punctuation from token.
+(function normalize [:String token :Bool toLower]
+    (cond
+        // Remove quotes and things around
+        ((apply or (for quote quotesAndThings (token.startsWith quote)))
+            (normalize (token.substr 1) toLower))
+        ((apply or (for quote quotesAndThings (token.endsWith quote)))
+            (normalize (substr token 0 -1) toLower))
+        // Remove punctuation after. NOTE(review): (substr token 0 -1) appears to drop one character, so "'s" relies on a later pass stripping the ' as a quote
+        ((apply or (for punct punctuation (token.endsWith punct)))
+            (normalize (substr token 0 -1) toLower))
+        // Lower-case
+        (toLower (token.toLowerCase))
+        (true token)))
+
 (function isName [:String token]
     (let [token (token.toLowerCase)]
         (loadFilesForToken token)
-        (loadedNames.exists token)))
\ No newline at end of file
+        (loadedNames.exists token)))
+
+(var delimiters [
+    " "
+    "\n"
+    "--"
+    "/"
+])
+
+(function containsName [:String text] // true when a token of text starts with an upper-case letter and passes isName
+    (doFor token (splitByAll text delimiters)
+        (let [t (normalize token false)]
+            (when (and (.isUpperCase (t.substr 0 1)) (isName t))
+                (return true))))
+    false)
+
+(function findNames [:String text] // normalized form of each token of text that containsName accepts
+    (.map (filter (splitByAll text delimiters) ->t (containsName t))
+        ->t (normalize t false)))
\ No newline at end of file