Bad NLP Util class
This commit is contained in:
@@ -6,7 +6,6 @@ import yaml.Yaml;
|
|||||||
import yaml.Parser;
|
import yaml.Parser;
|
||||||
import yaml.util.ObjectMap;
|
import yaml.util.ObjectMap;
|
||||||
import sys.FileSystem;
|
import sys.FileSystem;
|
||||||
import bad_nlp.Names;
|
|
||||||
using StringTools;
|
using StringTools;
|
||||||
using hx.strings.Strings;
|
using hx.strings.Strings;
|
||||||
|
|
||||||
|
|||||||
@@ -21,69 +21,18 @@
|
|||||||
(dictSet loadedNames (.next (map.keys)) true))))))
|
(dictSet loadedNames (.next (map.keys)) true))))))
|
||||||
(dictSet loadedNameFiles file true))
|
(dictSet loadedNameFiles file true))
|
||||||
|
|
||||||
// Single-character wrappers (quotes, backtick, brackets, parentheses) that
// normalize strips from either end of a token.
(var quotesAndThings ["\"" "'" "`" "[" "]" "(" ")"])
|
|
||||||
|
|
||||||
// Suffixes that normalize removes from the end of a token.
// The final entry, "'s", is the possessive suffix.
(var punctuation ["," "." ";" ":" "-" "!" "?" "'s"])
|
|
||||||
|
|
||||||
// TODO this isn't specific to Names
|
|
||||||
// Split `text` on every delimiter in `delims`, applied in list order, and
// return the flat list of resulting pieces. An empty (or null) `delims`
// yields a one-element list containing `text` unchanged.
//
// The caller's `delims` array is never modified: the first delimiter is
// taken from a private copy. Previously `shift` was applied directly to the
// argument, so passing a shared list removed one delimiter per call.
(function :Array<String> splitByAll [:String text :Array<String> delims]
    (if delims
            (let [remaining (delims.copy)
                    next (remaining.shift)
                    tokens (text.split next)]
                // `remaining` is only read (and copied) by the recursive calls,
                // so it can be shared between them.
                (flatten (for token tokens (splitByAll token remaining))))
        [text]))
|
|
||||||
|
|
||||||
// TODO this isn't specific to Names
|
|
||||||
// Recursively peel decoration off `token`, one character per step:
//   1. a leading entry of quotesAndThings,
//   2. a trailing entry of quotesAndThings,
//   3. a trailing entry of punctuation.
// When nothing more matches, return the token, lower-cased if `toLower`.
(function normalize [:String token :Bool toLower]
    (if (apply or (for opener quotesAndThings (token.startsWith opener)))
            // Drop the first character and try again
            (normalize (token.substr 1) toLower)
        (if (apply or (for closer quotesAndThings (token.endsWith closer)))
                // Drop the last character and try again
                (normalize (substr token 0 -1) toLower)
            (if (apply or (for mark punctuation (token.endsWith mark)))
                    // Drop the last character and try again
                    (normalize (substr token 0 -1) toLower)
                (if toLower
                        (token.toLowerCase)
                    token)))))
|
|
||||||
|
|
||||||
// Lower-cases `token`, calls loadFilesForToken on the lower-cased value, and
// then returns whether loadedNames has an entry for it.
// NOTE(review): each row appears twice because this is a two-column diff
// rendering of an unchanged function.
(function isName [:String token]
|
(function isName [:String token]
|
||||||
(let [token (token.toLowerCase)]
|
(let [token (token.toLowerCase)]
|
||||||
(loadFilesForToken token)
|
(loadFilesForToken token)
|
||||||
(loadedNames.exists token)))
|
(loadedNames.exists token)))
|
||||||
|
|
||||||
// Strings that text is split on when tokenizing: space, newline, "--", "/".
(var delimiters [" " "\n" "--" "/"])
|
|
||||||
|
|
||||||
// Tokenizes `text`, normalizes each token without lower-casing it, and returns
// true as soon as one normalized token starts with an upper-case character and
// passes isName. Returns false when no token qualifies.
// NOTE(review): rows come in pairs because this is a two-column diff rendering;
// the first row of each pair calls splitByAll/normalize directly, the second
// calls Util.splitTokens/Util.normalize.
(function containsName [:String text]
|
(function containsName [:String text]
|
||||||
(doFor token (splitByAll text delimiters)
|
(doFor token (Util.splitTokens text)
|
||||||
(let [t (normalize token false)]
|
(let [t (Util.normalize token false)]
|
||||||
(when (and (.isUpperCase (t.substr 0 1)) (isName t))
|
(when (and (.isUpperCase (t.substr 0 1)) (isName t))
|
||||||
(return true))))
|
(return true))))
|
||||||
false)
|
false)
|
||||||
|
|
||||||
// Tokenizes `text`, keeps the tokens for which containsName is true, and maps
// each kept token to its normalized form (normalize is called with
// toLower = false, so case is preserved).
// NOTE(review): rows come in pairs because this is a two-column diff rendering;
// the first row of each pair calls splitByAll/normalize directly, the second
// calls Util.splitTokens/Util.normalize.
(function findNames [:String text]
|
(function findNames [:String text]
|
||||||
(.map (filter (splitByAll text delimiters) ->t (containsName t))
|
(.map (filter (Util.splitTokens text) ->t (containsName t))
|
||||||
->t (normalize t false)))
|
->t (Util.normalize t false)))
|
||||||
9
projects/bad-nlp/src/bad_nlp/Util.hx
Normal file
9
projects/bad-nlp/src/bad_nlp/Util.hx
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
package bad_nlp;
|
||||||
|
|
||||||
|
import kiss.Prelude;
|
||||||
|
import kiss.List;
|
||||||
|
using StringTools;
|
||||||
|
using hx.strings.Strings;
|
||||||
|
|
||||||
|
// Util class of the bad_nlp package. The Haxe body is empty; the @:build
// metadata applies the kiss.Kiss.build() macro to the class.
// NOTE(review): presumably the macro fills the class with the definitions from
// the sibling Util.kiss file — confirm against the Kiss documentation.
@:build(kiss.Kiss.build())
|
||||||
|
class Util {}
|
||||||
51
projects/bad-nlp/src/bad_nlp/Util.kiss
Normal file
51
projects/bad-nlp/src/bad_nlp/Util.kiss
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
// Single-character wrappers (quotes, backtick, brackets, parentheses) that
// normalize strips from either end of a token.
(var quotesAndThings ["\"" "'" "`" "[" "]" "(" ")"])
|
||||||
|
|
||||||
|
// Suffixes that normalize removes from the end of a token.
// The final entry, "'s", is the possessive suffix.
(var punctuation ["," "." ";" ":" "-" "!" "?" "'s"])
|
||||||
|
|
||||||
|
// Strings that splitTokens splits text on: space, newline, "--", "/".
(var delimiters [" " "\n" "--" "/"])
|
||||||
|
|
||||||
|
// Split `text` on every delimiter in `delims`, applied in list order, and
// return the flat list of resulting pieces. An empty (or null) `delims`
// yields a one-element list containing `text` unchanged.
//
// The caller's `delims` array is never modified: the first delimiter is
// taken from a private copy. Previously `shift` was applied directly to the
// argument, so passing a shared list removed one delimiter per call.
(function :Array<String> splitByAll [:String text :Array<String> delims]
    (if delims
            (let [remaining (delims.copy)
                    next (remaining.shift)
                    tokens (text.split next)]
                // `remaining` is only read (and copied) by the recursive calls,
                // so it can be shared between them.
                (flatten (for token tokens (splitByAll token remaining))))
        [text]))
|
||||||
|
|
||||||
|
// Split `text` into tokens on every entry of the `delimiters` list and return
// the pieces produced by splitByAll.
(function splitTokens [:String text]
    // Pass a copy so the shared `delimiters` list can never be modified by the
    // callee; otherwise later calls could see fewer delimiters.
    (splitByAll text (delimiters.copy)))
|
||||||
|
|
||||||
|
// Recursively peel decoration off `token`, one character per step:
//   1. a leading entry of quotesAndThings,
//   2. a trailing entry of quotesAndThings,
//   3. a trailing entry of punctuation.
// When nothing more matches, return the token, lower-cased if `toLower`.
(function normalize [:String token :Bool toLower]
    (if (apply or (for opener quotesAndThings (token.startsWith opener)))
            // Drop the first character and try again
            (normalize (token.substr 1) toLower)
        (if (apply or (for closer quotesAndThings (token.endsWith closer)))
                // Drop the last character and try again
                (normalize (substr token 0 -1) toLower)
            (if (apply or (for mark punctuation (token.endsWith mark)))
                    // Drop the last character and try again
                    (normalize (substr token 0 -1) toLower)
                (if toLower
                        (token.toLowerCase)
                    token)))))
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
-lib kiss
|
-lib kiss
|
||||||
-lib kiss-vscode-api
|
-lib kiss-vscode-api
|
||||||
|
-lib bad-nlp
|
||||||
-cp src
|
-cp src
|
||||||
-dce full
|
-dce full
|
||||||
-D analyzer-optimize
|
-D analyzer-optimize
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ import kiss.KissInterp;
|
|||||||
using haxe.io.Path;
|
using haxe.io.Path;
|
||||||
using StringTools;
|
using StringTools;
|
||||||
|
|
||||||
|
import bad_nlp.Util;
|
||||||
|
import bad_nlp.Names;
|
||||||
|
|
||||||
typedef KTxt2Block = {
|
typedef KTxt2Block = {
|
||||||
source:String,
|
source:String,
|
||||||
output:String,
|
output:String,
|
||||||
|
|||||||
Reference in New Issue
Block a user