Add language-specific rules (e.g. for Turkish) to UTF8String casing

fix utf8string casing
This commit is contained in:
Lars Doucet
2018-10-02 13:15:12 -05:00
parent fc3bba8c38
commit 7c9b3e657f

View File

@@ -209,22 +209,35 @@ abstract UTF8String(String) from String to String {
Returns a String where all characters of `this` String are lower case.
Affects the characters `A-Z`. Other characters remain unchanged.
If `language` is specified, language-specific casing rules will be followed.
**/
public function toLowerCase ():String {
public function toLowerCase (language:Language=null):String {
if(language == null) language = STANDARD;
#if sys
if (lowercaseMap == null) {
lowercaseMap = new Map<Int, Int> ();
Utf8Ext.fillUpperToLowerMap (uppercaseMap);
Utf8Ext.fillUpperToLowerMap (lowercaseMap);
}
var r = new Utf8 ();
Utf8.iter (this, function (v) {
if(language != STANDARD)
{
var v2 = toLowerCaseLanguageFixes(v,language);
if(v2 != v)
{
r.addChar(v2);
return;
}
}
r.addChar (lowercaseMap.exists (v) ? lowercaseMap[v] : v);
});
@@ -239,6 +252,19 @@ abstract UTF8String(String) from String to String {
}
/**
	Applies language-specific lower-casing overrides to a single character.

	`v` is a Unicode code point (not a UTF-8 byte sequence): it is the value
	handed out by `Utf8.iter` and the same value used as a key into the
	code-point-keyed casing maps.

	Returns the overridden code point, or `v` itself when `language` has no
	special rule for it; the caller then falls back to the standard map.

	Only context-free, one-to-one rules can be expressed here. Rules that
	depend on neighbouring characters (e.g. a following combining dot above,
	U+0307) are not handled.
**/
private static function toLowerCaseLanguageFixes(v:Int,language:Language):Int
{
	return switch(language)
	{
		case TURKISH:
			switch(v)
			{
				case 0x49: 0x131; //I-->ı (capital I to small dotless ı); the genuine Turkish-specific rule
				case 0x130: 0x69; //İ-->i (capital dotted İ to small i); probably redundant with the standard map, kept for logical symmetry with toUpperCaseLanguageFixes
				default: v;
			}
		default: v;
	}
}
/**
Returns the String itself.
@@ -254,22 +280,35 @@ abstract UTF8String(String) from String to String {
Returns a String where all characters of `this` String are upper case.
Affects the characters `a-z`. Other characters remain unchanged.
If `language` is specified, language-specific casing rules will be followed.
**/
public function toUpperCase ():String {
public function toUpperCase (language:Language=null):String {
if(language == null) language = STANDARD;
#if sys
if (uppercaseMap == null) {
uppercaseMap = new Map<Int, Int> ();
Utf8Ext.fillLowerToUpperMap (uppercaseMap);
}
var r = new Utf8 ();
Utf8.iter (this, function(v) {
if(language != STANDARD)
{
var v2 = toUpperCaseLanguageFixes(v,language);
if(v2 != v)
{
r.addChar(v2);
return;
}
}
r.addChar (uppercaseMap.exists (v) ? uppercaseMap[v] : v);
});
@@ -283,6 +322,20 @@ abstract UTF8String(String) from String to String {
#end
}
/**
	Applies language-specific upper-casing overrides to a single character.

	`v` is a Unicode code point (not a UTF-8 byte sequence): it is the value
	handed out by `Utf8.iter` and the same value used as a key into the
	code-point-keyed casing maps.

	Returns the overridden code point, or `v` itself when `language` has no
	special rule for it; the caller then falls back to the standard map.
**/
private static function toUpperCaseLanguageFixes(v:Int,language:Language):Int
{
	return switch(language)
	{
		case TURKISH:
			switch(v)
			{
				// U+0130 is the code point of İ; 0xC4B0 is only its UTF-8 byte
				// encoding and, used as a code point, is an unrelated character.
				case 0x69: 0x130; //i-->İ (small i to capital dotted İ)
				default: v;
			}
		default: v;
	}
}
@:op(A == B) private static function equals (a:UTF8String, b:UTF8String):Bool {
@@ -748,4 +801,11 @@ for (i in 0...51) map[0x10CC0+i] = 0x10C80+i;
for (i in 0...32) map[0x118C0+i] = 0x118A0+i;
for (i in 0...34) map[0x1E922+i] = 0x1E900+i;
}
}
/**
	Selects which set of casing rules `toLowerCase` / `toUpperCase` apply.
	Add further special-case languages as new constructors when needed.
**/
enum Language {
	/** Any language that doesn't have surprising results with casing. **/
	STANDARD;

	/** Turkish. **/
	TURKISH;
}