diff --git a/NOTICE.md b/NOTICE.md index 6a3a0fb7e..bbc3096d4 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -72,6 +72,9 @@ The typed array implementation was developed in collaboration with Sven Bergstr and the hxtypedarray project, which is available under an "MIT" license. For details, see https://github.com/underscorediscovery/hxtypedarray +The unifill project is included in the [lime/text/unifill/](lime/text/unifill/) directory, +which is available under an "MIT" license. For details, see https://github.com/mandel59/unifill + lime/project/Version is adapted from thx.semver project, which is available under an "MIT" license. For details, see https://github.com/fponticelli/thx.semver diff --git a/lime/text/Utf8ExtInternal.hx b/lime/text/UTF8String.hx similarity index 61% rename from lime/text/Utf8ExtInternal.hx rename to lime/text/UTF8String.hx index 9d618b572..8834f9bfd 100644 --- a/lime/text/Utf8ExtInternal.hx +++ b/lime/text/UTF8String.hx @@ -1,8 +1,348 @@ package lime.text; -// Use org.zamedev.lib.tools.CaseMapsGenerator to generate this file -class Utf8ExtInternal { +import haxe.Utf8; +import lime.text.unifill.Unifill; +import lime.text.unifill.CodePoint; + + +abstract UTF8String(String) from String to String { + + + #if sys + private static var lowercaseMap:Map<Int, Int>; // code point table used by toLowerCase (), created on first use + private static var uppercaseMap:Map<Int, Int>; // code point table used by toUpperCase (), created on first use + #end + + /** + The number of characters in `this` String. + **/ + public var length (get, never):Int; + + + /** + Creates a copy from a given String. + **/ + public function new (str:String) { + + this = new String (str); + + } + + + /** + Returns the character at position `index` of `this` String. + + If `index` is negative or exceeds `this.length`, the empty String `""` + is returned. + **/ + public function charAt (index:Int):String { + + return Unifill.uCharAt (this, index); + + } + + + /** + Returns the character code at position `index` of `this` String. + + If `index` is negative or exceeds `this.length`, `null` is returned.
+ + To obtain the character code of a single character, `"x".code` can be + used instead to inline the character code at compile time. Note that + this only works on String literals of length 1. + **/ + public function charCodeAt (index:Int):Null<Int> { + + return Utf8.charCodeAt (this, index); + + } + + + /** + Returns the String corresponding to the character code `code`. + + If `code` is negative or has another invalid value, the result is + unspecified. + **/ + public static function fromCharCode (code:Int):String { + + return CodePoint.fromInt (code); + + } + + + /** + Returns the string corresponding to the array of character codes `codes`. + + Each code is converted with `CodePoint.fromInt` and the results are + concatenated in array order; an empty array yields the empty String `""`. + **/ + public static function fromCharCodes (codes:Array<Int>):String { + + var s = ""; + + for (code in codes) { + + s += CodePoint.fromInt (code); + + } + + return s; + + } + + + /** + Returns the position of the leftmost occurrence of `str` within `this` + String. + + If `startIndex` is given, the search is performed within the substring + of `this` String starting from `startIndex`. Otherwise the search is + performed within `this` String. In either case, the returned position + is relative to the beginning of `this` String. + + If `str` cannot be found, -1 is returned. + **/ + public function indexOf (str:String, startIndex:Int = 0):Int { + + return Unifill.uIndexOf (this, str, startIndex); + + } + + + /** + Returns the position of the rightmost occurrence of `str` within `this` + String. + + If `startIndex` is given, the search is performed within the substring + of `this` String from 0 to `startIndex`. Otherwise the search is + performed within `this` String. In either case, the returned position + is relative to the beginning of `this` String. + + If `str` cannot be found, -1 is returned.
+ **/ + public function lastIndexOf (str:String, ?startIndex:Int):Int { + + return Unifill.uLastIndexOf (this, str, startIndex); + + } + + + /** + Splits `this` String at each occurrence of `delimiter`. + + If `this` String is the empty String `""`, the result is not consistent + across targets and may either be `[]` (on Js, Cpp) or `[""]`. + + If `delimiter` is the empty String `""`, `this` String is split into an + Array of `this.length` elements, where the elements correspond to the + characters of `this` String. + + If `delimiter` is not found within `this` String, the result is an Array + with one element, which equals `this` String. + + If `delimiter` is null, the result is unspecified. + + Otherwise, `this` String is split into parts at each occurrence of + `delimiter`. If `this` String starts (or ends) with `delimiter`, the + result `Array` contains a leading (or trailing) empty String `""` element. + Two subsequent delimiters also result in an empty String `""` element. + **/ + public function split (delimiter:String):Array<String> { + + return Unifill.uSplit (this, delimiter); + + } + + + /** + Returns `len` characters of `this` String, starting at position `pos`. + + If `len` is omitted, all characters from position `pos` to the end of + `this` String are included. + + If `pos` is negative, its value is calculated from the end of `this` + String by `this.length + pos`. If this yields a negative value, 0 is + used instead. + + If the calculated position + `len` exceeds `this.length`, the characters + from that position to the end of `this` String are returned. + + If `len` is negative, the result is unspecified. + **/ + public function substr (pos:Int, ?len:Int):String { + + return Unifill.uSubstr (this, pos, len); // Utf8.sub requires a length, so an omitted `len` could not mean "to the end" + + } + + + /** + Returns the part of `this` String from `startIndex` to but not including `endIndex`. + + If `startIndex` or `endIndex` are negative, 0 is used instead. + + If `startIndex` exceeds `endIndex`, they are swapped.
+ + If the (possibly swapped) `endIndex` is omitted or exceeds + `this.length`, `this.length` is used instead. + + If the (possibly swapped) `startIndex` exceeds `this.length`, the empty + String `""` is returned. + **/ + public function substring (startIndex:Int, ?endIndex:Int):String { + + return Unifill.uSubstring (this, startIndex, endIndex); + + } + + + /** + Returns a String where all characters of `this` String are lower case. + + On `sys` targets each code point is mapped through the table filled by `Utf8Ext.fillUpperToLowerMap`; other targets use the native `String.toLowerCase`. + **/ + public function toLowerCase ():String { + + #if sys + + if (lowercaseMap == null) { + + lowercaseMap = new Map (); + Utf8Ext.fillUpperToLowerMap (lowercaseMap); // fill the table created above, not uppercaseMap + + } + + var r = new Utf8 (); + + Utf8.iter (this, function (v) { + + r.addChar (lowercaseMap.exists (v) ? lowercaseMap[v] : v); + + }); + + return r.toString (); + + #else + + return this.toLowerCase (); + + #end + + } + + + /** + Returns the String itself. + **/ + public function toString ():String { + + return this; + + } + + + /** + Returns a String where all characters of `this` String are upper case. + + On `sys` targets each code point is mapped through the table filled by `Utf8Ext.fillLowerToUpperMap`; other targets use the native `String.toUpperCase`. + **/ + public function toUpperCase ():String { + + #if sys + + if (uppercaseMap == null) { + + uppercaseMap = new Map (); + Utf8Ext.fillLowerToUpperMap (uppercaseMap); + + } + + var r = new Utf8 (); + + Utf8.iter (this, function(v) { + + r.addChar (uppercaseMap.exists (v) ?
uppercaseMap[v] : v); + + }); + + return r.toString (); + + #else + + return this.toUpperCase (); + + #end + + } + + + @:op(A == B) private static function equals (a:UTF8String, b:UTF8String):Bool { + + return Unifill.uCompare (a, b) == 0; + + } + + + @:op(A < B) private static function lt (a:UTF8String, b:UTF8String):Bool { + + return Unifill.uCompare (a, b) == -1; + + } + + + @:op(A > B) private static function gt (a:UTF8String, b:UTF8String):Bool { + + return Unifill.uCompare (a, b) == 1; + + } + + + @:op(A <= B) private static function lteq (a:UTF8String, b:UTF8String):Bool { + + return Unifill.uCompare (a, b) != 1; + + } + + + @:op(A >= B) private static function gteq (a:UTF8String, b:UTF8String):Bool { + + return Unifill.uCompare (a, b) != -1; + + } + + + @:op(A + B) private static function plus (a:UTF8String, b:UTF8String):UTF8String { + + var sb = new StringBuf (); + sb.add (Std.string (a)); + sb.add (Std.string (b)); + return sb.toString (); + + } + + + + + // Get & Set Methods + + + + + private function get_length ():Int { + + return Unifill.uLength (this); // same Unifill indexing as charAt/indexOf/substring + + } + + +} + + +// generated from org.zamedev.lib.tools.CaseMapsGenerator + +private class Utf8Ext { public static function fillUpperToLowerMap(map : Map<Int, Int>) : Void { var i = 0; for (i in 0...26) map[0x41+i] = 0x61+i; @@ -381,4 +721,4 @@ for (i in 0...51) map[0x10CC0+i] = 0x10C80+i; for (i in 0...32) map[0x118C0+i] = 0x118A0+i; for (i in 0...34) map[0x1E922+i] = 0x1E900+i; } -} +} \ No newline at end of file diff --git a/lime/text/UTFString.hx b/lime/text/UTFString.hx deleted file mode 100644 index 31398d498..000000000 --- a/lime/text/UTFString.hx +++ /dev/null @@ -1,354 +0,0 @@ -package lime.text; - -import haxe.Utf8; -#if unifill -import lime.text.unifill.Unifill; -import lime.text.unifill.CodePoint; -#end - -/** - * ...
- * @author - */ -abstract UTFString(String) from String to String -{ - #if (unifill && (neko || php || cpp)) - static var inited:Bool = false; - static var lcaseMap:Map; - static var ucaseMap:Map; - #end - - /** - The number of characters in `this` String. - **/ - public var length(get, never) : Int; - - /** - Creates a copy from a given String. - **/ - public function new(str:String) - { - this = new String(str); - } - - /** - Caching of character maps in two case sensitivites - **/ - static function initialize() : Void { - #if (unifill && (neko || php || cpp)) - lcaseMap = new Map(); - ucaseMap = new Map(); - - Utf8ExtInternal.fillUpperToLowerMap(lcaseMap); - Utf8ExtInternal.fillLowerToUpperMap(ucaseMap); - inited = true; - #end - } - - /** - Returns a String where all characters of `this` String are upper case. - - Affects the characters `a-z`. Other characters remain unchanged. - **/ - public function toUpperCase() : String - { - #if (unifill && (neko || php || cpp)) - if (!inited) initialize(); - - var r = new Utf8(); - - Utf8.iter(this, function(v) { - r.addChar(ucaseMap.exists(v) ? ucaseMap[v] : v); - }); - - return r.toString(); - #else - return this.toUpperCase(); - #end - } - - /** - Returns a String where all characters of `this` String are lower case. - - Affects the characters `A-Z`. Other characters remain unchanged. - **/ - public function toLowerCase() : String - { - #if (unifill && (neko || php || cpp)) - if (!inited) initialize(); - - var r = new Utf8(); - - Utf8.iter(this, function(v) { - r.addChar(lcaseMap.exists(v) ? lcaseMap[v] : v); - }); - - return r.toString(); - #else - return this.toLowerCase(); - #end - } - - /** - Returns the character at position `index` of `this` String. - - If `index` is negative or exceeds `this.length`, the empty String `""` - is returned. 
- **/ - public function charAt(index : Int) : String - { - #if unifill - return Unifill.uCharAt(this, index); - #else - return this.charAt(index); - #end - } - - /** - Returns the character code at position `index` of `this` String. - - If `index` is negative or exceeds `this.length`, `null` is returned. - - To obtain the character code of a single character, `"x".code` can be - used instead to inline the character code at compile time. Note that - this only works on String literals of length 1. - **/ - public function charCodeAt(index : Int) : Null - { - #if unifill - return Utf8.charCodeAt(this, index); - #else - return this.charCodeAt(index); - #end - } - - /** - Returns the position of the leftmost occurence of `str` within `this` - String. - - If `startIndex` is given, the search is performed within the substring - of `this` String starting from `startIndex`. Otherwise the search is - performed within `this` String. In either case, the returned position - is relative to the beginning of `this` String. - - If `str` cannot be found, -1 is returned. - **/ - public function indexOf(str : String, ?startIndex : Int = 0) : Int - { - #if unifill - return Unifill.uIndexOf(this, str, startIndex); - #else - return this.indexOf(str, startIndex); - #end - } - - /** - Returns the position of the rightmost occurence of `str` within `this` - String. - - If `startIndex` is given, the search is performed within the substring - of `this` String from 0 to `startIndex`. Otherwise the search is - performed within `this` String. In either case, the returned position - is relative to the beginning of `this` String. - - If `str` cannot be found, -1 is returned. - **/ - public function lastIndexOf(str : String, ?startIndex : Int) : Int - { - #if unifill - return Unifill.uLastIndexOf(this, str, startIndex); - #else - return this.lastIndexOf(str, startIndex); - #end - } - - /** - Splits `this` String at each occurence of `delimiter`. 
- - If `this` String is the empty String `""`, the result is not consistent - across targets and may either be `[]` (on Js, Cpp) or `[""]`. - - If `delimiter` is the empty String `""`, `this` String is split into an - Array of `this.length` elements, where the elements correspond to the - characters of `this` String. - - If `delimiter` is not found within `this` String, the result is an Array - with one element, which equals `this` String. - - If `delimiter` is null, the result is unspecified. - - Otherwise, `this` String is split into parts at each occurence of - `delimiter`. If `this` String starts (or ends) with `delimiter`, the - result `Array` contains a leading (or trailing) empty String `""` element. - Two subsequent delimiters also result in an empty String `""` element. - **/ - public function split(delimiter : String) : Array - { - #if unifill - return Unifill.uSplit(this, delimiter); - #else - return this.split(delimiter); - #end - } - - /** - Returns `len` characters of `this` String, starting at position `pos`. - - If `len` is omitted, all characters from position `pos` to the end of - `this` String are included. - - If `pos` is negative, its value is calculated from the end of `this` - String by `this.length + pos`. If this yields a negative value, 0 is - used instead. - - If the calculated position + `len` exceeds `this.length`, the characters - from that position to the end of `this` String are returned. - - If `len` is negative, the result is unspecified. - **/ - public function substr(pos : Int, ?len : Int) : String - { - #if unifill - return Utf8.sub(this, pos, len); - #else - return this.substr(pos, len); - #end - } - - /** - Returns the part of `this` String from `startIndex` to but not including `endIndex`. - - If `startIndex` or `endIndex` are negative, 0 is used instead. - - If `startIndex` exceeds `endIndex`, they are swapped. - - If the (possibly swapped) `endIndex` is omitted or exceeds - `this.length`, `this.length` is used instead. 
- - If the (possibly swapped) `startIndex` exceeds `this.length`, the empty - String `""` is returned. - **/ - public function substring(startIndex : Int, ?endIndex : Int) : String - { - #if unifill - return Unifill.uSubstring(this, startIndex, endIndex); - #else - return this.substring(startIndex, endIndex); - #end - } - - /** - Returns the String itself. - **/ - public function toString() : String - { - return this; - } - - /** - Returns the String corresponding to the character code `code`. - - If `code` is negative or has another invalid value, the result is - unspecified. - **/ - public static function fromCharCode(code : Int) : String - { - #if unifill - //var sb = new StringBuf(); - //sb.addChar(code); - //return sb.toString(); - return CodePoint.fromInt(code); - #else - return String.fromCharCode(code); - #end - } - - /** - Returns the string corresponding to the array of character codes `codes`. - - If #unifill is defined, these codes will be treated as UTF-8 code points, - otherwise it will default to using String.fromCharCode() for each character - **/ - public static function fromCharCodes(codes : Array) : String - { - var s = ""; - for (code in codes) - { - #if unifill - s += CodePoint.fromInt(code); - #else - s += String.fromCharCode(code); - #end - } - return s; - } - - /**********PRIVATE*************/ - - @:op(A == B) static function equals(a:UTFString, b:UTFString) : Bool - { - #if unifill - return Unifill.uCompare(a, b) == 0; - #else - return Std.string(a) == Std.string(b); - #end - } - - @:op(A < B) static function lt(a:UTFString, b:UTFString) : Bool - { - #if unifill - return Unifill.uCompare(a, b) == -1; - #else - return Std.string(a) < Std.string(b); - #end - } - - @:op(A > B) static function gt(a:UTFString, b:UTFString) : Bool - { - #if unifill - return Unifill.uCompare(a, b) == 1; - #else - return Std.string(a) > Std.string(b); - #end - } - - @:op(A <= B) static function lteq(a:UTFString, b:UTFString) : Bool - { - #if unifill - return 
Unifill.uCompare(a, b) != 1; - #else - return Std.string(a) <= Std.string(b); - #end - } - - @:op(A >= B) static function gteq(a:UTFString, b:UTFString) : Bool - { - #if unifill - return Unifill.uCompare(a, b) != -1; - #else - return Std.string(a) >= Std.string(b); - #end - } - - @:op(A + B) static function plus(a:UTFString, b:UTFString) : String - { - #if unifill - var sb = new StringBuf(); - sb.add(Std.string(a)); - sb.add(Std.string(b)); - return sb.toString(); - #else - return Std.string(a) + Std.string(b); - #end - } - - private function get_length() : Int - { - #if unifill - return Utf8.length(this); - #else - return this.length; - #end - } - -} \ No newline at end of file