Merge UTF8String, unifill

2017-06-05 16:54:57 -07:00
parent 10326ec249
commit af965d9b04
3 changed files with 346 additions and 357 deletions
--- a/NOTICE.md
+++ b/NOTICE.md
@@ -72,6 +72,9 @@ The typed array implementation was developed in collaboration with Sven Bergstr
 and the hxtypedarray project, which is available under an "MIT" license.
 For details, see https://github.com/underscorediscovery/hxtypedarray

+The unifill project, which is available under an "MIT" license, is included in the
+[lime/text/unifill/](lime/text/unifill/) directory. For details, see https://github.com/mandel59/unifill
+
 lime/project/Version is adapted from thx.semver project, which is available under
 an "MIT" license. For details, see https://github.com/fponticelli/thx.semver

--- a/lime/text/Utf8ExtInternal.hx
+++ b/lime/text/Utf8ExtInternal.hx
@@ -1,8 +1,348 @@
 package lime.text;

-// Use org.zamedev.lib.tools.CaseMapsGenerator to generate this file

-class Utf8ExtInternal {
+import haxe.Utf8;
+import lime.text.unifill.Unifill;
+import lime.text.unifill.CodePoint;
+
+
+abstract UTF8String(String) from String to String {
+	
+	
+	#if sys
+	private static var lowercaseMap:Map<Int, Int>;
+	private static var uppercaseMap:Map<Int, Int>;
+	#end
+	
+	/**
+		The number of characters in `this` String.
+	**/
+	public var length (get, never):Int;
+	
+	
+	/**
+		Creates a new UTF8String from a given String.
+		
+		The underlying value is initialised with `new String (str)`, i.e. a
+		copy of `str` rather than `str` itself.
+	**/
+	public function new (str:String) {
+		
+		this = new String (str);
+		
+	}
+	
+	
+	/**
+		Returns the character at position `index` of `this` String.
+		
+		If `index` is negative or exceeds `this.length`, the empty String `""`
+		is returned.
+		
+		The lookup is delegated to `Unifill.uCharAt`.
+	**/
+	public function charAt (index:Int):String {
+		
+		return Unifill.uCharAt (this, index);
+		
+	}
+	
+	
+	/**
+		Returns the character code at position `index` of `this` String.
+		
+		If `index` is negative or exceeds `this.length`, `null` is returned.
+		
+		To obtain the character code of a single character, `"x".code` can be
+		used instead to inline the character code at compile time. Note that
+		this only works on String literals of length 1.
+		
+		The lookup is delegated to `haxe.Utf8.charCodeAt`.
+		
+		NOTE(review): `charAt` is resolved through `Unifill` while this method
+		goes through `haxe.Utf8`; confirm that both index `this` String in the
+		same units on every target, and that an out-of-range `index` really
+		yields `null` there.
+	**/
+	public function charCodeAt (index:Int):Null<Int> {
+		
+		return Utf8.charCodeAt (this, index);
+		
+	}
+	
+	
+	/**
+		Returns the String corresponding to the character code `code`.
+		
+		If `code` is negative or has another invalid value, the result is
+		unspecified.
+		
+		The conversion is delegated to `CodePoint.fromInt`.
+	**/
+	public static function fromCharCode (code:Int):String {
+		
+		return CodePoint.fromInt (code);
+		
+	}
+	
+	
+	/**
+		Returns the String corresponding to the array of character codes `codes`.
+		
+		Each element of `codes` is converted with `CodePoint.fromInt` and the
+		results are appended in array order. An empty array yields the empty
+		String `""`.
+	**/
+	public static function fromCharCodes (codes:Array<Int>):String {
+		
+		var s = "";
+		
+		for (code in codes) {
+			
+			// Same conversion as `fromCharCode`, applied to every element.
+			s += CodePoint.fromInt (code);
+			
+		}
+		
+		return s;
+		
+	}
+	
+	
+	/**
+		Returns the position of the leftmost occurrence of `str` within `this`
+		String.
+		
+		If `startIndex` is given, the search is performed within the substring
+		of `this` String starting from `startIndex`. Otherwise the search is
+		performed within `this` String. In either case, the returned position
+		is relative to the beginning of `this` String.
+		
+		If `str` cannot be found, -1 is returned.
+		
+		The search is delegated to `Unifill.uIndexOf`; `startIndex` defaults
+		to 0.
+	**/
+	public function indexOf (str:String, startIndex:Int = 0):Int {
+		
+		return Unifill.uIndexOf (this, str, startIndex);
+		
+	}
+	
+	
+	/**
+		Returns the position of the rightmost occurrence of `str` within `this`
+		String.
+		
+		If `startIndex` is given, the search is performed within the substring
+		of `this` String from 0 to `startIndex`. Otherwise the search is
+		performed within `this` String. In either case, the returned position
+		is relative to the beginning of `this` String.
+		
+		If `str` cannot be found, -1 is returned.
+		
+		The search is delegated to `Unifill.uLastIndexOf`; an omitted
+		`startIndex` is forwarded as `null`.
+	**/
+	public function lastIndexOf(str:String, ?startIndex:Int):Int {
+		
+		return Unifill.uLastIndexOf (this, str, startIndex);
+		
+	}
+	
+	
+	/**
+		Splits `this` String at each occurrence of `delimiter`.
+		
+		If `this` String is the empty String `""`, the result is not consistent
+		across targets and may either be `[]` (on Js, Cpp) or `[""]`.
+		
+		If `delimiter` is the empty String `""`, `this` String is split into an
+		Array of `this.length` elements, where the elements correspond to the
+		characters of `this` String.
+		
+		If `delimiter` is not found within `this` String, the result is an Array
+		with one element, which equals `this` String.
+		
+		If `delimiter` is null, the result is unspecified.
+		
+		Otherwise, `this` String is split into parts at each occurrence of
+		`delimiter`. If `this` String starts (or ends) with `delimiter`, the
+		result `Array` contains a leading (or trailing) empty String `""` element.
+		Two subsequent delimiters also result in an empty String `""` element.
+		
+		The split is delegated to `Unifill.uSplit`.
+	**/
+	public function split (delimiter:String):Array<String> {
+		
+		return Unifill.uSplit (this, delimiter);
+		
+	}
+	
+	
+	/**
+		Returns `len` characters of `this` String, starting at position `pos`.
+		
+		If `len` is omitted, all characters from position `pos` to the end of
+		`this` String are included.
+		
+		If `pos` is negative, its value is calculated from the end of `this`
+		String by `this.length + pos`. If this yields a negative value, 0 is
+		used instead.
+		
+		If the calculated position + `len` exceeds `this.length`, the characters
+		from that position to the end of `this` String are returned.
+		
+		If `len` is negative, the result is unspecified.
+		
+		The work is delegated to `Unifill.uSubstr`, whose length argument is
+		optional. `haxe.Utf8.sub` declares `len` as a plain `Int`, so an
+		omitted `len` could not be forwarded to it as "no length given".
+	**/
+	public function substr (pos:Int, ?len:Int):String {
+		
+		// `len` may be null here; uSubstr accepts that, Utf8.sub does not.
+		return Unifill.uSubstr (this, pos, len);
+		
+	}
+	
+	
+	/**
+		Returns the part of `this` String from `startIndex` to but not including `endIndex`.
+		
+		If `startIndex` or `endIndex` are negative, 0 is used instead.
+		
+		If `startIndex` exceeds `endIndex`, they are swapped.
+		
+		If the (possibly swapped) `endIndex` is omitted or exceeds
+		`this.length`, `this.length` is used instead.
+		
+		If the (possibly swapped) `startIndex` exceeds `this.length`, the empty
+		String `""` is returned.
+		
+		The work is delegated to `Unifill.uSubstring`; an omitted `endIndex`
+		is forwarded as `null`.
+	**/
+	public function substring (startIndex:Int, ?endIndex:Int):String {
+		
+		return Unifill.uSubstring (this, startIndex, endIndex);
+		
+	}
+	
+	
+	/**
+		Returns a String where all characters of `this` String are lower case.
+		
+		On `sys` targets every character code that has an entry in the table
+		built by `Utf8Ext.fillUpperToLowerMap` is replaced by its mapped code;
+		all other characters are copied unchanged. The table is built on the
+		first call and cached in `lowercaseMap`.
+		
+		On all other targets the call is forwarded to `String.toLowerCase`.
+	**/
+	public function toLowerCase ():String {
+		
+		#if sys
+		
+		if (lowercaseMap == null) {
+			
+			// Fill a local table first and publish it only once it is complete.
+			// The table filled here must be the lower-case one: `uppercaseMap`
+			// belongs to `toUpperCase` and may still be null at this point.
+			var table = new Map<Int, Int> ();
+			Utf8Ext.fillUpperToLowerMap (table);
+			lowercaseMap = table;
+			
+		}
+		
+		var r = new Utf8 ();
+		
+		Utf8.iter (this, function (v) {
+			
+			// Codes without a mapping are passed through untouched.
+			r.addChar (lowercaseMap.exists (v) ? lowercaseMap[v] : v);
+			
+		});
+		
+		return r.toString ();
+		
+		#else
+		
+		return this.toLowerCase ();
+		
+		#end
+		
+	}
+	
+	
+	/**
+		Returns the String itself.
+		
+		The underlying String value is returned as-is; no copy is made.
+	**/
+	public function toString ():String {
+		
+		return this;
+		
+	}
+	
+	
+	/**
+		Returns a String where all characters of `this` String are upper case.
+		
+		On `sys` targets every character code that has an entry in the table
+		built by `Utf8Ext.fillLowerToUpperMap` is replaced by its mapped code;
+		all other characters are copied unchanged. The table is built on the
+		first call and cached in `uppercaseMap`.
+		
+		On all other targets the call is forwarded to `String.toUpperCase`.
+	**/
+	public function toUpperCase ():String {
+		
+		#if sys
+		
+		if (uppercaseMap == null) {
+			
+			// First use: create and populate the cached lower-to-upper table.
+			uppercaseMap = new Map<Int, Int> ();
+			Utf8Ext.fillLowerToUpperMap (uppercaseMap);
+			
+		}
+		
+		var buffer = new Utf8 ();
+		
+		Utf8.iter (this, function (code) {
+			
+			if (uppercaseMap.exists (code)) {
+				
+				buffer.addChar (uppercaseMap[code]);
+				
+			} else {
+				
+				// No mapping for this code: keep it as it is.
+				buffer.addChar (code);
+				
+			}
+			
+		});
+		
+		return buffer.toString ();
+		
+		#else
+		
+		return this.toUpperCase ();
+		
+		#end
+		
+	}
+	
+	
+	/**
+		Implements `a == b` for UTF8String operands: true when
+		`Unifill.uCompare` reports 0 for the pair.
+	**/
+	@:op(A == B) private static function equals (a:UTF8String, b:UTF8String):Bool {
+		
+		var order = Unifill.uCompare (a, b);
+		return order == 0;
+		
+	}
+	
+	
+	/**
+		Implements `a < b` for UTF8String operands: true when
+		`Unifill.uCompare` reports -1 for the pair.
+	**/
+	@:op(A < B) private static function lt (a:UTF8String, b:UTF8String):Bool {
+		
+		var order = Unifill.uCompare (a, b);
+		return order == -1;
+		
+	}
+	
+	
+	/**
+		Implements `a > b` for UTF8String operands: true when
+		`Unifill.uCompare` reports 1 for the pair.
+	**/
+	@:op(A > B) private static function gt (a:UTF8String, b:UTF8String):Bool {
+		
+		var order = Unifill.uCompare (a, b);
+		return order == 1;
+		
+	}
+	
+	
+	/**
+		Implements `a <= b` for UTF8String operands: true unless
+		`Unifill.uCompare` reports 1 for the pair.
+	**/
+	@:op(A <= B) private static function lteq (a:UTF8String, b:UTF8String):Bool {
+		
+		var order = Unifill.uCompare (a, b);
+		return order != 1;
+		
+	}
+	
+	
+	/**
+		Implements `a >= b` for UTF8String operands: true unless
+		`Unifill.uCompare` reports -1 for the pair.
+	**/
+	@:op(A >= B) static function gteq (a:UTF8String, b:UTF8String):Bool {
+		
+		var order = Unifill.uCompare (a, b);
+		return order != -1;
+		
+	}
+	
+	
+	/**
+		Implements `a + b` for UTF8String operands: both operands are turned
+		into Strings with `Std.string` and joined, left operand first.
+	**/
+	@:op(A + B) static function plus (a:UTF8String, b:UTF8String):UTF8String {
+		
+		var left = Std.string (a);
+		var right = Std.string (b);
+		return left + right;
+		
+	}
+	
+	
+	
+	
+	// Get & Set Methods
+	
+	
+	
+	
+	/**
+		Getter backing the `length` property; the value is whatever
+		`haxe.Utf8.length` reports for `this` String.
+	**/
+	private function get_length ():Int {
+		
+		return Utf8.length (this);
+		
+	}
+	
+	
+}
+
+
+// generated from org.zamedev.lib.tools.CaseMapsGenerator
+
+private class Utf8Ext {
 public static function fillUpperToLowerMap(map : Map<Int, Int>) : Void {
 var i = 0;
 for (i in 0...26) map[0x41+i] = 0x61+i;
@@ -381,4 +721,4 @@ for (i in 0...51) map[0x10CC0+i] = 0x10C80+i;
 for (i in 0...32) map[0x118C0+i] = 0x118A0+i;
 for (i in 0...34) map[0x1E922+i] = 0x1E900+i;
 }
-}
+}
--- a/lime/text/UTFString.hx
+++ b/lime/text/UTFString.hx
@@ -1,354 +0,0 @@
-package lime.text;
-
-import haxe.Utf8;
-#if unifill
-import lime.text.unifill.Unifill;
-import lime.text.unifill.CodePoint;
-#end
-
-/**
- * ...
- * @author
- */
-abstract UTFString(String) from String to String
-{
-	#if (unifill && (neko || php || cpp))
-	static var inited:Bool = false;
-	static var lcaseMap:Map<Int, Int>;
-	static var ucaseMap:Map<Int, Int>;
-	#end
-	
-	/**
-		The number of characters in `this` String.
-	**/
-	public var length(get, never) : Int;
-	
-	/**
-		Creates a copy from a given String.
-	**/
-	public function new(str:String)
-	{
-		this = new String(str);
-	}
-	
-	/**
-		Caching of character maps in two case sensitivites
-	**/
-	static function initialize() : Void {
-		#if (unifill && (neko || php || cpp))
-		lcaseMap = new Map<Int, Int>();
-		ucaseMap = new Map<Int, Int>();
-		
-		Utf8ExtInternal.fillUpperToLowerMap(lcaseMap);
-		Utf8ExtInternal.fillLowerToUpperMap(ucaseMap);
-		inited = true;
-		#end
-	}
-	
-	/**
-		Returns a String where all characters of `this` String are upper case.
-		
-		Affects the characters `a-z`. Other characters remain unchanged.
-	**/
-	public function toUpperCase() : String
-	{
-		#if (unifill && (neko || php || cpp))
-		if (!inited) initialize();
-		
-		var r = new Utf8();
-		
-		Utf8.iter(this, function(v) {
-			r.addChar(ucaseMap.exists(v) ? ucaseMap[v] : v);
-		});
-		
-		return r.toString();
-		#else
-		return this.toUpperCase();
-		#end
-	}
-
-	/**
-		Returns a String where all characters of `this` String are lower case.
-		
-		Affects the characters `A-Z`. Other characters remain unchanged.
-	**/
-	public function toLowerCase() : String
-	{
-		#if (unifill && (neko || php || cpp))
-		if (!inited) initialize();
-		
-		var r = new Utf8();
-		
-		Utf8.iter(this, function(v) {
-			r.addChar(lcaseMap.exists(v) ? lcaseMap[v] : v);
-		});
-		
-		return r.toString();
-		#else
-		return this.toLowerCase();
-		#end
-	}
-
-	/**
-		Returns the character at position `index` of `this` String.
-		
-		If `index` is negative or exceeds `this.length`, the empty String `""`
-		is returned.
-	**/
-	public function charAt(index : Int) : String
-	{
-		#if unifill
-		return Unifill.uCharAt(this, index);
-		#else
-		return this.charAt(index);
-		#end
-	}
-
-	/**
-		Returns the character code at position `index` of `this` String.
-		
-		If `index` is negative or exceeds `this.length`, `null` is returned.
-		
-		To obtain the character code of a single character, `"x".code` can be
-		used instead to inline the character code at compile time. Note that
-		this only works on String literals of length 1.
-	**/
-	public function charCodeAt(index : Int) : Null<Int>
-	{
-		#if unifill
-		return Utf8.charCodeAt(this, index);
-		#else
-		return this.charCodeAt(index);
-		#end
-	}
-	
-	/**
-		Returns the position of the leftmost occurence of `str` within `this`
-		String.
-		
-		If `startIndex` is given, the search is performed within the substring
-		of `this` String starting from `startIndex`. Otherwise the search is
-		performed within `this` String. In either case, the returned position
-		is relative to the beginning of `this` String.
-		
-		If `str` cannot be found, -1 is returned.
-	**/
-	public function indexOf(str : String, ?startIndex : Int = 0) : Int
-	{
-		#if unifill
-		return Unifill.uIndexOf(this, str, startIndex);
-		#else
-		return this.indexOf(str, startIndex);
-		#end
-	}
-	
-	/**
-		Returns the position of the rightmost occurence of `str` within `this`
-		String.
-		
-		If `startIndex` is given, the search is performed within the substring
-		of `this` String from 0 to `startIndex`. Otherwise the search is
-		performed within `this` String. In either case, the returned position
-		is relative to the beginning of `this` String.
-		
-		If `str` cannot be found, -1 is returned.
-	**/
-	public function lastIndexOf(str : String, ?startIndex : Int) : Int
-	{
-		#if unifill
-		return Unifill.uLastIndexOf(this, str, startIndex);
-		#else
-		return this.lastIndexOf(str, startIndex);
-		#end
-	}
-
-	/**
-		Splits `this` String at each occurence of `delimiter`.
-		
-		If `this` String is the empty String `""`, the result is not consistent
-		across targets and may either be `[]` (on Js, Cpp) or `[""]`.
-		
-		If `delimiter` is the empty String `""`, `this` String is split into an
-		Array of `this.length` elements, where the elements correspond to the
-		characters of `this` String.
-		
-		If `delimiter` is not found within `this` String, the result is an Array
-		with one element, which equals `this` String.
-		
-		If `delimiter` is null, the result is unspecified.
-		
-		Otherwise, `this` String is split into parts at each occurence of
-		`delimiter`. If `this` String starts (or ends) with `delimiter`, the
-		result `Array` contains a leading (or trailing) empty String `""` element.
-		Two subsequent delimiters also result in an empty String `""` element.
-	**/
-	public function split(delimiter : String) : Array<String>
-	{
-		#if unifill
-		return Unifill.uSplit(this, delimiter);
-		#else
-		return this.split(delimiter);
-		#end
-	}
-
-	/**
-		Returns `len` characters of `this` String, starting at position `pos`.
-		
-		If `len` is omitted, all characters from position `pos` to the end of
-		`this` String are included.
-		
-		If `pos` is negative, its value is calculated from the end of `this`
-		String by `this.length + pos`. If this yields a negative value, 0 is
-		used instead.
-		
-		If the calculated position + `len` exceeds `this.length`, the characters
-		from that position to the end of `this` String are returned.
-		
-		If `len` is negative, the result is unspecified.
-	**/
-	public function substr(pos : Int, ?len : Int) : String
-	{
-		#if unifill
-		return Utf8.sub(this, pos, len);
-		#else
-		return this.substr(pos, len);
-		#end
-	}
-
-	/**
-		Returns the part of `this` String from `startIndex` to but not including `endIndex`.
-		
-		If `startIndex` or `endIndex` are negative, 0 is used instead.
-		
-		If `startIndex` exceeds `endIndex`, they are swapped.
-		
-		If the (possibly swapped) `endIndex` is omitted or exceeds
-		`this.length`, `this.length` is used instead.
-		
-		If the (possibly swapped) `startIndex` exceeds `this.length`, the empty
-		String `""` is returned.
-	**/
-	public function substring(startIndex : Int, ?endIndex : Int) : String
-	{
-		#if unifill
-		return Unifill.uSubstring(this, startIndex, endIndex);
-		#else
-		return this.substring(startIndex, endIndex);
-		#end
-	}
-
-	/**
-		Returns the String itself.
-	**/
-	public function toString() : String
-	{
-		return this;
-	}
-
-	/**
-		Returns the String corresponding to the character code `code`.
-		
-		If `code` is negative or has another invalid value, the result is
-		unspecified.
-	**/
-	public static function fromCharCode(code : Int) : String
-	{
-		#if unifill
-		//var sb = new StringBuf();
-		//sb.addChar(code);
-		//return sb.toString();
-		return CodePoint.fromInt(code);
-		#else
-		return String.fromCharCode(code);
-		#end
-	}
-	
-	/**
-		Returns the string corresponding to the array of character codes `codes`.
-		
-		If #unifill is defined, these codes will be treated as UTF-8 code points,
-		otherwise it will default to using String.fromCharCode() for each character
-	 **/
-	public static function fromCharCodes(codes : Array<Int>) : String
-	{
-		var s = "";
-		for (code in codes)
-		{
-			#if unifill
-			s += CodePoint.fromInt(code);
-			#else
-			s += String.fromCharCode(code);
-			#end
-		}
-		return s;
-	}
-	
-	/**********PRIVATE*************/
-	
-	@:op(A == B) static function equals(a:UTFString, b:UTFString) : Bool
-	{
-		#if unifill
-		return Unifill.uCompare(a, b) == 0;
-		#else
-		return Std.string(a) == Std.string(b);
-		#end
-	}
-	
-	@:op(A < B) static function lt(a:UTFString, b:UTFString) : Bool
-	{
-		#if unifill
-		return Unifill.uCompare(a, b) == -1;
-		#else
-		return Std.string(a) < Std.string(b);
-		#end
-	}
-	
-	@:op(A > B) static function gt(a:UTFString, b:UTFString) : Bool
-	{
-		#if unifill
-		return Unifill.uCompare(a, b) == 1;
-		#else
-		return Std.string(a) > Std.string(b);
-		#end
-	}
-	
-	@:op(A <= B) static function lteq(a:UTFString, b:UTFString) : Bool
-	{
-		#if unifill
-		return Unifill.uCompare(a, b) != 1;
-		#else
-		return Std.string(a) <= Std.string(b);
-		#end
-	}
-	
-	@:op(A >= B) static function gteq(a:UTFString, b:UTFString) : Bool
-	{
-		#if unifill
-		return Unifill.uCompare(a, b) != -1;
-		#else
-		return Std.string(a) >= Std.string(b);
-		#end
-	}
-	
-	@:op(A + B) static function plus(a:UTFString, b:UTFString) : String
-	{
-		#if unifill
-		var sb = new StringBuf();
-		sb.add(Std.string(a));
-		sb.add(Std.string(b));
-		return sb.toString();
-		#else
-		return Std.string(a) + Std.string(b);
-		#end
-	}
-	
-	private function get_length() : Int
-	{
-		#if unifill
-		return Utf8.length(this);
-		#else
-		return this.length;
-		#end
-	}
-	
-}