Browse Source

[std] Add StringBuf.clear() (#11848)

* [std] Add StringBuf.clear

* [std] Clarify StringBuf.clear documentation

* clear properly on HL too

* use on Serializer.reset

see #12068

---------

Co-authored-by: Simon Krajewski <[email protected]>
Frixuu 5 months ago
parent
commit
d95817b47b

+ 7 - 0
src/macro/eval/evalStdLib.ml

@@ -2429,6 +2429,12 @@ module StdStringBuf = struct
 		vnull
 		vnull
 	)
 	)
 
 
+	let clear = vifun0 (fun vthis -> (* eval-side implementation of StringBuf.clear; registered under "clear" in init_standard_library *)
+		let this = this vthis in (* same receiver unwrapping as get_length; yields the buffer record *)
+		VStringBuffer.clear this; (* the actual reset lives in VStringBuffer.clear *)
+		vnull (* nothing to return: the Haxe-side signature is clear():Void *)
+	)
+
 	let get_length = vifun0 (fun vthis ->
 	let get_length = vifun0 (fun vthis ->
 		let this = this vthis in
 		let this = this vthis in
 		vint this.blength
 		vint this.blength
@@ -3691,6 +3697,7 @@ let init_standard_library builtins =
 		"add",StdStringBuf.add;
 		"add",StdStringBuf.add;
 		"addChar",StdStringBuf.addChar;
 		"addChar",StdStringBuf.addChar;
 		"addSub",StdStringBuf.addSub;
 		"addSub",StdStringBuf.addSub;
+		"clear",StdStringBuf.clear;
 		"get_length",StdStringBuf.get_length;
 		"get_length",StdStringBuf.get_length;
 		"toString",StdStringBuf.toString;
 		"toString",StdStringBuf.toString;
 	];
 	];

+ 4 - 0
src/macro/eval/evalString.ml

@@ -284,6 +284,10 @@ module VStringBuffer = struct
 		Buffer.add_substring this.bbuffer s.sstring b_pos b_len;
 		Buffer.add_substring this.bbuffer s.sstring b_pos b_len;
 		this.blength <- this.blength + c_len
 		this.blength <- this.blength + c_len
 
 
+	let clear this = (* empties the string buffer in place so it can be reused *)
+		Buffer.clear this.bbuffer; (* drop the accumulated contents of the underlying Buffer *)
+		this.blength <- 0 (* blength is tracked separately from bbuffer, so it has to be reset as well *)
+
 	let contents this =
 	let contents this =
 		create_with_length (Buffer.contents this.bbuffer) this.blength
 		create_with_length (Buffer.contents this.bbuffer) this.blength
 end
 end

+ 7 - 0
std/StringBuf.hx

@@ -88,6 +88,13 @@ class StringBuf {
 		b += (len == null ? s.substr(pos) : s.substr(pos, len));
 		b += (len == null ? s.substr(pos) : s.substr(pos, len));
 	}
 	}
 
 
+	/**
+		Removes all characters from `this` StringBuf, making it possible to reuse it.
+	**/
+	public inline function clear():Void {
+		b = ""; // start over from an empty string; the add* methods append to b with +=
+	}
+
 	/**
 	/**
 		Returns the content of `this` StringBuf as String.
 		Returns the content of `this` StringBuf as String.
 
 

+ 19 - 11
std/cpp/_std/StringBuf.hx

@@ -21,31 +21,34 @@
  */
  */
 
 
 import cpp.NativeString;
 import cpp.NativeString;
-
-using cpp.NativeArray;
+import cpp.Pointer;
 
 
 @:coreApi
 @:coreApi
 class StringBuf {
 class StringBuf {
-	private var b:Array<String>;
+	private var b:Null<Array<String>> = null;
 
 
 	public var length(get, never):Int;
 	public var length(get, never):Int;
 
 
-	var charBuf:Array<cpp.Char>;
+	var charBuf:Null<Array<cpp.Char>> = null;
 
 
 	public function new():Void {}
 	public function new():Void {}
 
 
-	private function charBufAsString():String {
-		var len = charBuf.length;
-		charBuf.push(0);
-		return NativeString.fromGcPointer(charBuf.address(0), len);
+	private function drainCharBuf():String { // converts the pending charBuf into a String and detaches it from this StringBuf
+		final buffer = this.charBuf; // NOTE(review): dereferenced without a null check — callers appear to test charBuf != null first; confirm
+		final length = buffer.length; // measured before the extra element is pushed, so that element is not counted
+		buffer.push(0); // append a trailing 0 (presumably a NUL terminator for the native string — TODO confirm)
+		final bufferPtr = Pointer.arrayElem(buffer, 0); // pointer to the first element of the array
+		final bufferString = NativeString.fromGcPointer(bufferPtr, length); // build the String from that pointer using the pre-push length
+		this.charBuf = null; // drop our reference to the drained array
+		return bufferString;
 	}
 	}
 
 
 	private function flush():Void {
 	private function flush():Void {
+		final charBufAsString = drainCharBuf(); // drain once up front; drainCharBuf also sets charBuf back to null
 		if (b == null)
 		if (b == null)
-			b = [charBufAsString()];
+			b = [charBufAsString]; // first flushed chunk: create the array of string parts
 		else
 		else
-			b.push(charBufAsString());
-		charBuf = null;
+			b.push(charBufAsString); // otherwise append the chunk to the existing parts
 	}
 	}
 
 
 	function get_length():Int {
 	function get_length():Int {
@@ -89,6 +92,11 @@ class StringBuf {
 		}
 		}
 	}
 	}
 
 
+	public function clear():Void { // empties both backing arrays in place
+		this.charBuf?.resize(0); // ?. makes this a no-op while charBuf is still null (its declared default)
+		this.b?.resize(0); // same for the array of already-flushed string parts
+	}
+
 	public function toString():String {
 	public function toString():String {
 		if (charBuf != null)
 		if (charBuf != null)
 			flush();
 			flush();

+ 1 - 0
std/eval/_std/StringBuf.hx

@@ -27,5 +27,6 @@ extern class StringBuf {
 	function add<T>(x:T):Void;
 	function add<T>(x:T):Void;
 	function addChar(c:Int):Void;
 	function addChar(c:Int):Void;
 	function addSub(s:String, pos:Int, ?len:Int):Void;
 	function addSub(s:String, pos:Int, ?len:Int):Void;
+	function clear():Void;
 	function toString():String;
 	function toString():String;
 }
 }

+ 26 - 26
std/haxe/Serializer.hx

@@ -44,33 +44,33 @@ import haxe.ds.List;
 **/
 **/
 class Serializer {
 class Serializer {
 	/**
 	/**
-		Enables object caching during serialization to handle circular references and 
+		Enables object caching during serialization to handle circular references and
 		repeated objects.
 		repeated objects.
-	
-		Set `USE_CACHE` to `true` if the values you are serializing may contain 
-		circular references or repeated objects. This prevents infinite loops and 
+
+		Set `USE_CACHE` to `true` if the values you are serializing may contain
+		circular references or repeated objects. This prevents infinite loops and
 		ensures that shared references are preserved in the serialized output.
 		ensures that shared references are preserved in the serialized output.
-	
-		Enabling this option may also reduce the size of the resulting serialized 
+
+		Enabling this option may also reduce the size of the resulting serialized
 		string, but can have a minor performance impact.
 		string, but can have a minor performance impact.
-	
-		This is a global default. You can override it per instance using the 
+
+		This is a global default. You can override it per instance using the
 		`useCache` field on a `Serializer`.
 		`useCache` field on a `Serializer`.
 	 */
 	 */
 	public static var USE_CACHE = false;
 	public static var USE_CACHE = false;
 
 
 	/**
 	/**
 		Serializes enum values using constructor indices instead of names.
 		Serializes enum values using constructor indices instead of names.
-	
-		When `USE_ENUM_INDEX` is set to `true`, enum constructors are serialized by 
-		their numeric index. This can reduce the size of the serialized data, 
+
+		When `USE_ENUM_INDEX` is set to `true`, enum constructors are serialized by
+		their numeric index. This can reduce the size of the serialized data,
 		especially for enums with long or frequently used constructor names.
 		especially for enums with long or frequently used constructor names.
-	
-		However, using indices makes serialized data more fragile for long-term 
-		storage. If enum definitions change (e.g., by adding or removing constructors), 
+
+		However, using indices makes serialized data more fragile for long-term
+		storage. If enum definitions change (e.g., by adding or removing constructors),
 		the indices may no longer match the intended constructors.
 		the indices may no longer match the intended constructors.
-	
-		This is a global default. You can override it per instance using the 
+
+		This is a global default. You can override it per instance using the
 		`useEnumIndex` field on a `Serializer`.
 		`useEnumIndex` field on a `Serializer`.
 	 */
 	 */
 	public static var USE_ENUM_INDEX = false;
 	public static var USE_ENUM_INDEX = false;
@@ -84,20 +84,20 @@ class Serializer {
 	var scount:Int;
 	var scount:Int;
 
 
 	/**
 	/**
-	 	Determines whether this `Serializer` instance uses object caching.
-	
-	 	When enabled, repeated references to the same object are serialized using references 
-	 	instead of duplicating data, reducing output size and preserving object identity.
-	
-	 	See `USE_CACHE` for a complete description.
- 	*/
+		Determines whether this `Serializer` instance uses object caching.
+
+		When enabled, repeated references to the same object are serialized using references
+		instead of duplicating data, reducing output size and preserving object identity.
+
+		See `USE_CACHE` for a complete description.
+	 */
 	public var useCache:Bool;
 	public var useCache:Bool;
 
 
 	/**
 	/**
 		Determines whether this `Serializer` instance serializes enum values using their index
 		Determines whether this `Serializer` instance serializes enum values using their index
 		instead of their constructor name.
 		instead of their constructor name.
 
 
-		Using indexes can reduce the size of the serialized data but may be less readable and 
+		Using indexes can reduce the size of the serialized data but may be less readable and
 		more fragile if enum definitions change.
 		more fragile if enum definitions change.
 
 
 		See `USE_ENUM_INDEX` for a complete description.
 		See `USE_ENUM_INDEX` for a complete description.
@@ -125,12 +125,12 @@ class Serializer {
 
 
 	/**
 	/**
 		Resets the internal state of the Serializer, allowing it to be reused.
 		Resets the internal state of the Serializer, allowing it to be reused.
-		
+
 		This does not affect the `useCache` or `useEnumIndex` properties;
 		This does not affect the `useCache` or `useEnumIndex` properties;
 		their values will remain unchanged after calling this method.
 		their values will remain unchanged after calling this method.
 	**/
 	**/
 	public function reset() {
 	public function reset() {
-		buf = new StringBuf();
+		buf.clear();
 		cache.resize(0);
 		cache.resize(0);
 		shash.clear();
 		shash.clear();
 		scount = 0;
 		scount = 0;

+ 12 - 4
std/hl/_std/StringBuf.hx

@@ -27,6 +27,10 @@
 	public var length(get, never):Int;
 	public var length(get, never):Int;
 
 
 	public function new():Void {
 	public function new():Void {
+		initialize();
+	}
+
+	inline function initialize():Void {
 		pos = 0;
 		pos = 0;
 		size = 8; // ensure 4 bytes expand for addChar()
 		size = 8; // ensure 4 bytes expand for addChar()
 		b = new hl.Bytes(size);
 		b = new hl.Bytes(size);
@@ -55,16 +59,16 @@
 
 
 	public function add<T>(x:T):Void {
 	public function add<T>(x:T):Void {
 		var slen = 0;
 		var slen = 0;
-		var str = Std.downcast((x:Dynamic),String);
-		if( str != null ) {
-			__add(@:privateAccess str.bytes, 0, str.length<<1);
+		var str = Std.downcast((x : Dynamic), String); // non-null only when x is actually a String
+		if (str != null) { // String case: append its bytes directly
+			__add(@:privateAccess str.bytes, 0, str.length << 1); // length << 1: byte count is twice the character count (presumably 2-byte code units — TODO confirm)
 			return;
 			return;
 		}
 		}
 		var sbytes = hl.Bytes.fromValue(x, new hl.Ref(slen));
 		var sbytes = hl.Bytes.fromValue(x, new hl.Ref(slen));
 		__add(sbytes, 0, slen << 1);
 		__add(sbytes, 0, slen << 1);
 	}
 	}
 
 
-	public function addSub(s:String, pos:Int, ?len:Int):Void@:privateAccess {
+	public function addSub(s:String, pos:Int, ?len:Int):Void @:privateAccess {
 		if (pos < 0)
 		if (pos < 0)
 			pos = 0;
 			pos = 0;
 		if (pos >= s.length)
 		if (pos >= s.length)
@@ -101,6 +105,10 @@
 			throw "Invalid unicode char " + c;
 			throw "Invalid unicode char " + c;
 	}
 	}
 
 
+	public function clear():Void {
+		initialize(); // re-run the constructor's setup; as far as visible here that resets pos to 0, size to 8 and allocates a new hl.Bytes
+	}
+
 	public function toString():String {
 	public function toString():String {
 		if (pos + 2 > size)
 		if (pos + 2 > size)
 			__expand(0);
 			__expand(0);

+ 4 - 0
std/jvm/_std/StringBuf.hx

@@ -99,6 +99,10 @@ class StringBuf {
 		b.appendCodePoint(c);
 		b.appendCodePoint(c);
 	}
 	}
 
 
+	public function clear():Void {
+		b.setLength(0); // truncate the existing builder to zero length rather than replacing it (b is presumably a java.lang.StringBuilder, given appendCodePoint — confirm)
+	}
+
 	public function toString():String {
 	public function toString():String {
 		return b.toString();
 		return b.toString();
 	}
 	}

+ 29 - 10
std/lua/_std/StringBuf.hx

@@ -20,15 +20,25 @@
  * DEALINGS IN THE SOFTWARE.
  * DEALINGS IN THE SOFTWARE.
  */
  */
 
 
+import lua.Lua;
 import lua.Table;
 import lua.Table;
 
 
 class StringBuf {
 class StringBuf {
-	var b:Table<Int, String>;
+	private var b:Table<Int, String>;
+
+	/**
+		Number of live elements in the internal buffer table, i.e. those added since the last `clear`.
+
+		If `this` StringBuf has been `clear`ed previously,
+		this value might be smaller than the length (`#`) of that table.
+	**/
+	private var bufferLength:Int;
 
 
 	public var length(get, null):Int;
 	public var length(get, null):Int;
 
 
 	public inline function new() {
 	public inline function new() {
 		b = Table.create();
 		b = Table.create();
+		this.bufferLength = 0;
 		this.length = 0;
 		this.length = 0;
 	}
 	}
 
 
@@ -37,23 +47,32 @@ class StringBuf {
 	}
 	}
 
 
 	public inline function add<T>(x:T):Void {
 	public inline function add<T>(x:T):Void {
-		var str = Std.string(x);
-		Table.insert(b, str);
-		length += str.length;
+		final str = Std.string(x);
+		final i = this.bufferLength += 1; // next 1-based slot; this also bumps the live-element count
+		Lua.rawset(this.b, i, str); // write at an explicit index instead of Table.insert, so slots left behind by clear() are overwritten
+		this.length += str.length; // keep the reported character count in sync
 	}
 	}
 
 
 	public inline function addChar(c:Int):Void {
 	public inline function addChar(c:Int):Void {
-		Table.insert(b, String.fromCharCode(c));
-		length += 1;
+		final i = this.bufferLength += 1; // next 1-based slot; this also bumps the live-element count
+		Lua.rawset(this.b, i, String.fromCharCode(c)); // store the character as its own table entry at that slot
+		this.length += 1; // one char code is counted as one unit of length
 	}
 	}
 
 
 	public inline function addSub(s:String, pos:Int, ?len:Int):Void {
 	public inline function addSub(s:String, pos:Int, ?len:Int):Void {
-		var part = len == null ? s.substr(pos) : s.substr(pos, len);
-		Table.insert(b, part);
-		length += part.length;
+		this.add(s.substr(pos, len)); // reuse add() for the slot bookkeeping; NOTE(review): relies on substr treating a null len like an omitted one — confirm
+	}
+
+	public inline function clear():Void {
+		this.bufferLength = 0; // forget the live slots; the table itself is not emptied, later adds overwrite from index 1
+		this.length = 0; // reported character count restarts at zero
 	}
 	}
 
 
 	public inline function toString():String {
 	public inline function toString():String {
-		return Table.concat(b);
+		final len = this.bufferLength;
+		if (len == 0) { // nothing live in the table (fresh or just cleared)
+			return "";
+		}
+		return Table.concat(this.b, "", 1, len); // join only slots 1..len, so entries left over from before a clear() are ignored
 	}
 	}
 }
 }

+ 5 - 0
std/neko/_std/StringBuf.hx

@@ -45,6 +45,10 @@
 			__add_char(b, c);
 			__add_char(b, c);
 		}
 		}
 
 
+	public inline function clear():Void {
+		buffer_reset(b); // hand the native buffer to the "buffer_reset" primitive loaded from the neko std library
+	}
+
 	public inline function toString():String {
 	public inline function toString():String {
 		return new String(__to_string(b));
 		return new String(__to_string(b));
 	}
 	}
@@ -54,5 +58,6 @@
 	static var __add_char:Dynamic = neko.Lib.load("std", "buffer_add_char", 2);
 	static var __add_char:Dynamic = neko.Lib.load("std", "buffer_add_char", 2);
 	static var __add_sub:Dynamic = neko.Lib.load("std", "buffer_add_sub", 4);
 	static var __add_sub:Dynamic = neko.Lib.load("std", "buffer_add_sub", 4);
 	static var __to_string:Dynamic = neko.Lib.load("std", "buffer_string", 1);
 	static var __to_string:Dynamic = neko.Lib.load("std", "buffer_string", 1);
+	static var buffer_reset:Dynamic = neko.Lib.load("std", "buffer_reset", 1); // NOTE(review): loaded without the try/catch fallback that buffer_get_length gets below — confirm every supported Neko runtime exports this primitive
 	static var __get_length:Dynamic = try neko.Lib.load("std", "buffer_get_length", 1) catch (e:Dynamic) null;
 	static var __get_length:Dynamic = try neko.Lib.load("std", "buffer_get_length", 1) catch (e:Dynamic) null;
 }
 }

+ 4 - 0
std/php/_std/StringBuf.hx

@@ -56,6 +56,10 @@ import php.Syntax;
 		b += String.fromCharCode(c);
 		b += String.fromCharCode(c);
 	}
 	}
 
 
+	public inline function clear():Void {
+		b = ""; // b is a plain string that addChar appends to with +=, so clearing is just reassigning the empty string
+	}
+
 	public inline function toString():String {
 	public inline function toString():String {
 		return b;
 		return b;
 	}
 	}

+ 9 - 8
std/python/_std/StringBuf.hx

@@ -20,7 +20,6 @@
  * DEALINGS IN THE SOFTWARE.
  * DEALINGS IN THE SOFTWARE.
  */
  */
 
 
-import python.lib.io.IOBase.SeekSet;
 import python.lib.io.StringIO;
 import python.lib.io.StringIO;
 
 
 @:coreApi
 @:coreApi
@@ -34,11 +33,7 @@ class StringBuf {
 	public var length(get, never):Int;
 	public var length(get, never):Int;
 
 
 	function get_length():Int {
 	function get_length():Int {
-		var pos = b.tell();
-		b.seek(0, SeekEnd);
-		var len = b.tell();
-		b.seek(pos, SeekSet);
-		return len;
+		return b.tell(); // the current stream position doubles as the logical length; clear() only rewinds, so the end of the stream is no longer a reliable measure
 	}
 	}
 
 
 	public inline function add<T>(x:T):Void {
 	public inline function add<T>(x:T):Void {
@@ -57,7 +52,13 @@ class StringBuf {
 		add1((len == null ? s.substr(pos) : s.substr(pos, len)));
 		add1((len == null ? s.substr(pos) : s.substr(pos, len)));
 	}
 	}
 
 
-	public inline function toString():String {
-		return b.getvalue();
+	public inline function clear():Void {
+		b.seek(0, SeekSet); // rewind only; nothing is truncated here (presumably later writes overwrite the old contents from position 0 — confirm)
+	}
+
+	public function toString():String {
+		final length = this.length; // capture the position (= logical length) before rewinding
+		b.seek(0, SeekSet); // go back to the start so the read covers the live contents
+		return b.read(length); // read exactly `length` characters, which skips any stale tail and leaves the position back at the logical end
 	}
 	}
 }
 }

+ 41 - 1
tests/unit/src/unitstd/StringBuf.unit.hx

@@ -1,6 +1,7 @@
 // add, toString
 // add, toString
 var x = new StringBuf();
 var x = new StringBuf();
 x.toString() == "";
 x.toString() == "";
+x.length == 0;
 x.add(null);
 x.add(null);
 x.toString() == "null";
 x.toString() == "null";
 
 
@@ -37,8 +38,47 @@ x.addSub("a👽b", 1, 1);
 x.toString() == "👽";
 x.toString() == "👽";
 #end
 #end
 
 
+// StringBuf can store multiple elements
+final x = new StringBuf();
+x.add("ab");
+x.add("cd");
+x.addChar("e".code);
+x.add("fg");
+x.toString() == "abcdefg";
+
+// Calling toString() does not empty the buffer
+x.toString() == "abcdefg";
+x.toString() == "abcdefg";
+x.length == 7;
+
 // identity
 // identity
 function identityTest(s:StringBuf) {
 function identityTest(s:StringBuf) {
 	return s;
 	return s;
 }
 }
-identityTest(x) == x;
+identityTest(x) == x;
+
+// Clearing a buffer resets its visible state
+x.length > 0;
+x.clear();
+x.toString() == "";
+x.length == 0;
+
+// Previously cleared buffers do not leak past state
+x.add("foo");
+x.toString() == "foo";
+x.length == 3;
+
+// Buffers can be cleared multiple times
+x.clear();
+x.length == 0;
+x.clear();
+x.clear();
+x.clear();
+x.length == 0;
+
+// Buffers can be cleared immediately after creation
+// (i.e. `clear` does not depend on any private state being non-null)
+final x = new StringBuf();
+x.clear();
+x.toString() == "";
+x.length == 0;