浏览代码

[std] Add StringBuf.clear() (#11848)

* [std] Add StringBuf.clear

* [std] Clarify StringBuf.clear documentation

* clear properly on HL too

* use on Serializer.reset

see #12068

---------

Co-authored-by: Simon Krajewski <[email protected]>
Frixuu 5 月之前
父节点
当前提交
d95817b47b

+ 7 - 0
src/macro/eval/evalStdLib.ml

@@ -2429,6 +2429,12 @@ module StdStringBuf = struct
 		vnull
 	)
 
+	let clear = vifun0 (fun vthis -> (* instance function registered under "clear" in init_standard_library *)
+		let this = this vthis in
+		VStringBuffer.clear this; (* empties the underlying buffer and zeroes its tracked length *)
+		vnull (* the Haxe-side extern declares clear() as returning Void *)
+	)
+
 	let get_length = vifun0 (fun vthis ->
 		let this = this vthis in
 		vint this.blength
@@ -3691,6 +3697,7 @@ let init_standard_library builtins =
 		"add",StdStringBuf.add;
 		"addChar",StdStringBuf.addChar;
 		"addSub",StdStringBuf.addSub;
+		"clear",StdStringBuf.clear;
 		"get_length",StdStringBuf.get_length;
 		"toString",StdStringBuf.toString;
 	];

+ 4 - 0
src/macro/eval/evalString.ml

@@ -284,6 +284,10 @@ module VStringBuffer = struct
 		Buffer.add_substring this.bbuffer s.sstring b_pos b_len;
 		this.blength <- this.blength + c_len
 
+	let clear this = (* resets the string buffer so it can be reused *)
+		Buffer.clear this.bbuffer; (* drop everything accumulated so far *)
+		this.blength <- 0 (* blength is tracked separately from the Buffer, so reset it as well *)
+
 	let contents this =
 		create_with_length (Buffer.contents this.bbuffer) this.blength
 end

+ 7 - 0
std/StringBuf.hx

@@ -88,6 +88,13 @@ class StringBuf {
 		b += (len == null ? s.substr(pos) : s.substr(pos, len));
 	}
 
+	/**
+		Removes all characters from `this` StringBuf, making it possible to reuse it.
+	**/
+	public inline function clear():Void {
+		b = ""; // b is the backing String that the add* methods append to with `+=`
+	}
+
 	/**
 		Returns the content of `this` StringBuf as String.
 

+ 19 - 11
std/cpp/_std/StringBuf.hx

@@ -21,31 +21,34 @@
  */
 
 import cpp.NativeString;
-
-using cpp.NativeArray;
+import cpp.Pointer;
 
 @:coreApi
 class StringBuf {
-	private var b:Array<String>;
+	private var b:Null<Array<String>> = null;
 
 	public var length(get, never):Int;
 
-	var charBuf:Array<cpp.Char>;
+	var charBuf:Null<Array<cpp.Char>> = null;
 
 	public function new():Void {}
 
-	private function charBufAsString():String {
-		var len = charBuf.length;
-		charBuf.push(0);
-		return NativeString.fromGcPointer(charBuf.address(0), len);
+	private function drainCharBuf():String { // turns the pending chars into a String and leaves charBuf null
+		final buffer = this.charBuf;
+		final length = buffer.length; // captured before the push below, so the extra 0 is not counted
+		buffer.push(0); // presumably a NUL terminator for the native string — confirm
+		final bufferPtr = Pointer.arrayElem(buffer, 0);
+		final bufferString = NativeString.fromGcPointer(bufferPtr, length);
+		this.charBuf = null; // resetting here replaces the `charBuf = null` that flush() used to do itself
+		return bufferString;
 	}
 
 	private function flush():Void {
+		final charBufAsString = drainCharBuf(); // drainCharBuf() also resets charBuf to null
 		if (b == null)
-			b = [charBufAsString()];
+			b = [charBufAsString];
 		else
-			b.push(charBufAsString());
-		charBuf = null;
+			b.push(charBufAsString);
 	}
 
 	function get_length():Int {
@@ -89,6 +92,11 @@ class StringBuf {
 		}
 	}
 
+	public function clear():Void { // empties both internal buffers in place
+		this.charBuf?.resize(0); // `?.` because charBuf starts as null and is nulled again by drainCharBuf()
+		this.b?.resize(0); // `?.` because b starts as null (flush() creates it on demand)
+	}
+
 	public function toString():String {
 		if (charBuf != null)
 			flush();

+ 1 - 0
std/eval/_std/StringBuf.hx

@@ -27,5 +27,6 @@ extern class StringBuf {
 	function add<T>(x:T):Void;
 	function addChar(c:Int):Void;
 	function addSub(s:String, pos:Int, ?len:Int):Void;
+	function clear():Void;
 	function toString():String;
 }

+ 26 - 26
std/haxe/Serializer.hx

@@ -44,33 +44,33 @@ import haxe.ds.List;
 **/
 class Serializer {
 	/**
-		Enables object caching during serialization to handle circular references and 
+		Enables object caching during serialization to handle circular references and
 		repeated objects.
-	
-		Set `USE_CACHE` to `true` if the values you are serializing may contain 
-		circular references or repeated objects. This prevents infinite loops and 
+
+		Set `USE_CACHE` to `true` if the values you are serializing may contain
+		circular references or repeated objects. This prevents infinite loops and
 		ensures that shared references are preserved in the serialized output.
-	
-		Enabling this option may also reduce the size of the resulting serialized 
+
+		Enabling this option may also reduce the size of the resulting serialized
 		string, but can have a minor performance impact.
-	
-		This is a global default. You can override it per instance using the 
+
+		This is a global default. You can override it per instance using the
 		`useCache` field on a `Serializer`.
 	 */
 	public static var USE_CACHE = false;
 
 	/**
 		Serializes enum values using constructor indices instead of names.
-	
-		When `USE_ENUM_INDEX` is set to `true`, enum constructors are serialized by 
-		their numeric index. This can reduce the size of the serialized data, 
+
+		When `USE_ENUM_INDEX` is set to `true`, enum constructors are serialized by
+		their numeric index. This can reduce the size of the serialized data,
 		especially for enums with long or frequently used constructor names.
-	
-		However, using indices makes serialized data more fragile for long-term 
-		storage. If enum definitions change (e.g., by adding or removing constructors), 
+
+		However, using indices makes serialized data more fragile for long-term
+		storage. If enum definitions change (e.g., by adding or removing constructors),
 		the indices may no longer match the intended constructors.
-	
-		This is a global default. You can override it per instance using the 
+
+		This is a global default. You can override it per instance using the
 		`useEnumIndex` field on a `Serializer`.
 	 */
 	public static var USE_ENUM_INDEX = false;
@@ -84,20 +84,20 @@ class Serializer {
 	var scount:Int;
 
 	/**
-	 	Determines whether this `Serializer` instance uses object caching.
-	
-	 	When enabled, repeated references to the same object are serialized using references 
-	 	instead of duplicating data, reducing output size and preserving object identity.
-	
-	 	See `USE_CACHE` for a complete description.
- 	*/
+		Determines whether this `Serializer` instance uses object caching.
+
+		When enabled, repeated references to the same object are serialized using references
+		instead of duplicating data, reducing output size and preserving object identity.
+
+		See `USE_CACHE` for a complete description.
+	 */
 	public var useCache:Bool;
 
 	/**
 		Determines whether this `Serializer` instance serializes enum values using their index
 		instead of their constructor name.
 
-		Using indexes can reduce the size of the serialized data but may be less readable and 
+		Using indexes can reduce the size of the serialized data but may be less readable and
 		more fragile if enum definitions change.
 
 		See `USE_ENUM_INDEX` for a complete description.
@@ -125,12 +125,12 @@ class Serializer {
 
 	/**
 		Resets the internal state of the Serializer, allowing it to be reused.
-		
+
 		This does not affect the `useCache` or `useEnumIndex` properties;
 		their values will remain unchanged after calling this method.
 	**/
 	public function reset() {
-		buf = new StringBuf();
+		buf.clear();
 		cache.resize(0);
 		shash.clear();
 		scount = 0;

+ 12 - 4
std/hl/_std/StringBuf.hx

@@ -27,6 +27,10 @@
 	public var length(get, never):Int;
 
 	public function new():Void {
+		initialize(); // setup lives in initialize() so that clear() can re-run it
+	}
+
+	inline function initialize():Void {
 		pos = 0;
 		size = 8; // ensure 4 bytes expand for addChar()
 		b = new hl.Bytes(size);
@@ -55,16 +59,16 @@
 
 	public function add<T>(x:T):Void {
 		var slen = 0;
-		var str = Std.downcast((x:Dynamic),String);
-		if( str != null ) {
-			__add(@:privateAccess str.bytes, 0, str.length<<1);
+		var str = Std.downcast((x : Dynamic), String);
+		if (str != null) { // String values are appended straight from their internal bytes
+			__add(@:privateAccess str.bytes, 0, str.length << 1); // length doubled — presumably 2 bytes per character, confirm
 			return;
 		}
 		var sbytes = hl.Bytes.fromValue(x, new hl.Ref(slen));
 		__add(sbytes, 0, slen << 1);
 	}
 
-	public function addSub(s:String, pos:Int, ?len:Int):Void@:privateAccess {
+	public function addSub(s:String, pos:Int, ?len:Int):Void @:privateAccess {
 		if (pos < 0)
 			pos = 0;
 		if (pos >= s.length)
@@ -101,6 +105,10 @@
 			throw "Invalid unicode char " + c;
 	}
 
+	public function clear():Void {
+		initialize(); // re-runs the constructor's setup, including pos = 0, size = 8 and a new hl.Bytes
+	}
+
 	public function toString():String {
 		if (pos + 2 > size)
 			__expand(0);

+ 4 - 0
std/jvm/_std/StringBuf.hx

@@ -99,6 +99,10 @@ class StringBuf {
 		b.appendCodePoint(c);
 	}
 
+	public function clear():Void {
+		b.setLength(0); // empties b in place rather than replacing it (b is presumably a java.lang.StringBuilder — confirm)
+	}
+
 	public function toString():String {
 		return b.toString();
 	}

+ 29 - 10
std/lua/_std/StringBuf.hx

@@ -20,15 +20,25 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+import lua.Lua;
 import lua.Table;
 
 class StringBuf {
-	var b:Table<Int, String>;
+	private var b:Table<Int, String>;
+
+	/**
+		Number of live (not yet cleared) elements in the internal buffer table.
+
+		If `this` StringBuf has been `clear`ed previously,
+		this value might not be equal to the length (`#`) of that table.
+	**/
+	private var bufferLength:Int;
 
 	public var length(get, null):Int;
 
 	public inline function new() {
 		b = Table.create();
+		this.bufferLength = 0; // no live entries in b yet
 		this.length = 0;
 	}
 
@@ -37,23 +47,32 @@ class StringBuf {
 	}
 
 	public inline function add<T>(x:T):Void {
-		var str = Std.string(x);
-		Table.insert(b, str);
-		length += str.length;
+		final str = Std.string(x);
+		final i = this.bufferLength += 1; // index of the next slot after the last live entry
+		Lua.rawset(this.b, i, str); // explicit index, so slots left behind by clear() are overwritten
+		this.length += str.length;
 	}
 
 	public inline function addChar(c:Int):Void {
-		Table.insert(b, String.fromCharCode(c));
-		length += 1;
+		final i = this.bufferLength += 1; // index of the next slot after the last live entry
+		Lua.rawset(this.b, i, String.fromCharCode(c)); // explicit index, so slots left behind by clear() are overwritten
+		this.length += 1;
 	}
 
 	public inline function addSub(s:String, pos:Int, ?len:Int):Void {
-		var part = len == null ? s.substr(pos) : s.substr(pos, len);
-		Table.insert(b, part);
-		length += part.length;
+		this.add(s.substr(pos, len)); // bookkeeping is delegated to add(); NOTE(review): relies on substr treating a null len as "to end of string" — confirm
+	}
+
+	public inline function clear():Void {
+		this.bufferLength = 0; // only the live count is reset; existing entries stay in b until overwritten
+		this.length = 0;
 	}
 
 	public inline function toString():String {
-		return Table.concat(b);
+		final len = this.bufferLength;
+		if (len == 0) { // nothing live, even though b may still hold entries from before a clear()
+			return "";
+		}
+		return Table.concat(this.b, "", 1, len); // join only slots 1..len, ignoring anything past the live range
 	}
 }

+ 5 - 0
std/neko/_std/StringBuf.hx

@@ -45,6 +45,10 @@
 			__add_char(b, c);
 		}
 
+	public inline function clear():Void {
+		buffer_reset(b); // "buffer_reset" primitive loaded from the neko "std" library further down in this class
+	}
+
 	public inline function toString():String {
 		return new String(__to_string(b));
 	}
@@ -54,5 +58,6 @@
 	static var __add_char:Dynamic = neko.Lib.load("std", "buffer_add_char", 2);
 	static var __add_sub:Dynamic = neko.Lib.load("std", "buffer_add_sub", 4);
 	static var __to_string:Dynamic = neko.Lib.load("std", "buffer_string", 1);
+	static var buffer_reset:Dynamic = neko.Lib.load("std", "buffer_reset", 1);
 	static var __get_length:Dynamic = try neko.Lib.load("std", "buffer_get_length", 1) catch (e:Dynamic) null;
 }

+ 4 - 0
std/php/_std/StringBuf.hx

@@ -56,6 +56,10 @@ import php.Syntax;
 		b += String.fromCharCode(c);
 	}
 
+	public inline function clear():Void {
+		b = ""; // b is the backing String that addChar() appends to with `+=`
+	}
+
 	public inline function toString():String {
 		return b;
 	}

+ 9 - 8
std/python/_std/StringBuf.hx

@@ -20,7 +20,6 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-import python.lib.io.IOBase.SeekSet;
 import python.lib.io.StringIO;
 
 @:coreApi
@@ -34,11 +33,7 @@ class StringBuf {
 	public var length(get, never):Int;
 
 	function get_length():Int {
-		var pos = b.tell();
-		b.seek(0, SeekEnd);
-		var len = b.tell();
-		b.seek(pos, SeekSet);
-		return len;
+		return b.tell(); // the current stream position is used as the length; clear() rewinds it to 0
 	}
 
 	public inline function add<T>(x:T):Void {
@@ -57,7 +52,13 @@ class StringBuf {
 		add1((len == null ? s.substr(pos) : s.substr(pos, len)));
 	}
 
-	public inline function toString():String {
-		return b.getvalue();
+	public inline function clear():Void {
+		b.seek(0, SeekSet); // rewind only: nothing is truncated, length and toString() go by the stream position
+	}
+
+	public function toString():String {
+		final length = this.length; // b.tell(), read before the seek below moves the position
+		b.seek(0, SeekSet);
+		return b.read(length); // read only the live prefix; NOTE(review): assumes read() leaves the position back at `length` so later appends continue there — confirm
 	}
 }

+ 41 - 1
tests/unit/src/unitstd/StringBuf.unit.hx

@@ -1,6 +1,7 @@
 // add, toString
 var x = new StringBuf();
 x.toString() == "";
+x.length == 0;
 x.add(null);
 x.toString() == "null";
 
@@ -37,8 +38,47 @@ x.addSub("a👽b", 1, 1);
 x.toString() == "👽";
 #end
 
+// StringBuf can store multiple elements
+final x = new StringBuf();
+x.add("ab");
+x.add("cd");
+x.addChar("e".code);
+x.add("fg");
+x.toString() == "abcdefg";
+
+// Calling toString() does not empty the buffer
+x.toString() == "abcdefg";
+x.toString() == "abcdefg";
+x.length == 7;
+
 // identity
 function identityTest(s:StringBuf) {
 	return s;
 }
-identityTest(x) == x;
+identityTest(x) == x;
+
+// Clearing a buffer resets its visible state
+x.length > 0;
+x.clear();
+x.toString() == "";
+x.length == 0;
+
+// Previously cleared buffers do not leak past state
+x.add("foo");
+x.toString() == "foo";
+x.length == 3;
+
+// Buffers can be cleared multiple times
+x.clear();
+x.length == 0;
+x.clear();
+x.clear();
+x.clear();
+x.length == 0;
+
+// Buffers can be cleared immediately after creation
+// (i.e. `clear` does not depend on any private state being non-null)
+final x = new StringBuf();
+x.clear();
+x.toString() == "";
+x.length == 0;