Răsfoiți Sursa

Rewrite TextFormatter.StripCRLF

Uses StringBuilder and char span indexof search to reduce intermediate allocations.

The new implementation behaves slightly different compared to old implementation. In synthetic LFCR scenario it is correctly removed while the old implementation left the CR, which seems like an off-by-one error.
Tonttu 4 luni în urmă
părinte
comite
6f63dca591

+ 102 - 0
Benchmarks/Text/TextFormatter/StripCRLF.cs

@@ -0,0 +1,102 @@
+using System.Text;
+using BenchmarkDotNet.Attributes;
+using Tui = Terminal.Gui;
+
+namespace Terminal.Gui.Benchmarks.Text.TextFormatter;
+
+[MemoryDiagnoser]
+public class StripCRLF
+{
+    /// <summary>
+    /// Benchmark for previous implementation.
+    /// </summary>
+    /// <param name="str"></param>
+    /// <param name="keepNewLine"></param>
+    /// <returns></returns>
+    [Benchmark]
+    [ArgumentsSource (nameof (DataSource))]
+    public string Previous (string str, bool keepNewLine)
+    {
+        return RuneListToString (str, keepNewLine);
+    }
+
+    /// <summary>
+    /// Benchmark for current implementation with StringBuilder and char span index of search.
+    /// </summary>
+    [Benchmark (Baseline = true)]
+    [ArgumentsSource (nameof (DataSource))]
+    public string Current (string str, bool keepNewLine)
+    {
+        return Tui.TextFormatter.StripCRLF (str, keepNewLine);
+    }
+
+    /// <summary>
+    /// Previous implementation with intermediate list allocation.
+    /// </summary>
+    private static string RuneListToString (string str, bool keepNewLine = false)
+    {
+        List<Rune> runes = str.ToRuneList ();
+
+        for (var i = 0; i < runes.Count; i++)
+        {
+            switch ((char)runes [i].Value)
+            {
+                case '\n':
+                    if (!keepNewLine)
+                    {
+                        runes.RemoveAt (i);
+                    }
+
+                    break;
+
+                case '\r':
+                    if (i + 1 < runes.Count && runes [i + 1].Value == '\n')
+                    {
+                        runes.RemoveAt (i);
+
+                        if (!keepNewLine)
+                        {
+                            runes.RemoveAt (i);
+                        }
+
+                        i++;
+                    }
+                    else
+                    {
+                        if (!keepNewLine)
+                        {
+                            runes.RemoveAt (i);
+                        }
+                    }
+
+                    break;
+            }
+        }
+
+        return StringExtensions.ToString (runes);
+    }
+
+    public IEnumerable<object []> DataSource ()
+    {
+        string textSource =
+                """
+				Ĺόŕéḿ íṕśúḿ d́όĺόŕ śít́ áḿét́, ćόńśéćt́ét́úŕ ád́íṕíśćíńǵ éĺít́. Ṕŕáéśéńt́ q́úíś ĺúćt́úś éĺít́. Íńt́éǵéŕ út́ áŕćú éǵét́ d́όĺόŕ śćéĺéŕíśq́úé ḿát́t́íś áć ét́ d́íáḿ.
+				Ṕéĺĺéńt́éśq́úé śéd́ d́áṕíb́úś ḿáśśá, v́éĺ t́ŕíśt́íq́úé d́úí. Śéd́ v́ít́áé ńéq́úé éú v́éĺít́ όŕńáŕé áĺíq́úét́. Út́ q́úíś όŕćí t́éḿṕόŕ, t́éḿṕόŕ t́úŕṕíś íd́, t́éḿṕúś ńéq́úé.
+				Ṕŕáéśéńt́ śáṕíéń t́úŕṕíś, όŕńáŕé v́éĺ ḿáúŕíś át́, v́áŕíúś śúśćíṕít́ áńt́é. Út́ ṕúĺv́íńáŕ t́úŕṕíś ḿáśśá, q́úíś ćúŕśúś áŕćú f́áúćíb́úś íń.
+				Óŕćí v́áŕíúś ńát́όq́úé ṕéńát́íb́úś ét́ ḿáǵńíś d́íś ṕáŕt́úŕíéńt́ ḿόńt́éś, ńáśćét́úŕ ŕíd́íćúĺúś ḿúś. F́úśćé át́ éx́ b́ĺáńd́ít́, ćόńv́áĺĺíś q́úáḿ ét́, v́úĺṕút́át́é ĺáćúś.
+				Śúśṕéńd́íśśé śít́ áḿét́ áŕćú út́ áŕćú f́áúćíb́úś v́áŕíúś. V́ív́áḿúś śít́ áḿét́ ḿáx́íḿúś d́íáḿ. Ńáḿ éx́ ĺéό, ṕh́áŕét́ŕá éú ĺόb́όŕt́íś át́, t́ŕíśt́íq́úé út́ f́éĺíś.
+				""";
+        // Consistent line endings between systems keeps performance evaluation more consistent.
+        textSource = textSource.ReplaceLineEndings ("\r\n");
+
+        bool[] permutations = [true, false];
+        foreach (bool keepNewLine in permutations)
+        {
+            yield return [textSource [..1], keepNewLine];
+            yield return [textSource [..10], keepNewLine];
+            yield return [textSource [..100], keepNewLine];
+            yield return [textSource [..(textSource.Length / 2)], keepNewLine];
+            yield return [textSource, keepNewLine];
+        }
+    }
+}

+ 2 - 1
Terminal.Gui/Terminal.Gui.csproj

@@ -78,9 +78,10 @@
         <Using Include="Terminal.Gui.EnumExtensions" />
     </ItemGroup>
     <!-- =================================================================== -->
-    <!-- Assembliy names for which internal items are visible -->
+    <!-- Assembly names for which internal items are visible -->
     <!-- =================================================================== -->
     <ItemGroup>
+        <InternalsVisibleTo Include="Benchmarks"/>
         <InternalsVisibleTo Include="UnitTests" />
         <InternalsVisibleTo Include="UnitTests.Parallelizable" />
         <InternalsVisibleTo Include="StressTests" />

+ 46 - 28
Terminal.Gui/Text/TextFormatter.cs

@@ -1,4 +1,5 @@
 #nullable enable
+using System.Buffers;
 using System.Diagnostics;
 
 namespace Terminal.Gui;
@@ -9,6 +10,9 @@ namespace Terminal.Gui;
 /// </summary>
 public class TextFormatter
 {
+    // Utilized in CRLF related helper methods for faster newline char index search.
+    private static readonly SearchValues<char> NewLineSearchValues = SearchValues.Create(['\r', '\n']);
+
     private Key _hotKey = new ();
     private int _hotKeyPos = -1;
     private List<string> _lines = new ();
@@ -1187,45 +1191,59 @@ public class TextFormatter
     // TODO: Move to StringExtensions?
     internal static string StripCRLF (string str, bool keepNewLine = false)
     {
-        List<Rune> runes = str.ToRuneList ();
+        StringBuilder stringBuilder = new();
 
-        for (var i = 0; i < runes.Count; i++)
+        ReadOnlySpan<char> remaining = str.AsSpan ();
+        while (remaining.Length > 0)
         {
-            switch ((char)runes [i].Value)
+            int nextLineBreakIndex = remaining.IndexOfAny (NewLineSearchValues);
+            if (nextLineBreakIndex == -1)
             {
-                case '\n':
-                    if (!keepNewLine)
-                    {
-                        runes.RemoveAt (i);
-                    }
+                if (str.Length == remaining.Length)
+                {
+                    return str;
+                }
+                stringBuilder.Append (remaining);
+                break;
+            }
 
-                    break;
+            ReadOnlySpan<char> slice = remaining.Slice (0, nextLineBreakIndex);
+            stringBuilder.Append (slice);
 
-                case '\r':
-                    if (i + 1 < runes.Count && runes [i + 1].Value == '\n')
+            // Evaluate how many line break characters to preserve.
+            int stride;
+            char lineBreakChar = remaining [nextLineBreakIndex];
+            if (lineBreakChar == '\n')
+            {
+                stride = 1;
+                if (keepNewLine)
+                {
+                    stringBuilder.Append ('\n');
+                }
+            }
+            else // '\r'
+            {
+                bool crlf = (nextLineBreakIndex + 1) < remaining.Length && remaining [nextLineBreakIndex + 1] == '\n';
+                if (crlf)
+                {
+                    stride = 2;
+                    if (keepNewLine)
                     {
-                        runes.RemoveAt (i);
-
-                        if (!keepNewLine)
-                        {
-                            runes.RemoveAt (i);
-                        }
-
-                        i++;
+                        stringBuilder.Append ('\n');
                     }
-                    else
+                }
+                else
+                {
+                    stride = 1;
+                    if (keepNewLine)
                     {
-                        if (!keepNewLine)
-                        {
-                            runes.RemoveAt (i);
-                        }
+                        stringBuilder.Append ('\r');
                     }
-
-                    break;
+                }
             }
+            remaining = remaining.Slice (slice.Length + stride);
         }
-
-        return StringExtensions.ToString (runes);
+        return stringBuilder.ToString ();
     }
 
     // TODO: Move to StringExtensions?

+ 3 - 3
Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs

@@ -2895,9 +2895,9 @@ public class TextFormatterTests
     [InlineData ("This has crlf\r\nin the middle", "This has crlfin the middle")]
     [InlineData ("This has crlf in the end\r\n", "This has crlf in the end")]
     // LFCR
-    [InlineData ("\n\rThis has lfcr in the beginning", "\rThis has lfcr in the beginning")]
-    [InlineData ("This has lfcr\n\rin the middle", "This has lfcr\rin the middle")]
-    [InlineData ("This has lfcr in the end\n\r", "This has lfcr in the end\r")]
+    [InlineData ("\n\rThis has lfcr in the beginning", "This has lfcr in the beginning")]
+    [InlineData ("This has lfcr\n\rin the middle", "This has lfcrin the middle")]
+    [InlineData ("This has lfcr in the end\n\r", "This has lfcr in the end")]
     // CR
     [InlineData ("\rThis has cr in the beginning", "This has cr in the beginning")]
     [InlineData ("This has cr\rin the middle", "This has crin the middle")]