Browse Source

* Patch for sdfdata multiline support and assoiated test case from Reinier Olislagers (bug 22237 and bug #22213)

git-svn-id: trunk@22145 -
michael 13 years ago
parent
commit
2939c41263

+ 1 - 0
.gitattributes

@@ -2100,6 +2100,7 @@ packages/fcl-db/tests/sdfdstoolsunit.pas svneol=native#text/plain
 packages/fcl-db/tests/sqldbtoolsunit.pas svneol=native#text/plain
 packages/fcl-db/tests/tcgensql.pas svneol=native#text/plain
 packages/fcl-db/tests/tcparser.pas svneol=native#text/plain
+packages/fcl-db/tests/tcsdfdata.pp svneol=native#text/plain
 packages/fcl-db/tests/tcsqlscanner.pas svneol=native#text/plain
 packages/fcl-db/tests/test.json svneol=native#text/plain
 packages/fcl-db/tests/testbasics.pas svneol=native#text/plain

+ 61 - 29
packages/fcl-db/src/sdf/sdfdata.pp

@@ -13,11 +13,13 @@ unit SdfData;
 ---------------
 Modifications
 ---------------
+7/Jun/12 BigChimp:
+      Quote fields with delimiters or quotes to match Delphi SDF definition
+      (see e.g. help on TStrings.CommaText)
 14/Jul/11 BigChimp:
       Added AllowMultiLine property so user can use fields that have line endings
       (Carriage Return and/or Line Feed) embedded in their fields (fields need to be
-      quoted). Enabled by default; will break compatibility with earlier versions of
-      SdfData, but using multilines would have resulted in corrupted import anyway.
+      quoted). For now: output only (reading these fields does not work yet)
 12/Mar/04  Lazarus version (Sergey Smirnov AKA SSY)
       Locate and CheckString functions are removed because of Variant data type.
       Many things are changed for FPC/Lazarus compatibility.
@@ -939,25 +941,33 @@ end;
 
 function TSdfDataSet.StoreToBuf(Source: String): String;
 const
- CR :char = #13;
- LF :char = #10;
+ CR    :char = #13;
+ LF    :char = #10;
+ Quote :char = #34; // Character that encloses field if quoted. Hard-coded to "
 var
-  i,
-  p             :Integer;
-  pRet,
-  pStr,
-  pStrEnd       :PChar;
+  IsQuoted   // Whether or not field starts with a quote
+                :Boolean;
+  FieldMaxSize, // Maximum fields size as defined in FieldDefs
+  i,         // Field counter (0..)
+  p          // Length of string in field
+                :Integer;
+  pDeQuoted, // Temporary buffer for dedoubling quotes
+  pRet,      // Pointer to insertion point in return value
+  pStr,      // Beginning of field
+  pStrEnd    // End of field
+                :PChar;
   Ret           :String;
 begin
   SetLength(Ret, FRecordSize);
-
   FillChar(PChar(Ret)^, FRecordSize, Ord(' '));
-    PStrEnd := PChar(Source);
+
+  PStrEnd := PChar(Source);
   pRet := PChar(Ret);
 
   for i := 0 to FieldDefs.Count - 1 do
    begin
-
+    FieldMaxSize := FieldDefs[i].Size;
+    IsQuoted := false;
     while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] in [#1..' ']) do
     begin
      if FFMultiLine then
@@ -980,14 +990,15 @@ begin
 
     pStr := pStrEnd;
 
-    if (pStr[0] = '"') then
+    if (pStr[0] = Quote) then
      begin
+      IsQuoted := true; // See below: accept end of string without explicit quote
       if FFMultiLine then
        begin
         repeat
          Inc(pStrEnd);
         until not Boolean(Byte(pStrEnd[0])) or
-         ((pStrEnd[0] = '"') and ((pStrEnd + 1)[0] in [Delimiter,#0]));
+         ((pStrEnd[0] = Quote) and ((pStrEnd + 1)[0] in [Delimiter,#0]));
        end
       else
        begin
@@ -995,33 +1006,52 @@ begin
          repeat
           Inc(pStrEnd);
          until not Boolean(Byte(pStrEnd[0])) or
-          ((pStrEnd[0] = '"') and ((pStrEnd + 1)[0] in [Delimiter,CR,LF, #0]));
+          ((pStrEnd[0] = Quote) and ((pStrEnd + 1)[0] in [Delimiter,CR,LF,#0]));
        end;
 
-
-      if (pStrEnd[0] = '"') then
-        Inc(pStr);
+      if (pStrEnd[0] = Quote) then
+       Inc(pStr); //Skip final quote
      end
     else
       while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
         Inc(pStrEnd);
 
+    // Copy over entire field (or at least up to field length):
     p := pStrEnd - pStr;
-    if (p > FieldDefs[i].Size) then
-      p := FieldDefs[i].Size;
-
-    Move(pStr[0], pRet[0], p);
+    if IsQuoted then
+    begin
+     pDeQuoted := pRet; //Needed to avoid changing insertion point
+     // Copy entire field but not more than maximum field length:
+     // (We can mess with pStr now; the next loop will reset it after
+     // pStrEnd):
+     while (pstr < pStrEnd) and (pDeQuoted-pRet <= FieldMaxSize) do
+     begin
+      if pStr^ = Quote then inc(pStr);// skip first quote
+      pDeQuoted^ := pStr^;
+      inc(pStr);
+      inc(pDeQuoted);
+     end;
+    end
+    else
+    begin
+     if (p > FieldMaxSize) then
+       p := FieldMaxSize;
+     Move(pStr[0], pRet[0], p);
+    end;
 
-    Inc(pRet, FieldDefs[i].Size);
+    Inc(pRet, FieldMaxSize);
 
-    if (pStrEnd[0] = '"') then
+    // Move the end of field position past quotes and delimiters
+    // ready for processing the next field
+    if (pStrEnd[0] = Quote) then
       while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
         Inc(pStrEnd);
 
     if (pStrEnd[0] = Delimiter) then
      Inc(pStrEnd);
    end;
-  Result := Ret;
+
+  Result := ret;
 end;
 
 function TSdfDataSet.BufToStore(Buffer: TRecordBuffer): String;
@@ -1034,9 +1064,9 @@ var
 begin
   Result := '';
   p := 1;
-  QuoteMe:=false;
   for i := 0 to FieldDefs.Count - 1 do
   begin
+    QuoteMe:=false;
     Str := Trim(Copy(pansichar(Buffer), p, FieldDefs[i].Size));
     Inc(p, FieldDefs[i].Size);
     if FFMultiLine then
@@ -1051,11 +1081,13 @@ begin
        Str := StringReplace(Str, #10, '', [rfReplaceAll]);
        Str := StringReplace(Str, #13, '', [rfReplaceAll]);
       end;
-    // Check for any delimiters occurring in field text
-    if ((not QuoteMe) and (StrScan(PChar(Str), FDelimiter) <> nil)) then QuoteMe:=true;
+    // Check for any delimiters or quotes occurring in field text  
+    if (not QuoteMe) then
+	  if (StrScan(PChar(Str), FDelimiter) <> nil) or
+	    (StrScan(PChar(Str), QuoteDelimiter) <> nil) then QuoteMe:=true;
     if (QuoteMe) then
       begin
-      Str:=Stringreplace(Str,QuoteDelimiter,QuoteDelimiter+QuoteDelimiter,[rfReplaceAll]);
+      Str := Stringreplace(Str, QuoteDelimiter, QuoteDelimiter+QuoteDelimiter, [rfReplaceAll]);
       Str := QuoteDelimiter + Str + QuoteDelimiter;
       end;
     Result := Result + Str + FDelimiter;

+ 1 - 0
packages/fcl-db/tests/dbtestframework.pas

@@ -17,6 +17,7 @@ uses
   bufdatasettoolsunit,
   memdstoolsunit,
   SdfDSToolsUnit,
+  tcsdfdata,
 // Units wich contains the tests
   TestBasics,
   TestFieldTypes,

+ 228 - 0
packages/fcl-db/tests/tcsdfdata.pp

@@ -0,0 +1,228 @@
+unit tcsdfdata;
+// Tests multiline functionality of sdfdataset
+
+{$mode objfpc}{$H+}
+
+interface
+
+uses
+  Classes, SysUtils, Fpcunit, Testutils, Testregistry,
+  dateutils, sdfdata;
+
+type
+
+  { Ttestexport1 }
+
+  Ttestexport1 = class(Ttestcase)
+  protected
+    TestDataset: TSDFDataset;
+    procedure Setup; override;
+    procedure Teardown; override;
+  published
+    procedure TestOutput;
+    procedure TestInputOurFormat;
+    procedure TestDelimitedTextOutput;
+  end;
+
+implementation
+
+procedure Ttestexport1.TestOutput;
+const
+  OutputFilename='output.csv';
+begin
+  TestDataSet.Close;
+
+  if FileExists(OutputFilename) then DeleteFile(OutputFileName);
+  TestDataset.FileName:=OutputFileName;
+  TestDataset.Open;
+  // Fill test data
+  TestDataset.Append;
+  TestDataset.FieldByName('ID').AsInteger := 1;
+  // Data with quotes
+  TestDataset.FieldByName('NAME').AsString := 'J"T"';
+  TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
+  TestDataset.Post;
+
+  TestDataset.Append;
+  TestDataset.FieldByName('ID').AsInteger := 2;
+  // Data with delimiter
+  TestDataset.FieldByName('NAME').AsString := 'Hello'+TestDataset.Delimiter+' goodbye';
+  TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
+  TestDataset.Post;
+
+  TestDataset.Append;
+  TestDataset.FieldByName('ID').AsInteger := 3;
+  //Data with delimiter and quote (to test 19376)
+  TestDataset.FieldByName('NAME').AsString := 'Delimiter,"and";quote';
+  TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
+  TestDataset.Post;
+
+
+  TestDataset.Append;
+  TestDataset.FieldByName('ID').AsInteger := 4;
+  // Regular data
+  TestDataset.FieldByName('NAME').AsString := 'Just a long line of text without anything special';
+  TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
+  TestDataset.Post;
+
+  TestDataset.Last;
+  TestDataset.First;
+  // This fails - seems it sees the header as a record, too?
+  AssertEquals('Number of records in test dataset', 4, TestDataset.RecordCount);
+  TestDataset.Close;
+end;
+
+procedure Ttestexport1.TestInputOurFormat;
+// Test if input works with our format
+// Mainly check if reading quotes is according to Delphi sdf specs and works.
+// See test results from bug 19610 for evidence that the strings below should work.
+// If this works, we can switch to this and be RFC 4180 compliant and Delphi compliant.
+const
+  OutputFileName='input.csv';
+  //Value1 is the on disk format; it should translate to Expected1
+  Value1='"Delimiter,""and"";quote"';
+  Expected1='Delimiter,"and";quote';
+  Value2='"J""T"""';
+  Expected2='J"T"';
+  Value3='Just a long line';
+  Expected3='Just a long line';
+  //Note: Delphi can read this, see evidence in bug 19610 (the "quoted and space" value)
+  Value4='"Just a quoted long line"';
+  Expected4='Just a quoted long line';
+  // Delphi can read multiline, see evidence in bug 19610 (the multiline entry)
+  Value5='"quoted_multi'+#13+#10+'line"';
+  Expected5='quoted_multi'+#13+#10+'line';
+  Value6='"Delimiter,and;quoted"';
+  Expected6='Delimiter,and;quoted';
+  Value7='"A random""quote"';
+  Expected7='A random"quote';
+var
+  FileStrings: TStringList;
+begin
+  TestDataset.Close;
+  TestDataset.AllowMultiLine:=true;
+  if FileExists(OutputFilename) then DeleteFile(OutputFileName);
+  FileStrings:=TStringList.Create;
+  try
+    FileStrings.Add('ID,NAME,BIRTHDAY');
+    FileStrings.Add('1,'+Value1+',31-12-1976');
+    FileStrings.Add('2,'+Value2+',31-12-1976');
+    FileStrings.Add('3,'+Value3+',31-12-1976');
+    FileStrings.Add('4,'+Value4+',31-12-1976');
+    FileStrings.Add('5,'+Value5+',31-12-1976');
+    FileStrings.Add('6,'+Value6+',31-12-1976');
+    FileStrings.Add('7,'+Value7+',31-12-1976');
+    FileStrings.SaveToFile(OutputFileName);
+  finally
+    FileStrings.Free;
+  end;
+
+  // Load our dataset
+  TestDataset.FileName:=OutputFileName;
+  TestDataset.Open;
+  TestDataset.First;
+  AssertEquals(Expected1, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected2, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected3, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected4, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected5, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected6, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected7, TestDataSet.FieldByName('NAME').AsString);
+end;
+
+procedure Ttestexport1.TestDelimitedTextOutput;
+// Test if input works with our format
+// Mainly check if reading quotes is according to Delphi sdf specs and works.
+// See test results from bug 19610 for evidence that the strings below should work.
+// If this works, we can switch to this and be RFC 4180 compliant and Delphi compliant.
+const
+  OutputFileName='delim.csv';
+  //Value1 is the on disk format; it should translate to Expected1
+  Value1='Delimiter,"and";quote';
+  Value2='J"T"';
+  Value3='Just a long line';
+  Value4='Just a quoted long line';
+  Value5='multi'+#13+#10+'line';
+  Value6='Delimiter,and;done';
+  Value7='Some "random" quotes';
+var
+  FileStrings: TStringList;
+  OneRecord: TStringList;
+begin
+  TestDataset.Close;
+  TestDataset.AllowMultiLine:=true;
+  if FileExists(OutputFileName) then DeleteFile(OutputFileName);
+  FileStrings:=TStringList.Create;
+  OneRecord:=TStringList.Create;
+  try
+    FileStrings.Add('Field1,Field2,Field3,Field4,Field5,Field6,Field7');
+    OneRecord.Add(Value1);
+    OneRecord.Add(Value2);
+    OneRecord.Add(Value3);
+    OneRecord.Add(Value4);
+    OneRecord.Add(Value5);
+    OneRecord.Add(Value6);
+    OneRecord.Add(Value7);
+    OneRecord.Delimiter:=',';
+    OneRecord.QuoteChar:='"';
+    OneRecord.StrictDelimiter:=true;
+    FileStrings.Add(OneRecord.DelimitedText);
+    FileStrings.SaveToFile(OutputFileName);
+  finally
+    FileStrings.Free;
+    OneRecord.Free;
+  end;
+
+  // Load our dataset
+  TestDataset.FileName:=OutputFileName;
+  TestDataset.Open;
+  TestDataset.First;
+  AssertEquals(Value1, TestDataSet.Fields[0].AsString);
+  AssertEquals(Value2, TestDataSet.Fields[1].AsString);
+  AssertEquals(Value3, TestDataSet.Fields[2].AsString);
+  AssertEquals(Value4, TestDataSet.Fields[3].AsString);
+  AssertEquals(Value5, TestDataSet.Fields[4].AsString);
+  AssertEquals(Value6, TestDataSet.Fields[5].AsString);
+  AssertEquals(Value7, TestDataSet.Fields[6].AsString);
+end;
+
+
+procedure Ttestexport1.Setup;
+
+begin
+  TestDataset := TSDFDataset.Create(nil);
+  TestDataset.Delimiter := ',';
+  TestDataset.FileMustExist:=false;
+  TestDataset.FirstLineAsSchema := True;
+  TestDataset.Schema.Add('ID');
+  TestDataset.Schema.Add('NAME');
+  TestDataset.Schema.Add('BIRTHDAY');
+end;
+
+procedure Ttestexport1.Teardown;
+begin
+  try
+    TestDataset.Close;
+  except
+    //swallow
+  end;
+
+  TestDataset.Free;
+  try
+    //DeleteFile(FCSVFileName);
+  except
+    //swallow
+  end;
+end;
+
+initialization
+
+  Registertest(Ttestexport1);
+end.
+