Browse Source

* Patch for sdfdata multiline support and assoiated test case from Reinier Olislagers (bug 22237 and bug #22213)

git-svn-id: trunk@22145 -
michael 13 years ago
parent
commit
2939c41263

+ 1 - 0
.gitattributes

@@ -2100,6 +2100,7 @@ packages/fcl-db/tests/sdfdstoolsunit.pas svneol=native#text/plain
 packages/fcl-db/tests/sqldbtoolsunit.pas svneol=native#text/plain
 packages/fcl-db/tests/sqldbtoolsunit.pas svneol=native#text/plain
 packages/fcl-db/tests/tcgensql.pas svneol=native#text/plain
 packages/fcl-db/tests/tcgensql.pas svneol=native#text/plain
 packages/fcl-db/tests/tcparser.pas svneol=native#text/plain
 packages/fcl-db/tests/tcparser.pas svneol=native#text/plain
+packages/fcl-db/tests/tcsdfdata.pp svneol=native#text/plain
 packages/fcl-db/tests/tcsqlscanner.pas svneol=native#text/plain
 packages/fcl-db/tests/tcsqlscanner.pas svneol=native#text/plain
 packages/fcl-db/tests/test.json svneol=native#text/plain
 packages/fcl-db/tests/test.json svneol=native#text/plain
 packages/fcl-db/tests/testbasics.pas svneol=native#text/plain
 packages/fcl-db/tests/testbasics.pas svneol=native#text/plain

+ 61 - 29
packages/fcl-db/src/sdf/sdfdata.pp

@@ -13,11 +13,13 @@ unit SdfData;
 ---------------
 ---------------
 Modifications
 Modifications
 ---------------
 ---------------
+7/Jun/12 BigChimp:
+      Quote fields with delimiters or quotes to match Delphi SDF definition
+      (see e.g. help on TStrings.CommaText)
 14/Jul/11 BigChimp:
 14/Jul/11 BigChimp:
       Added AllowMultiLine property so user can use fields that have line endings
       Added AllowMultiLine property so user can use fields that have line endings
       (Carriage Return and/or Line Feed) embedded in their fields (fields need to be
       (Carriage Return and/or Line Feed) embedded in their fields (fields need to be
-      quoted). Enabled by default; will break compatibility with earlier versions of
-      SdfData, but using multilines would have resulted in corrupted import anyway.
+      quoted). For now: output only (reading these fields does not work yet)
 12/Mar/04  Lazarus version (Sergey Smirnov AKA SSY)
 12/Mar/04  Lazarus version (Sergey Smirnov AKA SSY)
       Locate and CheckString functions are removed because of Variant data type.
       Locate and CheckString functions are removed because of Variant data type.
       Many things are changed for FPC/Lazarus compatibility.
       Many things are changed for FPC/Lazarus compatibility.
@@ -939,25 +941,33 @@ end;
 
 
 function TSdfDataSet.StoreToBuf(Source: String): String;
 function TSdfDataSet.StoreToBuf(Source: String): String;
 const
 const
- CR :char = #13;
- LF :char = #10;
+ CR    :char = #13;
+ LF    :char = #10;
+ Quote :char = #34; // Character that encloses field if quoted. Hard-coded to "
 var
 var
-  i,
-  p             :Integer;
-  pRet,
-  pStr,
-  pStrEnd       :PChar;
+  IsQuoted   // Whether or not field starts with a quote
+                :Boolean;
+  FieldMaxSize, // Maximum fields size as defined in FieldDefs
+  i,         // Field counter (0..)
+  p          // Length of string in field
+                :Integer;
+  pDeQuoted, // Temporary buffer for dedoubling quotes
+  pRet,      // Pointer to insertion point in return value
+  pStr,      // Beginning of field
+  pStrEnd    // End of field
+                :PChar;
   Ret           :String;
   Ret           :String;
 begin
 begin
   SetLength(Ret, FRecordSize);
   SetLength(Ret, FRecordSize);
-
   FillChar(PChar(Ret)^, FRecordSize, Ord(' '));
   FillChar(PChar(Ret)^, FRecordSize, Ord(' '));
-    PStrEnd := PChar(Source);
+
+  PStrEnd := PChar(Source);
   pRet := PChar(Ret);
   pRet := PChar(Ret);
 
 
   for i := 0 to FieldDefs.Count - 1 do
   for i := 0 to FieldDefs.Count - 1 do
    begin
    begin
-
+    FieldMaxSize := FieldDefs[i].Size;
+    IsQuoted := false;
     while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] in [#1..' ']) do
     while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] in [#1..' ']) do
     begin
     begin
      if FFMultiLine then
      if FFMultiLine then
@@ -980,14 +990,15 @@ begin
 
 
     pStr := pStrEnd;
     pStr := pStrEnd;
 
 
-    if (pStr[0] = '"') then
+    if (pStr[0] = Quote) then
      begin
      begin
+      IsQuoted := true; // See below: accept end of string without explicit quote
       if FFMultiLine then
       if FFMultiLine then
        begin
        begin
         repeat
         repeat
          Inc(pStrEnd);
          Inc(pStrEnd);
         until not Boolean(Byte(pStrEnd[0])) or
         until not Boolean(Byte(pStrEnd[0])) or
-         ((pStrEnd[0] = '"') and ((pStrEnd + 1)[0] in [Delimiter,#0]));
+         ((pStrEnd[0] = Quote) and ((pStrEnd + 1)[0] in [Delimiter,#0]));
        end
        end
       else
       else
        begin
        begin
@@ -995,33 +1006,52 @@ begin
          repeat
          repeat
           Inc(pStrEnd);
           Inc(pStrEnd);
          until not Boolean(Byte(pStrEnd[0])) or
          until not Boolean(Byte(pStrEnd[0])) or
-          ((pStrEnd[0] = '"') and ((pStrEnd + 1)[0] in [Delimiter,CR,LF, #0]));
+          ((pStrEnd[0] = Quote) and ((pStrEnd + 1)[0] in [Delimiter,CR,LF,#0]));
        end;
        end;
 
 
-
-      if (pStrEnd[0] = '"') then
-        Inc(pStr);
+      if (pStrEnd[0] = Quote) then
+       Inc(pStr); //Skip final quote
      end
      end
     else
     else
       while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
       while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
         Inc(pStrEnd);
         Inc(pStrEnd);
 
 
+    // Copy over entire field (or at least up to field length):
     p := pStrEnd - pStr;
     p := pStrEnd - pStr;
-    if (p > FieldDefs[i].Size) then
-      p := FieldDefs[i].Size;
-
-    Move(pStr[0], pRet[0], p);
+    if IsQuoted then
+    begin
+     pDeQuoted := pRet; //Needed to avoid changing insertion point
+     // Copy entire field but not more than maximum field length:
+     // (We can mess with pStr now; the next loop will reset it after
+     // pStrEnd):
+     while (pstr < pStrEnd) and (pDeQuoted-pRet <= FieldMaxSize) do
+     begin
+      if pStr^ = Quote then inc(pStr);// skip first quote
+      pDeQuoted^ := pStr^;
+      inc(pStr);
+      inc(pDeQuoted);
+     end;
+    end
+    else
+    begin
+     if (p > FieldMaxSize) then
+       p := FieldMaxSize;
+     Move(pStr[0], pRet[0], p);
+    end;
 
 
-    Inc(pRet, FieldDefs[i].Size);
+    Inc(pRet, FieldMaxSize);
 
 
-    if (pStrEnd[0] = '"') then
+    // Move the end of field position past quotes and delimiters
+    // ready for processing the next field
+    if (pStrEnd[0] = Quote) then
       while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
       while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
         Inc(pStrEnd);
         Inc(pStrEnd);
 
 
     if (pStrEnd[0] = Delimiter) then
     if (pStrEnd[0] = Delimiter) then
      Inc(pStrEnd);
      Inc(pStrEnd);
    end;
    end;
-  Result := Ret;
+
+  Result := ret;
 end;
 end;
 
 
 function TSdfDataSet.BufToStore(Buffer: TRecordBuffer): String;
 function TSdfDataSet.BufToStore(Buffer: TRecordBuffer): String;
@@ -1034,9 +1064,9 @@ var
 begin
 begin
   Result := '';
   Result := '';
   p := 1;
   p := 1;
-  QuoteMe:=false;
   for i := 0 to FieldDefs.Count - 1 do
   for i := 0 to FieldDefs.Count - 1 do
   begin
   begin
+    QuoteMe:=false;
     Str := Trim(Copy(pansichar(Buffer), p, FieldDefs[i].Size));
     Str := Trim(Copy(pansichar(Buffer), p, FieldDefs[i].Size));
     Inc(p, FieldDefs[i].Size);
     Inc(p, FieldDefs[i].Size);
     if FFMultiLine then
     if FFMultiLine then
@@ -1051,11 +1081,13 @@ begin
        Str := StringReplace(Str, #10, '', [rfReplaceAll]);
        Str := StringReplace(Str, #10, '', [rfReplaceAll]);
        Str := StringReplace(Str, #13, '', [rfReplaceAll]);
        Str := StringReplace(Str, #13, '', [rfReplaceAll]);
       end;
       end;
-    // Check for any delimiters occurring in field text
-    if ((not QuoteMe) and (StrScan(PChar(Str), FDelimiter) <> nil)) then QuoteMe:=true;
+    // Check for any delimiters or quotes occurring in field text  
+    if (not QuoteMe) then
+	  if (StrScan(PChar(Str), FDelimiter) <> nil) or
+	    (StrScan(PChar(Str), QuoteDelimiter) <> nil) then QuoteMe:=true;
     if (QuoteMe) then
     if (QuoteMe) then
       begin
       begin
-      Str:=Stringreplace(Str,QuoteDelimiter,QuoteDelimiter+QuoteDelimiter,[rfReplaceAll]);
+      Str := Stringreplace(Str, QuoteDelimiter, QuoteDelimiter+QuoteDelimiter, [rfReplaceAll]);
       Str := QuoteDelimiter + Str + QuoteDelimiter;
       Str := QuoteDelimiter + Str + QuoteDelimiter;
       end;
       end;
     Result := Result + Str + FDelimiter;
     Result := Result + Str + FDelimiter;

+ 1 - 0
packages/fcl-db/tests/dbtestframework.pas

@@ -17,6 +17,7 @@ uses
   bufdatasettoolsunit,
   bufdatasettoolsunit,
   memdstoolsunit,
   memdstoolsunit,
   SdfDSToolsUnit,
   SdfDSToolsUnit,
+  tcsdfdata,
 // Units wich contains the tests
 // Units wich contains the tests
   TestBasics,
   TestBasics,
   TestFieldTypes,
   TestFieldTypes,

+ 228 - 0
packages/fcl-db/tests/tcsdfdata.pp

@@ -0,0 +1,228 @@
+unit tcsdfdata;
+// Tests multiline functionality of sdfdataset
+
+{$mode objfpc}{$H+}
+
+interface
+
+uses
+  Classes, SysUtils, Fpcunit, Testutils, Testregistry,
+  dateutils, sdfdata;
+
+type
+
+  { Ttestexport1 }
+
+  Ttestexport1 = class(Ttestcase)
+  protected
+    TestDataset: TSDFDataset;
+    procedure Setup; override;
+    procedure Teardown; override;
+  published
+    procedure TestOutput;
+    procedure TestInputOurFormat;
+    procedure TestDelimitedTextOutput;
+  end;
+
+implementation
+
+procedure Ttestexport1.TestOutput;
+const
+  OutputFilename='output.csv';
+begin
+  TestDataSet.Close;
+
+  if FileExists(OutputFilename) then DeleteFile(OutputFileName);
+  TestDataset.FileName:=OutputFileName;
+  TestDataset.Open;
+  // Fill test data
+  TestDataset.Append;
+  TestDataset.FieldByName('ID').AsInteger := 1;
+  // Data with quotes
+  TestDataset.FieldByName('NAME').AsString := 'J"T"';
+  TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
+  TestDataset.Post;
+
+  TestDataset.Append;
+  TestDataset.FieldByName('ID').AsInteger := 2;
+  // Data with delimiter
+  TestDataset.FieldByName('NAME').AsString := 'Hello'+TestDataset.Delimiter+' goodbye';
+  TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
+  TestDataset.Post;
+
+  TestDataset.Append;
+  TestDataset.FieldByName('ID').AsInteger := 3;
+  //Data with delimiter and quote (to test 19376)
+  TestDataset.FieldByName('NAME').AsString := 'Delimiter,"and";quote';
+  TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
+  TestDataset.Post;
+
+
+  TestDataset.Append;
+  TestDataset.FieldByName('ID').AsInteger := 4;
+  // Regular data
+  TestDataset.FieldByName('NAME').AsString := 'Just a long line of text without anything special';
+  TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
+  TestDataset.Post;
+
+  TestDataset.Last;
+  TestDataset.First;
+  // This fails - seems it sees the header as a record, too?
+  AssertEquals('Number of records in test dataset', 4, TestDataset.RecordCount);
+  TestDataset.Close;
+end;
+
+procedure Ttestexport1.TestInputOurFormat;
+// Test if input works with our format
+// Mainly check if reading quotes is according to Delphi sdf specs and works.
+// See test results from bug 19610 for evidence that the strings below should work.
+// If this works, we can switch to this and be RFC 4180 compliant and Delphi compliant.
+const
+  OutputFileName='input.csv';
+  //Value1 is the on disk format; it should translate to Expected1
+  Value1='"Delimiter,""and"";quote"';
+  Expected1='Delimiter,"and";quote';
+  Value2='"J""T"""';
+  Expected2='J"T"';
+  Value3='Just a long line';
+  Expected3='Just a long line';
+  //Note: Delphi can read this, see evidence in bug 19610 (the "quoted and space" value)
+  Value4='"Just a quoted long line"';
+  Expected4='Just a quoted long line';
+  // Delphi can read multiline, see evidence in bug 19610 (the multiline entry)
+  Value5='"quoted_multi'+#13+#10+'line"';
+  Expected5='quoted_multi'+#13+#10+'line';
+  Value6='"Delimiter,and;quoted"';
+  Expected6='Delimiter,and;quoted';
+  Value7='"A random""quote"';
+  Expected7='A random"quote';
+var
+  FileStrings: TStringList;
+begin
+  TestDataset.Close;
+  TestDataset.AllowMultiLine:=true;
+  if FileExists(OutputFilename) then DeleteFile(OutputFileName);
+  FileStrings:=TStringList.Create;
+  try
+    FileStrings.Add('ID,NAME,BIRTHDAY');
+    FileStrings.Add('1,'+Value1+',31-12-1976');
+    FileStrings.Add('2,'+Value2+',31-12-1976');
+    FileStrings.Add('3,'+Value3+',31-12-1976');
+    FileStrings.Add('4,'+Value4+',31-12-1976');
+    FileStrings.Add('5,'+Value5+',31-12-1976');
+    FileStrings.Add('6,'+Value6+',31-12-1976');
+    FileStrings.Add('7,'+Value7+',31-12-1976');
+    FileStrings.SaveToFile(OutputFileName);
+  finally
+    FileStrings.Free;
+  end;
+
+  // Load our dataset
+  TestDataset.FileName:=OutputFileName;
+  TestDataset.Open;
+  TestDataset.First;
+  AssertEquals(Expected1, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected2, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected3, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected4, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected5, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected6, TestDataSet.FieldByName('NAME').AsString);
+  TestDataSet.Next;
+  AssertEquals(Expected7, TestDataSet.FieldByName('NAME').AsString);
+end;
+
+procedure Ttestexport1.TestDelimitedTextOutput;
+// Test if input works with our format
+// Mainly check if reading quotes is according to Delphi sdf specs and works.
+// See test results from bug 19610 for evidence that the strings below should work.
+// If this works, we can switch to this and be RFC 4180 compliant and Delphi compliant.
+const
+  OutputFileName='delim.csv';
+  //Value1 is the on disk format; it should translate to Expected1
+  Value1='Delimiter,"and";quote';
+  Value2='J"T"';
+  Value3='Just a long line';
+  Value4='Just a quoted long line';
+  Value5='multi'+#13+#10+'line';
+  Value6='Delimiter,and;done';
+  Value7='Some "random" quotes';
+var
+  FileStrings: TStringList;
+  OneRecord: TStringList;
+begin
+  TestDataset.Close;
+  TestDataset.AllowMultiLine:=true;
+  if FileExists(OutputFileName) then DeleteFile(OutputFileName);
+  FileStrings:=TStringList.Create;
+  OneRecord:=TStringList.Create;
+  try
+    FileStrings.Add('Field1,Field2,Field3,Field4,Field5,Field6,Field7');
+    OneRecord.Add(Value1);
+    OneRecord.Add(Value2);
+    OneRecord.Add(Value3);
+    OneRecord.Add(Value4);
+    OneRecord.Add(Value5);
+    OneRecord.Add(Value6);
+    OneRecord.Add(Value7);
+    OneRecord.Delimiter:=',';
+    OneRecord.QuoteChar:='"';
+    OneRecord.StrictDelimiter:=true;
+    FileStrings.Add(OneRecord.DelimitedText);
+    FileStrings.SaveToFile(OutputFileName);
+  finally
+    FileStrings.Free;
+    OneRecord.Free;
+  end;
+
+  // Load our dataset
+  TestDataset.FileName:=OutputFileName;
+  TestDataset.Open;
+  TestDataset.First;
+  AssertEquals(Value1, TestDataSet.Fields[0].AsString);
+  AssertEquals(Value2, TestDataSet.Fields[1].AsString);
+  AssertEquals(Value3, TestDataSet.Fields[2].AsString);
+  AssertEquals(Value4, TestDataSet.Fields[3].AsString);
+  AssertEquals(Value5, TestDataSet.Fields[4].AsString);
+  AssertEquals(Value6, TestDataSet.Fields[5].AsString);
+  AssertEquals(Value7, TestDataSet.Fields[6].AsString);
+end;
+
+
+procedure Ttestexport1.Setup;
+
+begin
+  TestDataset := TSDFDataset.Create(nil);
+  TestDataset.Delimiter := ',';
+  TestDataset.FileMustExist:=false;
+  TestDataset.FirstLineAsSchema := True;
+  TestDataset.Schema.Add('ID');
+  TestDataset.Schema.Add('NAME');
+  TestDataset.Schema.Add('BIRTHDAY');
+end;
+
+procedure Ttestexport1.Teardown;
+begin
+  try
+    TestDataset.Close;
+  except
+    //swallow
+  end;
+
+  TestDataset.Free;
+  try
+    //DeleteFile(FCSVFileName);
+  except
+    //swallow
+  end;
+end;
+
+initialization
+
+  Registertest(Ttestexport1);
+end.
+