Ver Fonte

* Patch from wp to add BOM detection to CSV reader (bug ID 30897)

git-svn-id: trunk@34871 -
michael há 8 anos atrás
pai
commit
5af24e94ae

+ 2 - 0
.gitattributes

@@ -1933,6 +1933,8 @@ packages/fcl-base/examples/cachetest.pp svneol=native#text/plain
 packages/fcl-base/examples/cfgtest.pp svneol=native#text/plain
 packages/fcl-base/examples/contit.pp svneol=native#text/plain
 packages/fcl-base/examples/crittest.pp svneol=native#text/plain
+packages/fcl-base/examples/csvbom.pp svneol=native#text/plain
+packages/fcl-base/examples/databom.txt svneol=native#text/plain
 packages/fcl-base/examples/dbugsrv.pp svneol=native#text/plain
 packages/fcl-base/examples/debugtest.pp svneol=native#text/plain
 packages/fcl-base/examples/decodeascii85.pp svneol=native#text/plain

+ 1 - 0
packages/fcl-base/examples/README.txt

@@ -75,3 +75,4 @@ daemon.pp    Test for daemonapp (MVC)
 testtimer.pp Test for TFPTimer (MVC)
 testini.pp   Test/Demo for inifiles, ReadSectionValues.
 contit.pp    Test/Demo for iterators in contnr.pp
+csvbom.pp    Test/Demo for BOM detection in CSV document. (needs databom.txt)

+ 53 - 0
packages/fcl-base/examples/csvbom.pp

@@ -0,0 +1,53 @@
+program csvbom;
+
+{ Demo for BOM detection in TCSVParser: reads the comma-separated file
+  named by FILENAME into an array of records and prints one line per record. }
+
+{$APPTYPE Console}
+{$mode objfpc}{$H+}
+
+uses
+  sysutils, classes, dateutils, csvreadwrite;
+
+type
+  // One parsed row of the input file: column 0 = date, 1 = number, 2 = text.
+  TDataRec = record
+    FDate: TDate;
+    FNumber: Integer;
+    FText: String;
+  end;
+
+const
+  FILENAME = 'databom.txt';
+
+var
+  parser: TCSVParser;
+  stream: TFileStream;
+  data: array of TDataRec;
+  s: String;
+  i: Integer;
+begin
+  // Each resource is created immediately before the try block that releases
+  // it, so cleanup never depends on a variable that was not assigned yet.
+  // The file is only read, so other readers are not locked out.
+  stream := TFileStream.Create(FILENAME, fmOpenRead or fmShareDenyWrite);
+  try
+    parser := TCSVParser.Create;
+    try
+      parser.Delimiter := ',';
+      // Enable BOM detection: TCSVParser.ResetParser then seeks past a leading
+      // byte order mark before parsing starts.
+      parser.DetectBOM := true;
+      parser.SetSource(stream);
+      SetLength(data, 0);
+      while parser.ParseNextCell do begin
+        // Grow the array as soon as the parser reports a row not stored yet.
+        if parser.CurrentRow > High(data) then
+          SetLength(data, parser.CurrentRow + 1);
+        s := parser.CurrentCellText;
+        case parser.CurrentCol of
+          0: data[High(data)].FDate := ScanDateTime('yyyy-mm-dd', s);
+          1: data[High(data)].FNumber := StrToInt(s);
+          2: data[High(data)].FText := s;
+        end;
+      end;
+    finally
+      parser.Free;
+    end;
+  finally
+    stream.Free;
+  end;
+
+  // Print the collected records, then wait so a console window stays open.
+  for i:=0 to High(data) do
+    WriteLn(DateToStr(data[i].FDate), '; ', data[i].FNumber, '; ', data[i].FText);
+  Writeln('Press enter to quit program');
+  Readln;
+end.
+

+ 2 - 0
packages/fcl-base/examples/databom.txt

@@ -0,0 +1,2 @@
+2016-01-01,100,ABC
+2016-01-02,110,DEF

+ 31 - 0
packages/fcl-base/src/csvreadwrite.pp

@@ -92,12 +92,16 @@ Type
 
   { TCSVParser }
 
+  // Byte order mark found at the start of the source stream, as detected by
+  // TCSVParser.ResetParser when DetectBOM is enabled; bomNone means no
+  // UTF-8 / UTF-16 mark was recognised.
+  TCSVByteOrderMark = (bomNone, bomUTF8, bomUTF16LE, bomUTF16BE);
+
   TCSVParser = class(TCSVHandler)
   private
     FFreeStream: Boolean;
     // fields
     FSourceStream: TStream;
     FStrStreamWrapper: TStringStream;
+    FBOM: TCSVByteOrderMark;
+    FDetectBOM: Boolean;
     // parser state
     EndOfFile: Boolean;
     EndOfLine: Boolean;
@@ -140,6 +144,10 @@ Type
     property MaxColCount: Integer read FMaxColCount;
     // Does the parser own the stream ? If true, a previous stream is freed when set or when parser is destroyed.
     Property FreeStream : Boolean Read FFreeStream Write FFreeStream;
+    // Return BOM found in file
+    property BOM: TCSVByteOrderMark read FBOM;
+    // Detect whether a byte order mark (BOM) is present. If set to True, the BOM property reports which BOM, if any, was found.
+    property DetectBOM: Boolean read FDetectBOM write FDetectBOM default false;
   end;
 
   // Sequential output to CSV stream
@@ -443,9 +451,32 @@ begin
 end;
 
 procedure TCSVParser.ResetParser;
+// Rewinds the source stream, optionally detects and skips a leading byte
+// order mark (UTF-8, UTF-16 BE or UTF-16 LE), and loads the first character
+// via NextChar.
+var
+  b: packed array[0..2] of byte;  // first bytes of the stream (BOM candidates)
+  n: Integer;                     // bytes actually read, then BOM length to skip
 begin
   ClearOutput;
   FSourceStream.Seek(0, soFromBeginning);
+  // Start from a known state so the BOM property never reports a value left
+  // over from a previously parsed source.
+  FBOM := bomNone;
+  if FDetectBOM then
+  begin
+    // Read instead of ReadBuffer: ReadBuffer raises EReadError when fewer than
+    // 3 bytes are available (empty or very short input). Each comparison below
+    // is guarded by the number of bytes that were really read.
+    n := FSourceStream.Read(b[0], SizeOf(b));
+    if (n >= 3) and (b[0] = $EF) and (b[1] = $BB) and (b[2] = $BF) then begin
+      FBOM := bomUTF8;
+      n := 3;
+    end else
+    if (n >= 2) and (b[0] = $FE) and (b[1] = $FF) then begin
+      FBOM := bomUTF16BE;
+      n := 2;
+    end else
+    if (n >= 2) and (b[0] = $FF) and (b[1] = $FE) then begin
+      FBOM := bomUTF16LE;
+      n := 2;
+    end else
+      n := 0;
+    // Position the stream just past the BOM, or back at the start if none.
+    FSourceStream.Seek(n, soFromBeginning);
+  end;
   EndOfFile := False;
   NextChar;
 end;