Browse Source

* Type helper for easier loading

Michaël Van Canneyt 2 years ago
parent
commit
bd4b3af8b6
1 changed files with 96 additions and 22 deletions
  1. 96 22
      packages/fcl-pdf/src/fppdfparser.pp

+ 96 - 22
packages/fcl-pdf/src/fppdfparser.pp

@@ -17,6 +17,7 @@ unit fppdfparser;
 
 
 {$mode ObjFPC}{$H+}
 {$mode ObjFPC}{$H+}
 {$J-}
 {$J-}
+{$ModeSwitch typehelpers}
 
 
 { $DEFINE DEBUGSTREAMS}
 { $DEFINE DEBUGSTREAMS}
 { $DEFINE DUMPSTREAMS}
 { $DEFINE DUMPSTREAMS}
@@ -36,6 +37,7 @@ Type
     FilterName : String;
     FilterName : String;
     Source : TStream;
     Source : TStream;
     Dest : TStream;
     Dest : TStream;
+
     ParamDict : TPDFDictionary;
     ParamDict : TPDFDictionary;
   end;
   end;
 
 
@@ -63,6 +65,7 @@ Type
     FOnUnknownFilter: TPDFFilterEvent;
     FOnUnknownFilter: TPDFFilterEvent;
     FResolveContentStreams: Boolean;
     FResolveContentStreams: Boolean;
     FResolveObjects: Boolean;
     FResolveObjects: Boolean;
+    FResolveToUnicodeCMaps: Boolean;
     FScanner : TPDFScanner;
     FScanner : TPDFScanner;
     FLastDict : TPDFDictionary; // Last created dictionary
     FLastDict : TPDFDictionary; // Last created dictionary
     FloadingXRef : TPDFXRefArray;
     FloadingXRef : TPDFXRefArray;
@@ -73,6 +76,7 @@ Type
     procedure ParseCMAPCodeSpaceRange(aMap: TPDFCMapData);
     procedure ParseCMAPCodeSpaceRange(aMap: TPDFCMapData);
     procedure ParseInlineImageData(var aOperands: TPDFTokenArray; aScanner: TPDFScanner );
     procedure ParseInlineImageData(var aOperands: TPDFTokenArray; aScanner: TPDFScanner );
     procedure SetResolveContentStreams(AValue: Boolean);
     procedure SetResolveContentStreams(AValue: Boolean);
+    procedure SetResolveToUnicodeCMaps(AValue: Boolean);
   Protected
   Protected
     // Progress, Logging & Errors.
     // Progress, Logging & Errors.
     procedure DoProgress(aKind : TPDFProgressKind; aCurrent,aCount : Integer);
     procedure DoProgress(aKind : TPDFProgressKind; aCurrent,aCount : Integer);
@@ -147,7 +151,7 @@ Type
     function LoadXREFobject(Itm: TPDFXRef; addToDocument : Boolean = True): TPDFIndirect; virtual;
     function LoadXREFobject(Itm: TPDFXRef; addToDocument : Boolean = True): TPDFIndirect; virtual;
     procedure ParseContentStream(aObject: TPDFPageObject; aStream: TStream;
     procedure ParseContentStream(aObject: TPDFPageObject; aStream: TStream;
       aOnCommand: TPDFNewCommandEvent); virtual;
       aOnCommand: TPDFNewCommandEvent); virtual;
-    Procedure ResolveToUnicodeCMaps(aDoc : TPDFDocument);
+    Procedure DoResolveToUnicodeCMaps(aDoc : TPDFDocument);
     class procedure Unpredict(var Data: TPDFFilterData);
     class procedure Unpredict(var Data: TPDFFilterData);
     Class procedure AsciiHEXDecode(aSrc,aDest : TStream);
     Class procedure AsciiHEXDecode(aSrc,aDest : TStream);
     Class Function AsciiHEXDecode(aSrc : TStream) : TStream;
     Class Function AsciiHEXDecode(aSrc : TStream) : TStream;
@@ -161,11 +165,13 @@ Type
     Class Function RunlengthDecode(aSrc : TStream) : TStream;
     Class Function RunlengthDecode(aSrc : TStream) : TStream;
     Property Document : TPDFDocument Read FDoc;
     Property Document : TPDFDocument Read FDoc;
     // load all objects when XRef is parsed ?
     // load all objects when XRef is parsed ?
-    Property LoadObjects : Boolean Read FLoadObjects Write FLoadObjects;
+    Property LoadObjects : Boolean Read FLoadObjects Write FLoadObjects  default True;
     // When loading objects, resolve objects ?
     // When loading objects, resolve objects ?
-    Property ResolveObjects : Boolean Read FResolveObjects Write FResolveObjects;
-    // Resolve content streams of pages ?
-    Property ResolveContentStreams : Boolean Read FResolveContentStreams Write SetResolveContentStreams;
+    Property ResolveObjects : Boolean Read FResolveObjects Write FResolveObjects default True;
+    // Resolve content streams of pages ? Default true.
+    Property ResolveContentStreams : Boolean Read FResolveContentStreams Write SetResolveContentStreams default true;
+    // Resolve ToUnicode CMap maps ? Default false,
+    Property ResolveToUnicodeCMaps : Boolean Read FResolveToUnicodeCMaps Write SetResolveToUnicodeCMaps default false;
     // Called when an unknown filter is encountered
     // Called when an unknown filter is encountered
     Property OnUnknownFilter : TPDFFilterEvent Read FOnUnknownFilter Write FOnUnknownFilter;
     Property OnUnknownFilter : TPDFFilterEvent Read FOnUnknownFilter Write FOnUnknownFilter;
     // Log function
     // Log function
@@ -174,8 +180,22 @@ Type
     Property OnProgress : TPDFProgressEvent Read FOnProgress Write FOnProgress;
     Property OnProgress : TPDFProgressEvent Read FOnProgress Write FOnProgress;
   end;
   end;
 
 
+  { TPDFDocumentHelper }
+Type
+  TPDFLoadOption = (loLoadObjects,loResolveObjects,loResolveContentStreams,loResolveToUnicodeCMaps);
+  TPDFLoadOptions = set of TPDFLoadOption;
+
+  TPDFDocumentHelper = class Helper for TPDFDocument
+    Procedure LoadFromFile(Const aFilename : String; aOnLog : TPDFLogNotifyEvent = nil; aOnProgress : TPDFProgressEvent = Nil);
+    Procedure LoadFromStream(Const aStream : TStream; aOnLog : TPDFLogNotifyEvent = nil; aOnProgress : TPDFProgressEvent = Nil);
+    Procedure LoadFromFile(Const aFilename : String; aOptions : TPDFLoadOptions; aOnLog : TPDFLogNotifyEvent = nil; aOnProgress : TPDFProgressEvent = Nil);
+    Procedure LoadFromStream(Const aStream : TStream; aOptions : TPDFLoadOptions; aOnLog : TPDFLogNotifyEvent = nil; aOnProgress : TPDFProgressEvent = Nil);
+  end;
+
 
 
 Const
 Const
+  PDFDefaultLoadOptions = [loLoadObjects,loResolveObjects,loResolveContentStreams,loResolveToUnicodeCMaps];
+
   // Error codes
   // Error codes
   penUnknownToken = 1;
   penUnknownToken = 1;
   penExpectedInteger = 2;
   penExpectedInteger = 2;
@@ -242,7 +262,6 @@ resourcestring
   SErrUnknownFilter =  'Unknown stream filter : %s';
   SErrUnknownFilter =  'Unknown stream filter : %s';
   SErrInvalidDictionaryRef = 'Invalid dictionary reference value: %s ';
   SErrInvalidDictionaryRef = 'Invalid dictionary reference value: %s ';
   SErrDictionaryNoLengthObject = 'Invalid dictionary length object reference [%d %d]';
   SErrDictionaryNoLengthObject = 'Invalid dictionary length object reference [%d %d]';
-  SErrEOFWhileScanningString = 'EOF encountered while scanning string';
   sErrContentStreamNotFound = 'Invalid content stream object reference [%d %d]';
   sErrContentStreamNotFound = 'Invalid content stream object reference [%d %d]';
 
 
   // SErrDictionaryNoLengthInObject = 'Invalid dictionary length object reference [%d %d] : No length in object';
   // SErrDictionaryNoLengthInObject = 'Invalid dictionary length object reference [%d %d] : No length in object';
@@ -251,7 +270,6 @@ resourcestring
 
 
   SErrObjectIsNotObjectStream = 'Object %d is not a ObjStm object.';
   SErrObjectIsNotObjectStream = 'Object %d is not a ObjStm object.';
   SErrStreamObjectWithoutDict = 'ObjStm Object %d does not have a dictionary';
   SErrStreamObjectWithoutDict = 'ObjStm Object %d does not have a dictionary';
-  SErrNoSuchObjectInstream = 'No object %d in stream %s (%d)';
   SErrNotStreamObject = 'Object %d is not a stream, it is a %s object';
   SErrNotStreamObject = 'Object %d is not a stream, it is a %s object';
   SErrExpectedString = ': Expected string';
   SErrExpectedString = ': Expected string';
   SErrXRefindex = 'XRef index';
   SErrXRefindex = 'XRef index';
@@ -281,6 +299,55 @@ resourcestring
   SErrExpectedIdentifierN = '%s: Expected identifier "%s", got "%s"';
   SErrExpectedIdentifierN = '%s: Expected identifier "%s", got "%s"';
   SErrExpectedName = '%s: Expected name "%s", got "%s"';
   SErrExpectedName = '%s: Expected name "%s", got "%s"';
 
 
+{ TPDFDocumentHelper }
+
+procedure TPDFDocumentHelper.LoadFromFile(const aFilename: String;aOnLog: TPDFLogNotifyEvent; aOnProgress: TPDFProgressEvent);
+begin
+  LoadFromFile(aFileName,PDFDefaultLoadOptions,aOnLog,aOnProgress);
+end;
+
+procedure TPDFDocumentHelper.LoadFromFile(const aFilename: String;
+  aOptions : TPDFLoadOptions; aOnLog: TPDFLogNotifyEvent; aOnProgress: TPDFProgressEvent);
+
+Var
+  F : TFileStream;
+
+begin
+  F:=TFileStream.Create(aFileName,fmOpenRead or fmShareDenyWrite);
+  try
+    LoadFromStream(F,aOptions, aOnLog,aOnProgress)
+  finally
+    F.Free;
+  end;
+
+end;
+
+procedure TPDFDocumentHelper.LoadFromStream(const aStream: TStream;
+  aOnLog: TPDFLogNotifyEvent; aOnProgress: TPDFProgressEvent);
+
+begin
+  LoadFromStream(aStream,PDFDefaultLoadOptions,aOnLog,aOnProgress);
+end;
+
+procedure TPDFDocumentHelper.LoadFromStream(const aStream: TStream;aOptions : TPDFLoadOptions;
+  aOnLog: TPDFLogNotifyEvent; aOnProgress: TPDFProgressEvent);
+Var
+  aParser:TPDFParser;
+begin
+  aParser:=TPDFParser.Create(aStream);
+  try
+    aParser.OnLog:=aOnLog;
+    aParser.OnProgress:=aOnProgress;
+    aParser.LoadObjects:=loLoadObjects in aOptions;
+    aParser.ResolveObjects:=loResolveObjects in aOptions;
+    aParser.ResolveContentStreams:=loResolveContentStreams in aOptions;
+    aParser.ResolveToUnicodeCMaps:=loResolveToUnicodeCMaps in aOptions;
+    aParser.ParseDocument(Self);
+  Finally
+    aParser.Free;
+  end;
+end;
+
 {$IFDEF DEBUGSTREAMS}
 {$IFDEF DEBUGSTREAMS}
 {$IFDEF DUMPSTREAMS}
 {$IFDEF DUMPSTREAMS}
 
 
@@ -348,6 +415,7 @@ begin
   FLoadObjects:=True;
   FLoadObjects:=True;
   FResolveObjects:=True;
   FResolveObjects:=True;
   FResolveContentStreams:=True;
   FResolveContentStreams:=True;
+  FResolveToUnicodeCMaps:=False;
 end;
 end;
 
 
 
 
@@ -614,7 +682,7 @@ end;
 
 
 // On entry, we're on begincodespacerange.
 // On entry, we're on begincodespacerange.
 // On exit, we're on endcodespacerange
 // On exit, we're on endcodespacerange
-Procedure TPDFParser.ParseCMAPCodeSpaceRange(aMap : TPDFCMapData);
+procedure TPDFParser.ParseCMAPCodeSpaceRange(aMap: TPDFCMapData);
 
 
 Var
 Var
   L : TPDFCodeSpaceRangeArray;
   L : TPDFCodeSpaceRangeArray;
@@ -659,7 +727,7 @@ end;
 
 
 // On entry, we're on beginbfchar.
 // On entry, we're on beginbfchar.
 // On exit, we're on endbfchar
 // On exit, we're on endbfchar
-Procedure TPDFParser.ParseCMAPBFChar(aMap : TPDFCMapData);
+procedure TPDFParser.ParseCMAPBFChar(aMap: TPDFCMapData);
 
 
 Var
 Var
   L : TPDFBFCharArray;
   L : TPDFBFCharArray;
@@ -705,7 +773,7 @@ begin
   aMap.BFChars:=Concat(aMap.BFChars,L);
   aMap.BFChars:=Concat(aMap.BFChars,L);
 end;
 end;
 
 
-Procedure TPDFParser.ParseCMAPBFRange(aMap : TPDFCMapData);
+procedure TPDFParser.ParseCMAPBFRange(aMap: TPDFCMapData);
 
 
 Var
 Var
   L : TPDFCIDRangeArray;
   L : TPDFCIDRangeArray;
@@ -788,7 +856,7 @@ begin
 end;
 end;
 
 
 
 
-Procedure TPDFParser.ParseCMap(aStream : TStream; aMap : TPDFCMap);
+procedure TPDFParser.ParseCMap(aStream: TStream; aMap: TPDFCMap);
 
 
 Var
 Var
   aScanner: TPDFScanner;
   aScanner: TPDFScanner;
@@ -837,7 +905,7 @@ begin
   end;
   end;
 end;
 end;
 
 
-procedure TPDFParser.ResolveToUnicodeCMaps(aDoc: TPDFDocument);
+procedure TPDFParser.DoResolveToUnicodeCMaps(aDoc: TPDFDocument);
 
 
 var
 var
   Obj : TPDFObject;
   Obj : TPDFObject;
@@ -924,8 +992,6 @@ Var
   I,aStartIndex,aCount : Integer;
   I,aStartIndex,aCount : Integer;
   lToken : TPDFToken;
   lToken : TPDFToken;
   Itm : TPDFXRef;
   Itm : TPDFXRef;
-  EndByTrailer : Boolean;
-  T : TPDFTrailer;
 
 
 begin
 begin
   Result:=TPDFXRefList.Create();
   Result:=TPDFXRefList.Create();
@@ -1731,6 +1797,7 @@ Var
   Index : TPDFIndexPairArray;
   Index : TPDFIndexPairArray;
 
 
 begin
 begin
+  Index:=[];
   aSize:=aObjectDict.GetIntegerValue(SPDFKeySize);
   aSize:=aObjectDict.GetIntegerValue(SPDFKeySize);
   if aObjectDict.ContainsKey(SPDFKeyIndex) then
   if aObjectDict.ContainsKey(SPDFKeyIndex) then
     Idx:=aObjectDict.GetArrayValue(SPDFKeyIndex)
     Idx:=aObjectDict.GetArrayValue(SPDFKeyIndex)
@@ -1784,11 +1851,10 @@ Var
   Sizes : Array[0..2] of Byte;
   Sizes : Array[0..2] of Byte;
   Indexes : TPDFIndexPairArray;
   Indexes : TPDFIndexPairArray;
   Fields : Array[0..2] of Integer;
   Fields : Array[0..2] of Integer;
-  aID,aFirst,aLast : integer;
+  aID,aFirst : integer;
   aPair : TPDFIndexPair;
   aPair : TPDFIndexPair;
   O,O2 : TPDFObject;
   O,O2 : TPDFObject;
   W : TPDFArray absolute O;
   W : TPDFArray absolute O;
-  Idx : TPDFArray absolute O;
   V : TPDFValue absolute O2;
   V : TPDFValue absolute O2;
   I,J,aSize : Integer;
   I,J,aSize : Integer;
   D : PByte;
   D : PByte;
@@ -1909,7 +1975,7 @@ Var
   aStream : TStream;
   aStream : TStream;
 
 
 begin
 begin
-  Writeln('Parsing XREF at : ',aStartPos);
+  // Writeln('Parsing XREF at : ',aStartPos);
   Result:=Nil;
   Result:=Nil;
   ParentObject:=Nil;
   ParentObject:=Nil;
   if (FScanner.Position<>aStartPos) then
   if (FScanner.Position<>aStartPos) then
@@ -1979,6 +2045,14 @@ begin
     ResolveObjects:=true;
     ResolveObjects:=true;
 end;
 end;
 
 
+procedure TPDFParser.SetResolveToUnicodeCMaps(AValue: Boolean);
+begin
+  if FResolveToUnicodeCMaps=AValue then Exit;
+  FResolveToUnicodeCMaps:=AValue;
+  if aValue then
+    ResolveObjects:=True;
+end;
+
 procedure TPDFParser.DoProgress(aKind: TPDFProgressKind; aCurrent, aCount: Integer);
 procedure TPDFParser.DoProgress(aKind: TPDFProgressKind; aCurrent, aCount: Integer);
 begin
 begin
   If Assigned(FOnProgress) then
   If Assigned(FOnProgress) then
@@ -2175,7 +2249,6 @@ var
   I : Integer;
   I : Integer;
   Itm : TPDFXRef;
   Itm : TPDFXRef;
   UseCompressed : Boolean;
   UseCompressed : Boolean;
-  Ind : TPDFIndirect;
 
 
 begin
 begin
   For UseCompressed:=False to True do
   For UseCompressed:=False to True do
@@ -2191,7 +2264,7 @@ begin
       else if (Itm.Instance=Nil) and Itm.InUse and (Itm.Compressed=UseCompressed) then
       else if (Itm.Instance=Nil) and Itm.InUse and (Itm.Compressed=UseCompressed) then
         if Itm.ReferenceIndex>0 then
         if Itm.ReferenceIndex>0 then
           begin
           begin
-          Ind:=LoadXRefObject(Itm);
+          LoadXRefObject(Itm);
 {          if Assigned(Ind) then
 {          if Assigned(Ind) then
             Writeln('Loaded ',Ind.GetDescription);}
             Writeln('Loaded ',Ind.GetDescription);}
           end;
           end;
@@ -2299,6 +2372,8 @@ begin
     LoadIndirectObjects;
     LoadIndirectObjects;
     If ResolveContentStreams then
     If ResolveContentStreams then
       DoResolveContentStreams(FDoc);
       DoResolveContentStreams(FDoc);
+    if ResolveToUnicodeCMaps then
+      DoResolveToUnicodeCMaps(FDoc);
     end;
     end;
 end;
 end;
 
 
@@ -2323,6 +2398,7 @@ Var
   Streams : Array of TStream;
   Streams : Array of TStream;
 
 
 begin
 begin
+  Streams:=[];
   Result:=Nil;
   Result:=Nil;
   try
   try
     if aPage.ContentCount=1 then
     if aPage.ContentCount=1 then
@@ -2347,9 +2423,7 @@ end;
 procedure TPDFParser.DoResolveContentStreams(aDoc: TPDFDocument; aOnCommand : TPDFNewCommandEvent = Nil);
 procedure TPDFParser.DoResolveContentStreams(aDoc: TPDFDocument; aOnCommand : TPDFNewCommandEvent = Nil);
 
 
 Var
 Var
-  I,J,aCount : Integer;
-  Obj,ObjFree : TPDFIndirect;
-  Cont : TPDFContentStream;
+  I,aCount : Integer;
   aPage : TPDFPageObject;
   aPage : TPDFPageObject;
   aStream : TStream;
   aStream : TStream;