|
@@ -17,6 +17,7 @@ unit fppdfparser;
|
|
|
|
|
|
{$mode ObjFPC}{$H+}
|
|
{$mode ObjFPC}{$H+}
|
|
{$J-}
|
|
{$J-}
|
|
|
|
+{$ModeSwitch typehelpers}
|
|
|
|
|
|
{ $DEFINE DEBUGSTREAMS}
|
|
{ $DEFINE DEBUGSTREAMS}
|
|
{ $DEFINE DUMPSTREAMS}
|
|
{ $DEFINE DUMPSTREAMS}
|
|
@@ -36,6 +37,7 @@ Type
|
|
FilterName : String;
|
|
FilterName : String;
|
|
Source : TStream;
|
|
Source : TStream;
|
|
Dest : TStream;
|
|
Dest : TStream;
|
|
|
|
+
|
|
ParamDict : TPDFDictionary;
|
|
ParamDict : TPDFDictionary;
|
|
end;
|
|
end;
|
|
|
|
|
|
@@ -63,6 +65,7 @@ Type
|
|
FOnUnknownFilter: TPDFFilterEvent;
|
|
FOnUnknownFilter: TPDFFilterEvent;
|
|
FResolveContentStreams: Boolean;
|
|
FResolveContentStreams: Boolean;
|
|
FResolveObjects: Boolean;
|
|
FResolveObjects: Boolean;
|
|
|
|
+ FResolveToUnicodeCMaps: Boolean;
|
|
FScanner : TPDFScanner;
|
|
FScanner : TPDFScanner;
|
|
FLastDict : TPDFDictionary; // Last created dictionary
|
|
FLastDict : TPDFDictionary; // Last created dictionary
|
|
FloadingXRef : TPDFXRefArray;
|
|
FloadingXRef : TPDFXRefArray;
|
|
@@ -73,6 +76,7 @@ Type
|
|
procedure ParseCMAPCodeSpaceRange(aMap: TPDFCMapData);
|
|
procedure ParseCMAPCodeSpaceRange(aMap: TPDFCMapData);
|
|
procedure ParseInlineImageData(var aOperands: TPDFTokenArray; aScanner: TPDFScanner );
|
|
procedure ParseInlineImageData(var aOperands: TPDFTokenArray; aScanner: TPDFScanner );
|
|
procedure SetResolveContentStreams(AValue: Boolean);
|
|
procedure SetResolveContentStreams(AValue: Boolean);
|
|
|
|
+ procedure SetResolveToUnicodeCMaps(AValue: Boolean);
|
|
Protected
|
|
Protected
|
|
// Progress, Logging & Errors.
|
|
// Progress, Logging & Errors.
|
|
procedure DoProgress(aKind : TPDFProgressKind; aCurrent,aCount : Integer);
|
|
procedure DoProgress(aKind : TPDFProgressKind; aCurrent,aCount : Integer);
|
|
@@ -147,7 +151,7 @@ Type
|
|
function LoadXREFobject(Itm: TPDFXRef; addToDocument : Boolean = True): TPDFIndirect; virtual;
|
|
function LoadXREFobject(Itm: TPDFXRef; addToDocument : Boolean = True): TPDFIndirect; virtual;
|
|
procedure ParseContentStream(aObject: TPDFPageObject; aStream: TStream;
|
|
procedure ParseContentStream(aObject: TPDFPageObject; aStream: TStream;
|
|
aOnCommand: TPDFNewCommandEvent); virtual;
|
|
aOnCommand: TPDFNewCommandEvent); virtual;
|
|
- Procedure ResolveToUnicodeCMaps(aDoc : TPDFDocument);
|
|
|
|
|
|
+ Procedure DoResolveToUnicodeCMaps(aDoc : TPDFDocument);
|
|
class procedure Unpredict(var Data: TPDFFilterData);
|
|
class procedure Unpredict(var Data: TPDFFilterData);
|
|
Class procedure AsciiHEXDecode(aSrc,aDest : TStream);
|
|
Class procedure AsciiHEXDecode(aSrc,aDest : TStream);
|
|
Class Function AsciiHEXDecode(aSrc : TStream) : TStream;
|
|
Class Function AsciiHEXDecode(aSrc : TStream) : TStream;
|
|
@@ -161,11 +165,13 @@ Type
|
|
Class Function RunlengthDecode(aSrc : TStream) : TStream;
|
|
Class Function RunlengthDecode(aSrc : TStream) : TStream;
|
|
Property Document : TPDFDocument Read FDoc;
|
|
Property Document : TPDFDocument Read FDoc;
|
|
// load all objects when XRef is parsed ?
|
|
// load all objects when XRef is parsed ?
|
|
- Property LoadObjects : Boolean Read FLoadObjects Write FLoadObjects;
|
|
|
|
|
|
+ Property LoadObjects : Boolean Read FLoadObjects Write FLoadObjects default True;
|
|
// When loading objects, resolve objects ?
|
|
// When loading objects, resolve objects ?
|
|
- Property ResolveObjects : Boolean Read FResolveObjects Write FResolveObjects;
|
|
|
|
- // Resolve content streams of pages ?
|
|
|
|
- Property ResolveContentStreams : Boolean Read FResolveContentStreams Write SetResolveContentStreams;
|
|
|
|
|
|
+ Property ResolveObjects : Boolean Read FResolveObjects Write FResolveObjects default True;
|
|
|
|
+ // Resolve content streams of pages ? Default true.
|
|
|
|
+ Property ResolveContentStreams : Boolean Read FResolveContentStreams Write SetResolveContentStreams default true;
|
|
|
|
+ // Resolve ToUnicode CMap maps ? Default false,
|
|
|
|
+ Property ResolveToUnicodeCMaps : Boolean Read FResolveToUnicodeCMaps Write SetResolveToUnicodeCMaps default false;
|
|
// Called when an unknown filter is encountered
|
|
// Called when an unknown filter is encountered
|
|
Property OnUnknownFilter : TPDFFilterEvent Read FOnUnknownFilter Write FOnUnknownFilter;
|
|
Property OnUnknownFilter : TPDFFilterEvent Read FOnUnknownFilter Write FOnUnknownFilter;
|
|
// Log function
|
|
// Log function
|
|
@@ -174,8 +180,22 @@ Type
|
|
Property OnProgress : TPDFProgressEvent Read FOnProgress Write FOnProgress;
|
|
Property OnProgress : TPDFProgressEvent Read FOnProgress Write FOnProgress;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
+ { TPDFDocumentHelper }
|
|
|
|
+Type
|
|
|
|
+ TPDFLoadOption = (loLoadObjects,loResolveObjects,loResolveContentStreams,loResolveToUnicodeCMaps);
|
|
|
|
+ TPDFLoadOptions = set of TPDFLoadOption;
|
|
|
|
+
|
|
|
|
+ TPDFDocumentHelper = class Helper for TPDFDocument
|
|
|
|
+ Procedure LoadFromFile(Const aFilename : String; aOnLog : TPDFLogNotifyEvent = nil; aOnProgress : TPDFProgressEvent = Nil);
|
|
|
|
+ Procedure LoadFromStream(Const aStream : TStream; aOnLog : TPDFLogNotifyEvent = nil; aOnProgress : TPDFProgressEvent = Nil);
|
|
|
|
+ Procedure LoadFromFile(Const aFilename : String; aOptions : TPDFLoadOptions; aOnLog : TPDFLogNotifyEvent = nil; aOnProgress : TPDFProgressEvent = Nil);
|
|
|
|
+ Procedure LoadFromStream(Const aStream : TStream; aOptions : TPDFLoadOptions; aOnLog : TPDFLogNotifyEvent = nil; aOnProgress : TPDFProgressEvent = Nil);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
|
|
Const
|
|
Const
|
|
|
|
+ PDFDefaultLoadOptions = [loLoadObjects,loResolveObjects,loResolveContentStreams,loResolveToUnicodeCMaps];
|
|
|
|
+
|
|
// Error codes
|
|
// Error codes
|
|
penUnknownToken = 1;
|
|
penUnknownToken = 1;
|
|
penExpectedInteger = 2;
|
|
penExpectedInteger = 2;
|
|
@@ -242,7 +262,6 @@ resourcestring
|
|
SErrUnknownFilter = 'Unknown stream filter : %s';
|
|
SErrUnknownFilter = 'Unknown stream filter : %s';
|
|
SErrInvalidDictionaryRef = 'Invalid dictionary reference value: %s ';
|
|
SErrInvalidDictionaryRef = 'Invalid dictionary reference value: %s ';
|
|
SErrDictionaryNoLengthObject = 'Invalid dictionary length object reference [%d %d]';
|
|
SErrDictionaryNoLengthObject = 'Invalid dictionary length object reference [%d %d]';
|
|
- SErrEOFWhileScanningString = 'EOF encountered while scanning string';
|
|
|
|
sErrContentStreamNotFound = 'Invalid content stream object reference [%d %d]';
|
|
sErrContentStreamNotFound = 'Invalid content stream object reference [%d %d]';
|
|
|
|
|
|
// SErrDictionaryNoLengthInObject = 'Invalid dictionary length object reference [%d %d] : No length in object';
|
|
// SErrDictionaryNoLengthInObject = 'Invalid dictionary length object reference [%d %d] : No length in object';
|
|
@@ -251,7 +270,6 @@ resourcestring
|
|
|
|
|
|
SErrObjectIsNotObjectStream = 'Object %d is not a ObjStm object.';
|
|
SErrObjectIsNotObjectStream = 'Object %d is not a ObjStm object.';
|
|
SErrStreamObjectWithoutDict = 'ObjStm Object %d does not have a dictionary';
|
|
SErrStreamObjectWithoutDict = 'ObjStm Object %d does not have a dictionary';
|
|
- SErrNoSuchObjectInstream = 'No object %d in stream %s (%d)';
|
|
|
|
SErrNotStreamObject = 'Object %d is not a stream, it is a %s object';
|
|
SErrNotStreamObject = 'Object %d is not a stream, it is a %s object';
|
|
SErrExpectedString = ': Expected string';
|
|
SErrExpectedString = ': Expected string';
|
|
SErrXRefindex = 'XRef index';
|
|
SErrXRefindex = 'XRef index';
|
|
@@ -281,6 +299,55 @@ resourcestring
|
|
SErrExpectedIdentifierN = '%s: Expected identifier "%s", got "%s"';
|
|
SErrExpectedIdentifierN = '%s: Expected identifier "%s", got "%s"';
|
|
SErrExpectedName = '%s: Expected name "%s", got "%s"';
|
|
SErrExpectedName = '%s: Expected name "%s", got "%s"';
|
|
|
|
|
|
|
|
+{ TPDFDocumentHelper }
|
|
|
|
+
|
|
|
|
+procedure TPDFDocumentHelper.LoadFromFile(const aFilename: String;aOnLog: TPDFLogNotifyEvent; aOnProgress: TPDFProgressEvent);
|
|
|
|
+begin
|
|
|
|
+ LoadFromFile(aFileName,PDFDefaultLoadOptions,aOnLog,aOnProgress);
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure TPDFDocumentHelper.LoadFromFile(const aFilename: String;
|
|
|
|
+ aOptions : TPDFLoadOptions; aOnLog: TPDFLogNotifyEvent; aOnProgress: TPDFProgressEvent);
|
|
|
|
+
|
|
|
|
+Var
|
|
|
|
+ F : TFileStream;
|
|
|
|
+
|
|
|
|
+begin
|
|
|
|
+ F:=TFileStream.Create(aFileName,fmOpenRead or fmShareDenyWrite);
|
|
|
|
+ try
|
|
|
|
+ LoadFromStream(F,aOptions, aOnLog,aOnProgress)
|
|
|
|
+ finally
|
|
|
|
+ F.Free;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure TPDFDocumentHelper.LoadFromStream(const aStream: TStream;
|
|
|
|
+ aOnLog: TPDFLogNotifyEvent; aOnProgress: TPDFProgressEvent);
|
|
|
|
+
|
|
|
|
+begin
|
|
|
|
+ LoadFromStream(aStream,PDFDefaultLoadOptions,aOnLog,aOnProgress);
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure TPDFDocumentHelper.LoadFromStream(const aStream: TStream;aOptions : TPDFLoadOptions;
|
|
|
|
+ aOnLog: TPDFLogNotifyEvent; aOnProgress: TPDFProgressEvent);
|
|
|
|
+Var
|
|
|
|
+ aParser:TPDFParser;
|
|
|
|
+begin
|
|
|
|
+ aParser:=TPDFParser.Create(aStream);
|
|
|
|
+ try
|
|
|
|
+ aParser.OnLog:=aOnLog;
|
|
|
|
+ aParser.OnProgress:=aOnProgress;
|
|
|
|
+ aParser.LoadObjects:=loLoadObjects in aOptions;
|
|
|
|
+ aParser.ResolveObjects:=loResolveObjects in aOptions;
|
|
|
|
+ aParser.ResolveContentStreams:=loResolveContentStreams in aOptions;
|
|
|
|
+ aParser.ResolveToUnicodeCMaps:=loResolveToUnicodeCMaps in aOptions;
|
|
|
|
+ aParser.ParseDocument(Self);
|
|
|
|
+ Finally
|
|
|
|
+ aParser.Free;
|
|
|
|
+ end;
|
|
|
|
+end;
|
|
|
|
+
|
|
{$IFDEF DEBUGSTREAMS}
|
|
{$IFDEF DEBUGSTREAMS}
|
|
{$IFDEF DUMPSTREAMS}
|
|
{$IFDEF DUMPSTREAMS}
|
|
|
|
|
|
@@ -348,6 +415,7 @@ begin
|
|
FLoadObjects:=True;
|
|
FLoadObjects:=True;
|
|
FResolveObjects:=True;
|
|
FResolveObjects:=True;
|
|
FResolveContentStreams:=True;
|
|
FResolveContentStreams:=True;
|
|
|
|
+ FResolveToUnicodeCMaps:=False;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
|
|
@@ -614,7 +682,7 @@ end;
|
|
|
|
|
|
// On entry, we're on begincodespacerange.
|
|
// On entry, we're on begincodespacerange.
|
|
// On exit, we're on endcodespacerange
|
|
// On exit, we're on endcodespacerange
|
|
-Procedure TPDFParser.ParseCMAPCodeSpaceRange(aMap : TPDFCMapData);
|
|
|
|
|
|
+procedure TPDFParser.ParseCMAPCodeSpaceRange(aMap: TPDFCMapData);
|
|
|
|
|
|
Var
|
|
Var
|
|
L : TPDFCodeSpaceRangeArray;
|
|
L : TPDFCodeSpaceRangeArray;
|
|
@@ -659,7 +727,7 @@ end;
|
|
|
|
|
|
// On entry, we're on beginbfchar.
|
|
// On entry, we're on beginbfchar.
|
|
// On exit, we're on endbfchar
|
|
// On exit, we're on endbfchar
|
|
-Procedure TPDFParser.ParseCMAPBFChar(aMap : TPDFCMapData);
|
|
|
|
|
|
+procedure TPDFParser.ParseCMAPBFChar(aMap: TPDFCMapData);
|
|
|
|
|
|
Var
|
|
Var
|
|
L : TPDFBFCharArray;
|
|
L : TPDFBFCharArray;
|
|
@@ -705,7 +773,7 @@ begin
|
|
aMap.BFChars:=Concat(aMap.BFChars,L);
|
|
aMap.BFChars:=Concat(aMap.BFChars,L);
|
|
end;
|
|
end;
|
|
|
|
|
|
-Procedure TPDFParser.ParseCMAPBFRange(aMap : TPDFCMapData);
|
|
|
|
|
|
+procedure TPDFParser.ParseCMAPBFRange(aMap: TPDFCMapData);
|
|
|
|
|
|
Var
|
|
Var
|
|
L : TPDFCIDRangeArray;
|
|
L : TPDFCIDRangeArray;
|
|
@@ -788,7 +856,7 @@ begin
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
|
|
-Procedure TPDFParser.ParseCMap(aStream : TStream; aMap : TPDFCMap);
|
|
|
|
|
|
+procedure TPDFParser.ParseCMap(aStream: TStream; aMap: TPDFCMap);
|
|
|
|
|
|
Var
|
|
Var
|
|
aScanner: TPDFScanner;
|
|
aScanner: TPDFScanner;
|
|
@@ -837,7 +905,7 @@ begin
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
-procedure TPDFParser.ResolveToUnicodeCMaps(aDoc: TPDFDocument);
|
|
|
|
|
|
+procedure TPDFParser.DoResolveToUnicodeCMaps(aDoc: TPDFDocument);
|
|
|
|
|
|
var
|
|
var
|
|
Obj : TPDFObject;
|
|
Obj : TPDFObject;
|
|
@@ -924,8 +992,6 @@ Var
|
|
I,aStartIndex,aCount : Integer;
|
|
I,aStartIndex,aCount : Integer;
|
|
lToken : TPDFToken;
|
|
lToken : TPDFToken;
|
|
Itm : TPDFXRef;
|
|
Itm : TPDFXRef;
|
|
- EndByTrailer : Boolean;
|
|
|
|
- T : TPDFTrailer;
|
|
|
|
|
|
|
|
begin
|
|
begin
|
|
Result:=TPDFXRefList.Create();
|
|
Result:=TPDFXRefList.Create();
|
|
@@ -1731,6 +1797,7 @@ Var
|
|
Index : TPDFIndexPairArray;
|
|
Index : TPDFIndexPairArray;
|
|
|
|
|
|
begin
|
|
begin
|
|
|
|
+ Index:=[];
|
|
aSize:=aObjectDict.GetIntegerValue(SPDFKeySize);
|
|
aSize:=aObjectDict.GetIntegerValue(SPDFKeySize);
|
|
if aObjectDict.ContainsKey(SPDFKeyIndex) then
|
|
if aObjectDict.ContainsKey(SPDFKeyIndex) then
|
|
Idx:=aObjectDict.GetArrayValue(SPDFKeyIndex)
|
|
Idx:=aObjectDict.GetArrayValue(SPDFKeyIndex)
|
|
@@ -1784,11 +1851,10 @@ Var
|
|
Sizes : Array[0..2] of Byte;
|
|
Sizes : Array[0..2] of Byte;
|
|
Indexes : TPDFIndexPairArray;
|
|
Indexes : TPDFIndexPairArray;
|
|
Fields : Array[0..2] of Integer;
|
|
Fields : Array[0..2] of Integer;
|
|
- aID,aFirst,aLast : integer;
|
|
|
|
|
|
+ aID,aFirst : integer;
|
|
aPair : TPDFIndexPair;
|
|
aPair : TPDFIndexPair;
|
|
O,O2 : TPDFObject;
|
|
O,O2 : TPDFObject;
|
|
W : TPDFArray absolute O;
|
|
W : TPDFArray absolute O;
|
|
- Idx : TPDFArray absolute O;
|
|
|
|
V : TPDFValue absolute O2;
|
|
V : TPDFValue absolute O2;
|
|
I,J,aSize : Integer;
|
|
I,J,aSize : Integer;
|
|
D : PByte;
|
|
D : PByte;
|
|
@@ -1909,7 +1975,7 @@ Var
|
|
aStream : TStream;
|
|
aStream : TStream;
|
|
|
|
|
|
begin
|
|
begin
|
|
- Writeln('Parsing XREF at : ',aStartPos);
|
|
|
|
|
|
+ // Writeln('Parsing XREF at : ',aStartPos);
|
|
Result:=Nil;
|
|
Result:=Nil;
|
|
ParentObject:=Nil;
|
|
ParentObject:=Nil;
|
|
if (FScanner.Position<>aStartPos) then
|
|
if (FScanner.Position<>aStartPos) then
|
|
@@ -1979,6 +2045,14 @@ begin
|
|
ResolveObjects:=true;
|
|
ResolveObjects:=true;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
+procedure TPDFParser.SetResolveToUnicodeCMaps(AValue: Boolean);
|
|
|
|
+begin
|
|
|
|
+ if FResolveToUnicodeCMaps=AValue then Exit;
|
|
|
|
+ FResolveToUnicodeCMaps:=AValue;
|
|
|
|
+ if aValue then
|
|
|
|
+ ResolveObjects:=True;
|
|
|
|
+end;
|
|
|
|
+
|
|
procedure TPDFParser.DoProgress(aKind: TPDFProgressKind; aCurrent, aCount: Integer);
|
|
procedure TPDFParser.DoProgress(aKind: TPDFProgressKind; aCurrent, aCount: Integer);
|
|
begin
|
|
begin
|
|
If Assigned(FOnProgress) then
|
|
If Assigned(FOnProgress) then
|
|
@@ -2175,7 +2249,6 @@ var
|
|
I : Integer;
|
|
I : Integer;
|
|
Itm : TPDFXRef;
|
|
Itm : TPDFXRef;
|
|
UseCompressed : Boolean;
|
|
UseCompressed : Boolean;
|
|
- Ind : TPDFIndirect;
|
|
|
|
|
|
|
|
begin
|
|
begin
|
|
For UseCompressed:=False to True do
|
|
For UseCompressed:=False to True do
|
|
@@ -2191,7 +2264,7 @@ begin
|
|
else if (Itm.Instance=Nil) and Itm.InUse and (Itm.Compressed=UseCompressed) then
|
|
else if (Itm.Instance=Nil) and Itm.InUse and (Itm.Compressed=UseCompressed) then
|
|
if Itm.ReferenceIndex>0 then
|
|
if Itm.ReferenceIndex>0 then
|
|
begin
|
|
begin
|
|
- Ind:=LoadXRefObject(Itm);
|
|
|
|
|
|
+ LoadXRefObject(Itm);
|
|
{ if Assigned(Ind) then
|
|
{ if Assigned(Ind) then
|
|
Writeln('Loaded ',Ind.GetDescription);}
|
|
Writeln('Loaded ',Ind.GetDescription);}
|
|
end;
|
|
end;
|
|
@@ -2299,6 +2372,8 @@ begin
|
|
LoadIndirectObjects;
|
|
LoadIndirectObjects;
|
|
If ResolveContentStreams then
|
|
If ResolveContentStreams then
|
|
DoResolveContentStreams(FDoc);
|
|
DoResolveContentStreams(FDoc);
|
|
|
|
+ if ResolveToUnicodeCMaps then
|
|
|
|
+ DoResolveToUnicodeCMaps(FDoc);
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
@@ -2323,6 +2398,7 @@ Var
|
|
Streams : Array of TStream;
|
|
Streams : Array of TStream;
|
|
|
|
|
|
begin
|
|
begin
|
|
|
|
+ Streams:=[];
|
|
Result:=Nil;
|
|
Result:=Nil;
|
|
try
|
|
try
|
|
if aPage.ContentCount=1 then
|
|
if aPage.ContentCount=1 then
|
|
@@ -2347,9 +2423,7 @@ end;
|
|
procedure TPDFParser.DoResolveContentStreams(aDoc: TPDFDocument; aOnCommand : TPDFNewCommandEvent = Nil);
|
|
procedure TPDFParser.DoResolveContentStreams(aDoc: TPDFDocument; aOnCommand : TPDFNewCommandEvent = Nil);
|
|
|
|
|
|
Var
|
|
Var
|
|
- I,J,aCount : Integer;
|
|
|
|
- Obj,ObjFree : TPDFIndirect;
|
|
|
|
- Cont : TPDFContentStream;
|
|
|
|
|
|
+ I,aCount : Integer;
|
|
aPage : TPDFPageObject;
|
|
aPage : TPDFPageObject;
|
|
aStream : TStream;
|
|
aStream : TStream;
|
|
|
|
|