Browse Source

* PDF reader, initial implementation

Michaël Van Canneyt 2 years ago
parent
commit
6e7869bea0

+ 58 - 0
packages/fcl-pdf/examples/pdfdump.lpi

@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<CONFIG>
+  <ProjectOptions>
+    <Version Value="12"/>
+    <General>
+      <Flags>
+        <SaveOnlyProjectUnits Value="True"/>
+        <MainUnitHasCreateFormStatements Value="False"/>
+        <MainUnitHasTitleStatement Value="False"/>
+        <MainUnitHasScaledStatement Value="False"/>
+      </Flags>
+      <SessionStorage Value="InProjectDir"/>
+      <Title Value="PDF dump program"/>
+      <UseAppBundle Value="False"/>
+      <ResourceType Value="res"/>
+    </General>
+    <BuildModes>
+      <Item Name="Default" Default="True"/>
+    </BuildModes>
+    <PublishOptions>
+      <Version Value="2"/>
+      <UseFileFilters Value="True"/>
+    </PublishOptions>
+    <RunParams>
+      <FormatVersion Value="2"/>
+    </RunParams>
+    <Units>
+      <Unit>
+        <Filename Value="pdfdump.pp"/>
+        <IsPartOfProject Value="True"/>
+      </Unit>
+    </Units>
+  </ProjectOptions>
+  <CompilerOptions>
+    <Version Value="11"/>
+    <Target>
+      <Filename Value="pdfdump"/>
+    </Target>
+    <SearchPaths>
+      <IncludeFiles Value="$(ProjOutDir)"/>
+      <OtherUnitFiles Value="../src"/>
+      <UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
+    </SearchPaths>
+  </CompilerOptions>
+  <Debugging>
+    <Exceptions>
+      <Item>
+        <Name Value="EAbort"/>
+      </Item>
+      <Item>
+        <Name Value="ECodetoolError"/>
+      </Item>
+      <Item>
+        <Name Value="EFOpenError"/>
+      </Item>
+    </Exceptions>
+  </Debugging>
+</CONFIG>

+ 418 - 0
packages/fcl-pdf/examples/pdfdump.pp

@@ -0,0 +1,418 @@
+{ **********************************************************************
+  This file is part of the Free Component Library
+
+  PDF file dumper
+  Copyright (c) 2022 by Michael Van Canneyt [email protected]
+
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+  **********************************************************************}
+
+program pdfdump;
+
+{$mode objfpc}
+{$h+}
+
+uses
+  cwString, sysutils, classes, contnrs, fppdfobjects, fppdfparser, fppdfpredict,
+  custapp, fppdfconsts;
+
+type
+
+  { TPDFDumpApplication }
+  // Parts of a PDF document that can be selected for output.
+  // NOTE(review): ProcessOptions has no command-line switch for isTrailer;
+  // it is only selected when no section option is given at all.
+  TInfoSection = (isInfo,isCatalog,isTrailer,isObjects, isFonts, isPages,isPageContents,isPageText, isDictionaries);
+  TInfoSections = Set of TInfoSection;
+
+  // Console application that dumps the selected sections of one or more PDF files.
+  TPDFDumpApplication = class(TCustomApplication)
+  Private
+    // File names taken from the non-option command-line arguments.
+    FFiles : TStrings;
+    // Sections to display; filled by ProcessOptions.
+    FSections : TInfoSections;
+    // Value of -n/--pageno; assigned in ProcessOptions.
+    FPageNo : Integer;
+    // True when -v/--verbose was given: DumpFile then hooks the parser's progress and log events.
+    FVerbose : Boolean;
+  Public
+    constructor Create(aOwner: TComponent); override;
+    destructor destroy; override;
+  Protected
+    // Per-section output routines, called from DumpFile/DisplayPages.
+    procedure DisplayPageText(Doc: TPDFDocument; aIndex: Integer;  aPage: TPDFPageObject);
+    // Event handlers attached to the parser in verbose mode.
+    procedure DoLog(sender: TObject; aKind: TLogkind; const aMessage: string);
+    Procedure DoProgress(Sender : TObject;aKind : TProgressKind; aCurrent,aCount : Integer);
+    procedure DisplayCatalog(Doc: TPDFDocument);
+    procedure DisplayInfo(Doc: TPDFDocument);
+    procedure DisplayObjects(Doc: TPDFDocument);
+    procedure DisplayFonts(Doc: TPDFDocument);
+    procedure DisplayPageContents(Doc: TPDFDocument; aIndex: Integer;   aPage: TPDFPageObject);
+    procedure DisplayPages(Doc: TPDFDocument);
+    procedure DisplayTrailer(Doc: TPDFDocument);
+  Public
+    // Parses the command line; see the implementation for the accepted options.
+    function ProcessOptions : Boolean;
+    // Prints the help text and halts the program.
+    procedure Usage(Msg : String);
+    // Parses one PDF file and prints the selected sections.
+    procedure DumpFile(FN: String);
+    // Application entry point: processes options, then dumps every file.
+    procedure DoRun; override;
+  end;
+
+{ TPDFDumpApplication }
+
+constructor TPDFDumpApplication.Create(aOwner: TComponent);
+
+// Create the application and the list that receives the file names to dump.
+
+begin
+  inherited;
+  FFiles:=TStringList.Create;
+end;
+
+destructor TPDFDumpApplication.destroy;
+
+// Release the file name list before tearing down the application.
+
+begin
+  FFiles.Free;
+  FFiles:=Nil;
+  inherited;
+end;
+
+procedure TPDFDumpApplication.DoRun;
+
+{ Process the command line and dump every file named on it.
+  A failure on one file is reported on StdErr and does not stop the
+  remaining files; ExitCode is set to 1 when at least one file failed.
+  When any file failed, a summary line is written to StdErr at the end. }
+
+var
+  FN : String;
+  Count,Errors : Integer;
+
+begin
+  StopOnException:=True;
+  // Terminate is requested up front, before any work is done
+  // (presumably so Run returns after this single DoRun call - see custapp docs).
+  Terminate;
+  if not ProcessOptions then
+    exit;
+  Errors:=0;
+  Count:=0;
+  For FN in FFiles do
+    begin
+    // Count every file exactly once, whether or not it fails.
+    Inc(Count);
+    try
+      DumpFile(FN);
+    except
+      On E: Exception do
+        begin
+        ExitCode:=1;
+        Writeln(Stderr,Format('Error %s examining file "%s" : %s',[E.ClassName,FN,E.Message]));
+        // Track failures separately so the summary below can be printed.
+        Inc(Errors);
+        end;
+    end;
+    end;
+  Flush(output);
+  if Errors>0 then
+    begin
+    // Both arguments are integers, so both placeholders must be %d.
+    Writeln(StdErr,Format('Processed %d files, encountered an error in %d files.',[Count,Errors]));
+    Flush(StdErr);
+    end;
+end;
+
+function TPDFDumpApplication.ProcessOptions: Boolean;
+
+{ Parse the command line into FFiles, FSections, FVerbose and FPageNo.
+  Returns True when the options are valid and at least one file name was
+  given. On an invalid command line, on -h/--help, or when no file name is
+  present, Usage is called (which halts the program).
+  When no section option is given, every section is selected. }
+
+  // Select aSection when either its short or its long option is present.
+  Procedure CheckSection(aShort : Char; aLong : String; aSection : TInfoSection);
+
+  begin
+    if HasOption(aShort,aLong) then
+      Include(FSections,aSection);
+  end;
+
+Const
+  ShortOpts = 'hopcdiln:vtf';
+  LongOpts : Array of string = ('help','objects','pages','pagecontent','dictionaries','info','catalog','pageno:','verbose','text','fonts');
+
+Var
+  Err,PageArg : String;
+  S : TInfoSection;
+
+begin
+  Result:=False;
+  Err:=Checkoptions(ShortOpts,LongOpts);
+  GetNonOptions(ShortOpts,LongOpts,FFiles);
+  if (Err<>'') or HasOption('h','help') then
+    begin
+    Usage(Err);
+    exit(False);
+    end;
+  if FFiles.Count=0 then
+    begin
+    Usage('No filenames specified');
+    Exit(False);
+    end;
+  CheckSection('o','objects',isObjects);
+  CheckSection('p','pages',isPages);
+  CheckSection('c','pagecontent',isPageContents);
+  CheckSection('d','dictionaries',isDictionaries);
+  CheckSection('i','info',isInfo);
+  CheckSection('f','fonts',isFonts);
+  // -l selects the catalog section (it used to select isInfo by mistake).
+  CheckSection('l','catalog',isCatalog);
+  CheckSection('t','text',isPageText);
+  fVerbose:=HasOption('v','verbose');
+  if HasOption('n','pageno') then
+    begin
+    PageArg:=GetOptionValue('n','pageno');
+    // Report a malformed number as a usage error instead of raising a conversion exception.
+    if not TryStrToInt(PageArg,FPageNo) then
+      begin
+      Usage(Format('Invalid page number: "%s"',[PageArg]));
+      Exit(False);
+      end;
+    end;
+
+  if (FSections=[]) then
+    for S in TInfoSection do
+      Include(FSections,S);
+  // Explicit success result: the function result was previously left unassigned here.
+  Result:=True;
+end;
+
+procedure TPDFDumpApplication.Usage(Msg: String);
+
+{ Print the command-line help and halt the program.
+  Msg: error text to show before the help; empty when help was requested.
+  The process exit code is 1 when Msg is non-empty, 0 otherwise. }
+
+begin
+  // Show the reason for the failure; previously the message was silently dropped.
+  if Msg<>'' then
+    Writeln('Error: ',Msg);
+  Writeln('Usage ',ExtractFileName(ParamStr(0)),' [options] FILE1 FILE2 ...');
+  Writeln('Where options is one or more of:');
+  Writeln('-h --help                This help text');
+  Writeln('-c --pagecontent         Show page content stream (commands). Needs -p');
+  Writeln('-d --dictionaries        Show object dictionaries. Needs -o');
+  // The short option parsed for --fonts is -f (see ProcessOptions), not -p.
+  Writeln('-f --fonts               Show font info');
+  Writeln('-i --info                Show document info');
+  Writeln('-l --catalog             Show document catalog');
+  Writeln('-n --pageno=N            Show only page N');
+  Writeln('-o --objects             Show indirect objects');
+  Writeln('-p --pages               Show pages');
+  Writeln('-t --text                Show page text. Needs -p');
+  Writeln('-v --verbose             Show warnings/extra info when parsing');
+  Halt(Ord(Msg<>''));
+end;
+
+procedure TPDFDumpApplication.DisplayTrailer(Doc : TPDFDocument);
+
+// Print the trailer dictionary of Doc; prints nothing when Doc has none.
+
+begin
+  if not Assigned(Doc.TrailerDict) then
+    exit;
+  Writeln('Trailer dictionary:');
+  Writeln(Doc.TrailerDict.GetDescription);
+end;
+
+procedure TPDFDumpApplication.DisplayObjects(Doc : TPDFDocument);
+
+{ List every object enumerated from Doc. For indirect objects that have an
+  object dictionary, the dictionary is printed as well when the
+  isDictionaries section is selected. }
+
+Var
+  Item : TPDFObject;
+  Indirect : TPDFIndirect;
+
+begin
+  Writeln('Indirect object count : ',Doc.Count);
+  For Item in Doc do
+    begin
+    Writeln('Object (',Item.ClassName,') : ',Item.GetDescription);
+    if not (Item is TPDFIndirect) then
+      Continue;
+    Indirect:=TPDFIndirect(Item);
+    if Assigned(Indirect.ObjectDict) and (isDictionaries in FSections) then
+      begin
+      Writeln('object dictionary : ',Indirect.ObjectDict.GetDescription);
+      Writeln;
+      end;
+    end;
+end;
+
+procedure TPDFDumpApplication.DisplayFonts(Doc: TPDFDocument);
+
+// Print the description of every font object enumerated from Doc,
+// each followed by two empty lines.
+
+Var
+  Candidate : TPDFObject;
+
+begin
+  Writeln('Font definitions:');
+  Writeln;
+  For Candidate in Doc do
+    begin
+    if not (Candidate is TPDFFontObject) then
+      Continue;
+    Writeln(Candidate.GetDescription);
+    Writeln;
+    Writeln;
+    end;
+end;
+
+procedure TPDFDumpApplication.DoProgress(Sender: TObject; aKind: TProgressKind;
+  aCurrent, aCount: Integer);
+
+// Parser progress handler (attached in verbose mode): prints one line per notification.
+
+Const
+  KindNames : Array [TProgressKind] of String = ('XRef','Indirect','ContentStream');
+
+Var
+  KindName : String;
+
+begin
+  KindName:=KindNames[aKind];
+  Writeln('Loading ', KindName,': ',aCurrent,'/',aCount);
+end;
+
+procedure TPDFDumpApplication.DoLog(sender: TObject; aKind: TLogkind;
+  const aMessage: string);
+
+// Parser log handler (attached in verbose mode): prints the message prefixed by its kind.
+
+begin
+  Write('[',aKind,'] : ');
+  Writeln(aMessage);
+end;
+
+procedure TPDFDumpApplication.DisplayCatalog(Doc : TPDFDocument);
+
+// Print the dictionary of the document catalog; prints nothing when no catalog is found.
+
+begin
+  if not Assigned(Doc.FindCatalog) then
+    exit;
+  Writeln('Document catalog:');
+  Writeln(Doc.FindCatalog.ObjectDict.GetDescription);
+end;
+
+procedure TPDFDumpApplication.DisplayInfo(Doc : TPDFDocument);
+
+{ Print the document information fields of Doc.
+  Does nothing when Doc has no document info object. The info instance
+  returned by FindDocumentInfo is freed here after printing. }
+
+Var
+  DocInfo : TPDFDocumentInfo;
+
+begin
+  if Not Assigned(Doc.FindDocumentInfoObject) then
+    exit;
+  DocInfo:=Doc.FindDocumentInfo;
+  Try
+    Writeln('Document info:');
+    Writeln('Title : ',DocInfo.Title);
+    Writeln('Author : ',DocInfo.Author);
+    Writeln('Subject : ',DocInfo.Subject);
+    Writeln('Keywords : ',DocInfo.Keywords);
+    Writeln('Creator : ',DocInfo.Creator);
+    Writeln('Producer : ',DocInfo.Producer);
+    Writeln('Creation Date : ',DateTimeToStr(DocInfo.CreationDate));
+    Writeln('Modification Date : ',DateTimeToStr(DocInfo.ModDate));
+    Writeln('Trapped : ',DocInfo.Trapped);
+  Finally
+    DocInfo.Free;
+  end;
+end;
+
+procedure TPDFDumpApplication.DisplayPageContents(Doc : TPDFDocument; aIndex: Integer; aPage : TPDFPageObject);
+
+{ Print every command in the command list of aPage on its own line:
+  index, command name, class name, then the data of each token.
+  Doc and aIndex are not used by this routine. }
+
+Var
+  CmdIdx,TokIdx,TokCount : Integer;
+  Current : TPDFCommand;
+
+begin
+  For CmdIdx:=0 to aPage.CommandList.Count-1 do
+    begin
+    Current:=aPage.CommandList[CmdIdx];
+    Write('Command ',CmdIdx,' : ',Current.Command,' (',Current.ClassName,'):');
+    TokCount:=Length(Current.Tokens);
+    TokIdx:=0;
+    While TokIdx<TokCount do
+      begin
+      Write(' ',Current.Tokens[TokIdx].TokenData);
+      Inc(TokIdx);
+      end;
+    Writeln;
+    end;
+end;
+
+procedure TPDFDumpApplication.DisplayPageText(Doc : TPDFDocument; aIndex: Integer; aPage : TPDFPageObject);
+
+{ Print the text of aPage, one line per text command.
+  A Tf command switches the current font: its unicode CMap (or Nil when
+  the font is not found) is used for all following text commands.
+  aIndex is not used by this routine. }
+
+Var
+  Idx : Integer;
+  Cmd : TPDFCommand;
+  FontName,Rawtext : RawByteString;
+  aFontRef : TPDFRefData;
+  ActiveMap : TPDFCMap;
+  aFontObj : TPDFFontObject;
+
+begin
+  ActiveMap:=Nil;
+  For Idx:=0 to aPage.CommandList.Count-1 do
+    begin
+    Cmd:=aPage.CommandList[Idx];
+    if Cmd is TPDFTfCommand then
+      begin
+      // Font selection: look the font up by name, without the leading slash.
+      FontName:=TPDFTfCommand(Cmd).FontName;
+      if (FontName<>'') and (FontName[1]='/') then
+        Delete(FontName,1,1);
+      aFontRef:=aPage.FindFontRef(FontName);
+      aFontObj:=Doc.FindFont(aFontRef); // TPDFFontObject
+      ActiveMap:=Nil;
+      if Assigned(aFontObj) then
+        ActiveMap:=aFontObj.UnicodeCMap;
+      Continue;
+      end;
+    if not (Cmd is TPDFTextCommand) then
+      Continue;
+    RawText:=TPDFTextCommand(Cmd).GetFullText(ActiveMap);
+    // Tag the extracted bytes as UTF-8 before writing them out.
+    SetCodePage(RawText,CP_UTF8);
+    Writeln(RawText);
+    end;
+end;
+
+procedure TPDFDumpApplication.DisplayPages(Doc : TPDFDocument);
+
+{ Print the page count and, per page, the object type plus - depending on
+  the selected sections - the page dictionary, content commands and text.
+  When -n/--pageno=N was given (FPageNo>0) only page N is shown; N is
+  taken as 1-based, so it maps to index N-1. FPageNo<=0 shows all pages.
+  NOTE(review): the 1-based interpretation is an assumption; the option was
+  previously parsed but never applied. }
+
+Var
+  aPage : TPDFPageObject;
+  I : Integer;
+
+begin
+  Writeln('Page count : ',Doc.PageCount);
+  if FPageNo>Doc.PageCount then
+    Writeln('Page ',FPageNo,' does not exist');
+  For I:=0 to Doc.PageCount-1 do
+    begin
+    // Honour the page filter before touching the page object.
+    if (FPageNo>0) and (I<>FPageNo-1) then
+      Continue;
+    aPage:=Doc.Pages[I];
+    Write('Page object ',I,': ');
+    if not Assigned(aPage) then
+      Writeln('Not found')
+    else
+      begin
+      Writeln('Object type: ',aPage.ObjectType,' (',aPage.ClassName,')');
+      if isDictionaries in FSections then
+        begin
+        Writeln('Page dictionary : ',aPage.ObjectDict.GetDescription);
+        Writeln;
+        end;
+      if isPageContents in FSections then
+        DisplayPageContents(Doc,I,aPage);
+      if isPageText in FSections then
+        begin
+        Writeln('Page text : ');
+        Writeln;
+        DisplayPageText(Doc,I,aPage)
+        end;
+      end;
+    end;
+end;
+
+procedure TPDFDumpApplication.DumpFile(FN : String);
+
+{ Open the file FN, parse it into a document and print every selected
+  section, each followed by two empty lines. In verbose mode the parser's
+  progress and log events are routed to DoProgress/DoLog.
+  The stream, parser and document are freed even when parsing fails;
+  exceptions propagate to the caller. }
+
+Var
+  Src : TFileStream;
+  Parser : TPDFParser;
+  PDFDoc : TPDFDocument;
+  Section : TInfoSection;
+
+begin
+  Parser:=Nil;
+  PDFDoc:=Nil;
+  Writeln('Contents of ',FN,' : ');
+  Src:=TFileStream.Create(FN,fmOpenRead or fmShareDenyWrite);
+  try
+    PDFDoc:=TPDFDocument.Create();
+    Parser:=TPDFParser.Create(Src);
+    if FVerbose then
+      begin
+      Parser.OnProgress:=@DoProgress;
+      Parser.OnLog:=@DoLog;
+      end;
+    // Parser.ResolveObjects:=False;
+    Parser.ParseDocument(PDFDoc);
+    // Text output needs the unicode CMaps of the fonts to be resolved first.
+    if isPageText in FSections then
+      Parser.ResolveToUnicodeCMaps(PDFDoc);
+    For Section in FSections do
+      begin
+      // Sections without a routine of their own are handled inside DisplayObjects/DisplayPages.
+      if Section=isObjects then
+        DisplayObjects(PDFDoc)
+      else if Section=isPages then
+        DisplayPages(PDFDoc)
+      else if Section=isCatalog then
+        DisplayCatalog(PDFDoc)
+      else if Section=isInfo then
+        DisplayInfo(PDFDoc)
+      else if Section=isFonts then
+        DisplayFonts(PDFDoc)
+      else if Section=isTrailer then
+        DisplayTrailer(PDFDoc);
+      Writeln;
+      Writeln;
+      end;
+  finally
+    PDFDoc.Free;
+    Parser.Free;
+    Src.Free;
+  end;
+  Flush(Output);
+end;
+
+var
+  App : TPDFDumpApplication;
+
+// Program entry point: create the application, run it once, always free it.
+begin
+  App:=TPDFDumpApplication.Create(Nil);
+  try
+    App.Initialize;
+    App.Run;
+  finally
+    App.Free;
+  end;
+end.
+

+ 18 - 0
packages/fcl-pdf/fpmake.pp

@@ -58,7 +58,25 @@ begin
       Dependencies.AddUnit('fpttfsubsetter');
       Dependencies.AddUnit('fpttfsubsetter');
       Dependencies.AddInclude('src/fontmetrics_stdpdf.inc');
       Dependencies.AddInclude('src/fontmetrics_stdpdf.inc');
       end;
       end;
+    T:=P.Targets.AddUnit('src/fppdfconsts.pp');
+    T:=P.Targets.AddUnit('src/fppdfpredict.pp');
+    T:=P.Targets.AddUnit('src/fppdfsource.pp');
     
     
+    T:=P.Targets.AddUnit('src/fppdfobjects.pp');
+    T.Dependencies.AddUnit('fppdfconsts');
+
+    T:=P.Targets.AddUnit('src/fppdfscanner.pp');
+    T.ResourceStrings:=true;
+    T.Dependencies.AddUnit('fppdfobjects');
+    T.Dependencies.AddUnit('fppdfsource');
+    
+    T:=P.Targets.AddUnit('src/fppdfparser.pp');
+    T.ResourceStrings:=true;
+    T.Dependencies.AddUnit('fppdfobjects');
+    T.Dependencies.AddUnit('fppdfsource');
+    T.Dependencies.AddUnit('fppdfconsts');
+    T.Dependencies.AddUnit('fppdfpredict');
+     
     // md5.ref
     // md5.ref
 {$ifndef ALLPACKAGES}
 {$ifndef ALLPACKAGES}
     Run;
     Run;

+ 309 - 0
packages/fcl-pdf/src/fppdfconsts.pp

@@ -0,0 +1,309 @@
+{ **********************************************************************
+  This file is part of the Free Component Library
+
+  PDF names/constants.
+  Copyright (c) 2022 by Michael Van Canneyt [email protected]
+
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+  **********************************************************************}
+unit fppdfconsts;
+
+{$mode ObjFPC}{$H+}
+
+interface
+
+
+const
+  // Delimiters: keywords that mark the structural parts of a PDF file.
+  SPDFStartXRef = 'startxref';
+  SPDFXRef      = 'xref';
+  SPDFObj       = 'obj';
+  SPDFEndObj    = 'endobj';
+  SPDFStream    = 'stream';
+  SPDFEndStream = 'endstream';
+  SPDFTrailer   = 'trailer';
+
+  // Keyword that ends an indirect reference ("n g R").
+  SPDFTokenR = 'R';
+
+  // Object types
+  SPDFTypeXref           = 'XRef';
+  SPDFTypeObjStm         = 'ObjStm';
+  SPDFTypePage           = 'Page';
+  SPDFTypePages          = 'Pages';
+  SPDFTypeXObject        = 'XObject';
+  SPDFTypePattern        = 'Pattern';
+  SPDFTypeExtGState      = 'ExtGState';
+  SPDFTypeFont           = 'Font';
+  SPDFTypeFontDescriptor = 'FontDescriptor';
+  SPDFTypeMask           = 'Mask';
+  SPDFTypeOCG            = 'OCG';
+  SPDFTypeAnnot          = 'Annot';
+  SPDFTypeCatalog        = 'Catalog';
+
+  // Known dictionary entry names
+
+  // for streams
+  SPDFKeyLength       = 'Length';
+  SPDFKeyFilter       = 'Filter';
+  SPDFKeyDecodeParms  = 'DecodeParms';
+  SPDFKeyF            = 'F';
+  SPDFKeyFFilter      = 'FFilter';
+  SPDFKeyFDecodeParms = 'FDecodeParms';
+  SPDFKeyN            = 'N';
+
+  // Filter names
+  SPDFFilterFlateDecode     = 'FlateDecode';
+  SPDFFilterASCIIHexDecode  = 'ASCIIHexDecode';
+  SPDFFilterASCII85Decode   = 'ASCII85Decode';
+  SPDFFilterLZWDecode       = 'LZWDecode';
+  SPDFFilterRunLengthDecode = 'RunLengthDecode';
+  SPDFFilterCCITTFaxDecode  = 'CCITTFaxDecode';
+  SPDFFilterJBIG2Decode     = 'JBIG2Decode';
+  SPDFFilterDCTDecode       = 'DCTDecode';
+  SPDFFilterJPXDecode       = 'JPXDecode';
+  SPDFFilterCrypt           = 'Crypt';
+  // The next four are dictionary keys (SPDFKey prefix), not filter names.
+  // NOTE(review): presumably decode-parameter entries for the predictor - confirm against users.
+  SPDFKeyPredictor          = 'Predictor';
+  SPDFKeyColors             = 'Colors';
+  SPDFKeyColumns            = 'Columns';
+  SPDFKeyBitsPerComponent   = 'BitsPerComponent';
+
+  // Pages (page tree node entries)
+  SPDFKeyCount              = 'Count';
+  SPDFKeyParent             = 'Parent';
+  SPDFKeyKids               = 'Kids';
+
+  // Page: entry names of a page object dictionary.
+  // PDF names are case-sensitive, so the spelling must match the specification exactly.
+
+  SPDFPageKeyLastModified         = 'LastModified';
+  SPDFPageKeyResources            = 'Resources';
+  SPDFPageKeyMediaBox             = 'MediaBox';
+  SPDFPageKeyCropBox              = 'CropBox';
+  SPDFPageKeyBleedBox             = 'BleedBox';
+  SPDFPageKeyTrimBox              = 'TrimBox';
+  SPDFPageKeyArtBox               = 'ArtBox';
+  SPDFPageKeyBoxColorInfo         = 'BoxColorInfo';
+  SPDFPageKeyContents             = 'Contents';
+  SPDFPageKeyRotate               = 'Rotate';
+  SPDFPageKeyGroup                = 'Group';
+  SPDFPageKeyThumb                = 'Thumb';
+  SPDFPageKeyB                    = 'B';
+  SPDFPageKeyDur                  = 'Dur';
+  SPDFPageKeyTrans                = 'Trans';
+  SPDFPageKeyAnnots               = 'Annots';
+  SPDFPageKeyAA                   = 'AA';
+  SPDFPageKeyMetaData             = 'Metadata';
+  // Was 'Pieceinfo'; the page entry is spelled PieceInfo.
+  SPDFPageKeyPieceInfo            = 'PieceInfo';
+  SPDFPageKeyStructParents        = 'StructParents';
+  SPDFPageKeyID                   = 'ID';
+  SPDFPageKeyPZ                   = 'PZ';
+  SPDFPageKeySeparationInfo       = 'SeparationInfo';
+  SPDFPageKeyTabs                 = 'Tabs';
+  SPDFPageKeyTemplateInstantiated = 'TemplateInstantiated';
+  SPDFPageKeyPresSteps            = 'PresSteps';
+  SPDFPageKeyUserUnit             = 'UserUnit';
+  SPDFPageKeyVP                   = 'VP';
+
+  // Resource dictionary: names of the resource categories.
+  SPDFResourceKeyFont       = 'Font';
+  SPDFResourceKeyExtGState  = 'ExtGState';
+  SPDFResourceKeyColorSpace = 'ColorSpace';
+  SPDFResourceKeyPattern    = 'Pattern';
+  SPDFResourceKeyShading    = 'Shading';
+  SPDFResourceKeyXObject    = 'XObject';
+  SPDFResourceKeyProcSet    = 'ProcSet';
+  SPDFResourceKeyProperties = 'Properties';
+
+
+
+  // Object streams
+  // NOTE(review): Size/W/Index look like cross-reference stream entries - confirm where they are used.
+  SPDFKeyFirst  = 'First';
+  SPDFKeySize   = 'Size';
+  SPDFKeyW      = 'W';
+  SPDFKeyIndex  = 'Index';
+
+  // Trailer(Object)
+  SPDFKeyRoot = 'Root';
+  SPDFKeyInfo = 'Info';
+  SPDFKeyPrev = 'Prev';
+
+  // Catalog
+  SPDFKeyPages = 'Pages';
+
+  // Document Info: entry names of the document information dictionary.
+  SPDFKeyTitle         = 'Title';
+  SPDFKeyAuthor        = 'Author';
+  SPDFKeySubject       = 'Subject';
+  SPDFKeyKeywords      = 'Keywords';
+  SPDFKeyCreator       = 'Creator';
+  SPDFKeyProducer      = 'Producer';
+  SPDFKeyCreationDate  = 'CreationDate';
+  SPDFKeyModDate       = 'ModDate';
+  SPDFKeyTrapped       = 'Trapped';
+
+  // Inline image entries: abbreviated key, colour space and filter names
+  // that may appear in an inline image dictionary.
+  SPDFImageKeyBPC    = 'BPC';
+  SPDFImageKeyW      = 'W';
+  SPDFImageKeyCS     = 'CS';
+  SPDFImageKeyD      = 'D';
+  // Was 'D', which made it indistinguishable from SPDFImageKeyD;
+  // the abbreviation for DecodeParms is DP.
+  SPDFImageKeyDP     = 'DP';
+  SPDFImageKeyF      = 'F';
+  SPDFImageKeyH      = 'H';
+  SPDFImageKeyIM     = 'IM';
+  SPDFImageKeyIntent = 'Intent';
+  SPDFImageKeyI      = 'I';
+  SPDFImageKeyG      = 'G';
+  SPDFImageKeyRGB    = 'RGB';
+  SPDFImageKeyCMYK   = 'CMYK';
+  // NOTE(review): same value as SPDFImageKeyCMYK and the name carries no suffix;
+  // looks like a leftover. Kept so existing references keep compiling.
+  SPDFImageKey       = 'CMYK';
+  SPDFImageKeyAHx    = 'AHx';
+  SPDFImageKeyA85    = 'A85';
+  SPDFImageKeyLZW    = 'LZW';
+  SPDFImageKeyFl     = 'Fl';
+  SPDFImageKeyRL     = 'RL';
+  SPDFImageKeyCCF    = 'CCF';
+  SPDFImageKeyDCT    = 'DCT';
+
+  // Font keys: entry names of font dictionaries.
+  SPDFFontKeyType         = 'Type';
+  SPDFFontKeySubType      = 'Subtype';
+  SPDFFontKeyBaseFont     = 'BaseFont';
+  SPDFFontKeyEncoding     = 'Encoding';
+  SPDFFontKeyDescendantFonts = 'DescendantFonts';
+  SPDFFontKeyToUnicode    = 'ToUnicode';
+
+  // NOTE(review): most of the following names look like font descriptor entries - confirm against users.
+  SPDFFontKeyName         = 'Name';
+  SPDFFontKeyFontName     = 'FontName';
+  SPDFFontKeyFamily       = 'FontFamily';
+  SPDFFontKeyStretch      = 'FontStretch';
+  SPDFFontKeyWeight       = 'FontWeight';
+  SPDFFontKeyFlags        = 'Flags';
+  SPDFFontKeyBBox         = 'FontBBox';
+  SPDFFontKeyItalicAngle  = 'ItalicAngle';
+  SPDFFontKeyAscent       = 'Ascent';
+  SPDFFontKeyDescent      = 'Descent';
+  SPDFFontKeyLeading      = 'Leading';
+  SPDFFontKeyCapHeight    = 'CapHeight';
+  SPDFFontKeyXHeight      = 'XHeight';
+  SPDFFontKeyStemV        = 'StemV';
+  SPDFFontKeyStemH        = 'StemH';
+  SPDFFontKeyAvgWidth     = 'AvgWidth';
+  SPDFFontKeyMaxWidth     = 'MaxWidth';
+  SPDFFontKeyMissingWidth = 'MissingWidth';
+  SPDFFontKeyFontFile     = 'FontFile';
+  SPDFFontKeyFontFile2    = 'FontFile2';
+  SPDFFontKeyFontFile3    = 'FontFile3';
+  SPDFFontKeyCharSet      = 'CharSet';
+
+  SPDFFontKeyStyle        = 'Style';
+  SPDFFontKeyLang         = 'Lang';
+  SPDFFontKeyFD           = 'FD';
+  SPDFFontKeyCIDSet       = 'CIDSet';
+
+  SPDFFontKeyBaseEncoding  = 'BaseEncoding';
+  SPDFFontKeyDifferences   = 'Differences';
+  SPDFFontKeyCharProcs     = 'CharProcs';
+  SPDFFontKeyCIDSystemInfo = 'CIDSystemInfo';
+
+
+  // Entry names inside a CIDSystemInfo dictionary.
+  SPDFCIDSystemInfoKeyRegistry = 'Registry';
+  SPDFCIDSystemInfoKeyOrdering = 'Ordering';
+  SPDFCIDSystemInfoKeySupplement = 'Supplement';
+
+
+
+  // CIDFont keys
+  SPDFCIDFontKeyType           = 'Type';
+  SPDFCIDFontKeySubtype        = 'Subtype';
+  SPDFCIDFontKeyBaseFont       = 'BaseFont';
+  // Alias: same value as SPDFFontKeyCIDSystemInfo.
+  SPDFCIDFontKeyCIDSystemInfo  = SPDFFontKeyCIDSystemInfo;
+  SPDFCIDFontKeyFontDescriptor = 'FontDescriptor';
+  SPDFCIDFontKeyDW             = 'DW';
+  SPDFCIDFontKeyW              = 'W';
+  SPDFCIDFontKeyDW2            = 'DW2';
+  SPDFCIDFontKeyW2             = 'W2';
+  SPDFCIDFontKeyCIDToGIDMap    = 'CIDToGIDMap';
+
+  // CMAP keys
+  SCMAPKeyType           = 'Type';
+  SCMAPKeyCMapName       = 'CMapName';
+  // Alias: same value as SPDFFontKeyCIDSystemInfo.
+  SCMAPKeyCIDSystemInfo  = SPDFFontKeyCIDSystemInfo;
+  SCMAPKeyWMode          = 'WMode';
+  SCMAPKeyUseCMap        = 'UseCMap';
+
+
+
+  // CJK Cmaps: names of predefined CMaps. The list also ends with the two Identity CMaps.
+  // NOTE(review): the identifier prefix changes from SCMAP to CMAP starting at
+  // CMAPETenms_B5_V. The names are public, so they are left as they are here.
+
+  SCMAPGB_EUC_H        = 'GB-EUC-H';
+  SCMAPGB_EUC_V        = 'GB-EUC-V';
+  SCMAPGBPC_EUC_H      = 'GBpc-EUC-H';
+  SCMAPGBPC_EUC_V      = 'GBpc-EUC-V';
+  SCMAPGBK_EUC_H       = 'GBK-EUC-H';
+  SCMAPGBK_EUC_V       = 'GBK-EUC-V';
+  SCMAPGBKP_EUC_H      = 'GBKp-EUC-H';
+  SCMAPGBKP_EUC_V      = 'GBKp-EUC-V';
+  SCMAPGBK2K_H         = 'GBK2K-H';
+  SCMAPGBK2K_V         = 'GBK2K-V';
+  SCMAPUniGB_UCS2_H    = 'UniGB-UCS2-H';
+  SCMAPUniGB_UCS2_V    = 'UniGB-UCS2-V';
+  SCMAPUniGB_UTF16_H   = 'UniGB-UTF16-H';
+  SCMAPUniGB_UTF16_V   = 'UniGB-UTF16-V';
+  SCMAPB5pc_H          = 'B5pc-H';
+  SCMAPB5pc_V          = 'B5pc-V';
+  SCMAPHKscs_B5_H      = 'HKscs-B5-H';
+  SCMAPHKscs_B5_V      = 'HKscs-B5-V';
+  SCMAPETen_B5_H       = 'ETen-B5-H';
+  SCMAPETen_B5_V       = 'ETen-B5-V';
+  SCMAPETenms_B5_H     = 'ETenms-B5-H';
+  CMAPETenms_B5_V      = 'ETenms-B5-V';
+  CMAPCNS_EUC_H        = 'CNS-EUC-H';
+  CMAPCNS_EUC_V        = 'CNS-EUC-V';
+  CMAPUniCNS_UCS2_H    = 'UniCNS-UCS2-H';
+  CMAPUniCNS_UCS2_V    = 'UniCNS-UCS2-V';
+  CMAPUniCNS_UTF16_H   = 'UniCNS-UTF16-H';
+  CMAPUniCNS_UTF16_V   = 'UniCNS-UTF16-V';
+  CMAP83pv_RKSJ_H      = '83pv-RKSJ-H';
+  CMAP90ms_RKSJ_H      = '90ms-RKSJ-H';
+  CMAP90ms_RKSJ_V      = '90ms-RKSJ-V';
+  CMAP90msp_RKSJ_H     = '90msp-RKSJ-H';
+  CMAP90msp_RKSJ_V     = '90msp-RKSJ-V';
+  CMAP90pv_RKSJ_H      = '90pv-RKSJ-H';
+  CMAPAdd_RKSJ_H       = 'Add-RKSJ-H';
+  CMAPAdd_RKSJ_V       = 'Add-RKSJ-V';
+  CMAPEUC_H            = 'EUC-H';
+  CMAPEUC_V            = 'EUC-V';
+  CMAPExt_RKSJ_H       = 'Ext-RKSJ-H';
+  CMAPExt_RKSJ_V       = 'Ext-RKSJ-V';
+  CMAPH                = 'H';
+  CMAPV                = 'V';
+  CMAPUniJIS_UCS2_H    = 'UniJIS-UCS2-H';
+  CMAPUniJIS_UCS2_V    = 'UniJIS-UCS2-V';
+  CMAPUniJIS_UCS2_HW_H = 'UniJIS-UCS2-HW-H';
+  CMAPUniJIS_UCS2_HW_V = 'UniJIS-UCS2-HW-V';
+  CMAPUniJIS_UTF16_H   = 'UniJIS-UTF16-H';
+  CMAPUniJIS_UTF16_V   = 'UniJIS-UTF16-V';
+  CMAPKSC_EUC_H = 'KSC-EUC-H';
+  CMAPKSC_EUC_V = 'KSC-EUC-V';
+  CMAPKSCms_UHC_H = 'KSCms-UHC-H';
+  CMAPKSCms_UHC_V = 'KSCms-UHC-V';
+  CMAPKSCms_UHC_HW_H = 'KSCms-UHC-HW-H';
+  CMAPKSCms_UHC_HW_V = 'KSCms-UHC-HW-V';
+  CMAPKSCpc_EUC_H = 'KSCpc-EUC-H';
+  CMAPUniKS_UCS2_H = 'UniKS-UCS2-H';
+  CMAPUniKS_UCS2_V = 'UniKS-UCS2-V';
+  CMAPUniKS_UTF16_H = 'UniKS-UTF16-H';
+  CMAPUniKS_UTF16_V = 'UniKS-UTF16-V';
+  CMAPIdentity_H = 'Identity-H';
+  CMAPIdentity_V = 'Identity-V';
+
+implementation
+
+end.
+

+ 3204 - 0
packages/fcl-pdf/src/fppdfobjects.pp

@@ -0,0 +1,3204 @@
+{ **********************************************************************
+  This file is part of the Free Component Library
+
+  PDF File data structures
+  Copyright (c) 2022 by Michael Van Canneyt [email protected]
+
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+  **********************************************************************}
+
+unit fppdfobjects;
+
+{$mode ObjFPC}{$H+}
+{$modeswitch advancedrecords}
+
+{ $DEFINE DEBUGPDFALLOCATION}
+{ $DEFINE DEBUGOBJECTDICT}
+
+interface
+
+uses
+  TypInfo, Types, rtlConsts, SysUtils, Classes, Contnrs, fppdfconsts;
+
+Const
+  TextArraySpaceTreshold = 200;
+
+Type
+  EPDF = Class(Exception);
+  TPDFElementType = (peComment,peIndirectObject,peXREF,peXREFItem, peTrailer,peStartXRef,peMalFormed,
+                     peValue,peStream,peArray,peContainer,peDictionary,peDictEntry,peRef,peCommand);
+  TPDFTokenType = (
+     ptEOF,
+     ptWhiteSpace,        //  #10,#13,#12,#11,#32,#8
+     ptShl,               // <<
+     ptShr,               // >>
+     ptName,              // /
+     ptComment,           // %
+     ptSquareOpen,        // [
+     ptSquareClose,       // ]
+     ptCurlyOpen,         // {
+     ptCurlyClose,        // }
+     ptString,            // ( )
+     ptByteString,        // Image data
+     ptHexString,         // < >
+     ptNumber,            // 0..9 and .
+     ptKeyword            // Anything else
+  );
+  TPDFTokenTypes = set of TPDFTokenType;
+
+  { TPDFToken }
+
+  TPDFToken = record
+    TokenType : TPDFTokenType;
+    TokenData : RawByteString;
+    TokenPos : Int64; // 0-Based
+    Function CheckString(const aValue : RawByteString) : Boolean;
+    Function HasOnlyHexChars : Boolean;
+    Function IsInteger : Boolean;
+    Function IsHexInteger : Boolean;
+    Function AsHexInteger : Integer;
+    Function AsBEHexInteger : Integer;
+    Function AsInteger : Integer;
+    Function AsDouble : Double;
+    Function IsHexString : Boolean;
+    Function IsString : Boolean;
+    Function AsString : RawByteString;
+    function AsName: RawByteString;
+    Function IsInt64 : Boolean;
+    Function AsInt64 : Int64;
+    Function IsWhiteSpace : Boolean;
+    Function IsKeyword: Boolean;
+    Function IsName: Boolean;
+    Function IsNumber : Boolean;
+  end;
+
+  TPDFTokenArray = Array of TPDFToken;
+
+  TPDFObject = Class;
+  TPDFIndirect = Class;
+  TPDFDocument = Class;
+  TPDFCommand = Class;
+  TPDFCommandList = class;
+  TPDFCMap = Class;
+
+  TPDFIndirectClass = class of TPDFIndirect;
+  TPDFCommandClass = Class of TPDFCommand;
+  TPDFCommandListClass = class of TPDFCommandList;
+
+  { TPDFObjectEnumerator }
+
+  TPDFObjectEnumerator = class
+  public
+    function GetCurrent: TPDFObject; virtual; abstract;
+    function MoveNext: Boolean; virtual;
+    property Current: TPDFObject read GetCurrent;
+  end;
+
+  { TPDFSingleObjectEnumerator }
+
+  TPDFSingleObjectEnumerator = Class(TPDFObjectEnumerator)
+    FObject : TPDFObject;
+    FFirst : Boolean;
+  Public
+    Constructor Create(aObj : TPDFObject);
+    function GetCurrent: TPDFObject; override;
+    function MoveNext: Boolean; override;
+  end;
+
+
+  { TPDFObject }
+
+  TPDFObject = class
+  Public
+    Constructor Create(); virtual;
+{$IFDEF DEBUGPDFALLOCATION}
+    Destructor destroy; override;
+{$ENDIF}
+    class function ElementType : TPDFElementType; virtual; abstract;
+    Function GetEnumerator : TPDFObjectEnumerator; virtual;
+    Function GetDescription : String; virtual;
+  end;
+
+  { TPDFRef }
+
+  { TPDFRefData }
+
+  TPDFRefData = record
+    ObjectID: Integer;
+    ObjectGeneration: Integer;
+    function IsEmpty : Boolean;
+    Function AsString : String;
+  end;
+
+
+  TPDFRef = Class(TPDFObject)
+  private
+    FRef: TPDFRefData;
+  public
+    Constructor Create(aID,aVersion : Integer); overload; virtual;
+    Function GetDescription: String; override;
+    class function ElementType : TPDFElementType; override;
+    property Ref : TPDFRefData Read FRef;
+    Property ObjectID : Integer Read FRef.ObjectID  Write FRef.ObjectID;
+    Property ObjectGeneration : Integer Read FRef.ObjectGeneration  Write FRef.ObjectGeneration;
+  end;
+
+  { TPDFValue }
+
+  TPDFValue = Class(TPDFObject)
+  private
+    FValue: RawbyteString;
+    FTokenType: TPDFTokenType;
+    function GetAsBoolean: Boolean;
+    function GetAsFloat: Double;
+    function GetAsInt64: Int64;
+    function GetAsInteger: Integer;
+    Function GetAsDateTime : TDateTime;
+  Public
+    Constructor Create(aValue : RawByteString; aTokenType : TPDFTokenType); overload; virtual;
+    class function ElementType : TPDFElementType; override;
+
+    function GetDescription : String; override;
+    Function IsKeyword (aKeyWord : string) : Boolean;
+    Function IsInteger : Boolean;
+    Function IsInt64 : Boolean;
+    Property TokenType : TPDFTokentype Read FTokenType;
+    Property Value : RawbyteString Read FValue Write FValue;
+    Property AsInteger : Integer Read GetAsInteger;
+    Property AsInt64 : Int64 Read GetAsInt64;
+    Property AsBoolean : Boolean Read GetAsBoolean;
+    Property AsFloat : Double Read GetAsFloat;
+    Property AsDateTime : TDateTime Read GetAsDateTime;
+  end;
+
+  { TPDFTokensObject }
+
+  TPDFTokensObject = class (TPDFObject)
+  private
+    FTokens: TPDFTokenArray;
+  Public
+    Constructor Create(const aTokens : TPDFTokenArray); overload;
+    Property Tokens : TPDFTokenArray Read FTokens Write FTokens;
+  end;
+
+  { TPDFComment }
+
+  TPDFComment = class(TPDFObject)
+  private
+    FComment: String;
+  Public
+    Constructor Create(const aComment : RawByteString); overload;
+    class function ElementType : TPDFElementType; override;
+    Property Comment : String Read FComment;
+  end;
+
+  { TPDFContainer }
+
+  TPDFContainer = class(TPDFObject)
+  Private
+    FItems : TFPObjectList;
+    function GetCount: Integer;
+    function GetObject(aIndex : integer): TPDFObject;
+    procedure SetObject(aIndex : integer; AValue: TPDFObject);
+  Protected
+    Function Replace(aIndex : Integer; aObj : TPDFObject) : TPDFObject;
+  Public
+    Constructor Create(); override;
+    Class Function OwnsObjects : Boolean; virtual;
+    class function ElementType : TPDFElementType; override;
+    Destructor destroy; override;
+    Function GetEnumerator : TPDFObjectEnumerator; override;
+    Function Add(aObject : TPDFObject) : Integer; virtual;
+    Property Count : Integer Read GetCount;
+    Property Objects[aIndex : integer] : TPDFObject Read GetObject Write SetObject; default;
+  end;
+
+  { TPDFContainerObjectEnumerator }
+
+  TPDFContainerObjectEnumerator = Class(TPDFObjectEnumerator)
+  Private
+    FContainer : TPDFContainer;
+    FCurrentIdx : Integer;
+  Public
+    Constructor Create(aContainer : TPDFContainer);
+    function GetCurrent: TPDFObject; override;
+    function MoveNext: Boolean; override;
+  end;
+
+
+
+  { TPDFXRef }
+
+  TPDFXRef = Class(TPDFObject)
+  private
+    FCompressed: Boolean;
+    FInstance: TPDFIndirect;
+    FInUse: Boolean;
+    FNextFreeObject: Integer;
+    FObjectIndex: Integer;
+    FObjectOffset: Int64;
+    FObjectGeneration: Integer;
+    FReferenceIndex: Integer;
+    FStreamObjectNr: Integer;
+  Public
+    class function ElementType: TPDFElementType; override;
+    Function Match (aObjectID,aObjectGeneration : Integer) : Boolean;
+    function GetDescription: String; override;
+    // Reduce to 3 fields and special getter/setters
+    // ID of the object being referenced.
+    Property ReferenceIndex : Integer Read FReferenceIndex Write FReferenceIndex;
+    // For free objects, ID of next object.
+    Property NextFreeObject : Integer Read FNextFreeObject Write FNextFreeObject;
+    // offset in the PDF file stream
+    Property ObjectOffset : Int64 Read FObjectOffset Write FObjectOffset;
+    // For compressed objects: index of the object within the compressed object stream
+    Property ObjectIndex : Integer Read FObjectIndex Write FObjectIndex;
+    // Version (generation) of the object  (both InUse and Not)
+    Property ObjectGeneration : Integer Read FObjectGeneration Write FObjectGeneration;
+    // For compressed streams, ID of the Stream object in which the object is.
+    Property StreamObjectNr : Integer Read FStreamObjectNr Write FStreamObjectNr;
+    // Is the object in use ?
+    Property InUse : Boolean Read FInUse Write FInUse;
+    // Is the object in a compressed stream ?
+    Property Compressed : Boolean Read FCompressed Write FCompressed;
+    // Reference to the object
+    Property Instance : TPDFIndirect Read FInstance Write FInstance;
+  end;
+  TPDFXRefArray = Array of TPDFXRef;
+
+  TPDFXRefList = class(TPDFContainer)
+  private
+    FCompressed: Boolean;
+    function GetItm(aIndex : integer): TPDFXRef;
+    procedure SetItm(aIndex : integer; AValue: TPDFXRef);
+  Public
+    class function ElementType : TPDFElementType; override;
+    Function IndexOfReference(aObjectID,aObjectGeneration : Integer) : Integer;
+    Function FindReference(aObjectID,aObjectGeneration : Integer) : TPDFXRef;
+    Property References[aIndex : integer] : TPDFXRef Read GetItm Write SetItm; default;
+    Property Compressed : Boolean Read FCompressed Write FCompressed;
+  end;
+
+  { TPDFStartXRef }
+
+  TPDFStartXRef = class(TPDFObject)
+  private
+    FIndex: Int64;
+  public
+    class function ElementType : TPDFElementType; override;
+    // Offset in PDF file where to find XRef index/object
+    Property Index : Int64 Read FIndex Write FIndex;
+  end;
+
+  { TPDFArray }
+
+  TPDFArray = class(TPDFContainer)
+    class function ElementType : TPDFElementType; override;
+    function GetDescription: String; override;
+    Function IsIntegerAt(aIndex : Integer) : Boolean;
+    Function IsKeywordAt(aIndex : Integer; const aKeyWord: RawByteString) : Boolean;
+    Function GetIntegerAt(aIndex : Integer) : Integer;
+  end;
+
+  { TPDFDictEntry }
+
+  TPDFDictEntry = Class(TPDFObject)
+  private
+    FKey: String;
+    FValue: TPDFObject;
+  Public
+    class function ElementType : TPDFElementType; override;
+    Destructor Destroy; override;
+    function GetDescription: String; override;
+    Property Key : String Read FKey Write FKey;
+    // Value is owned by dict entry
+    Property Value : TPDFObject Read FValue Write FValue;
+  end;
+
+  { TPDFDictionary }
+
+  TPDFDictionary = class(TPDFContainer)
+  Public
+    class function ElementType : TPDFElementType; override;
+    function GetDescription: String; override;
+    Function AddEntry(aKey : String; aValue : TPDFObject) : TPDFDictEntry;
+    Function AddEntry(aEntry : TPDFDictEntry) : Integer;
+    function ContainsKey(const aKeyword : RawByteString) : boolean;
+    function IndexofKey(const aKeyword : RawByteString) : Integer;
+    Function FindKey(const aKeyword : RawByteString) : TPDFDictEntry;
+    Function GetKey(const aKeyword : RawByteString) : TPDFDictEntry;
+    Function FindValue(const aKeyword : RawByteString) : TPDFObject;
+    Function FindArrayValue(const aKeyword : RawByteString) : TPDFArray;
+    Function FindDictionaryValue(const aKeyword : RawByteString) : TPDFDictionary;
+    Function GetValue(const aKeyword : RawByteString) : TPDFObject;
+    Function GetStringValue(const aKeyword : RawByteString) : RawByteString;
+    Function GetIntegerValue(const aKeyword : RawByteString) : Integer;
+    Function GetInt64Value(const aKeyword : RawByteString) : Int64;
+    Function GetArrayValue(const aKeyword : RawByteString) : TPDFArray;
+    Function GetDictionaryValue(const aKeyword : RawByteString) : TPDFDictionary;
+  end;
+
+
+
+  { TPDFTrailer }
+
+  TPDFTrailer = class(TPDFDictionary)
+    class function ElementType : TPDFElementType; override;
+    function Contains(const aKeyword : RawByteString) : boolean;
+  end;
+
+  { TPDFStream }
+
+  TPDFStream = Class(TPDFObject)
+  private
+    FData: TBytes;
+  Public
+    Constructor Create(const aData : TBytes); overload;
+    class function ElementType : TPDFElementType; override;
+    Property Data : TBytes Read FData Write FData;
+  end;
+
+
+  { TPDFIndirect }
+  TObjectPosition = record
+    ID : Integer;
+    Offset : Integer;
+  end;
+  TObjectPositionArray = Array of TObjectPosition;
+
+  TPDFIndirect = class(TPDFContainer)
+  private
+    class var _ClassList : TFPDataHashTable;
+  private
+    FObjectType : String;
+    FDict : TPDFDictionary;
+    FObjectID: Integer;
+    FObjectGeneration: Integer;
+    FStream: TPDFStream;
+    FUnfilteredStream: TStream;
+    FObjectPositions : TObjectPositionArray;
+    FDocument : TPDFDocument;
+    function GetObjectType: String;
+  Protected
+    function CheckObjectDict : Boolean;
+    Class function RegisterAsType : String; virtual;
+  public
+    class constructor InitIndirect;
+    class destructor DoneIndirect;
+    Constructor Create(aSource : TPDFIndirect); overload; virtual;
+    Destructor Destroy; override;
+    Function ResolveObjectType : TPDFIndirect;
+    Class function FindClassForType(aType : String) : TPDFIndirectClass;
+    Class Procedure RegisterType(aType : String; aClass : TPDFIndirectClass);
+    Class Procedure UnRegisterType(aType : String);
+    Class Procedure Register;
+    Class Procedure UnRegister;
+    class function ElementType : TPDFElementType; override;
+    function FindDictValue(const aKey : RawByteString) : TPDFObject;
+    function GetDictValue(const aKey : RawByteString) : TPDFObject;
+    function GetDescription: String; override;
+    Function Add(aObject : TPDFObject) : Integer; override;
+    function Match(aObjectID : Integer; aObjectGeneration : Integer) : Boolean;
+    // Document to which this indirect object belongs. Set by TPDFDocument.AddIndirect
+    Property Document : TPDFDocument Read FDocument;
+    // Set when adding items
+    Property Stream : TPDFStream Read FStream;
+    // owned by the Indirect object
+    Property UnfilteredStream : TStream Read FUnfilteredStream Write FUnfilteredStream;
+    // Object ID
+    Property ObjectID : Integer Read FObjectID Write FObjectID;
+    // object version/generation
+    Property ObjectGeneration : Integer Read FObjectGeneration Write FObjectGeneration;
+    // Set when adding items
+    Property ObjectDict : TPDFDictionary Read FDict;
+    // Determined from dictionary. Empty string if not found
+    Property ObjectType : String Read GetObjectType;
+    // For object stream
+    Property ObjectPositions : TObjectPositionArray Read FObjectPositions Write FObjectPositions;
+  end;
+
+  { TPDFIndirectXRef }
+
+  TPDFIndirectXRef = class (TPDFIndirect)
+  private
+    FXref: TPDFXRefList;
+  protected
+    Class function RegisterAsType : String; override;
+  Public
+    Destructor destroy; override;
+    Property XRef : TPDFXRefList Read FXref Write FXref;
+  end;
+
+  { TPDFPageObject }
+
+  { TPDFObjectStreamObject }
+
+  TPDFObjectStreamObject = Class(TPDFIndirect)
+  protected
+    Class function RegisterAsType : String; override;
+  end;
+
+  TPDFPageObject = Class(TPDFIndirect)
+  private
+    FResources : TPDFDictionary;
+    FCommandList: TPDFCommandList;
+    function GetContent(aIndex : integer): TPDFIndirect;
+    function GetContentCount: Integer;
+    function GetContentRef(aIndex : integer): TPDFrefData;
+    function GetParent: TPDFIndirect;
+    function GetParentRef: TPDFRef;
+    function GetResources: TPDFDictionary;
+  Protected
+    Class function RegisterAsType : String; override;
+    // Override if you want to use a descendent of TPDFCommandList;
+    Class function CreateCommandList : TPDFCommandList; virtual;
+  Public
+    Constructor Create; override; overload;
+    Destructor Destroy; override;
+    function FindResources: TPDFDictionary;
+    Function FindFontRefObj(aFontName : String) : TPDFRef;
+    Function FindFontRef(aFontName : String) : TPDFRefData;
+    Property ParentRef : TPDFRef Read GetParentRef;
+    Property Parent : TPDFIndirect Read GetParent;
+    Property Contents[aIndex : integer] : TPDFIndirect Read GetContent;
+    Property ContentRef[aIndex : integer] : TPDFrefData Read GetContentRef;
+    Property ContentCount : Integer read GetContentCount;
+    Property CommandList : TPDFCommandList Read FCommandList;
+    Property Resources : TPDFDictionary Read GetResources;
+  end;
+  TPDFPageClass = Class of TPDFPageObject;
+
+  { TPDFFontObject }
+
+  TPDFFontObject = Class(TPDFIndirect)
+  private
+    FUnicodeCMAP: TPDFCMap;
+    function GetString(AIndex: Integer): String;
+    function GetToUnicode: TPDFRefData;
+    function GetToUnicodeObj: TPDFRef;
+    function GetValueName(AIndex: Integer): String;
+  Protected
+    Class function RegisterAsType : String; override;
+  Public
+    Destructor Destroy; override;
+    Function GetDescription: String; override;
+    function ResolveToUnicodeMapStream(aDoc: TPDFDocument): TPDFIndirect;
+    Property Type_ : String Index 0 Read GetString;
+    Property SubType : String Index 1 Read GetString;
+    Property Name : String Index 2 Read GetString;
+    Property BaseFont: String Index 3 Read GetString;
+    Property Encoding : String Index 4 Read GetString;
+    Property ToUnicodeRef : TPDFRefData Read GetToUnicode; // 5
+    Property ToUnicode : TPDFRef Read GetToUnicodeObj; // 5
+    // Owned by Font !
+    Property UnicodeCMAP : TPDFCMap Read FUnicodeCMAP Write FUnicodeCMAP;
+  end;
+
+
+  { TPDFPagesObject }
+
+  TPDFPagesObject = Class(TPDFIndirect)
+  private
+    function GetChildCount: Integer;
+    function GetChildObject(aIndex : integer): TPDFIndirect;
+    function GetChildRef(aIndex : integer): TPDFRefData;
+    function GetPageCount: Integer;
+  Protected
+    Class function RegisterAsType : String; override;
+
+    Function ParentRef : TPdfRef;
+    Function Parent : TPDFIndirect;
+    // Find page with index aIndex. Will search in sub tree pages
+    Function FindPage(aIndex : Integer) : TPDFPageObject;
+    // Page count in this node
+    Property PageCount : Integer Read GetPageCount;
+    // Number of direct children
+    Property ChildCount : Integer Read GetChildCount;
+    // Reference to child aIndex, 0-Based.
+    Property ChildRef[aIndex : integer] : TPDFRefData Read GetChildRef;
+    // Child I. Can be a TPDFPagesObject or a TPDFPageObject
+    Property ChildObject[aIndex : integer] : TPDFIndirect Read GetChildObject;
+  end;
+
+  { TPDFCatalogObject }
+
+  TPDFCatalogObject = Class(TPDFIndirect)
+  private
+    function GetPages: TPDFPagesObject;
+  Protected
+    Class function RegisterAsType : String; override;
+    Property Pages : TPDFPagesObject Read GetPages;
+  end;
+
+  { TPDFCommand }
+  TPDFCommand = Class(TPDFTokensObject)
+  private
+    class var _ClassList : TFPDataHashTable;
+  private
+    FCommand: String;
+  Protected
+    Class Function RegisterCommandName : String; virtual;
+  Public
+    Class constructor Init;
+    Class Destructor Done;
+    class function ElementType : TPDFElementType; override;
+    Class function FindClassForCommand(const aCommand : String) : TPDFCommandClass;
+    Class Procedure RegisterCommand(const aCommand : String; aClass : TPDFCommandClass);
+    Class Procedure UnRegisterCommand(const aCommand : String);
+    Class Procedure Register;
+    Class Procedure UnRegister;
+    Constructor Create(const aCommand : String; aTokens : TPDFTokenArray);
+  Public
+    Property Command : String Read FCommand Write FCommand;
+  end;
+
+  { TPDFBTCommand }
+
+  TPDFBTCommand = class(TPDFCommand)
+    Class Function RegisterCommandName : String;override;
+  end;
+
+  { TPDFETCommand }
+
+  TPDFETCommand = class(TPDFCommand)
+    Class Function RegisterCommandName : String; override;
+  end;
+
+  // Do not register this one.
+
+  { TPDFTextCommand }
+
+  TPDFTextCommand = Class(TPDFCommand)
+  Public
+    Function GetFullText(aUnicodeMap : TPDFCMap) : RawByteString; virtual; overload;
+    Function GetFullText : RawByteString; virtual; abstract; overload;
+  end;
+
+  { TPDFTJCommand }
+
+  TPDFTJCommand = class(TPDFTextCommand)
+    Class Function RegisterCommandName : String; override;
+    Function GetFullText(aUnicodeMap : TPDFCMap) : RawByteString; override; overload;
+    Function GetFullText : RawByteString; override;
+  end;
+
+  { TPDFTfCommand }
+
+  TPDFTfCommand = class(TPDFCommand)
+  private
+    function GetFontName: String;
+    function GetFontSize: Integer;
+  public
+    Class Function RegisterCommandName : String; override;
+    property FontName : String Read GetFontName;
+    Property FontSize : Integer Read GetFontSize;
+  end;
+
+  { TPDFTj_Command }
+
+  TPDFTj_Command = class(TPDFTextCommand)
+    Class Function RegisterCommandName : String; override;
+    Function GetFullText : RawByteString; override;
+  end;
+
+
+  { TPDFTdCommand }
+
+  TPDFTDCommand = class(TPDFCommand)
+    Class Function RegisterCommandName : String; override;
+  end;
+
+  { TPDFTdCommand }
+
+  { TPDFTd_Command }
+
+  TPDFTd_Command = class(TPDFCommand)
+    Class Function RegisterCommandName : String; override;
+  end;
+
+
+  { TPDFImageData }
+
+
+  TPDFImageData = record
+    Width,
+    height,
+    BitsPerComponent : Integer;
+    ColorSpace : String;
+    ColorSpaceComponents : Integer;
+    Filters : Array of String;
+  end;
+
+  { TPDFImageDataCommand }
+
+  TPDFImageDataCommand = Class(TPDFCommand)
+    Class Procedure ParseImageOperands(aOperands : TPDFTokenArray; Out aImageData : TPDFImageData);
+    Class Function RegisterCommandName : String; override;
+  end;
+
+
+  { TPDFUnknownCommand }
+  // Catch all. Do not register
+  TPDFUnknownCommand = class(TPDFCommand)
+  end;
+
+
+
+  // This object owns all operands for all commands
+
+  { TPDFCommandList }
+
+  TPDFCommandList = Class(TPDFContainer)
+  private
+    function GetCommand(aIndex : Integer): TPDFCommand;
+  Public
+    Property Commands[aIndex :Integer] : TPDFCommand Read GetCommand; default;
+  end;
+
+  { TCodeSpaceRange }
+
+  // One code space range of a CMap: the closed interval RangeStart..RangeEnd.
+  TCodeSpaceRange = record
+    // First and last code of the range; both bounds are inclusive.
+    RangeStart, RangeEnd : Cardinal;
+    // True when RangeStart <= aCode <= RangeEnd.
+    Function Contains(aCode : Cardinal) : Boolean;
+  end;
+  TCodeSpaceRangeArray = Array of TCodeSpaceRange;
+
+  { TNotDefRange }
+
+  // A range of codes (RangeStart..RangeEnd, inclusive) that share one replacement value.
+  TNotDefRange = record
+    // First and last code of the range; both bounds are inclusive.
+    RangeStart, RangeEnd : Cardinal;
+    // Value that TPDFCMapData.GetNotDef converts to text (through
+    // UInt32ToUnicodeChars) for a code that falls inside this range.
+    ReplaceMent : Cardinal;
+    // True when RangeStart <= aCode <= RangeEnd.
+    Function Contains(aCode : Cardinal) : Boolean;
+  end;
+  TNotDefRangeArray = Array of TNotDefRange;
+
+  // Single code mapping, looked up by TPDFCMapData.IndexInBFChar / GetBFChar.
+  TBFChar = record
+    // Src: the code that is compared with the code being looked up.
+    // Dest: value converted to text with UInt32ToUnicodeChars when DestName is empty.
+    Src,Dest : Cardinal;
+    // When not empty, GetBFChar currently returns a placeholder text instead of
+    // using Dest (marked as a todo in GetBFChar).
+    DestName : String;
+  end;
+  TBFCharArray = Array of TBFChar;
+
+  { TCIDRange }
+  // One explicit destination of a TCIDRange: either a name or a character value.
+  TCIDUnicodeCharOrName = record
+    // When not empty, TPDFCMapData.GetBFRangeChar returns the placeholder '?!'
+    // instead of using UnicodeChar.
+    Name : string;
+    // Value converted to text with UInt32ToUnicodeChars when Name is empty.
+    UnicodeChar : Cardinal;
+  end;
+  TCIDUnicodeCharOrNameArray = Array of TCIDUnicodeCharOrName;
+
+  // A range of codes (RangeStart..RangeEnd, inclusive) with its destination(s),
+  // as consumed by TPDFCMapData.GetBFRangeChar.
+  TCIDRange = record
+    // First and last code of the range; both bounds are inclusive.
+    RangeStart, RangeEnd : Cardinal;
+    // Used when CharNames is empty: the result is CharOffset+(code-RangeStart).
+    CharOffset : Cardinal;
+    // When not empty, indexed with (code-RangeStart); an index at or beyond the
+    // array length makes GetBFRangeChar return '<?>'.
+    CharNames : TCIDUnicodeCharOrNameArray;
+    // True when RangeStart <= aCode <= RangeEnd.
+    Function Contains(aCode : Cardinal) : Boolean;
+  end;
+  TCIDRangeArray = Array of TCIDRange;
+
+  { TPDFCMapData }
+
+  // Holds the mapping tables of a CMap and translates raw codes to text.
+  TPDFCMapData = class(TObject)
+  private
+    FBFChars: TBFCharArray;
+    FCIDRange: TCIDRangeArray;
+    FCodeSpaceRange: TCodeSpaceRangeArray;
+    FNotDefRange: TNotDefRangeArray;
+  Public
+    // Reads aRaw as consecutive 2-byte codes (first byte is the high byte), maps
+    // every code to text and returns the result UTF-8 encoded.
+    // A trailing single byte (odd length) is not processed.
+    Function Interpret(aRaw : RawByteString) : RawByteString;
+    // Replacement text for aCode taken from NotDefRange (searched from the last
+    // entry to the first); '<?>' when no range contains aCode.
+    function GetNotDef(aCode: Integer): UnicodeString;
+    // Looks aCode up in BFRange. Returns False (and aRes='') when no range contains it.
+    function GetBFRangeChar(aCode: Cardinal; out aRes: Unicodestring): Boolean;
+    // Looks aCode up in BFChars. Returns False (and aRes='') when there is no entry for it.
+    function GetBFChar(aCode: Cardinal; out aRes: Unicodestring): Boolean;
+    // Index of the last entry in BFRange that contains aCode, -1 when there is none.
+    Function IndexInBFRange(aCode : Integer): Integer;
+    // Index of the last entry in BFChars whose Src equals aCode, -1 when there is none.
+    Function IndexInBFChar(aCode : Integer): Integer;
+    // True when at least one entry of CodeSpaceRange contains aCode.
+    function IsValidCode(aCode: Cardinal): Boolean;
+    Property CodeSpaceRange : TCodeSpaceRangeArray Read FCodeSpaceRange Write FCodeSpaceRange;
+    Property NotDefRange : TNotDefRangeArray Read FNotDefRange Write FNotDefRange;
+    Property BFRange : TCIDRangeArray Read FCIDRange Write FCIDRange;
+    Property BFChars : TBFCharArray Read FBFChars Write FBFChars;
+  end;
+
+  { TPDFCMap }
+
+  TPDFCMap = Class(TPDFIndirect)
+  private
+    FData: TPDFCMapData;
+    function GetCMAPName: String;
+    function GetRegistry: String;
+    function GetSupplement: Integer;
+    procedure SetData(AValue: TPDFCMapData);
+  Public
+    Destructor Destroy; override;
+    Property CMapName : String Read GetCMAPName;
+    Property Registry : String Read GetRegistry;
+    Property Supplement : Integer Read GetSupplement;
+    Function Interpret(aRaw : RawByteString) : RawByteString;
+    Property Data : TPDFCMapData Read FData Write SetData;
+  end;
+
+  { TPDFContentStream }
+
+  TPDFContentStream = class(TPDFIndirect);
+
+
+  { TPDFMalFormed }
+
+  TPDFMalFormed = class(TPDFTokensObject)
+    class function ElementType : TPDFElementType; override;
+  end;
+
+  // This keeps a reference to the dictionary of the original TPDFIndirect object.
+  // If that is destroyed, this object must also be destroyed.
+  TPDFDocumentInfo = class(TPDFObject)
+  Private
+    FSource : TPDFDictionary;
+  Protected
+    Function GetKeyName(aIndex : Integer) : RawByteString;
+    Function GetString(aIndex : Integer) : String;
+    Function GetDate(aIndex : Integer) : TDateTime;
+    Function GetName(aIndex : integer) : String;
+    Property Source : TPDFDictionary Read FSource;
+  Public
+    Constructor Create(aSource : TPDFDictionary);
+    // Keep Indexes unique, all indexes are passed through 1 routine to get the key name.
+    Property Title : String Index 0 Read GetString;
+    Property Author : String Index 1 Read GetString;
+    Property Subject : String Index 2 Read GetString;
+    Property Keywords : String Index 3 Read GetString;
+    Property Creator : String Index 4 Read GetString;
+    Property Producer : String index 5 Read GetString;
+    Property CreationDate : TDateTime Index 6 Read GetDate;
+    Property ModDate : TDateTime Index 7 Read GetDate;
+    Property Trapped : String Index 8 Read GetName;
+  end;
+
+
+  { TPDFDocument }
+
+  TPDFDocument = Class(TPDFContainer)
+  private
+    FPDFVersion: String;
+    FStartXref: TPDFStartXRef;
+    FTrailerDict: TPDFDictionary;
+    FCatalog : TPDFCatalogObject;
+    FPages : TPDFIndirect;
+    FXrefs : TPDFXRefArray;
+    function GetPage(aIndex : Integer): TPDFPageObject;
+    function GetPageNode(aIndex : Integer): TPDFIndirect;
+    function GetPageCount: Integer;
+    function GetStartXRef: TPDFStartXRef;
+    function getXRef(aIndex : Integer): TPDFXRef;
+    function GetXRefCount: Integer;
+  Public
+    Procedure SetXrefArray(aArray : TPDFXRefArray);
+    // Find indirect object with given object ID and generation.
+    Function FindInDirectObject(aID : Integer; aGeneration : Integer) : TPDFIndirect;
+    Function FindInDirectObject(aRef : TPDFRef) : TPDFIndirect;
+    // Returns Nil if aRef.IsEmpty=true
+    Function FindInDirectObject(aRef : TPDFRefData) : TPDFIndirect;
+    // Replace the object at given ref. Returns the old object
+    Function ReplaceIndirectObj(aRef : TPDFRefData; aObject : TPDFIndirect) : TPDFIndirect;
+    function ReplaceIndirectObj(aID: Integer; aGeneration: Integer; aObject : TPDFIndirect): TPDFIndirect;
+    // Find font. Returns Nil if none found or object is not a font.
+    Function FindFont(aRef : TPDFRef) : TPDFFontObject;
+    Function FindFont(aRef : TPDFRefData) : TPDFFontObject;
+    // Get font. Raises exception if not found or object is not a font.
+    Function GetFont(aRef : TPDFRef) : TPDFFontObject;
+    Function GetFont(aRef : TPDFRefData) : TPDFFontObject;
+    // Find the document information object. May return Nil.
+    Function FindDocumentInfoObject : TPDFIndirect;
+    // Find document information, resolved to TPDFDocumentInfo. You must free this object yourself.
+    Function FindDocumentInfo : TPDFDocumentInfo;
+    // Get the document information object. Raises exception if not found. You must free this object yourself.
+    Function GetDocumentInfo : TPDFDocumentInfo;
+    //
+    // Note: the following functions require parsing the document with ResolveObjects=True
+    //
+    // Find global catalog. Return nil if none found
+    Function FindCatalog : TPDFCatalogObject;
+    // Find global catalog. Raise exception if none found
+    Function GetCatalog : TPDFCatalogObject;
+    // Find pages object. Return nil if none found.
+    Function FindPages : TPDFIndirect;
+    // Find pages object. Raise exception if none found.
+    Function GetPages : TPDFIndirect;
+    // Will add to items only if it does not yet exist.
+    Function AddInDirectObject(aObj : TPDFIndirect) : Boolean;
+    // version in header line
+    Property PDFversion: String Read FPDFVersion Write FPDFVersion;
+    // StartXRef.
+    Property StartXRef : TPDFStartXRef Read GetStartXRef Write FStartXref;
+    // Trailer dictionary, set during parsing
+    Property TrailerDict : TPDFDictionary Read FTrailerDict Write FTrailerDict;
+    // Page count in this document
+    Property PageCount : Integer Read GetPageCount;
+    // Get the top-level indirect objects that represents a page tree node. 0 based.
+    // Can be a pages node or a page object
+    Property PageNodes[aIndex : Integer] : TPDFIndirect Read GetPageNode;
+    // Get a page by index (0-based) (leaf in the page tree)
+    Property Pages[aIndex : Integer] : TPDFPageObject Read GetPage;
+    // Count of elements in XRefs
+    Property XRefCount : Integer Read GetXRefCount;
+    // Indexed access to XRefs
+    Property XRefs[aIndex : Integer] : TPDFXRef Read getXRef;
+  end;
+
+implementation
+
+Resourcestring
+  SErrNotAnInteger = 'Token is not an integer';
+  SErrNotAString = 'Token is not a string';
+  SErrNotAName = 'Token is not a name';
+  SErrNotAnInt64 = 'Token is not an int64';
+  SErrNoSuchDictValue = 'No such dictionary value: %s';
+  SErrDictValueIsNotInteger = 'Dictionary entry %s is not an integer';
+  SErrDictValueIsNotArray = 'Dictionary entry %s is not an array';
+  SErrDictValueIsNotDict = 'Dictionary entry %s is not a dictionary';
+  SErrNoFontAt = 'No font found at: %s';
+
+{ TPDFTrailer }
+
+
+// Key canonicalization hook: at present it hands back S unchanged.
+Function Canonicalize(const S : RawByteString) : RawByteString;
+
+begin
+  Exit(S);
+end;
+
+{ TNotDefRange }
+
+// True when aCode lies inside RangeStart..RangeEnd (both bounds included).
+function TNotDefRange.Contains(aCode: Cardinal): Boolean;
+begin
+  Result:=(aCode>=RangeStart) and (aCode<=RangeEnd);
+end;
+
+{ TCodeSpaceRange }
+
+// True when aCode lies inside RangeStart..RangeEnd (both bounds included).
+function TCodeSpaceRange.Contains(aCode: Cardinal): Boolean;
+begin
+  Result:=(aCode>=RangeStart) and (aCode<=RangeEnd);
+end;
+
+{ TCIDRange }
+
+// True when aCode lies inside RangeStart..RangeEnd (both bounds included).
+function TCIDRange.Contains(aCode: Cardinal): Boolean;
+begin
+  Result:=(aCode>=RangeStart) and (aCode<=RangeEnd);
+end;
+
+{ TPDFCMapData }
+
+// A code is valid as soon as one of the code space ranges contains it.
+// With no ranges defined, every code is reported as invalid.
+function TPDFCMapData.IsValidCode (aCode : Cardinal) : Boolean;
+
+Var
+  RangeIdx : Integer;
+
+begin
+  // Walk from the last range to the first and stop at the first hit.
+  for RangeIdx:=High(FCodeSpaceRange) downto 0 do
+    if FCodeSpaceRange[RangeIdx].Contains(aCode) then
+      Exit(True);
+  Result:=False;
+end;
+
+// Splits aChars in two 16-bit units and returns them as UTF-16 text:
+// the upper unit is only emitted when it is non-zero, the lower unit always.
+function UInt32ToUnicodeChars(aChars : Cardinal) : UnicodeString;
+
+Var
+  UpperUnit,LowerUnit : Word;
+
+begin
+  UpperUnit:=(aChars shr 16) and $FFFF;
+  LowerUnit:=aChars and $FFFF;
+  Result:='';
+  if UpperUnit<>0 then
+    Result:=Result+UnicodeChar(UpperUnit);
+  Result:=Result+UnicodeChar(LowerUnit);
+end;
+
+// Returns the replacement text of the last not-def range that contains aCode.
+// When no range matches, the literal '<?>' is returned.
+function TPDFCMapData.GetNotDef(aCode : Integer) : UnicodeString;
+
+Var
+  RangeIdx : Integer;
+
+begin
+  for RangeIdx:=High(FNotDefRange) downto 0 do
+    if FNotDefRange[RangeIdx].Contains(aCode) then
+      Exit(UInt32ToUnicodeChars(FNotDefRange[RangeIdx].ReplaceMent));
+  Result:='<?>';
+end;
+
+
+// Translates aCode through the range table.
+// Returns False (aRes='') when no range contains aCode. Otherwise aRes is:
+//  - CharOffset+(aCode-RangeStart) as text, when the range has no CharNames;
+//  - the UnicodeChar of the matching CharNames entry, when that entry has no name;
+//  - '?!' when the matching CharNames entry has a name;
+//  - '<?>' when the offset falls outside CharNames.
+function TPDFCMapData.GetBFRangeChar(aCode : Cardinal; out aRes : Unicodestring) : Boolean;
+
+var
+  RangeIdx,Delta : integer;
+
+begin
+  aRes:='';
+  RangeIdx:=IndexInBFRange(aCode);
+  Result:=(RangeIdx<>-1);
+  if not Result then
+    Exit;
+  // Position of the code relative to the start of its range.
+  Delta:=aCode-FCIDRange[RangeIdx].RangeStart;
+  if Length(FCIDRange[RangeIdx].CharNames)=0 then
+    aRes:=UInt32ToUnicodeChars(FCIDRange[RangeIdx].CharOffset+Delta)
+  else if Delta>=Length(FCIDRange[RangeIdx].CharNames) then
+    aRes:='<?>'
+  else if FCIDRange[RangeIdx].CharNames[Delta].Name='' then
+    aRes:=UInt32ToUnicodeChars(FCIDRange[RangeIdx].CharNames[Delta].UnicodeChar)
+  else
+    aRes:='?!';
+end;
+
+// Translates aCode using the single-character entry located by IndexInBFChar.
+// Returns False, with aRes empty, when there is no entry for the code.
+function TPDFCMapData.GetBFChar(aCode: Cardinal; out aRes: Unicodestring
+  ): Boolean;
+var
+  EntryIdx : integer;
+
+begin
+  aRes:='';
+  EntryIdx:=IndexInBFChar(aCode);
+  Result:=EntryIdx<>-1;
+  if not Result then
+    Exit;
+  With BFChars[EntryIdx] do
+    if Length(DestName)=0 then
+      aRes:=UInt32ToUnicodeChars(Dest)
+    else
+      aRes:='XAXA'; // Todo
+end;
+
+// Index of the last FCIDRange entry containing aCode, or -1 when none does.
+function TPDFCMapData.IndexInBFRange(aCode: Integer): Integer;
+
+Var
+  I : Integer;
+
+begin
+  For I:=Length(FCIDRange)-1 downto 0 do
+    if FCIDRange[I].Contains(aCode) then
+      Exit(I);
+  Result:=-1;
+end;
+
+// Index of the last FBFChars entry whose Src equals aCode, or -1 when absent.
+function TPDFCMapData.IndexInBFChar(aCode: Integer): Integer;
+
+Var
+  I : Integer;
+
+begin
+  For I:=Length(FBFChars)-1 downto 0 do
+    if not (FBFChars[I].Src<>aCode) then
+      Exit(I);
+  Result:=-1;
+end;
+
+// Decodes aRaw two bytes at a time (high byte first) and returns the mapped
+// text UTF-8 encoded. A trailing single byte is not processed.
+function TPDFCMapData.Interpret(aRaw: RawByteString): RawByteString;
+
+Var
+  Code : Cardinal;
+  Position,RawLen : Integer;
+  Decoded,Piece : UnicodeString;
+
+begin
+  Decoded:='';
+  RawLen:=Length(aRaw);
+  Position:=1;
+  While Position<RawLen do
+    begin
+    Code:=(Ord(aRaw[Position]) shl 8) +Ord(aRaw[Position+1]);
+    if IsValidCode(Code) then
+      begin
+      // Ranges are consulted first, single-character entries second.
+      if not GetBFRangeChar(Code,Piece) then
+        if not GetBFChar(Code,Piece) then
+          Piece:='?';
+      end
+    else
+      Piece:=GetNotDef(Code);
+    Decoded:=Decoded+Piece;
+    Inc(Position,2);
+    end;
+  Result:=UTF8Encode(Decoded);
+end;
+
+{ TPDFCMap }
+
+// String stored under SCMAPKeyCMapName in the object dictionary, or '' when
+// the object has no dictionary.
+function TPDFCMap.GetCMAPName: String;
+begin
+  Result:='';
+  if Assigned(ObjectDict) then
+    Result:=ObjectDict.GetStringValue(SCMAPKeyCMapName);
+end;
+
+function TPDFCMap.GetRegistry: String; // returns '' when there is no object dictionary
+begin
+  if Assigned(ObjectDict) then
+    Result:=ObjectDict.GetStringValue(SPDFCIDSystemInfoKeyOrdering) // NOTE(review): reads the *Ordering* key although this getter is named Registry - confirm this is intended
+  else
+    Result:='';
+end;
+
+// Integer stored under SPDFCIDSystemInfoKeySupplement, or 0 when the object
+// has no dictionary.
+function TPDFCMap.GetSupplement: Integer;
+begin
+  Result:=0;
+  if Assigned(ObjectDict) then
+    Result:=ObjectDict.GetIntegerValue(SPDFCIDSystemInfoKeySupplement);
+end;
+
+// Replaces the owned data object; the previously held instance is freed.
+// Assigning the instance that is already held does nothing.
+procedure TPDFCMap.SetData(AValue: TPDFCMapData);
+begin
+  if FData<>AValue then
+    begin
+    FData.Free;
+    FData:=AValue;
+    end;
+end;
+
+// Frees the owned data object before the inherited cleanup runs.
+destructor TPDFCMap.Destroy;
+begin
+  FreeAndNil(FData);
+  inherited;
+end;
+
+// Delegates decoding of aRaw to the Data object.
+function TPDFCMap.Interpret(aRaw: RawByteString): RawByteString;
+
+begin
+  Exit(Data.Interpret(aRaw));
+end;
+
+{ TPDFTextCommand }
+
+// Returns the command text. With a map supplied the raw text is run through
+// it; without one the raw text is returned as is.
+function TPDFTextCommand.GetFullText(aUnicodeMap: TPDFCMap): RawByteString;
+
+Var
+  RawText : RawByteString;
+
+begin
+  RawText:=GetFullText();
+  if Assigned(aUnicodeMap) then
+    Result:=aUnicodeMap.InterPret(RawText)
+  else
+    Result:=RawText;
+end;
+
+{ TPDFRefData }
+
+// A reference counts as empty when both its ID and its generation are zero.
+function TPDFRefData.IsEmpty: Boolean;
+begin
+  Result:=(ObjectID=0) and (ObjectGeneration=0);
+end;
+
+// Formats the reference as "<ID> <generation>".
+function TPDFRefData.AsString: String;
+begin
+  Exit(Format('%d %d',[ObjectID,ObjectGeneration]));
+end;
+
+{ TPDFFontObject }
+// Maps a property index (0..5) to the dictionary key it reads.
+// Any other index yields an empty string instead of an undefined result.
+function TPDFFontObject.GetValueName(AIndex: Integer): String;
+
+begin
+  Case aIndex of
+    0 : Result:=SPDFFontKeyType;
+    1 : Result:=SPDFFontKeySubType;
+    2 : Result:=SPDFFontKeyName;
+    3 : Result:=SPDFFontKeyBaseFont;
+    4 : Result:=SPDFFontKeyEncoding;
+    5 : Result:=SPDFFontKeyToUnicode;
+  else
+    // Unknown index: make the result well defined
+    Result:='';
+  end;
+end;
+
+// Returns the value stored under the key for aIndex when that entry is a
+// TPDFValue; otherwise returns ''.
+function TPDFFontObject.GetString(AIndex: Integer): String;
+
+Var
+  Entry : TPDFObject;
+
+begin
+  Result:='';
+  Entry:=ObjectDict.FindValue(GetValueName(aIndex));
+  if Entry is TPDFValue then
+    Result:=TPDFValue(Entry).Value;
+end;
+
+// Reference data of the ToUnicode entry, or a zeroed record when the font
+// has no such reference.
+function TPDFFontObject.GetToUnicode: TPDFRefData;
+
+Var
+  RefObj : TPDFRef;
+
+begin
+  Result:=Default(TPDFRefData);
+  RefObj:=GetToUnicodeObj;
+  if Assigned(RefObj) then
+    Result:=RefObj.FRef;
+end;
+
+// Returns the ToUnicode dictionary entry when it is a TPDFRef, else Nil.
+function TPDFFontObject.GetToUnicodeObj: TPDFRef;
+
+Var
+  Entry : TPDFObject;
+
+begin
+  Result:=Nil;
+  Entry:=ObjectDict.FindValue(GetValueName(5));
+  // "is" is False for Nil, so no separate Assigned check is needed
+  if Entry is TPDFRef then
+    Result:=TPDFRef(Entry);
+end;
+
+// Type name under which this class is registered.
+class function TPDFFontObject.RegisterAsType: String;
+begin
+  Exit(SPDFTypeFont);
+end;
+
+// Frees the attached unicode CMap before the inherited cleanup runs.
+destructor TPDFFontObject.Destroy;
+begin
+  FreeAndNil(FUnicodeCMAP);
+  inherited;
+end;
+
+// Builds a multi-line description: a header with ID and generation, then
+// one "Label: value" line for each non-empty property.
+function TPDFFontObject.GetDescription: String;
+
+  // Appends "aName: aValue" on a new line; empty values are skipped.
+  Procedure AppendLine(Const aName,aValue : String);
+
+  begin
+    if aValue='' then
+      Exit;
+    Result:=Result+sLineBreak+aName+': '+aValue;
+  end;
+
+begin
+  Result:=Format('Font (%d %d):',[ObjectID,ObjectGeneration]);
+  AppendLine('Name',Name);
+  AppendLine('Subtype',SubType);
+  AppendLine('Type',Type_);
+  AppendLine('BaseFont',BaseFont);
+  AppendLine('Encoding',Encoding);
+  if Assigned(ToUnicode) then
+    AppendLine('ToUnicode', ToUnicodeRef.AsString);
+end;
+
+// Looks up, in aDoc, the indirect object the ToUnicode reference points to.
+// Returns Nil when the font has no ToUnicode reference.
+function TPDFFontObject.ResolveToUnicodeMapStream(aDoc: TPDFDocument): TPDFIndirect;
+
+Var
+  MapRef : TPDFRef;
+
+begin
+  MapRef:=ToUnicode;
+  if not Assigned(MapRef) then
+    Exit(Nil);
+  Result:=aDoc.FindInDirectObject(MapRef);
+end;
+
+{ TPDFImageDataCommand }
+
+// Fills aImageData from the key/value operand tokens of an inline image.
+// Recognised keys set Width, Height, BitsPerComponent, ColorSpace and
+// Filters; unknown keys are ignored. ColorSpaceComponents is derived from the
+// colour space name and stays at its default for unrecognised names.
+// Keys are compared without the first character of the name token.
+class procedure TPDFImageDataCommand.ParseImageOperands(
+  aOperands: TPDFTokenArray; out aImageData: TPDFImageData);
+
+Var
+  I,J,aCount : Integer;
+
+
+begin
+  aImageData:=Default(TPDFImageData);
+  aCount:=Length(aOperands);
+  I:=0;
+  // A key is only handled when a value token follows it, hence aCount-1.
+  While (I<aCount-1) do
+    begin
+    if aOperands[i].IsName then
+      begin
+      Case Copy(aOperands[i].TokenData,2,Length(aOperands[i].TokenData)-1) of
+        SPDFImageKeyW :
+          begin
+          Inc(I);
+          aImageData.Width:=aOperands[i].AsInteger;
+          end;
+        SPDFImageKeyH :
+          begin
+          Inc(I);
+          aImageData.Height:=aOperands[i].AsInteger;
+          end;
+        SPDFImageKeyBPC:
+          begin
+          Inc(I);
+          aImageData.BitsPerComponent:=aOperands[i].AsInteger;
+          end;
+        SPDFImageKeyCS:
+          begin
+          Inc(I);
+          aImageData.ColorSpace:=aOperands[i].TokenData;
+          end;
+        SPDFImageKeyF:
+          begin
+          Inc(i);
+          If aOperands[i].TokenType<>ptSquareOpen then
+            // Single filter: the value token itself is the filter name.
+            // (Stepping once more here would read past the value.)
+            aImageData.Filters:=[aOperands[i].TokenData]
+          else
+            begin
+            // Filter array: skip the opening bracket, then count and copy
+            // everything up to the closing bracket.
+            Inc(I);
+            J:=I;
+            While (J<aCount) and (aOperands[J].TokenType<>ptSquareClose) do
+              Inc(J);
+            SetLength(aImageData.Filters,J-I);
+            J:=I;
+            While (J<aCount) and (aOperands[J].TokenType<>ptSquareClose) do
+              begin
+              aImageData.Filters[J-I]:=aOperands[J].TokenData;
+              Inc(J);
+              end;
+            // Resume after the array so its elements are not parsed as keys
+            I:=J;
+            end;
+          end;
+      end;
+      end;
+    inc(I);
+    end;
+  Case Copy(aImageData.ColorSpace,2,Length(aImageData.ColorSpace)-1) of
+    SPDFImageKeyCMYK : aImageData.ColorSpaceComponents:=4;
+    SPDFImageKeyRGB : aImageData.ColorSpaceComponents:=3;
+    SPDFImageKeyG : aImageData.ColorSpaceComponents:=1;
+  end;
+end;
+
+// Operator name this command class is registered under.
+class function TPDFImageDataCommand.RegisterCommandName: String;
+begin
+  Exit('ID');
+end;
+
+{ TPDFTd_Command }
+
+// Operator name this command class is registered under.
+class function TPDFTd_Command.RegisterCommandName: String;
+begin
+  Exit('Td');
+end;
+
+{ TPDFTj_Command }
+
+// Operator name this command class is registered under.
+class function TPDFTj_Command.RegisterCommandName: String;
+begin
+  Exit('Tj');
+end;
+
+// Returns the first token as a string, or '' when the command has no tokens.
+// A conversion failure is reported on standard output and then re-raised.
+function TPDFTj_Command.GetFullText: RawByteString;
+begin
+  Result:='';
+  if Length(Self.Tokens)>0 then
+    try
+      Result:=Tokens[0].AsString;
+    except
+      on E : exception do
+        begin
+        // Space added after the class name so the message reads correctly.
+        Writeln('Exception ',E.ClassName,' getting text for token: "',E.Message,'". Token data :',GetDescription);
+        Raise;
+        end;
+
+    end;
+end;
+
+{ TPDFTfCommand }
+
+// Font name taken from the first token when it is a string or a name;
+// '' when there are no tokens or the first token is neither.
+function TPDFTfCommand.GetFontName: String;
+begin
+  Result:='';
+  if Length(Tokens)=0 then
+    Exit;
+  if Tokens[0].IsString then
+    Result:=Tokens[0].AsString
+  else if Tokens[0].IsName then
+    Result:=Tokens[0].AsName;
+end;
+
+// Font size taken from the second token when it is an integer, else 0.
+function TPDFTfCommand.GetFontSize: Integer;
+
+begin
+  If (Length(Tokens)>1) and Tokens[1].IsInteger then
+    Result:=Tokens[1].AsInteger
+  else
+    Result:=0;
+end;
+
+// Operator name this command class is registered under.
+class function TPDFTfCommand.RegisterCommandName: String;
+begin
+  Exit('Tf');
+end;
+
+{ TPDFTdCommand }
+
+// Operator name this command class is registered under.
+class function TPDFTdCommand.RegisterCommandName: String;
+begin
+  Exit('TD');
+end;
+
+{ TPDFTJCommand }
+
+// Operator name this command class is registered under.
+class function TPDFTJCommand.RegisterCommandName: String;
+begin
+  Exit('TJ');
+end;
+
+// Concatenates the string tokens between the first and last token, each
+// decoded through aUnicodeMap. A numeric token whose absolute value exceeds
+// TextArraySpaceTreshold adds a single space.
+// Without a map the result is the raw text of the parameterless overload.
+// Raises EConvertError for any other kind of token.
+function TPDFTJCommand.GetFullText(aUnicodeMap: TPDFCMap): RawByteString;
+Var
+  i : integer;
+
+begin
+  if not Assigned(aUnicodeMap) then
+    begin
+    // No map: fall back to the raw text instead of dereferencing Nil
+    Result:=GetFullText();
+    Exit;
+    end;
+  Result:='';
+  if Length(Tokens)>=2 then
+    // First and last token are skipped
+    For I:=1 to Length(Tokens)-2 do
+      begin
+      if Tokens[I].TokenType=ptString then
+        Result:=Result+aUnicodeMap.InterPret(Tokens[I].TokenData)
+      else if Tokens[i].IsNumber then
+        begin
+        if Abs(Tokens[i].AsDouble)>TextArraySpaceTreshold then
+          Result:=Result+' '
+        end
+      else
+        Raise EConvertError.Create('Unexpected char');
+      end;
+end;
+
+// Concatenates the raw data of the string tokens between the first and last
+// token. A numeric token whose absolute value exceeds TextArraySpaceTreshold
+// adds a single space; any other token raises EConvertError.
+function TPDFTJCommand.GetFullText: RawByteString;
+
+Var
+  Idx,LastIdx : integer;
+
+begin
+  Result:='';
+  // With fewer than two tokens LastIdx is below 1 and the loop does not run
+  LastIdx:=Length(Tokens)-2;
+  For Idx:=1 to LastIdx do
+    if Tokens[Idx].TokenType=ptString then
+      Result:=Result+Tokens[Idx].TokenData
+    else if not Tokens[Idx].IsNumber then
+      Raise EConvertError.Create('Unexpected char')
+    else if Abs(Tokens[Idx].AsDouble)>TextArraySpaceTreshold then
+      Result:=Result+' ';
+end;
+
+{ TPDFETCommand }
+
+// Operator name this command class is registered under.
+class function TPDFETCommand.RegisterCommandName: String;
+begin
+  Exit('ET');
+end;
+
+{ TPDFBTCommand }
+
+// Operator name this command class is registered under.
+class function TPDFBTCommand.RegisterCommandName: String;
+begin
+  Exit('BT');
+end;
+
+{ TPDFCommand }
+
+class constructor TPDFCommand.Init; // creates the hash table backing the command registry
+begin
+  _ClassList:=TFPDataHashTable.Create;
+end;
+
+class destructor TPDFCommand.Done; // releases the hash table created by Init
+begin
+  FreeAndNil(_ClassList);
+end;
+
+// Element kind reported for commands.
+class function TPDFCommand.ElementType: TPDFElementType;
+begin
+  Exit(peCommand);
+end;
+
+// The base class has no operator name. Register and UnRegister skip classes
+// that return an empty name.
+class function TPDFCommand.RegisterCommandName: String;
+begin
+  Exit('');
+end;
+
+class function TPDFCommand.FindClassForCommand(const aCommand: String
+  ): TPDFCommandClass; // looks aCommand up in the registry and casts the stored item to a command class
+begin
+  Result:=TPDFCommandClass(_ClassList.Items[aCommand]);
+end;
+
+class procedure TPDFCommand.RegisterCommand(const aCommand: String;
+  aClass: TPDFCommandClass); // stores aClass in the registry under the key aCommand
+begin
+  _ClassList.Add(aCommand,aClass)
+end;
+
+class procedure TPDFCommand.UnRegisterCommand(const aCommand: String); // removes the registry entry stored under aCommand
+begin
+  _ClassList.Delete(aCommand);
+end;
+
+// Registers this class under its RegisterCommandName; classes with an empty
+// name are not registered.
+class procedure TPDFCommand.Register;
+
+Var
+  CmdName : String;
+
+begin
+  CmdName:=RegisterCommandName;
+  If CmdName='' then
+    Exit;
+  RegisterCommand(CmdName,Self);
+end;
+
+// Removes the registration made under RegisterCommandName; does nothing for
+// classes with an empty name.
+class procedure TPDFCommand.UnRegister;
+Var
+  CmdName : String;
+
+begin
+  CmdName:=RegisterCommandName;
+  If CmdName='' then
+    Exit;
+  UnRegisterCommand(CmdName);
+end;
+
+// Creates a command from its operator name and its operand tokens.
+constructor TPDFCommand.Create(const aCommand: String; aTokens : TPDFTokenArray);
+begin
+  // Tokens are handled by the inherited constructor
+  Inherited Create(aTokens);
+  Self.FCommand:=aCommand;
+end;
+
+
+{ TPDFCommandList }
+
+function TPDFCommandList.GetCommand(aIndex : Integer): TPDFCommand; // checked cast ("as") of Objects[aIndex] to TPDFCommand
+begin
+  Result:=Objects[aIndex] as TPDFCommand;
+end;
+
+
+{ TPDFPagesObject }
+
+// Number of child references in the Kids array. Each reference occupies
+// three array elements, so the element count is divided by 3.
+// Returns 0 when CheckObjectDict fails or Kids is not an array.
+function TPDFPagesObject.GetChildCount: Integer;
+
+Var
+  KidsValue : TPDFObject;
+
+begin
+  Result:=0;
+  if CheckObjectDict then
+    begin
+    KidsValue:=ObjectDict.FindValue(SPDFKeyKids);
+    if KidsValue is TPDFArray then
+      Result:=TPDFArray(KidsValue).Count div 3;
+    end;
+end;
+
+// Resolves the child reference at aIndex through the document.
+// Returns Nil when CheckObjectDict fails.
+function TPDFPagesObject.GetChildObject(aIndex : integer): TPDFIndirect;
+
+Var
+  KidRef : TPDFRefData;
+
+begin
+  if CheckObjectDict then
+    begin
+    KidRef:=ChildRef[aIndex];
+    Result:=Document.FindIndirectObject(KidRef);
+    end
+  else
+    Result:=Nil;
+end;
+
+// Reads the aIndex-th "<id> <generation> R" triple from the Kids array.
+// Returns a zeroed record when Kids is not an array or the three elements at
+// that position do not form such a triple.
+function TPDFPagesObject.GetChildRef(aIndex : integer): TPDFRefData;
+
+Var
+  KidsValue : TPDFObject;
+  KidsArray : TPDFArray;
+  First : Integer;
+
+begin
+  Result:=Default(TPDFRefData);
+  First:=aIndex * 3;
+  KidsValue:=ObjectDict.FindValue(SPDFKeyKids);
+  if not (KidsValue is TPDFArray) then
+    Exit;
+  KidsArray:=TPDFArray(KidsValue);
+  if KidsArray.IsIntegerAt(First) then
+    if KidsArray.IsIntegerAt(First+1) then
+      if KidsArray.IsKeywordAt(First+2,'R') then
+        begin
+        Result.ObjectID:=KidsArray.GetIntegerAt(First);
+        Result.ObjectGeneration:=KidsArray.GetIntegerAt(First+1);
+        end;
+end;
+
+// Integer stored under SPDFKeyCount in the object dictionary.
+function TPDFPagesObject.GetPageCount: Integer;
+begin
+  Exit(ObjectDict.GetIntegerValue(SPDFKeyCount));
+end;
+
+// Type name under which this class is registered.
+class function TPDFPagesObject.RegisterAsType: String;
+begin
+  Exit(SPDFTypePages);
+end;
+
+// Returns the Parent dictionary entry when it is a TPDFRef. Returns Nil when
+// there is no dictionary or the entry is missing or of another type.
+function TPDFPagesObject.ParentRef: TPdfRef;
+
+var
+  Entry : TPDFObject;
+
+begin
+  Result:=nil;
+  If not Assigned(ObjectDict) then
+    Exit;
+  Entry:=ObjectDict.FindValue(SPDFKeyParent);
+  if Entry is TPDFRef then
+    Result:=TPDFRef(Entry);
+end;
+
+// Resolves ParentRef through the document. Returns Nil when there is no
+// parent reference or no document.
+function TPDFPagesObject.Parent: TPDFIndirect;
+
+Var
+  UpRef : TPDFRef;
+
+begin
+  UpRef:=ParentRef;
+  if (UpRef=Nil) or not assigned(Document) then
+    Result:=Nil
+  else
+    Result:=Document.FindInDirectObject(UpRef);
+end;
+
+// Returns the page with zero-based index aIndex within the subtree rooted at
+// this node, or Nil when there is no such page.
+// aOffset counts the pages already passed in earlier children. A nested
+// pages node is entered when aIndex lies inside its page range; a leaf page
+// matches when the running offset equals aIndex.
+function TPDFPagesObject.FindPage(aIndex: Integer): TPDFPageObject;
+Var
+  aCount : Integer;
+  I,aOffset : integer;
+  aNode : TPDFIndirect;
+  aPages : TPDFPagesObject absolute aNode;
+  aPage : TPDFPageObject absolute aNode;
+
+begin
+  Result:=nil;
+  aOffset:=0;
+  I:=0;
+  aCount:=ChildCount;
+  While (Result=Nil) and (I<aCount) do
+    begin
+    aNode:=ChildObject[I];
+    if aNode is TPDFPagesObject then
+      begin
+      if (aOffset<=aIndex) and (aIndex<aOffset+aPages.PageCount) then
+        Result:=aPages.FindPage(aIndex-aOffset)
+      else
+        Inc(aOffset,aPages.PageCount);
+      end
+    else if aNode is TPDFPageObject then
+      begin
+      // Compare with the page offset, not the child index: the two differ as
+      // soon as an earlier sibling is a pages node holding several pages.
+      if aOffset=aIndex then
+        Result:=aPage
+      else
+        inc(aOffset);
+      end;
+    inc(I);
+    end;
+end;
+
+{ TPDFIndirectXRef }
+
+// Type name under which this class is registered.
+class function TPDFIndirectXRef.RegisterAsType: String;
+begin
+  Exit(SPDFTypeXref);
+end;
+
+// Frees the owned XRef object before the inherited cleanup runs.
+destructor TPDFIndirectXRef.destroy;
+begin
+  FreeAndNil(FXRef);
+  inherited;
+end;
+
+{ TPDFCatalogObject }
+
+// Resolves the Pages entry of the catalog dictionary.
+// Returns Nil when CheckObjectDict fails or the entry is not a reference.
+// The "as" cast raises when the referenced object is not a TPDFPagesObject.
+function TPDFCatalogObject.GetPages: TPDFPagesObject;
+
+var
+  aVal : TPDFObject;
+  aPages : TPDFRef;
+
+begin
+  Result:=nil;
+  if not CheckObjectDict then
+    Exit;
+  aVal:=ObjectDict.FindValue(SPDFKeyPages);
+  if aVal is TPDFRef then
+    begin
+    // Use the reference that was found; it was previously left at Nil
+    aPages:=TPDFRef(aVal);
+    Result:=Document.FindInDirectObject(aPages) as TPDFPagesObject;
+    end;
+end;
+
+// Type name under which this class is registered.
+class function TPDFCatalogObject.RegisterAsType: String;
+begin
+  Exit(SPDFTypeCatalog);
+end;
+
+{ TPDFObjectStreamObject }
+
+// Type name under which this class is registered.
+class function TPDFObjectStreamObject.RegisterAsType: String;
+begin
+  Exit(SPDFTypeObjStm);
+end;
+
+{ TPDFPageObject }
+
+{ TPDFContentStream }
+
+// Creates the page and its command list (via CreateCommandList).
+constructor TPDFPageObject.Create;
+begin
+  inherited Create;
+  Self.FCommandList:=CreateCommandList;
+end;
+
+// Frees the command list before the inherited cleanup runs.
+destructor TPDFPageObject.Destroy;
+begin
+  FreeAndNil(FCommandList);
+  inherited;
+end;
+
+// Locates the resource dictionary for this page and caches it in FResources.
+// The page's own Resources entry is tried first; failing that, the chain of
+// parent nodes is walked until one supplies a dictionary.
+// An entry may be a dictionary or a reference to an indirect object whose
+// dictionary is then used. Returns Nil when nothing is found.
+function TPDFPageObject.FindResources: TPDFDictionary;
+
+  // Returns aValue as a dictionary, resolving one indirect reference first
+  // when necessary. Yields Nil when no dictionary can be obtained.
+  function AsDictionary(aValue : TPDFObject) : TPDFDictionary;
+  begin
+    Result:=Nil;
+    if (aValue is TPDFRef) and Assigned(Document) then
+      begin
+      aValue:=Document.FindInDirectObject(TPDFRef(aValue));
+      if aValue is TPDFIndirect then
+        aValue:=TPDFIndirect(aValue).ObjectDict;
+      end;
+    if aValue is TPDFDictionary then
+      Result:=TPDFDictionary(aValue);
+  end;
+
+Var
+  aParent : TPDFIndirect;
+
+begin
+  // The page's own entry takes precedence
+  if (FResources=Nil) and Assigned(ObjectDict) then
+    FResources:=AsDictionary(ObjectDict.FindValue(SPDFPageKeyResources));
+  if (FResources=Nil) then
+    begin
+    // Fall back to the nearest ancestor that has a Resources entry
+    aParent:=Parent;
+    while (FResources=Nil) and (aParent<>Nil) do
+      begin
+      if assigned(aParent.ObjectDict) then
+        FResources:=AsDictionary(aParent.ObjectDict.FindValue(SPDFPageKeyResources));
+      if FResources=Nil then
+        begin
+        // Only pages nodes expose a parent to continue with
+        if aParent is TPDFPagesObject then
+          aParent:=TPDFPagesObject(aParent).Parent
+        else
+          aParent:=nil;
+        end;
+      end;
+    end;
+  Result:=FResources;
+end;
+
+// Looks aFontName up in the font dictionary of the page resources and
+// returns the entry when it is a TPDFRef, else Nil.
+function TPDFPageObject.FindFontRefObj(aFontName: String): TPDFRef;
+
+var
+  FontDict  : TPDFDictionary;
+  Entry : TPDFObject;
+
+begin
+  Result:=nil;
+  FontDict:=Resources.FindDictionaryValue(SPDFResourceKeyFont);
+  if not assigned(FontDict) then
+    Exit;
+  Entry:=FontDict.FindValue(aFontName);
+  if Entry is TPDFRef then
+    Result:=TPDFRef(Entry);
+end;
+
+// Reference data for the font named aFontName, or a zeroed record when
+// FindFontRefObj finds no reference.
+function TPDFPageObject.FindFontRef(aFontName: String): TPDFRefData;
+
+var
+  FontRef : TPDFRef;
+
+begin
+  FontRef:=FindFontRefObj(aFontName);
+  if Assigned(FontRef) then
+    Result:=FontRef.FRef
+  else
+    Result:=Default(TPDFRefData);
+end;
+
+// Factory for the command list created in the constructor.
+class function TPDFPageObject.CreateCommandList: TPDFCommandList;
+begin
+  Exit(TPDFCommandList.Create);
+end;
+
+
+// Returns the reference of the aIndex-th content stream of the page.
+// Contents may be a single reference (index 0 only) or an array of
+// "<id> <generation> R" triples. The result stays zeroed when the array
+// elements at that position do not form such a triple.
+// Raises EListError when aIndex is outside 0..ContentCount-1.
+function TPDFPageObject.GetContentRef(aIndex : integer): TPDFrefData;
+
+Var
+  Cont : TPDFObject;
+  ContArray : TPDFArray absolute cont;
+  idx : integer;
+
+begin
+  Result:=Default(TPDFRefData);
+  // Negative indexes are rejected too, as the message promises [0..n]
+  If (aIndex<0) or (aIndex>=ContentCount) then
+    Raise EListError.CreateFmt('Content index %d out of bounds [0..%d]',[aIndex,ContentCount-1]);
+  Cont:=ObjectDict.FindValue(SPDFPageKeyContents);
+  if (aIndex=0) and (Cont is TPDFRef) then
+    Result:=(Cont as TPDFRef).FRef
+  else if Cont is TPDFArray then
+    begin
+    // Each reference occupies three consecutive array elements
+    Idx:=aIndex*3;
+    if ContArray.IsIntegerAt(Idx) and  ContArray.IsIntegerAt(Idx+1) and  ContArray.IsKeywordAt(Idx+2,'R')  then
+      begin
+      Result.ObjectID:=ContArray.GetIntegerAt(Idx);
+      Result.ObjectGeneration:=ContArray.GetIntegerAt(Idx+1);
+      end;
+    end;
+end;
+
+// Resolves the page's parent reference through the document.
+// Returns Nil when CheckObjectDict fails, when the page has no parent
+// reference, or when no document is attached.
+function TPDFPageObject.GetParent: TPDFIndirect;
+
+Var
+  Ref : TPDFRef;
+
+begin
+  Result:=Nil;
+  if Not CheckObjectDict then
+    exit;
+  Ref:=ParentRef;
+  // Guard against a missing reference or document instead of dereferencing Nil
+  if Assigned(Ref) and Assigned(Document) then
+    Result:=Document.FindInDirectObject(Ref);
+end;
+
+// Resolves the aIndex-th content reference through the document.
+// Returns Nil when CheckObjectDict fails.
+function TPDFPageObject.GetContent(aIndex : integer): TPDFIndirect;
+
+Var
+  StreamRef : TPDFRefData;
+
+begin
+  if CheckObjectDict then
+    begin
+    StreamRef:=ContentRef[aIndex];
+    Result:=Document.FindInDirectObject(StreamRef);
+    end
+  else
+    Result:=nil;
+end;
+
+// Number of content references: 1 for a single reference, a third of the
+// element count for an array, 0 otherwise.
+function TPDFPageObject.GetContentCount: Integer;
+
+Var
+  Entry: TPDFObject;
+
+begin
+  Entry:=ObjectDict.FindValue(SPDFPageKeyContents);
+  // "is" is False for Nil, so a missing entry falls through to 0
+  if Entry is TPDFRef then
+    Result:=1
+  else if Entry is TPDFArray then
+    Result:=TPDFArray(Entry).Count div 3
+  else
+    Result:=0;
+end;
+
+// Returns the Parent dictionary entry when it is a TPDFRef. Returns Nil when
+// CheckObjectDict fails or the entry is missing or of another type.
+function TPDFPageObject.GetParentRef: TPDFRef;
+
+Var
+  Entry : TPDFObject;
+
+begin
+  Result:=Nil;
+  If CheckObjectDict then
+    begin
+    Entry:=ObjectDict.FindValue(SPDFKeyParent);
+    if Entry is TPDFRef then
+      Result:=TPDFRef(Entry);
+    end;
+end;
+
+// Like FindResources, but raises EPDF when no resource dictionary is found.
+function TPDFPageObject.GetResources: TPDFDictionary;
+
+
+begin
+  Result:=FindResources;
+  if not Assigned(Result) then
+    Raise EPDF.CreateFmt('No resource dictionary for page with ID %d',[Self.ObjectID]);
+end;
+
+
+// Type name under which this class is registered.
+class function TPDFPageObject.RegisterAsType: String;
+begin
+  Exit(SPDFTypePage);
+end;
+
+{ TPDFContainerObjectEnumerator }
+
+// Binds the enumerator to aContainer, positioned before the first element.
+constructor TPDFContainerObjectEnumerator.Create(aContainer: TPDFContainer);
+begin
+  // -1 so that the first MoveNext lands on index 0
+  FCurrentIdx:=-1;
+  FContainer:=aContainer;
+end;
+
+// Object at the current position of the enumeration.
+function TPDFContainerObjectEnumerator.GetCurrent: TPDFObject;
+begin
+  Exit(FContainer.Objects[FCurrentIdx]);
+end;
+
+// Advances one position; False once the end of the container is passed.
+function TPDFContainerObjectEnumerator.MoveNext: Boolean;
+begin
+  FCurrentIDX:=FCurrentIDX+1;
+  Result:=(FContainer.Count>FCurrentIDX);
+end;
+
+{ TPDFRef }
+
+// Creates a reference holding the object ID aID and generation aVersion.
+constructor TPDFRef.Create(aID, aVersion: Integer);
+begin
+  Inherited Create;
+  FRef.ObjectGeneration:=aVersion;
+  FRef.ObjectID:=aID;
+end;
+
+// Human-readable form "Ref (<ID> <generation>)".
+function TPDFRef.GetDescription: String;
+begin
+  Exit(Format('Ref (%d %d)',[ObjectID,ObjectGeneration]));
+end;
+
+// Element kind reported for references.
+class function TPDFRef.ElementType: TPDFElementType;
+begin
+  Exit(peRef);
+end;
+
+{ TPDFSingleObjectEnumerator }
+
+// Enumerator that yields exactly one object, aObj.
+constructor TPDFSingleObjectEnumerator.Create(aObj: TPDFObject);
+begin
+  // FFirst marks that the single element has not been delivered yet
+  FFirst:=True;
+  FObject:=aObj;
+end;
+
+// The single object handed to the constructor.
+function TPDFSingleObjectEnumerator.GetCurrent: TPDFObject;
+begin
+  Exit(FObject);
+end;
+
+// True on the first call only; every later call returns False.
+function TPDFSingleObjectEnumerator.MoveNext: Boolean;
+begin
+  Result:=FFirst;
+  if FFirst then
+    FFirst:=False;
+end;
+
+{ TPDFObjectEnumerator }
+
+// The base enumerator is empty: there is never a next element.
+function TPDFObjectEnumerator.MoveNext: Boolean;
+begin
+  Exit(False);
+end;
+
+{ TPDFXRef }
+
+// Element kind reported for cross-reference items.
+class function TPDFXRef.ElementType: TPDFElementType;
+begin
+  Exit(peXREFItem);
+end;
+
+// True for an in-use entry whose reference index and generation both equal
+// the given values.
+function TPDFXRef.Match(aObjectID, aObjectGeneration: Integer): Boolean;
+begin
+  Result:=InUse and (aObjectID=ReferenceIndex) and (ObjectGeneration=aObjectGeneration);
+end;
+
+// Describes the entry. Free entries show the next generation; in-use entries
+// show either offset and generation (uncompressed) or the containing stream
+// object and index (compressed).
+function TPDFXRef.GetDescription: String;
+begin
+  Result:=Format('Xref object %d (InUse: %s',[ReferenceIndex,BoolToStr(InUse,'True','False')]);
+  If InUse then
+    begin
+    Result:=Result+Format(', Compressed: %s',[BoolToStr(Compressed,'True','False')]);
+    if Compressed then
+      Result:=Result+Format(', Stream obj: %d, Index : %d)',[StreamObjectNr,ObjectIndex])
+    else
+      Result:=Result+Format(', Offset: %d,  Generation: %d)',[ObjectOffset,ObjectGeneration]);
+    end
+  else
+    Result:=Result+Format(', Next gen: %d)',[ObjectGeneration]);
+end;
+
+{ TPDFToken }
+
+// True when the token is a keyword whose data equals aValue.
+function TPDFToken.CheckString(const aValue : RawByteString): Boolean;
+begin
+  Result:=(TokenType=ptKeyword) and (TokenData=aValue);
+end;
+
+// True when every character of TokenData is a hexadecimal digit
+// (0-9, A-F, a-f). An empty token also yields True.
+function TPDFToken.HasOnlyHexChars: Boolean;
+
+var
+  Idx : Integer;
+
+begin
+  For Idx:=1 to Length(TokenData) do
+    if not (TokenData[Idx] in ['0'..'9','A'..'F','a'..'f']) then
+      Exit(False);
+  Result:=True;
+end;
+
+
+// True when the token is a number whose text parses as an Integer.
+function TPDFToken.IsInteger: Boolean;
+
+Var
+  Parsed : Integer;
+
+begin
+  Result:=(TokenType=ptNumber) and TryStrToInt(TokenData,Parsed);
+end;
+
+// True for an integer token, or for a ptString token consisting only of
+// hexadecimal digits.
+function TPDFToken.IsHexInteger: Boolean;
+begin
+  Result:=IsInteger or ((TokenType=ptString) and HasOnlyHexChars);
+end;
+
+// Parses TokenData as a hexadecimal number by prefixing it with '$'.
+function TPDFToken.AsHexInteger: Integer;
+begin
+  Exit(StrToInt('$'+TokenData));
+end;
+
+
+// Folds the bytes of a hex-string token into an integer, first byte most
+// significant. Raises EConvertError for any other token type.
+function TPDFToken.AsBEHexInteger: Integer;
+
+Var
+  Idx : integer;
+
+begin
+  if TokenType<>ptHexString then
+    Raise EConvertError.Create('Not a hex string');
+  Result:=0;
+  for Idx:=1 to Length(TokenData) do
+    Result:=(Result shl 8) + Ord(TokenData[Idx]);
+end;
+
+// Integer value of a number token. Raises EConvertError when the token is
+// not a number or its text does not parse as an Integer.
+function TPDFToken.AsInteger: Integer;
+begin
+  if (TokenType<>ptNumber) or not TryStrToInt(TokenData,Result) then
+    Raise EConvertError.Create(SErrNotAnInteger);
+end;
+
+// Floating-point value of a number token, parsed with Val. Raises
+// EConvertError when the token is not a number or Val reports an error.
+function TPDFToken.AsDouble: Double;
+
+var
+  ErrPos : Integer;
+
+begin
+  if TokenType<>ptNumber then
+    Raise EConvertError.Create(SErrNotAnInteger);
+  Val(TokenData,Result,ErrPos);
+  if ErrPos<>0 then
+    Raise EConvertError.Create(SErrNotAnInteger);
+end;
+
+// True when the token type is ptHexString.
+function TPDFToken.IsHexString: Boolean;
+begin
+  Exit(TokenType=ptHexString);
+end;
+
+// True for keyword, hex-string and string tokens.
+function TPDFToken.IsString: Boolean;
+begin
+  Exit(TokenType in [ptKeyword,ptHexString,ptString]);
+end;
+
+// Returns the token text. Data starting with the byte pair #254 #255 is
+// treated as big-endian 16-bit code units and returned UTF-8 encoded; any
+// other data is returned unchanged.
+// Raises EConvertError when the token is not a string-like token (IsString).
+function TPDFToken.AsString: RawByteString;
+
+Var
+  I,CharCount : Integer;
+  UString : Unicodestring;
+  P : PWord;
+
+begin
+  if not isString then
+    Raise EConvertError.Create(SErrNotAString);
+  If (Length(TokenData)>2) and (TokenData[1]=#254) and (TokenData[2]=#255) then
+    begin
+    // Only whole 16-bit units are converted; a trailing odd byte is ignored
+    // so that nothing is copied beyond the allocated characters.
+    CharCount:=(Length(TokenData)-2) div 2;
+    UString:='';
+    SetLength(UString,CharCount);
+    if CharCount>0 then
+      begin
+      Move(TokenData[3],UString[1],CharCount*SizeOf(UnicodeChar));
+      P:=PWord(PUnicodeChar(UString));
+      For I:=1 to CharCount do
+        begin
+        // Big-endian to native order: a swap on little-endian hosts, a
+        // no-op on big-endian ones.
+        P^:=BEtoN(P^);
+        Inc(P);
+        end;
+      end;
+    Result:=UTF8Encode(UString);
+    end
+  else
+    Result:=TokenData;
+end;
+
+// Token data of a name token. Raises EConvertError for any other token.
+function TPDFToken.AsName: RawByteString;
+begin
+  if not IsName then
+    Raise EConvertError.Create(SErrNotAName);
+  Result:=TokenData;
+end;
+
+// True when the token is a number whose text parses as an Int64.
+function TPDFToken.IsInt64: Boolean;
+
+Var
+  Parsed : Int64;
+
+begin
+  Result:=(TokenType=ptNumber) and TryStrToInt64(TokenData,Parsed);
+end;
+
+// Int64 value of a number token. Raises EConvertError when the token is not
+// a number or its text does not parse as an Int64.
+function TPDFToken.AsInt64: Int64;
+
+
+begin
+  if (TokenType<>ptNumber) or not TryStrToInt64(TokenData,Result) then
+    Raise EConvertError.Create(SErrNotAnInt64);
+end;
+
+// True when the token type is ptWhiteSpace.
+function TPDFToken.IsWhiteSpace: Boolean;
+begin
+  Exit(TokenType=ptWhiteSpace);
+end;
+
+// True when the token type is ptKeyword.
+function TPDFToken.IsKeyword: Boolean;
+begin
+  Exit(TokenType=ptKeyword);
+end;
+
+// True when the token type is ptName.
+function TPDFToken.IsName: Boolean;
+begin
+  Exit(TokenType=ptName);
+end;
+
+// True when the token type is ptNumber.
+function TPDFToken.IsNumber: Boolean;
+begin
+  Exit(TokenType=ptNumber);
+end;
+
+{ TPDFDocument }
+
+// Returns the stored StartXRef object; raises EPDF when none is set.
+function TPDFDocument.GetStartXRef: TPDFStartXRef;
+begin
+  if FStartXref=Nil then
+    Raise EPDF.Create('No StartXRef found');
+  Result:=FStartXref;
+end;
+
+// Cross-reference entry at aIndex; raises EListError when out of range.
+function TPDFDocument.getXRef(aIndex : Integer): TPDFXRef;
+begin
+  if (aIndex>=0) and (aIndex<Length(FXrefs)) then
+    Result:=FXrefs[aIndex]
+  else
+    Raise EListError.CreateFmt(SListIndexError,[aIndex]);
+end;
+
+// Number of cross-reference entries held by the document.
+function TPDFDocument.GetXRefCount: Integer;
+begin
+  Exit(Length(FXRefs));
+end;
+
+// Replaces the document's cross-reference array with aArray.
+procedure TPDFDocument.SetXrefArray(aArray: TPDFXRefArray);
+begin
+  Self.FXrefs:=aArray;
+end;
+
+// Integer stored under SPDFKeyCount in the dictionary of the root pages
+// object; 0 when that object has no dictionary.
+function TPDFDocument.GetPageCount: Integer;
+
+
+Var
+  RootPages : TPDFIndirect;
+
+begin
+  RootPages:=GetPages;
+  if Assigned(RootPages) and Assigned(RootPages.ObjectDict) then
+     Result:=RootPages.ObjectDict.GetIntegerValue(SPDFKeyCount)
+  else
+     Result:=0;
+end;
+
+// Returns the aIndex-th direct child of the root pages object, resolved
+// through the document.
+// Returns Nil when the index is negative or beyond the Kids array, when the
+// three array elements at that position are not an "<id> <generation> R"
+// triple, or when the referenced object is not found.
+function TPDFDocument.GetPageNode(aIndex : Integer): TPDFIndirect;
+
+Var
+  lPages : TPDFIndirect;
+  Value : TPDFObject;
+  Idx : Integer;
+  Kids : TPDFArray absolute Value;
+  ObjID,ObjGen : Integer;
+
+begin
+  Result:=Nil;
+  lPages:=GetPages;
+  if Assigned(lPages) and Assigned(lPages.ObjectDict) then
+    begin
+    Value:=lPages.ObjectDict.FindValue('Kids');
+    // Each reference occupies three consecutive array elements
+    Idx:=aIndex * 3;
+    // All three elements of the triple must lie inside the array
+    if Assigned(Value) and (Value is TPDFArray) and (aIndex>=0) and (Idx+2<Kids.Count) then
+      if Kids.IsIntegerAt(Idx) and  Kids.IsIntegerAt(Idx+1) and  Kids.IsKeywordAt(Idx+2,'R')  then
+        begin
+        ObjID:=Kids.GetIntegerAt(Idx);
+        ObjGen:=Kids.GetIntegerAt(Idx+1);
+        Result:=FindIndirectObject(ObjID,ObjGen);
+        end;
+    end;
+end;
+
+// Returns the page with zero-based index aIndex, or Nil when not found.
+// The direct children of the root pages object are visited in order while
+// aOffset counts the pages already passed. A nested pages node is searched
+// when aIndex lies inside its page range; a leaf page matches when the
+// running offset equals aIndex.
+function TPDFDocument.GetPage(aIndex : Integer): TPDFPageObject;
+
+Var
+  aCount : Integer;
+  I,aOffset : integer;
+  aNode : TPDFIndirect;
+  aPages : TPDFPagesObject absolute aNode;
+  aPage : TPDFPageObject absolute aNode;
+
+begin
+  Result:=nil;
+  aOffset:=0;
+  I:=0;
+  aCount:=PageCount;
+  While (Result=Nil) and (I<aCount) do
+    begin
+    aNode:=PageNodes[I];
+    if aNode is TPDFPagesObject then
+      begin
+      if (aOffset<=aIndex) and (aIndex<aOffset+aPages.PageCount) then
+        Result:=aPages.FindPage(aIndex-aOffset)
+      else
+        Inc(aOffset,aPages.PageCount);
+      end
+    else if aNode is TPDFPageObject then
+      begin
+      // Compare with the page offset, not the child index: the two differ as
+      // soon as an earlier sibling is a pages node holding several pages.
+      if aOffset=aIndex then
+        Result:=aPage
+      else
+        inc(aOffset);
+      end;
+    inc(I);
+    end;
+end;
+
+// Linear search for the first TPDFIndirect in the document that matches the
+// given ID and generation. Returns Nil when none matches.
+function TPDFDocument.FindInDirectObject(aID: Integer; aGeneration: Integer
+  ): TPDFIndirect;
+
+Var
+  Idx : integer;
+  Candidate : TPDFIndirect;
+
+begin
+  Result:=Nil;
+  For Idx:=0 to Count-1 do
+    if Objects[Idx].InheritsFrom(TPDFIndirect) then
+      begin
+      Candidate:=TPDFIndirect(Objects[Idx]);
+      if Candidate.Match(aID,aGeneration) then
+        Exit(Candidate);
+      end;
+end;
+
+// Finds the indirect object aRef points to, by ID and generation.
+// Returns Nil for a Nil reference or when no object matches.
+function TPDFDocument.FindInDirectObject(aRef: TPDFRef): TPDFIndirect;
+begin
+  // A missing reference resolves to "not found" instead of a Nil dereference
+  if aRef=Nil then
+    Result:=Nil
+  else
+    Result:=FindInDirectObject(aRef.ObjectID,aRef.ObjectGeneration);
+end;
+
+// Finds the indirect object identified by aRef. An empty reference yields
+// Nil without searching.
+function TPDFDocument.FindInDirectObject(aRef: TPDFRefData): TPDFIndirect;
+begin
+  if not aRef.IsEmpty then
+    Result:=FindIndirectObject(aRef.ObjectID,aRef.ObjectGeneration)
+  else
+    Result:=Nil;
+end;
+
+// Convenience overload: forwards ID and generation of aRef to the
+// (ID, generation, object) overload.
+function TPDFDocument.ReplaceIndirectObj(aRef: TPDFRefData;
+  aObject: TPDFIndirect): TPDFIndirect;
+begin
+  Exit(ReplaceIndirectObj(aRef.ObjectID,aRef.ObjectGeneration,aObject));
+end;
+
+// Searches for an indirect object matching ID and generation, calls Replace
+// at its position with aObject and returns what Replace returned.
+// Returns Nil when no object matches.
+function TPDFDocument.ReplaceIndirectObj(aID: Integer; aGeneration: Integer;aObject: TPDFIndirect): TPDFIndirect;
+
+Var
+  Idx : integer;
+  Current: TPDFIndirect;
+
+begin
+  Result:=Nil;
+  // The upper bound is taken once, before any replacement happens
+  For Idx:=0 to Count-1 do
+    begin
+    if Objects[Idx].InheritsFrom(TPDFIndirect) then
+      begin
+      Current:=TPDFIndirect(Objects[Idx]);
+      if Current.Match(aID,aGeneration) then
+        Result:=Replace(Idx,aObject) as TPDFIndirect;
+      end;
+    if Result<>Nil then
+      Break;
+    end;
+end;
+
+// Convenience overload: looks the font up by the reference data of aRef.
+function TPDFDocument.FindFont(aRef: TPDFRef): TPDFFontObject;
+begin
+  Exit(FindFont(aRef.Ref));
+end;
+
+// Returns the indirect object identified by aRef when it is a font object;
+// Nil when nothing is found or the object is of another type.
+function TPDFDocument.FindFont(aRef: TPDFRefData): TPDFFontObject;
+
+Var
+  Found : TPDFIndirect;
+
+begin
+  Found:=FindInDirectObject(aRef);
+  // "is" is False for Nil, which covers the not-found case as well
+  if Found is TPDFFontObject then
+    Result:=TPDFFontObject(Found)
+  else
+    Result:=Nil;
+end;
+
+// Convenience overload: fetches the font by the reference data of aRef.
+function TPDFDocument.GetFont(aRef: TPDFRef): TPDFFontObject;
+begin
+  Exit(GetFont(aRef.Ref));
+end;
+
+// Like FindFont, but raises EPDF when no font is found at aRef.
+function TPDFDocument.GetFont(aRef: TPDFRefData): TPDFFontObject;
+begin
+  Result:=FindFont(aRef);
+  if not Assigned(Result) then
+    Raise EPDF.CreateFmt(SErrNoFontAt,[aRef.AsString]);
+end;
+
+// Resolves the Info entry of the trailer dictionary. Returns Nil when there
+// is no trailer dictionary or the entry is not a reference.
+function TPDFDocument.FindDocumentInfoObject: TPDFIndirect;
+
+Var
+  InfoEntry : TPDFObject;
+
+begin
+  Result:=Nil;
+  if Not Assigned(TrailerDict) then
+    exit;
+  InfoEntry:=TrailerDict.FindValue(SPDFKeyInfo);
+  if InfoEntry is TPDFRef then
+    Result:=FindIndirectObject(TPDFRef(InfoEntry));
+end;
+
+// Creates a new TPDFDocumentInfo around the dictionary of the document
+// information object. Returns Nil when that object or its dictionary is
+// missing. A fresh instance is created on every successful call.
+function TPDFDocument.FindDocumentInfo: TPDFDocumentInfo;
+
+Var
+  InfoObj : TPDFIndirect;
+
+begin
+  InfoObj:=FindDocumentInfoObject;
+  if Assigned(InfoObj) and Assigned(InfoObj.ObjectDict) then
+    Result:=TPDFDocumentInfo.Create(InfoObj.ObjectDict)
+  else
+    Result:=Nil;
+end;
+
+// Like FindDocumentInfo, but raises EPDF when there is no information object.
+function TPDFDocument.GetDocumentInfo: TPDFDocumentInfo;
+
+begin
+  Result:=FindDocumentInfo;
+  if not Assigned(Result) then
+    Raise EPDF.Create('Document has no Document Information object');
+end;
+
+// Returns the catalog object, resolving it from the Root entry of the
+// trailer dictionary on first use and caching it in FCatalog.
+// The "as" cast raises when the referenced object is not a catalog object.
+function TPDFDocument.FindCatalog: TPDFCatalogObject;
+Var
+  RootEntry : TPDFObject;
+
+begin
+  if (FCatalog=Nil) and  Assigned(TrailerDict) then
+    begin
+    RootEntry:=TrailerDict.FindValue(SPDFKeyRoot);
+    if RootEntry is TPDFRef then
+      FCatalog:=FindInDirectObject(TPDFRef(RootEntry)) as TPDFCatalogObject;
+    end;
+  Result:=FCatalog;
+end;
+
+// Like FindCatalog, but raises EPDF when no catalog object is found.
+function TPDFDocument.GetCatalog: TPDFCatalogObject;
+
+begin
+  Result:=FindCatalog;
+  if not Assigned(Result) then
+    Raise EPDF.Create('No catalog object found!');
+end;
+
+// Returns the root pages object, resolving it from the Pages entry of the
+// catalog dictionary on first use and caching it in FPages.
+function TPDFDocument.FindPages: TPDFIndirect;
+
+Var
+  Catalog : TPDFIndirect;
+  PagesEntry : TPDFObject;
+
+begin
+  if FPages=Nil then
+    begin
+    Catalog:=FindCatalog;
+    if Assigned(Catalog) then
+      begin
+      PagesEntry:=Catalog.ObjectDict.FindValue(SPDFKeyPages);
+      if PagesEntry is TPDFRef then
+        FPages:=FindInDirectObject(TPDFRef(PagesEntry));
+      end;
+    end;
+  Result:=FPages;
+end;
+
+// Like FindPages, but raises EPDF when no pages object is found.
+function TPDFDocument.GetPages: TPDFIndirect;
+begin
+  Result:=FindPages;
+  if not Assigned(Result) then
+    Raise EPDF.Create('No pages object found!');
+end;
+
+// Adds aObj and sets its document to this instance, unless an object with
+// the same ID and generation already exists. Returns True when it was added.
+function TPDFDocument.AddInDirectObject(aObj: TPDFIndirect): Boolean;
+begin
+  Result:=not Assigned(FindIndirectObject(aObj.ObjectID,aObj.ObjectGeneration));
+  if not Result then
+    Exit;
+  Add(aObj);
+  aObj.FDocument:=Self;
+end;
+
+{ TPDFArray }
+
+// Element kind reported for arrays.
+class function TPDFArray.ElementType: TPDFElementType;
+begin
+  Exit(peArray);
+end;
+
+// Describes the array as "Array [ <item> <item> ...]", using the
+// description of each contained object.
+function TPDFArray.GetDescription: String;
+
+var
+  Item : TPDFObject;
+  Body : String;
+
+begin
+  Body:='';
+  For Item in self do
+    Body:=Body+' '+Item.GetDescription;
+  Result:='Array ['+Body+']';
+end;
+
+// True when the element at aIndex is a TPDFValue holding an integer.
+function TPDFArray.IsIntegerAt(aIndex: Integer): Boolean;
+
+Var
+  Item : TPDFObject;
+
+begin
+  Item:=Objects[aIndex];
+  Result:=Item is TPDFValue;
+  if Result then
+    Result:=TPDFValue(Item).IsInteger;
+end;
+
+// True when the element at aIndex is a TPDFValue that is the keyword aKeyWord.
+function TPDFArray.IsKeywordAt(aIndex: Integer; const aKeyWord: RawByteString
+  ): Boolean;
+
+Var
+  Item : TPDFObject;
+
+begin
+  Item:=Objects[aIndex];
+  Result:=Item is TPDFValue;
+  if Result then
+    Result:=TPDFValue(Item).IsKeyword(aKeyword);
+end;
+
+// Integer value of the element at aIndex.
+// Raises EConvertError, naming the index, when the element is not a
+// TPDFValue holding an integer.
+function TPDFArray.GetIntegerAt(aIndex: Integer): Integer;
+begin
+  If (Objects[aIndex] is TPDFValue) and  (TPDFValue(Objects[aIndex]).IsInteger) then
+    Result:=TPDFValue(Objects[aIndex]).AsInteger
+  else
+    // CreateFmt, so that %d is replaced by the offending index
+    Raise EConvertError.CreateFmt('Array element %d is not an integer value',[aIndex]);
+end;
+
+{ TPDFStream }
+
+// Create a stream object that holds aData in FData.
+constructor TPDFStream.Create(const aData: TBytes);
+begin
+  Inherited Create();
+  FData:=aData;
+end;
+
+class function TPDFStream.ElementType: TPDFElementType;
+
+// Element type tag of stream objects.
+
+begin
+  Exit(peStream);
+end;
+
+{ TPDFValue }
+
+function TPDFValue.GetAsInteger: Integer;
+
+// Convert Value with StrToInt; an invalid value makes StrToInt raise EConvertError.
+
+begin
+  Exit(StrToInt(Value));
+end;
+
+function TPDFValue.GetAsBoolean: Boolean;
+
+// Value 'true' gives True, 'false' gives False (case sensitive).
+// Anything else raises EConvertError.
+
+begin
+  Result:=False;
+  if Value='true' then
+    Result:=True
+  else if Value<>'false' then
+    Raise EConvertError.CreateFmt('Not a valid boolean value : %s',[Value]);
+end;
+
+function TPDFValue.GetAsDateTime: TDateTime;
+
+// Convert a value of the form D:YYYYMMDDHHNNSS to a TDateTime.
+// An empty value gives 0. Fields that are missing or not numeric count as 0;
+// a month or day of 0 is replaced by 1. Whatever follows the seconds is ignored.
+// Raises EConvertError when the value does not start with 'D:'.
+
+Var
+  Digits : String;
+  Cursor : Integer;
+  Year,Month,Day,Hour,Minute,Second : Word;
+
+  // Read the next aWidth characters as a number and advance the cursor.
+  Function NextField(aWidth : Integer) : Word; inline;
+  begin
+    Result:=StrToIntDef(Copy(Digits,Cursor,aWidth),0);
+    Inc(Cursor,aWidth);
+  end;
+
+begin
+  Result:=0;
+  if Value='' then
+    exit;
+  if Copy(Value,1,2)<>'D:' then
+    Raise EConvertError.CreateFmt('Not a valid Date/Time value : %s',[Value]);
+  // Strip the D: prefix
+  Digits:=Value;
+  Delete(Digits,1,2);
+  Cursor:=1;
+  Year:=NextField(4);
+  Month:=NextField(2);
+  if Month=0 then
+    Month:=1;
+  Day:=NextField(2);
+  if Day=0 then
+    Day:=1;
+  Hour:=NextField(2);
+  Minute:=NextField(2);
+  Second:=NextField(2);
+  // Dropping the offset for the moment
+  Result:=EncodeDate(Year,Month,Day)+EncodeTime(Hour,Minute,Second,0);
+end;
+
+function TPDFValue.GetAsFloat: Double;
+
+// Convert Value with Val; raises EConvertError when Val reports an error position.
+
+Var
+  ErrPos : Integer;
+
+begin
+  Val(Value,Result,ErrPos);
+  if ErrPos=0 then
+    exit;
+  Raise EConvertError.CreateFmt('Not a valid float value : %s',[Value]);
+end;
+
+function TPDFValue.GetAsInt64: Int64;
+
+// Convert Value with StrToInt64; an invalid value makes StrToInt64 raise EConvertError.
+
+begin
+  Exit(StrToInt64(Value));
+end;
+
+constructor TPDFValue.Create(aValue: RawByteString; aTokenType : TPDFTokenType);
+
+// Create a value object from the raw token text and the type of the token it came from.
+
+begin
+  Inherited Create();
+  FTokenType:=aTokenType;
+  FValue:=aValue;
+end;
+
+class function TPDFValue.ElementType: TPDFElementType;
+
+// Element type tag of value objects.
+
+begin
+  Exit(peValue);
+end;
+
+function TPDFValue.GetDescription: String;
+
+// Description of the form 'Value (<value>)'.
+
+begin
+  Exit('Value ('+Value+')');
+end;
+
+function TPDFValue.IsKeyword(aKeyWord: string): Boolean;
+
+// True when Value equals aKeyWord exactly.
+
+begin
+  Exit(Value=aKeyWord);
+end;
+
+function TPDFValue.IsInteger: Boolean;
+
+// True when Value can be converted with TryStrToInt.
+
+Var
+  Ignored : Integer;
+
+begin
+  Exit(TryStrToInt(Value,Ignored));
+end;
+
+function TPDFValue.IsInt64: Boolean;
+
+// True when Value can be converted with TryStrToInt64.
+
+Var
+  Ignored : Int64;
+
+begin
+  Exit(TryStrToInt64(Value,Ignored));
+end;
+
+{ TPDFDictionary }
+
+class function TPDFDictionary.ElementType: TPDFElementType;
+
+// Element type tag of dictionary objects.
+
+begin
+  Exit(peDictionary);
+end;
+
+function TPDFDictionary.GetDescription: String;
+
+// Multi-line description: 'Dictionary: <<', one line per entry, then '>>'.
+
+var
+  Idx : Integer;
+
+begin
+  Result:='Dictionary: <<';
+  Idx:=0;
+  while Idx<Count do
+    begin
+    Result:=Result+sLineBreak+(Objects[Idx] as TPDFDictEntry).GetDescription;
+    Inc(Idx);
+    end;
+  Result:=Result+sLineBreak+'>>';
+end;
+
+function TPDFDictionary.AddEntry(aKey: String; aValue: TPDFObject): TPDFDictEntry;
+
+// Create an entry for aKey/aValue, add it to the dictionary and return it.
+// A leading '/' is removed from the key.
+
+var
+  lKey : String;
+
+begin
+  lKey:=aKey;
+  if (lKey<>'') and (lKey[1]='/') then
+    Delete(lKey,1,1);
+  Result:=TPDFDictEntry.Create();
+  Result.Key:=lKey;
+  Result.Value:=aValue;
+  AddEntry(Result);
+end;
+
+function TPDFDictionary.AddEntry(aEntry: TPDFDictEntry): Integer;
+
+// Add an existing entry; returns the value returned by Add.
+
+begin
+  Exit(Add(aEntry));
+end;
+
+function TPDFDictionary.ContainsKey(const aKeyword: RawByteString): boolean;
+
+// True when IndexOfKey finds an entry for aKeyword.
+
+begin
+  Exit(IndexOfKey(aKeyword)<>-1);
+end;
+
+function TPDFDictionary.IndexofKey(const aKeyword: RawByteString): Integer;
+
+// Search the entries from last to first for a key equal to aKeyword.
+// Returns the index of the match (the last one if the key occurs more than once), or -1.
+
+var
+  I : Integer;
+
+begin
+  for I:=Count-1 downto 0 do
+    if TPDFDictEntry(Objects[I]).Key=aKeyWord then
+      Exit(I);
+  Result:=-1;
+end;
+
+function TPDFDictionary.FindKey(const aKeyword: RawByteString): TPDFDictEntry;
+
+// Return the entry for aKeyword, or Nil when there is none.
+
+Var
+  lIndex : Integer;
+
+begin
+  lIndex:=IndexOfKey(aKeyWord);
+  if lIndex=-1 then
+    Exit(Nil);
+  Result:=Objects[lIndex] as TPDFDictEntry;
+end;
+
+function TPDFDictionary.GetKey(const aKeyword: RawByteString): TPDFDictEntry;
+
+// Strict variant of FindKey: raises EPDF when there is no entry for aKeyword.
+
+begin
+  Result:=FindKey(aKeyword);
+  if not Assigned(Result) then
+    Raise EPDF.CreateFmt('No such key in dictionary: %s',[aKeyWord]);
+end;
+
+function TPDFDictionary.FindValue(const aKeyword: RawByteString): TPDFObject;
+
+// Return the value of the entry for aKeyword, or Nil when there is no such entry.
+
+Var
+  lEntry : TPDFDictEntry;
+
+begin
+  lEntry:=FindKey(aKeyword);
+  if lEntry=Nil then
+    Exit(Nil);
+  Result:=lEntry.Value;
+end;
+
+function TPDFDictionary.FindArrayValue(const aKeyword: RawByteString): TPDFArray;
+
+// Return the value for aKeyword as an array, Nil when there is no value.
+// Raises EPDF when the value exists but is not a TPDFArray.
+
+Var
+  lFound : TPDFObject;
+
+begin
+  Result:=nil;
+  lFound:=FindValue(aKeyWord);
+  if lFound=Nil then
+    exit;
+  if lFound is TPDFArray then
+    Result:=TPDFArray(lFound)
+  else
+    Raise EPDF.CreateFmt(SErrDictValueIsNotArray,[aKeyWord]);
+end;
+
+function TPDFDictionary.FindDictionaryValue(const aKeyword: RawByteString): TPDFDictionary;
+
+// Return the value for aKeyword as a dictionary, Nil when there is no value.
+// Raises EPDF when the value exists but is not a TPDFDictionary.
+
+var
+  lFound : TPDFObject;
+
+begin
+  Result:=Nil;
+  lFound:=FindValue(aKeyWord);
+  if lFound=Nil then
+    exit;
+  if lFound is TPDFDictionary then
+    Result:=TPDFDictionary(lFound)
+  else
+    Raise EPDF.CreateFmt(SErrDictValueIsNotDict,[aKeyWord]);
+end;
+
+function TPDFDictionary.GetValue(const aKeyword: RawByteString): TPDFObject;
+
+// Strict variant of FindValue: raises EPDF when there is no value for aKeyword.
+
+begin
+  Result:=FindValue(aKeyWord);
+  if not Assigned(Result) then
+    Raise EPDF.CreateFmt('No such value in dictionary: %s',[aKeyWord]);
+end;
+
+function TPDFDictionary.GetStringValue(const aKeyword: RawByteString): RawByteString;
+
+// Return the raw text of the value for aKeyword.
+// Raises EPDF when there is no value; the 'as' cast fails when the value is not a TPDFValue.
+
+Var
+  lFound : TPDFObject;
+
+begin
+  lFound:=FindValue(aKeyWord);
+  if lFound=Nil then
+    Raise EPDF.CreateFmt(SErrNoSuchDictValue,[aKeyWord]);
+  Result:=(lFound as TPDFValue).Value;
+end;
+
+function TPDFDictionary.GetInt64Value(const aKeyword: RawByteString): Int64;
+
+// Return the value for aKeyword as an Int64.
+// Raises EPDF when there is no value or when it is not a valid Int64;
+// the 'as' cast fails when the value is not a TPDFValue.
+
+Var
+  lFound : TPDFObject;
+  lValue : TPDFValue;
+
+begin
+  lFound:=FindValue(aKeyWord);
+  if lFound=Nil then
+    Raise EPDF.CreateFmt(SErrNoSuchDictValue,[aKeyWord]);
+  lValue:=lFound as TPDFValue;
+  if not lValue.IsInt64 then
+    Raise EPDF.CreateFmt(SErrDictValueIsNotInteger,[aKeyWord]);
+  Result:=lValue.AsInt64;
+end;
+
+function TPDFDictionary.GetIntegerValue(const aKeyword: RawByteString): Integer;
+
+// Return the value for aKeyword as an Integer.
+// Raises EPDF when there is no value or when it is not a valid integer;
+// the 'as' cast fails when the value is not a TPDFValue.
+
+Var
+  lFound : TPDFObject;
+  lValue : TPDFValue;
+
+begin
+  lFound:=FindValue(aKeyWord);
+  if lFound=Nil then
+    Raise EPDF.CreateFmt(SErrNoSuchDictValue,[aKeyWord]);
+  lValue:=lFound as TPDFValue;
+  if not lValue.IsInteger then
+    Raise EPDF.CreateFmt(SErrDictValueIsNotInteger,[aKeyWord]);
+  Result:=lValue.AsInteger;
+end;
+
+function TPDFDictionary.GetArrayValue(const aKeyword: RawByteString): TPDFArray;
+
+// Strict variant of FindArrayValue: raises EPDF when there is no value for aKeyword.
+// FindArrayValue itself raises EPDF when the value is not an array.
+
+begin
+  Result:=FindArrayValue(aKeyWord);
+  if (Result=Nil) then
+    Raise EPDF.CreateFmt(SErrNoSuchDictValue,[aKeyWord]);
+end;
+
+function TPDFDictionary.GetDictionaryValue(const aKeyword: RawByteString): TPDFDictionary;
+
+// Strict variant of FindDictionaryValue: raises EPDF when there is no value for aKeyword.
+
+begin
+  Result:=FindDictionaryValue(aKeyWord);
+  if not Assigned(Result) then
+    Raise EPDF.CreateFmt(SErrNoSuchDictValue,[aKeyWord]);
+end;
+
+{ TPDFDictEntry }
+
+class function TPDFDictEntry.ElementType: TPDFElementType;
+
+// Element type tag of dictionary entries.
+
+begin
+  Exit(peDictEntry);
+end;
+
+// The entry owns its value: the value object is freed together with the entry.
+destructor TPDFDictEntry.Destroy;
+begin
+  FreeAndNil(FValue);
+  inherited Destroy;
+end;
+
+function TPDFDictEntry.GetDescription: String;
+
+// Description of the form 'Entry "<key>" : <value description>';
+// an entry without value is shown as '(nil)'.
+
+var
+  lValueText : String;
+
+begin
+  lValueText:='(nil)';
+  if Assigned(Value) then
+    lValueText:=Value.GetDescription;
+  Result:=Format('Entry "%s" : %s',[Key,lValueText]);
+end;
+
+{ TPDFTokensObject }
+
+// Create an object that keeps the given token array in FTokens.
+constructor TPDFTokensObject.Create(const aTokens: TPDFTokenArray);
+begin
+  Inherited Create();
+  FTokens:=aTokens;
+end;
+
+{ TPDFContainer }
+
+function TPDFContainer.GetCount: Integer;
+
+// Number of objects in the container.
+
+begin
+  Exit(FItems.Count);
+end;
+
+function TPDFContainer.GetObject(aIndex : integer): TPDFObject;
+
+// Object stored at position aIndex of the item list.
+
+begin
+  Exit(TPDFObject(FItems[aIndex]));
+end;
+
+// Store aValue at position aIndex of the item list.
+// NOTE(review): FItems is created with OwnsObjects (True by default), in which case
+// TFPObjectList presumably frees the object previously stored at aIndex; Replace avoids that.
+procedure TPDFContainer.SetObject(aIndex : integer; AValue: TPDFObject);
+begin
+  FItems[aIndex]:=aValue;
+end;
+
+function TPDFContainer.Replace(aIndex: Integer; aObj: TPDFObject): TPDFObject;
+
+// Put aObj at position aIndex and return the object that was stored there before.
+// Ownership of the list is switched off during the exchange and restored afterwards.
+
+Var
+  lWasOwning : Boolean;
+
+begin
+  lWasOwning:=FItems.OwnsObjects;
+  FItems.OwnsObjects:=False;
+  try
+    Result:=TPDFObject(FItems[aIndex]);
+    FItems[aIndex]:=aObj;
+  finally
+    FItems.OwnsObjects:=lWasOwning;
+  end;
+end;
+
+// Create the container and its item list; the class function OwnsObjects
+// decides whether the list owns the objects added to it.
+constructor TPDFContainer.Create();
+begin
+  inherited Create();
+  FItems:=TFPObjectList.Create(OwnsObjects);
+end;
+
+class function TPDFContainer.OwnsObjects: Boolean;
+
+// Ownership setting passed to the item list in the constructor: True here.
+
+begin
+  Exit(True);
+end;
+
+class function TPDFContainer.ElementType: TPDFElementType;
+
+// Element type tag of plain containers.
+
+begin
+  Exit(peContainer);
+end;
+
+// Free the item list before the inherited destructor runs.
+destructor TPDFContainer.destroy;
+begin
+  FreeAndNil(FItems);
+  inherited destroy;
+end;
+
+function TPDFContainer.GetEnumerator: TPDFObjectEnumerator;
+
+// Enumerator used by for..in loops over the container.
+
+begin
+  Exit(TPDFContainerObjectEnumerator.Create(Self));
+end;
+
+function TPDFContainer.Add(aObject: TPDFObject): Integer;
+
+// Append aObject to the item list; returns what the list's Add returns.
+
+begin
+  Exit(FItems.Add(aObject));
+end;
+
+{ TPDFMalFormed }
+
+class function TPDFMalFormed.ElementType: TPDFElementType;
+
+// Element type tag of malformed objects.
+
+begin
+  Exit(peMalFormed);
+end;
+
+{ TPDFStartXRef }
+
+class function TPDFStartXRef.ElementType: TPDFElementType;
+
+// Element type tag of startxref objects.
+
+begin
+  Exit(peStartXRef);
+end;
+
+{ TPDFIndirect }
+
+// Return the object type, taken on first use from the 'Type' entry of the object dictionary.
+// A leading '/' is removed. The result is kept in FObjectType; as long as that is empty
+// (no dictionary, no 'Type' entry) the lookup is repeated on every call.
+function TPDFIndirect.GetObjectType: String;
+begin
+  if (FObjectType='') then
+    if Assigned(ObjectDict) then
+      begin
+      if ObjectDict.ContainsKey('Type') then
+        FObjectType:=ObjectDict.GetStringValue('Type');
+      if (FObjectType<>'') and (FObjectType[1]='/') then
+        Delete(FObjectType,1,1);
+      end
+{$IFDEF DEBUGOBJECTDICT}
+    // Debug builds report objects that have no dictionary.
+    else
+      Writeln('Warning : object ',ObjectID,' has no dictionary assigned');
+{$ELSE}
+      // No else branch in this configuration: the semicolon only terminates the if statement.
+      ;
+{$ENDIF}
+  Result:=FObjectType;
+end;
+
+function TPDFIndirect.CheckObjectDict: Boolean;
+
+// True when the object has a dictionary.
+// With DEBUGOBJECTDICT defined, a diagnostic line is written when it has none.
+
+begin
+  Result:=Assigned(ObjectDict);
+{$IFDEF DEBUGOBJECTDICT}
+  // Result is a Boolean: it cannot be compared with Nil.
+  if not Result then
+    Writeln('No object dict for object ',ClassName,' : ',GetDescription);
+{$ENDIF}
+end;
+
+function TPDFIndirect.FindDictValue(const aKey : RawbyteString) : TPDFObject;
+
+// Value for aKey in the object dictionary.
+// Returns Nil when there is no dictionary or no such value.
+
+begin
+  if CheckObjectDict then
+    Result:=ObjectDict.FindValue(aKey)
+  else
+    Result:=Nil;
+end;
+
+function TPDFIndirect.GetDictValue(const aKey : RawbyteString) : TPDFObject;
+
+// Strict variant of FindDictValue: raises EPDF when no value is found for aKey.
+
+begin
+  Result:=FindDictValue(aKey);
+  if not Assigned(Result) then
+    Raise EPDF.CreateFmt('No such value in object dictionary: %s',[aKey]);
+end;
+
+// Class constructor: creates the hash table in which RegisterType stores
+// the class to use for a given (lowercased) type name.
+class constructor TPDFIndirect.InitIndirect;
+begin
+  _ClassList:=TFPDataHashTable.Create;
+end;
+
+// Class destructor: releases the type registry created by the class constructor.
+class destructor TPDFIndirect.DoneIndirect;
+begin
+  _ClassList.Free;
+end;
+
+// Create a new object that takes over the content of aSource.
+// The item lists are exchanged: the new object gets the list of aSource,
+// aSource is left with the freshly created (empty) list.
+// Type, dictionary, ID, generation, stream and object positions are copied as they are.
+// NOTE(review): FUnfilteredStream is copied by reference and Destroy frees it, so after
+// this call both objects refer to the same stream - confirm that aSource cannot free it too.
+constructor TPDFIndirect.Create(aSource: TPDFIndirect);
+
+Var
+  L : TFPObjectList;
+
+begin
+  Create ;
+  // Swap the item lists
+  L:=aSource.FItems;
+  aSource.FItems:=Self.FItems;
+  Self.FItems:=L;
+  FObjectType:=aSource.FObjectType;
+  FDict:=aSource.FDict;
+  FObjectID:=aSource.ObjectID;
+  FObjectGeneration:= aSource.ObjectGeneration;
+  FStream:=aSource.FStream;
+  FUnfilteredStream:=aSource.FUnfilteredStream;
+  FObjectPositions:=aSource.FObjectPositions;
+end;
+
+// Free the unfiltered stream, then continue with the inherited destructor.
+destructor TPDFIndirect.Destroy;
+begin
+  FreeAndNil(FunFilteredStream);
+  inherited Destroy;
+end;
+
+function TPDFIndirect.ResolveObjectType: TPDFIndirect;
+
+// When a class is registered for the type of this object, return a new instance
+// of that class created from this object. Otherwise return the object itself.
+
+Var
+  lTypeName : String;
+  lClass : TPDFIndirectClass;
+
+begin
+  Result:=Self;
+  lTypeName:=ObjectType;
+  if lTypeName='' then
+    exit;
+  lClass:=FindClassForType(lTypeName);
+  if lClass<>Nil then
+    Result:=lClass.Create(Self);
+end;
+
+class function TPDFIndirect.FindClassForType(aType: String): TPDFIndirectClass;
+
+// Look up the class stored in the registry under the lowercased type name.
+
+var
+  lKey : String;
+
+begin
+  lKey:=LowerCase(aType);
+  Result:=TPDFIndirectClass(_ClassList.Items[lKey]);
+end;
+
+class procedure TPDFIndirect.RegisterType(aType: String; aClass: TPDFIndirectClass);
+
+// Store aClass in the registry under the lowercased type name.
+
+var
+  lKey : String;
+
+begin
+  lKey:=LowerCase(aType);
+  _ClassList.Add(lKey,aClass);
+end;
+
+class procedure TPDFIndirect.UnRegisterType(aType: String);
+
+// Remove the registry entry stored under the lowercased type name.
+
+var
+  lKey : String;
+
+begin
+  lKey:=LowerCase(aType);
+  _ClassList.Delete(lKey);
+end;
+
+class procedure TPDFIndirect.Register;
+
+// Register this class under the name returned by RegisterAsType.
+// Nothing happens when that name is empty.
+
+Var
+  lTypeName : String;
+
+begin
+  lTypeName:=RegisterAsType;
+  if lTypeName='' then
+    exit;
+  RegisterType(lTypeName,Self);
+end;
+
+class function TPDFIndirect.RegisterAsType: String;
+
+// Type name used by Register and UnRegister. Empty here, which means: do not register.
+
+begin
+  Exit('');
+end;
+
+class procedure TPDFIndirect.UnRegister;
+
+// Remove the registration made under the name returned by RegisterAsType.
+// Nothing happens when that name is empty.
+
+Var
+  lTypeName : String;
+
+begin
+  lTypeName:=RegisterAsType;
+  if lTypeName='' then
+    exit;
+  UnRegisterType(lTypeName);
+end;
+
+class function TPDFIndirect.ElementType: TPDFElementType;
+
+// Element type tag of indirect objects.
+
+begin
+  Exit(peIndirectObject);
+end;
+
+function TPDFIndirect.GetDescription: String;
+
+// Description showing the object ID and the object type.
+
+begin
+  Exit(Format('Indirect object %d (Type: %s)',[ObjectID,ObjectType]));
+end;
+
+function TPDFIndirect.Add(aObject: TPDFObject): Integer;
+
+// Add aObject to the container. The first dictionary added becomes FDict,
+// the first stream added becomes FStream.
+
+begin
+  Result:=inherited Add(aObject);
+  if not Assigned(FDict) then
+    if aObject is TPDFDictionary then
+      FDict:=TPDFDictionary(aObject);
+  if not Assigned(FStream) then
+    if aObject is TPDFStream then
+      FStream:=TPDFStream(aObject);
+end;
+
+function TPDFIndirect.Match(aObjectID: Integer; aObjectGeneration: Integer): Boolean;
+
+// True when both the ID and the generation of this object equal the given values.
+
+begin
+  Result:=(FObjectID=aObjectID) and (FObjectGeneration=aObjectGeneration);
+end;
+
+class function TPDFTrailer.ElementType: TPDFElementType;
+
+// Element type tag of trailer objects.
+
+begin
+  Exit(peTrailer);
+end;
+
+function TPDFTrailer.Contains(const aKeyword: RawByteString): boolean;
+
+// True when the trailer has an entry for aKeyword.
+
+begin
+  // IndexOfKey returns -1 when the key is not found; 0 is the index of the first entry.
+  Result:=IndexOfKey(aKeyword)<>-1;
+end;
+
+{ TPDFXRefList }
+
+function TPDFXRefList.GetItm(aIndex : integer): TPDFXRef;
+
+// Object at aIndex, checked to be a TPDFXRef.
+
+begin
+  Exit(Objects[aIndex] as TPDFXRef);
+end;
+
+// Store aValue at position aIndex.
+procedure TPDFXRefList.SetItm(aIndex : integer; AValue: TPDFXRef);
+begin
+  Objects[aIndex]:=aValue;
+end;
+
+class function TPDFXRefList.ElementType: TPDFElementType;
+
+// Element type tag of XRef lists.
+
+begin
+  Exit(peXREF);
+end;
+
+function TPDFXRefList.IndexOfReference(aObjectID, aObjectGeneration: Integer): Integer;
+
+// Search the references from last to first for one matching ID and generation.
+// Returns its index, or -1 when there is none.
+
+var
+  I : Integer;
+
+begin
+  for I:=Count-1 downto 0 do
+    if References[I].Match(aObjectID,aObjectGeneration) then
+      Exit(I);
+  Result:=-1;
+end;
+
+function TPDFXRefList.FindReference(aObjectID, aObjectGeneration: Integer): TPDFXRef;
+
+// Return the reference matching ID and generation, or Nil when there is none.
+
+var
+  lIndex : integer;
+
+begin
+  Result:=Nil;
+  lIndex:=IndexOfReference(aObjectID,aObjectGeneration);
+  if lIndex<>-1 then
+    Result:=References[lIndex];
+end;
+
+{ TPDFObject }
+
+
+{$IFDEF DEBUGPDFALLOCATION}
+// Allocation tracking, only compiled with DEBUGPDFALLOCATION:
+// the constructor adds every TPDFObject to this list, the destructor removes it again.
+Var
+  _Global : TFPObjectList;
+
+destructor TPDFObject.destroy;
+begin
+  // Take the instance out of the tracking list.
+  _Global.Remove(Self);
+  inherited destroy;
+end;
+{$ENDIF}
+
+// Does nothing in a normal build. With DEBUGPDFALLOCATION defined, the new instance
+// is added to the tracking list, which is created on first use.
+constructor TPDFObject.Create();
+begin
+{$IFDEF DEBUGPDFALLOCATION}
+  // The tracking list does not own the objects: it only records them.
+  if not assigned(_Global) then
+    _Global:=TFPObjectList.Create(False);
+  _Global.Add(Self);
+{$ENDIF}
+end;
+
+function TPDFObject.GetEnumerator: TPDFObjectEnumerator;
+
+// Enumerator used by for..in loops over a single object.
+
+begin
+  Exit(TPDFSingleObjectEnumerator.Create(Self));
+end;
+
+function TPDFObject.GetDescription: String;
+
+// Default description: the name of the ElementType enumeration value.
+
+begin
+  Exit(GetEnumName(TypeInfo(TPDFElementType),Ord(ElementType)));
+end;
+
+{ TPDFComment }
+
+// Create a comment object that keeps the comment text in FComment.
+constructor TPDFComment.Create(const aComment: RawByteString);
+begin
+  Inherited Create();
+  FComment:=aComment;
+end;
+
+class function TPDFComment.ElementType: TPDFElementType;
+
+// Element type tag of comments.
+
+begin
+  Exit(peComment);
+end;
+
+{ TPDFDocumentInfo }
+
+// Create an info object that reads its values from the dictionary aSource.
+// Only the reference is stored in FSource.
+Constructor TPDFDocumentInfo.Create(aSource : TPDFDictionary);
+
+begin
+  Inherited Create;
+  FSource:=aSource;
+end;
+
+Function TPDFDocumentInfo.GetKeyName(aIndex : Integer) : RawByteString;
+
+// Map a property index (0..8) to the corresponding dictionary key.
+// Any other index gives an empty string.
+
+begin
+  Case aIndex of
+    0 : Result:=SPDFKeyTitle;
+    1 : Result:=SPDFKeyAuthor;
+    2 : Result:=SPDFKeySubject;
+    3 : Result:=SPDFKeyKeywords;
+    4 : Result:=SPDFKeyCreator;
+    5 : Result:=SPDFKeyProducer;
+    6 : Result:=SPDFKeyCreationDate;
+    7 : Result:=SPDFKeyModDate;
+    8 : Result:=SPDFKeyTrapped;
+  else
+    Result:='';
+  end;
+end;
+
+Function TPDFDocumentInfo.GetString(aIndex : Integer) : String;
+
+// Text of the source dictionary value that belongs to property index aIndex.
+// Empty when the index is unknown or the value is absent or not a TPDFValue.
+
+Var
+  lKey : RawByteString;
+  lFound : TPDFObject;
+
+begin
+  Result:='';
+  lKey:=GetKeyName(aIndex);
+  if lKey='' then
+    exit;
+  lFound:=FSource.FindValue(lKey);
+  // Conversion may need to be done here.
+  if lFound is TPDFValue then
+    Result:=TPDFValue(lFound).Value;
+end;
+
+Function TPDFDocumentInfo.GetDate(aIndex : Integer) : TDateTime;
+
+// Date/time of the source dictionary value that belongs to property index aIndex.
+// 0 when the index is unknown or the value is absent or not a TPDFValue.
+
+Var
+  lKey : RawByteString;
+  lFound : TPDFObject;
+
+begin
+  Result:=0;
+  lKey:=GetKeyName(aIndex);
+  if lKey='' then
+    exit;
+  lFound:=FSource.FindValue(lKey);
+  if lFound is TPDFValue then
+    Result:=TPDFValue(lFound).AsDateTime;
+end;
+
+Function TPDFDocumentInfo.GetName(aIndex : integer) : String;
+
+// Name values are returned exactly like string values.
+
+begin
+  Exit(GetString(aIndex));
+end;
+
+// Call Register on each of the standard indirect object classes of this unit.
+// Called from the initialization section.
+Procedure RegisterStandardClasses;
+
+begin
+  TPDFPageObject.Register;
+  TPDFPagesObject.Register;
+  TPDFCatalogObject.Register;
+  TPDFObjectStreamObject.Register;
+  TPDFIndirectXRef.Register;
+  TPDFFontObject.Register;
+end;
+
+// Call Register on each of the standard command classes of this unit.
+// Called from the initialization section.
+Procedure RegisterStandardCommands;
+
+begin
+  TPDFBTCommand.Register;
+  TPDFETCommand.Register;
+  TPDFTJCommand.Register;
+  TPDFTj_Command.Register;
+  TPDFTfCommand.Register;
+  TPDFTd_Command.Register;
+  TPDFTDCommand.Register;
+end;
+
+{$IFDEF DEBUGPDFALLOCATION}
+// Write one line to standard output for every TPDFObject that is still in the
+// tracking list, then free the list. Called from the finalization section.
+Procedure DumpAllocations;
+
+Var
+  I : Integer;
+begin
+  if assigned (_Global) then
+    begin
+    For I:=0 to _Global.Count-1 do
+      Writeln('Not freed: ',_Global[i].ClassName,' : ',TPDFObject(_Global[i]).GetDescription);
+    FreeAndNil(_Global);
+    end;
+  Flush(output);
+end;
+{$ENDIF}
+
+initialization
+  // Make the standard object classes and commands known.
+  RegisterStandardClasses;
+  RegisterStandardCommands;
+
+finalization
+{$IFDEF DEBUGPDFALLOCATION}
+  // Report the TPDFObject instances that are still alive at unit finalization.
+  DumpAllocations
+{$ENDIF}
+end.
+

+ 2372 - 0
packages/fcl-pdf/src/fppdfparser.pp

@@ -0,0 +1,2372 @@
+{ **********************************************************************
+  This file is part of the Free Component Library
+
+  PDF Parser
+  Copyright (c) 2022 by Michael Van Canneyt [email protected]
+
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+  **********************************************************************}
+
+unit fppdfparser;
+
+{$mode ObjFPC}{$H+}
+{$J-}
+
+{$DEFINE DEBUGSTREAMS}
+{ $DEFINE DUMPSTREAMS}
+
+interface
+
+uses
+  Types, Typinfo, Classes, SysUtils, fppdfobjects, fppdfscanner, fppdfsource, streamex, fppdfpredict;
+
+Const
+  MaxTrailerDistance = 6;  // Maximum number of bytes to scan backwards when looking for the end (>>) of the trailer dictionary
+
+
+Type
+  { TFilterData }
+  // Data handed to a filter handler (see TFilterEvent and TPDFParser.Unpredict).
+  // NOTE(review): presumably Source is the stream to decode, Dest receives the result
+  // and ParamDict holds the filter parameters - confirm against the implementation.
+  TFilterData = Record
+    FilterName : String;
+    Source : TStream;
+    Dest : TStream;
+    ParamDict : TPDFDictionary;
+  end;
+
+  { EPDFParser }
+  // Parser exception carrying an error number and an object ID/generation.
+  // NOTE(review): ErrorNumber presumably holds one of the penXXX constants - confirm in DoError.
+  EPDFParser = Class(EPDF)
+    ErrorNumber : Integer;
+    ObjectID,ObjectGeneration : Integer;
+  end;
+
+  // Handler type of TPDFParser.OnUnknownFilter
+  TFilterEvent = Procedure (Sender : TObject; Var aData : TFilterData) of object;
+  // Log message kinds and handler type of TPDFParser.OnLog
+  TLogKind = (lkInfo,lkWarning,lkError);
+  TLogNotifyEvent = Procedure(sender : TObject; aKind : TLogkind; const aMessage : string) of object;
+
+  TIndexPair = Array[1..2] of Longint;
+  TIndexPairArray = Array of TIndexPair;
+  // What a progress notification is about
+  TProgressKind = (pkXRef,pkIndirect,pkContentStream);
+
+  // Handler types for new content stream commands and for TPDFParser.OnProgress
+  TNewCommandEvent = procedure(Sender : TObject; aStream : TPDFPageObject; aCommand : TPDFCommand) of object;
+  TProgressEvent = Procedure(Sender : TObject; aKind : TProgressKind;aCurrent,aCount : Integer) of object;
+
+  { TPDFParser }
+  // PDF parser. Tokens come from a TPDFScanner that the constructor creates on the
+  // stream passed to it; ParseDocument takes the TPDFDocument to work on.
+  TPDFParser = class
+  Private
+    FLoadObjects: Boolean;
+    FOnLog: TLogNotifyEvent;
+    FOnProgress: TProgressEvent;
+    FOnUnknownFilter: TFilterEvent;
+    FResolveContentStreams: Boolean;
+    FResolveObjects: Boolean;
+    FScanner : TPDFScanner;
+    FLastDict : TPDFDictionary; // Last created dictionary
+    FloadingXRef : TPDFXRefArray; // XRef entries indexed by object ID, used by FindIndirectObject
+    FDoc : TPDFDocument;
+
+    procedure ParseCMAPBFChar(aMap: TPDFCMapData);
+    procedure ParseCMAPBFRange(aMap: TPDFCMapData);
+    procedure ParseCMAPCodeSpaceRange(aMap: TPDFCMapData);
+    procedure ParseInlineImageData(var aOperands: TPDFTokenArray; aScanner: TPDFScanner );
+    procedure SetResolveContentStreams(AValue: Boolean);
+  Protected
+    // Progress, Logging & Errors.
+    procedure DoProgress(aKind : TProgressKind; aCurrent,aCount : Integer);
+    Procedure DoLog(aKind : TLogKind; Const Msg : string);
+    Procedure DoLog(aKind : TLogKind; Const Fmt : string; Const Args : Array of const);
+    Procedure DoInfo(Const Msg : string);
+    Procedure DoInfo(Const Fmt : string; Const args : Array of const);
+    Procedure DoWarning(Const Msg : string);
+    Procedure DoWarning(Const Fmt : string; Const args : Array of const);
+    // Nr is one of the penXXX error codes declared below
+    Procedure DoError(const Nr : Integer; Const Msg : string);
+    Procedure DoError(const Nr : Integer; Const Fmt : string; Const Args : Array of const);
+    Procedure DoUnknownStruct(aID : String);
+  Protected
+    // Factory methods
+    Function CreateScanner(aFile : TStream; aBufferSize : Cardinal) : TPDFScanner; virtual;
+    Function DefaultPDFPageClass : TPDFPageClass; virtual;
+    function CreateCommand(const aName: string; aOperands: TPDFTokenArray): TPDFCommand; virtual;
+    // Parsing & Token handling
+    function ParseSingleValue(aStartToken: TPDFToken; SkipWhiteSpace : Boolean): TPDFObject;
+    function GetIntegerToken(const aContext: String): Integer;
+    function GetInt64Token(const aContext: String): Int64;
+    function GetStringToken(const aContext: String): RawByteString;
+    procedure UnsupportedToken(const aMsg: UTF8String; aToken: TPDFToken);
+    function VerifyDocument: Int64;
+    // Stream Filters
+    function FindStreamLength(aReposition : Int64): Integer;
+    function FilterStream(aStream: TStream; aFilterName: String;  aParams: TPDFDictionary): TStream;
+    function GetUnfilteredStream(aObj: TPDFIndirect): TStream;
+    function ResolveFilters(aStream: TStream; aDict: TPDFDictionary): TStream;
+    // XRef handling
+    function GetXRefStreamSubSections(aObjectDict: TPDFDictionary): TIndexPairArray;
+    function ParseXREFStream(aStream: TStream; aObjectDict: TPDFDictionary): TPDFXRefList;
+    function ParseXRefAt(aStartPos: Int64; out ParentObject : TPDFIndirect): TPDFXRefList;
+    function GetLastXRef(StartAt: Int64; out FoundAt: Int64): TPDFStartXRef;
+    procedure LoadIndirectObjects;
+    // Object Stream
+    function ParseIndirectObjectInStream(aSTMObj: TPDFIndirect; aIdx, aObjectID: Integer): TPDFIndirect;
+    function ParseStreamObjectPositions(aSTMObj: TPDFIndirect  ): TObjectPositionArray;
+    // Content streams
+    procedure DoResolveContentStreams(aDoc: TPDFDocument;
+      aOnCommand: TNewCommandEvent=nil);
+    // Trailer
+    function DetectTrailerAt(aStartPos: Int64): TPDFTrailer;
+    // Dictionary handling
+    procedure DoParseDictionary(aDict: TPDFDictionary; const aToken: TPDFToken);
+    function ParseDictValue: TPDFObject;
+    // Indirect Object handling
+    function MaybeResolve(aObject: TPDFIndirect): TPDFIndirect; virtual;
+    function ParseIndirectObject(aAt: Int64): TPDFIndirect;
+    Procedure ParseIndirectObjectBody(aObject : TPDFIndirect);
+    Function FindIndirectObject(aObjectID,aObjectGeneration : Integer) : TPDFIndirect;
+    // Parsing
+    procedure ParseCMap(aStream: TStream; aMap: TPDFCMap);
+    function ParseCMap(aStream: TStream): TPDFCMap;
+    function ParseIndirectObject(aID, aGen: Int64): TPDFIndirect; virtual;
+    function ParseArray(aPrevToken: TPDFToken): TPDFArray; virtual;
+    function ParseDictionary(const aToken: TPDFToken): TPDFDictionary; virtual;
+    function ParseMalFormed(aToken: TPDFToken; aEndToken: RawByteString) : TPDFMalFormed; virtual;
+    function ParseStreamValue(aStartToken: TPDFToken; aStreamLength: Integer): TPDFStream; virtual;
+    function ParseTrailer(const aToken: TPDFToken): TPDFTrailer; virtual;
+    function ParseValue(StopAt : TPDFTokenTypes): TPDFObject; virtual;
+    function ParseXREF(const aToken: TPDFToken): TPDFXRefList; virtual;
+    function ParseXREFItem(aIndex: Integer): TPDFXRef; virtual;
+    function ParseStartXREF(const aToken: TPDFToken): TPDFStartXRef; virtual;
+  Public
+    // The scanner is created on aFile with the given buffer size (see CreateScanner)
+    Constructor Create(aFile : TStream; aBufferSize : Cardinal = DefaultBufferSize); virtual;
+    Destructor Destroy; override;
+    function GetPageContentStream(aDoc: TPDFDocument; aPage: TPDFPageObject
+      ): TStream;
+    Function GetObject : TPDFObject;
+    Procedure ParseDocument(aDoc : TPDFDocument); virtual;
+    function LoadXREFobject(Itm: TPDFXRef; addToDocument : Boolean = True): TPDFIndirect; virtual;
+    procedure ParseContentStream(aObject: TPDFPageObject; aStream: TStream;
+      aOnCommand: TNewCommandEvent); virtual;
+    Procedure ResolveToUnicodeCMaps(aDoc : TPDFDocument);
+    // Stream decoders. Each one exists as a procedure writing to aDest
+    // and as a function returning the result in a stream.
+    class procedure Unpredict(var Data: TFilterData);
+    Class procedure AsciiHEXDecode(aSrc,aDest : TStream);
+    Class Function AsciiHEXDecode(aSrc : TStream) : TStream;
+    Class procedure Ascii85Decode(aSrc,aDest : TStream);
+    Class Function Ascii85Decode(aSrc : TStream) : TStream;
+    Class procedure LZWDecode(aSrc,aDest : TStream);
+    Class Function LZWDecode(aSrc : TStream) : TStream;
+    Class procedure Deflate(aSrc,aDest : TStream);
+    Class Function Deflate(aSrc : TStream) : TStream;
+    Class procedure RunlengthDecode(aSrc,aDest : TStream);
+    Class Function RunlengthDecode(aSrc : TStream) : TStream;
+    Property Document : TPDFDocument Read FDoc;
+    // load all objects when XRef is parsed ?
+    Property LoadObjects : Boolean Read FLoadObjects Write FLoadObjects;
+    // When loading objects, resolve objects ?
+    Property ResolveObjects : Boolean Read FResolveObjects Write FResolveObjects;
+    // Resolve content streams of pages ?
+    Property ResolveContentStreams : Boolean Read FResolveContentStreams Write SetResolveContentStreams;
+    // Called when an unknown filter is encountered
+    Property OnUnknownFilter : TFilterEvent Read FOnUnknownFilter Write FOnUnknownFilter;
+    // Log function
+    Property OnLog : TLogNotifyEvent Read FOnLog Write FOnLog;
+    // Progress indicator when loading
+    Property OnProgress : TProgressEvent Read FOnProgress Write FOnProgress;
+  end;
+
+
+Const
+  // Error codes: the Nr argument passed to TPDFParser.DoError.
+  // Most codes have a matching SErrXXX message in the implementation section.
+  penUnknownToken = 1;
+  penExpectedInteger = 2;
+  penExpectedInt64 = 3;
+  penNotStreamObject = 4;
+  penNotObjectStream = 5;
+  penObjectStreamWithoutDict = 6;
+  penNoSuchObjectInStream = 7;
+  penNotIndirectObjectAtToken = 8;
+  penNotAtStreamStart = 9;
+  penExpectedWhiteSpaceAfterStream = 10;
+  penInvalidStreamEnd = 11;
+  penNoArrayStartToken = 12;
+  penNoDictionaryForLength = 13;
+  penNoLengthEntryInDictionary = 14;
+  penDictionaryNoLengthObject = 15;
+  penDictionaryLengthNotValue = 16;
+  penInvalidDictionaryRef = 17;
+  penEOFWhileScanningString = 18;
+  penInvalidHexString = 19;
+  penNotAtDictStart = 20;
+  penNotDictKeyToken = 21;
+  penNotAtDictEnd = 22;
+  penNotOnTrailerDict = 23;
+  penInvalidTrailer = 24;
+  penUnknownConstruct = 25;
+  penMissingPDFHeader = 26;
+  penMissingPDFVersion = 27;
+  penInvalidPDFVersion = 28;
+  penPDFEOFNotFound = 29;
+  penObjStmObjectIDInvalid = 30;
+  penObjStmObjectOffsetInvalid = 31;
+  penSizeElementNotInteger = 32;
+  penUnknownFilterValueClass = 33;
+  penUnknownFilter = 34;
+  penNoStartXRef = 35;
+  penNoXrefAt = 36;
+  penEOFWhileScanningCommands = 37;
+  penNoSuchStreamObject = 38;
+  penInvalidImageData = 39;
+  penInvalidImageDataSize = 40;
+  penContentStreamNotFound = 41;
+  penExpectedHexInteger = 42;
+  penExpectedIdentifierN = 43;
+  penExpectedName = 44;
+
+implementation
+
+uses strutils, zstream, ascii85, chainstream, lzwstream, fppdfconsts;
+
+resourcestring
+  // Messages for errors and warnings reported by the parser.
+  // Strings containing % placeholders are format strings.
+  SErrNoStartXRef = 'No startxref found, starting at position %d';
+  SErrNoXRefAt = 'No xref found at position %d';
+  SErrExpectedWhiteSpaceAfterStream = 'Expected whitespace after stream';
+  SErrUnknownToken = '%s: Unknown token "%s" : %s';
+  SErrExpectedInteger = '%s: Expected an integer, got: %s';
+  SErrExpectedInt64 = '%s: Expected an int64, got: %s';
+  SErrNotAnIndirectObjectAtToken = 'Not an indirect object at token: %s';
+  SErrInvalidStreamEnd = 'Invalid stream end token %d : ';
+  SErrNoDictionaryForLength = 'No dictionary to get stream length from';
+  SErrNoLengthEntryInDictionary = 'Dictionary does not have Length entry';
+  SErrDictionaryLengthNotValue = 'Dictionary Length is not a value';
+  SErrUnknownFilterValueClass = 'Unknown filter value class';
+  SErrUnknownFilter =  'Unknown stream filter : %s';
+  SErrInvalidDictionaryRef = 'Invalid dictionary reference value: %s ';
+  SErrDictionaryNoLengthObject = 'Invalid dictionary length object reference [%d %d]';
+  SErrEOFWhileScanningString = 'EOF encountered while scanning string';
+  sErrContentStreamNotFound = 'Invalid content stream object reference [%d %d]';
+
+  // SErrDictionaryNoLengthInObject = 'Invalid dictionary length object reference [%d %d] : No length in object';
+  SErrNoArrayStartToken = 'Not at array start: %s';
+  SErrNotAtStreamStart = 'Not at stream start: %s';
+
+  SErrObjectIsNotObjectStream = 'Object %d is not a ObjStm object.';
+  SErrStreamObjectWithoutDict = 'ObjStm Object %d does not have a dictionary';
+  SErrNoSuchObjectInstream = 'No object %d in stream %s (%d)';
+  SErrNotStreamObject = 'Object %d is not a stream, it is a %s object';
+  // Appended to a context description (see GetStringToken)
+  SErrExpectedString = ': Expected string';
+  // Context descriptions, used as the first part of an unknown token message
+  SErrXRefindex = 'XRef index';
+  SErrXREFVersion = 'XRef generation';
+  SErrXREFUseToken = 'XRef use token';
+  SErrXREFStartindex = 'XRef start index';
+  SErrXRefCount = 'XREF count';
+  SErrNotAtDictStart = 'Invalid dictionary: not at <<, but found: %s';
+  SErrNotDictKeyToken = 'Invalid dictionary: token is not a key start %s';
+  SErrNotAtDictEnd = 'Invalid dictionary: end not at >> but found: %s';
+  SErrNotOnTrailerDict = 'Not on trailer dictionary: %s';
+  SErrInvalidTrailer = 'Invalid trailer';
+  SErrUnknownConstruct = 'Unknown PDF construct (%s)';
+  SErrMissingPDFHeader = 'Missing PDF header';
+  SErrMissingPDFVersion = 'Missing PDF version';
+  SErrInvalidPDFversion = 'Invalid PDF version: %s';
+  SErrPDFEOFNotFound = 'PDF %%EOF not found';
+  SWarnObjectIDDoesNotMatch = 'Indirect object stream at index %d has id %d, expected %d';
+  SErrObjStmObjectIDInvalid = 'ObjStm Object %d index %d is invalid. Expected object ID';
+  SErrObjStmObjectOffsetInvalid = 'ObjStm Object %d index %d is invalid. Expected offset';
+  SerrSizeElementNotInteger = 'Size Element %d of W in XREf stream dictionary is not an integer value';
+  SErrEOFWhileScanningCommands = 'EOF while scanning commands';
+  SErrNoSuchStreamObject = 'No stream object %d for indirect object %d';
+  SErrInvalidImageData = 'Invalid image data';
+  SErrInvalidImageDataSize = 'Invalid image data size';
+  SErrExpectedHexInteger = '%s: Expected a hexadecimal integer, got: %s';
+  SErrExpectedIdentifierN = '%s: Expected identifier "%s", got "%s"';
+  SErrExpectedName = '%s: Expected name "%s", got "%s"';
+
+{$IFDEF DEBUGSTREAMS}
+{$IFDEF DUMPSTREAMS}
+
+Procedure DumpStream(S : Tstream; const aMessage : String);
+
+// Debug helper: write aMessage, followed by the decimal value of every byte of S,
+// to standard output. A stream that is not a TBytesStream is first copied into a
+// temporary one; the position of S is put back after the copy.
+
+var
+  i : Integer;
+  B : TBytesStream;
+  aPos : Int64;
+
+begin
+  if S is TBytesStream then
+    B:=TBytesStream(S)
+  else
+    begin
+    B:=TBytesStream.Create([]);
+    try
+      // CopyFrom with count 0 copies the whole stream and moves its position.
+      aPos:=S.Position;
+      B.CopyFrom(S,0);
+      S.Position:=aPos;
+    except
+      // Do not leak the temporary stream when the copy fails.
+      B.Free;
+      Raise;
+    end;
+    end;
+  try
+    Write(aMessage,': ');
+    // The Bytes array can be longer than the stream content (its length follows
+    // the capacity), so Size is the correct upper bound.
+    For I:=0 to Integer(B.Size)-1 do
+      Write(B.Bytes[I], ' ');
+    Writeln;
+  finally
+    if S<>B then
+      B.Free;
+  end;
+end;
+{$ENDIF}
+
+Var
+  aFileCount : Integer = 0;
+
+procedure SaveStreamToFile(aContext : String; aStream : TStream; const aFileName : String = '');
+
+// Debug helper: write the complete content of aStream to a file.
+// Without aFileName, a name is built from a running counter and aContext.
+// The position of aStream is restored after copying.
+
+Var
+  lTarget : TFileStream;
+  lName : String;
+  lSavedPos : Int64;
+
+begin
+  Inc(aFileCount);
+  if aFileName<>'' then
+    lName:=aFileName
+  else
+    lName:=Format('streamdata-%.5d-%s.txt',[aFileCount,aContext]);
+  lSavedPos:=aStream.Position;
+  lTarget:=TFileStream.Create(lName,fmCreate);
+  try
+    lTarget.CopyFrom(aStream,0);
+    aStream.Position:=lSavedPos;
+  finally
+    lTarget.Free;
+  end;
+end;
+{$ENDIF}
+
+{ TPDFParser }
+
+function TPDFParser.CreateScanner(aFile: TStream; aBufferSize : Cardinal): TPDFScanner;
+
+// Factory for the scanner; virtual, so descendants can supply another scanner class.
+
+begin
+  Exit(TPDFScanner.Create(aFile,aBufferSize));
+end;
+
+constructor TPDFParser.Create(aFile: TStream; aBufferSize : Cardinal);
+
+// Create the scanner on aFile and switch on loading and resolving of objects
+// and resolving of content streams.
+
+begin
+  FScanner:=CreateScanner(aFile,aBufferSize);
+  FResolveContentStreams:=True;
+  FResolveObjects:=True;
+  FLoadObjects:=True;
+end;
+
+
+procedure TPDFParser.UnsupportedToken(const aMsg: UTF8String; aToken: TPDFToken);
+
+// Report an unexpected token through DoError (penUnknownToken). The message contains
+// aMsg, the name of the token type and at most the first 127 characters of the token data.
+
+Var
+  lTypeName : String;
+  lExcerpt : RawByteString;
+
+begin
+  lTypeName:=GetEnumName(TypeInfo(TPDFTokenType), Ord(aToken.TokenType));
+  lExcerpt:=Copy(aToken.TokenData,1,127);
+  DoError(penUnknownToken, SErrUnknownToken, [aMsg, lTypeName, lExcerpt])
+end;
+
+function TPDFParser.FindIndirectObject(aObjectID, aObjectGeneration: Integer ): TPDFIndirect;
+
+// Find an indirect object: first in the document, then - when an XRef array is
+// available - through the XRef entry for aObjectID, loading the object if the
+// entry has no instance yet.
+
+Var
+  XRef : TPDFXRef;
+
+begin
+  Result:=FDoc.FindInDirectObject(aObjectID,aObjectGeneration);
+  // Only consult the XRef array when the document does not have the object:
+  // otherwise the object just found would be overwritten or loaded a second time.
+  if (Result=Nil) and assigned(FloadingXRef)
+     and (aObjectID>=0) and (aObjectID<Length(FloadingXRef)) then
+    begin
+    XRef:=FloadingXRef[aObjectID];
+    // The array may have no entry for this ID
+    if Assigned(XRef) then
+      begin
+      Result:=Xref.Instance;
+      if Result=nil then
+        Result:=LoadXRefObject(XRef);
+      end;
+    end;
+end;
+
+// Parse the offset following a startxref keyword into a new TPDFStartXRef.
+// When the next token is not a number that fits an Int64, the token is pushed
+// back and reported through UnsupportedToken.
+// On exit, we're on the first token after the XREF offset.
+function TPDFParser.ParseStartXREF(const aToken: TPDFToken): TPDFStartXRef;
+
+Var
+  lOffsetToken : TPDFToken;
+  lOffset : Int64;
+  lValid : Boolean;
+
+begin
+  Result:=TPDFStartXRef.Create();
+  try
+    lOffsetToken:=FScanner.GetToken(False);
+    lValid:=(lOffsetToken.TokenType=ptNumber) and TryStrToInt64(lOffsetToken.TokenData,lOffset);
+    if not lValid then
+      begin
+      FScanner.Unget(lOffsetToken);
+      UnsupportedToken(SErrXrefIndex,lOffsetToken);
+      end
+    else
+      Result.Index:=lOffset;
+  except
+    // Do not leak the result when an error is raised.
+    Result.Free;
+    Raise;
+  end;
+end;
+
+// Read the next token and return it as an integer.
+// A non-integer token is reported through DoError (penExpectedInteger),
+// with aContext and the token data as message arguments.
+function TPDFParser.GetIntegerToken(const aContext: String) : Integer;
+
+Var
+  lNext : TPDFToken;
+
+begin
+  lNext:=FScanner.GetToken(False);
+  if lNext.IsInteger then
+    Result:=lNext.AsInteger
+  else
+    DoError(penExpectedInteger,SErrExpectedInteger, [aContext, lNext.TokenData]);
+end;
+
+// Read the next token and return it as a 64-bit integer.
+// A token that is not an Int64 is reported through DoError (penExpectedInt64),
+// with aContext and the token data as message arguments.
+function TPDFParser.GetInt64Token(const aContext: String): Int64;
+
+Var
+  lNext : TPDFToken;
+
+begin
+  lNext:=FScanner.GetToken(False);
+  if lNext.IsInt64 then
+    Result:=lNext.AsInt64
+  else
+    DoError(penExpectedInt64,SErrExpectedInt64, [aContext, lNext.TokenData]);
+end;
+
+// Read the next token and return its data when it is a keyword token.
+// Any other token type is reported through UnsupportedToken, with aContext
+// prepended to the message.
+function TPDFParser.GetStringToken(const aContext: String) : RawByteString;
+
+Var
+  lNext : TPDFToken;
+
+begin
+  lNext:=FScanner.GetToken(False);
+  if not (lNext.TokenType=ptKeyword) then
+    UnsupportedToken(aContext+SErrExpectedString,lNext)
+  else
+    Result:=lNext.TokenData;
+end;
+
+// Parse indirect object at specified position in stream.
+// The scanner is repositioned to aAt, where "<id> <generation> obj" is expected;
+// each of the three tokens is checked and a mismatch is reported through DoError.
+// The object itself is parsed by the (aID, aGen) overload.
+
+function TPDFParser.ParseIndirectObject(aAt : Int64): TPDFIndirect;
+
+Var
+  lToken,lToken2,ltoken3 : TPDFToken;
+
+begin
+  FScanner.Reposition(aAt);
+  // Object ID
+  lToken:=FScanner.GetToken(False);
+  if not lToken.IsInteger then
+     DoError(penNotIndirectObjectAtToken,SErrNotAnIndirectObjectAtToken,[lToken.TokenData]);
+  // Object generation: check the second token, not the first one again.
+  lToken2:=FScanner.GetToken(False);
+  if Not lToken2.IsInteger then
+    DoError(penNotIndirectObjectAtToken,SErrNotAnIndirectObjectAtToken,[lToken2.TokenData]);
+  // obj keyword
+  lToken3:=FScanner.GetToken(False);
+  if not lToken3.CheckString(SPDFObj) then
+    DoError(penNotIndirectObjectAtToken,SErrNotAnIndirectObjectAtToken,[lToken3.TokenData]);
+  Result:=ParseIndirectObject(lToken.AsInt64,lToken2.AsInteger);
+end;
+
+// Parse the values that make up the body of indirect object aObject and add
+// them to it, until the endobj keyword value is parsed or the source is at EOF.
+// The last dictionary parsed is remembered in FLastDict.
+procedure TPDFParser.ParseIndirectObjectBody(aObject: TPDFIndirect);
+
+Var
+  lItem : TPDFObject;
+  lAtEnd : Boolean;
+  lToken : TPDFToken;
+
+begin
+  DoProgress(pkIndirect,aObject.ObjectID,Length(FloadingXRef));
+  lAtEnd:=False;
+  Repeat
+    // First token of the next value
+    lToken:=FScanner.GetToken(False);
+    lItem:=ParseSingleValue(lToken,True);
+    if lItem is TPDFDictionary then
+      FLastDict:=TPDFDictionary(lItem);
+    if Assigned(lItem) then
+      begin
+      lAtEnd:=(lItem is TPDFValue) and (TPDFValue(lItem).Value=SPDFEndObj);
+      if not lAtEnd then
+        aObject.Add(lItem)
+      else
+        // The endobj marker itself is not part of the object.
+        lItem.Free;
+      end;
+  until lAtEnd or FScanner.Source.IsEOF;
+end;
+
+// Return the result of aObject.ResolveObjectType when ResolveObjects is set
+// and the object has a non-empty object type; return aObject itself otherwise.
+function TPDFParser.MaybeResolve(aObject : TPDFIndirect) : TPDFIndirect;
+
+begin
+  if Self.ResolveObjects and (aObject.ObjectType<>'') then
+    Result:=aObject.ResolveObjectType
+  else
+    Result:=aObject;
+end;
+
+// Create a command instance for command aName with operands aOperands.
+// The class is looked up with TPDFCommand.FindClassForCommand;
+// TPDFUnknownCommand is used when no class is found.
+function TPDFParser.CreateCommand(const aName: string; aOperands: TPDFTokenArray): TPDFCommand;
+
+Var
+  lCommandClass : TPDFCommandClass;
+
+begin
+  lCommandClass:=TPDFCommand.FindClassForCommand(aName);
+  if not Assigned(lCommandClass) then
+    lCommandClass:=TPDFUnknownCommand;
+  Result:=lCommandClass.Create(aName,aOperands);
+end;
+
+// Read the raw data of an inline image (after the ID operator) from aScanner.
+// The data size is computed from the image operands in aOperands
+// (height, width, bits per component, color space components).
+// The data is stored as a ptByteString token in the last element of aOperands.
+// Errors are reported through DoError; filtered image data is not supported
+// and raises ENotImplemented.
+procedure TPDFParser.ParseInlineImageData(var aOperands: TPDFTokenArray;
+  aScanner: TPDFScanner);
+
+Var
+  aData : TPDFImageData;
+  lToken: TPDFToken;
+  aSize : integer;
+  B : TBytes;
+
+begin
+  TPDFImageDataCommand.ParseImageOperands(aOperands,aData);
+  With aData do
+    aSize:=(Height*Width*((BitsPercomponent+7) div 8)) * ColorSpaceComponents;
+  if (aSize<=0) then
+    DoError(penInvalidImageDataSize,SErrInvalidImageDataSize)
+  else
+    begin
+    if not (Length(aData.filters)=0) then
+      Raise ENotImplemented.Create('No support for filtered image data');
+    // A single whitespace character separates ID from the image data.
+    If TPDFScanner.CharacterClass(aScanner.Source.GetByte())<>ccWhitespace then
+      DoError(penInvalidImageData,SErrInvalidImageData);
+    B:=[];
+    SetLength(B,aSize);
+    aScanner.Source.CopyBytes(B,aSize);
+    lToken:=Default(TPDFToken);
+    lToken.TokenType:=ptByteString;
+    SetLength(lToken.TokenData,aSize);
+    // B is a dynamic array: the data starts at B[0]. Passing B itself would
+    // copy from the address of the array reference instead of the bytes.
+    Move(B[0],lToken.TokenData[1],aSize);
+    aOperands[Length(AOperands)-1]:=ltoken;
+    end;
+end;
+
+// Parse the content stream aStream of page aObject into commands.
+// Tokens are collected as operands until a keyword token is found; the keyword
+// is the command name. For the ID (inline image data) command an extra operand
+// is appended, which receives the raw image data.
+// Every command is passed to aOnCommand when assigned, and added to the
+// command list of aObject otherwise.
+// The parser scanner is temporarily replaced by a scanner on aStream.
+procedure TPDFParser.ParseContentStream(aObject: TPDFPageObject; aStream : TStream; aOnCommand : TNewCommandEvent);
+
+var
+  aScanner : TPDFScanner;
+  aToken : TPDFToken;
+  aOperands,aTokens : TPDFTokenArray;
+  CmdName : RawByteString;
+  Cmd : TPDFCommand;
+  I : integer;
+
+begin
+  aOperands:=[];
+  aTokens:=[];
+  SetLength(aTokens,100);
+{$IFDEF DEBUGSTREAMS}
+  SaveStreamToFile('pagecontent',aStream,'content.txt');
+{$ENDIF}
+  aScanner:=FScanner;
+
+  try
+    FScanner:=TPDFScanner.Create(aStream);
+    While Not FScanner.Source.IsEOF do
+      begin
+      I:=0;
+      aToken:=FScanner.GetToken(False);
+      While Not (atoken.TokenType in [ptKeyWord,ptEOF]) do
+        begin
+        // Grow the token buffer in blocks of 100.
+        if I>=Length(aTokens) then
+          SetLength(aTokens,Length(aTokens)+100);
+        aTokens[i]:=aToken;
+        Inc(I);
+        aToken:=FScanner.GetToken(False);
+        end;
+      if (aToken.TokenType=ptEOF) then
+        begin
+        // Operands without a command at the end of the stream.
+        if (I>0) then
+          DoError(penEOFWhileScanningCommands,SErrEOFWhileScanningCommands);
+        end
+      else
+        begin
+        CmdName:=atoken.AsString;
+        aOperands:=Copy(aTokens,0,I);
+        if CmdName='ID' then
+          begin
+          // Reserve an extra, empty operand for the image data. Growing the
+          // operand array explicitly keeps the index valid, also when the
+          // token buffer happens to be exactly full.
+          SetLength(aOperands,I+1);
+          aOperands[I]:=Default(TPDFToken);
+          ParseInlineImageData(aOperands,FScanner);
+          end;
+        Cmd:=CreateCommand(CmdName,aOperands);
+        if Assigned(aOnCommand) then
+          aOnCommand(Self,aObject,Cmd)
+        else
+          aObject.CommandList.Add(Cmd);
+        end;
+      end;
+  finally
+    // Restore the original scanner, free the temporary one.
+    if FSCanner<>aScanner then
+      FreeAndNil(FScanner);
+    FScanner:=aScanner;
+  end;
+end;
+
+// Parse the <start> <end> hex string pairs of a codespacerange section and
+// append them to aMap.CodeSpaceRange.
+// On entry, we're on begincodespacerange.
+// On exit, we're on endcodespacerange
+Procedure TPDFParser.ParseCMAPCodeSpaceRange(aMap : TPDFCMapData);
+
+Var
+  lRanges : TCodeSpaceRangeArray;
+  lUsed,lCapacity : Integer;
+  lToken :TPDFToken;
+  lEntry : TCodeSpaceRange;
+
+begin
+  lRanges:=[];
+  lCapacity:=0;
+  lUsed:=0;
+  lToken:=FScanner.GetToken(False);
+  While lToken.IsHexString do
+    begin
+    // Make room in blocks of 100 entries.
+    if lUsed>=lCapacity then
+      begin
+      lCapacity:=Length(lRanges)+100;
+      SetLength(lRanges,lCapacity);
+      end;
+    lEntry.RangeStart:=lToken.AsBEHexInteger;
+    lToken:=FScanner.GetToken(False);
+    if not lToken.IsHexString then
+      DoError(penExpectedHexInteger,SErrExpectedHexInteger,['codespacerange',lToken.TokenData]);
+    lEntry.RangeEnd:=lToken.AsBEHexInteger;
+    lRanges[lUsed]:=lEntry;
+    inc(lUsed);
+    lToken:=FScanner.GetToken(false);
+    end;
+  if not lToken.CheckString('endcodespacerange') then
+    DoError(penExpectedIdentifierN,SErrExpectedIdentifierN,['codespacerange','endcodespacerange',lToken.TokenData]);
+  // Drop the unused part of the last block.
+  SetLength(lRanges,lUsed);
+  aMap.CodeSpaceRange:=Concat(aMap.CodeSpaceRange,lRanges);
+end;
+
+// Parse the entries of a bfchar section and append them to aMap.BFChars.
+// Every entry is a source hex string, followed by either a destination
+// hex string or a destination name.
+// On entry, we're on beginbfchar.
+// On exit, we're on endbfchar
+Procedure TPDFParser.ParseCMAPBFChar(aMap : TPDFCMapData);
+
+Var
+  L : TBFCharArray;
+  aCount: Integer;
+  Len : Integer;
+
+  // Make sure there is room for the next entry; grows in blocks of 100.
+  Procedure Grow;
+
+  begin
+    if aCount<Len then
+      exit;
+    Len:=Length(l);
+    Len:=Len+100;
+    SetLength(L,Len);
+  end;
+
+Var
+  lToken :TPDFToken;
+  aChar : TBFChar;
+
+begin
+  L:=[];
+  Len:=0;
+  aCount:=0;
+  lToken:=FScanner.GetToken(False);
+  While lToken.IsHexString do
+    begin
+    Grow;
+    // Start from an empty entry: only one of Dest/DestName is set per entry,
+    // the other one must not keep the value of a previous entry.
+    aChar:=Default(TBFChar);
+    aChar.Src:=lToken.AsBEHexInteger;
+    lToken:=FScanner.GetToken(False);
+    if ltoken.IsHexString then
+      aChar.Dest:=ltoken.AsBEHexInteger
+    else if ltoken.IsName then
+      aChar.DestName:=lToken.AsName
+    else
+      DoError(penExpectedHexInteger,SErrExpectedHexInteger,['bfchar',ltoken.TokenData]);
+    l[aCount]:=aChar;
+    inc(aCount);
+    lToken:=FScanner.GetToken(False);
+    end;
+  if not lToken.CheckString('endbfchar') then
+    DoError(penExpectedIdentifierN,SErrExpectedIdentifierN,['bfchar','endbfchar',ltoken.TokenData]);
+  SetLength(L,aCount);
+  aMap.BFChars:=Concat(aMap.BFChars,L);
+end;
+
+// Parse the entries of a bfrange section and append them to aMap.BFRange.
+// Every entry is a start and an end hex string, followed by either a hex
+// string (character offset) or an array [ ... ] of hex strings and/or names.
+// The loop stops at the first token that is not a hex string, which must be
+// endbfrange.
+Procedure TPDFParser.ParseCMAPBFRange(aMap : TPDFCMapData);
+
+Var
+  L : TCIDRangeArray;
+  aNames : TCIDUnicodeCharOrNameArray;
+  aNameCount,
+  aCount: Integer;
+  NameLen,
+  Len : Integer;
+
+  // Make sure there is room for the next range; grows in blocks of 100.
+  Procedure Grow;
+
+  begin
+    if aCount<Len then
+      exit;
+    Len:=Length(l);
+    Len:=Len+100;
+    SetLength(L,Len);
+  end;
+
+  // Make sure there is room for the next array element; grows in blocks of 100.
+  Procedure GrowNames;
+
+  begin
+    if aNameCount<NameLen then
+      exit;
+    NameLen:=Length(aNames);
+    NameLen:=NameLen+100;
+    SetLength(aNames,NameLen);
+  end;
+
+Var
+  lToken :TPDFToken;
+  aRange : TCIDRange;
+
+begin
+  L:=[];
+  aNames:=[];
+  Len:=0;
+  aCount:=0;
+  aNameCount:=0;
+  NameLen:=0;
+  lToken:=FScanner.GetToken(False);
+  While lToken.IsHexString do
+    begin
+    Grow;
+    // Start from an empty range: only one of CharOffset/CharNames is set per
+    // range, the other one must not keep the value of a previous range.
+    aRange:=Default(TCIDRange);
+    aRange.RangeStart:=lToken.AsBEHexInteger;
+    lToken:=FScanner.GetToken(False);
+    if not ltoken.IsHexString then
+      DoError(penExpectedHexInteger,SErrExpectedHexInteger,['bfrange',ltoken.TokenData]);
+    aRange.RangeEnd:=lToken.AsBEHexInteger;
+    lToken:=FScanner.GetToken(False);
+    if ltoken.IsHexString then
+      aRange.CharOffset:=ltoken.AsBEHexInteger
+    else if ltoken.TokenType=ptSquareOpen then
+      begin
+      // Array of destinations: a fresh array for every range.
+      aNames:=[];
+      aNameCount:=0;
+      NameLen:=0;
+      lToken:=FScanner.GetToken(False);
+      While ltoken.TokenType<>ptSquareClose do
+        begin
+        GrowNames;
+        if ltoken.IsHexString then
+          aNames[aNameCount].UnicodeChar:=lToken.AsBEHexInteger
+        else if ltoken.IsName then
+          aNames[aNameCount].Name:=ltoken.AsName
+        else
+          DoError(penExpectedName,SErrExpectedName,['bfrange',ltoken.TokenData]);
+        inc(aNameCount);
+        lToken:=FScanner.GetToken(False);
+        end;
+      SetLength(aNames,aNameCount);
+      aRange.CharNames:=aNames;
+      end
+    else
+      DoError(penExpectedHexInteger,SErrExpectedHexInteger,['bfrange',ltoken.TokenData]);
+    l[aCount]:=aRange;
+    inc(aCount);
+    lToken:=FScanner.GetToken(False);
+    end;
+  if not lToken.CheckString('endbfrange') then
+    DoError(penExpectedIdentifierN,SErrExpectedIdentifierN,['bfrange','endbfrange',ltoken.TokenData]);
+  SetLength(L,aCount);
+  aMap.BFRange:=Concat(aMap.BFRange,L);
+end;
+
+
+// Parse the CMap in aStream: a new TPDFCMapData is created, assigned to
+// aMap.Data and filled from the codespacerange, bfrange and bfchar sections.
+// Parsing stops at endcmap or at the end of the stream.
+// The parser scanner is temporarily replaced by a scanner on aStream.
+Procedure TPDFParser.ParseCMap(aStream : TStream; aMap : TPDFCMap);
+
+Var
+  lSavedScanner: TPDFScanner;
+  lCurrent : TPDFToken;
+  lMapData : TPDFCMapData;
+
+begin
+  lSavedScanner:=FScanner;
+  FScanner:=Nil;
+  try
+    FScanner:=TPDFScanner.Create(aStream);
+    lMapData:=TPDFCMapData.Create;
+    aMap.Data:=lMapData;
+    lCurrent:=FScanner.GetToken(False);
+    while not (lCurrent.TokenType=ptEOF) do
+      begin
+      // Dispatch on the section start keyword; other tokens are skipped.
+      if lCurrent.CheckString('begincodespacerange') then
+        ParseCMAPCodeSpaceRange(lMapData)
+      else if lCurrent.CheckString('beginbfrange') then
+        ParseCMAPBFRange(lMapData)
+      else if lCurrent.CheckString('beginbfchar') then
+        ParseCMAPBFChar(lMapData)
+      else if lCurrent.CheckString('endcmap') then
+        break;
+      lCurrent:=FScanner.GetToken(False);
+      end;
+  finally
+    // Free the temporary scanner and restore the original one.
+    FScanner.Free;
+    FScanner:=lSavedScanner;
+  end;
+end;
+
+// Create a new TPDFCMap and fill it by parsing aStream.
+// The map is freed again when parsing raises an exception.
+function TPDFParser.ParseCMap(aStream : TStream) : TPDFCMap;
+
+begin
+  Result:=TPDFCMap.Create;
+  try
+    // SaveStreamToFile only exists when DEBUGSTREAMS is defined.
+{$IFDEF DEBUGSTREAMS}
+    SaveStreamToFile('parsecmap',aStream);
+{$ENDIF}
+    ParseCMap(aStream,Result);
+  except
+    Result.Free;
+    Raise;
+  end;
+end;
+
+// For every font object in aDoc that has no UnicodeCMap yet: resolve its
+// ToUnicode map stream object, run the stream data through ResolveFilters,
+// store the result as the unfiltered stream of that object and parse it into
+// the UnicodeCMap of the font.
+procedure TPDFParser.ResolveToUnicodeCMaps(aDoc: TPDFDocument);
+
+var
+  lObject : TPDFObject;
+  lFont : TPDFFontObject;
+  lMapObject : TPDFIndirect;
+  lData : TStream;
+
+begin
+  For lObject in aDoc do
+    begin
+    if not (lObject is TPDFFontObject) then
+      Continue;
+    lFont:=TPDFFontObject(lObject);
+    if Assigned(lFont.UnicodeCMap) then
+      Continue;
+    lMapObject:=lFont.ResolveToUnicodeMapStream(aDoc);
+    if not (assigned(lMapObject) and assigned(lMapObject.Stream)) then
+      Continue;
+    lData:=TBytesStream.Create(lMapObject.Stream.Data);
+    lData:=ResolveFilters(lData,lMapObject.ObjectDict);
+    lMapObject.UnfilteredStream:=lData;
+    lFont.UnicodeCMap:=ParseCMap(lData);
+    end;
+end;
+
+// Parse the indirect object with ID aID and generation aGen.
+// The result is the parsed object, or the object returned by MaybeResolve for
+// it; in the latter case the original object is freed.
+// When an exception is raised, no object is leaked; an EPDFParser exception
+// gets the object ID and generation added (message and properties) before it
+// is re-raised.
+// On entry, we are on obj, on exit, we are on endObj
+function TPDFParser.ParseIndirectObject(aID, aGen: Int64): TPDFIndirect;
+
+Var
+  Obj : TPDFIndirect;
+
+begin
+  Result:=nil;
+  Obj:=TPDFIndirect.Create();
+  try
+    Obj.ObjectID:=aID;
+    Obj.ObjectGeneration:=aGen;
+    ParseIndirectObjectBody(Obj);
+    try
+      Result:=MaybeResolve(Obj);
+    finally
+      // Resolved to another instance (or resolving failed): drop the original.
+      if Result<>Obj then
+        FreeAndNil(Obj);
+    end;
+  except
+    on E : Exception do
+      begin
+      if E is EPDFParser then
+        begin
+        E.Message:=Format('(Obj %d %d) ',[aId,aGen])+E.Message;
+        EPDFParser(E).ObjectGeneration:=aGen;
+        EPDFParser(E).ObjectID:=aID;
+        end;
+      // FLastDict may point to a dictionary owned by the object freed below.
+      FLastDict:=Nil;
+      if Result<>Obj then
+        Result.Free;
+      // Obj is still assigned when parsing the body failed: free it here.
+      Obj.Free;
+      Raise
+      end;
+  end;
+end;
+
+
+// Parse one XREF table entry: object offset, generation and use marker.
+// The entry is in use when the marker is 'n'. aIndex is stored as the
+// reference index of the new entry.
+function TPDFParser.ParseXREFItem(aIndex : Integer): TPDFXRef;
+
+Var
+  lOffset : Int64;
+  lGeneration : Integer;
+  lUseMarker : String;
+
+begin
+  // Read all 3 tokens first, so nothing is created when one of them is invalid.
+  lOffset:=GetInt64Token(SErrXREFindex);
+  lGeneration:=GetIntegerToken(SErrXREFVersion);
+  lUseMarker:=GetStringToken(SErrXREFUseToken);
+  Result:=TPDFXRef.Create();
+  Result.ReferenceIndex:=aIndex;
+  Result.ObjectOffset:=lOffset;
+  Result.ObjectGeneration:=lGeneration;
+  Result.InUse:=(lUseMarker='n');
+end;
+
+// Parse the subsections of an XREF table. Every subsection starts with a
+// start index and a count, followed by count entries.
+// An entry is stored in FloadingXRef (and added to the result) when its slot
+// is still empty; entries for an already filled slot or with an index outside
+// FloadingXRef are discarded, the latter with a warning.
+// On exit, we're on the first token after the XREF items
+function TPDFParser.ParseXREF(const aToken: TPDFToken): TPDFXRefList;
+
+Var
+  I,aStartIndex,aCount : Integer;
+  lToken : TPDFToken;
+  Itm : TPDFXRef;
+
+begin
+  Result:=TPDFXRefList.Create();
+  try
+    Repeat
+      aStartIndex:=GetIntegerToken(SErrXREFStartindex);
+      aCount:=GetIntegerToken(SErrXRefCount);
+      For I:=1 to aCount do
+        begin
+        Itm:=ParseXREFItem(aStartIndex);
+        // The index comes from the file: reject negative values as well.
+        if (aStartIndex<0) or (aStartIndex>=Length(FloadingXRef)) then
+          begin
+          DoWarning('Object ID %d Out Of Range [0..%d[',[aStartIndex,Length(FloadingXRef)]);
+          Itm.Free;
+          end
+        else if FloadingXRef[aStartIndex]=Nil then
+          begin
+          FloadingXRef[aStartIndex]:=Itm;
+          Result.Add(Itm);
+          end
+        else
+          // Slot already filled: keep the existing entry.
+          Itm.Free;
+        Inc(aStartindex);
+        DoProgress(pkXRef,I,aCount);
+        end;
+      // Peek at the next token: an integer starts a new subsection.
+      lToken:=FScanner.GetToken(False);
+      FScanner.Unget(lToken);
+    until Not lToken.IsInteger;
+  except
+    Result.Free;
+    Raise;
+  end;
+end;
+
+// Collect aToken and following tokens in a TPDFMalFormed object. On exit, we're on the first End token or EOF
+function TPDFParser.ParseMalFormed(aToken: TPDFToken; aEndToken: RawByteString
+  ): TPDFMalFormed;
+// NOTE(review): with an empty aEndToken the loop below is never entered; one token is still read and not kept - confirm this is intended.
+var
+  Token : TPDFToken;
+  Content : Array of TPDFToken;
+
+begin
+  Result:=Nil;
+  Content:=[atoken]; // The start token is always part of the content
+  Token:=FScanner.GetToken;
+  While (Token.TokenType<>ptEOF) and ((aEndToken<>'') and (Token.TokenData<>aEndToken)) do
+    begin
+    Content:=Concat(Content,[Token]);
+    Token:=FScanner.GetToken;
+    end;
+  Result:=TPDFMalFormed.Create(Content); // The end token itself is not added to the content
+end;
+
+// Parse the data of a stream object. aStartToken must be the stream keyword.
+// The data starts after the first byte with value 10 following the keyword.
+// When aStreamLength is positive, exactly that many bytes are read and the
+// next token must be the endstream keyword. Otherwise the data is read
+// through the scanner's ReadStreamData.
+// Errors are reported through DoError.
+function TPDFParser.ParseStreamValue(aStartToken: TPDFToken;
+  aStreamLength: Integer): TPDFStream;
+
+Var
+  S : TBytesStream;
+  B : TBytes;
+  lToken : TPDFToken;
+  lSize : Int64;
+
+begin
+  if not aStartToken.CheckString(SPDFStream) then
+    DoError(penNotAtStreamStart,SErrNotAtStreamStart,[aStartToken.TokenData]);
+  B:=[];
+  Result:=Nil;
+  // Make the end-of-stream check below deterministic when no token is read.
+  lToken:=Default(TPDFToken);
+  if FScanner.FindByte(10)=-1 then
+    DoError(penExpectedWhiteSpaceAfterStream,SErrExpectedWhiteSpaceAfterStream);
+  FScanner.Source.Next;
+  if aStreamLength>0 then
+    begin
+    SetLength(B,aStreamLength);
+    FScanner.CopyBytes(B,aStreamLength);
+    lToken:=FScanner.GetToken(False);
+    end
+  else
+    begin
+    // Unknown (or invalid, negative) length: let the scanner collect the data.
+    lSize:=0;
+    S:=TBytesStream.Create(B);
+    try
+      FScanner.ReadStreamData(S);
+      lSize:=S.Size;
+    finally
+      // The byte array is reference counted and outlives the stream object.
+      B:=S.Bytes;
+      S.Free;
+    end;
+    // The internal buffer can be larger than the data written to the stream.
+    SetLength(B,lSize);
+    // NOTE(review): no token is read on this path, so the check below reports
+    // an invalid stream end - confirm whether ReadStreamData leaves the
+    // scanner before or after the endstream keyword.
+    end;
+  if Not ((lToken.TokenType=ptKeyword) and (lToken.TokenData=SPDFEndStream)) then
+    DoError(penInvalidStreamEnd,SErrInvalidStreamEnd, [ltoken.TokenType, ltoken.TokenData]);
+  Result:=TPDFStream.Create(B);
+end;
+
+// Parse an array: values are parsed and added until ] (or EOF) is found.
+// aPrevToken must be the [ token, anything else is reported through DoError.
+// The array is freed again when parsing raises an exception.
+// Start on [, end at ]
+function TPDFParser.ParseArray(aPrevToken: TPDFToken): TPDFArray;
+
+var
+  lToken : TPDFToken;
+  V : TPDFObject;
+
+begin
+  if aPrevToken.TokenType<>ptSquareOpen then
+    DoError(penNoArrayStartToken,SErrNoArrayStartToken,[aPrevToken.TokenData]);
+  Result:=TPDFArray.Create();
+  try
+    lToken:=FScanner.GetToken(False);
+    While Not (lToken.TokenType in [ptSquareClose,ptEOF]) do
+      begin
+      V:=ParseSingleValue(lToken,True);
+      // Tokens that do not result in a value are skipped.
+      if V<>Nil then
+        Result.Add(V);
+      lToken:=FScanner.GetToken(False);
+      end;
+  except
+    // Same as the other parse functions: do not leak the result on error.
+    Result.Free;
+    Raise;
+  end;
+end;
+
+// Determine the length of the stream that belongs to the last parsed
+// dictionary (FLastDict), from its length entry.
+// The entry is either a direct value, or a reference to an indirect object
+// that contains exactly one value. In the latter case the scanner is put back
+// at aReposition when looking up the object moved it.
+// Errors are reported through DoError.
+function TPDFParser.FindStreamLength(aReposition: Int64): Integer;
+
+Var
+  lLength : TPDFObject;
+  lRef : TPDFRef;
+  lTarget : TPDFIndirect;
+
+begin
+  if Not Assigned(FLastDict) then
+    DoError(penNoDictionaryForLength,SErrNoDictionaryForLength);
+  lLength:=FLastDict.FindValue(SPDFKeyLength);
+  if Not Assigned(lLength) then
+    DoError(penNoLengthEntryInDictionary,SErrNoLengthEntryInDictionary);
+  if (lLength is TPDFValue) then
+    // Direct value
+    Result:=TPDFValue(lLength).AsInteger
+  else if lLength is TPDFRef then
+    begin
+    // Indirect value: look up the referenced object
+    lRef:=TPDFRef(lLength);
+    lTarget:=FindIndirectObject(lRef.ObjectID,lRef.ObjectGeneration);
+    if assigned(lTarget) then
+      begin
+      if Not ((lTarget.Count=1) and lTarget.Objects[0].InheritsFrom(TPDFValue)) then
+        DoError(penDictionaryNoLengthObject,SErrDictionaryNoLengthObject,[lRef.ObjectID,lRef.ObjectGeneration]);
+      Result:=TPDFValue(lTarget.Objects[0]).AsInteger
+      end
+    else
+      DoError(penDictionaryNoLengthObject,SErrDictionaryNoLengthObject,[lRef.ObjectID,lRef.ObjectGeneration]);
+    if FScanner.Position<>aRePosition then
+      FScanner.Reposition(aRePosition,True);
+    end
+  else
+    DoError(penDictionaryLengthNotValue,SErrDictionaryLengthNotValue);
+end;
+
+// Parse a single value, starting at aStartToken.
+// When SkipWhiteSpace is set and the start token is whitespace, the next token
+// is used instead.
+// Keywords, names and numbers result in a TPDFValue with the token data,
+// strings and hex strings in a TPDFValue with the token as string; the stream
+// keyword (in any letter case), << and [ result in a stream, dictionary and
+// array. Nil is returned for all other token types.
+// On entry, we're on the first token of the value or whitespace
+function TPDFParser.ParseSingleValue(aStartToken: TPDFToken; SkipWhiteSpace : Boolean): TPDFObject;
+
+begin
+  Result:=nil;
+  if SkipWhiteSpace and (aStartToken.isWhiteSpace) then
+    aStartToken:=FScanner.GetToken(False);
+  Case aStartToken.TokenType of
+    ptKeyword:
+      if LowerCase(aStartToken.TokenData)=SPDFStream then
+        Result:=ParseStreamValue(aStartToken,FindStreamLength(FScanner.Position))
+      else
+        Result:=TPDFValue.Create(aStartToken.TokenData,aStartToken.TokenType);
+    ptName,
+    ptNumber:
+      Result:=TPDFValue.Create(aStartToken.TokenData,aStartToken.TokenType);
+    ptHexString,
+    ptString:
+      Result:=TPDFValue.Create(aStartToken.AsString,aStartToken.TokenType);
+    ptShl:
+      Result:=ParseDictionary(aStartToken);
+    ptSquareOpen:
+      Result:=ParseArray(aStartToken);
+    else
+      // Whitespace and all remaining token types: no value
+      Result:=Nil;
+    end;
+
+end;
+
+// Parse the value of a dictionary entry. On entry, we're still on the start token of the value
+function TPDFParser.ParseDictValue: TPDFObject;
+// Returns the parsed value, or a TPDFRef when the value has the form "<id> <generation> R".
+Var
+  aObj: TPDFObject;
+  aID,aGen : Int64;
+  lToken : TPDFToken;
+  aValue : TPDFValue absolute aObj ;
+// aValue is the same variable as aObj, typed as TPDFValue.
+
+begin
+  lToken:=FScanner.GetToken;
+  aObj:=ParseSingleValue(lToken,True);
+  if (aObj is TPDFValue) and aValue.IsInt64 then // An integer can be the start of an indirect reference: look ahead
+      begin
+      aID:=aValue.AsInt64;
+      lToken:=FSCanner.GetToken(false);
+      if (lToken.TokenType in [ptName,ptShr,ptEOF]) then // Next key, end of dictionary or EOF: plain integer value
+        FScanner.Unget(lToken)
+      else if lToken.IsInteger then // Second integer: the R keyword must follow
+        begin
+        FreeAndNil(aObj);
+        aGen:=lToken.AsInt64;
+        lToken:=FScanner.GetToken(False);
+        if not lToken.CheckString(SPDFTokenR) then
+          DoError(penInvalidDictionaryRef,SErrInvalidDictionaryRef,[lToken.TokenData]);
+        aObj:=TPDFRef.Create(aID,aGen)
+        end
+      else if lToken.isString and not lToken.CheckString(SPDFPageKeyID) then // NOTE(review): tokens not matched by any branch are consumed, not pushed back - confirm
+        FScanner.Unget(lToken)
+      end;
+  Result:=aObj;
+end;
+
+function TPDFParser.ParseValue(StopAt: TPDFTokenTypes): TPDFObject;
+// Parse values until a token with a type in StopAt is found; that token is pushed back.
+// Starts before First token of value
+// Ends at last token of value
+var
+  lToken : TPDFToken;
+  Container : TPDFContainer;
+// A single value is returned as-is; as soon as a second value is parsed, the values are collected in a TPDFContainer.
+begin
+  Result:=Nil;
+  Container:=Nil;
+  lToken:=FScanner.GetToken(ptWhiteSpace in StopAt);
+  repeat
+    If (Result<>Nil) and (Container=Nil) then // A value from a previous iteration exists: switch to a container
+      begin
+      Container:=TPDFContainer.Create();
+      Container.Add(Result);
+      Result:=Nil;
+      end;
+    Result:=ParseSingleValue(lToken,Not (ptWhiteSpace in StopAt));
+    If (Result<>Nil) and (Container<>Nil) then
+      begin
+      Container.Add(Result);
+      Result:=Nil;
+      end;
+    lToken:=FScanner.GetToken(ptWhiteSpace in StopAt);
+    if Not (lToken.TokenType in StopAt) then // NOTE(review): a token that is not a stop token is replaced by the next one here, so it is never parsed - confirm this is intended
+      lToken:=FScanner.GetToken(ptWhiteSpace in StopAt);
+  Until (lToken.TokenType in StopAt);
+  If Result=Nil then
+    Result:=Container;
+  FScanner.Unget(lToken); // Leave the stop token for the caller
+end;
+
+
+
+
+
+// Parse the key/value pairs of a dictionary and add them to aDict.
+// aToken must be the << token. Every key must be a name token; the value is
+// parsed with ParseDictValue. Parsing stops at >> or EOF; stopping at anything
+// but >> is an error. Errors are reported through DoError.
+// On entry, we're at <<, on exit we're on >>
+
+procedure TPDFParser.DoParseDictionary(aDict: TPDFDictionary;
+  const aToken: TPDFToken);
+
+Var
+  aName : RawByteString;
+  aValue : TPDFObject;
+  lToken : TPDFToken;
+
+begin
+  aName:='';
+  aValue:=Nil;
+  if aToken.TokenType<>ptShl then
+    DoError(penNotAtDictStart,SErrNotAtDictStart,[aToken.TokenData]);
+  lToken:=FScanner.GetToken(False);
+  While Not (lToken.TokenType in [ptEOF,ptShr]) do
+    begin
+    if lToken.TokenType<>ptName then
+      DoError(penNotDictKeyToken,SErrNotDictKeyToken,[lToken.TokenData]);
+    aName:=lToken.TokenData;
+    aValue:=ParseDictValue();
+    aDict.AddEntry(aName,aValue);
+    lToken:=FScanner.GetToken(False);
+    end;
+  // Report the token the dictionary stopped at, not the << start token.
+  if lToken.TokenType<>ptShr then
+    DoError(penNotAtDictEnd,SErrNotAtDictEnd,[lToken.TokenData]);
+end;
+
+// Decode the hexadecimal text in aSrc (2 characters per byte) into aDest.
+// When aDest is a TBytesStream that is already large enough, the decoded bytes
+// are written directly in its buffer (stream size and position are not
+// changed); otherwise they are written to aDest at its current position.
+// Nothing is done when aSrc contains less than 2 characters.
+class procedure TPDFParser.AsciiHEXDecode(aSrc, aDest: TStream);
+
+Var
+  B,B2 : TBytes;
+  Direct : Boolean;
+  SrcLen,BinLen : Integer;
+
+begin
+  B:=[];
+  B2:=[];
+  SrcLen:=aSrc.Size;
+  if SrcLen<2 then
+    exit;
+  if aSrc is TBytesStream then
+    B:=TBytesStream(aSrc).Bytes
+  else
+    begin
+    // Read the complete source once.
+    SetLength(B,SrcLen);
+    aSrc.Position:=0;
+    aSrc.ReadBuffer(B[0],SrcLen);
+    end;
+  // Use the stream size, the byte buffer of a TBytesStream can be larger.
+  BinLen:=SrcLen div 2;
+  Direct:=(aDest is TBytesStream) and (aDest.Size>=BinLen);
+  if Direct then
+    // Decode straight into the buffer of the destination stream.
+    B2:=TBytesStream(aDest).Bytes
+  else
+    SetLength(B2,BinLen);
+  HexToBin(PChar(B),PChar(B2),BinLen);
+  if not Direct then
+    aDest.WriteBuffer(B2[0],BinLen);
+end;
+
+// Decode the hexadecimal text in aSrc into a new TBytesStream, created on a
+// buffer of half the source size. The caller owns the returned stream.
+class function TPDFParser.AsciiHEXDecode(aSrc: TStream): TStream;
+
+var
+  lBuffer : TBytes;
+  lDecoded : TBytesStream;
+
+begin
+  lBuffer:=[];
+  SetLength(lBuffer,aSrc.Size div 2);
+  lDecoded:=TBytesStream.Create(lBuffer);
+  try
+    AsciiHEXDecode(aSrc,lDecoded);
+  except
+    lDecoded.Free;
+    Raise;
+  end;
+  Result:=lDecoded;
+end;
+
+// Decode the ASCII85 data in aSrc and copy the decoded bytes to aDest.
+// aSrc is not freed: the decoder stream does not own its source.
+class procedure TPDFParser.Ascii85Decode(aSrc, aDest: TStream);
+
+Var
+  lDecoder : TASCII85DecoderStream;
+
+begin
+  lDecoder:=TASCII85DecoderStream.create(aSrc);
+  try
+    lDecoder.SourceOwner:=False;
+    // Count 0: copy everything the decoder produces.
+    aDest.CopyFrom(lDecoder,0);
+  finally
+    lDecoder.Free;
+  end;
+end;
+
+// Decode the ASCII85 data in aSrc into a new TBytesStream.
+// The caller owns the returned stream.
+class function TPDFParser.Ascii85Decode(aSrc: TStream): TStream;
+
+var
+  lDecoded : TBytesStream;
+
+begin
+  lDecoded:=TBytesStream.Create([]);
+  try
+    Ascii85Decode(aSrc,lDecoded);
+  except
+    lDecoded.Free;
+    Raise;
+  end;
+  Result:=lDecoded;
+end;
+
+// Decompress the LZW data in aSrc and copy the decompressed bytes to aDest.
+// aSrc is not freed: the decompression stream does not own its source.
+class procedure TPDFParser.LZWDecode(aSrc, aDest: TStream);
+Var
+  Defl : TLZWDecompressionStream;
+
+begin
+  Defl:=TLZWDecompressionStream.create(aSrc,[zoTIFFCodes]);
+  try
+    Defl.SourceOwner:=False;
+    // Copy from the decompressor: copying from aSrc would only duplicate
+    // the compressed data.
+    aDest.CopyFrom(Defl,0);
+  finally
+    Defl.Free;
+  end;
+end;
+
+// Decompress the data in aSrc with a TDecompressionStream and copy the
+// decompressed bytes to aDest.
+// aSrc is not freed: the decompression stream does not own its source.
+class procedure TPDFParser.Deflate(aSrc, aDest: TStream);
+
+Var
+  lInflater : TDecompressionStream;
+
+begin
+  lInflater:=TDecompressionStream.create(aSrc,False);
+  try
+    lInflater.SourceOwner:=False;
+    // Count 0: copy everything the decompressor produces.
+    aDest.CopyFrom(lInflater,0);
+  finally
+    lInflater.Free;
+  end;
+end;
+
+// Run LZWDecode on aSrc with a new TBytesStream as destination.
+// The caller owns the returned stream.
+class function TPDFParser.LZWDecode(aSrc: TStream): TStream;
+
+var
+  lDecoded : TBytesStream;
+
+begin
+  lDecoded:=TBytesStream.Create([]);
+  try
+    LZWDecode(aSrc,lDecoded);
+  except
+    lDecoded.Free;
+    Raise;
+  end;
+  Result:=lDecoded;
+end;
+
+// Run Deflate on aSrc with a new TBytesStream as destination.
+// The caller owns the returned stream.
+class function TPDFParser.Deflate(aSrc: TStream): TStream;
+
+var
+  lDecoded : TBytesStream;
+
+begin
+  lDecoded:=TBytesStream.Create([]);
+  try
+    Deflate(aSrc,lDecoded);
+  except
+    lDecoded.Free;
+    Raise;
+  end;
+  Result:=lDecoded;
+end;
+
+// Decode the run-length encoded data in aSrc (PDF RunLengthDecode filter)
+// and write the result to aDest.
+// The data is a sequence of runs, each starting with a length byte:
+//   0..127   : the next length+1 bytes are copied literally;
+//   129..255 : the next byte is repeated 257-length times;
+//   128      : end of data.
+// Decoding stops at the end-of-data marker or at the end of aSrc.
+class procedure TPDFParser.RunlengthDecode(aSrc, aDest: TStream);
+
+Var
+  RLE,B : Byte;
+  Cnt : Integer;
+  Buf : Array[0..128] of byte;
+
+begin
+  RLE:=128;
+  While aSrc.Read(RLE,1)=1 do
+    begin
+    if RLE<128 then
+      // Literal run. Note that the count is never 0, which would make
+      // CopyFrom copy the complete source stream.
+      aDest.CopyFrom(aSrc,RLE+1)
+    else if RLE>128 then
+      begin
+      B:=aSrc.ReadByte;
+      Cnt:=257-RLE;
+      FillChar(Buf,Cnt,B);
+      aDest.WriteBuffer(Buf,Cnt);
+      end
+    else
+      // 128: end of data
+      Break;
+    end;
+end;
+
+// Run RunlengthDecode on aSrc with a new TBytesStream as destination.
+// The caller owns the returned stream.
+class function TPDFParser.RunlengthDecode(aSrc: TStream): TStream;
+
+var
+  lDecoded : TBytesStream;
+
+begin
+  lDecoded:=TBytesStream.Create([]);
+  try
+    RunlengthDecode(aSrc,lDecoded);
+  except
+    lDecoded.Free;
+    Raise;
+  end;
+  Result:=lDecoded;
+end;
+
+// Parse a dictionary into a new TPDFDictionary. aToken must be the << token,
+// anything else is reported through DoError.
+// The dictionary is freed again when parsing raises an exception.
+// On entry, we're at <<, on exit we're on >>
+function TPDFParser.ParseDictionary(const aToken: TPDFToken): TPDFDictionary;
+
+var
+  lDict : TPDFDictionary;
+
+begin
+  if not (aToken.TokenType=ptShl) then
+    DoError(penNotAtDictStart,SErrNotAtDictStart,[aToken.TokenData]);
+  lDict:=TPDFDictionary.Create();
+  try
+    DoParseDictionary(lDict,aToken);
+  except
+    lDict.Free;
+    Raise;
+  end;
+  Result:=lDict;
+end;
+
+// Parse the trailer dictionary into a new TPDFTrailer. aToken must be the
+// trailer keyword and the next token must be <<; anything else is reported
+// through DoError.
+// The trailer is freed again when parsing raises an exception.
+// on entry, we're on 'Trailer';
+function TPDFParser.ParseTrailer(const aToken: TPDFToken): TPDFTrailer;
+
+Var
+  lDictStart : TPDFToken;
+  lTrailer : TPDFTrailer;
+
+begin
+  if not aToken.CheckString(SPDFTrailer) then
+    DoError(penNotOnTrailerDict,SErrNotOnTrailerDict,[aToken.TokenData]);
+  lTrailer:=TPDFTrailer.Create();
+  try
+    lDictStart:=FScanner.GetToken(False);
+    if not (lDictStart.TokenType=ptShl) then
+      DoError(penInvalidTrailer,SErrInvalidTrailer);
+    DoParseDictionary(lTrailer,lDictStart);
+  except
+    lTrailer.Free;
+    Raise;
+  end;
+  Result:=lTrailer;
+end;
+
+function TPDFParser.GetObject: TPDFObject;
+// Read the next top-level object: comment, indirect object, trailer, xref table, startxref or malformed content. Returns Nil at EOF.
+Var
+  Content : Array of TPDFToken;
+  Token,Token2,Token3 : TPDFToken;
+  TokenStr : RawByteString;
+  Context : TPDFContext;
+  I1,I2 : Int64;
+// Context is only ever set to cNone in this routine, so the (Context<>cNone) branches below are not taken.
+begin
+  Result:=Nil;
+  Context:=cNone;
+  Content:=[];
+  Repeat
+    Token:=FScanner.GetToken(Context=cObj);
+    TokenStr:=Token.TokenData;
+  //  Writeln('Examining Token (',Token.TokenType,') : >>>',Token.TokenData,'<<<');
+    Case Token.TokenType of
+      ptEOF:
+        exit;
+      ptComment:
+        begin
+        if Context<>cNone then
+          Content:=Concat(Content,[Token])
+        else
+          begin
+          Result:=TPDFComment.Create(Token.TokenData); // A top-level comment is returned as an object of its own
+          Exit;
+          end;
+        end;
+      ptName:
+        begin
+          Token2:=FScanner.GetToken(True); // Note: this token is read even when it is not used below
+          if Context<>cNone then
+            begin
+            Token.TokenType:=ptName;
+            Token.TokenData:=Token.TokenData+Token2.TokenData;
+            Content:=Concat(Content,[Token]);
+            end
+          else
+            begin
+            DoUnknownStruct('20221112110649');
+            end;
+        end;
+      ptShl,ptShr,
+      ptSquareOpen,ptSquareClose,
+      ptCurlyOpen,ptCurlyClose,
+      ptHexString,
+      ptString:
+        begin
+        if Context<>cNone then
+          Content:=Concat(Content,[Token])
+        else
+          DoUnknownStruct('20221112110702');
+        end;
+      ptWhiteSpace:
+        begin
+        if Context<>cNone then
+          Content:=Concat(Content,[Token])
+        else
+          DoUnknownStruct('20221112110708');
+        end;
+     else // Case
+        if TryStrToInt64(TokenStr,I1) then // Possible start of "<id> <generation> obj"
+          begin
+          Token2:=FScanner.GetToken(False);
+          if TryStrToInt64(Token2.TokenData,I2) then
+            begin
+            Token3:=FScanner.GetToken(False);
+            if Token3.TokenData=SPDFObj then
+              begin
+              Context:=cNone;
+              Content:=[];
+              Exit(ParseIndirectObject(I1,I2))
+              end
+            else
+              begin
+              FScanner.Unget(Token3); // Not an indirect object: push back the look-ahead tokens
+              FScanner.Unget(Token2);
+              DoUnknownStruct('20221112110726');
+
+              end;
+            end
+          else
+            begin
+            FScanner.Unget(Token2);
+            DoUnknownStruct('20221112110742');
+            end;
+          end
+        else if TokenStr=SPDFTrailer then
+          begin
+          Context:=cNone;
+          Content:=[];
+          exit(ParseTrailer(Token));
+          end
+        else if TokenStr=SPDFXref then
+          begin
+          Context:=cNone;
+          Content:=[];
+          exit(ParseXREF(Token));
+          end
+        else if TokenStr=SPDFStartxref then
+          begin
+          Context:=cNone;
+          Content:=[];
+          Exit(ParseStartXReF(Token));
+          end
+        else
+          begin
+          Context:=cNone;
+          Content:=[];
+          Exit(ParseMalFormed(Token,'')); // Anything else is returned as malformed content
+          end;
+
+    end; // Case
+  Until Token.TokenType=ptEOF;
+end;
+
+// Check that the source starts with the %PDF- header followed by a version
+// number, store the version in the document, and look for the %%EOF marker,
+// searching backwards from the position set with Reposition(-1).
+// Returns the result of the backward search for %%EOF.
+// Errors are reported through DoError.
+function TPDFParser.VerifyDocument: Int64;
+
+Var
+  lVersionToken : TPDFToken;
+  lVersion : String;
+
+begin
+  if Not FScanner.CompareBytes('%PDF-') then
+    DoError(penMissingPDFHeader,SErrMissingPDFHeader);
+  lVersionToken:=FScanner.GetToken;
+  if not (lVersionToken.TokenType=ptNumber) then
+    DoError(penMissingPDFVersion,SerrMissingPDFversion);
+  lVersion:=lVersionToken.TokenData;
+  if Length(lVersion)=0 then
+    DoError(penInvalidPDFversion,sErrInvalidPDFversion,[lVersion]);
+  // The first character of the token is not part of the stored version.
+  FDoc.PDFversion:=Copy(lVersion,2,Length(lVersion)-1);
+  FScanner.Reposition(-1,False);
+  Result:=FScanner.FindBytesBackWard('%%EOF',False);
+  if Result=-1 then
+    DoError(penPDFEOFNotFound,SErrPDFEOFNotFound);
+end;
+
+// Search backwards from StartAt for the startxref keyword and parse it.
+// FoundAt receives the position of the startxref token.
+// Errors are reported through DoError.
+function TPDFParser.GetLastXRef(StartAt: Int64; out FoundAt : Int64): TPDFStartXRef;
+
+Var
+  lKeyword : TPDFToken;
+
+begin
+  if not (FScanner.Position=StartAt) then
+    FScanner.Reposition(StartAt,False);
+  if FScanner.FindBytesBackward(SPDFStartXRef,False)=-1 then
+    DoError(penNoStartXRef,SErrNoStartXRef,[StartAt]);
+  // The token at the position found must be the keyword itself.
+  lKeyword:=FScanner.GetToken(False);
+  if not (lKeyword.TokenData=SPDFStartxref) then
+    DoError(penNoStartXRef,SErrNoStartXRef,[lKeyword.TokenPos]);
+  FoundAt:=lKeyword.TokenPos;
+  Result:=ParseStartXREF(lKeyword);
+end;
+
+class procedure TPDFParser.Unpredict(var Data: TFilterData);
+// Undo the predictor on Data.Dest, when Data.ParamDict has a predictor entry with a value other than 1. Data.Dest is replaced by a new stream with the result.
+Var
+  Tmp : TBytesStream;
+  Buf,B2 : TBytes;
+  aCount : Integer;
+  aPredictor,aColors,aColumns,aBitsPerComponent : Integer;
+  aStream : TPredictSTream;
+// MaybeInt: integer value of key aKey in Data.ParamDict, or aDefault when the key is not present.
+  Function MaybeInt(aKey : string; aDefault : integer) : Integer;
+
+  begin
+  if Data.ParamDict.ContainsKey(aKey) then
+    Result:=Data.ParamDict.GetIntegerValue(aKey)
+  else
+    Result:=aDefault;
+  end;
+
+begin
+  Buf:=[];
+  B2:=[];
+  if Data.ParamDict.ContainsKey(SPDFKeyPredictor) then
+    begin
+    aPredictor:=MaybeInt(SPDFKeyPredictor,1);
+    if aPredictor<>1 then // Nothing is done for predictor value 1
+      begin
+      aColumns:=MaybeInt(SPDFKeyColumns,1);
+      aColors:=MaybeInt(SPDFKeyColors,1);
+      aBitsperComponent:=MaybeInt(SPDFKeyBitsPerComponent,8);
+      Data.Dest.Position:=0;
+      tmp:=nil;
+      aStream:=TPredictStream.Create(Data.Dest,aPredictor,aColumns,aColors,aBitsPerComponent);
+      try
+        SetLength(B2,Data.Dest.Size);
+        Tmp:=TBytesStream.Create(B2); // NOTE(review): Tmp starts with the size of the source data; presumably it keeps that size when less data is written - confirm
+        Tmp.Position:=0;
+        aStream.SourceOwner:=True; // The predict stream owns the old Data.Dest from here on
+        SetLength(Buf,Data.Dest.Size);
+        Repeat
+          aCount:=aStream.Read(Buf[0],Length(Buf));
+          if aCount>0 then
+            Tmp.WriteBuffer(Buf[0],aCount);
+        Until (aCount=0);
+{$IFDEF DEBUGSTREAMS}
+        // DumpStream(Tmp,'Buffer');
+{$ENDIF}
+        Tmp.Position:=0;
+
+        Data.Dest:=Tmp;
+      finally
+        if Data.Dest<>Tmp then // Only when an error occurred before Tmp was handed over
+          Tmp.Free;
+        aStream.Free;
+      end;
+      end;
+    end;
+end;
+
+
+function TPDFParser.FilterStream(aStream : TStream; aFilterName : String; aParams : TPDFDictionary) : TStream;
+
+{ Apply a single PDF stream filter to aStream and return the decoded data as
+  a new stream.
+
+  aFilterName may carry a leading '/', which is stripped before dispatching.
+  aParams is the optional decode parameter dictionary; for the Flate and LZW
+  filters it is checked for a predictor, which is then undone as well.
+  Filters without a decoder here are passed to OnUnknownFilter when that is
+  assigned, otherwise a penUnknownFilter error is raised.
+  If anything fails, the partially produced destination stream is freed and
+  the exception is re-raised. aStream itself is never freed here. }
+
+Var
+  Data : TFilterData;
+
+begin
+  Result:=Nil;
+  Data:=Default(TFilterData);
+  Data.FilterName:=aFilterName;
+  Data.Source:=aStream;
+  Data.ParamDict:=aParams;
+  if (Data.FilterName<>'') and (Data.FilterName[1]='/') then
+    Delete(Data.FilterName,1,1);
+  try
+    Case Data.Filtername of
+      SPDFFilterFlateDecode :
+        begin
+        Data.Dest:=Deflate(Data.Source);
+        if assigned(Data.ParamDict) and Data.ParamDict.ContainsKey(SPDFKeyPredictor) then
+          Unpredict(Data);
+{$IFDEF DUMPSTREAMS}
+        DumpStream(Data.Dest,'Unpredicted Buffer data');
+{$ENDIF}
+        end;
+      SPDFFilterLZWDecode:
+        begin
+        Data.Dest:=LZWDecode(Data.Source);
+        if assigned(Data.ParamDict) and Data.ParamDict.ContainsKey(SPDFKeyPredictor) then
+          Unpredict(Data);
+  {$IFDEF DUMPSTREAMS}
+        DumpStream(Data.Dest,'Unpredicted Buffer data');
+  {$ENDIF}
+        end;
+      SPDFFilterASCIIHexDecode:
+        Data.Dest:=ASCIIHexDecode(Data.Source);
+      SPDFFilterASCII85Decode:
+        Data.Dest:=ASCII85Decode(Data.Source);
+    else
+      // Run-length encoded data is deliberately handled here as well: it used
+      // to be fed to the ASCII85 decoder, which cannot decode it. Until a
+      // real run-length decoder is wired in, treat it like any other filter
+      // we cannot decode so that the problem is reported instead of hidden.
+      if Assigned(FOnUnknownFilter) then
+        FOnUnknownFilter(Self,Data)
+      else
+        DoError(penUnknownFilter,SErrUnknownFilter,[aFilterName]);
+    end;
+    Result:=Data.Dest;
+  except
+    Data.Dest.Free;
+    Raise;
+  end;
+end;
+
+function TPDFParser.ResolveFilters(aStream : TStream; aDict : TPDFDictionary): TStream;
+
+// Runs aStream through every filter listed under the filter key of aDict, in
+// order, and returns the final stream. Each intermediate stream (including
+// aStream itself once a filter was applied) is freed as soon as its
+// successor exists. Without a filter entry, aStream is returned unchanged.
+// Decode parameters given as an array are not supported and raise an error.
+
+var
+  lFilterList, lEntry, lParams : TPDFObject;
+  lParamDict : TPDFDictionary;
+  lNext : TStream;
+
+begin
+  Result:=aStream;
+  lFilterList:=aDict.FindValue(SPDFKeyFilter);
+  lParams:=aDict.FindValue(SPDFKeyDecodeParms);
+  lParamDict:=Nil;
+  // "is" yields false for nil, so no separate nil test is needed.
+  if lParams is TPDFDictionary then
+    lParamDict:=TPDFDictionary(lParams)
+  else if lParams is TPDFArray then
+    DoUnknownStruct('20221113170651');
+  if lFilterList=Nil then
+    exit;
+  For lEntry in lFilterList do
+    begin
+    if not (lEntry is TPDFValue) then
+      DoError(penUnknownFilterValueClass,SErrUnknownFilterValueClass, [lEntry.ClassName])
+    else
+      begin
+      lNext:=FilterStream(Result,TPDFValue(lEntry).Value,lParamDict);
+      // The input of this step is no longer needed.
+      Result.Free;
+      Result:=lNext;
+      Result.Position:=0;
+  {$IFDEF DUMPSTREAMS}
+      DumpStream(Result,'Filtered stream intermediate result');
+  {$ENDIF}
+      end;
+    end;
+end;
+
+function TPDFParser.GetXRefStreamSubSections(aObjectDict : TPDFDictionary) : TIndexPairArray;
+
+{ Return the subsections of a cross-reference stream as pairs of
+  (first object number, number of entries).
+
+  When the dictionary has an index array, consecutive elements are taken two
+  by two. A trailing element without partner (odd element count) is ignored
+  instead of being read past the end of the array.
+  Without index array a single pair (0, Size) is returned. }
+
+Var
+  I,aSize : integer;
+  aPair : TIndexPair;
+  Idx : TPDFArray;
+  Index : TIndexPairArray;
+
+begin
+  aSize:=aObjectDict.GetIntegerValue(SPDFKeySize);
+  if aObjectDict.ContainsKey(SPDFKeyIndex) then
+    Idx:=aObjectDict.GetArrayValue(SPDFKeyIndex)
+  else
+    Idx:=nil;
+  if not Assigned(Idx) then
+    begin
+    SetLength(Index,1);
+    aPair[1]:=0;
+    aPair[2]:=aSize;
+    Index[0]:=aPair;
+    end
+   else
+    begin
+    SetLength(Index,Idx.Count Div 2);
+    I:=0;
+    // I+1 must be a valid element too: only complete pairs are converted.
+    While (I+1<Idx.Count) do
+      begin
+      aPair[1]:=Idx.GetIntegerAt(I);
+      aPair[2]:=Idx.GetIntegerAt(I+1);
+      index[I div 2]:=aPair;
+      Inc(I,2);
+      end;
+    end;
+  Result:=Index;
+end;
+
+function TPDFParser.ParseXREFStream(aStream : TStream; aObjectDict : TPDFDictionary) : TPDFXRefList;
+
+{ Decode the entries of a cross-reference stream.
+
+  aStream holds the (already unfiltered) stream data, aObjectDict the stream
+  dictionary. The three field widths are taken from the W array, the
+  subsections from GetXRefStreamSubSections. For every object ID that has no
+  entry yet in FloadingXRef a TPDFXRef is created, stored in FloadingXRef and
+  added to the returned list; IDs that already have an entry are skipped.
+
+  Object IDs come from the file and are therefore not trusted: the table is
+  grown when an ID lies beyond its end, and negative IDs are ignored with a
+  warning, so FloadingXRef is never indexed out of bounds.
+
+  On error the result list is freed and the exception re-raised. }
+
+  // Read a big-endian unsigned value of aSize bytes at P and advance P.
+  // A field of width 0 is not present in the data and yields aDefault.
+  Function GetData (Var P : PByte; aSize : Integer; aDefault : Integer) : Integer;
+
+  Var
+    I : Integer;
+
+  begin
+    Result:=0;
+    if aSize=0 then
+      Result:=aDefault
+    else
+      For I:=1 to aSize do
+        begin
+        Result:=256*Result+P^;
+        Inc(P);
+        end;
+  end;
+
+
+Var
+  Entry : Array of Byte;
+  Sizes : Array[0..2] of Byte;
+  Indexes : TIndexPairArray;
+  Fields : Array[0..2] of Integer;
+  aID,aFirst : integer;
+  aPair : TIndexPair;
+  O,O2 : TPDFObject;
+  W : TPDFArray absolute O;
+  V : TPDFValue absolute O2;
+  I,J,aSize : Integer;
+  D : PByte;
+  Itm: TPDFXRef;
+
+
+begin
+  Entry:=[];
+  aSize:=aObjectDict.GetIntegerValue(SPDFKeySize);
+  if Length(FloadingXRef)=0 then
+    Setlength(FloadingXRef,aSize)
+  else if aSize>Length(FloadingXRef) then
+    begin
+    DoWarning('Increasing size of XRef table from %d to %d',[Length(FloadingXRef),aSize]);
+    Setlength(FloadingXRef,aSize);
+    end;
+  W:=aObjectDict.GetArrayValue(SPDFKeyW);
+  For I:=0 to 2 do
+    begin
+    O2:=W.Objects[I];
+    if Not ((O2 is TPDFValue) and V.IsInteger) then
+      DoError(penSizeElementNotInteger, SerrSizeElementNotInteger ,[i]);
+    Sizes[i]:=V.AsInteger;
+    end;
+  // One entry is the concatenation of the three fields.
+  SetLength(Entry,Sizes[0]+Sizes[1]+Sizes[2]);
+{$IFDEF DUMPSTREAMS}
+  DumpStream(aStream,'Stream');
+{$ENDIF}
+  Indexes:=GetXRefStreamSubSections(aObjectDict);
+  Result:=TPDFXRefList.Create();
+  try
+    for I:=0 to Length(Indexes)-1 do
+      begin
+      aPair:=Indexes[I];
+      aFirst:=aPair[1];
+      aSize:=aPair[2];
+      For J:=0 to aSize-1 do
+        begin
+        aStream.ReadBuffer(Entry[0],Length(Entry));
+        D:=@Entry[0];
+        // The type field defaults to 1 when absent, the others to 0.
+        Fields[0]:=GetData(D,Sizes[0],1);
+        Fields[1]:=GetData(D,Sizes[1],0);
+        Fields[2]:=GetData(D,Sizes[2],0);
+        aID:=aFirst+J;
+        if aID<0 then
+          DoWarning('Ignoring XRef stream entry with invalid object ID %d',[aID])
+        else
+          begin
+          if aID>=Length(FloadingXRef) then
+            begin
+            // The subsection reaches beyond the announced table size.
+            DoWarning('Increasing size of XRef table from %d to %d',[Length(FloadingXRef),aID+1]);
+            SetLength(FloadingXRef,aID+1);
+            end;
+          if FloadingXRef[aID]=Nil then
+            begin
+            Itm:=TPDFXRef.Create();
+            Itm.ReferenceIndex:=aID;
+            Itm.Inuse:=Fields[0]<>0;
+            Itm.Compressed:=(Fields[0]=2);
+            Case Fields[0] of
+            0 :
+              begin
+              Itm.NextFreeObject:=Fields[1];
+              Itm.ObjectGeneration:=Fields[2];
+              end;
+            1 :
+              begin
+              Itm.ObjectOffset:=Fields[1];
+              Itm.ObjectGeneration:=Fields[2];
+              end;
+            2:
+              begin
+              Itm.StreamObjectNr:=Fields[1];
+              Itm.ObjectIndex:=Fields[2];
+              end;
+            end;
+            FloadingXRef[aID]:=Itm;
+            Result.Add(Itm);
+            end;
+          end;
+        DoProgress(pkXref,J,aSize);
+        end;
+      end;
+  except
+    Result.Free;
+    Raise;
+  end;
+end;
+
+function TPDFParser.GetUnfilteredStream(aObj : TPDFIndirect) : TStream;
+
+// Returns the decoded stream data of aObj. The decoded stream is cached in
+// aObj.UnfilteredStream: when it is already there it is returned as-is,
+// otherwise the raw stream data is run through ResolveFilters, rewound,
+// stored in aObj and returned.
+
+begin
+  Result:=aObj.UnfilteredStream;
+  if Assigned(Result) then
+    exit;
+  Result:=TBytesStream.Create(aObj.Stream.Data);
+  try
+    Result:=ResolveFilters(Result,aObj.ObjectDict);
+    Result.Position:=0;
+    aObj.UnfilteredStream:=Result;
+  except
+    Result.Free;
+    Raise;
+  end;
+end;
+
+function TPDFParser.ParseXRefAt(aStartPos: Int64; out ParentObject : TPDFIndirect): TPDFXRefList;
+
+{ Parse the cross-reference data located at aStartPos.
+
+  Two forms are recognised:
+  - a classic table starting with the xref keyword: it is parsed with
+    ParseXREF and ParentObject is returned as Nil;
+  - an indirect object ("<id> <gen> obj") whose type is the XRef type: the
+    object is parsed, its stream unfiltered and decoded with ParseXREFStream.
+    The object is returned in ParentObject and the list is marked compressed.
+
+  Raises a penNoXrefAt error when neither form is found at aStartPos. }
+
+Var
+  lToken,lToken2,lToken3 : TPDFToken;
+  aStream : TStream;
+
+begin
+  // Report through the log event; a library must not write to standard output.
+  DoInfo('Parsing XREF at : %d',[aStartPos]);
+  Result:=Nil;
+  ParentObject:=Nil;
+  if (FScanner.Position<>aStartPos) then
+    FScanner.Reposition(aStartPos,True);
+  lToken:=FScanner.GetToken(False);
+  if (lToken.TokenData=SPDFXref) then
+    Result:=ParseXREF(lToken)
+  else
+    if (lToken.IsInteger) then
+      begin
+      lToken2:=FScanner.GetToken(False);
+      if lToken2.IsInteger then
+        begin
+        lToken3:=FScanner.GetToken(False);
+        if lToken3.CheckString(SPDFObj) then
+          begin
+          ParentObject:=ParseIndirectObject(ltoken.AsInteger,ltoken2.AsInteger);
+          if (ParentObject.ObjectType=SPDFTypeXref) then
+            begin
+            if Not (ParentObject is TPDFIndirectXRef) then
+              // The object type MUST be resolved here: the typecast below needs it.
+              ParentObject:=ParentObject.ResolveObjectType;
+            aStream:=GetUnfilteredStream(ParentObject);
+            Result:=ParseXREFStream(aStream,ParentObject.ObjectDict);
+            (ParentObject as TPDFIndirectXRef).XRef:=Result;
+            Result.Compressed:=True;
+            end;
+          end;
+        end;
+      end;
+  if (Result=Nil) then
+    DoError(penNoXrefAt,SErrNoXRefAt,[aStartPos])
+end;
+
+function TPDFParser.ParseStreamObjectPositions(aSTMObj: TPDFIndirect ) :TObjectPositionArray;
+
+// Reads the (object ID, offset) integer pairs of an object stream from the
+// current scanner. The number of pairs is taken from the object dictionary.
+// The resulting array is also stored in aSTMObj.ObjectPositions.
+
+Var
+  lIdx,lCount : Integer;
+  lEntry : TObjectPosition;
+  lToken : TPDFToken;
+
+begin
+  Result:=[];
+  lCount:=aStmObj.ObjectDict.GetIntegerValue(SPDFKeyN);
+  SetLength(Result,lCount);
+  lIdx:=0;
+  While lIdx<lCount do
+    begin
+    // First number of the pair: the object ID
+    lToken:=FScanner.GetToken(False);
+    if Not lToken.IsInteger then
+      DoError(penObjStmObjectIDInvalid, SErrObjStmObjectIDInvalid ,[aSTMObj.ObjectID,lIdx]);
+    lEntry.ID:=lToken.AsInteger;
+    // Second number of the pair: the offset of the object
+    lToken:=FScanner.GetToken(False);
+    if Not lToken.IsInteger then
+      DoError(penObjStmObjectOffsetInvalid, SErrObjStmObjectOffsetInvalid,[aSTMObj.ObjectID,lIdx]);
+    lEntry.Offset:=lToken.AsInteger;
+    Result[lIdx]:=lEntry;
+    Inc(lIdx);
+    end;
+  aStmObj.ObjectPositions:=Result;
+end;
+
+procedure TPDFParser.SetResolveContentStreams(AValue: Boolean);
+
+// Property setter. Switching content stream resolving on also switches
+// ResolveObjects on; switching it off leaves ResolveObjects as it is.
+
+begin
+  if FResolveContentStreams<>AValue then
+    begin
+    FResolveContentStreams:=AValue;
+    if FResolveContentStreams then
+      ResolveObjects:=true;
+    end;
+end;
+
+procedure TPDFParser.DoProgress(aKind: TProgressKind; aCurrent, aCount: Integer);
+
+// Forwards a progress notification to the progress event handler, if any.
+
+begin
+  if not Assigned(FOnProgress) then
+    exit;
+  FOnProgress(Self,aKind,aCurrent,aCount);
+end;
+
+procedure TPDFParser.DoLog(aKind: TLogKind; const Msg: string);
+
+// Passes a log message of the given kind to the log event handler, if any.
+
+begin
+  if not Assigned(FonLog) then
+    exit;
+  FonLog(Self,aKind,Msg);
+end;
+
+procedure TPDFParser.DoLog(aKind: TLogKind; const Fmt: string;
+  const Args: array of const);
+
+// Same as above, the message being built with Format from Fmt and Args.
+
+var
+  lMsg : String;
+
+begin
+  lMsg:=Format(Fmt,Args);
+  DoLog(aKind,lMsg);
+end;
+
+procedure TPDFParser.DoInfo(const Msg: string);
+
+// Logs Msg as an informational message.
+
+begin
+  DoLog(lkInfo,Msg);
+end;
+
+procedure TPDFParser.DoInfo(const Fmt: string; const args: array of const);
+
+// Logs the formatted message as an informational message.
+
+var
+  lMsg : String;
+
+begin
+  lMsg:=Format(Fmt,Args);
+  DoInfo(lMsg);
+end;
+
+procedure TPDFParser.DoWarning(const Msg: string);
+
+// Logs Msg as a warning.
+
+begin
+  DoLog(lkWarning,Msg);
+end;
+
+procedure TPDFParser.DoWarning(const Fmt: string; const args: array of const);
+
+// Logs the formatted message as a warning.
+
+var
+  lMsg : String;
+
+begin
+  lMsg:=Format(Fmt,Args);
+  DoWarning(lMsg);
+end;
+
+procedure TPDFParser.DoError(const Nr: Integer; const Msg: string);
+
+// Logs the error, then raises an EPDFParser carrying Msg as message and Nr
+// as error number. This procedure never returns normally.
+
+Var
+  lException : EPDFParser;
+
+begin
+  DoLog(lkError,'Error nr %d : %s',[Nr,Msg]);
+  lException:=EPDFParser.Create(Msg);
+  lException.ErrorNumber:=nr;
+  Raise lException;
+end;
+
+
+procedure TPDFParser.DoError(const Nr: Integer; const Fmt: string;
+  const Args: array of const);
+
+// Same as above, the message being built with Format from Fmt and Args.
+
+var
+  lMsg : String;
+
+begin
+  lMsg:=Format(Fmt,Args);
+  DoError(Nr,lMsg);
+end;
+
+procedure TPDFParser.DoUnknownStruct(aID: String);
+
+// Raises a penUnknownConstruct error. aID identifies the place in the code
+// that encountered the unsupported construct.
+
+begin
+  DoError(penUnknownConstruct,Format(SErrUnknownConstruct,[aID]));
+end;
+
+
+{ Parse the object with index aIdx stored inside object stream aSTMObj.
+
+  aObjectID is the ID the caller expects; a warning is logged when the ID
+  recorded in the stream differs. The parser's scanner is temporarily
+  replaced by a scanner on the unfiltered object stream and restored in the
+  finally block, whatever happens.
+  Raises an error when aSTMObj is not an object stream or has no dictionary. }
+function TPDFParser.ParseIndirectObjectInStream(aSTMObj: TPDFIndirect; aIdx,
+  aObjectID: Integer): TPDFIndirect;
+
+Var
+  aStream,wstream : TStream;
+  First : Integer;
+  aScanner : TPDFScanner;
+  aEndOffset : Integer;
+  Position : TObjectPosition;
+  Positions : TObjectPositionArray;
+  aStartOffset : Integer;
+  Obj : TPDFIndirect;
+  {$IFDEF DEBUGSTREAMS}
+  sname : string;
+  {$endif}
+begin
+  DoProgress(pkIndirect,aObjectID,Length(FloadingXRef));
+{$IFDEF DEBUGSTREAMS}
+  SName:='objectstream-'+IntToStr(aObjectID);
+  Writeln('Searching Indirect object stream. ID ',aObjectID,' index ',aIdx);
+{$ENDIF}
+  if aSTMObj.ObjectType<>SPDFTypeObjStm then
+    DoError(penNotObjectStream,SErrObjectIsNotObjectStream,[aSTMObj.ObjectID]);
+  if aStmObj.ObjectDict=Nil then
+    DoError(penObjectStreamWithoutDict,SErrStreamObjectWithoutDict,[aSTMObj.ObjectID]);
+  // The unfiltered stream is cached on aSTMObj by GetUnfilteredStream, so it is not freed here.
+  aStream:=GetUnFilteredStream(aSTMObj);
+{$IFDEF DEBUGSTREAMS}
+  SaveStreamToFile(sname,aStream);
+{$ENDIF}
+  wStream:=Nil;
+  // Remember the document scanner: it is put back in the finally block.
+  aScanner:=Fscanner;
+  try
+    // Step 1: scan the object stream itself, to read the position table if needed.
+    FScanner:=TPDFScanner.Create(aStream);
+    if Length(aStmObj.ObjectPositions)>0 then
+      Positions:=aStmObj.ObjectPositions
+    else
+      Positions:=ParseStreamObjectPositions(aStmObj);
+    First:=aStmObj.ObjectDict.GetIntegerValue(SPDFKeyFirst);
+    // NOTE(review): aIdx is not checked against Length(Positions) - confirm callers guarantee the range.
+    Position:=Positions[aIdx];
+    // Offsets in the table are relative to the value of the First key.
+    aStartOffset:=First+Position.Offset;
+    // The object ends where the next one starts, or at the end of the stream for the last one.
+    if aIdx<Length(Positions)-1 then
+      aEndOffset:=First+Positions[aIdx+1].Offset
+    else
+      aEndOffset:=aStream.Size;
+    aStream.Position:=aStartOffset;
+    wStream:=TWindowedStream.Create(aStream,aEndOffset-aStartOffset);
+    {$IFDEF DEBUGSTREAMS}
+    SaveStreamToFile(sname+'-idx-'+IntToStr(aIdx),wStream);
+    {$ENDIF}
+    // Step 2: scan only the window that holds the requested object.
+    FreeAndNil(FScanner);
+    FScanner:=TPDFScanner.Create(wStream);
+    Result:=TPDFIndirect.Create();
+    Result.ObjectID:=Position.ID;
+    if Position.ID<>aObjectID then
+      DoWarning(SWarnObjectIDDoesNotMatch,[aIdx,Result.ObjectID,aObjectID]);
+    try
+      ParseIndirectObjectBody(Result);
+    except
+      Result.Free;
+      Raise;
+    end;
+    if ResolveObjects then
+      begin
+      Obj:=Result;
+      try
+        Result:=Obj.ResolveObjectType;
+      finally
+        // When resolving returned a different instance, the generic one is no longer needed.
+        if Obj<>Result then
+          Obj.Free;
+      end;
+      end;
+  finally
+    // NOTE(review): wStream is freed before the scanner that reads from it - confirm the scanner does not touch its stream on destruction.
+    wStream.Free;
+    if FScanner<>aScanner then
+      FScanner.Free;
+    FScanner:=aScanner;
+  end;
+end;
+
+function TPDFParser.DefaultPDFPageClass: TPDFPageClass;
+
+// The page class used by this parser: TPDFPageObject.
+
+begin
+  Exit(TPDFPageObject);
+end;
+
+function TPDFParser.LoadXREFobject(Itm: TPDFXRef; addToDocument : Boolean = True): TPDFIndirect;
+
+{ Load the indirect object described by cross-reference entry Itm.
+
+  Uncompressed entries are parsed at their file offset. Compressed entries
+  are read from their object stream; when that stream object is not yet in
+  the document, it is loaded first through its own cross-reference entry.
+
+  The loaded object gets Itm.ReferenceIndex as ID and is stored in
+  Itm.Instance. When addToDocument is true it is added to the document; if
+  the document refuses it, the object is freed, Itm.Instance is cleared again
+  and Nil is returned.
+
+  Raises an error when the object stream cannot be found or is not an object
+  stream. }
+
+var
+  StmRef : TPDFXRef;
+  StmObj: TPDFIndirect;
+
+begin
+  Result:=Nil;
+  if not Itm.Compressed then
+    begin
+    Result:=ParseIndirectObject(Itm.ObjectOffset);
+    end
+  else
+    begin
+    StmObj:=FDoc.FindInDirectObject(Itm.StreamObjectNr,0);
+    if StmObj=nil then
+      begin
+      // The stream object number comes from the file: never index the
+      // table with it before checking that it is in range.
+      if (Itm.StreamObjectNr>=0) and (Itm.StreamObjectNr<Length(FloadingXRef)) then
+        begin
+        StmRef:=FloadingXRef[Itm.StreamObjectNr];
+        if StmRef<>Nil then
+           StmObj:=LoadXRefObject(StmRef);
+        end;
+      if StmObj=Nil then
+        DoError(penNoSuchStreamObject,SErrNoSuchStreamObject,[Itm.StreamObjectNr,Itm.ReferenceIndex]);
+      end;
+    if StmObj.ObjectType<>SPDFTypeObjStm then
+      DoError(penNotStreamObject,SErrNotStreamObject,[Itm.StreamObjectNr,SPDFTypeObjStm]);
+    Result:=ParseIndirectObjectInStream(StmObj,Itm.ObjectIndex, Itm.ReferenceIndex);
+    end;
+  if assigned(Result) then
+    begin
+    Result.ObjectID:=Itm.ReferenceIndex;
+    Itm.Instance:=Result;
+    if AddToDocument then
+      if not FDoc.AddInDirectObject(Result) then
+        begin
+        // Do not leave a pointer to the freed object behind in the entry.
+        Itm.Instance:=Nil;
+        FreeAndNil(Result);
+        end;
+    end;
+end;
+
+procedure TPDFParser.LoadIndirectObjects;
+
+// Loads every in-use object of the cross-reference table that has not been
+// loaded yet. Two passes are made over the table: first the uncompressed
+// objects, then the compressed ones. Entry 0 is never loaded. Missing table
+// entries are reported with a warning, during the first pass only.
+
+  // One pass over the table, loading entries whose Compressed flag equals aCompressed.
+  procedure LoadPass(aCompressed : Boolean);
+
+  var
+    lIdx : Integer;
+    lRef : TPDFXRef;
+
+  begin
+    For lIdx:=0 to Length(FloadingXRef)-1 do
+      begin
+      lRef:=FloadingXRef[lIdx];
+      if lRef=Nil then
+        begin
+        if not aCompressed then
+          DoWarning('No reference to object ID %d available',[lIdx]);
+        Continue;
+        end;
+      if (lRef.Instance<>Nil) or not lRef.InUse then
+        Continue;
+      if (lRef.Compressed=aCompressed) and (lRef.ReferenceIndex>0) then
+        LoadXRefObject(lRef);
+      end;
+  end;
+
+begin
+  LoadPass(False);
+  LoadPass(True);
+end;
+
+function TPDFParser.DetectTrailerAt(aStartPos: Int64): TPDFTrailer;
+
+{ Try to find and parse a trailer dictionary located just before aStartPos.
+
+  The end of a dictionary is searched backwards from aStartPos. When none
+  is found, when it is further away than MaxTrailerDistance, or when no
+  trailer keyword can be found before it, Nil is returned: the file then
+  has no classic trailer at this place. Otherwise the trailer is parsed
+  and returned. }
+
+Var
+  aDictEnd,aPos : Int64;
+  lToken : TPDFToken;
+
+begin
+  Result:=Nil;
+  FScanner.Reposition(aStartPos,False);
+  aPos:=FScanner.Position;
+  aDictEnd:=FScanner.FindBytesBackward('>>');
+  if (aDictEnd=-1) or ((aPos-aDictEnd)>MaxTrailerDistance) then
+    exit;
+  // Without trailer keyword there is nothing to parse: do not hand an
+  // arbitrary token to ParseTrailer.
+  if FScanner.FindBytesBackward(SPDFTrailer)=-1 then
+    exit;
+  lToken:=FScanner.GetToken(False);
+  Result:=ParseTrailer(lToken);
+end;
+
+procedure TPDFParser.ParseDocument(aDoc: TPDFDocument);
+
+{ Parse the PDF data of the scanner into aDoc.
+
+  Steps:
+  - verify the document and locate the last startxref;
+  - detect the trailer dictionary preceding it, if there is one;
+  - follow the chain of cross-reference sections, starting at the offset
+    given by startxref and continuing with each 'Prev' offset, until no
+    previous section is announced;
+  - hand the collected cross-reference table to the document;
+  - when LoadObjects is set, load all indirect objects and, when
+    ResolveContentStreams is set, the page content streams. }
+
+var
+  aXRefStartPos, aEOFPos : Int64;
+  Start : TPDFStartXRef;
+  XRef : TPDFXRefList;
+  XRefObj : TPDFIndirect;
+  Trailer : TPDFTrailer;
+  XRefDict : TPDFDictionary;
+  aStartIndex : Int64;
+  lToken : TPDFToken;
+
+begin
+  FDoc:=aDoc;
+  // Must be initialised: it is tested in the loop below, also when neither
+  // a trailer nor a cross-reference stream object provided a dictionary.
+  XRefDict:=Nil;
+  aEOFPos:=VerifyDocument;
+  Start:=GetLastXRef(aEOFPos,aXRefStartPos);
+  FDoc.Add(Start);
+  Trailer:=DetectTrailerAt(aXRefStartPos);
+  FDoc.TrailerDict:=Trailer;
+  if Assigned(Trailer) then
+    begin
+    FDoc.Add(Trailer);
+    if Trailer.ContainsKey(SPDFKeySize) then
+      SetLength(FloadingXRef,Trailer.GetIntegerValue(SPDFKeySize));
+    XRefDict:=Trailer;
+    end;
+  aStartIndex:=Start.Index;
+  While (aStartIndex>0) do
+    begin
+    XRef:=ParseXRefAt(aStartIndex,XRefObj);
+    if XRef<>nil then
+      begin
+      if Assigned(XRefObj) then
+        begin
+        // Cross-reference stream: its object dictionary acts as trailer.
+        if not FDoc.AddInDirectObject(XRefObj) then
+          XRefObj.Free
+        else
+          begin
+          XRefDict:=XRefObj.ObjectDict;
+          if FDoc.TrailerDict=Nil then
+            FDoc.TrailerDict:=XRefDict;
+          end;
+        end
+      else
+        FDoc.Add(XRef);
+      end;
+    If Assigned(XRefDict) and (XRefDict.ContainsKey(SPDFKeyPrev)) then
+      aStartIndex:=XRefDict.GetInt64Value(SPDFKeyPrev)
+    else
+      begin
+      // Updated PDF documents refer to the original PDF document's XREF.
+      // This XRef is then followed by a trailer which contains a 'Prev'.
+      // We check for this here.
+      lToken:=FScanner.GetToken(False);
+      if Not ltoken.CheckString(SPDFTrailer) then
+        begin
+        FScanner.Unget(lToken);
+        aStartIndex:=0;
+        end
+      else
+        begin
+        Trailer:=ParseTrailer(lToken);
+        If Assigned(Trailer) then
+          begin
+          if Trailer.ContainsKey('Prev') then
+            aStartIndex:=Trailer.GetInt64Value('Prev')
+          else
+            aStartIndex:=0;
+          FDoc.Add(Trailer);
+          end
+        else
+          aStartIndex:=0;
+        end;
+      end;
+    // The dictionary only applies to the section that was just handled.
+    XRefDict:=Nil;
+    end;
+  FDoc.SetXrefArray(FloadingXRef);
+  if LoadObjects then
+    begin
+    LoadIndirectObjects;
+    If ResolveContentStreams then
+      DoResolveContentStreams(FDoc);
+    end;
+end;
+
+function TPDFParser.GetPageContentStream(aDoc : TPDFDocument; aPage: TPDFPageObject) : TStream;
+
+// Returns a stream with the content of aPage. A page with exactly one
+// content reference yields the unfiltered stream of that object; in all
+// other cases the unfiltered streams of all referenced objects are combined
+// in a TChainedStream. An error is raised for a reference that cannot be
+// resolved to an object with dictionary and stream.
+
+  // Unfiltered stream of the object behind content reference number Idx.
+  Function GetContentStream(Idx : Integer) : TStream;
+  var
+    lRef : TPDFRefData;
+    lTarget : TPDFIndirect;
+    lUsable : Boolean;
+
+  begin
+    lRef:=aPage.ContentRef[Idx];
+    lTarget:=aDoc.FindInDirectObject(lRef);
+    lUsable:=Assigned(lTarget) and Assigned(lTarget.ObjectDict) and Assigned(lTarget.Stream);
+    if not lUsable then
+      DoError(penContentStreamNotFound,sErrContentStreamNotFound,[lRef.ObjectID,lRef.ObjectGeneration])
+    else
+      Result:=GetUnfilteredStream(lTarget);
+  end;
+
+Var
+  lIdx : Integer;
+  lParts : Array of TStream;
+
+begin
+  Result:=Nil;
+  try
+    if aPage.ContentCount<>1 then
+      begin
+      SetLength(lParts,aPage.ContentCount);
+      For lIdx:=0 to aPage.ContentCount-1 do
+        lParts[lIdx]:=GetContentStream(lIdx);
+      Result:=TChainedStream.Create(lParts);
+      // From here on the parts must not be freed by the handler below.
+      lParts:=[];
+      end
+    else
+      Result:=GetContentStream(0);
+  except
+    FreeAndNil(Result);
+    For lIdx:=0 to Length(lParts)-1 do
+      lParts[lIdx].Free;
+    Raise;
+  end;
+end;
+
+procedure TPDFParser.DoResolveContentStreams(aDoc: TPDFDocument; aOnCommand : TNewCommandEvent = Nil);
+
+// Parses the content stream of every page of aDoc, reporting progress
+// before each page. aOnCommand is passed on to ParseContentStream.
+
+Var
+  lPageNo,lPageCount : Integer;
+  lPage : TPDFPageObject;
+
+begin
+  lPageCount:=aDoc.PageCount;
+  lPageNo:=0;
+  While lPageNo<lPageCount do
+    begin
+    lPage:=aDoc.Pages[lPageNo];
+    // Progress is reported 1-based.
+    DoProgress(pkContentStream,lPageNo+1,lPageCount);
+    ParseContentStream(lPage,GetPageContentStream(aDoc,lPage),aOnCommand);
+    Inc(lPageNo);
+    end;
+end;
+
+destructor TPDFParser.Destroy;
+
+// Releases the scanner before the inherited cleanup runs.
+
+begin
+  FreeAndNil(FScanner);
+  inherited;
+end;
+
+
+end.

+ 316 - 0
packages/fcl-pdf/src/fppdfpredict.pp

@@ -0,0 +1,316 @@
+{ **********************************************************************
+  This file is part of the Free Component Library
+
+  PDF predictor (de)compression
+  Copyright (c) 2022 by Michael Van Canneyt [email protected]
+
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+  **********************************************************************}
+
+unit fppdfpredict;
+
+{$mode ObjFPC}{$H+}
+
+interface
+
+uses
+  Classes, SysUtils, fpreadpng  ;
+
+Type
+  // Raised by TPredictStream.Create when it is given invalid parameters.
+  EPredict = Class(Exception);
+
+  { TPredictStream }
+
+  // Read-only stream that undoes a predictor on the data of a source stream.
+  // Predictor 1 copies the data, 2 is the TIFF predictor, 10..15 are the PNG
+  // predictors, where each row starts with a byte selecting the row filter.
+  TPredictStream = Class(TOwnerStream)
+    // Parameters as passed to the constructor
+    FPredictor : Integer;
+    Fcolumns : Integer;
+    Fcolors : Integer;
+    FBitsPerComponent : integer;
+    // Number of bytes in one row of decoded data
+    FStride : Integer;
+    // Number of bytes per pixel, rounded up to at least 1
+    Fbpp : Integer;
+    // Raw row as read from the source, decoded row, and previous decoded row
+    finbuffer,
+    foutbuffer,
+    frefbuffer : TBytes;
+    FRefPos : PByte;
+    // Range of decoded bytes in foutbuffer not yet handed out by Read
+    freadPos,
+    fwritePos : Pbyte;
+    // NOTE(review): not referenced by any method of this unit - confirm it can go.
+    Buffer : Array[0..4096] of byte;
+  private
+    // Read/write component number x of a row, honouring FBitsPerComponent
+    function GetComponent(Line: PByte; x : Integer): Integer;
+    Procedure PredictTiff(aOut,aIn : PByte);
+    procedure PredictPng(aOut, aIn: PByte; len : Integer; aPredictor : Byte);
+    Procedure PutComponent(buf: PByte; x, value: Integer);
+    class function Paeth(l,p,lp: integer): integer;
+  Public
+    // Raises EPredict when a parameter is not acceptable.
+    Constructor Create(aSource : TStream; aPredictor,aColumns,aColors,aBitsPerComponent : Integer);
+    // Returns decoded data; 0 is returned once the source is exhausted.
+    function Read(var aBuffer ; aCount : Integer) : Integer; override;
+  end;
+
+implementation
+
+
+function TPredictStream.GetComponent(Line : PByte; x : Integer) : Integer;
+
+// Returns component number x of the row at Line. Components narrower than a
+// byte are packed most significant bits first; 16-bit components are stored
+// high byte first. Unsupported component sizes yield 0.
+
+var
+  lShift : Integer;
+
+begin
+  Result:=0;
+  Case FBitsPerComponent of
+    1:
+      begin
+      lShift:=7-(x and 7);
+      Result:=(Line[x shr 3] shr lShift) and 1;
+      end;
+    2:
+      begin
+      lShift:=(3-(x and 3)) shl 1;
+      Result:=(Line[x shr 2] shr lShift) and 3;
+      end;
+    4:
+      begin
+      lShift:=(1-(x and 1)) shl 2;
+      Result:=(Line[x shr 1] shr lShift) and 15;
+      end;
+    8:
+      Result:=Line[x];
+    16:
+      Result:=(Line[x shl 1] shl 8)+Line[(x shl 1)+1];
+  end;
+end;
+
+
+procedure TPredictStream.PutComponent(buf : PByte; x, value: Integer);
+
+// Stores value as component number x of the row at buf. For components
+// narrower than a byte the bits are OR-ed into the byte, so the row must
+// have been cleared beforehand. Unsupported component sizes are ignored.
+
+var
+  lIndex,lShift : Integer;
+
+begin
+  Case FBitsPerComponent of
+  1:
+    begin
+    lIndex:=x shr 3;
+    lShift:=7 - (x and 7);
+    buf[lIndex] := buf[lIndex] or (value shl lShift);
+    end;
+  2:
+    begin
+    lIndex:=x shr 2;
+    lShift:=(3 - (x and 3)) shl 1;
+    buf[lIndex] := buf[lIndex] or (value shl lShift);
+    end;
+  4:
+    begin
+    lIndex:=x shr 1;
+    lShift:=(1 - (x and 1)) shl 2;
+    buf[lIndex] := buf[lIndex] or (value shl lShift);
+    end;
+  8:
+    buf[x]:=value;
+  16:
+    begin
+    lIndex:=x shl 1;
+    buf[lIndex] := value shr 8;
+    buf[lIndex+1]:=value;
+    end;
+  end;
+end;
+
+
+class function TPredictStream.Paeth(l,p,lp: integer) : integer;
+
+// taken from ReadPNG
+// Returns whichever of l, p and lp is closest to l+p-lp. On equal distance
+// l is preferred over p, and p over lp.
+
+Var
+  lEstimate : Integer;
+  lDistL,lDistP,lDistLP : Integer;
+
+begin
+  lEstimate:=l+p-lp;
+  lDistL:=abs(lEstimate-l);
+  lDistP:=abs(lEstimate-p);
+  lDistLP:=abs(lEstimate-lp);
+  Result:=lp;
+  if (lDistL <= lDistP) and (lDistL <= lDistLP) then
+    Result:=l
+  else if lDistP <= lDistLP then
+    Result:=p;
+end;
+
+constructor TPredictStream.Create(aSource: TStream; aPredictor, aColumns, aColors, aBitsPerComponent: Integer);
+
+{ Create a predictor stream on top of aSource.
+
+  aPredictor must be 1, 2 or 10..15, aBitsPerComponent one of 1, 2, 4, 8 or
+  16, aColors in the range 1..32 and aColumns at least 1 and small enough
+  for the row size in bits to fit in an integer. EPredict is raised when a
+  value is not acceptable.
+  The row buffers are allocated here; the input buffer has one extra byte
+  for the filter byte that precedes every row of PNG predicted data. }
+
+begin
+  Inherited Create(aSource);
+  if Not (aPredictor in [1,2,10,11,12,13,14,15]) then
+    Raise EPredict.CreateFmt('Invalid predictor value: %d',[aPredictor]);
+  if Not (aBitsPerComponent in [1,2,4,8,16]) then
+    Raise EPredict.CreateFmt('Invalid bits per component: %d',[aBitsPerComponent]);
+  // The lower bound also protects the division below against a zero divisor.
+  if (aColors < 1) or (aColors > 32) then
+    Raise EPredict.CreateFmt('Invalid amount of colors: %d',[aColors]);
+  if (aColumns < 1) then
+    Raise EPredict.CreateFmt('Invalid number of columns: %d',[aColumns]);
+  if (aColumns > (MaxInt div (aBitsPerComponent*aColors))) then
+    Raise EPredict.CreateFmt('Too many columns leads to overflow: %d',[aColumns]);
+  FPredictor:=aPredictor;
+  Fcolumns:=aColumns;
+  FBitsPerComponent:=aBitsPerComponent;
+  FColors:=aColors;
+  // Bytes per row and bytes per pixel, both rounded up to whole bytes.
+  FStride:=((FBitsPerComponent * FColors * FColumns) + 7) div 8;
+  FBPP:=((FBitsPerComponent * FColors ) + 7) div 8;
+  SetLength(FInbuffer,FStride+1);
+  SetLength(FOutbuffer,FStride);
+  SetLength(FRefBuffer,FStride);
+  // Nothing decoded yet: the range of pending output bytes is empty.
+  FReadPos:=PByte(FOutBuffer);
+  FWritePos:=PByte(FOutBuffer);
+end;
+
+procedure TPredictStream.PredictTiff(aOut, aIn: PByte);
+
+// Undoes the TIFF predictor for one row: every component is the sum of its
+// encoded value and the decoded value of the same color component in the
+// pixel to its left, truncated to the component size. 8-bit components are
+// handled bytewise; other sizes go through GetComponent/PutComponent.
+
+Var
+  lPrevious : array[0..31] of integer;
+  lMask,lColumn,lColor : integer;
+  lPos,lValue : Integer;
+
+begin
+  for lColor:=0 to FColors-1 do
+    lPrevious[lColor]:=0;
+  if (FBitsPerComponent=8) then
+    begin
+    for lColumn:=0 to FColumns-1 do
+      for lColor:=0 to FColors-1 do
+        begin
+        lPrevious[lColor]:=(aIn^+lPrevious[lColor]) and $FF;
+        aOut^:=lPrevious[lColor];
+        Inc(aIn);
+        Inc(aOut);
+        end;
+    end
+  else
+    begin
+    lMask:=(1 shl FBitsPerComponent)-1;
+    // PutComponent ORs small components into place: start from a clean row.
+    if (FBitsPerComponent<8) then
+      FillChar(aOut^,Fstride,0);
+    for lColumn:=0 to FColumns-1 do
+      for lColor:=0 to FColors-1 do
+        begin
+        lPos:=(lColumn*FColors)+lColor;
+        lValue:=(GetComponent(aIn,lPos)+lPrevious[lColor]) and lMask;
+        PutComponent(aOut,lPos,lValue);
+        lPrevious[lColor]:=lValue;
+        end;
+    end;
+end;
+
+procedure TPredictStream.PredictPng(aOut, aIn: PByte; len: Integer; aPredictor : Byte);
+
+{ Undo a PNG row filter.
+
+  aIn points to len filtered bytes, aOut receives len decoded bytes. The
+  previous decoded row is taken from FRefBuffer. aPredictor is the filter
+  byte of the row: 0 = none, 1 = sub, 2 = up, 3 = average, 4 = Paeth. Other
+  values leave aOut untouched.
+  "Left" is the decoded byte one pixel (bpp bytes) back in the same row; for
+  the first pixel of a row it counts as 0. All sums are taken modulo 256,
+  independent of the overflow/range check settings of the compiler. }
+
+var
+  I : integer;
+  bpp : Integer;
+  ref : PByte;
+
+begin
+  Ref:=PByte(FRefbuffer);
+  bpp:=FBPP;
+  // A short row can hold less than one pixel.
+  if bpp>len then
+    bpp:=len;
+  Case aPredictor of
+    0 :
+      move(aIn^,aOut^,len);
+    1 :
+      begin
+      // First pixel: nothing to the left, bytes are copied as they are.
+      move(aIn^,aOut^,bpp);
+      inc(aIn,bpp);
+      Inc(aOut,bpp);
+      for I:=len-bpp downto 1 do
+        begin
+        aout^:=(aIn^+aOut[-bpp]) and $FF;
+        inc(aOut);
+        // The input must advance together with the output.
+        inc(aIn);
+        end;
+      end;
+    2 :
+      begin
+      // Only the row above is involved, so all bytes are treated alike.
+      for I:=1 to len do
+        begin
+        aOut^:=(aIn^+Ref^) and $FF;
+        inc(aOut);
+        inc(aIn);
+        inc(Ref);
+        end;
+      end;
+    3:
+      begin
+      for I:=1 to bpp do
+        begin
+        aOut^:=(aIn^+(Ref^ div 2)) and $FF;
+        inc(aout);
+        inc(aIn);
+        inc(Ref);
+        end;
+      for I:=Len-bpp downto 1 do
+        begin
+        aOut^:=(aIn^+((aOut[-bpp] + ref^) div 2)) and $FF;
+        inc(aout);
+        inc(aIn);
+        inc(Ref);
+        end;
+      end;
+    4:
+      begin
+      for I:=1 to bpp do
+        begin
+        aOut^:=(aIn^+Paeth(0,ref^,0)) and $FF;
+        inc(aout);
+        inc(aIn);
+        inc(Ref);
+        end;
+      for I:=Len-bpp downto 1 do
+        begin
+        aOut^:=(aIn^+Paeth(aOut[-bpp],Ref^,ref[-bpp])) and $FF;
+        inc(aout);
+        inc(aIn);
+        inc(Ref);
+        end;
+      end;
+  else
+    // Do nothing
+  end;
+end;
+
+function TPredictStream.Read(var aBuffer ; aCount : Integer) : Integer;
+
+// Delivers up to aCount decoded bytes in aBuffer and returns the number of
+// bytes delivered. Decoded bytes left over from an earlier call are handed
+// out first; after that rows are read from the source and decoded one at a
+// time until the request is satisfied or the source returns no more data.
+
+var
+  lDest,lDestEnd : PByte;
+  lRead : Integer;
+  lIsPng : Boolean;
+
+  // Copies pending decoded bytes to the caller's buffer, as far as it has
+  // room, and returns the number of bytes copied.
+  function Flush : Integer;
+
+  begin
+    Result:=0;
+    while (FReadPos<FWritePos) and (lDest<lDestEnd) do
+      begin
+      lDest^:=FReadPos^;
+      Inc(lDest);
+      Inc(FReadPos);
+      Inc(Result);
+      end;
+  end;
+
+begin
+  lDest:=@aBuffer;
+  lDestEnd:=lDest+aCount;
+  lIsPng:=(FPredictor>=10);
+  // Copy rest of foutbuffer to aBuffer
+  Result:=Flush;
+  // Read more data to outbuffer.
+  while (lDest<lDestEnd) do
+    begin
+    // A row of PNG predicted data is preceded by one filter byte.
+    lRead:=Source.read(FInBuffer[0],FStride + Ord(lIsPng));
+    if (lRead=0) then
+      break;
+    Case FPredictor of
+      1 :
+        // Just move data
+        move(FinBuffer[0], FOutBuffer[0], lRead);
+      2 :
+        // Tiff 2
+        PredictTiff(PByte(FOutBuffer),PByte(FInBuffer));
+    else
+      // PNG - First byte is the actual predictor
+      PredictPNG(PByte(FOutBuffer),PByte(@FinBuffer[1]),lRead-1, FInBuffer[0]);
+      // Move out to ref for next round.
+      Move(FOutBuffer[0],FRefBuffer[0],FStride);
+    end;
+    FReadPos:=PByte(FOutBuffer);
+    FWritePos:=PByte(@FoutBuffer[lRead-Ord(lIsPng)]);
+    // Move to output buffer
+    Result:=Result+Flush;
+    end;
+end;
+
+end.
+

+ 718 - 0
packages/fcl-pdf/src/fppdfscanner.pp

@@ -0,0 +1,718 @@
+{ **********************************************************************
+  This file is part of the Free Component Library
+
+  PDF Scanner
+  Copyright (c) 2022 by Michael Van Canneyt [email protected]
+
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+  **********************************************************************}
+unit fppdfscanner;
+
+{$mode ObjFPC}{$H+}
+
+interface
+
+uses
+  Classes, SysUtils, fppdfobjects, fppdfsource;
+
+Const
+  StringLengthDelta = 100;
+
+
+Type
+  TCharacterClass = (ccWhitespace,ccDelimiter,ccRegular);
+  TPDFContext = (cNone,cObj,cXRef,cTrailer);
+
+  { EPDFScanner }
+
+  // Exception raised by TPDFScanner.
+  // When raised through TPDFScanner.DoError, ErrorNumber holds the error
+  // number passed to DoError (the scanner passes sen* constants) and
+  // Position holds the source position at the moment the error was raised.
+  EPDFScanner = Class(EPDF)
+  private
+    FErrorNumber: Integer;
+    FPosition: Int64;
+  Public
+    Property ErrorNumber : Integer Read FErrorNumber Write FErrorNumber;
+    Property Position : Int64 Read FPosition Write FPosition;
+  end;
+
+  { TPDFScanner }
+
+  // Tokenizer for PDF data. Bytes are read through a buffered
+  // TPDFSourceStream and turned into TPDFToken records. The class also
+  // offers raw byte comparison/search helpers and repositioning.
+  TPDFScanner = Class
+  Private
+    // TokenPos of the token most recently returned by GetToken.
+    FLastTokenPosition: Int64;
+    FSource : TPDFSourceStream;
+    // Stack of pushed-back tokens; FunGetLen is the number of entries in use.
+    FUnget : Array[1..100] of TPDFToken;
+    FunGetLen : Integer;
+    function GetPosition: Int64;
+    function GetStream: TStream;
+  protected
+    // Raise EPDFScanner with ErrorNumber=Nr and Position set to the current source position.
+    Procedure DoError(const Nr : Integer; Const Msg : string);
+    Procedure DoError(const Nr : Integer; Const Fmt : string; Const Args : Array of const);
+    // Called by DoGetToken after '<' was read; aStartByte is the byte that followed it.
+    function ParseHexString(const aStartByte: Byte): RawByteString; virtual;
+    // Called by DoGetToken after '(' was read; returns the string contents with escapes resolved.
+    function ParseString(const aStartByte: Byte): RawByteString; virtual;
+    Property PDF : TPDFSourceStream Read FSource;
+    // Return the most recently pushed-back token if there is one, else scan the next token.
+    Function DoGetToken : TPDFToken; virtual;
+  Public
+    // Create a buffered source on aFile and load the buffer at offset 0.
+    Constructor Create(aFile : TStream; aBufSize : Cardinal = DefaultBufferSize); virtual;
+    Destructor Destroy; override;
+    // Classify a byte as whitespace, delimiter or regular character.
+    Class Function CharacterClass(aByte : Byte) : TCharacterClass;
+    // Return the next token. When aAllowWhiteSpace is False, whitespace tokens are skipped.
+    Function GetToken(aAllowWhiteSpace : boolean = True) : TPDFToken;
+    // Copy raw bytes from the source into B. Raises EPDFScanner when pushed-back tokens are present.
+    Function CopyBytes(B : TBytes; aCount : Integer) : Integer;
+    // Write the TokenData of every token up to the 'endstream' keyword to S;
+    // the 'endstream' token itself is pushed back.
+    Procedure ReadStreamData(S : TStream);
+    // Push a token back; it is returned again by the next DoGetToken call.
+    Procedure Unget(aToken : TPDFToken);
+    // Move to aPosition (negative values count from the end of the stream)
+    // and discard pushed-back tokens. Raises EPDFScanner on an invalid position.
+    Procedure Reposition(aPosition : Int64; ReadForward : Boolean = True);
+    // Compare the bytes at the current position with aString, reading forward.
+    // With DoUnget the bytes that were read are stepped back over afterwards.
+    Function CompareBytes(aString : RawByteString; DoUnget : Boolean = False) : Boolean;
+    // Same comparison, reading backwards from the current position.
+    Function CompareBytesBackwards(aString : RawByteString; DoUnget : Boolean = False) : Boolean;
+    // Search forward for aByte; returns its position or -1. The cursor stays on the byte found.
+    function FindByte(aByte: Byte): Int64;
+    // Return all bytes up to (not including) aByte and move past aByte.
+    function GetTillByte(aByte: Byte): RawByteString;
+    // Search backwards for aByte; returns its position or -1.
+    Function FindByteBackwards(aByte : Byte) : Int64;
+    // Search forward / backward for a byte sequence; returns the position of
+    // its first byte, or -1 when it was not found.
+    Function FindBytes(aString : RawByteString; DoUnget : Boolean = False) : Int64;
+    Function FindBytesBackward(aString : RawByteString; DoUnget : Boolean = False) : Int64;
+    Property Stream : TStream Read GetStream;
+    Property Source : TPDFSourceStream Read FSource;
+    Property LastTokenPosition : Int64 Read FLastTokenPosition;
+    Property Position : Int64 Read GetPosition;
+  end;
+
+Const
+  senEOFWhileScanningString = 1;
+  senInvalidOctalCharacter = 2;
+  senEOFWhileScanningEOD = 3;
+  senInvalidCharWhileScanningEOD = 4;
+  senInvalidHexString = 5;
+
+implementation
+
+resourcestring
+  SErrEOFWhileScanningString = 'EOF while scanning string literal';
+  SErrInvalidOctalCharacter = 'Invalid octal character: "%s"';
+  SErrEOFWhileScanningEOD = 'EOF while scanning >>';
+  SErrInvalidCharWhileScanningEOD = 'Invalid character "%s" while scanning >>';
+  SErrInvalidHexString = 'Invalid hex string: %s';
+
+constructor TPDFScanner.Create(aFile: TStream; aBufSize: Cardinal);
+
+// Wrap aFile in a buffered source that uses a buffer of aBufSize bytes
+// and load the buffer starting at stream offset 0.
+
+var
+  lSource : TPDFSourceStream;
+
+begin
+  lSource:=TPDFSourceStream.Create(aFile,aBufSize);
+  FSource:=lSource;
+  lSource.FillBufferForwardAt(0);
+end;
+
+destructor TPDFScanner.Destroy;
+
+// Release the buffered source created in the constructor.
+
+begin
+  FreeAndNil(FSource);
+  inherited;
+end;
+
+class function TPDFScanner.CharacterClass(aByte: Byte): TCharacterClass;
+
+// Classify a byte: NUL, TAB, LF, FF, CR and space are whitespace;
+// ( ) < > [ ] { } / % are delimiters; everything else is regular.
+
+begin
+  case aByte of
+    0,9,10,12,13,32 :
+      Result:=ccWhitespace;
+    $25,$28,$29,$2F,$3C,$3E,$5B,$5D,$7B,$7D :
+      Result:=ccDelimiter;
+  else
+    Result:=ccRegular;
+  end;
+end;
+
+function TPDFScanner.GetToken(aAllowWhiteSpace: boolean = True): TPDFToken;
+
+// Return the next token and remember its position in LastTokenPosition.
+// When aAllowWhiteSpace is False, whitespace tokens are skipped.
+
+begin
+  Result:=DoGetToken;
+  if not aAllowWhiteSpace then
+    while Result.TokenType=ptWhiteSpace do
+      Result:=DoGetToken;
+  FLastTokenPosition:=Result.TokenPos;
+end;
+
+function TPDFScanner.CopyBytes(B: TBytes; aCount: Integer): Integer;
+
+// Copy raw bytes from the source into B and return the number copied.
+// Not allowed while pushed-back tokens exist: the source position would
+// not match what the caller has seen.
+
+begin
+  if FUngetLen<=0 then
+    Result:=FSource.CopyBytes(B,aCount)
+  else
+    Raise EPDFScanner.Create('Cannot copy bytes with unget data present');
+end;
+
+procedure TPDFScanner.ReadStreamData(S: TStream);
+
+{
+  Write the TokenData of every token up to the 'endstream' keyword to S.
+  The 'endstream' token itself is pushed back so the caller can read it.
+  Raises EPDFScanner when the end of the source is reached before
+  'endstream' was seen (previously this looped forever).
+  NOTE(review): data is copied token by token, so anything the tokenizer
+  changes (e.g. resolved string escapes) is not reproduced byte-exact -
+  confirm this is acceptable for the callers.
+}
+
+  Function IsEndOfStream(var aToken : TPDFToken) : Boolean;
+
+  begin
+    Result:=(aToken.TokenType=ptKeyword) and (aToken.TokenData='endstream');
+  end;
+
+Var
+  lToken : TPDFToken;
+  lLen : Integer;
+
+begin
+  lToken:=GetToken;
+  While Not IsEndOfStream(lToken) do
+    begin
+    lLen:=Length(lToken.TokenData);
+    if lLen>0 then
+      S.WriteBuffer(lToken.TokenData[1],lLen)
+    else if (FunGetLen=0) and FSource.IsEOF then
+      // An empty token with nothing left to read: the data ended without 'endstream'.
+      Raise EPDFScanner.Create('EOF encountered while scanning for endstream');
+    lToken:=GetToken;
+    end;
+  // The loop only ends on the endstream token: hand it back.
+  Unget(lToken);
+end;
+
+procedure TPDFScanner.Unget(aToken: TPDFToken);
+
+// Push aToken back; DoGetToken returns pushed-back tokens in reverse
+// order of pushing. Raises EPDFScanner when the unget stack is full
+// instead of writing past the end of the array.
+
+begin
+  if FunGetLen>=High(FUnget) then
+    Raise EPDFScanner.CreateFmt('Cannot unget more than %d tokens',[High(FUnget)]);
+  Inc(FunGetLen);
+  FUnget[FunGetLen]:=aToken;
+end;
+
+procedure TPDFScanner.Reposition(aPosition: Int64; ReadForward : Boolean = True);
+
+{
+  Move the source to aPosition and refill its buffer.
+  A negative aPosition counts from the end of the stream.
+  ReadForward selects whether the buffer is filled starting at aPosition
+  or ending at aPosition.
+  Pushed-back tokens are always discarded.
+  Raises EPDFScanner when the position is invalid; this includes a
+  position that is still negative after end-relative normalisation,
+  which was previously handed to the stream unchecked.
+}
+
+Var
+  Res : Boolean;
+
+begin
+  if aPosition<0 then
+    aPosition:=FSource.StreamSize+aPosition;
+  FunGetLen:=0;
+  if aPosition<0 then
+    Res:=False
+  else if ReadForward then
+    Res:=FSource.FillBufferForwardAt(aPosition)
+  else
+    Res:=FSource.FillBufferBackwardAt(aPosition);
+  if not Res then
+    Raise EPDFSCanner.CreateFmt('Invalid position : %d',[aPosition]);
+end;
+
+function TPDFScanner.CompareBytes(aString: RawByteString; DoUnget : Boolean = False): Boolean;
+
+{
+  Compare the bytes at the current position with aString, reading forward.
+  Reading stops at the first mismatch. Reaching the end of the stream
+  before all of aString was compared counts as a mismatch; no byte is
+  read past the end.
+  With DoUnget the source is stepped back over the bytes that were read,
+  so the position is unchanged on return. Without it the position stays
+  behind the last byte read.
+  An empty aString compares equal.
+}
+
+Var
+  I,Len : Integer;
+  Bytes : PByte;
+
+begin
+  Result:=True;
+  Len:=Length(aString);
+  Bytes:=PByte(PChar(aString));
+  I:=0;
+  While Result and (I<len) do
+    begin
+    if FSource.IsEOF then
+      // Nothing left to compare against: do not touch the cursor.
+      Result:=False
+    else
+      begin
+      Result:=(FSource.GetByte=Bytes^);
+      Inc(I);
+      Inc(Bytes);
+      end;
+    end;
+  if DoUnGet then
+    begin
+    // I counts the bytes actually consumed.
+    While I>0 do
+      begin
+      FSource.Previous;
+      Dec(I);
+      end;
+    end;
+end;
+
+function TPDFScanner.CompareBytesBackwards(aString: RawByteString;
+  DoUnget: Boolean): Boolean;
+// Compare aString, last character first, with the bytes read backwards
+// from the current position. Reading stops at the first mismatch.
+// NOTE(review): assumes aString is not empty (aString[Len] is addressed) - confirm with callers.
+Var
+  I,Len : Integer;
+  Bytes : PByte;
+
+begin
+  Result:=True;
+  Len:=Length(aString);
+  Bytes:=PByte(PChar(@aString[Len]));
+  I:=Len;
+  // I counts the characters still to compare; Len-I bytes have been consumed.
+  While Result and (I>0) do
+    begin
+    Result:=(FSource.GetByte(True)=Bytes^);
+    Dec(I);
+    Dec(Bytes);
+    end;
+  // After a full match the position is one before the first matched byte: step onto it.
+  If Result then // Put back on current position
+    FSource.Next;
+  if DoUnGet then
+    begin
+    // Step forward once for every byte consumed.
+    // NOTE(review): after a full match this comes on top of the Next above,
+    // so the position ends one past where it started - confirm this is intended.
+    While I<Len do
+      begin
+      FSource.Next;
+      Inc(I);
+      end;
+    end;
+end;
+
+function TPDFScanner.FindByte(aByte : Byte) : Int64;
+
+// Scan forward from the current position for aByte.
+// Returns the position of the first occurrence, leaving the source on
+// that byte, or -1 when the end of the stream is reached first.
+
+begin
+  Result:=-1;
+  While (Result=-1) and not FSource.IsEOF do
+    if (FSource.Cursor^<>aByte) then
+      FSource.Next
+    else
+      Result:=FSource.Position;
+end;
+
+function TPDFScanner.GetTillByte(aByte: Byte): RawByteString;
+
+{
+  Return all bytes from the current position up to, but not including,
+  the first occurrence of aByte, and move the source past that byte.
+  Raises EPDFScanner when pushed-back tokens are present or when the end
+  of the stream is reached before aByte was found.
+  The cursor is only dereferenced while the source is not at EOF.
+}
+
+Var
+  I,Len : Integer;
+  aCurrent : Byte;
+
+begin
+  Result:='';
+  if FunGetLen>0 then
+    Raise EPDFScanner.Create('Cannot get till byte when unget tokens are present');
+  // The result grows in steps of StringLengthDelta and is trimmed at the end.
+  Len:=StringLengthDelta;
+  SetLength(Result,Len);
+  I:=0;
+  While Not FSource.IsEOF do
+    begin
+    aCurrent:=FSource.Cursor^;
+    if aCurrent=aByte then
+      Break;
+    Inc(I);
+    if I>Len then
+      begin
+      Len:=Len+StringLengthDelta;
+      SetLength(Result,Len);
+      end;
+    Result[I]:=AnsiChar(aCurrent);
+    FSource.Next;
+    end;
+  if FSource.IsEOF then
+    Raise EPDFScanner.Create('EOF encountered while scanning for byte');
+  // Skip the terminating byte itself.
+  FSource.Next;
+  SetLength(Result,I);
+end;
+
+function TPDFScanner.FindByteBackwards(aByte: Byte): Int64;
+
+// Scan backwards from the current position for aByte.
+// Returns the position of the first occurrence found, leaving the source
+// on that byte, or -1 when the beginning of the stream is passed first.
+
+begin
+  Result:=-1;
+  While (Result=-1) and Not FSource.isBOF do
+    if (FSource.Cursor^<>aByte) then
+      FSource.Previous
+    else
+      Result:=FSource.Position;
+end;
+
+function TPDFScanner.FindBytes(aString: RawByteString; DoUnget : Boolean = False): Int64;
+
+{
+  Search forward from the current position for the byte sequence aString.
+  Returns the position of its first byte, or -1 when it is not found.
+  On success the source is left behind the match, or on its first byte
+  when DoUnget is True.
+
+  Fixes compared to the first version:
+  - an empty aString returns -1 instead of indexing aString[1];
+  - after a failed candidate the search resumes at the byte right after
+    the candidate start, so overlapping candidates are no longer skipped
+    when DoUnget is False (e.g. 'aab' in 'aaab');
+  - candidates too close to the end of the stream for a full match are
+    rejected up front, so nothing is compared past the end.
+}
+
+Var
+  B : Byte;
+  Len : Integer;
+  lStart : Int64;
+
+begin
+  Result:=-1;
+  Len:=Length(aString);
+  if Len=0 then
+    Exit;
+  B:=Ord(aString[1]);
+  While (Result=-1) and not FSource.IsEOF do
+    begin
+    lStart:=FindByte(B);
+    // No candidate left, or not enough bytes behind it (which then also
+    // holds for every later candidate).
+    if (lStart=-1) or (lStart+Len>FSource.StreamSize) then
+      Break;
+    if CompareBytes(aString,DoUnget) then
+      Result:=lStart
+    else
+      begin
+      // Resume at lStart+1, wherever the comparison left the source.
+      While FSource.Position>lStart+1 do
+        FSource.Previous;
+      if FSource.Position=lStart then
+        FSource.Next;
+      // The comparison may have read up to the end of the stream, which
+      // leaves the buffer marked as exhausted: reload it.
+      if FSource.IsEOB then
+        if not FSource.FillBufferForwardAt(FSource.Position) then
+          Break;
+      end;
+    end;
+end;
+
+function TPDFScanner.FindBytesBackward(aString: RawByteString; DoUnget : Boolean = False): Int64;
+// Search backwards from the current position for the byte sequence aString.
+// Returns the position of its first byte, or -1 when the beginning of the
+// stream is passed without a match.
+// NOTE(review): assumes aString is not empty (aString[Len] is read) - confirm with callers.
+Var
+  B : Byte;
+  len : Integer;
+
+begin
+  Result:=-1;
+  Len:=Length(aString);
+  // Candidates are located through the LAST byte of aString.
+  B:=Ord(aString[Len]);
+  While (Result=-1) and Not FSource.isBOF do
+    begin
+    Result:=FindByteBackwards(B);
+    if Result<>-1 then
+      begin
+      if CompareBytesBackwards(aString,DoUnget) then
+        // Result is the position of the last byte: convert it to the position of the first one.
+        Dec(Result,Length(aString)-1)
+      else
+        begin
+        Result:=-1;
+        // NOTE(review): with DoUnget=False the failed comparison has already
+        // moved back over the bytes it read, so candidates inside that range
+        // are not examined - confirm whether this matters for the callers.
+        FSource.Previous;
+        end;
+      end;
+    end;
+end;
+
+function TPDFScanner.GetStream: TStream;
+// Getter for the Stream property: the stream the buffered source reads from.
+begin
+  Result:=FSource.Stream;
+end;
+
+function TPDFScanner.GetPosition: Int64;
+// Getter for the Position property: the current position of the buffered source.
+begin
+  Result:=FSource.Position;
+end;
+
+function TPDFScanner.DoGetToken: TPDFToken;
+
+{
+  Return the next token.
+  A pushed-back token, if any, is returned first. Otherwise one token is
+  scanned from the source; at end of stream a default-initialised token
+  carrying only the position is returned.
+  TokenPos is the source position at which the token starts.
+
+  Fix compared to the first version: when the end of the stream is
+  reached while scanning a whitespace, keyword/number or name token, the
+  byte read last is added to the token only if it belongs to it; in all
+  other cases it is pushed back. Before, a delimiter or whitespace byte
+  at the very end of the data was dropped after a keyword, the last
+  character of a name at the end of the data was split off into a
+  separate token, and a '/' at the very end read past the stream.
+}
+
+Var
+  CurrentToken : RawByteString;
+  CharPos : Integer;
+
+  // Append one byte to CurrentToken, doubling its capacity when full.
+  Procedure AddToToken(aByte : Byte);
+  var
+    L : Integer;
+
+  begin
+    Inc(CharPos);
+    L:=Length(CurrentToken);
+    if CharPos>L then
+      SetLength(CurrentToken,L*2);
+    CurrentToken[CharPos]:=Char(aByte);
+  end;
+
+  // Make the token consist of exactly one byte, with the given type.
+  Procedure SetToken(aByte : Byte; aType : TPDFTokenType); // inline;
+
+  begin
+    CurrentToken:=Char(aByte);
+    CharPos:=1;
+    Result.TokenType:=aType;
+  end;
+
+Var
+  aByte : Byte;
+  aChar : Char absolute aByte; // For debugging
+  isNumeric : Boolean;
+  I : Integer;
+
+begin
+  if FunGetLen>0 then
+    begin
+    Result:=FUnget[FunGetLen];
+    Dec(FunGetLen);
+    Exit;
+    end;
+  SetLength(CurrentToken,10);
+  CharPos:=0;
+  Result:=Default(TPDFToken);
+  Result.TokenPos:=FSource.Position;
+  if FSource.isEOF then
+    Exit;
+  aByte:=FSource.GetByte;
+  Case CharacterClass(aByte) of
+    ccWhitespace :
+      begin
+      result.TokenType:=ptWhiteSpace;
+      While (not FSource.isEOF) and (CharacterClass(aByte)=ccWhiteSpace) do
+        begin
+        AddToToken(aByte);
+        aByte:=FSource.GetByte;
+        end;
+      // aByte has been read but not stored yet.
+      if (CharacterClass(aByte)=ccWhiteSpace) then
+        AddToToken(aByte)
+      else
+        FSource.Previous;
+      end;
+    ccRegular:
+      begin
+      Result.TokenType:=ptKeyword;
+      While not FSource.isEOF and (CharacterClass(aByte)=ccRegular) do
+        begin
+        AddToToken(aByte);
+        aByte:=FSource.GetByte;
+        end;
+      // aByte has been read but not stored yet.
+      if (CharacterClass(aByte)=ccRegular) then
+        AddToToken(aByte)
+      else
+        FSource.Previous;
+      // A token made up only of digits, sign and decimal point characters is a number.
+      I:=1;
+      isNumeric:=True;
+      While IsNumeric and (I<=CharPos) do
+        begin
+        isNumeric:=CurrentToken[i] in ['0'..'9','+','-','.'];
+        Inc(I);
+        end;
+      if IsNumeric then
+        Result.TokenType:=ptNumber;
+      end;
+
+    ccDelimiter:
+      begin
+      case aByte of
+        $3C: // <
+           begin
+           // '<<' is a token of its own; a single '<' starts a hex string.
+           Result.TokenType:=ptshl;
+           AddToToken(aByte);
+           if Not FSource.IsEOF then
+             begin
+             aByte:=FSource.GetByte;
+             if aByte=$3C then
+               AddToToken(aByte)
+             else
+               begin
+               Result.TokenType:=ptHexString;
+               CurrentToken:=ParseHexString(aByte);
+               CharPos:=Length(CurrentToken);
+               end;
+             end;
+           end;
+        $3E: // >
+           begin
+           // Only '>>' is accepted here; the '>' closing a hex string is consumed by ParseHexString.
+           AddToToken(aByte);
+           if FSource.IsEOF then
+             DoError(senEOFWhileScanningEOD,SErrEOFWhileScanningEOD);
+           aByte:=FSource.GetByte;
+           if aByte=$3E then
+             begin
+             Result.TokenType:=ptShr;
+             AddToToken(aByte);
+             end
+           else
+             DoError(senInvalidCharWhileScanningEOD,SErrInvalidCharWhileScanningEOD,[aChar]);
+           end;
+        $25: // %
+          begin
+          // Comment: runs up to and including the end of line (CR, LF or CR LF).
+          Result.TokenType:=ptComment;
+          While Not FSource.IsEOF do
+            begin
+            AddToToken(aByte);
+            if (aByte in [10,13]) then
+              begin
+              // Read one byte ahead: it may be the LF of a CR LF pair.
+              if not FSource.IsEOF then
+                aByte:=FSource.GetByte;
+              Break;
+              end;
+            if FSource.IsEOF then
+              aByte:=0
+            else
+              aByte:=FSource.GetByte;
+            end;
+          if FSource.IsEOF then
+            begin
+            if aByte<>0 then
+              AddToToken(aByte)
+            end
+          else if aByte=10 then
+            AddToToken(aByte)
+          else
+            FSource.Previous;
+          end;
+        $2F: // /
+          begin
+          Result.TokenType:=ptName;
+          AddToToken(aByte);
+          if not FSource.IsEOF then
+            begin
+            aByte:=FSource.GetByte;
+            While (not FSource.IsEOF) and (CharacterClass(aByte) = ccRegular) do
+              begin
+              AddToToken(aByte);
+              aByte:=FSource.GetByte;
+              end;
+            // aByte has been read but not stored yet.
+            if (CharacterClass(aByte) = ccRegular) then
+              AddToToken(aByte)
+            else
+              FSource.Previous;
+            end;
+          end;
+        $5B: SetToken(aByte,ptSquareOpen);  // [
+        $5D: SetToken(aByte,ptSquareClose); // ]
+        $7B: SetToken(aByte,ptCurlyOpen);   // {
+        $7D: SetToken(aByte,ptCurlyClose);  // }
+        $28:   // (
+          begin
+          Result.TokenType:=ptString;
+          CurrentToken:=ParseString(aByte);
+          CharPos:=Length(CurrentToken);
+          end;
+        end;
+      end; // Case ccdelimiter
+  end; // Case CharacterClass(aByte)
+  SetLength(CurrentToken,CharPos);
+  Result.TokenData:=CurrentToken;
+  SetCodePage(Result.TokenData,1252,False);
+
+end;
+
+function HexToBin(HexValue, BinValue: PChar; BinBufSize: Integer): Integer;
+
+{
+  Decode the zero-terminated hexadecimal text HexValue into BinValue,
+  writing at most BinBufSize bytes. Space, LF, CR, FF and TAB are
+  skipped before every digit.
+  Returns the number of bytes written, or -1 when a character that is
+  not a hex digit is met or the text ends in the middle of a byte.
+}
+
+const
+  HexWhiteSpace = [' ',#10,#13,#12,#9];
+
+var
+  lRemaining,lHigh,lLow : Integer;
+
+  // Skip whitespace and decode the hex digit under the cursor.
+  // Returns False, leaving the cursor in place, if it is not a hex digit.
+  function NextNibble(out aNibble : Integer) : Boolean;
+
+  begin
+    while (HexValue^ in HexWhiteSpace) do
+      Inc(HexValue);
+    case HexValue^ of
+      '0'..'9' :
+        aNibble:=Ord(HexValue^) and 15;
+      'A'..'F','a'..'f' :
+        // The low 4 bits of a letter are 1..6; adding 9 yields 10..15.
+        aNibble:=(Ord(HexValue^)+9) and 15;
+    else
+      Exit(False);
+    end;
+    Inc(HexValue);
+    Result:=True;
+  end;
+
+begin
+  lRemaining:=BinBufSize;
+  while (lRemaining>0) and (HexValue^<>#0) do
+    begin
+    // Trailing whitespace followed by the terminator ends the input cleanly.
+    while (HexValue^ in HexWhiteSpace) do
+      Inc(HexValue);
+    if HexValue^=#0 then
+      break;
+    if not NextNibble(lHigh) then
+      Exit(-1);
+    if not NextNibble(lLow) then
+      Exit(-1);
+    BinValue^:=Chr(lLow+(lHigh shl 4));
+    Inc(BinValue);
+    Dec(lRemaining);
+    end;
+  Result:=BinBufSize-lRemaining;
+end;
+
+
+function TPDFScanner.ParseHexString(const aStartByte: Byte): RawByteString;
+
+{
+  Parse a hex string. The opening '<' has already been consumed;
+  aStartByte is the byte that followed it. On return the source is
+  positioned behind the closing '>'.
+  Raises EPDFScanner (senInvalidHexString) when the text contains a
+  character that is neither a hex digit nor whitespace.
+
+  Fixes compared to the first version:
+  - '<>' yields an empty string instead of scanning on to the next '>';
+  - with an odd number of digits the missing last digit is taken to be 0
+    (as the PDF specification prescribes) instead of dropping the digit;
+  - the trailing "not on >" check was removed: "not Source.Cursor^=Ord('>')"
+    applies "not" to the byte before comparing, and GetTillByte has
+    already moved past the '>' at that point.
+}
+
+Var
+  aValue : RawBytestring;
+  lRes,lRawlen,lDigits,I : Integer;
+
+begin
+  Result:='';
+  if aStartByte=Ord('>') then
+    Exit;
+  aValue:=Char(aStartByte)+GetTillByte(Ord('>'));
+  // Count everything that is not whitespace; invalid characters are reported by HexToBin.
+  lDigits:=0;
+  For I:=1 to Length(aValue) do
+    if not (aValue[I] in [' ',#10,#13,#12,#9]) then
+      Inc(lDigits);
+  if Odd(lDigits) then
+    aValue:=aValue+'0';
+  // Upper bound for the decoded size; trimmed to the real size below.
+  lRawlen:=Length(aValue) div 2;
+  SetLength(Result,lRawLen);
+  lRes:=HexToBin(PChar(aValue),PChar(Result),lRawLen);
+  if lRes=-1 then
+    DoError(senInvalidHexString,SErrInvalidHexString,[aValue]);
+  SetLength(Result,lRes);
+end;
+
+
+function TPDFScanner.ParseString(const aStartByte: Byte): RawByteString;
+
+{
+  Parse a literal string. The opening '(' has already been consumed.
+  Returns the contents without the outer parentheses; balanced inner
+  parentheses are kept and backslash escapes are resolved.
+  Raises EPDFScanner:
+  - senEOFWhileScanningString when the data ends inside the string;
+  - senInvalidOctalCharacter for \8, \9 or an octal value above 255.
+
+  Fixes compared to the first version:
+  - an octal escape may have one, two or three digits (before, exactly
+    three characters were always consumed, so e.g. "\0)" was an error);
+  - a backslash followed by CR or CR LF is a line continuation, like a
+    backslash followed by LF (before, the LF of a CR LF pair stayed in
+    the string).
+}
+
+Const
+  cOpen = ord('(');
+  cClose = ord(')');
+  cEscape = Ord('\');
+
+Var
+  CurrentToken : RawByteString;
+  CharPos : Integer;
+  lOpenCount : Integer;
+  aByte,aByte2 : Byte;
+  aChar2 : Char absolute aByte2;
+  lOctal : String;
+  aOctal : integer;
+
+
+  // Append one byte to CurrentToken, doubling its capacity when full.
+  Procedure AddToToken(cByte : Byte);
+  var
+    L : Integer;
+
+  begin
+    Inc(CharPos);
+    L:=Length(CurrentToken);
+    if CharPos>L then
+      SetLength(CurrentToken,L*2);
+    CurrentToken[CharPos]:=Char(cByte);
+  end;
+
+
+begin
+  CharPos:=0;
+  CurrentToken:=Default(RawBytestring);
+  SetLength(CurrentToken,10);
+  // Nesting depth of parentheses; the string ends when it drops to 0.
+  lOpenCount:=1;
+  repeat
+    aByte:=FSource.GetByte;
+    // Copy plain characters up to the next parenthesis or backslash.
+    While not (FSource.isEOF or (aByte in [cOpen,cClose,cEscape])) do
+      begin
+      AddToToken(aByte);
+      aByte:=FSource.GetByte;
+      end;
+    Case aByte of
+    cEscape :
+      begin
+      if FSource.IsEOF then
+        DoError(senEOFWhileScanningString,SErrEOFWhileScanningString);
+      aByte2:=FSource.GetByte();
+      case aChar2 of
+        #10 : ; // Line continuation: ignore
+        #13 :
+          // Line continuation: also swallow the LF of a CR LF pair.
+          if (not FSource.IsEOF) and (FSource.Cursor^=10) then
+            FSource.GetByte;
+        'n' : AddToToken(10);
+        'r' : AddToToken(13);
+        't' : AddToToken(9);
+        'b' : AddToToken(8);
+        'f' : AddToToken(12);
+        '(' : AddToToken(ord('('));
+        ')' : AddToToken(ord(')'));
+        '\' : AddToToken(ord('\'));
+        '0'..'7':
+            begin
+            // Up to two more octal digits may follow.
+            lOctal:=aChar2;
+            While (Length(lOctal)<3) and (not FSource.IsEOF)
+                  and (FSource.Cursor^ in [Ord('0')..Ord('7')]) do
+              lOctal:=lOctal+Char(FSource.GetByte);
+            aOctal:=StrToIntDef('&'+lOctal,-1);
+            if (aOctal=-1) or (aOctal>=256) then
+              DoError(senInvalidOctalCharacter,SErrInvalidOctalCharacter,[lOctal]);
+            AddToToken(aOctal and $FF)
+            end;
+        '8','9':
+            DoError(senInvalidOctalCharacter,SErrInvalidOctalCharacter,[aChar2]);
+      else
+        // Unknown escape: ignore
+      end;
+      end;
+    cOpen:
+      begin
+      inc(lOpenCOunt);
+      AddToToken(cOpen);
+      end;
+    cClose:
+      begin
+      Dec(lOpenCOunt);
+      // The final closing parenthesis is not part of the string.
+      if lOpenCount>=1 then
+        AddToToken(cClose);
+      end;
+    end;
+  until (lOpenCount<=0) or FSource.IsEOF;
+  if lOpenCount>0 then
+    DoError(senEOFWhileScanningString,SErrEOFWhileScanningString);
+  SetLength(CurrentToken,CharPos);
+  Result:=CurrentToken;
+  SetCodePage(Result,1252,False);
+end;
+
+procedure TPDFScanner.DoError(const Nr: Integer; const Msg: string);
+
+// Raise EPDFScanner with message Msg, error number Nr and the current
+// source position.
+
+Var
+  lError : EPDFScanner;
+
+begin
+  lError:=EPDFScanner.Create(Msg);
+  lError.Position:=FSource.Position;
+  lError.ErrorNumber:=Nr;
+  Raise lError;
+end;
+
+procedure TPDFScanner.DoError(const Nr: Integer; const Fmt: string;
+  const Args: array of const);
+// Formatting variant: builds the message with Format(Fmt,Args) and raises through DoError(Nr,Msg).
+begin
+  DoError(Nr,Format(Fmt,Args));
+end;
+
+
+end.
+

+ 259 - 0
packages/fcl-pdf/src/fppdfsource.pp

@@ -0,0 +1,259 @@
+{ **********************************************************************
+  This file is part of the Free Component Library
+
+  PDF File/Stream navigation
+  Copyright (c) 2022 by Michael Van Canneyt [email protected]
+
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+  **********************************************************************}
+{
+  PDF needs to be able to seek to arbitrary positions in the file.
+  Since we need to maintain a buffer for faster scanning,
+  we implement this here.
+}
+
+unit fppdfsource;
+
+{$mode ObjFPC}{$H+}
+
+interface
+
+uses
+  Classes, SysUtils;
+
+Const
+  DefaultBufferSize  = 4 * 1024; // 4K buffer
+
+Type
+  { TPDFSourceStream }
+
+  // Buffered, seekable byte source on top of a TStream.
+  // A window of the stream is kept in FBuffer; FCursor points at the
+  // current byte inside that window and FPosition is the matching
+  // 0-based stream position.
+  TPDFSourceStream = Class
+  Private
+    FBuffer : Array of Byte;
+    // Number of valid bytes in FBuffer.
+    FDatasize : Cardinal;
+    FCursor : PByte;
+    FStream : TStream;
+    // FStreamSize is read once, in the constructor.
+    FStreamSize,
+    FPosition : Int64;
+    function GetIsBOB: Boolean;
+    function GetIsBof: Boolean;
+    function GetIsEOB: Boolean;
+    function GetIsEOF: Boolean;
+    function GetPosInBuf: PTrInt;
+  Protected
+    // Is cursor before beginning of buffer ?
+    Property IsBOB : Boolean Read GetIsBOB;
+    // Position in buffer (0-Based)
+    Property PosInBuf : PtrInt Read GetPosInBuf;
+    // Size of data in buffer
+    Property Datasize : Cardinal Read FDatasize;
+  Public
+    // The stream is not owned; its size is cached and its position reset to 0.
+    Constructor Create(aStream : TStream; aBufSize : Cardinal = DefaultBufferSize);
+    // First byte of buffer is aPosition. aPosition is 0 based.
+    // Returns False when aPosition is not before the end of the stream.
+    Function FillBufferForwardAt(aPosition : Int64): Boolean;
+    // Last byte of buffer is at aPosition. aPosition is 0 based.
+    // Returns False when aPosition is outside the stream or nothing could be read.
+    Function FillBufferBackWardAt(aPosition : Int64): Boolean;
+    // Return byte at current position.
+    // If Reverse is False, move position forward (next)
+    // If Reverse is true, move position backwards (Previous)
+    function GetByte(Reverse : Boolean = False): Byte;
+    // Copy up to aCount bytes, starting at the current position, into B
+    // and move the position forward. Returns the number of bytes copied.
+    Function CopyBytes(B : TBytes; aCount : Integer) : Integer;
+    // Move position backwards
+    Procedure Previous;
+    // Move position forward
+    Procedure Next;
+    // Current position.
+    Property Position : Int64 Read FPosition;
+    // Points to Current byte in buffer.
+    Property Cursor : PByte Read FCursor;
+    // Is cursor after end of the data in the buffer ?
+    Property IsEOB : Boolean Read GetIsEOB;
+    // Current position is <0.
+    Property IsBof : Boolean Read GetIsBof;
+    // Current position is >=Size
+    Property IsEOF : Boolean Read GetIsEOF;
+    Property Stream : TStream Read FStream;
+    Property StreamSize : Int64 Read FStreamSize;
+  end;
+
+
+implementation
+
+
+function TPDFSourceStream.GetIsEOF: Boolean;
+// True when the current position is at or beyond the cached stream size.
+begin
+  Result:=FPosition>=FStreamSize;
+end;
+
+function TPDFSourceStream.GetPosInBuf: PTrInt;
+// Offset of the cursor from the start of the buffer, in bytes.
+begin
+  Result:=PTRUInt(FCursor)-PtrUInt(FBuffer)
+end;
+
+function TPDFSourceStream.GetIsBof: Boolean;
+// True when the current position has moved before the first byte of the stream.
+begin
+  Result:=FPosition<0;
+end;
+
+function TPDFSourceStream.GetIsBOB: Boolean;
+// True when the cursor points before the first byte of the buffer.
+begin
+  Result:=PTRUInt(FCursor)<PtrUInt(FBuffer)
+end;
+
+function TPDFSourceStream.GetIsEOB: Boolean;
+
+// True when the cursor offset in the buffer is at or beyond the number of
+// valid bytes (FDatasize). With an empty buffer (FDatasize=0) this holds
+// for every offset >= 0.
+
+var
+  aBufPos : Integer;
+
+begin
+  aBufPos:=(PTRUInt(FCursor)-PtrUInt(FBuffer));
+  Result:=aBufPos>=FDatasize;
+end;
+
+constructor TPDFSourceStream.Create(aStream: TStream; aBufSize: Cardinal);
+
+// Set up an empty buffer of aBufSize bytes on top of aStream.
+// The stream size is read once here, and the stream is rewound to its
+// start. No data is loaded yet.
+
+begin
+  FStream:=aStream;
+  // Buffer starts out empty, cursor on its first byte.
+  SetLength(FBuffer,aBufSize);
+  FCursor:=PByte(FBuffer);
+  FDatasize:=0;
+  FPosition:=0;
+  // Cache the size: asking the stream every time would be expensive.
+  FStreamSize:=aStream.Size;
+  aStream.Position:=0;
+end;
+
+function TPDFSourceStream.FillBufferForwardAt(aPosition: Int64): Boolean;
+
+{
+  Load the buffer with stream data starting at aPosition (0-based) and
+  put the cursor on its first byte.
+  Returns False, and marks the buffer as empty, when aPosition is not
+  inside the stream; cursor and position are left untouched then.
+  Negative positions are rejected as well, in line with FillBufferBackWardAt
+  (before, they were passed on to the stream).
+}
+
+begin
+  Result:=(aPosition>=0) and (aPosition<FStreamSize);
+  if Not Result then
+    begin
+    FDataSize:=0;
+    Exit;
+    end;
+  FStream.Position:=aPosition;
+  FDataSize:=FStream.Read(FBuffer[0],Length(FBuffer));
+  FCursor:=PByte(FBuffer);
+  FPosition:=aPosition;
+end;
+
+function TPDFSourceStream.FillBufferBackWardAt(aPosition: Int64): Boolean;
+
+{
+  Load the buffer so that its last byte is the stream byte at aPosition
+  (0-based) and put the cursor on that byte.
+  Returns False when aPosition is outside the stream (nothing is changed
+  in that case) or when no data could be read.
+}
+
+Var
+  lCount : Integer;
+
+begin
+  Result:=False;
+  if (aPosition<0) or (aPosition>=FStreamSize) then
+    exit;
+  // Read a full buffer, or everything up to aPosition if that is less.
+  lCount:=Length(FBuffer);
+  if lCount>aPosition+1 then
+    lCount:=aPosition+1;
+  FStream.Position:=aPosition+1-lCount;
+  FDataSize:=FStream.Read(FBuffer[0],lCount);
+  // Cursor on the last byte that was read.
+  FCursor:=PByte(FBuffer);
+  inc(FCursor,FDataSize-1);
+  FPosition:=aPosition;
+  Result:=(FDataSize>0);
+end;
+
+function TPDFSourceStream.GetByte(Reverse: Boolean): Byte;
+
+{
+  Return the byte under the cursor and move one position forward, or one
+  position backwards when Reverse is True. The buffer is reloaded when
+  the cursor leaves it.
+  Raises EInOutError when there is no buffer to read from.
+  The message for the reverse direction now reads "before start of
+  stream" (it said "before end of stream").
+  NOTE(review): the byte is read without checking IsEOF/IsBOF first;
+  callers appear to be expected to check before calling.
+}
+
+begin
+  Result:=0;
+  if Reverse then
+    begin
+    if (FCursor=Nil) then
+      Raise EInOutError.Create('Read before start of stream');
+    Result:=FCursor^;
+    Dec(FCursor);
+    Dec(FPosition);
+    if IsBOB then
+      FillBufferBackwardAt(FPosition);
+    end
+  else
+    begin
+    if (FCursor=Nil) then
+      Raise EInOutError.Create('Read after end of stream');
+    Result:=FCursor^;
+    Inc(FCursor);
+    Inc(FPosition);
+    if isEOB then
+      FillBufferForwardAt(FPosition);
+    end;
+end;
+
+
+function TPDFSourceStream.CopyBytes(B: TBytes; aCount: Integer): Integer;
+
+{
+  Copy up to aCount bytes, starting at the current position, into B
+  (from index 0 on) and move the position forward accordingly.
+  Returns the number of bytes copied; this is less than aCount when the
+  end of the stream is reached.
+  aCount is limited to Length(B), so the copy can no longer run past the
+  end of B.
+  Raises EInOutError when there is no buffer to read from.
+}
+
+Var
+  lMoveCount : Integer;
+
+begin
+  Result:=0;
+  if aCount>Length(B) then
+    aCount:=Length(B);
+  if FDataSize=0 then
+    if not FillBufferForwardAt(FPosition) then
+      Exit;
+  if FCursor=Nil then
+    Raise EInOutError.Create('Read after end of stream');
+  While aCount>0 do
+    begin
+    // Take what is left in the buffer, but no more than still requested.
+    lMoveCount:=FDataSize-PosInBuf;
+    if lMoveCount>aCount then
+      lMoveCount:=aCount;
+    Move(FCursor^,B[Result],lMoveCount);
+    Inc(Result,lMoveCount);
+    inc(FPosition,lMoveCount);
+    inc(FCursor,lMoveCount);
+    Dec(aCount,lMoveCount);
+    // Buffer exhausted: load the next part, stop at end of stream.
+    if (aCount>0) then
+      if not FillBufferForwardAt(FPosition) then
+        aCount:=0;
+    end;
+end;
+
+procedure TPDFSourceStream.Previous;
+
+{
+  Move one position backwards, reloading the buffer when the cursor
+  leaves it.
+  Moving from position 0 to -1 (IsBOF) is allowed; raises EInOutError
+  when the buffer cannot be reloaded.
+  The error message now reads "before start of stream" (it said
+  "before end of stream").
+}
+
+begin
+  if isBOB then
+    begin
+    // Already outside the buffer: load the part that ends at the previous position.
+    if not FillBufferBackWardAt(FPosition-1) then
+      Raise EInOutError.Create('Read before start of stream');
+    end
+  else
+    begin
+    Dec(FCursor);
+    Dec(FPosition);
+    if isBOB and not IsBOF then
+      if not FillBufferBackWardAt(FPosition) then
+        Raise EInOutError.Create('Read before start of stream');
+    end;
+end;
+
+procedure TPDFSourceStream.Next;
+
+// Move one position forward, reloading the buffer when the cursor leaves
+// the valid data. Stepping onto the end of the stream is allowed; raises
+// EInOutError when the buffer cannot be reloaded.
+
+begin
+  if isEOB then
+    begin
+    // Already outside the buffer: load the part that starts at the next position.
+    if not FillBufferForWardAt(FPosition+1) then
+      Raise EInOutError.Create('Read after end of stream');
+    Exit;
+    end;
+  Inc(FCursor);
+  Inc(FPosition);
+  if isEOB and (not isEOF) and (not FillBufferForWardAt(FPosition)) then
+    Raise EInOutError.Create('Read after end of stream 2');
+end;
+
+end.
+