123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581 |
- {
- $Id: xmlread.pp,v 1.17 2005/05/02 13:06:51 michael Exp $
- This file is part of the Free Component Library
- XML reading routines.
- Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- unit XMLRead;
- {$MODE objfpc}
- {$H+}
- interface
- {off $DEFINE MEM_CHECK}
- uses
- {$IFDEF MEM_CHECK}MemCheck,{$ENDIF}
- SysUtils, Classes, DOM;
- type
- EXMLReadError = class(Exception);
- procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File); overload;
- procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream); overload;
- procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream; const AFilename: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const AFilename: String); overload;
- procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File); overload;
- procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream); overload;
- procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream; const AFilename: String); overload;
- // =======================================================
- implementation
- const
- Letter = ['A'..'Z', 'a'..'z'];
- Digit = ['0'..'9'];
- PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
- '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
- '#', '@', '$', '_', '%'];
- WhitespaceChars: set of Char = [#9, #10, #13, ' '];
- NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
- function ComparePChar(p1, p2: PChar): boolean;
- begin
- if p1<>p2 then begin
- if (p1<>nil) and (p2<>nil) then begin
- while true do begin
- if (p1^=p2^) then begin
- if p1^<>#0 then begin
- inc(p1);
- inc(p2);
- end else begin
- Result:=true;
- exit;
- end;
- end else begin
- Result:=false;
- exit;
- end;
- end;
- Result:=true;
- end else begin
- Result:=false;
- end;
- end else begin
- Result:=true;
- end;
- end;
- function CompareLPChar(p1, p2: PChar; Max: integer): boolean;
- begin
- if p1<>p2 then begin
- if (p1<>nil) and (p2<>nil) then begin
- while Max>0 do begin
- if (p1^=p2^) then begin
- if (p1^<>#0) then begin
- inc(p1);
- inc(p2);
- dec(Max);
- end else begin
- Result:=true;
- exit;
- end;
- end else begin
- Result:=false;
- exit;
- end;
- end;
- Result:=true;
- end else begin
- Result:=false;
- end;
- end else begin
- Result:=true;
- end;
- end;
- function CompareIPChar(p1, p2: PChar): boolean;
- begin
- if p1<>p2 then begin
- if (p1<>nil) and (p2<>nil) then begin
- while true do begin
- if (p1^=p2^) or (upcase(p1^)=upcase(p2^)) then begin
- if p1^<>#0 then begin
- inc(p1);
- inc(p2);
- end else begin
- Result:=true;
- exit;
- end;
- end else begin
- Result:=false;
- exit;
- end;
- end;
- Result:=true;
- end else begin
- Result:=false;
- end;
- end else begin
- Result:=true;
- end;
- end;
- function CompareLIPChar(p1, p2: PChar; Max: integer): boolean;
- begin
- if p1<>p2 then begin
- if (p1<>nil) and (p2<>nil) then begin
- while Max>0 do begin
- if (p1^=p2^) or (upcase(p1^)=upcase(p2^)) then begin
- if (p1^<>#0) then begin
- inc(p1);
- inc(p2);
- dec(Max);
- end else begin
- Result:=true;
- exit;
- end;
- end else begin
- Result:=false;
- exit;
- end;
- end;
- Result:=true;
- end else begin
- Result:=false;
- end;
- end else begin
- Result:=true;
- end;
- end;
- type
- TXMLReaderDocument = class(TXMLDocument)
- public
- procedure SetDocType(ADocType: TDOMDocumentType);
- end;
- TXMLReaderDocumentType = class(TDOMDocumentType)
- public
- constructor Create(ADocument: TXMLReaderDocument);
- property Name: DOMString read FNodeName write FNodeName;
- end;
- TSetOfChar = set of Char;
- TXMLReader = class
- protected
- buf, BufStart: PChar;
- Filename: String;
- procedure RaiseExc(const descr: String);
- function SkipWhitespace: Boolean;
- procedure ExpectWhitespace;
- procedure ExpectString(const s: String);
- function CheckFor(s: PChar): Boolean;
- function CheckForChar(c: Char): Boolean;
- procedure SkipString(const ValidChars: TSetOfChar);
- function GetString(const ValidChars: TSetOfChar): String;
- function GetString(BufPos: PChar; Len: integer): String;
- function CheckName: Boolean;
- function GetName(var s: String): Boolean;
- function ExpectName: String; // [5]
- procedure SkipName;
- procedure ExpectAttValue(attr: TDOMAttr); // [10]
- function ExpectPubidLiteral: String; // [12]
- procedure SkipPubidLiteral;
- function ParseComment(AOwner: TDOMNode): Boolean; // [15]
- function ParsePI: Boolean; // [16]
- procedure ExpectProlog; // [22]
- function ParseEq: Boolean; // [25]
- procedure ExpectEq;
- procedure ParseMisc(AOwner: TDOMNode); // [27]
- function ParseMarkupDecl: Boolean; // [29]
- function ParseCharData(AOwner: TDOMNode): Boolean; // [14]
- function ParseCDSect(AOwner: TDOMNode): Boolean; // [18]
- function ParseElement(AOwner: TDOMNode): Boolean; // [39]
- procedure ExpectElement(AOwner: TDOMNode);
- function ParseReference(AOwner: TDOMNode): Boolean; // [67]
- procedure ExpectReference(AOwner: TDOMNode);
- function ParsePEReference: Boolean; // [69]
- function ParseExternalID: Boolean; // [75]
- procedure ExpectExternalID;
- function ParseEncodingDecl: String; // [80]
- procedure SkipEncodingDecl;
- procedure ResolveEntities(RootNode: TDOMNode);
- public
- doc: TDOMDocument;
- procedure ProcessXML(ABuf: PChar; const AFilename: String); // [1]
- procedure ProcessFragment(AOwner: TDOMNode; ABuf: PChar; const AFilename: String);
- procedure ProcessDTD(ABuf: PChar; const AFilename: String); // ([29])
- end;
- { TXMLReaderDocument }
- procedure TXMLReaderDocument.SetDocType(ADocType: TDOMDocumentType);
- begin
- FDocType := ADocType;
- end;
- constructor TXMLReaderDocumentType.Create(ADocument: TXMLReaderDocument);
- begin
- inherited Create(ADocument);
- end;
- procedure TXMLReader.RaiseExc(const descr: String);
- var
- apos: PChar;
- x, y: Integer;
- begin
- // find out the line in which the error occured
- apos := BufStart;
- x := 1;
- y := 1;
- while apos < buf do begin
- if apos[0] = #10 then begin
- Inc(y);
- x := 1;
- end else
- Inc(x);
- Inc(apos);
- end;
- raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
- IntToStr(x) + '): ' + descr);
- end;
- function TXMLReader.SkipWhitespace: Boolean;
- begin
- Result := False;
- while buf[0] in WhitespaceChars do
- begin
- Inc(buf);
- Result := True;
- end;
- end;
- procedure TXMLReader.ExpectWhitespace;
- begin
- if not SkipWhitespace then
- RaiseExc('Expected whitespace');
- end;
- procedure TXMLReader.ExpectString(const s: String);
- procedure RaiseStringNotFound;
- var
- s2: PChar;
- s3: String;
- begin
- GetMem(s2, Length(s) + 1);
- StrLCopy(s2, buf, Length(s));
- s3 := StrPas(s2);
- FreeMem(s2);
- RaiseExc('Expected "' + s + '", found "' + s3 + '"');
- end;
- var
- i: Integer;
- begin
- for i := 1 to Length(s) do
- if buf[i - 1] <> s[i] then begin
- RaiseStringNotFound;
- end;
- Inc(buf, Length(s));
- end;
- function TXMLReader.CheckFor(s: PChar): Boolean;
- begin
- if buf[0] <> #0 then begin
- if (buf[0]=s[0]) and (CompareLPChar(buf, s, StrLen(s))) then begin
- Inc(buf, StrLen(s));
- Result := True;
- end else
- Result := False;
- end else begin
- Result := False;
- end;
- end;
- function TXMLReader.CheckForChar(c: Char): Boolean;
- begin
- if (buf[0]=c) and (c<>#0) then begin
- inc(buf);
- Result:=true;
- end else begin
- Result:=false;
- end;
- end;
- procedure TXMLReader.SkipString(const ValidChars: TSetOfChar);
- begin
- while buf[0] in ValidChars do begin
- Inc(buf);
- end;
- end;
- function TXMLReader.GetString(const ValidChars: TSetOfChar): String;
- var
- OldBuf: PChar;
- i, len: integer;
- begin
- OldBuf:=Buf;
- while buf[0] in ValidChars do begin
- Inc(buf);
- end;
- len:=buf-OldBuf;
- SetLength(Result, Len);
- for i:=1 to len do begin
- Result[i]:=OldBuf[0];
- inc(OldBuf);
- end;
- end;
- function TXMLReader.GetString(BufPos: PChar; Len: integer): string;
- var i: integer;
- begin
- SetLength(Result,Len);
- for i:=1 to Len do begin
- Result[i]:=BufPos[0];
- inc(BufPos);
- end;
- end;
- {$IFDEF FPC}
- {$DEFINE UsesFPCWidestrings}
- {$ENDIF}
- {$IFDEF UsesFPCWidestrings}
- {procedure SimpleWide2AnsiMove(source:pwidechar;dest:pchar;len:sizeint);
- var
- i : sizeint;
- begin
- for i:=1 to len do
- begin
- if word(source^)<256 then
- dest^:=char(word(source^))
- else
- dest^:='?';
- inc(dest);
- inc(source);
- end;
- end;
- procedure SimpleAnsi2WideMove(source:pchar;dest:pwidechar;len:sizeint);
- var
- i : sizeint;
- begin
- for i:=1 to len do
- begin
- dest^:=widechar(byte(source^));
- inc(dest);
- inc(source);
- end;
- end;
- }
- {$ENDIF}
- procedure TXMLReader.ProcessXML(ABuf: PChar; const AFilename: String); // [1]
- {$IFDEF UsesFPCWidestrings}
- var
- OldWideStringManager,MyWideStringManager : TWideStringManager;
- {$ENDIF}
- begin
- buf := ABuf;
- BufStart := ABuf;
- Filename := AFilename;
- {$IFDEF UsesFPCWidestrings}
- GetWideStringManager(MyWideStringManager);
- MyWideStringManager.Wide2AnsiMoveProc:=@defaultWide2AnsiMove;
- MyWideStringManager.Ansi2WideMoveProc:=@defaultAnsi2WideMove;
- SetWideStringManager(MyWideStringManager, OldWideStringManager);
- try
- {$ENDIF}
- doc := TXMLReaderDocument.Create;
- ExpectProlog;
- {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ProcessXML A');{$ENDIF}
- ExpectElement(doc);
- {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ProcessXML B');{$ENDIF}
- ParseMisc(doc);
- {$IFDEF UsesFPCWidestrings}
- finally
- SetWideStringManager(OldWideStringManager);
- end;
- {$ENDIF}
- if buf[0] <> #0 then
- RaiseExc('Text after end of document element found');
- end;
- procedure TXMLReader.ProcessFragment(AOwner: TDOMNode; ABuf: PChar; const AFilename: String);
- {$IFDEF UsesFPCWidestrings}
- var
- OldWideStringManager: TWideStringManager;
- {$ENDIF}
- begin
- buf := ABuf;
- BufStart := ABuf;
- Filename := AFilename;
- {$IFDEF UsesFPCWidestrings}
- SetWideStringManager(WideStringManager, OldWideStringManager);
- try
- {$ENDIF}
- SkipWhitespace;
- while ParseCharData(AOwner) or ParseCDSect(AOwner) or ParsePI or
- ParseComment(AOwner) or ParseElement(AOwner) or
- ParseReference(AOwner) do
- SkipWhitespace;
- {$IFDEF UsesFPCWidestrings}
- finally
- SetWideStringManager(OldWideStringManager);
- end;
- {$ENDIF}
- end;
- function TXMLReader.CheckName: Boolean;
- var OldBuf: PChar;
- begin
- if not (buf[0] in (Letter + ['_', ':'])) then begin
- Result := False;
- exit;
- end;
- OldBuf := buf;
- Inc(buf);
- SkipString(Letter + ['0'..'9', '.', '-', '_', ':']);
- buf := OldBuf;
- Result := True;
- end;
- function TXMLReader.GetName(var s: String): Boolean; // [5]
- var OldBuf: PChar;
- begin
- if not (buf[0] in (Letter + ['_', ':'])) then begin
- SetLength(s, 0);
- Result := False;
- exit;
- end;
- OldBuf := buf;
- Inc(buf);
- SkipString(Letter + ['0'..'9', '.', '-', '_', ':']);
- s := GetString(OldBuf,buf-OldBuf);
- Result := True;
- end;
- function TXMLReader.ExpectName: String; // [5]
- procedure RaiseNameNotFound;
- begin
- RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
- end;
- var OldBuf: PChar;
- begin
- if not (buf[0] in (Letter + ['_', ':'])) then
- RaiseNameNotFound;
- OldBuf := buf;
- Inc(buf);
- SkipString(Letter + ['0'..'9', '.', '-', '_', ':']);
- Result:=GetString(OldBuf,buf-OldBuf);
- end;
- procedure TXMLReader.SkipName;
- procedure RaiseSkipNameNotFound;
- begin
- RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
- end;
- begin
- if not (buf[0] in (Letter + ['_', ':'])) then
- RaiseSkipNameNotFound;
- Inc(buf);
- SkipString(Letter + ['0'..'9', '.', '-', '_', ':']);
- end;
- procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
- var
- OldBuf: PChar;
- procedure FlushStringBuffer;
- var
- s: String;
- begin
- if OldBuf<>buf then begin
- s := GetString(OldBuf,buf-OldBuf);
- OldBuf := buf;
- attr.AppendChild(doc.CreateTextNode(s));
- SetLength(s, 0);
- end;
- end;
- var
- StrDel: char;
- begin
- if (buf[0] <> '''') and (buf[0] <> '"') then
- RaiseExc('Expected quotation marks');
- StrDel:=buf[0];
- Inc(buf);
- OldBuf := buf;
- while (buf[0]<>StrDel) and (buf[0]<>#0) do begin
- if buf[0] <> '&' then begin
- Inc(buf);
- end else
- begin
- if OldBuf<>buf then FlushStringBuffer;
- ParseReference(attr);
- OldBuf := buf;
- end;
- end;
- if OldBuf<>buf then FlushStringBuffer;
- inc(buf);
- ResolveEntities(Attr);
- end;
- function TXMLReader.ExpectPubidLiteral: String;
- begin
- SetLength(Result, 0);
- if CheckForChar('''') then begin
- SkipString(PubidChars - ['''']);
- ExpectString('''');
- end else if CheckForChar('"') then begin
- SkipString(PubidChars - ['"']);
- ExpectString('"');
- end else
- RaiseExc('Expected quotation marks');
- end;
- procedure TXMLReader.SkipPubidLiteral;
- begin
- if CheckForChar('''') then begin
- SkipString(PubidChars - ['''']);
- ExpectString('''');
- end else if CheckForChar('"') then begin
- SkipString(PubidChars - ['"']);
- ExpectString('"');
- end else
- RaiseExc('Expected quotation marks');
- end;
- function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
- var
- comment: String;
- OldBuf: PChar;
- begin
- if CheckFor('<!--') then begin
- OldBuf := buf;
- while (buf[0] <> #0) and (buf[1] <> #0) and
- ((buf[0] <> '-') or (buf[1] <> '-')) do begin
- Inc(buf);
- end;
- comment:=GetString(OldBuf,buf-OldBuf);
- AOwner.AppendChild(doc.CreateComment(comment));
- ExpectString('-->');
- Result := True;
- end else
- Result := False;
- end;
- function TXMLReader.ParsePI: Boolean; // [16]
- begin
- if CheckFor('<?') then begin
- if CompareLIPChar(buf,'XML ',4) then
- RaiseExc('"<?xml" processing instruction not allowed here');
- SkipName;
- if SkipWhitespace then
- while (buf[0] <> #0) and (buf[1] <> #0) and not
- ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
- ExpectString('?>');
- Result := True;
- end else
- Result := False;
- end;
- procedure TXMLReader.ExpectProlog; // [22]
- procedure ParseVersionNum;
- begin
- if doc.InheritsFrom(TXMLDocument) then
- TXMLDocument(doc).XMLVersion :=
- GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
- end;
- procedure ParseDoctypeDecls;
- begin
- repeat
- SkipWhitespace;
- until not (ParseMarkupDecl or ParsePEReference);
- ExpectString(']');
- end;
- var
- DocType: TXMLReaderDocumentType;
- begin
- if CheckFor('<?xml') then
- begin
- // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
- // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
- SkipWhitespace;
- ExpectString('version');
- ParseEq;
- if buf[0] = '''' then
- begin
- Inc(buf);
- ParseVersionNum;
- ExpectString('''');
- end else if buf[0] = '"' then
- begin
- Inc(buf);
- ParseVersionNum;
- ExpectString('"');
- end else
- RaiseExc('Expected single or double quotation mark');
- // EncodingDecl?
- SkipEncodingDecl;
- // SDDecl?
- SkipWhitespace;
- if CheckFor('standalone') then
- begin
- ExpectEq;
- if buf[0] = '''' then
- begin
- Inc(buf);
- if not (CheckFor('yes''') or CheckFor('no''')) then
- RaiseExc('Expected ''yes'' or ''no''');
- end else if buf[0] = '''' then
- begin
- Inc(buf);
- if not (CheckFor('yes"') or CheckFor('no"')) then
- RaiseExc('Expected "yes" or "no"');
- end;
- SkipWhitespace;
- end;
- ExpectString('?>');
- end;
- // Check for "Misc*"
- ParseMisc(doc);
- // Check for "(doctypedecl Misc*)?" [28]
- if CheckFor('<!DOCTYPE') then
- begin
- DocType := TXMLReaderDocumentType.Create(doc as TXMLReaderDocument);
- if doc.InheritsFrom(TXMLReaderDocument) then
- TXMLReaderDocument(doc).SetDocType(DocType);
- SkipWhitespace;
- DocType.Name := ExpectName;
- SkipWhitespace;
- if CheckForChar('[') then
- begin
- ParseDoctypeDecls;
- SkipWhitespace;
- ExpectString('>');
- end else if not CheckForChar('>') then
- begin
- ParseExternalID;
- SkipWhitespace;
- if CheckForChar('[') then
- begin
- ParseDoctypeDecls;
- SkipWhitespace;
- end;
- ExpectString('>');
- end;
- ParseMisc(doc);
- end;
- end;
- function TXMLReader.ParseEq: Boolean; // [25]
- var
- savedbuf: PChar;
- begin
- savedbuf := buf;
- SkipWhitespace;
- if buf[0] = '=' then begin
- Inc(buf);
- SkipWhitespace;
- Result := True;
- end else begin
- buf := savedbuf;
- Result := False;
- end;
- end;
- procedure TXMLReader.ExpectEq;
- begin
- if not ParseEq then
- RaiseExc('Expected "="');
- end;
- // Parse "Misc*":
- // Misc ::= Comment | PI | S
- procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
- begin
- repeat
- SkipWhitespace;
- until not (ParseComment(AOwner) or ParsePI);
- end;
- function TXMLReader.ParseMarkupDecl: Boolean; // [29]
- function ParseElementDecl: Boolean; // [45]
- procedure ExpectChoiceOrSeq; // [49], [50]
- procedure ExpectCP; // [48]
- begin
- if CheckForChar('(') then
- ExpectChoiceOrSeq
- else
- SkipName;
- if CheckForChar('?') then
- else if CheckForChar('*') then
- else if CheckForChar('+') then;
- end;
- var
- delimiter: Char;
- begin
- SkipWhitespace;
- ExpectCP;
- SkipWhitespace;
- delimiter := #0;
- while not CheckForChar(')') do begin
- if delimiter = #0 then begin
- if (buf[0] = '|') or (buf[0] = ',') then
- delimiter := buf[0]
- else
- RaiseExc('Expected "|" or ","');
- Inc(buf);
- end else
- ExpectString(delimiter);
- SkipWhitespace;
- ExpectCP;
- end;
- end;
- begin
- if CheckFor('<!ELEMENT') then begin
- ExpectWhitespace;
- SkipName;
- ExpectWhitespace;
- // Get contentspec [46]
- if CheckFor('EMPTY') then
- else if CheckFor('ANY') then
- else if CheckForChar('(') then begin
- SkipWhitespace;
- if CheckFor('#PCDATA') then begin
- // Parse Mixed section [51]
- SkipWhitespace;
- if not CheckForChar(')') then
- repeat
- ExpectString('|');
- SkipWhitespace;
- SkipName;
- until CheckFor(')*');
- end else begin
- // Parse Children section [47]
- ExpectChoiceOrSeq;
- if CheckForChar('?') then
- else if CheckForChar('*') then
- else if CheckForChar('+') then;
- end;
- end else
- RaiseExc('Invalid content specification');
- SkipWhitespace;
- ExpectString('>');
- Result := True;
- end else
- Result := False;
- end;
- function ParseAttlistDecl: Boolean; // [52]
- var
- attr: TDOMAttr;
- begin
- if CheckFor('<!ATTLIST') then begin
- ExpectWhitespace;
- SkipName;
- SkipWhitespace;
- while not CheckForChar('>') do begin
- SkipName;
- ExpectWhitespace;
- // Get AttType [54], [55], [56]
- if CheckFor('CDATA') then
- else if CheckFor('ID') then
- else if CheckFor('IDREF') then
- else if CheckFor('IDREFS') then
- else if CheckFor('ENTITTY') then
- else if CheckFor('ENTITIES') then
- else if CheckFor('NMTOKEN') then
- else if CheckFor('NMTOKENS') then
- else if CheckFor('NOTATION') then begin // [57], [58]
- ExpectWhitespace;
- ExpectString('(');
- SkipWhitespace;
- SkipName;
- SkipWhitespace;
- while not CheckForChar(')') do begin
- ExpectString('|');
- SkipWhitespace;
- SkipName;
- SkipWhitespace;
- end;
- end else if CheckForChar('(') then begin // [59]
- SkipWhitespace;
- SkipString(Nmtoken);
- SkipWhitespace;
- while not CheckForChar(')') do begin
- ExpectString('|');
- SkipWhitespace;
- SkipString(Nmtoken);
- SkipWhitespace;
- end;
- end else
- RaiseExc('Invalid tokenized type');
- ExpectWhitespace;
- // Get DefaultDecl [60]
- if CheckFor('#REQUIRED') then
- else if CheckFor('#IMPLIED') then
- else begin
- if CheckFor('#FIXED') then
- SkipWhitespace;
- attr := doc.CreateAttribute('');
- ExpectAttValue(attr);
- end;
- SkipWhitespace;
- end;
- Result := True;
- end else
- Result := False;
- end;
- function ParseEntityDecl: Boolean; // [70]
- var
- NewEntity: TDOMEntity;
- function ParseEntityValue: Boolean; // [9]
- var
- strdel: Char;
- begin
- if (buf[0] <> '''') and (buf[0] <> '"') then begin
- Result := False;
- exit;
- end;
- strdel := buf[0];
- Inc(buf);
- while not CheckForChar(strdel) do
- if ParsePEReference then
- else if ParseReference(NewEntity) then
- else begin
- Inc(buf); // Normal haracter
- end;
- Result := True;
- end;
- begin
- if CheckFor('<!ENTITY') then begin
- ExpectWhitespace;
- if CheckForChar('%') then begin // [72]
- ExpectWhitespace;
- NewEntity := doc.CreateEntity(ExpectName);
- ExpectWhitespace;
- // Get PEDef [74]
- if ParseEntityValue then
- else if ParseExternalID then
- else
- RaiseExc('Expected entity value or external ID');
- end else begin // [71]
- NewEntity := doc.CreateEntity(ExpectName);
- ExpectWhitespace;
- // Get EntityDef [73]
- if ParseEntityValue then
- else begin
- ExpectExternalID;
- // Get NDataDecl [76]
- ExpectWhitespace;
- ExpectString('NDATA');
- ExpectWhitespace;
- SkipName;
- end;
- end;
- SkipWhitespace;
- ExpectString('>');
- Result := True;
- end else
- Result := False;
- end;
- function ParseNotationDecl: Boolean; // [82]
- begin
- if CheckFor('<!NOTATION') then begin
- ExpectWhitespace;
- SkipName;
- ExpectWhitespace;
- if ParseExternalID then
- else if CheckFor('PUBLIC') then begin // [83]
- ExpectWhitespace;
- SkipPubidLiteral;
- end else
- RaiseExc('Expected external or public ID');
- SkipWhitespace;
- ExpectString('>');
- Result := True;
- end else
- Result := False;
- end;
- begin
- Result := False;
- while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
- ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
- Result := True;
- end;
- procedure TXMLReader.ProcessDTD(ABuf: PChar; const AFilename: String);
- begin
- buf := ABuf;
- BufStart := ABuf;
- Filename := AFilename;
- doc := TXMLReaderDocument.Create;
- ParseMarkupDecl;
- {
- if buf[0] <> #0 then begin
- DebugLn('=== Unparsed: ===');
- //DebugLn(buf);
- DebugLn(StrLen(buf), ' chars');
- end;
- }
- end;
- function TXMLReader.ParseCharData(AOwner: TDOMNode): Boolean; // [14]
- var
- p: PChar;
- DataLen: integer;
- OldBuf: PChar;
- begin
- OldBuf := buf;
- while not (buf[0] in [#0, '<', '&']) do
- begin
- Inc(buf);
- end;
- DataLen:=buf-OldBuf;
- if DataLen > 0 then
- begin
- // Check if chardata has non-whitespace content
- p:=OldBuf;
- while (p<buf) and (p[0] in WhitespaceChars) do
- inc(p);
- if p<buf then
- AOwner.AppendChild(doc.CreateTextNode(GetString(OldBuf,DataLen)));
- Result := True;
- end
- else
- Result := False;
- end;
- function TXMLReader.ParseCDSect(AOwner: TDOMNode): Boolean; // [18]
- var
- OldBuf: PChar;
- begin
- if CheckFor('<![CDATA[') then
- begin
- OldBuf := buf;
- while not CheckFor(']]>') do
- begin
- Inc(buf);
- end;
- AOwner.AppendChild(doc.CreateCDATASection(GetString(OldBuf,buf-OldBuf-3))); { Copy CDATA, discarding terminator }
- Result := True;
- end
- else
- Result := False;
- end;
- function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
- var
- NewElem: TDOMElement;
- procedure CreateNameElement;
- var
- IsEmpty: Boolean;
- attr: TDOMAttr;
- name: string;
- begin
- {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' CreateNameElement A');{$ENDIF}
- GetName(name);
- NewElem := doc.CreateElement(name);
- AOwner.AppendChild(NewElem);
- SkipWhitespace;
- IsEmpty := False;
- while True do
- begin
- {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' CreateNameElement E');{$ENDIF}
- if CheckFor('/>') then
- begin
- IsEmpty := True;
- break;
- end;
- if CheckForChar('>') then
- break;
- // Get Attribute [41]
- attr := doc.CreateAttribute(ExpectName);
- NewElem.Attributes.SetNamedItem(attr);
- ExpectEq;
- ExpectAttValue(attr);
- SkipWhitespace;
- end;
- if not IsEmpty then
- begin
- // Get content
- SkipWhitespace;
- while ParseCharData(NewElem) or ParseCDSect(NewElem) or ParsePI or
- ParseComment(NewElem) or ParseElement(NewElem) or
- ParseReference(NewElem) do;
- // Get ETag [42]
- ExpectString('</');
- if ExpectName <> name then
- RaiseExc('Unmatching element end tag (expected "</' + name + '>")');
- SkipWhitespace;
- ExpectString('>');
- end;
- {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' CreateNameElement END');{$ENDIF}
- ResolveEntities(NewElem);
- end;
- var
- OldBuf: PChar;
- begin
- OldBuf := Buf;
- if CheckForChar('<') then
- begin
- {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ParseElement A');{$ENDIF}
- if not CheckName then
- begin
- Buf := OldBuf;
- Result := False;
- end else begin
- CreateNameElement;
- Result := True;
- end;
- end else
- Result := False;
- {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ParseElement END');{$ENDIF}
- end;
- procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
- begin
- if not ParseElement(AOwner) then
- RaiseExc('Expected element');
- end;
- function TXMLReader.ParsePEReference: Boolean; // [69]
- begin
- if CheckForChar('%') then begin
- SkipName;
- ExpectString(';');
- Result := True;
- end else
- Result := False;
- end;
- function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
- begin
- if not CheckForChar('&') then begin
- Result := False;
- exit;
- end;
- if CheckForChar('#') then begin // Test for CharRef [66]
- if CheckForChar('x') then begin
- // !!!: there must be at least one digit
- while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
- end else
- // !!!: there must be at least one digit
- while buf[0] in ['0'..'9'] do Inc(buf);
- end else
- AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
- ExpectString(';');
- Result := True;
- end;
- procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
- begin
- if not ParseReference(AOwner) then
- RaiseExc('Expected reference ("&Name;" or "%Name;")');
- end;
- function TXMLReader.ParseExternalID: Boolean; // [75]
- function GetSystemLiteral: String;
- var
- OldBuf: PChar;
- begin
- if buf[0] = '''' then begin
- Inc(buf);
- OldBuf := buf;
- while (buf[0] <> '''') and (buf[0] <> #0) do begin
- Inc(buf);
- end;
- Result := GetString(OldBuf,buf-OldBuf);
- ExpectString('''');
- end else if buf[0] = '"' then begin
- Inc(buf);
- OldBuf := buf;
- while (buf[0] <> '"') and (buf[0] <> #0) do begin
- Inc(buf);
- end;
- Result := GetString(OldBuf,buf-OldBuf);
- ExpectString('"');
- end else
- Result:='';
- end;
- procedure SkipSystemLiteral;
- begin
- if buf[0] = '''' then begin
- Inc(buf);
- while (buf[0] <> '''') and (buf[0] <> #0) do begin
- Inc(buf);
- end;
- ExpectString('''');
- end else if buf[0] = '"' then begin
- Inc(buf);
- while (buf[0] <> '"') and (buf[0] <> #0) do begin
- Inc(buf);
- end;
- ExpectString('"');
- end;
- end;
- begin
- if CheckFor('SYSTEM') then begin
- ExpectWhitespace;
- SkipSystemLiteral;
- Result := True;
- end else if CheckFor('PUBLIC') then begin
- ExpectWhitespace;
- SkipPubidLiteral;
- ExpectWhitespace;
- SkipSystemLiteral;
- Result := True;
- end else
- Result := False;
- end;
- procedure TXMLReader.ExpectExternalID;
- begin
- if not ParseExternalID then
- RaiseExc('Expected external ID');
- end;
- function TXMLReader.ParseEncodingDecl: String; // [80]
- function ParseEncName: String;
- var OldBuf: PChar;
- begin
- if not (buf[0] in ['A'..'Z', 'a'..'z']) then
- RaiseExc('Expected character (A-Z, a-z)');
- OldBuf := buf;
- Inc(buf);
- SkipString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
- Result := GetString(OldBuf,buf-OldBuf);
- end;
- begin
- SetLength(Result, 0);
- SkipWhitespace;
- if CheckFor('encoding') then begin
- ExpectEq;
- if buf[0] = '''' then begin
- Inc(buf);
- Result := ParseEncName;
- ExpectString('''');
- end else if buf[0] = '"' then begin
- Inc(buf);
- Result := ParseEncName;
- ExpectString('"');
- end;
- end;
- end;
- procedure TXMLReader.SkipEncodingDecl;
- procedure ParseEncName;
- begin
- if not (buf[0] in ['A'..'Z', 'a'..'z']) then
- RaiseExc('Expected character (A-Z, a-z)');
- Inc(buf);
- SkipString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
- end;
- begin
- SkipWhitespace;
- if CheckFor('encoding') then begin
- ExpectEq;
- if buf[0] = '''' then begin
- Inc(buf);
- ParseEncName;
- ExpectString('''');
- end else if buf[0] = '"' then begin
- Inc(buf);
- ParseEncName;
- ExpectString('"');
- end;
- end;
- end;
- { Currently, this method will only resolve the entities which are
- predefined in XML: }
- procedure TXMLReader.ResolveEntities(RootNode: TDOMNode);
- var
- Node, NextNode: TDOMNode;
- procedure ReplaceEntityRef(EntityNode: TDOMNode; const Replacement: string);
- var
- PrevSibling, NextSibling: TDOMNode;
- begin
- PrevSibling := EntityNode.PreviousSibling;
- NextSibling := EntityNode.NextSibling;
- if Assigned(PrevSibling) and (PrevSibling.NodeType = TEXT_NODE) then
- begin
- TDOMCharacterData(PrevSibling).AppendData(Replacement);
- RootNode.RemoveChild(EntityNode);
- if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
- begin
- // next sibling is to be removed, so we can't use it anymore
- NextNode := NextSibling.NextSibling;
- TDOMCharacterData(PrevSibling).AppendData(
- TDOMCharacterData(NextSibling).Data);
- RootNode.RemoveChild(NextSibling);
- end
- end else
- if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
- begin
- TDOMCharacterData(NextSibling).InsertData(0, Replacement);
- RootNode.RemoveChild(EntityNode);
- end else
- RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
- end;
- begin
- Node := RootNode.FirstChild;
- while Assigned(Node) do
- begin
- NextNode := Node.NextSibling;
- if Node.NodeType = ENTITY_REFERENCE_NODE then
- if Node.NodeName = 'amp' then
- ReplaceEntityRef(Node, '&')
- else if Node.NodeName = 'apos' then
- ReplaceEntityRef(Node, '''')
- else if Node.NodeName = 'gt' then
- ReplaceEntityRef(Node, '>')
- else if Node.NodeName = 'lt' then
- ReplaceEntityRef(Node, '<')
- else if Node.NodeName = 'quot' then
- ReplaceEntityRef(Node, '"');
- Node := NextNode;
- end;
- end;
- procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
- var
- reader: TXMLReader;
- buf: PChar;
- BufSize: LongInt;
- begin
- ADoc := nil;
- BufSize := FileSize(f) + 1;
- if BufSize <= 1 then
- exit;
- GetMem(buf, BufSize);
- try
- BlockRead(f, buf^, BufSize - 1);
- buf[BufSize - 1] := #0;
- Reader := TXMLReader.Create;
- try
- Reader.ProcessXML(buf, TFileRec(f).name);
- ADoc := TXMLDocument(Reader.doc);
- finally
- Reader.Free;
- end;
- finally
- FreeMem(buf);
- end;
- end;
- procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream; const AFilename: String);
- var
- reader: TXMLReader;
- buf: PChar;
- begin
- ADoc := nil;
- if f.Size = 0 then exit;
- GetMem(buf, f.Size + 1);
- try
- f.Read(buf^, f.Size);
- buf[f.Size] := #0;
- Reader := TXMLReader.Create;
- try
- Reader.ProcessXML(buf, AFilename);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- finally
- FreeMem(buf);
- end;
- end;
- procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
- begin
- ReadXMLFile(ADoc, f, '<Stream>');
- end;
- procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
- var
- FileStream: TFileStream;
- MemStream: TMemoryStream;
- begin
- ADoc := nil;
- FileStream := TFileStream.Create(AFilename, fmOpenRead);
- if FileStream=nil then exit;
- MemStream := TMemoryStream.Create;
- try
- MemStream.LoadFromStream(FileStream);
- ReadXMLFile(ADoc, MemStream, AFilename);
- finally
- FileStream.Free;
- MemStream.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File);
- var
- Reader: TXMLReader;
- buf: PChar;
- BufSize: LongInt;
- begin
- BufSize := FileSize(f) + 1;
- if BufSize <= 1 then
- exit;
- GetMem(buf, BufSize);
- try
- BlockRead(f, buf^, BufSize - 1);
- buf[BufSize - 1] := #0;
- Reader := TXMLReader.Create;
- try
- Reader.Doc := AParentNode.OwnerDocument;
- Reader.ProcessFragment(AParentNode, buf, TFileRec(f).name);
- finally
- Reader.Free;
- end;
- finally
- FreeMem(buf);
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const AFilename: String);
- var
- Reader: TXMLReader;
- buf: PChar;
- begin
- if f.Size = 0 then
- exit;
- GetMem(buf, f.Size + 1);
- try
- f.Read(buf^, f.Size);
- buf[f.Size] := #0;
- Reader := TXMLReader.Create;
- Reader.Doc := AParentNode.OwnerDocument;
- try
- Reader.ProcessFragment(AParentNode, buf, AFilename);
- finally
- Reader.Free;
- end;
- finally
- FreeMem(buf);
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
- begin
- ReadXMLFragment(AParentNode, f, '<Stream>');
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
- var
- Stream: TStream;
- begin
- Stream := TFileStream.Create(AFilename, fmOpenRead);
- try
- ReadXMLFragment(AParentNode, Stream, AFilename);
- finally
- Stream.Free;
- end;
- end;
- procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
- var
- Reader: TXMLReader;
- buf: PChar;
- BufSize: LongInt;
- begin
- ADoc := nil;
- BufSize := FileSize(f) + 1;
- if BufSize <= 1 then
- exit;
- GetMem(buf, BufSize);
- try
- BlockRead(f, buf^, BufSize - 1);
- buf[BufSize - 1] := #0;
- Reader := TXMLReader.Create;
- try
- Reader.ProcessDTD(buf, TFileRec(f).name);
- ADoc := TXMLDocument(Reader.doc);
- finally
- Reader.Free;
- end;
- finally
- FreeMem(buf);
- end;
- end;
- procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream; const AFilename: String);
- var
- Reader: TXMLReader;
- buf: PChar;
- begin
- ADoc := nil;
- if f.Size = 0 then
- exit;
- GetMem(buf, f.Size + 1);
- try
- f.Read(buf^, f.Size);
- buf[f.Size] := #0;
- Reader := TXMLReader.Create;
- try
- Reader.ProcessDTD(buf, AFilename);
- ADoc := TXMLDocument(Reader.doc);
- finally
- Reader.Free;
- end;
- finally
- FreeMem(buf);
- end;
- end;
- procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
- begin
- ReadDTDFile(ADoc, f, '<Stream>');
- end;
- procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
- var
- Stream: TStream;
- begin
- ADoc := nil;
- Stream := TFileStream.Create(AFilename, fmOpenRead);
- try
- ReadDTDFile(ADoc, Stream, AFilename);
- finally
- Stream.Free;
- end;
- end;
- end.
- {
- $Log: xmlread.pp,v $
- Revision 1.17 2005/05/02 13:06:51 michael
- + Patch from Vincent Snijders to fix reading of entities
- Revision 1.16 2005/03/14 21:10:12 florian
- * adapated for the new widestring manager
- Revision 1.15 2005/02/14 17:13:18 peter
- * truncate log
- Revision 1.14 2005/02/01 20:23:39 florian
- * adapted to new widestring manager
- Revision 1.13 2005/01/22 20:54:51 michael
- * Patch from Colin Western to correctly read CDATA
- }
|