xmlread.pp 40 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666
  1. {
  2. This file is part of the Free Component Library
  3. XML reading routines.
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit XMLRead;
  13. {$ifdef fpc}
  14. {$MODE objfpc}{$H+}
  15. {$endif}
  16. interface
  17. {off $DEFINE MEM_CHECK}
  18. uses
  19. {$IFDEF MEM_CHECK}MemCheck,{$ENDIF}
  20. SysUtils, Classes, DOM;
  21. type
  22. EXMLReadError = class(Exception);
  23. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  24. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: File); overload;
  25. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream); overload;
  26. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const AFilename: String); overload;
  27. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
  28. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File); overload;
  29. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
  30. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const AFilename: String); overload;
  31. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  32. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: File); overload;
  33. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
  34. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const AFilename: String); overload;
  35. // =======================================================
  36. implementation
  37. type
  38. TSetOfChar = set of Char;
  39. const
  40. Letter = ['A'..'Z', 'a'..'z'];
  41. Digit = ['0'..'9'];
  42. PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  43. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  44. '#', '@', '$', '_', '%'];
  45. NmToken: TSetOfChar = Letter + Digit + ['.', '-', '_', ':'];
  46. type
  47. TXMLReaderDocumentType = class(TDOMDocumentType)
  48. public
  49. property Name: DOMString read FNodeName write FNodeName;
  50. end;
  51. TXMLReader = class;
  52. TCharSource = class
  53. private
  54. Buf: PChar;
  55. FReader: TXMLReader;
  56. public
  57. constructor Create(AReader: TXMLReader; ABuffer: PChar);
  58. function NextChar: WideChar; virtual; abstract;
  59. end;
  60. TUCS2CharSource = class(TCharSource)
  61. private
  62. FSwapEndian: Boolean;
  63. public
  64. function NextChar: WideChar; override;
  65. end;
  66. TUTF8CharSource = class(TCharSource)
  67. private
  68. procedure BadChar;
  69. public
  70. function NextChar: WideChar; override;
  71. end;
  72. TISO_8859_1CharSource = class(TCharSource)
  73. public
  74. function NextChar: WideChar; override;
  75. end;
  76. TXMLReader = class
  77. private
  78. FSource: TCharSource;
  79. FCurChar: WideChar;
  80. FLine: Integer; // <- To Locator
  81. FColumn: Integer; // <- To Locator
  82. FSeenCR: Boolean;
  83. FWhitespace: Boolean;
  84. FValue: array of WideChar;
  85. FValueLength: Integer;
  86. FName: array of WideChar;
  87. FNameLength: Integer;
  88. FInternalSubset: Boolean;
  89. FPrologParsed: Boolean;
  90. procedure RaiseExpectedQmark;
  91. procedure GetChar;
  92. procedure AppendValue(wc: WideChar);
  93. procedure AppendName(wc: WideChar);
  94. procedure DetectEncoding;
  95. protected
  96. buf: PChar; // <- To InputSource
  97. Filename: String; // <- To InputSource
  98. FCursor: TDOMNode;
  99. procedure RaiseExc(const descr: String); overload;
  100. procedure RaiseExc(Expected, Found: WideChar); overload;
  101. function SkipWhitespace: Boolean;
  102. procedure ExpectWhitespace;
  103. procedure ExpectString(const s: String);
  104. procedure ExpectChar(wc: WideChar);
  105. function CheckForChar(c: WideChar): Boolean;
  106. procedure SkipString(const ValidChars: TSetOfChar);
  107. function GetString(const ValidChars: TSetOfChar): WideString;
  108. procedure RaiseNameNotFound;
  109. function CheckName: Boolean;
  110. function ExpectName: WideString; // [5]
  111. procedure SkipName;
  112. procedure ExpectAttValue; // [10]
  113. procedure SkipPubidLiteral; // [12]
  114. procedure ParseComment; // [15]
  115. procedure ParsePI; // [16]
  116. procedure ExpectProlog; // [22]
  117. function ParseInternalDtd: Boolean;
  118. procedure ParseProlog;
  119. function ParseEq: Boolean; // [25]
  120. procedure ExpectEq;
  121. procedure ParseMisc; // [27]
  122. function ParseMarkupDecl(InternalSubset: Boolean): Boolean; // [29]
  123. procedure ParseCDSect; // [18]
  124. function ParseElementContent: Boolean;
  125. procedure ParseElement; // [39]
  126. procedure ExpectElement;
  127. function ResolvePredefined(const RefName: WideString): Boolean;
  128. function ParseReference: TDOMEntityReference; // [67]
  129. function ParsePEReference: Boolean; // [69]
  130. function ParseExternalID(InNotation: Boolean): Boolean; // [75]
  131. procedure ExpectExternalID;
  132. procedure ProcessTextAndRefs(Delim: WideChar; DiscardWS: Boolean);
  133. procedure ParseEntityDecl;
  134. procedure ParseAttlistDecl;
  135. procedure ParseElementDecl;
  136. procedure ParseNotationDecl;
  137. procedure ResolveEntities(RootNode: TDOMNode);
  138. public
  139. doc: TDOMDocument;
  140. destructor Destroy; override;
  141. procedure ProcessXML(ABuf: PChar; const AFilename: String); // [1]
  142. procedure ProcessFragment(AOwner: TDOMNode; ABuf: PChar; const AFilename: String);
  143. procedure ProcessDTD(ABuf: PChar; const AFilename: String); // ([29])
  144. end;
  145. {$i names.inc}
  146. // TODO: These CharSource classes still cannot be considered as the final solution...
  147. { TCharSource }
  148. constructor TCharSource.Create(AReader: TXMLReader; ABuffer: PChar);
  149. begin
  150. inherited Create;
  151. FReader := AReader;
  152. Buf := ABuffer;
  153. end;
  154. { TUCS2CharSource }
  155. function TUCS2CharSource.NextChar: WideChar;
  156. begin
  157. Result := PWideChar(buf)^;
  158. Inc(buf, sizeof(WideChar));
  159. if FSwapEndian then
  160. Result := WideChar(Swap(Word(Result)));
  161. end;
  162. { TUTF8CharSource }
  163. procedure TUTF8CharSource.BadChar;
  164. begin
  165. FReader.RaiseExc('Invalid character in UTF8 sequence');
  166. end;
  167. function TUTF8CharSource.NextChar: WideChar;
  168. var
  169. ch2, ch3: Byte;
  170. begin
  171. Result := WideChar(buf[0]);
  172. Inc(buf);
  173. if Result < #128 then { ASCII }
  174. Exit
  175. else if (Byte(Result) and $E0) = $C0 then { #$0080 - #$07FF }
  176. begin
  177. ch2 := ord(buf[0]); Inc(Buf);
  178. if (Ch2 and $C0) <> $80 then
  179. BadChar;
  180. Result := WideChar((Byte(Result) and $1F) shl 6 + (Ch2 and $3F));
  181. end
  182. else if (Byte(Result) and $F0) = $E0 then { #$0800 - #$FFFF }
  183. begin
  184. ch2 := ord(buf[0]); Inc(buf);
  185. if (Ch2 and $C0) <> $80 then
  186. BadChar;
  187. ch3 := ord(buf[0]); Inc(buf);
  188. if (Ch3 and $C0) <> $80 then
  189. BadChar;
  190. Result := WideChar(Word((Byte(Result) and $0F) shl 12) +
  191. (Ch2 and $3F) shl 6 + (Ch3 and $3F));
  192. end
  193. else { if (Byte(Result) and $F8) = $F0) then } // and $FC = $F8
  194. // and $FE = $FC
  195. FReader.RaiseExc('Unsupported UTF8 character');
  196. end;
  197. { TISO8859_1CharSource }
  198. function TISO_8859_1CharSource.NextChar: WideChar;
  199. begin
  200. Result := WideChar(buf[0]); Inc(Buf);
  201. end;
  202. { TXMLReader }
  203. procedure TXMLReader.DetectEncoding;
  204. var
  205. b: Char;
  206. begin
  207. b := buf[0];
  208. if (b = #$FE) and (buf[1] = #$FF) then
  209. begin
  210. Inc(buf, 2);
  211. FSource := TUCS2CharSource.Create(Self, buf);
  212. {$IFNDEF ENDIAN_BIG}
  213. TUCS2CharSource(FSource).FSwapEndian := True;
  214. {$ENDIF}
  215. end
  216. else if (b = #$FF) and (buf[1] = #$FE) then
  217. begin
  218. Inc(buf, 2);
  219. FSource := TUCS2CharSource.Create(Self, buf);
  220. {$IFDEF ENDIAN_BIG}
  221. TUCS2CharSource(FSource).FSwapEndian := True;
  222. {$ENDIF}
  223. end
  224. else
  225. FSource := TUTF8CharSource.Create(Self, Buf);
  226. GetChar;
  227. if FCurChar = #$FEFF then // skip BOM, if one is present
  228. GetChar;
  229. end;
  230. procedure TXMLReader.GetChar;
  231. begin
  232. FCurChar := FSource.NextChar;
  233. if FSeenCR then
  234. begin
  235. case FCurChar of
  236. #10, #$85: FCurChar := FSource.NextChar; // #$85 is xml 1.1 specific
  237. end;
  238. FSeenCR := False;
  239. end;
  240. FWhitespace := False;
  241. case FCurChar of
  242. #9, #10, #32: FWhitespace := True;
  243. #13: begin
  244. FSeenCR := True;
  245. FCurChar := #10;
  246. FWhitespace := True;
  247. end;
  248. #$85, #$2028: // xml 1.1 specific
  249. FCurChar := #10;
  250. #1..#8, #11, #12, #14..#31, // never allowed... btw, #0 is also forbidden
  251. #$D800..#$DFFF, // surrogates - should be supported some way
  252. #$FFFE..#$FFFF: // never allowed
  253. RaiseExc('Invalid character');
  254. end;
  255. if FCurChar = #10 then
  256. begin
  257. Inc(FLine);
  258. FColumn := 0;
  259. end
  260. else
  261. Inc(FColumn);
  262. end;
  263. procedure TXMLReader.AppendValue(wc: WideChar);
  264. var
  265. Alloc: Integer;
  266. begin
  267. Alloc := Length(FValue);
  268. if FValueLength >= Alloc then
  269. begin
  270. if Alloc = 0 then
  271. Alloc := 512
  272. else
  273. Alloc := Alloc * 2;
  274. SetLength(FValue, Alloc);
  275. end;
  276. FValue[FValueLength] := wc;
  277. Inc(FValueLength);
  278. end;
  279. procedure TXMLReader.AppendName(wc: WideChar);
  280. var
  281. Alloc: Integer;
  282. begin
  283. Alloc := Length(FName);
  284. if FNameLength >= Alloc then
  285. begin
  286. if Alloc = 0 then
  287. Alloc := 128
  288. else
  289. Alloc := Alloc * 2;
  290. SetLength(FName, Alloc);
  291. end;
  292. FName[FNameLength] := wc;
  293. Inc(FNameLength);
  294. end;
  295. procedure TXMLReader.RaiseExpectedQmark;
  296. begin
  297. RaiseExc('Expected single or double quotation mark');
  298. end;
  299. procedure TXMLReader.RaiseExc(Expected, Found: WideChar);
  300. begin
  301. RaiseExc('Expected "' + Expected + '", but found "' + Found + '"');
  302. end;
  303. procedure TXMLReader.RaiseExc(const descr: String);
  304. begin
  305. raise EXMLReadError.CreateFmt('In %s (line %d pos %d): %s', [Filename, FLine, FColumn, descr]);
  306. end;
  307. function TXMLReader.SkipWhitespace: Boolean;
  308. begin
  309. Result := False;
  310. while FWhitespace do
  311. begin
  312. GetChar;
  313. Result := True;
  314. end;
  315. end;
  316. procedure TXMLReader.ExpectWhitespace;
  317. begin
  318. if not SkipWhitespace then
  319. RaiseExc('Expected whitespace');
  320. end;
  321. procedure TXMLReader.ExpectChar(wc: WideChar);
  322. begin
  323. if not CheckForChar(wc) then
  324. RaiseExc(wc, FCurChar);
  325. end;
  326. procedure TXMLReader.ExpectString(const s: String);
  327. procedure RaiseStringNotFound;
  328. begin
  329. RaiseExc('Expected "' + s + '"');
  330. end;
  331. var
  332. I: Integer;
  333. begin
  334. for I := 1 to Length(s) do
  335. begin
  336. if FCurChar <> WideChar(s[i]) then
  337. RaiseStringNotFound;
  338. GetChar;
  339. end;
  340. end;
  341. function TXMLReader.CheckForChar(c: WideChar): Boolean;
  342. begin
  343. Result := (FCurChar = c);
  344. if Result then
  345. GetChar;
  346. end;
  347. procedure TXMLReader.SkipString(const ValidChars: TSetOfChar);
  348. begin
  349. FValueLength := 0;
  350. while (ord(FCurChar) < 256) and (char(FCurChar) in ValidChars) do
  351. begin
  352. AppendValue(FCurChar);
  353. GetChar;
  354. end;
  355. end;
  356. function TXMLReader.GetString(const ValidChars: TSetOfChar): WideString;
  357. begin
  358. SkipString(ValidChars);
  359. SetString(Result, PWideChar(@FValue[0]), FValueLength);
  360. end;
  361. destructor TXMLReader.Destroy;
  362. begin
  363. FSource.Free;
  364. inherited Destroy;
  365. end;
  366. procedure TXMLReader.ProcessXML(ABuf: PChar; const AFilename: String); // [1]
  367. begin
  368. buf := ABuf;
  369. Filename := AFilename;
  370. FLine := 1;
  371. FColumn := 0;
  372. doc := TXMLDocument.Create;
  373. FCursor := doc;
  374. DetectEncoding;
  375. ExpectProlog;
  376. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ProcessXML A');{$ENDIF}
  377. ExpectElement;
  378. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ProcessXML B');{$ENDIF}
  379. ParseMisc;
  380. if FCurChar <> #0 then
  381. RaiseExc('Text after end of document element found');
  382. end;
  383. procedure TXMLReader.ProcessFragment(AOwner: TDOMNode; ABuf: PChar; const AFilename: String);
  384. begin
  385. buf := ABuf;
  386. Filename := AFilename;
  387. FLine := 1;
  388. FColumn := 0;
  389. FCursor := AOwner;
  390. DetectEncoding;
  391. if not ParseElementContent then
  392. ;
  393. end;
  394. function TXMLReader.CheckName: Boolean; // [5]
  395. begin
  396. Result := (Byte(FCurChar) in NamingBitmap[nameStartPages[hi(Word(FCurChar))]].Work);
  397. if Result then
  398. begin
  399. FNameLength := 0;
  400. repeat
  401. AppendName(FCurChar);
  402. GetChar;
  403. until not (Byte(FCurChar) in NamingBitmap[namePages[hi(Word(FCurChar))]].Work);;
  404. end;
  405. end;
  406. procedure TXMLReader.RaiseNameNotFound;
  407. begin
  408. RaiseExc('Name starts with invalid character');
  409. end;
  410. function TXMLReader.ExpectName: WideString; // [5]
  411. begin
  412. if not CheckName then
  413. RaiseNameNotFound;
  414. SetString(Result, PWideChar(@FName[0]), FNameLength);
  415. end;
  416. procedure TXMLReader.SkipName;
  417. begin
  418. if not CheckName then
  419. RaiseNameNotFound;
  420. end;
  421. // ---------------------
  422. function TXMLReader.ResolvePredefined(const RefName: WideString): Boolean;
  423. begin
  424. Result := True;
  425. if RefName = 'amp' then
  426. AppendValue('&')
  427. else if RefName = 'apos' then
  428. AppendValue('''')
  429. else if RefName = 'gt' then
  430. AppendValue('>')
  431. else if RefName = 'lt' then
  432. AppendValue('<')
  433. else if RefName = 'quot' then
  434. AppendValue('"')
  435. else
  436. Result := False;
  437. end;
  438. function TXMLReader.ParseReference: TDOMEntityReference;
  439. var
  440. RefName: WideString;
  441. Radix, Value: Integer;
  442. begin
  443. Result := nil;
  444. if CheckForChar('#') then // character reference [66]
  445. begin
  446. if CheckForChar('x') then
  447. Radix := 16
  448. else
  449. Radix := 10;
  450. Value := 0;
  451. repeat
  452. case FCurChar of
  453. '0'..'9': Value := Value * Radix + Ord(FCurChar) - Ord('0');
  454. 'a'..'f': if Radix = 16 then Value := Value * 16 + Ord(FCurChar) - Ord('a') + 10 else Break;
  455. 'A'..'F': if Radix = 16 then Value := Value * 16 + Ord(FCurChar) - Ord('A') + 10 else Break;
  456. else
  457. Break;
  458. end;
  459. GetChar;
  460. until False;
  461. case Value of
  462. // TODO: in XML1.1, references to $01..$1F are VALID
  463. $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
  464. AppendValue(WideChar(Value));
  465. $10000..$10FFFF:
  466. begin
  467. AppendValue(WideChar($D7C0 + (Value shr 10)));
  468. AppendValue(WideChar($DC00 xor (Value and $3FF)));
  469. end;
  470. else
  471. RaiseExc('Invalid character reference');
  472. end;
  473. end
  474. else
  475. begin
  476. RefName := ExpectName;
  477. if not ResolvePredefined(RefName) then
  478. begin
  479. // TODO: try resolve the entity here
  480. Result := doc.CreateEntityReference(RefName);
  481. end;
  482. end;
  483. ExpectChar(';'); // reference terminator
  484. end;
  485. procedure TXMLReader.ProcessTextAndRefs(Delim: WideChar; DiscardWS: Boolean);
  486. var
  487. nonWs: Boolean;
  488. RefNode: TDOMEntityReference;
  489. begin
  490. FValueLength := 0;
  491. nonWs := False;
  492. while (FCurChar <> Delim) and (FCurChar <> #0) and (FCurChar <> '<') do
  493. begin
  494. if not FWhitespace then
  495. nonWs := True;
  496. if FCurChar <> '&' then
  497. begin
  498. AppendValue(FCurChar);
  499. if (FValueLength >= 3) and (FValue[FValueLength-1] = '>') and
  500. (FValue[FValueLength-2] = ']') and (FValue[FValueLength-3] = ']') then
  501. RaiseExc('Literal '']]>'' is not allowed in text');
  502. GetChar;
  503. end
  504. else
  505. begin
  506. GetChar; // skip '&'
  507. RefNode := ParseReference;
  508. if Assigned(RefNode) then
  509. begin
  510. if FValueLength > 0 then
  511. begin
  512. if (not DiscardWs) or nonWs then
  513. FCursor.AppendChild(doc.CreateTextNodeBuf(@FValue[0], FValueLength));
  514. FValueLength := 0;
  515. nonWs := False;
  516. end;
  517. FCursor.AppendChild(RefNode);
  518. end;
  519. end;
  520. end; // while
  521. if ((not DiscardWs) or nonWs) and (FValueLength > 0) then
  522. begin
  523. FCursor.AppendChild(doc.CreateTextNodeBuf(@FValue[0], FValueLength));
  524. FValueLength := 0;
  525. end;
  526. end;
  527. procedure TXMLReader.ExpectAttValue; // [10]
  528. var
  529. Delim: WideChar;
  530. begin
  531. if (FCurChar <> '''') and (FCurChar <> '"') then
  532. RaiseExpectedQmark;
  533. Delim := FCurChar;
  534. GetChar; // skip quote
  535. ProcessTextAndRefs(Delim, False);
  536. if FCurChar = '<' then
  537. RaiseExc('"<" is not allowed in attribute value');
  538. GetChar; // skip trailing quote
  539. end;
  540. procedure TXMLReader.SkipPubidLiteral; // [12]
  541. var
  542. Delim: WideChar;
  543. begin
  544. if (FCurChar = '''') or (FCurChar = '"') then
  545. begin
  546. Delim := FCurChar;
  547. GetChar; // skip quote
  548. SkipString(PubidChars - [Char(Delim)]); // <-- PubidChars do not contain `"`
  549. ExpectChar(Delim);
  550. end
  551. else
  552. RaiseExpectedQMark;
  553. end;
  554. // starting '<!' already consumed, FCurChar = '-'
  555. procedure TXMLReader.ParseComment; // [15]
  556. begin
  557. ExpectString('--');
  558. FValueLength := 0;
  559. repeat
  560. AppendValue(FCurChar);
  561. GetChar;
  562. if (FValueLength >= 2) and (FValue[FValueLength-1] = '-') and
  563. (FValue[FValueLength-2] = '-') then
  564. begin
  565. Dec(FValueLength, 2);
  566. Break;
  567. end;
  568. until FCurChar = #0; // should not happen
  569. if FCurChar = #0 then
  570. RaiseExc('Unterminated comment');
  571. ExpectChar('>');
  572. FCursor.AppendChild(doc.CreateCommentBuf(@FValue[0], FValueLength));
  573. end;
  574. // starting '<?' already consumed
  575. procedure TXMLReader.ParsePI; // [16]
  576. var
  577. Name, Value: WideString;
  578. begin
  579. Name := ExpectName;
  580. if (FNameLength = 3) and
  581. ((FName[0] = 'X') or (FName[0] = 'x')) and
  582. ((FName[1] = 'M') or (FName[1] = 'm')) and
  583. ((FName[2] = 'L') or (FName[2] = 'l')) then
  584. begin
  585. if Name <> 'xml' then // FIX: ibm23n04.xml
  586. RaiseExc('"xml" reserved word must be lowercase');
  587. if not FPrologParsed then
  588. begin
  589. ParseProlog;
  590. FPrologParsed := True;
  591. Exit;
  592. end
  593. else
  594. RaiseExc('"<?xml" processing instruction not allowed here');
  595. end;
  596. if FCurChar <> '?' then
  597. ExpectWhitespace;
  598. FValueLength := 0;
  599. repeat
  600. AppendValue(FCurChar);
  601. GetChar;
  602. if (FValueLength >= 2) and (FValue[FValueLength-1] = '>') and
  603. (FValue[FValueLength-2] = '?') then
  604. begin
  605. Dec(FValueLength, 2);
  606. Break;
  607. end;
  608. until FCurChar = #0; // should not happen
  609. if FCurChar = #0 then
  610. RaiseExc('Unterminated processing instruction');
  611. SetString(Value, PWideChar(@FValue[0]), FValueLength);
  612. FCursor.AppendChild(Doc.CreateProcessingInstruction(Name, Value));
  613. end;
  614. // here we come from ParsePI, 'xml' is already consumed
  615. procedure TXMLReader.ParseProlog;
  616. var
  617. Delim: WideChar;
  618. svalue: WideString;
  619. begin
  620. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  621. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  622. SkipWhitespace;
  623. ExpectString('version');
  624. ExpectEq;
  625. if (FCurChar = '''') or (FCurChar = '"') then
  626. begin
  627. Delim := FCurChar;
  628. GetChar; // skip quote
  629. if doc.InheritsFrom(TXMLDocument) then
  630. TXMLDocument(doc).XMLVersion := GetString(NmToken);
  631. ExpectChar(Delim);
  632. if FCurChar <> '?' then
  633. ExpectWhitespace;
  634. end
  635. else
  636. RaiseExpectedQMark;
  637. if FCurChar = 'e' then // [80]
  638. begin
  639. ExpectString('encoding');
  640. ExpectEq;
  641. if (FCurChar = '''') or (FCurChar = '"') then
  642. begin
  643. Delim := FCurChar;
  644. GetChar; // skip quote
  645. if not ((ord(FCurChar) < 256) and (char(FCurChar) in ['A'..'Z', 'a'..'z'])) then
  646. RaiseExc('Expected character (A-Z, a-z)');
  647. SkipString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  648. // TODO: analyze encoding string, and adjust FSource if needed and possible
  649. ExpectChar(Delim);
  650. if FCurChar <> '?' then
  651. ExpectWhitespace;
  652. end
  653. else
  654. RaiseExpectedQMark;
  655. end;
  656. // SDDecl?
  657. if FCurChar = 's' then
  658. begin
  659. ExpectString('standalone');
  660. ExpectEq;
  661. if (FCurChar = '''') or (FCurChar = '"') then
  662. begin
  663. Delim := FCurChar;
  664. GetChar; // skip quote
  665. svalue := ExpectName;
  666. if (svalue <> 'yes') and (svalue <> 'no') then
  667. RaiseExc('Standalone attribute may only have value "yes" or "no"');
  668. ExpectChar(Delim);
  669. end
  670. else
  671. RaiseExpectedQMark;
  672. SkipWhitespace;
  673. end;
  674. ExpectString('?>');
  675. end;
  676. function TXMLReader.ParseInternalDtd: Boolean;
  677. var
  678. DocType: TXMLReaderDocumentType;
  679. begin
  680. // Check for "(doctypedecl Misc*)?" [28]
  681. Result := (FCurChar = 'D');
  682. if Result then
  683. begin
  684. FPrologParsed := True;
  685. ExpectString('DOCTYPE');
  686. // create the DTD object
  687. DocType := TXMLReaderDocumentType.Create(doc as TXMLDocument);
  688. if doc.InheritsFrom(TXMLDocument) then
  689. TXMLDocument(doc).AppendChild(DocType);
  690. SkipWhitespace;
  691. DocType.Name := ExpectName;
  692. SkipWhitespace;
  693. ParseExternalID(False); // may be absent, ignore result
  694. SkipWhitespace;
  695. if CheckForChar('[') then
  696. begin
  697. repeat
  698. SkipWhitespace;
  699. until not (ParseMarkupDecl(True) or ParsePEReference);
  700. ExpectChar(']');
  701. SkipWhitespace;
  702. end;
  703. ExpectChar('>');
  704. ParseMisc;
  705. Exit;
  706. end;
  707. end;
  708. procedure TXMLReader.ExpectProlog; // [22]
  709. begin
  710. FPrologParsed := False;
  711. // Check for "Misc*".
  712. // ParseMisc() is inlined here and slightly modified
  713. // because we need to distinguish '<DOC...' from '<!DOC...'
  714. repeat
  715. SkipWhitespace;
  716. if not CheckForChar('<') then
  717. Break;
  718. if CheckForChar('!') then
  719. begin
  720. if FCurChar = '-' then
  721. ParseComment
  722. else
  723. if ParseInternalDtd then
  724. Exit;
  725. end
  726. else
  727. if CheckForChar('?') then
  728. ParsePI
  729. else
  730. Break;
  731. until False;
  732. end;
  733. function TXMLReader.ParseEq: Boolean; // [25]
  734. begin
  735. SkipWhitespace;
  736. Result := CheckForChar('=');
  737. if Result then
  738. SkipWhitespace;
  739. end;
  740. procedure TXMLReader.ExpectEq;
  741. begin
  742. if not ParseEq then
  743. RaiseExc('Expected "="');
  744. end;
  745. // Parse "Misc*":
  746. // Misc ::= Comment | PI | S
  747. procedure TXMLReader.ParseMisc; // [27]
  748. begin
  749. repeat
  750. SkipWhitespace;
  751. if not CheckForChar('<') then
  752. Break;
  753. if CheckForChar('!') then
  754. begin
  755. if FCurChar = '-' then
  756. ParseComment
  757. else
  758. RaiseExc('Document type declarations not allowed here');
  759. end
  760. else
  761. if CheckForChar('?') then
  762. ParsePI
  763. else
  764. Break;
  765. until False;
  766. end;
  767. { DTD stuff }
  768. procedure TXMLReader.ParseElementDecl; // [45]
  769. procedure ExpectChoiceOrSeq; // [49], [50]
  770. procedure ExpectCP; // [48]
  771. begin
  772. if CheckForChar('(') then
  773. ExpectChoiceOrSeq
  774. else
  775. SkipName;
  776. if CheckForChar('?') then
  777. else if CheckForChar('*') then
  778. else if CheckForChar('+') then;
  779. end;
  780. var
  781. Delim: WideChar;
  782. begin
  783. SkipWhitespace;
  784. ExpectCP;
  785. Delim := #0;
  786. repeat
  787. SkipWhitespace;
  788. if (FCurChar = #0) or CheckForChar(')') then
  789. Break;
  790. if Delim = #0 then
  791. begin
  792. if (FCurChar = '|') or (FCurChar = ',') then
  793. Delim := FCurChar
  794. else
  795. RaiseExc('Expected "|" or ","');
  796. end
  797. else
  798. if FCurChar <> Delim then
  799. RaiseExc(Delim, FCurChar);
  800. GetChar; // skip delimiter
  801. SkipWhitespace;
  802. ExpectCP;
  803. until False;
  804. end;
  805. begin
  806. SkipName;
  807. ExpectWhitespace;
  808. // Get contentspec [46]
  809. if FCurChar = 'E' then
  810. ExpectString('EMPTY')
  811. else if FCurChar = 'A' then
  812. ExpectString('ANY')
  813. else if CheckForChar('(') then
  814. begin
  815. SkipWhitespace;
  816. if CheckForChar('#') then
  817. begin
  818. // Parse Mixed section [51]
  819. ExpectString('PCDATA');
  820. SkipWhitespace;
  821. if not CheckForChar(')') then
  822. begin
  823. repeat
  824. ExpectChar('|');
  825. SkipWhitespace;
  826. SkipName;
  827. SkipWhitespace;
  828. until FCurChar = ')';
  829. GetChar;
  830. ExpectChar('*');
  831. end
  832. else // 'PCDATA' followed by ')' - fixes valid/P96/ibm69v01.xml
  833. CheckForChar('*');
  834. end
  835. else // Parse Children section [47]
  836. begin
  837. ExpectChoiceOrSeq;
  838. if CheckForChar('?') then
  839. else if CheckForChar('*') then
  840. else if CheckForChar('+') then;
  841. end;
  842. end
  843. else
  844. RaiseExc('Invalid content specification');
  845. SkipWhitespace;
  846. ExpectChar('>');
  847. end;
  848. procedure TXMLReader.ParseNotationDecl; // [82]
  849. begin
  850. SkipName;
  851. ExpectWhitespace;
  852. // Unclear rule...
  853. // IE understands 'SYSTEM' followed by literal and 'PUBLIC' followed by 2 literals
  854. // this is what is handled in ParseExternalID().
  855. if ParseExternalID(True) then
  856. (* else if CheckFor('PUBLIC') then
  857. begin // [83]
  858. ExpectWhitespace;
  859. SkipPubidLiteral;
  860. end *) else
  861. RaiseExc('Expected external or public ID');
  862. SkipWhitespace;
  863. ExpectChar('>');
  864. end;
  865. procedure TXMLReader.ParseAttlistDecl; // [52]
  866. var
  867. SaveCurNode: TDOMNode;
  868. ValueRequired: Boolean;
  869. Token: WideString;
  870. begin
  871. SkipName;
  872. SkipWhitespace;
  873. while not CheckForChar('>') do
  874. begin
  875. SkipName;
  876. ExpectWhitespace;
  877. Token := GetString(['A'..'Z']); // Get AttType [54], [55], [56]
  878. if Token = 'CDATA' then
  879. else if Token = 'ID' then
  880. else if Token = 'IDREF' then
  881. else if Token = 'IDREFS' then
  882. else if Token = 'ENTITY' then
  883. else if Token = 'ENTITIES' then
  884. else if Token = 'NMTOKEN' then
  885. else if Token = 'NMTOKENS' then
  886. else if Token = 'NOTATION' then // [57], [58]
  887. begin
  888. ExpectWhitespace;
  889. ExpectChar('(');
  890. SkipWhitespace;
  891. SkipName;
  892. SkipWhitespace;
  893. while not CheckForChar(')') do
  894. begin
  895. ExpectChar('|');
  896. SkipWhitespace;
  897. SkipName;
  898. SkipWhitespace;
  899. end;
  900. end
  901. else
  902. if CheckForChar('(') then
  903. begin // [59]
  904. SkipWhitespace;
  905. SkipString(Nmtoken);
  906. if FValueLength = 0 then // Fix ibm59n01.xml - name should be present
  907. RaiseNameNotFound;
  908. SkipWhitespace;
  909. while not CheckForChar(')') do
  910. begin
  911. ExpectChar('|');
  912. SkipWhitespace;
  913. SkipString(Nmtoken);
  914. SkipWhitespace;
  915. end;
  916. end else
  917. RaiseExc('Invalid tokenized type');
  918. ExpectWhitespace;
  919. // Get DefaultDecl [60]
  920. ValueRequired := False;
  921. if CheckForChar('#') then
  922. begin
  923. Token := GetString(['A'..'Z']);
  924. if Token = 'REQUIRED' then
  925. else if Token = 'IMPLIED' then
  926. else if Token = 'FIXED' then
  927. begin
  928. ExpectWhitespace; // Fix ibm60n05.xml
  929. ValueRequired := True;
  930. end
  931. else
  932. RaiseExc('Illegal attribute definition'); // Fix sun/not-wf/attlist08.xml
  933. end
  934. else
  935. ValueRequired := True;
  936. if ValueRequired then
  937. begin
  938. SaveCurNode := FCursor;
  939. FCursor := doc.CreateAttribute('');
  940. ExpectAttValue;
  941. FCursor.Free; // avoid memory leaks
  942. FCursor := SaveCurNode;
  943. end;
  944. SkipWhitespace;
  945. end;
  946. end;
  947. procedure TXMLReader.ParseEntityDecl; // [70]
  948. function ParseEntityValue: Boolean; // [9]
  949. var
  950. Delim: WideChar;
  951. begin
  952. if (FCurChar = '''') or (FCurChar = '"') then
  953. begin
  954. Delim := FCurChar;
  955. GetChar; // skip quote
  956. while not ((FCurChar = #0) or CheckForChar(Delim)) do
  957. if ParsePEReference then
  958. begin
  959. if FInternalSubset then
  960. RaiseExc('PE references in internal subset may not occur inside declarations');
  961. end
  962. else if CheckForChar('&') then
  963. begin
  964. ParseReference().Free; // may look awful... but avoid memory leaks
  965. end
  966. else begin
  967. GetChar; // Normal character
  968. end;
  969. Result := True;
  970. end
  971. else
  972. Result := False;
  973. end;
  974. begin
  975. if CheckForChar('%') then // [72]
  976. begin
  977. ExpectWhitespace;
  978. ExpectName;
  979. ExpectWhitespace;
  980. // Get PEDef [74]
  981. if ParseEntityValue then
  982. // SYSTEM | PUBLIC
  983. else if ParseExternalID(False) then
  984. else
  985. RaiseExc('Expected entity value or external ID');
  986. end
  987. else // [71]
  988. begin
  989. ExpectName;
  990. ExpectWhitespace;
  991. // Get EntityDef [73]
  992. if ParseEntityValue then
  993. else
  994. begin
  995. ExpectExternalID;
  996. // Get NDataDecl [76]
  997. if FCurChar <> '>' then
  998. ExpectWhitespace; // FIX: ibm76n03.xml: whitespace REQUIRED before NDATA
  999. if FCurChar = 'N' then
  1000. begin
  1001. ExpectString('NDATA');
  1002. ExpectWhitespace;
  1003. SkipName;
  1004. end;
  1005. end;
  1006. end;
  1007. SkipWhitespace;
  1008. ExpectChar('>');
  1009. end;
  1010. function TXMLReader.ParseMarkupDecl(InternalSubset: Boolean): Boolean; // [29]
  1011. var
  1012. Token: WideString;
  1013. begin
  1014. Result := False;
  1015. FInternalSubset := InternalSubset;
  1016. repeat
  1017. SkipWhitespace;
  1018. if not CheckForChar('<') then // condition is true for #0
  1019. Exit;
  1020. if CheckForChar('!') then
  1021. begin
  1022. if FCurChar = '-' then
  1023. ParseComment
  1024. else
  1025. begin
  1026. Token := GetString(['A'..'Z']);
  1027. ExpectWhitespace;
  1028. if Token = 'ELEMENT' then
  1029. ParseElementDecl
  1030. else if Token = 'ENTITY' then
  1031. ParseEntityDecl
  1032. else if Token = 'ATTLIST' then
  1033. ParseAttlistDecl
  1034. else if Token = 'NOTATION' then
  1035. ParseNotationDecl
  1036. else
  1037. RaiseExc('Wrong declaration type');
  1038. end;
  1039. end
  1040. else if CheckForChar('?') then
  1041. ParsePI
  1042. until False;
  1043. end;
  1044. procedure TXMLReader.ProcessDTD(ABuf: PChar; const AFilename: String);
  1045. begin
  1046. buf := ABuf;
  1047. Filename := AFilename;
  1048. FLine := 1;
  1049. FColumn := 0;
  1050. DetectEncoding;
  1051. doc := TXMLDocument.Create;
  1052. repeat
  1053. SkipWhitespace;
  1054. until not (ParseMarkupDecl(False) or ParsePEReference);
  1055. end;
  1056. // starting '<!' already consumed
  1057. procedure TXMLReader.ParseCDSect; // [18]
  1058. var
  1059. name: WideString;
  1060. begin
  1061. ExpectString('[CDATA[');
  1062. FValueLength := 0;
  1063. repeat
  1064. AppendValue(FCurChar);
  1065. GetChar;
  1066. if (FValueLength >= 3) and (FValue[FValueLength-1] = '>') and
  1067. (FValue[FValueLength-2] = ']') and (FValue[FValueLength-3] = ']') then
  1068. begin
  1069. Dec(FValueLength, 3);
  1070. Break;
  1071. end;
  1072. until FCurChar = #0;
  1073. if FCurChar = #0 then
  1074. RaiseExc('Unterminated CDATA section');
  1075. SetString(name, PWideChar(@FValue[0]), FValueLength);
  1076. FCursor.AppendChild(doc.CreateCDATASection(name));
  1077. end;
  1078. {
  1079. returns True at end of stream.
  1080. this is ok for fragments but error for document
  1081. returns False when '<' is followed by ([^![?] | NameStartChar)
  1082. this is ok for document (expect ETag then) but error for fragment
  1083. }
  1084. function TXMLReader.ParseElementContent: Boolean;
  1085. begin
  1086. Result := False;
  1087. repeat
  1088. if FCurChar = '<' then
  1089. begin
  1090. GetChar;
  1091. if FCurChar = '!' then
  1092. begin
  1093. GetChar;
  1094. if FCurChar = '[' then
  1095. ParseCDSect
  1096. else if FCurChar = '-' then
  1097. ParseComment
  1098. else
  1099. RaiseExc('Document type declarations not allowed here');
  1100. end
  1101. else if CheckName then
  1102. ParseElement
  1103. else if CheckForChar('?') then
  1104. ParsePI
  1105. else
  1106. Exit;
  1107. end
  1108. else
  1109. ProcessTextAndRefs('<', True);
  1110. until FCurChar = #0;
  1111. Result := True;
  1112. end;
  1113. // Element name already in FNameBuffer
  1114. procedure TXMLReader.ParseElement; // [39] [40] [44]
  1115. var
  1116. NewElem: TDOMElement;
  1117. IsEmpty: Boolean;
  1118. attr, OldAttr: TDOMAttr;
  1119. begin
  1120. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' ParseElement A');{$ENDIF}
  1121. NewElem := doc.CreateElementBuf(@FName[0], FNameLength);
  1122. FCursor.AppendChild(NewElem);
  1123. Assert(NewElem.ParentNode = FCursor, 'AppendChild did not set ParentNode');
  1124. FCursor := NewElem;
  1125. IsEmpty := False;
  1126. while FCurChar <> '>' do
  1127. begin
  1128. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' ParseElement E');{$ENDIF}
  1129. if CheckForChar('/') then
  1130. begin
  1131. IsEmpty := True;
  1132. FCursor := FCursor.ParentNode;
  1133. Break;
  1134. end;
  1135. // Get Attribute [41]
  1136. ExpectWhitespace;
  1137. if not CheckName then // allow stuff like <element >, <element />
  1138. Continue;
  1139. attr := doc.CreateAttributeBuf(@FName[0], FNameLength);
  1140. // WFC: Attribute must be unique
  1141. // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
  1142. OldAttr := TDOMAttr(NewElem.Attributes.SetNamedItem(Attr));
  1143. if Assigned(OldAttr) then
  1144. begin
  1145. OldAttr.Free;
  1146. RaiseExc('Duplicate attribute');
  1147. end;
  1148. ExpectEq;
  1149. Assert(attr.OwnerElement = NewElem, 'DOMAttr.OwnerElement not set correctly');
  1150. FCursor := attr;
  1151. ExpectAttValue;
  1152. FCursor := NewElem;
  1153. end;
  1154. ExpectChar('>');
  1155. if not IsEmpty then
  1156. begin
  1157. SkipWhitespace;
  1158. if not ParseElementContent then
  1159. begin
  1160. if CheckForChar('/') then // Get ETag [42]
  1161. begin
  1162. if ExpectName <> NewElem.NodeName then
  1163. RaiseExc('Unmatching element end tag (expected "</' + NewElem.NodeName + '>")');
  1164. SkipWhitespace;
  1165. ExpectChar('>');
  1166. FCursor := FCursor.ParentNode;
  1167. end
  1168. else
  1169. RaiseNameNotFound;
  1170. end
  1171. else // End of stream in content
  1172. RaiseExc('Document element not closed');
  1173. end;
  1174. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' ParseElement END');{$ENDIF}
  1175. end;
  1176. procedure TXMLReader.ExpectElement;
  1177. begin
  1178. if CheckName then
  1179. ParseElement
  1180. else
  1181. RaiseExc('Expected element');
  1182. end;
  1183. function TXMLReader.ParsePEReference: Boolean; // [69]
  1184. begin
  1185. Result := CheckForChar('%');
  1186. if Result then
  1187. begin
  1188. SkipName;
  1189. ExpectChar(';');
  1190. end;
  1191. end;
  1192. function TXMLReader.ParseExternalID(InNotation: Boolean): Boolean; // [75]
  1193. function SkipSystemLiteral: Boolean;
  1194. var
  1195. Delim: WideChar;
  1196. begin
  1197. if (FCurChar = '''') or (FCurChar = '"') then
  1198. begin
  1199. Delim := FCurChar;
  1200. GetChar; // skip quote
  1201. while (FCurChar <> Delim) and (FCurChar <> #0) do
  1202. begin
  1203. GetChar;
  1204. end;
  1205. ExpectChar(Delim); // <-- to check the EOF only
  1206. Result := True;
  1207. end
  1208. else
  1209. Result := False;
  1210. end;
  1211. begin
  1212. if FCurChar = 'S' then
  1213. begin
  1214. ExpectString('SYSTEM');
  1215. ExpectWhitespace;
  1216. if not SkipSystemLiteral then
  1217. RaiseExpectedQMark; // FIX ibm75n06.xml: system literal MUST be present
  1218. Result := True;
  1219. end
  1220. else
  1221. if FCurChar = 'P' then
  1222. begin
  1223. ExpectString('PUBLIC');
  1224. ExpectWhitespace;
  1225. SkipPubidLiteral;
  1226. if InNotation then
  1227. begin
  1228. SkipWhitespace;
  1229. SkipSystemLiteral;
  1230. end
  1231. else
  1232. begin
  1233. ExpectWhitespace;
  1234. if not SkipSystemLiteral then
  1235. RaiseExpectedQMark; // FIX ibm75n06.xml: system literal MUST be present
  1236. end;
  1237. Result := True;
  1238. end else
  1239. Result := False;
  1240. end;
  1241. procedure TXMLReader.ExpectExternalID;
  1242. begin
  1243. if not ParseExternalID(False) then
  1244. RaiseExc('Expected external ID');
  1245. end;
  1246. { Currently, this method will only resolve the entities which are
  1247. predefined in XML: }
  1248. procedure TXMLReader.ResolveEntities(RootNode: TDOMNode);
  1249. var
  1250. Node, NextNode: TDOMNode;
  1251. procedure ReplaceEntityRef(EntityNode: TDOMNode; const Replacement: WideString);
  1252. var
  1253. PrevSibling, NextSibling: TDOMNode;
  1254. begin
  1255. PrevSibling := EntityNode.PreviousSibling;
  1256. NextSibling := EntityNode.NextSibling;
  1257. if Assigned(PrevSibling) and (PrevSibling.NodeType = TEXT_NODE) then
  1258. begin
  1259. TDOMCharacterData(PrevSibling).AppendData(Replacement);
  1260. RootNode.RemoveChild(EntityNode);
  1261. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  1262. begin
  1263. // next sibling is to be removed, so we can't use it anymore
  1264. NextNode := NextSibling.NextSibling;
  1265. TDOMCharacterData(PrevSibling).AppendData(
  1266. TDOMCharacterData(NextSibling).Data);
  1267. RootNode.RemoveChild(NextSibling);
  1268. end
  1269. end else
  1270. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  1271. begin
  1272. TDOMCharacterData(NextSibling).InsertData(0, Replacement);
  1273. RootNode.RemoveChild(EntityNode);
  1274. end else
  1275. RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
  1276. end;
  1277. begin
  1278. Node := RootNode.FirstChild;
  1279. while Assigned(Node) do
  1280. begin
  1281. NextNode := Node.NextSibling;
  1282. if Node.NodeType = ENTITY_REFERENCE_NODE then
  1283. if Node.NodeName = 'amp' then
  1284. ReplaceEntityRef(Node, '&')
  1285. else if Node.NodeName = 'apos' then
  1286. ReplaceEntityRef(Node, '''')
  1287. else if Node.NodeName = 'gt' then
  1288. ReplaceEntityRef(Node, '>')
  1289. else if Node.NodeName = 'lt' then
  1290. ReplaceEntityRef(Node, '<')
  1291. else if Node.NodeName = 'quot' then
  1292. ReplaceEntityRef(Node, '"');
  1293. Node := NextNode;
  1294. end;
  1295. end;
  1296. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: File);
  1297. var
  1298. reader: TXMLReader;
  1299. buf: PChar;
  1300. BufSize: LongInt;
  1301. begin
  1302. ADoc := nil;
  1303. BufSize := FileSize(f) + 2; // need double termination for the case of Unicode
  1304. if BufSize <= 2 then
  1305. exit;
  1306. GetMem(buf, BufSize);
  1307. try
  1308. BlockRead(f, buf^, BufSize - 2);
  1309. buf[BufSize - 1] := #0;
  1310. buf[BufSize] := #0;
  1311. Reader := TXMLReader.Create;
  1312. try
  1313. Reader.ProcessXML(buf, TFileRec(f).name);
  1314. ADoc := TXMLDocument(Reader.doc);
  1315. finally
  1316. Reader.Free;
  1317. end;
  1318. finally
  1319. FreeMem(buf);
  1320. end;
  1321. end;
  1322. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const AFilename: String);
  1323. var
  1324. reader: TXMLReader;
  1325. buf: PChar;
  1326. StreamSize: Int64;
  1327. begin
  1328. ADoc := nil;
  1329. StreamSize := f.Size; // access to Size causes at least two seeks...
  1330. if StreamSize = 0 then exit;
  1331. GetMem(buf, StreamSize + 2);
  1332. try
  1333. f.Read(buf^, StreamSize);
  1334. buf[StreamSize] := #0;
  1335. buf[StreamSize+1] := #0;
  1336. Reader := TXMLReader.Create;
  1337. try
  1338. Reader.ProcessXML(buf, AFilename);
  1339. finally
  1340. ADoc := TXMLDocument(Reader.doc);
  1341. Reader.Free;
  1342. end;
  1343. finally
  1344. FreeMem(buf);
  1345. end;
  1346. end;
  1347. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream);
  1348. begin
  1349. ReadXMLFile(ADoc, f, '<Stream>');
  1350. end;
  1351. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
  1352. var
  1353. FileStream: TStream;
  1354. begin
  1355. ADoc := nil;
  1356. FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  1357. if FileStream = nil then exit; //? it throws exception if cannot be created...
  1358. try
  1359. ReadXMLFile(ADoc, FileStream, AFilename);
  1360. finally
  1361. FileStream.Free;
  1362. end;
  1363. end;
  1364. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File);
  1365. var
  1366. Reader: TXMLReader;
  1367. buf: PChar;
  1368. BufSize: LongInt;
  1369. begin
  1370. BufSize := FileSize(f) + 2;
  1371. if BufSize <= 2 then
  1372. exit;
  1373. GetMem(buf, BufSize);
  1374. try
  1375. BlockRead(f, buf^, BufSize - 2);
  1376. buf[BufSize - 1] := #0;
  1377. buf[BufSize] := #0;
  1378. Reader := TXMLReader.Create;
  1379. try
  1380. Reader.Doc := AParentNode.OwnerDocument;
  1381. Reader.ProcessFragment(AParentNode, buf, TFileRec(f).name);
  1382. finally
  1383. Reader.Free;
  1384. end;
  1385. finally
  1386. FreeMem(buf);
  1387. end;
  1388. end;
  1389. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const AFilename: String);
  1390. var
  1391. Reader: TXMLReader;
  1392. buf: PChar;
  1393. StreamSize: Int64;
  1394. begin
  1395. StreamSize := f.Size;
  1396. if StreamSize = 0 then
  1397. exit;
  1398. GetMem(buf, StreamSize + 2);
  1399. try
  1400. f.Read(buf^, StreamSize);
  1401. buf[StreamSize] := #0;
  1402. buf[StreamSize+1] := #0;
  1403. Reader := TXMLReader.Create;
  1404. Reader.Doc := AParentNode.OwnerDocument;
  1405. try
  1406. Reader.ProcessFragment(AParentNode, buf, AFilename);
  1407. finally
  1408. Reader.Free;
  1409. end;
  1410. finally
  1411. FreeMem(buf);
  1412. end;
  1413. end;
  1414. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
  1415. begin
  1416. ReadXMLFragment(AParentNode, f, '<Stream>');
  1417. end;
  1418. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  1419. var
  1420. Stream: TStream;
  1421. begin
  1422. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  1423. try
  1424. ReadXMLFragment(AParentNode, Stream, AFilename);
  1425. finally
  1426. Stream.Free;
  1427. end;
  1428. end;
  1429. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: File);
  1430. var
  1431. Reader: TXMLReader;
  1432. buf: PChar;
  1433. BufSize: LongInt;
  1434. begin
  1435. ADoc := nil;
  1436. BufSize := FileSize(f) + 1;
  1437. if BufSize <= 1 then
  1438. exit;
  1439. GetMem(buf, BufSize);
  1440. try
  1441. BlockRead(f, buf^, BufSize - 1);
  1442. buf[BufSize - 1] := #0;
  1443. Reader := TXMLReader.Create;
  1444. try
  1445. Reader.ProcessDTD(buf, TFileRec(f).name);
  1446. ADoc := TXMLDocument(Reader.doc);
  1447. finally
  1448. Reader.Free;
  1449. end;
  1450. finally
  1451. FreeMem(buf);
  1452. end;
  1453. end;
  1454. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const AFilename: String);
  1455. var
  1456. Reader: TXMLReader;
  1457. buf: PChar;
  1458. begin
  1459. ADoc := nil;
  1460. if f.Size = 0 then
  1461. exit;
  1462. GetMem(buf, f.Size + 1);
  1463. try
  1464. f.Read(buf^, f.Size);
  1465. buf[f.Size] := #0;
  1466. Reader := TXMLReader.Create;
  1467. try
  1468. Reader.ProcessDTD(buf, AFilename);
  1469. ADoc := TXMLDocument(Reader.doc);
  1470. finally
  1471. Reader.Free;
  1472. end;
  1473. finally
  1474. FreeMem(buf);
  1475. end;
  1476. end;
  1477. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream);
  1478. begin
  1479. ReadDTDFile(ADoc, f, '<Stream>');
  1480. end;
  1481. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
  1482. var
  1483. Stream: TStream;
  1484. begin
  1485. ADoc := nil;
  1486. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  1487. try
  1488. ReadDTDFile(ADoc, Stream, AFilename);
  1489. finally
  1490. Stream.Free;
  1491. end;
  1492. end;
  1493. end.