whtml.pas 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 1998 by Berczi Gabor
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. unit WHTML;
  11. {$I globdir.inc}
  12. interface
  13. uses Objects;
  14. type
  15. PTextFile = ^TTextFile;
  16. TTextFile = object(TObject)
  17. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  18. function GetFileName : string; virtual;
  19. end;
  20. PMemoryTextFile = ^TMemoryTextFile;
  21. TMemoryTextFile = object(TTextFile)
  22. constructor Init;
  23. procedure AddLine(const S: string); virtual;
  24. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  25. function GetFileName : string; virtual;
  26. function GetLineCount : sw_integer;
  27. destructor Done; virtual;
  28. private
  29. Lines : PUnsortedStrCollection;
  30. end;
  31. PDOSTextFile = ^TDOSTextFile;
  32. TDOSTextFile = object(TMemoryTextFile)
  33. constructor Init(AFileName: string);
  34. function GetFileName : string; virtual;
  35. private
  36. DosFileName : string;
  37. end;
  38. PSGMLParser = ^TSGMLParser;
  39. TSGMLParser = object(TObject)
  40. constructor Init;
  41. function Process(HTMLFile: PTextFile): boolean; virtual;
  42. function ProcessLine(LineText: string): boolean; virtual;
  43. destructor Done; virtual;
  44. public
  45. Line,LinePos: sw_integer;
  46. procedure DocSoftBreak; virtual;
  47. function GetFileName : string;
  48. function DocAddTextChar(C: char): boolean; virtual;
  49. procedure DocAddText(S: string); virtual;
  50. procedure DocProcessTag(Tag: string); virtual;
  51. procedure DocProcessComment(Comment: string); virtual;
  52. function DocDecodeNamedEntity(Name: string; var Entity: string): boolean; virtual;
  53. private
  54. CurTag: string;
  55. FileName : string;
  56. InTag,InComment,InString: boolean;
  57. end;
  58. PHTMLParser = ^THTMLParser;
  59. THTMLParser = object(TSGMLParser)
  60. procedure DocSoftBreak; virtual;
  61. function DocAddTextChar(C: char): boolean; virtual;
  62. procedure DocProcessTag(Tag: string); virtual;
  63. function DocGetTagParam(Name: string; var Value: string): boolean; virtual;
  64. procedure DocProcessComment(Comment: string); virtual;
  65. function DocDecodeNamedEntity(Name: string; var E: string): boolean; virtual;
  66. public
  67. TagName,TagParams: string;
  68. DisableCrossIndexing : boolean;
  69. procedure DocUnknownTag; virtual;
  70. procedure DocTYPE; virtual;
  71. procedure DocHTML(Entered: boolean); virtual;
  72. procedure DocHEAD(Entered: boolean); virtual;
  73. procedure DocMETA; virtual;
  74. procedure DocTITLE(Entered: boolean); virtual;
  75. procedure DocBODY(Entered: boolean); virtual;
  76. procedure DocAnchor(Entered: boolean); virtual;
  77. procedure DocHeading(Level: integer; Entered: boolean); virtual;
  78. procedure DocParagraph(Entered: boolean); virtual;
  79. procedure DocBreak; virtual;
  80. procedure DocImage; virtual;
  81. procedure DocBold(Entered: boolean); virtual;
  82. procedure DocCite(Entered: boolean); virtual;
  83. procedure DocCode(Entered: boolean); virtual;
  84. procedure DocEmphasized(Entered: boolean); virtual;
  85. procedure DocItalic(Entered: boolean); virtual;
  86. procedure DocKbd(Entered: boolean); virtual;
  87. procedure DocPreformatted(Entered: boolean); virtual;
  88. procedure DocSample(Entered: boolean); virtual;
  89. procedure DocStrong(Entered: boolean); virtual;
  90. procedure DocTeleType(Entered: boolean); virtual;
  91. procedure DocVariable(Entered: boolean); virtual;
  92. procedure DocSpan(Entered: boolean); virtual;
  93. procedure DocDiv(Entered: boolean); virtual;
  94. procedure DocList(Entered: boolean); virtual;
  95. procedure DocOrderedList(Entered: boolean); virtual;
  96. procedure DocListItem(Entered: boolean); virtual;
  97. procedure DocDefList(Entered: boolean); virtual;
  98. procedure DocDefTerm(Entered: boolean); virtual;
  99. procedure DocDefExp(Entered: boolean); virtual;
  100. procedure DocTable(Entered: boolean); virtual;
  101. procedure DocTableRow(Entered: boolean); virtual;
  102. procedure DocTableHeaderItem(Entered: boolean); virtual;
  103. procedure DocTableItem(Entered: boolean); virtual;
  104. procedure DocHorizontalRuler; virtual;
  105. end;
  106. implementation
  107. uses
  108. WUtils;
  109. function TTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  110. begin
  111. Abstract;
  112. GetLine:=false;
  113. end;
  114. function TTextFile.GetFileName : string;
  115. begin
  116. GetFileName:='unknown';
  117. end;
  118. constructor TMemoryTextFile.Init;
  119. begin
  120. inherited Init;
  121. New(Lines, Init(500,500));
  122. end;
  123. function TMemoryTextFile.GetFileName : string;
  124. begin
  125. GetFileName:='unknown';
  126. end;
  127. function TMemoryTextFile.GetLineCount : sw_integer;
  128. begin
  129. GetLineCount:=Lines^.Count;
  130. end;
  131. procedure TMemoryTextFile.AddLine(const S: string);
  132. begin
  133. Lines^.Insert(NewStr(S));
  134. end;
  135. function TMemoryTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  136. var OK: boolean;
  137. PS: PString;
  138. begin
  139. OK:=(Lines<>nil) and (Idx<Lines^.Count);
  140. if OK then
  141. begin
  142. PS:=Lines^.At(Idx);
  143. if PS=nil then S:='' else S:=PS^;
  144. end;
  145. GetLine:=OK;
  146. end;
  147. destructor TMemoryTextFile.Done;
  148. begin
  149. if Lines<>nil then
  150. Dispose(Lines, Done);
  151. Lines:=nil;
  152. inherited Done;
  153. end;
  154. constructor TDOSTextFile.Init(AFileName: string);
  155. (*{$ifdef TPUNIXLF}
  156. procedure readln(var t:text;var s:string);
  157. var
  158. c : char;
  159. i : longint;
  160. begin
  161. c:=#0;
  162. i:=0;
  163. while (not eof(t)) and (c<>#10) and (i<255) do
  164. begin
  165. read(t,c);
  166. if (i<255) and (c<>#10) then
  167. begin
  168. inc(i);
  169. s[i]:=c;
  170. end;
  171. end;
  172. if (i>0) and (s[i]=#13) then
  173. dec(i);
  174. s[0]:=chr(i);
  175. end;
  176. {$endif}*)
  177. var f: file;
  178. linecomplete,hasCR: boolean;
  179. S: string;
  180. OldFMode : Integer;
  181. begin
  182. inherited Init;
  183. if AFileName='' then Fail;
  184. {$I-}
  185. Assign(f,AFileName);
  186. OldFMode:= FileMode;
  187. FileMode:= 0;
  188. Reset(f,1);
  189. FileMode:= OldFMode;
  190. if IOResult<>0 then Fail;
  191. DosFileName:=AFileName;
  192. Dispose(Lines,Done);
  193. New(Lines, Init(500,2000));
  194. while (Eof(f)=false) and (IOResult=0) do
  195. begin
  196. ReadlnFromFile(f,S,linecomplete,hasCR,true);
  197. AddLine(S);
  198. end;
  199. Close(f);
  200. {$I+}
  201. end;
  202. function TDosTextFile.GetFileName : string;
  203. begin
  204. GetFileName:=DosFileName;
  205. end;
  206. constructor TSGMLParser.Init;
  207. begin
  208. inherited Init;
  209. FileName:='';
  210. end;
  211. function TSGMLParser.GetFileName : string;
  212. begin
  213. GetFileName:=FileName;
  214. end;
  215. function TSGMLParser.Process(HTMLFile: PTextFile): boolean;
  216. var S: string;
  217. OK,LineOK: boolean;
  218. begin
  219. if HTMLFile=nil then Exit;
  220. InTag:=false; InComment:=false; InString:=false; CurTag:='';
  221. Line:=0; OK:=true;
  222. FileName:=HTMLFile^.GetFileName;
  223. repeat
  224. LineOK:=HTMLFile^.GetLine(Line,S);
  225. if LineOK then
  226. begin
  227. Inc(Line);
  228. OK:=ProcessLine(S);
  229. end;
  230. until (LineOK=false) or (OK=false);
  231. Process:=OK;
  232. end;
  233. function TSGMLParser.ProcessLine(LineText: string): boolean;
  234. var OK: boolean;
  235. C: char;
  236. NewInString: boolean;
  237. OldInComment: boolean;
  238. WasThereAnyText: boolean;
  239. Pos2: integer;
  240. Name,Entity: string;
  241. LiteralCode: boolean;
  242. LiteralStart,LiteralEnd,P: integer;
  243. const TabSize : integer = 8;
  244. Tab = #9;
  245. begin
  246. WasThereAnyText:=false;
  247. OK:=true; LinePos:=1;
  248. LiteralStart:=0; LiteralEnd:=0;
  249. repeat
  250. P:=Pos(TAB,LineText);
  251. if P>0 then
  252. LineText:=copy(LineText,1,P-1)+CharStr(' ',TabSize)+copy(LineText,P+1,255);
  253. until P=0;
  254. while (LinePos<=length(LineText)) and OK do
  255. begin
  256. LiteralCode:=false;
  257. NewInString:=InString; OldInComment:=InComment;
  258. C:=LineText[LinePos];
  259. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  260. if (LiteralCode=false) and (C='&') then
  261. begin
  262. LiteralStart:=0; LiteralEnd:=0;
  263. Name:=''; Pos2:=LinePos+1;
  264. while (Pos2<=length(LineText)) and (LineText[Pos2]<>';') do
  265. begin
  266. Name:=Name+LineText[Pos2];
  267. Inc(Pos2);
  268. end;
  269. Inc(Pos2);
  270. if DocDecodeNamedEntity(Name,Entity) then
  271. begin
  272. LineText:=copy(LineText,1,LinePos-1)+Entity+copy(LineText,Pos2,255);
  273. LiteralStart:=LinePos; LiteralEnd:=LiteralStart+length(Entity)-1;
  274. C:=LineText[LinePos];
  275. end;
  276. end;
  277. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  278. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=false) then
  279. NewInString:=true;
  280. if (LiteralCode=false) and (C='<') and (InTag=false) then
  281. InTag:=true;
  282. if InTag then CurTag:=CurTag+C else
  283. WasThereAnyText:=DocAddTextChar(C);
  284. if (LiteralCode=false) and InTag and (InString=false) and (CurTag='<!--') then
  285. InComment:=true;
  286. { A comment can be longer than 255 chars
  287. move the test to LineText string,
  288. This is why the Previous, Next and Up Tags where not working ... PM
  289. if (LiteralCode=false) and InTag and InComment and (InString=false) and (length(CurTag)>=3) and
  290. (copy(CurTag,length(CurTag)-2,3)='-->') then
  291. InComment:=false; }
  292. if (LiteralCode=false) and InTag and InComment and (InString=false) and (LinePos>=3) and
  293. (copy(LineText,LinePos-2,3)='-->') then
  294. InComment:=false;
  295. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=true) then
  296. NewInString:=false;
  297. if (LiteralCode=false) and (C='>') and (InTag=true) then
  298. begin
  299. InTag:=false;
  300. if OldInComment then
  301. DocProcessComment(CurTag)
  302. else
  303. DocProcessTag(CurTag);
  304. CurTag:='';
  305. end;
  306. InString:=NewInString;
  307. Inc(LinePos);
  308. end;
  309. { whtml does not depend on whelp,
  310. so I can not use hscLineBreak here. PM }
  311. if InTag then
  312. begin
  313. if InString then
  314. CurTag:=CurTag+#0;
  315. end
  316. else if WasThereAnyText then
  317. DocSoftBreak;
  318. ProcessLine:=true;
  319. end;
  320. procedure TSGMLParser.DocSoftBreak;
  321. begin
  322. Abstract;
  323. end;
  324. function TSGMLParser.DocAddTextChar(C: char): boolean;
  325. begin
  326. Abstract;
  327. DocAddTextChar:=false;
  328. end;
  329. procedure TSGMLParser.DocAddText(S: string);
  330. var I: sw_integer;
  331. begin
  332. for I:=1 to length(S) do
  333. DocAddTextChar(S[I]);
  334. end;
  335. function TSGMLParser.DocDecodeNamedEntity(Name: string; var Entity: string): boolean;
  336. begin
  337. DocDecodeNamedEntity:=false;
  338. end;
  339. procedure TSGMLParser.DocProcessTag(Tag: string);
  340. begin
  341. Abstract;
  342. end;
  343. procedure TSGMLParser.DocProcessComment(Comment: string);
  344. begin
  345. Abstract;
  346. end;
  347. destructor TSGMLParser.Done;
  348. begin
  349. inherited Done;
  350. end;
  351. procedure THTMLParser.DocSoftBreak;
  352. begin
  353. end;
  354. function THTMLParser.DocAddTextChar(C: char): boolean;
  355. begin
  356. { Abstract }
  357. DocAddTextChar:=false;
  358. end;
  359. function THTMLParser.DocDecodeNamedEntity(Name: string; var E: string): boolean;
  360. var Found: boolean;
  361. Code: word;
  362. CC: word;
  363. begin
  364. Found:=true; Code:=$ffff;
  365. Name:=LowCaseStr(Name);
  366. if copy(Name,1,1)='#' then
  367. begin
  368. if Name[2]='x' then
  369. Val('$'+copy(Name,3,255),Code,CC)
  370. else
  371. Val(copy(Name,2,255),Code,CC);
  372. if CC<>0 then
  373. begin
  374. {$ifdef DEBUG}
  375. DebugMessage(FileName,'NamedEntity '+Name+' not converted',1,1);
  376. {$endif DEBUG}
  377. Code:=$ffff;
  378. end;
  379. end;
  380. { #0 to #127 is same for Unicode and Code page 437 }
  381. if (code<=127) then
  382. begin
  383. E:=chr(code);
  384. DocDecodeNamedEntity:=true;
  385. exit;
  386. end;
  387. if (Code=$22{34}) or (Name='quot') then E:='"' else { double quote sign }
  388. if (Code=$26{38}) or (Name='amp') then E:='&' else { ampersand }
  389. if (Code=$27{39}) or (Name='apos') then E:='''' else { apostrophe }
  390. if (Code=$3C{60}) or (Name='lt') then E:='<' else { less-than sign }
  391. if (Code=$3E{62}) or (Name='gt') then E:='>' else { greater-than sign }
  392. if (Code=$5B) then E:='[' else { [ }
  393. if (Code=$5C) then E:='\' else { \ }
  394. if (Code=$5D) then E:=']' else { ] }
  395. if (Code=$5E) then E:='^' else { ^ }
  396. if (Code=$5F) then E:='_' else { _ }
  397. if (Code=160) or (Name='nbsp') then E:=#255 else { no-break space }
  398. if (Code=161) or (Name='iexcl') then E:='­' else { inverted exclamation mark }
  399. if (Code=162) or (Name='cent') then E:='›' else { cent sign }
  400. if (Code=163) or (Name='pound') then E:='œ' else { pound sterling sign }
  401. if (Code=164) or (Name='curren') then E:='$' else { general currency sign }
  402. if (Code=165) or (Name='yen') then E:='�' else { yen sign }
  403. if (Code=166) or (Name='brvbar') then E:='|' else { broken vertical bar }
  404. if (Code=167) or (Name='sect') then E:='' else { section sign }
  405. if (Code=168) or (Name='uml') then E:='"' else { umlaut (dieresis) }
  406. if (Code=169) or (Name='copy') then E:='(C)' else { copyright sign }
  407. (* if (Code=170) or (Name='ordf') then E:=#255 else { ordinal indicator, feminine }*)
  408. if (Code=171) or (Name='laquo') then E:='"' else { angle quotation mark -left }
  409. if (Code=172) or (Name='not') then E:='!' else { not sign }
  410. if (Code=173) or (Name='shy') then E:='-' else { soft hypen }
  411. if (Code=174) or (Name='reg') then E:='(R)' else { registered sign }
  412. (* if (Code=175) or (Name='macr') then E:='?' else { macron }*)
  413. if (Code=176) or (Name='deg') then E:='ø' else { degree sign }
  414. if (Code=177) or (Name='plusmn') then E:='ñ' else { plus-or-minus sign }
  415. if (Code=178) or (Name='sup2') then E:='ý' else { superscript 2 }
  416. if (Code=179) or (Name='sup3') then E:='^3' else { superscript 3 }
  417. if (Code=180) or (Name='acute') then E:='''' else { acute accent }
  418. if (Code=181) or (Name='micro') then E:='æ' else { micro sign }
  419. (* if (Code=182) or (Name='para') then E:='?' else { paragraph sign }*)
  420. if (Code=183) or (Name='middot') then E:='ù' else { middle dot }
  421. (* if (Code=184) or (Name='cedil') then E:='?' else { cedilla }*)
  422. if (Code=185) or (Name='sup1') then E:='^1' else { superscript 1 }
  423. (* if (Code=186) or (Name='ordm') then E:='?' else { ordinal indicator, masculine }*)
  424. if (Code=187) or (Name='raquo') then E:='"' else { angle quoatation mark -right }
  425. if (Code=188) or (Name='frac14') then E:='¬' else { fraction one-quarter }
  426. if (Code=189) or (Name='frac12') then E:='«' else { fraction one-half }
  427. if (Code=190) or (Name='frac34') then E:='3/4' else { fraction three-quarters }
  428. if (Code=191) or (Name='iquest') then E:='¨' else { inverted question mark }
  429. if (Code=192) or (Name='Agrave') then E:='A' else { capital A, grave accent }
  430. if (Code=193) or (Name='Aacute') then E:='A' else { capital A, acute accent }
  431. if (Code=194) or (Name='Acirc') then E:='A' else { capital A, circumflex accent }
  432. if (Code=195) or (Name='Atilde') then E:='A' else { capital A, tilde accent }
  433. if (Code=196) or (Name='Auml') then E:='Ž' else { capital A, dieresis or umlaut }
  434. if (Code=197) or (Name='Aring') then E:='�' else { capital A, ring }
  435. if (Code=198) or (Name='AElig') then E:='’' else { capital AE diphthong }
  436. if (Code=199) or (Name='Ccedil') then E:='€' else { capital C, cedilla }
  437. if (Code=200) or (Name='Egrave') then E:='�' else { capital E, grave accent }
  438. if (Code=201) or (Name='Eacute') then E:='�' else { capital E, acute accent }
  439. if (Code=202) or (Name='Ecirc') then E:='E' else { capital E, circumflex accent }
  440. if (Code=203) or (Name='Euml') then E:='E' else { capital E, dieresis or umlaut }
  441. if (Code=204) or (Name='Igrave') then E:='I' else { capital I, grave accent }
  442. if (Code=205) or (Name='Iacute') then E:='I' else { capital I, acute accent }
  443. if (Code=206) or (Name='Icirc') then E:='I' else { capital I, circumflex accent }
  444. if (Code=207) or (Name='Iuml') then E:='I' else { capital I, dieresis or umlaut }
  445. (* if (Code=208) or (Name='ETH') then E:='?' else { capital Eth, Icelandic }*)
  446. if (Code=209) or (Name='Ntidle') then E:='¥' else { capital N, tilde }
  447. if (Code=210) or (Name='Ograve') then E:='O' else { capital O, grave accent }
  448. if (Code=211) or (Name='Oacute') then E:='O' else { capital O, acute accent }
  449. if (Code=212) or (Name='Ocirc') then E:='O' else { capital O, circumflex accent }
  450. if (Code=213) or (Name='Otilde') then E:='O' else { capital O, tilde }
  451. if (Code=214) or (Name='Ouml') then E:='™' else { capital O, dieresis or umlaut }
  452. if (Code=215) or (Name='times') then E:='*' else { multiply sign }
  453. if (Code=216) or (Name='Oslash') then E:='O' else { capital O, slash }
  454. if (Code=217) or (Name='Ugrave') then E:='U' else { capital U, grave accent }
  455. if (Code=218) or (Name='Uacute') then E:='U' else { capital U, acute accent }
  456. if (Code=219) or (Name='Ucirc') then E:='U' else { capital U, circumflex accent }
  457. if (Code=220) or (Name='Uuml') then E:='š' else { capital U, dieresis or umlaut }
  458. if (Code=221) or (Name='Yacute') then E:='Y' else { capital Y, acute accent }
  459. (* if (Code=222) or (Name='THORN') then E:='?' else { capital THORN, Icelandic }*)
  460. if (Code=223) or (Name='szlig') then E:='á' else { small sharp S, German }
  461. if (Code=224) or (Name='agrave') then E:='…' else { small a, grave accent }
  462. if (Code=225) or (Name='aacute') then E:=' ' else { small a, acute accent }
  463. if (Code=226) or (Name='acirc') then E:='ƒ' else { small a, circumflex accent }
  464. if (Code=227) or (Name='atilde') then E:='ƒ' else { small a, tilde }
  465. if (Code=228) or (Name='auml') then E:='„' else { small a, dieresis or umlaut }
  466. if (Code=229) or (Name='aring') then E:='†' else { small a, ring }
  467. if (Code=230) or (Name='aelig') then E:='ae' else { small ae, diphthong }
  468. if (Code=231) or (Name='ccedil') then E:='‡' else { small c, cedilla }
  469. if (Code=232) or (Name='egrave') then E:='Š' else { small e, grave accent }
  470. if (Code=233) or (Name='eacute') then E:='‚' else { small e, acute accent }
  471. if (Code=234) or (Name='ecirc') then E:='ˆ' else { small e, circumflex accent }
  472. if (Code=235) or (Name='euml') then E:='‰' else { small e, dieresis or umlaut }
  473. if (Code=236) or (Name='igrave') then E:='�' else { small i, grave accent }
  474. if (Code=237) or (Name='iacute') then E:='¡' else { small i, acute accent }
  475. if (Code=238) or (Name='icirc') then E:='Œ' else { small i, circumflex accent }
  476. if (Code=239) or (Name='iuml') then E:='‹' else { small i, dieresis or umlaut }
  477. (* if (Code=240) or (Name='eth') then E:='?' else { small eth, Icelandic }*)
  478. if (Code=241) or (Name='ntilde') then E:='¤' else { small n, tilde }
  479. if (Code=242) or (Name='ograve') then E:='•' else { small o, grave accent }
  480. if (Code=243) or (Name='oacute') then E:='¢' else { small o, acute accent }
  481. if (Code=244) or (Name='ocirc') then E:='“' else { small o, circumflex accent }
  482. if (Code=245) or (Name='otilde') then E:='“' else { small o, tilde }
  483. if (Code=246) or (Name='ouml') then E:='”' else { small o, dieresis or umlaut }
  484. if (Code=247) or (Name='divide') then E:='/' else { divide sign }
  485. if (Code=248) or (Name='oslash') then E:='"' else { small o, slash }
  486. if (Code=249) or (Name='ugrave') then E:='—' else { small u, grave accent }
  487. if (Code=250) or (Name='uacute') then E:='£' else { small u, acute accent }
  488. if (Code=251) or (Name='ucirc') then E:='–' else { small u, circumflex accent }
  489. if (Code=252) or (Name='uuml') then E:='�' else { small u, dieresis or umlaut }
  490. if (Code=253) or (Name='yacute') then E:='y' else { small y, acute accent }
  491. (* if (Code=254) or (Name='thorn') then E:='?' else { small thorn, Icelandic }*)
  492. if (Code=255) or (Name='yuml') then E:='y' else { small y, dieresis or umlaut }
  493. { Special codes appearing in TeXH generated files }
  494. if (code=$2c6{710}) or (Name='circ') then E:='^' else { Modifier Letter Circumflex Accent }
  495. if (code=$2dc{732}) or (Name='tilde') then E:='~' else { Small tilde }
  496. if (code=$2013{8211}) or (Name='endash') then E:='-' else { En dash }
  497. if (code=$2014{8212}) or (Name='emdash') then E:='--' else { Em dash }
  498. if (Code=$2018{8216}) or (Name='lsquo') then E:='`' else { Acute accent as generated by TeXH }
  499. if (Code=$2019{8217}) or (Name='rsquo') then E:='''' else { acute accent as generated by TeXH }
  500. if (code=$201C{8220}) or (Name='ldquo') then E:='''''' else { left double quotation marks }
  501. if (code=$201D{8221}) or (Name='rdquo') then E:='``' else { right double quotation marks }
  502. if (code=$2026{8230}) or (Name='hellip') then E:='...' else { horizontal ellipsis }
  503. if (Code=$FB00) then E:='ff' else { ff together }
  504. if (Code=$FB01) then E:='fi' else { fi together }
  505. if (Code=$FB02) then E:='fl' else { fl together }
  506. if (Code=$FB03) then E:='ffi' else { ffi together }
  507. if (Code=$FB04) then E:='ffl' else { ffl together }
  508. Found:=false;
  509. DocDecodeNamedEntity:=Found;
  510. {$ifdef DEBUG}
  511. if (Code<>$ffff) and not found then
  512. begin
  513. DebugMessage(FileName,'NamedEntity '+Name+' not handled',1,1);
  514. end;
  515. {$endif DEBUG}
  516. end;
  517. procedure THTMLParser.DocProcessTag(Tag: string);
  518. var UTagName,ETagName: string[30];
  519. P: byte;
  520. NotEndTag: boolean;
  521. begin
  522. if copy(Tag,1,1)='<' then Delete(Tag,1,1);
  523. if copy(Tag,length(Tag),1)='>' then Delete(Tag,length(Tag),1);
  524. Tag:=Trim(Tag);
  525. P:=Pos(' ',Tag); if P=0 then P:=length(Tag)+1;
  526. TagName:=copy(Tag,1,P-1); TagParams:=copy(Tag,P+1,255);
  527. UTagName:=UpcaseStr(TagName);
  528. NotEndTag:=copy(TagName,1,1)<>'/';
  529. if NotEndTag then ETagName:=UTagName else ETagName:=copy(UTagName,2,255);
  530. { <BR/> is also a Break tag... }
  531. if Copy(ETagName,Length(ETagName),1)='/' then
  532. begin
  533. ETagName:=copy(ETagName,1,Length(ETagName)-1);
  534. NotEndTag:=false;
  535. end;
  536. if (UTagName='!DOCTYPE') then DocTYPE else
  537. { Section tags }
  538. if (ETagName='HTML') then DocHTML(NotEndTag) else
  539. if (ETagName='HEAD') then DocHEAD(NotEndTag) else
  540. if (ETagName='TITLE') then DocTITLE(NotEndTag) else
  541. if (ETagName='BODY') then DocBODY(NotEndTag) else
  542. { Anchor tags }
  543. if (ETagName='A') then DocAnchor(NotEndTag) else
  544. { Direct formatting directives }
  545. if (ETagName='H1') then DocHeading(1,NotEndTag) else
  546. if (ETagName='H2') then DocHeading(2,NotEndTag) else
  547. if (ETagName='H3') then DocHeading(3,NotEndTag) else
  548. if (ETagName='H4') then DocHeading(4,NotEndTag) else
  549. if (ETagName='H5') then DocHeading(5,NotEndTag) else
  550. if (ETagName='H6') then DocHeading(6,NotEndTag) else
  551. if (ETagName='P') then DocParagraph(NotEndTag) else
  552. if (ETagName='BR') then DocBreak else
  553. if (ETagName='B') then DocBold(NotEndTag) else
  554. if (ETagName='CITE') then DocCite(NotEndTag) else
  555. if (ETagName='CODE') then DocCode(NotEndTag) else
  556. if (ETagName='EM') then DocEmphasized(NotEndTag) else
  557. if (ETagName='I') then DocItalic(NotEndTag) else
  558. if (ETagName='KBD') then DocKbd(NotEndTag) else
  559. if (ETagName='PRE') then DocPreformatted(NotEndTag) else
  560. if (ETagName='SAMP') then DocSample(NotEndTag) else
  561. if (ETagName='STRONG') then DocStrong(NotEndTag) else
  562. if (ETagName='TT') then DocTeleType(NotEndTag) else
  563. if (ETagName='VAR') then DocVariable(NotEndTag) else
  564. if (ETagName='SPAN') then DocSpan(NotEndTag) else
  565. if (ETagName='DIV') then DocDiv(NotEndTag) else
  566. { Unordered & ordered lists }
  567. if (ETagName='UL') then DocList(NotEndTag) else
  568. if (ETagName='OL') then DocOrderedList(NotEndTag) else
  569. if (ETagName='LI') then DocListItem(NotEndTag) else
  570. { Definition list }
  571. if (ETagName='DL') then DocDefList(NotEndTag) else
  572. if (ETagName='DT') then DocDefTerm(NotEndTag) else
  573. if (ETagName='DD') then DocDefExp(NotEndTag) else
  574. { Table }
  575. if (ETagName='TABLE') then DocTable(NotEndTag) else
  576. if (ETagName='TR') then DocTableRow(NotEndTag) else
  577. if (ETagName='TH') then DocTableHeaderItem(NotEndTag) else
  578. if (ETagName='TD') then DocTableItem(NotEndTag) else
  579. { Misc. tags }
  580. if (UTagName='META') then DocMETA else
  581. if (UTagName='IMG') then DocImage else
  582. if (UTagName='HR') then DocHorizontalRuler else
  583. DocUnknownTag;
  584. end;
  585. function THTMLParser.DocGetTagParam(Name: string; var Value: string): boolean;
  586. var Found: boolean;
  587. S: string;
  588. ParamName,ParamValue: string;
  589. InStr: boolean;
  590. I: sw_integer;
  591. begin
  592. Found:=false;
  593. Name:=UpcaseStr(Name);
  594. Value:='';
  595. S:=TagParams;
  596. repeat
  597. InStr:=false;
  598. ParamName:=''; ParamValue:='';
  599. S:=Trim(S); I:=1;
  600. while (I<=length(S)) and (S[I]<>'=') do
  601. begin
  602. if S[I]=' ' then
  603. ParamName:=''
  604. else
  605. ParamName:=ParamName+S[I];
  606. Inc(I);
  607. end;
  608. ParamName:=Trim(ParamName);
  609. if S[I]='=' then
  610. begin
  611. Inc(I); InStr:=false;
  612. while (I<=length(S)) and (S[I]=' ') do
  613. Inc(I);
  614. if (I<=length(S)) and (S[I]='"') then
  615. begin
  616. InStr:=true;
  617. Inc(I);
  618. end;
  619. while (I<=length(S)) and ((InStr=true) or (S[I]<>' ')) do
  620. begin
  621. if S[I]='"' then
  622. begin
  623. InStr:=not InStr;
  624. if InStr=false then Break;
  625. end
  626. else
  627. ParamValue:=ParamValue+S[I];
  628. Inc(I);
  629. end;
  630. end;
  631. Found:=(Name=UpcaseStr(ParamName));
  632. if Found then Value:=ParamValue;
  633. Delete(S,1,I);
  634. until Found or (S='');
  635. DocGetTagParam:=Found;
  636. end;
  637. procedure THTMLParser.DocProcessComment(Comment: string);
  638. begin
  639. end;
  640. procedure THTMLParser.DocUnknownTag;
  641. begin
  642. end;
  643. procedure THTMLParser.DocTYPE;
  644. begin
  645. end;
  646. procedure THTMLParser.DocHTML(Entered: boolean);
  647. begin
  648. end;
  649. procedure THTMLParser.DocHEAD(Entered: boolean);
  650. begin
  651. end;
  652. procedure THTMLParser.DocMETA;
  653. begin
  654. end;
  655. procedure THTMLParser.DocTITLE(Entered: boolean);
  656. begin
  657. end;
  658. procedure THTMLParser.DocBODY(Entered: boolean);
  659. begin
  660. end;
  661. procedure THTMLParser.DocAnchor(Entered: boolean);
  662. begin
  663. end;
  664. procedure THTMLParser.DocHeading(Level: integer; Entered: boolean);
  665. begin
  666. end;
  667. procedure THTMLParser.DocParagraph(Entered: boolean);
  668. begin
  669. end;
  670. procedure THTMLParser.DocBreak;
  671. begin
  672. end;
  673. procedure THTMLParser.DocImage;
  674. begin
  675. end;
  676. procedure THTMLParser.DocBold(Entered: boolean);
  677. begin
  678. end;
  679. procedure THTMLParser.DocCite(Entered: boolean);
  680. begin
  681. end;
  682. procedure THTMLParser.DocCode(Entered: boolean);
  683. begin
  684. end;
  685. procedure THTMLParser.DocEmphasized(Entered: boolean);
  686. begin
  687. end;
  688. procedure THTMLParser.DocItalic(Entered: boolean);
  689. begin
  690. end;
  691. procedure THTMLParser.DocKbd(Entered: boolean);
  692. begin
  693. end;
  694. procedure THTMLParser.DocPreformatted(Entered: boolean);
  695. begin
  696. end;
  697. procedure THTMLParser.DocSample(Entered: boolean);
  698. begin
  699. end;
  700. procedure THTMLParser.DocStrong(Entered: boolean);
  701. begin
  702. end;
  703. procedure THTMLParser.DocTeleType(Entered: boolean);
  704. begin
  705. end;
  706. procedure THTMLParser.DocVariable(Entered: boolean);
  707. begin
  708. end;
  709. procedure THTMLParser.DocSpan(Entered: boolean);
  710. begin
  711. end;
  712. procedure THTMLParser.DocDiv(Entered: boolean);
  713. var
  714. S: String;
  715. begin
  716. if Entered then
  717. begin
  718. if DocGetTagParam('CLASS',S) then
  719. if S='crosslinks' then
  720. begin
  721. DisableCrossIndexing:=true;
  722. {$ifdef DEBUG}
  723. DebugMessage(GetFileName,'Crosslinks found',Line,LinePos);
  724. {$endif DEBUG}
  725. end;
  726. end
  727. else
  728. begin
  729. {$ifdef DEBUG}
  730. if DisableCrossIndexing then
  731. begin
  732. DebugMessage(GetFileName,'Crosslinks end found',Line,LinePos);
  733. end;
  734. {$endif DEBUG}
  735. DisableCrossIndexing:=false;
  736. end;
  737. end;
  738. procedure THTMLParser.DocList(Entered: boolean);
  739. begin
  740. end;
  741. procedure THTMLParser.DocOrderedList(Entered: boolean);
  742. begin
  743. end;
  744. procedure THTMLParser.DocListItem(Entered: boolean);
  745. begin
  746. end;
  747. procedure THTMLParser.DocDefList(Entered: boolean);
  748. begin
  749. end;
  750. procedure THTMLParser.DocDefTerm(Entered: boolean);
  751. begin
  752. end;
  753. procedure THTMLParser.DocDefExp(Entered: boolean);
  754. begin
  755. end;
  756. procedure THTMLParser.DocTable(Entered: boolean);
  757. var
  758. S: String;
  759. begin
  760. if Entered then
  761. begin
  762. if DocGetTagParam('CLASS',S) then
  763. if S='bar' then
  764. begin
  765. DisableCrossIndexing:=true;
  766. {$ifdef DEBUG}
  767. DebugMessage(GetFileName,'Bar table found, cross indexing disabled ',Line,LinePos);
  768. {$endif DEBUG}
  769. end;
  770. end
  771. else
  772. begin
  773. {$ifdef DEBUG}
  774. if DisableCrossIndexing then
  775. begin
  776. DebugMessage(GetFileName,'Bar table end found',Line,LinePos);
  777. end;
  778. {$endif DEBUG}
  779. DisableCrossIndexing:=false;
  780. end;
  781. end;
  782. procedure THTMLParser.DocTableRow(Entered: boolean);
  783. begin
  784. end;
  785. procedure THTMLParser.DocTableHeaderItem(Entered: boolean);
  786. begin
  787. end;
  788. procedure THTMLParser.DocTableItem(Entered: boolean);
  789. begin
  790. end;
  791. procedure THTMLParser.DocHorizontalRuler;
  792. begin
  793. end;
  794. END.