whtml.pas 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 1998 by Berczi Gabor
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. unit WHTML;
  11. {$I globdir.inc}
  12. interface
  13. uses Objects;
  14. type
  15. PTextFile = ^TTextFile;
  16. TTextFile = object(TObject)
  17. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  18. function GetFileName : string; virtual;
  19. end;
  20. PMemoryTextFile = ^TMemoryTextFile;
  21. TMemoryTextFile = object(TTextFile)
  22. constructor Init;
  23. procedure AddLine(const S: string); virtual;
  24. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  25. function GetFileName : string; virtual;
  26. function GetLineCount : sw_integer;
  27. destructor Done; virtual;
  28. private
  29. Lines : PUnsortedStrCollection;
  30. end;
  31. PDOSTextFile = ^TDOSTextFile;
  32. TDOSTextFile = object(TMemoryTextFile)
  33. constructor Init(AFileName: string);
  34. function GetFileName : string; virtual;
  35. private
  36. DosFileName : string;
  37. end;
  38. PSGMLParser = ^TSGMLParser;
  39. TSGMLParser = object(TObject)
  40. constructor Init;
  41. function Process(HTMLFile: PTextFile): boolean; virtual;
  42. function ProcessLine(LineText: string): boolean; virtual;
  43. destructor Done; virtual;
  44. public
  45. Line,LinePos: sw_integer;
  46. procedure DocSoftBreak; virtual;
  47. function GetFileName : string;
  48. function DocAddTextChar(C: char): boolean; virtual;
  49. procedure DocAddText(S: string); virtual;
  50. procedure DocProcessTag(Tag: string); virtual;
  51. procedure DocProcessComment(Comment: string); virtual;
  52. function DocDecodeNamedEntity(Name: string; var Entity: string): boolean; virtual;
  53. private
  54. CurTag: string;
  55. FileName : string;
  56. InTag,InComment,InString: boolean;
  57. end;
  58. PHTMLParser = ^THTMLParser;
  59. THTMLParser = object(TSGMLParser)
  60. procedure DocSoftBreak; virtual;
  61. function DocAddTextChar(C: char): boolean; virtual;
  62. procedure DocProcessTag(Tag: string); virtual;
  63. function DocGetTagParam(Name: string; var Value: string): boolean; virtual;
  64. procedure DocProcessComment(Comment: string); virtual;
  65. function DocDecodeNamedEntity(Name: string; var E: string): boolean; virtual;
  66. public
  67. TagName,TagParams: string;
  68. DisableCrossIndexing : boolean;
  69. procedure DocUnknownTag; virtual;
  70. procedure DocTYPE; virtual;
  71. procedure DocHTML(Entered: boolean); virtual;
  72. procedure DocHEAD(Entered: boolean); virtual;
  73. procedure DocMETA; virtual;
  74. procedure DocTITLE(Entered: boolean); virtual;
  75. procedure DocBODY(Entered: boolean); virtual;
  76. procedure DocAnchor(Entered: boolean); virtual;
  77. procedure DocHeading(Level: integer; Entered: boolean); virtual;
  78. procedure DocParagraph(Entered: boolean); virtual;
  79. procedure DocBreak; virtual;
  80. procedure DocImage; virtual;
  81. procedure DocBold(Entered: boolean); virtual;
  82. procedure DocCite(Entered: boolean); virtual;
  83. procedure DocCode(Entered: boolean); virtual;
  84. procedure DocEmphasized(Entered: boolean); virtual;
  85. procedure DocItalic(Entered: boolean); virtual;
  86. procedure DocKbd(Entered: boolean); virtual;
  87. procedure DocPreformatted(Entered: boolean); virtual;
  88. procedure DocSample(Entered: boolean); virtual;
  89. procedure DocStrong(Entered: boolean); virtual;
  90. procedure DocTeleType(Entered: boolean); virtual;
  91. procedure DocVariable(Entered: boolean); virtual;
  92. procedure DocSpan(Entered: boolean); virtual;
  93. procedure DocDiv(Entered: boolean); virtual;
  94. procedure DocList(Entered: boolean); virtual;
  95. procedure DocOrderedList(Entered: boolean); virtual;
  96. procedure DocListItem(Entered: boolean); virtual;
  97. procedure DocDefList(Entered: boolean); virtual;
  98. procedure DocDefTerm(Entered: boolean); virtual;
  99. procedure DocDefExp(Entered: boolean); virtual;
  100. procedure DocTable(Entered: boolean); virtual;
  101. procedure DocTableRow(Entered: boolean); virtual;
  102. procedure DocTableHeaderItem(Entered: boolean); virtual;
  103. procedure DocTableItem(Entered: boolean); virtual;
  104. procedure DocHorizontalRuler; virtual;
  105. end;
  106. implementation
  107. uses
  108. WUtils;
  109. function TTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  110. begin
  111. Abstract;
  112. GetLine:=false;
  113. end;
  114. function TTextFile.GetFileName : string;
  115. begin
  116. GetFileName:='unknown';
  117. end;
  118. constructor TMemoryTextFile.Init;
  119. begin
  120. inherited Init;
  121. New(Lines, Init(500,500));
  122. end;
  123. function TMemoryTextFile.GetFileName : string;
  124. begin
  125. GetFileName:='unknown';
  126. end;
  127. function TMemoryTextFile.GetLineCount : sw_integer;
  128. begin
  129. GetLineCount:=Lines^.Count;
  130. end;
  131. procedure TMemoryTextFile.AddLine(const S: string);
  132. begin
  133. Lines^.Insert(NewStr(S));
  134. end;
  135. function TMemoryTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  136. var OK: boolean;
  137. PS: PString;
  138. begin
  139. OK:=(Lines<>nil) and (Idx<Lines^.Count);
  140. if OK then
  141. begin
  142. PS:=Lines^.At(Idx);
  143. if PS=nil then S:='' else S:=PS^;
  144. end;
  145. GetLine:=OK;
  146. end;
  147. destructor TMemoryTextFile.Done;
  148. begin
  149. if Lines<>nil then
  150. Dispose(Lines, Done);
  151. Lines:=nil;
  152. inherited Done;
  153. end;
  154. constructor TDOSTextFile.Init(AFileName: string);
  155. (*{$ifdef TPUNIXLF}
  156. procedure readln(var t:text;var s:string);
  157. var
  158. c : char;
  159. i : longint;
  160. begin
  161. c:=#0;
  162. i:=0;
  163. while (not eof(t)) and (c<>#10) and (i<255) do
  164. begin
  165. read(t,c);
  166. if (i<255) and (c<>#10) then
  167. begin
  168. inc(i);
  169. s[i]:=c;
  170. end;
  171. end;
  172. if (i>0) and (s[i]=#13) then
  173. dec(i);
  174. s[0]:=chr(i);
  175. end;
  176. {$endif}*)
  177. var f: file;
  178. linecomplete,hasCR: boolean;
  179. S: string;
  180. begin
  181. inherited Init;
  182. if AFileName='' then Fail;
  183. {$I-}
  184. Assign(f,AFileName);
  185. Reset(f,1);
  186. if IOResult<>0 then Fail;
  187. DosFileName:=AFileName;
  188. Dispose(Lines,Done);
  189. New(Lines, Init(500,2000));
  190. while (Eof(f)=false) and (IOResult=0) do
  191. begin
  192. ReadlnFromFile(f,S,linecomplete,hasCR,true);
  193. AddLine(S);
  194. end;
  195. Close(f);
  196. {$I+}
  197. end;
  198. function TDosTextFile.GetFileName : string;
  199. begin
  200. GetFileName:=DosFileName;
  201. end;
  202. constructor TSGMLParser.Init;
  203. begin
  204. inherited Init;
  205. FileName:='';
  206. end;
  207. function TSGMLParser.GetFileName : string;
  208. begin
  209. GetFileName:=FileName;
  210. end;
  211. function TSGMLParser.Process(HTMLFile: PTextFile): boolean;
  212. var S: string;
  213. OK,LineOK: boolean;
  214. begin
  215. if HTMLFile=nil then Exit;
  216. InTag:=false; InComment:=false; InString:=false; CurTag:='';
  217. Line:=0; OK:=true;
  218. FileName:=HTMLFile^.GetFileName;
  219. repeat
  220. LineOK:=HTMLFile^.GetLine(Line,S);
  221. if LineOK then
  222. begin
  223. Inc(Line);
  224. OK:=ProcessLine(S);
  225. end;
  226. until (LineOK=false) or (OK=false);
  227. Process:=OK;
  228. end;
  229. function TSGMLParser.ProcessLine(LineText: string): boolean;
  230. var OK: boolean;
  231. C: char;
  232. NewInString: boolean;
  233. OldInComment: boolean;
  234. WasThereAnyText: boolean;
  235. Pos2: integer;
  236. Name,Entity: string;
  237. LiteralCode: boolean;
  238. LiteralStart,LiteralEnd,P: integer;
  239. const TabSize : integer = 8;
  240. Tab = #9;
  241. begin
  242. WasThereAnyText:=false;
  243. OK:=true; LinePos:=1;
  244. LiteralStart:=0; LiteralEnd:=0;
  245. repeat
  246. P:=Pos(TAB,LineText);
  247. if P>0 then
  248. LineText:=copy(LineText,1,P-1)+CharStr(' ',TabSize)+copy(LineText,P+1,255);
  249. until P=0;
  250. while (LinePos<=length(LineText)) and OK do
  251. begin
  252. LiteralCode:=false;
  253. NewInString:=InString; OldInComment:=InComment;
  254. C:=LineText[LinePos];
  255. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  256. if (LiteralCode=false) and (C='&') then
  257. begin
  258. LiteralStart:=0; LiteralEnd:=0;
  259. Name:=''; Pos2:=LinePos+1;
  260. while (Pos2<=length(LineText)) and (LineText[Pos2]<>';') do
  261. begin
  262. Name:=Name+LineText[Pos2];
  263. Inc(Pos2);
  264. end;
  265. Inc(Pos2);
  266. if DocDecodeNamedEntity(Name,Entity) then
  267. begin
  268. LineText:=copy(LineText,1,LinePos-1)+Entity+copy(LineText,Pos2,255);
  269. LiteralStart:=LinePos; LiteralEnd:=LiteralStart+length(Entity)-1;
  270. C:=LineText[LinePos];
  271. end;
  272. end;
  273. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  274. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=false) then
  275. NewInString:=true;
  276. if (LiteralCode=false) and (C='<') and (InTag=false) then
  277. InTag:=true;
  278. if InTag then CurTag:=CurTag+C else
  279. WasThereAnyText:=DocAddTextChar(C);
  280. if (LiteralCode=false) and InTag and (InString=false) and (CurTag='<!--') then
  281. InComment:=true;
  282. { A comment can be longer than 255 chars
  283. move the test to LineText string,
  284. This is why the Previous, Next and Up Tags where not working ... PM
  285. if (LiteralCode=false) and InTag and InComment and (InString=false) and (length(CurTag)>=3) and
  286. (copy(CurTag,length(CurTag)-2,3)='-->') then
  287. InComment:=false; }
  288. if (LiteralCode=false) and InTag and InComment and (InString=false) and (LinePos>=3) and
  289. (copy(LineText,LinePos-2,3)='-->') then
  290. InComment:=false;
  291. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=true) then
  292. NewInString:=false;
  293. if (LiteralCode=false) and (C='>') and (InTag=true) then
  294. begin
  295. InTag:=false;
  296. if OldInComment then
  297. DocProcessComment(CurTag)
  298. else
  299. DocProcessTag(CurTag);
  300. CurTag:='';
  301. end;
  302. InString:=NewInString;
  303. Inc(LinePos);
  304. end;
  305. { whtml does not depend on whelp,
  306. so I can not use hscLineBreak here. PM }
  307. if InTag then
  308. begin
  309. if InString then
  310. CurTag:=CurTag+#0;
  311. end
  312. else if WasThereAnyText then
  313. DocSoftBreak;
  314. ProcessLine:=true;
  315. end;
  316. procedure TSGMLParser.DocSoftBreak;
  317. begin
  318. Abstract;
  319. end;
  320. function TSGMLParser.DocAddTextChar(C: char): boolean;
  321. begin
  322. Abstract;
  323. DocAddTextChar:=false;
  324. end;
  325. procedure TSGMLParser.DocAddText(S: string);
  326. var I: sw_integer;
  327. begin
  328. for I:=1 to length(S) do
  329. DocAddTextChar(S[I]);
  330. end;
  331. function TSGMLParser.DocDecodeNamedEntity(Name: string; var Entity: string): boolean;
  332. begin
  333. DocDecodeNamedEntity:=false;
  334. end;
  335. procedure TSGMLParser.DocProcessTag(Tag: string);
  336. begin
  337. Abstract;
  338. end;
  339. procedure TSGMLParser.DocProcessComment(Comment: string);
  340. begin
  341. Abstract;
  342. end;
  343. destructor TSGMLParser.Done;
  344. begin
  345. inherited Done;
  346. end;
  347. procedure THTMLParser.DocSoftBreak;
  348. begin
  349. end;
  350. function THTMLParser.DocAddTextChar(C: char): boolean;
  351. begin
  352. { Abstract }
  353. DocAddTextChar:=false;
  354. end;
  355. function THTMLParser.DocDecodeNamedEntity(Name: string; var E: string): boolean;
  356. var Found: boolean;
  357. Code: word;
  358. CC: word;
  359. begin
  360. Found:=true; Code:=$ffff;
  361. Name:=LowCaseStr(Name);
  362. if copy(Name,1,1)='#' then
  363. begin
  364. if Name[2]='x' then
  365. Val('$'+copy(Name,3,255),Code,CC)
  366. else
  367. Val(copy(Name,2,255),Code,CC);
  368. if CC<>0 then
  369. begin
  370. {$ifdef DEBUG}
  371. DebugMessage(FileName,'NamedEntity '+Name+' not converted',1,1);
  372. {$endif DEBUG}
  373. Code:=$ffff;
  374. end;
  375. end;
  376. { #0 to #127 is same for Unicode and Code page 437 }
  377. if (code<=127) then
  378. begin
  379. E:=chr(code);
  380. DocDecodeNamedEntity:=true;
  381. exit;
  382. end;
  383. if (Code=$22{34}) or (Name='quot') then E:='"' else { double quote sign }
  384. if (Code=$26{38}) or (Name='amp') then E:='&' else { ampersand }
  385. if (Code=$27{39}) or (Name='apos') then E:='''' else { apostrophe }
  386. if (Code=$3C{60}) or (Name='lt') then E:='<' else { less-than sign }
  387. if (Code=$3E{62}) or (Name='gt') then E:='>' else { greater-than sign }
  388. if (Code=$5B) then E:='[' else { [ }
  389. if (Code=$5C) then E:='\' else { \ }
  390. if (Code=$5D) then E:=']' else { ] }
  391. if (Code=$5E) then E:='^' else { ^ }
  392. if (Code=$5F) then E:='_' else { _ }
  393. if (Code=160) or (Name='nbsp') then E:=#255 else { no-break space }
  394. if (Code=161) or (Name='iexcl') then E:='­' else { inverted exclamation mark }
  395. if (Code=162) or (Name='cent') then E:='›' else { cent sign }
  396. if (Code=163) or (Name='pound') then E:='œ' else { pound sterling sign }
  397. if (Code=164) or (Name='curren') then E:='$' else { general currency sign }
  398. if (Code=165) or (Name='yen') then E:='�' else { yen sign }
  399. if (Code=166) or (Name='brvbar') then E:='|' else { broken vertical bar }
  400. if (Code=167) or (Name='sect') then E:='' else { section sign }
  401. if (Code=168) or (Name='uml') then E:='"' else { umlaut (dieresis) }
  402. if (Code=169) or (Name='copy') then E:='(C)' else { copyright sign }
  403. (* if (Code=170) or (Name='ordf') then E:=#255 else { ordinal indicator, feminine }*)
  404. if (Code=171) or (Name='laquo') then E:='"' else { angle quotation mark -left }
  405. if (Code=172) or (Name='not') then E:='!' else { not sign }
  406. if (Code=173) or (Name='shy') then E:='-' else { soft hypen }
  407. if (Code=174) or (Name='reg') then E:='(R)' else { registered sign }
  408. (* if (Code=175) or (Name='macr') then E:='?' else { macron }*)
  409. if (Code=176) or (Name='deg') then E:='ø' else { degree sign }
  410. if (Code=177) or (Name='plusmn') then E:='ñ' else { plus-or-minus sign }
  411. if (Code=178) or (Name='sup2') then E:='ý' else { superscript 2 }
  412. if (Code=179) or (Name='sup3') then E:='^3' else { superscript 3 }
  413. if (Code=180) or (Name='acute') then E:='''' else { acute accent }
  414. if (Code=181) or (Name='micro') then E:='æ' else { micro sign }
  415. (* if (Code=182) or (Name='para') then E:='?' else { paragraph sign }*)
  416. if (Code=183) or (Name='middot') then E:='ù' else { middle dot }
  417. (* if (Code=184) or (Name='cedil') then E:='?' else { cedilla }*)
  418. if (Code=185) or (Name='sup1') then E:='^1' else { superscript 1 }
  419. (* if (Code=186) or (Name='ordm') then E:='?' else { ordinal indicator, masculine }*)
  420. if (Code=187) or (Name='raquo') then E:='"' else { angle quoatation mark -right }
  421. if (Code=188) or (Name='frac14') then E:='¬' else { fraction one-quarter }
  422. if (Code=189) or (Name='frac12') then E:='«' else { fraction one-half }
  423. if (Code=190) or (Name='frac34') then E:='3/4' else { fraction three-quarters }
  424. if (Code=191) or (Name='iquest') then E:='¨' else { inverted question mark }
  425. if (Code=192) or (Name='Agrave') then E:='A' else { capital A, grave accent }
  426. if (Code=193) or (Name='Aacute') then E:='A' else { capital A, acute accent }
  427. if (Code=194) or (Name='Acirc') then E:='A' else { capital A, circumflex accent }
  428. if (Code=195) or (Name='Atilde') then E:='A' else { capital A, tilde accent }
  429. if (Code=196) or (Name='Auml') then E:='Ž' else { capital A, dieresis or umlaut }
  430. if (Code=197) or (Name='Aring') then E:='�' else { capital A, ring }
  431. if (Code=198) or (Name='AElig') then E:='’' else { capital AE diphthong }
  432. if (Code=199) or (Name='Ccedil') then E:='€' else { capital C, cedilla }
  433. if (Code=200) or (Name='Egrave') then E:='�' else { capital E, grave accent }
  434. if (Code=201) or (Name='Eacute') then E:='�' else { capital E, acute accent }
  435. if (Code=202) or (Name='Ecirc') then E:='E' else { capital E, circumflex accent }
  436. if (Code=203) or (Name='Euml') then E:='E' else { capital E, dieresis or umlaut }
  437. if (Code=204) or (Name='Igrave') then E:='I' else { capital I, grave accent }
  438. if (Code=205) or (Name='Iacute') then E:='I' else { capital I, acute accent }
  439. if (Code=206) or (Name='Icirc') then E:='I' else { capital I, circumflex accent }
  440. if (Code=207) or (Name='Iuml') then E:='I' else { capital I, dieresis or umlaut }
  441. (* if (Code=208) or (Name='ETH') then E:='?' else { capital Eth, Icelandic }*)
  442. if (Code=209) or (Name='Ntidle') then E:='¥' else { capital N, tilde }
  443. if (Code=210) or (Name='Ograve') then E:='O' else { capital O, grave accent }
  444. if (Code=211) or (Name='Oacute') then E:='O' else { capital O, acute accent }
  445. if (Code=212) or (Name='Ocirc') then E:='O' else { capital O, circumflex accent }
  446. if (Code=213) or (Name='Otilde') then E:='O' else { capital O, tilde }
  447. if (Code=214) or (Name='Ouml') then E:='™' else { capital O, dieresis or umlaut }
  448. if (Code=215) or (Name='times') then E:='*' else { multiply sign }
  449. if (Code=216) or (Name='Oslash') then E:='O' else { capital O, slash }
  450. if (Code=217) or (Name='Ugrave') then E:='U' else { capital U, grave accent }
  451. if (Code=218) or (Name='Uacute') then E:='U' else { capital U, acute accent }
  452. if (Code=219) or (Name='Ucirc') then E:='U' else { capital U, circumflex accent }
  453. if (Code=220) or (Name='Uuml') then E:='š' else { capital U, dieresis or umlaut }
  454. if (Code=221) or (Name='Yacute') then E:='Y' else { capital Y, acute accent }
  455. (* if (Code=222) or (Name='THORN') then E:='?' else { capital THORN, Icelandic }*)
  456. if (Code=223) or (Name='szlig') then E:='á' else { small sharp S, German }
  457. if (Code=224) or (Name='agrave') then E:='…' else { small a, grave accent }
  458. if (Code=225) or (Name='aacute') then E:=' ' else { small a, acute accent }
  459. if (Code=226) or (Name='acirc') then E:='ƒ' else { small a, circumflex accent }
  460. if (Code=227) or (Name='atilde') then E:='ƒ' else { small a, tilde }
  461. if (Code=228) or (Name='auml') then E:='„' else { small a, dieresis or umlaut }
  462. if (Code=229) or (Name='aring') then E:='†' else { small a, ring }
  463. if (Code=230) or (Name='aelig') then E:='ae' else { small ae, diphthong }
  464. if (Code=231) or (Name='ccedil') then E:='‡' else { small c, cedilla }
  465. if (Code=232) or (Name='egrave') then E:='Š' else { small e, grave accent }
  466. if (Code=233) or (Name='eacute') then E:='‚' else { small e, acute accent }
  467. if (Code=234) or (Name='ecirc') then E:='ˆ' else { small e, circumflex accent }
  468. if (Code=235) or (Name='euml') then E:='‰' else { small e, dieresis or umlaut }
  469. if (Code=236) or (Name='igrave') then E:='�' else { small i, grave accent }
  470. if (Code=237) or (Name='iacute') then E:='¡' else { small i, acute accent }
  471. if (Code=238) or (Name='icirc') then E:='Œ' else { small i, circumflex accent }
  472. if (Code=239) or (Name='iuml') then E:='‹' else { small i, dieresis or umlaut }
  473. (* if (Code=240) or (Name='eth') then E:='?' else { small eth, Icelandic }*)
  474. if (Code=241) or (Name='ntilde') then E:='¤' else { small n, tilde }
  475. if (Code=242) or (Name='ograve') then E:='•' else { small o, grave accent }
  476. if (Code=243) or (Name='oacute') then E:='¢' else { small o, acute accent }
  477. if (Code=244) or (Name='ocirc') then E:='“' else { small o, circumflex accent }
  478. if (Code=245) or (Name='otilde') then E:='“' else { small o, tilde }
  479. if (Code=246) or (Name='ouml') then E:='”' else { small o, dieresis or umlaut }
  480. if (Code=247) or (Name='divide') then E:='/' else { divide sign }
  481. if (Code=248) or (Name='oslash') then E:='"' else { small o, slash }
  482. if (Code=249) or (Name='ugrave') then E:='—' else { small u, grave accent }
  483. if (Code=250) or (Name='uacute') then E:='£' else { small u, acute accent }
  484. if (Code=251) or (Name='ucirc') then E:='–' else { small u, circumflex accent }
  485. if (Code=252) or (Name='uuml') then E:='�' else { small u, dieresis or umlaut }
  486. if (Code=253) or (Name='yacute') then E:='y' else { small y, acute accent }
  487. (* if (Code=254) or (Name='thorn') then E:='?' else { small thorn, Icelandic }*)
  488. if (Code=255) or (Name='yuml') then E:='y' else { small y, dieresis or umlaut }
  489. { Special codes appearing in TeXH generated files }
  490. if (code=$2c6{710}) or (Name='circ') then E:='^' else { Modifier Letter Circumflex Accent }
  491. if (code=$2dc{732}) or (Name='tilde') then E:='~' else { Small tilde }
  492. if (code=$2013{8211}) or (Name='endash') then E:='-' else { En dash }
  493. if (code=$2014{8212}) or (Name='emdash') then E:='--' else { Em dash }
  494. if (Code=$2018{8216}) or (Name='lsquo') then E:='`' else { Acute accent as generated by TeXH }
  495. if (Code=$2019{8217}) or (Name='rsquo') then E:='''' else { acute accent as generated by TeXH }
  496. if (code=$201C{8220}) or (Name='ldquo') then E:='''''' else { left double quotation marks }
  497. if (code=$201D{8221}) or (Name='rdquo') then E:='``' else { right double quotation marks }
  498. if (code=$2026{8230}) or (Name='hellip') then E:='...' else { horizontal ellipsis }
  499. if (Code=$FB00) then E:='ff' else { ff together }
  500. if (Code=$FB01) then E:='fi' else { fi together }
  501. if (Code=$FB02) then E:='fl' else { fl together }
  502. if (Code=$FB03) then E:='ffi' else { ffi together }
  503. if (Code=$FB04) then E:='ffl' else { ffl together }
  504. Found:=false;
  505. DocDecodeNamedEntity:=Found;
  506. {$ifdef DEBUG}
  507. if (Code<>$ffff) and not found then
  508. begin
  509. DebugMessage(FileName,'NamedEntity '+Name+' not handled',1,1);
  510. end;
  511. {$endif DEBUG}
  512. end;
  513. procedure THTMLParser.DocProcessTag(Tag: string);
  514. var UTagName,ETagName: string[30];
  515. P: byte;
  516. NotEndTag: boolean;
  517. begin
  518. if copy(Tag,1,1)='<' then Delete(Tag,1,1);
  519. if copy(Tag,length(Tag),1)='>' then Delete(Tag,length(Tag),1);
  520. Tag:=Trim(Tag);
  521. P:=Pos(' ',Tag); if P=0 then P:=length(Tag)+1;
  522. TagName:=copy(Tag,1,P-1); TagParams:=copy(Tag,P+1,255);
  523. UTagName:=UpcaseStr(TagName);
  524. NotEndTag:=copy(TagName,1,1)<>'/';
  525. if NotEndTag then ETagName:=UTagName else ETagName:=copy(UTagName,2,255);
  526. { <BR/> is also a Break tag... }
  527. if Copy(ETagName,Length(ETagName),1)='/' then
  528. begin
  529. ETagName:=copy(ETagName,1,Length(ETagName)-1);
  530. NotEndTag:=false;
  531. end;
  532. if (UTagName='!DOCTYPE') then DocTYPE else
  533. { Section tags }
  534. if (ETagName='HTML') then DocHTML(NotEndTag) else
  535. if (ETagName='HEAD') then DocHEAD(NotEndTag) else
  536. if (ETagName='TITLE') then DocTITLE(NotEndTag) else
  537. if (ETagName='BODY') then DocBODY(NotEndTag) else
  538. { Anchor tags }
  539. if (ETagName='A') then DocAnchor(NotEndTag) else
  540. { Direct formatting directives }
  541. if (ETagName='H1') then DocHeading(1,NotEndTag) else
  542. if (ETagName='H2') then DocHeading(2,NotEndTag) else
  543. if (ETagName='H3') then DocHeading(3,NotEndTag) else
  544. if (ETagName='H4') then DocHeading(4,NotEndTag) else
  545. if (ETagName='H5') then DocHeading(5,NotEndTag) else
  546. if (ETagName='H6') then DocHeading(6,NotEndTag) else
  547. if (ETagName='P') then DocParagraph(NotEndTag) else
  548. if (ETagName='BR') then DocBreak else
  549. if (ETagName='B') then DocBold(NotEndTag) else
  550. if (ETagName='CITE') then DocCite(NotEndTag) else
  551. if (ETagName='CODE') then DocCode(NotEndTag) else
  552. if (ETagName='EM') then DocEmphasized(NotEndTag) else
  553. if (ETagName='I') then DocItalic(NotEndTag) else
  554. if (ETagName='KBD') then DocKbd(NotEndTag) else
  555. if (ETagName='PRE') then DocPreformatted(NotEndTag) else
  556. if (ETagName='SAMP') then DocSample(NotEndTag) else
  557. if (ETagName='STRONG') then DocStrong(NotEndTag) else
  558. if (ETagName='TT') then DocTeleType(NotEndTag) else
  559. if (ETagName='VAR') then DocVariable(NotEndTag) else
  560. if (ETagName='SPAN') then DocSpan(NotEndTag) else
  561. if (ETagName='DIV') then DocDiv(NotEndTag) else
  562. { Unordered & ordered lists }
  563. if (ETagName='UL') then DocList(NotEndTag) else
  564. if (ETagName='OL') then DocOrderedList(NotEndTag) else
  565. if (ETagName='LI') then DocListItem(NotEndTag) else
  566. { Definition list }
  567. if (ETagName='DL') then DocDefList(NotEndTag) else
  568. if (ETagName='DT') then DocDefTerm(NotEndTag) else
  569. if (ETagName='DD') then DocDefExp(NotEndTag) else
  570. { Table }
  571. if (ETagName='TABLE') then DocTable(NotEndTag) else
  572. if (ETagName='TR') then DocTableRow(NotEndTag) else
  573. if (ETagName='TH') then DocTableHeaderItem(NotEndTag) else
  574. if (ETagName='TD') then DocTableItem(NotEndTag) else
  575. { Misc. tags }
  576. if (UTagName='META') then DocMETA else
  577. if (UTagName='IMG') then DocImage else
  578. if (UTagName='HR') then DocHorizontalRuler else
  579. DocUnknownTag;
  580. end;
  581. function THTMLParser.DocGetTagParam(Name: string; var Value: string): boolean;
  582. var Found: boolean;
  583. S: string;
  584. ParamName,ParamValue: string;
  585. InStr: boolean;
  586. I: sw_integer;
  587. begin
  588. Found:=false;
  589. Name:=UpcaseStr(Name);
  590. Value:='';
  591. S:=TagParams;
  592. repeat
  593. InStr:=false;
  594. ParamName:=''; ParamValue:='';
  595. S:=Trim(S); I:=1;
  596. while (I<=length(S)) and (S[I]<>'=') do
  597. begin
  598. if S[I]=' ' then
  599. ParamName:=''
  600. else
  601. ParamName:=ParamName+S[I];
  602. Inc(I);
  603. end;
  604. ParamName:=Trim(ParamName);
  605. if S[I]='=' then
  606. begin
  607. Inc(I); InStr:=false;
  608. while (I<=length(S)) and (S[I]=' ') do
  609. Inc(I);
  610. if (I<=length(S)) and (S[I]='"') then
  611. begin
  612. InStr:=true;
  613. Inc(I);
  614. end;
  615. while (I<=length(S)) and ((InStr=true) or (S[I]<>' ')) do
  616. begin
  617. if S[I]='"' then
  618. begin
  619. InStr:=not InStr;
  620. if InStr=false then Break;
  621. end
  622. else
  623. ParamValue:=ParamValue+S[I];
  624. Inc(I);
  625. end;
  626. end;
  627. Found:=(Name=UpcaseStr(ParamName));
  628. if Found then Value:=ParamValue;
  629. Delete(S,1,I);
  630. until Found or (S='');
  631. DocGetTagParam:=Found;
  632. end;
  633. procedure THTMLParser.DocProcessComment(Comment: string);
  634. begin
  635. end;
  636. procedure THTMLParser.DocUnknownTag;
  637. begin
  638. end;
  639. procedure THTMLParser.DocTYPE;
  640. begin
  641. end;
  642. procedure THTMLParser.DocHTML(Entered: boolean);
  643. begin
  644. end;
  645. procedure THTMLParser.DocHEAD(Entered: boolean);
  646. begin
  647. end;
  648. procedure THTMLParser.DocMETA;
  649. begin
  650. end;
  651. procedure THTMLParser.DocTITLE(Entered: boolean);
  652. begin
  653. end;
  654. procedure THTMLParser.DocBODY(Entered: boolean);
  655. begin
  656. end;
  657. procedure THTMLParser.DocAnchor(Entered: boolean);
  658. begin
  659. end;
  660. procedure THTMLParser.DocHeading(Level: integer; Entered: boolean);
  661. begin
  662. end;
  663. procedure THTMLParser.DocParagraph(Entered: boolean);
  664. begin
  665. end;
  666. procedure THTMLParser.DocBreak;
  667. begin
  668. end;
  669. procedure THTMLParser.DocImage;
  670. begin
  671. end;
  672. procedure THTMLParser.DocBold(Entered: boolean);
  673. begin
  674. end;
  675. procedure THTMLParser.DocCite(Entered: boolean);
  676. begin
  677. end;
  678. procedure THTMLParser.DocCode(Entered: boolean);
  679. begin
  680. end;
  681. procedure THTMLParser.DocEmphasized(Entered: boolean);
  682. begin
  683. end;
  684. procedure THTMLParser.DocItalic(Entered: boolean);
  685. begin
  686. end;
  687. procedure THTMLParser.DocKbd(Entered: boolean);
  688. begin
  689. end;
  690. procedure THTMLParser.DocPreformatted(Entered: boolean);
  691. begin
  692. end;
  693. procedure THTMLParser.DocSample(Entered: boolean);
  694. begin
  695. end;
  696. procedure THTMLParser.DocStrong(Entered: boolean);
  697. begin
  698. end;
  699. procedure THTMLParser.DocTeleType(Entered: boolean);
  700. begin
  701. end;
  702. procedure THTMLParser.DocVariable(Entered: boolean);
  703. begin
  704. end;
  705. procedure THTMLParser.DocSpan(Entered: boolean);
  706. begin
  707. end;
  708. procedure THTMLParser.DocDiv(Entered: boolean);
  709. var
  710. S: String;
  711. begin
  712. if Entered then
  713. begin
  714. if DocGetTagParam('CLASS',S) then
  715. if S='crosslinks' then
  716. begin
  717. DisableCrossIndexing:=true;
  718. {$ifdef DEBUG}
  719. DebugMessage(GetFileName,'Crosslinks found',Line,LinePos);
  720. {$endif DEBUG}
  721. end;
  722. end
  723. else
  724. begin
  725. {$ifdef DEBUG}
  726. if DisableCrossIndexing then
  727. begin
  728. DebugMessage(GetFileName,'Crosslinks end found',Line,LinePos);
  729. end;
  730. {$endif DEBUG}
  731. DisableCrossIndexing:=false;
  732. end;
  733. end;
  734. procedure THTMLParser.DocList(Entered: boolean);
  735. begin
  736. end;
  737. procedure THTMLParser.DocOrderedList(Entered: boolean);
  738. begin
  739. end;
  740. procedure THTMLParser.DocListItem(Entered: boolean);
  741. begin
  742. end;
  743. procedure THTMLParser.DocDefList(Entered: boolean);
  744. begin
  745. end;
  746. procedure THTMLParser.DocDefTerm(Entered: boolean);
  747. begin
  748. end;
  749. procedure THTMLParser.DocDefExp(Entered: boolean);
  750. begin
  751. end;
  752. procedure THTMLParser.DocTable(Entered: boolean);
  753. var
  754. S: String;
  755. begin
  756. if Entered then
  757. begin
  758. if DocGetTagParam('CLASS',S) then
  759. if S='bar' then
  760. begin
  761. DisableCrossIndexing:=true;
  762. {$ifdef DEBUG}
  763. DebugMessage(GetFileName,'Bar table found, cross indexing disabled ',Line,LinePos);
  764. {$endif DEBUG}
  765. end;
  766. end
  767. else
  768. begin
  769. {$ifdef DEBUG}
  770. if DisableCrossIndexing then
  771. begin
  772. DebugMessage(GetFileName,'Bar table end found',Line,LinePos);
  773. end;
  774. {$endif DEBUG}
  775. DisableCrossIndexing:=false;
  776. end;
  777. end;
  778. procedure THTMLParser.DocTableRow(Entered: boolean);
  779. begin
  780. end;
  781. procedure THTMLParser.DocTableHeaderItem(Entered: boolean);
  782. begin
  783. end;
  784. procedure THTMLParser.DocTableItem(Entered: boolean);
  785. begin
  786. end;
  787. procedure THTMLParser.DocHorizontalRuler;
  788. begin
  789. end;
  790. END.