whtml.pas 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 1998 by Berczi Gabor
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. unit WHTML;
  11. {$I globdir.inc}
  12. interface
  13. uses Objects;
  14. type
  15. PTextFile = ^TTextFile;
  16. TTextFile = object(TObject)
  17. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  18. function GetFileName : string; virtual;
  19. end;
  20. PMemoryTextFile = ^TMemoryTextFile;
  21. TMemoryTextFile = object(TTextFile)
  22. constructor Init;
  23. procedure AddLine(const S: string); virtual;
  24. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  25. function GetFileName : string; virtual;
  26. destructor Done; virtual;
  27. private
  28. Lines : PUnsortedStrCollection;
  29. end;
  30. PDOSTextFile = ^TDOSTextFile;
  31. TDOSTextFile = object(TMemoryTextFile)
  32. constructor Init(AFileName: string);
  33. function GetFileName : string; virtual;
  34. private
  35. DosFileName : string;
  36. end;
  37. PSGMLParser = ^TSGMLParser;
  38. TSGMLParser = object(TObject)
  39. constructor Init;
  40. function Process(HTMLFile: PTextFile): boolean; virtual;
  41. function ProcessLine(LineText: string): boolean; virtual;
  42. destructor Done; virtual;
  43. public
  44. Line,LinePos: sw_integer;
  45. procedure DocSoftBreak; virtual;
  46. function GetFileName : string;
  47. function DocAddTextChar(C: char): boolean; virtual;
  48. procedure DocAddText(S: string); virtual;
  49. procedure DocProcessTag(Tag: string); virtual;
  50. procedure DocProcessComment(Comment: string); virtual;
  51. function DocDecodeNamedEntity(Name: string; var Entity: string): boolean; virtual;
  52. private
  53. CurTag: string;
  54. FileName : string;
  55. InTag,InComment,InString: boolean;
  56. end;
  57. PHTMLParser = ^THTMLParser;
  58. THTMLParser = object(TSGMLParser)
  59. procedure DocSoftBreak; virtual;
  60. function DocAddTextChar(C: char): boolean; virtual;
  61. procedure DocProcessTag(Tag: string); virtual;
  62. function DocGetTagParam(Name: string; var Value: string): boolean; virtual;
  63. procedure DocProcessComment(Comment: string); virtual;
  64. function DocDecodeNamedEntity(Name: string; var E: string): boolean; virtual;
  65. public
  66. TagName,TagParams: string;
  67. DisableCrossIndexing : boolean;
  68. procedure DocUnknownTag; virtual;
  69. procedure DocTYPE; virtual;
  70. procedure DocHTML(Entered: boolean); virtual;
  71. procedure DocHEAD(Entered: boolean); virtual;
  72. procedure DocMETA; virtual;
  73. procedure DocTITLE(Entered: boolean); virtual;
  74. procedure DocBODY(Entered: boolean); virtual;
  75. procedure DocAnchor(Entered: boolean); virtual;
  76. procedure DocHeading(Level: integer; Entered: boolean); virtual;
  77. procedure DocParagraph(Entered: boolean); virtual;
  78. procedure DocBreak; virtual;
  79. procedure DocImage; virtual;
  80. procedure DocBold(Entered: boolean); virtual;
  81. procedure DocCite(Entered: boolean); virtual;
  82. procedure DocCode(Entered: boolean); virtual;
  83. procedure DocEmphasized(Entered: boolean); virtual;
  84. procedure DocItalic(Entered: boolean); virtual;
  85. procedure DocKbd(Entered: boolean); virtual;
  86. procedure DocPreformatted(Entered: boolean); virtual;
  87. procedure DocSample(Entered: boolean); virtual;
  88. procedure DocStrong(Entered: boolean); virtual;
  89. procedure DocTeleType(Entered: boolean); virtual;
  90. procedure DocVariable(Entered: boolean); virtual;
  91. procedure DocSpan(Entered: boolean); virtual;
  92. procedure DocDiv(Entered: boolean); virtual;
  93. procedure DocList(Entered: boolean); virtual;
  94. procedure DocOrderedList(Entered: boolean); virtual;
  95. procedure DocListItem; virtual;
  96. procedure DocDefList(Entered: boolean); virtual;
  97. procedure DocDefTerm; virtual;
  98. procedure DocDefExp; virtual;
  99. procedure DocTable(Entered: boolean); virtual;
  100. procedure DocTableRow(Entered: boolean); virtual;
  101. procedure DocTableHeaderItem(Entered: boolean); virtual;
  102. procedure DocTableItem(Entered: boolean); virtual;
  103. procedure DocHorizontalRuler; virtual;
  104. end;
  105. implementation
  106. uses
  107. WUtils;
  108. function TTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  109. begin
  110. Abstract;
  111. GetLine:=false;
  112. end;
  113. function TTextFile.GetFileName : string;
  114. begin
  115. GetFileName:='unknown';
  116. end;
  117. constructor TMemoryTextFile.Init;
  118. begin
  119. inherited Init;
  120. New(Lines, Init(500,500));
  121. end;
  122. function TMemoryTextFile.GetFileName : string;
  123. begin
  124. GetFileName:='unknown';
  125. end;
  126. procedure TMemoryTextFile.AddLine(const S: string);
  127. begin
  128. Lines^.Insert(NewStr(S));
  129. end;
  130. function TMemoryTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  131. var OK: boolean;
  132. PS: PString;
  133. begin
  134. OK:=(Lines<>nil) and (Idx<Lines^.Count);
  135. if OK then
  136. begin
  137. PS:=Lines^.At(Idx);
  138. if PS=nil then S:='' else S:=PS^;
  139. end;
  140. GetLine:=OK;
  141. end;
  142. destructor TMemoryTextFile.Done;
  143. begin
  144. if Lines<>nil then
  145. Dispose(Lines, Done);
  146. Lines:=nil;
  147. inherited Done;
  148. end;
  149. constructor TDOSTextFile.Init(AFileName: string);
  150. (*{$ifdef TPUNIXLF}
  151. procedure readln(var t:text;var s:string);
  152. var
  153. c : char;
  154. i : longint;
  155. begin
  156. c:=#0;
  157. i:=0;
  158. while (not eof(t)) and (c<>#10) and (i<255) do
  159. begin
  160. read(t,c);
  161. if (i<255) and (c<>#10) then
  162. begin
  163. inc(i);
  164. s[i]:=c;
  165. end;
  166. end;
  167. if (i>0) and (s[i]=#13) then
  168. dec(i);
  169. s[0]:=chr(i);
  170. end;
  171. {$endif}*)
  172. var f: file;
  173. linecomplete,hasCR: boolean;
  174. S: string;
  175. begin
  176. inherited Init;
  177. if AFileName='' then Fail;
  178. {$I-}
  179. Assign(f,AFileName);
  180. Reset(f,1);
  181. if IOResult<>0 then Fail;
  182. DosFileName:=AFileName;
  183. Dispose(Lines,Done);
  184. New(Lines, Init(500,2000));
  185. while (Eof(f)=false) and (IOResult=0) do
  186. begin
  187. ReadlnFromFile(f,S,linecomplete,hasCR,true);
  188. AddLine(S);
  189. end;
  190. Close(f);
  191. {$I+}
  192. end;
  193. function TDosTextFile.GetFileName : string;
  194. begin
  195. GetFileName:=DosFileName;
  196. end;
  197. constructor TSGMLParser.Init;
  198. begin
  199. inherited Init;
  200. FileName:='';
  201. end;
  202. function TSGMLParser.GetFileName : string;
  203. begin
  204. GetFileName:=FileName;
  205. end;
  206. function TSGMLParser.Process(HTMLFile: PTextFile): boolean;
  207. var S: string;
  208. OK,LineOK: boolean;
  209. begin
  210. if HTMLFile=nil then Exit;
  211. InTag:=false; InComment:=false; InString:=false; CurTag:='';
  212. Line:=0; OK:=true;
  213. FileName:=HTMLFile^.GetFileName;
  214. repeat
  215. LineOK:=HTMLFile^.GetLine(Line,S);
  216. if LineOK then
  217. begin
  218. Inc(Line);
  219. OK:=ProcessLine(S);
  220. end;
  221. until (LineOK=false) or (OK=false);
  222. Process:=OK;
  223. end;
  224. function TSGMLParser.ProcessLine(LineText: string): boolean;
  225. var OK: boolean;
  226. C: char;
  227. NewInString: boolean;
  228. OldInComment: boolean;
  229. WasThereAnyText: boolean;
  230. Pos2: integer;
  231. Name,Entity: string;
  232. LiteralCode: boolean;
  233. LiteralStart,LiteralEnd,P: integer;
  234. const TabSize : integer = 8;
  235. Tab = #9;
  236. begin
  237. WasThereAnyText:=false;
  238. OK:=true; LinePos:=1;
  239. LiteralStart:=0; LiteralEnd:=0;
  240. repeat
  241. P:=Pos(TAB,LineText);
  242. if P>0 then
  243. LineText:=copy(LineText,1,P-1)+CharStr(' ',TabSize)+copy(LineText,P+1,255);
  244. until P=0;
  245. while (LinePos<=length(LineText)) and OK do
  246. begin
  247. LiteralCode:=false;
  248. NewInString:=InString; OldInComment:=InComment;
  249. C:=LineText[LinePos];
  250. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  251. if (LiteralCode=false) and (C='&') then
  252. begin
  253. LiteralStart:=0; LiteralEnd:=0;
  254. Name:=''; Pos2:=LinePos+1;
  255. while (Pos2<=length(LineText)) and (LineText[Pos2]<>';') do
  256. begin
  257. Name:=Name+LineText[Pos2];
  258. Inc(Pos2);
  259. end;
  260. Inc(Pos2);
  261. if DocDecodeNamedEntity(Name,Entity) then
  262. begin
  263. LineText:=copy(LineText,1,LinePos-1)+Entity+copy(LineText,Pos2,255);
  264. LiteralStart:=LinePos; LiteralEnd:=LiteralStart+length(Entity)-1;
  265. C:=LineText[LinePos];
  266. end;
  267. end;
  268. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  269. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=false) then
  270. NewInString:=true;
  271. if (LiteralCode=false) and (C='<') and (InTag=false) then
  272. InTag:=true;
  273. if InTag then CurTag:=CurTag+C else
  274. WasThereAnyText:=DocAddTextChar(C);
  275. if (LiteralCode=false) and InTag and (InString=false) and (CurTag='<!--') then
  276. InComment:=true;
  277. { A comment can be longer than 255 chars
  278. move the test to LineText string,
  279. This is why the Previous, Next and Up Tags where not working ... PM
  280. if (LiteralCode=false) and InTag and InComment and (InString=false) and (length(CurTag)>=3) and
  281. (copy(CurTag,length(CurTag)-2,3)='-->') then
  282. InComment:=false; }
  283. if (LiteralCode=false) and InTag and InComment and (InString=false) and (LinePos>=3) and
  284. (copy(LineText,LinePos-2,3)='-->') then
  285. InComment:=false;
  286. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=true) then
  287. NewInString:=false;
  288. if (LiteralCode=false) and (C='>') and (InTag=true) then
  289. begin
  290. InTag:=false;
  291. if OldInComment then
  292. DocProcessComment(CurTag)
  293. else
  294. DocProcessTag(CurTag);
  295. CurTag:='';
  296. end;
  297. InString:=NewInString;
  298. Inc(LinePos);
  299. end;
  300. { whtml does not depend on whelp,
  301. so I can not use hscLineBreak here. PM }
  302. if InTag then
  303. begin
  304. if InString then
  305. CurTag:=CurTag+#0;
  306. end
  307. else if WasThereAnyText then
  308. DocSoftBreak;
  309. ProcessLine:=true;
  310. end;
  311. procedure TSGMLParser.DocSoftBreak;
  312. begin
  313. Abstract;
  314. end;
  315. function TSGMLParser.DocAddTextChar(C: char): boolean;
  316. begin
  317. Abstract;
  318. DocAddTextChar:=false;
  319. end;
  320. procedure TSGMLParser.DocAddText(S: string);
  321. var I: sw_integer;
  322. begin
  323. for I:=1 to length(S) do
  324. DocAddTextChar(S[I]);
  325. end;
  326. function TSGMLParser.DocDecodeNamedEntity(Name: string; var Entity: string): boolean;
  327. begin
  328. DocDecodeNamedEntity:=false;
  329. end;
  330. procedure TSGMLParser.DocProcessTag(Tag: string);
  331. begin
  332. Abstract;
  333. end;
  334. procedure TSGMLParser.DocProcessComment(Comment: string);
  335. begin
  336. Abstract;
  337. end;
  338. destructor TSGMLParser.Done;
  339. begin
  340. inherited Done;
  341. end;
  342. procedure THTMLParser.DocSoftBreak;
  343. begin
  344. end;
  345. function THTMLParser.DocAddTextChar(C: char): boolean;
  346. begin
  347. { Abstract }
  348. DocAddTextChar:=false;
  349. end;
  350. function THTMLParser.DocDecodeNamedEntity(Name: string; var E: string): boolean;
  351. var Found: boolean;
  352. Code: word;
  353. CC: word;
  354. begin
  355. Found:=true; Code:=$ffff;
  356. Name:=LowCaseStr(Name);
  357. if copy(Name,1,1)='#' then
  358. begin
  359. if Name[2]='x' then
  360. Val('$'+copy(Name,3,255),Code,CC)
  361. else
  362. Val(copy(Name,2,255),Code,CC);
  363. if CC<>0 then
  364. begin
  365. {$ifdef DEBUG}
  366. DebugMessage(FileName,'NamedEntity '+Name+' not converted',1,1);
  367. {$endif DEBUG}
  368. Code:=$ffff;
  369. end;
  370. end;
  371. if (Code=$22) or (Name='quot') then E:='"' else { double quote sign }
  372. if (Code=$26) or (Name='amp') then E:='&' else { ampersand }
  373. if (Code=$3C) or (Name='lt') then E:='<' else { less-than sign }
  374. if (Code=$3E) or (Name='gt') then E:='>' else { greater-than sign }
  375. if (Code=$5B) then E:='[' else { [ }
  376. if (Code=$5C) then E:='\' else { \ }
  377. if (Code=$5D) then E:=']' else { ] }
  378. if (Code=$5E) then E:='^' else { ^ }
  379. if (Code=$5F) then E:='_' else { _ }
  380. if (Code=160) or (Name='nbsp') then E:=#255 else { no-break space }
  381. if (Code=161) or (Name='iexcl') then E:='­' else { inverted excalamation mark }
  382. if (Code=162) or (Name='cent') then E:='›' else { cent sign }
  383. if (Code=163) or (Name='pound') then E:='œ' else { pound sterling sign }
  384. if (Code=164) or (Name='curren') then E:='$' else { general currency sign }
  385. if (Code=165) or (Name='yen') then E:='�' else { yen sign }
  386. if (Code=166) or (Name='brvbar') then E:='|' else { broken vertical bar }
  387. (* if (Code=167) or (Name='sect') then E:=#255 else { section sign }*)
  388. (* if (Code=168) or (Name='uml') then E:=#255 else { umlaut (dieresis) }*)
  389. if (Code=169) or (Name='copy') then E:='(C)' else { copyright sign }
  390. (* if (Code=170) or (Name='ordf') then E:=#255 else { ordinal indicator, feminine }*)
  391. if (Code=171) or (Name='laquo') then E:='"' else { angle quotation mark -left }
  392. if (Code=172) or (Name='not') then E:='!' else { not sign }
  393. if (Code=173) or (Name='shy') then E:='-' else { soft hypen }
  394. if (Code=174) or (Name='reg') then E:='(R)' else { registered sign }
  395. (* if (Code=175) or (Name='macr') then E:='?' else { macron }*)
  396. if (Code=176) or (Name='deg') then E:='ø' else { degree sign }
  397. if (Code=177) or (Name='plusmn') then E:='ñ' else { plus-or-minus sign }
  398. if (Code=178) or (Name='sup2') then E:='ý' else { superscript 2 }
  399. if (Code=179) or (Name='sup3') then E:='^3' else { superscript 3 }
  400. if (Code=180) or (Name='acute') then E:='''' else { acute accent }
  401. if (Code=181) or (Name='micro') then E:='æ' else { micro sign }
  402. (* if (Code=182) or (Name='para') then E:='?' else { paragraph sign }*)
  403. if (Code=183) or (Name='middot') then E:='ù' else { middle dot }
  404. (* if (Code=184) or (Name='cedil') then E:='?' else { cedilla }*)
  405. if (Code=185) or (Name='sup1') then E:='^1' else { superscript 1 }
  406. (* if (Code=186) or (Name='ordm') then E:='?' else { ordinal indicator, masculine }*)
  407. if (Code=187) or (Name='raquo') then E:='"' else { angle quoatation mark -right }
  408. if (Code=188) or (Name='frac14') then E:='¬' else { fraction one-quarter }
  409. if (Code=189) or (Name='frac12') then E:='«' else { fraction one-half }
  410. if (Code=190) or (Name='frac34') then E:='3/4' else { fraction three-quarters }
  411. if (Code=191) or (Name='iquest') then E:='¨' else { inverted question mark }
  412. if (Code=192) or (Name='Agrave') then E:='A' else { capital A, grave accent }
  413. if (Code=193) or (Name='Aacute') then E:='A' else { capital A, acute accent }
  414. if (Code=194) or (Name='Acirc') then E:='A' else { capital A, circumflex accent }
  415. if (Code=195) or (Name='Atilde') then E:='A' else { capital A, tilde accent }
  416. if (Code=196) or (Name='Auml') then E:='Ž' else { capital A, dieresis or umlaut }
  417. if (Code=197) or (Name='Aring') then E:='�' else { capital A, ring }
  418. if (Code=198) or (Name='AElig') then E:='AE' else { capital AE diphthong }
  419. (* if (Code=199) or (Name='Ccedil') then E:='?' else { capital C, cedilla }*)
  420. if (Code=200) or (Name='Egrave') then E:='�' else { capital E, grave accent }
  421. if (Code=201) or (Name='Eacute') then E:='�' else { capital E, acute accent }
  422. if (Code=202) or (Name='Ecirc') then E:='E' else { capital E, circumflex accent }
  423. if (Code=203) or (Name='Euml') then E:='E' else { capital E, dieresis or umlaut }
  424. if (Code=204) or (Name='Igrave') then E:='I' else { capital I, grave accent }
  425. if (Code=205) or (Name='Iacute') then E:='I' else { capital I, acute accent }
  426. if (Code=206) or (Name='Icirc') then E:='I' else { capital I, circumflex accent }
  427. if (Code=207) or (Name='Iuml') then E:='I' else { capital I, dieresis or umlaut }
  428. (* if (Code=208) or (Name='ETH') then E:='?' else { capital Eth, Icelandic }*)
  429. if (Code=209) or (Name='Ntidle') then E:='¥' else { capital N, tilde }
  430. if (Code=210) or (Name='Ograve') then E:='O' else { capital O, grave accent }
  431. if (Code=211) or (Name='Oacute') then E:='O' else { capital O, acute accent }
  432. if (Code=212) or (Name='Ocirc') then E:='O' else { capital O, circumflex accent }
  433. if (Code=213) or (Name='Otilde') then E:='O' else { capital O, tilde }
  434. if (Code=214) or (Name='Ouml') then E:='™' else { capital O, dieresis or umlaut }
  435. if (Code=215) or (Name='times') then E:='*' else { multiply sign }
  436. if (Code=216) or (Name='Oslash') then E:='O' else { capital O, slash }
  437. if (Code=217) or (Name='Ugrave') then E:='U' else { capital U, grave accent }
  438. if (Code=218) or (Name='Uacute') then E:='U' else { capital U, acute accent }
  439. if (Code=219) or (Name='Ucirc') then E:='U' else { capital U, circumflex accent }
  440. if (Code=220) or (Name='Uuml') then E:='š' else { capital U, dieresis or umlaut }
  441. if (Code=221) or (Name='Yacute') then E:='Y' else { capital Y, acute accent }
  442. (* if (Code=222) or (Name='THORN') then E:='?' else { capital THORN, Icelandic }*)
  443. if (Code=223) or (Name='szlig') then E:='á' else { small sharp S, German }
  444. if (Code=224) or (Name='agrave') then E:='…' else { small a, grave accent }
  445. if (Code=225) or (Name='aacute') then E:=' ' else { small a, acute accent }
  446. if (Code=226) or (Name='acirc') then E:='ƒ' else { small a, circumflex accent }
  447. if (Code=227) or (Name='atilde') then E:='ƒ' else { small a, tilde }
  448. if (Code=228) or (Name='auml') then E:='„' else { small a, dieresis or umlaut }
  449. if (Code=229) or (Name='aring') then E:='†' else { small a, ring }
  450. if (Code=230) or (Name='aelig') then E:='ae' else { small ae, diphthong }
  451. (* if (Code=231) or (Name='ccedil') then E:='?' else { small c, cedilla }*)
  452. if (Code=232) or (Name='egrave') then E:='Š' else { small e, grave accent }
  453. if (Code=233) or (Name='eacute') then E:='‚' else { small e, acute accent }
  454. if (Code=234) or (Name='ecirc') then E:='ˆ' else { small e, circumflex accent }
  455. if (Code=235) or (Name='euml') then E:='‰' else { small e, dieresis or umlaut }
  456. if (Code=236) or (Name='igrave') then E:='�' else { small i, grave accent }
  457. if (Code=237) or (Name='iacute') then E:='¡' else { small i, acute accent }
  458. if (Code=238) or (Name='icirc') then E:='Œ' else { small i, circumflex accent }
  459. if (Code=239) or (Name='iuml') then E:='‹' else { small i, dieresis or umlaut }
  460. (* if (Code=240) or (Name='eth') then E:='?' else { small eth, Icelandic }*)
  461. if (Code=241) or (Name='ntilde') then E:='¤' else { small n, tilde }
  462. if (Code=242) or (Name='ograve') then E:='•' else { small o, grave accent }
  463. if (Code=243) or (Name='oacute') then E:='¢' else { small o, acute accent }
  464. if (Code=244) or (Name='ocirc') then E:='“' else { small o, circumflex accent }
  465. if (Code=245) or (Name='otilde') then E:='“' else { small o, tilde }
  466. if (Code=246) or (Name='ouml') then E:='”' else { small o, dieresis or umlaut }
  467. if (Code=247) or (Name='divide') then E:='/' else { divide sign }
  468. if (Code=248) or (Name='oslash') then E:='"' else { small o, slash }
  469. if (Code=249) or (Name='ugrave') then E:='—' else { small u, grave accent }
  470. if (Code=250) or (Name='uacute') then E:='£' else { small u, acute accent }
  471. if (Code=251) or (Name='ucirc') then E:='–' else { small u, circumflex accent }
  472. if (Code=252) or (Name='uuml') then E:='�' else { small u, dieresis or umlaut }
  473. if (Code=253) or (Name='yacute') then E:='y' else { small y, acute accent }
  474. (* if (Code=254) or (Name='thorn') then E:='?' else { small thorn, Icelandic }*)
  475. if (Code=255) or (Name='yuml') then E:='y' else { small y, dieresis or umlaut }
  476. { Special codes appearing in TeXH generated files }
  477. if (Code=8217) then E:='''' else { acute accent as generated by TeXH }
  478. if (code=$2c6) then E:='^' else { Modifier Letter Circumflex Accent }
  479. if (code=$2013) then E:='-' else { En dash }
  480. if (code=$2014) then E:='--' else { Em dash }
  481. if (code=$201D) then E:='``' else { right double quotation marks }
  482. if (Code=$FB00) then E:='ff' else { ff together }
  483. if (Code=$FB01) then E:='fi' else { fi together }
  484. if (Code=$FB02) then E:='fl' else { fl together }
  485. if (Code=$FB03) then E:='ffi' else { ffi together }
  486. Found:=false;
  487. DocDecodeNamedEntity:=Found;
  488. {$ifdef DEBUG}
  489. if (Code<>$ffff) and not found then
  490. begin
  491. DebugMessage(FileName,'NamedEntity '+Name+' not handled',1,1);
  492. end;
  493. {$endif DEBUG}
  494. end;
  495. procedure THTMLParser.DocProcessTag(Tag: string);
  496. var UTagName,ETagName: string[30];
  497. P: byte;
  498. NotEndTag: boolean;
  499. begin
  500. if copy(Tag,1,1)='<' then Delete(Tag,1,1);
  501. if copy(Tag,length(Tag),1)='>' then Delete(Tag,length(Tag),1);
  502. Tag:=Trim(Tag);
  503. P:=Pos(' ',Tag); if P=0 then P:=length(Tag)+1;
  504. TagName:=copy(Tag,1,P-1); TagParams:=copy(Tag,P+1,255);
  505. UTagName:=UpcaseStr(TagName);
  506. NotEndTag:=copy(TagName,1,1)<>'/';
  507. if NotEndTag then ETagName:=UTagName else ETagName:=copy(UTagName,2,255);
  508. { <BR/> is also a Break tag... }
  509. if Copy(ETagName,Length(ETagName),1)='/' then
  510. begin
  511. ETagName:=copy(ETagName,1,Length(ETagName)-1);
  512. NotEndTag:=false;
  513. end;
  514. if (UTagName='!DOCTYPE') then DocTYPE else
  515. { Section tags }
  516. if (ETagName='HTML') then DocHTML(NotEndTag) else
  517. if (ETagName='HEAD') then DocHEAD(NotEndTag) else
  518. if (ETagName='TITLE') then DocTITLE(NotEndTag) else
  519. if (ETagName='BODY') then DocBODY(NotEndTag) else
  520. { Anchor tags }
  521. if (ETagName='A') then DocAnchor(NotEndTag) else
  522. { Direct formatting directives }
  523. if (ETagName='H1') then DocHeading(1,NotEndTag) else
  524. if (ETagName='H2') then DocHeading(2,NotEndTag) else
  525. if (ETagName='H3') then DocHeading(3,NotEndTag) else
  526. if (ETagName='H4') then DocHeading(4,NotEndTag) else
  527. if (ETagName='H5') then DocHeading(5,NotEndTag) else
  528. if (ETagName='H6') then DocHeading(6,NotEndTag) else
  529. if (ETagName='P') then DocParagraph(NotEndTag) else
  530. if (ETagName='BR') then DocBreak else
  531. if (ETagName='B') then DocBold(NotEndTag) else
  532. if (ETagName='CITE') then DocCite(NotEndTag) else
  533. if (ETagName='CODE') then DocCode(NotEndTag) else
  534. if (ETagName='EM') then DocEmphasized(NotEndTag) else
  535. if (ETagName='I') then DocItalic(NotEndTag) else
  536. if (ETagName='KBD') then DocKbd(NotEndTag) else
  537. if (ETagName='PRE') then DocPreformatted(NotEndTag) else
  538. if (ETagName='SAMP') then DocSample(NotEndTag) else
  539. if (ETagName='STRONG') then DocStrong(NotEndTag) else
  540. if (ETagName='TT') then DocTeleType(NotEndTag) else
  541. if (ETagName='VAR') then DocVariable(NotEndTag) else
  542. if (ETagName='SPAN') then DocSpan(NotEndTag) else
  543. if (ETagName='DIV') then DocDiv(NotEndTag) else
  544. { Unordered & ordered lists }
  545. if (ETagName='UL') then DocList(NotEndTag) else
  546. if (ETagName='OL') then DocOrderedList(NotEndTag) else
  547. if (UTagName='LI') then DocListItem else
  548. { Definition list }
  549. if (ETagName='DL') then DocDefList(NotEndTag) else
  550. if (UTagName='DT') then DocDefTerm else
  551. if (UTagName='DD') then DocDefExp else
  552. { Table }
  553. if (ETagName='TABLE') then DocTable(NotEndTag) else
  554. if (ETagName='TR') then DocTableRow(NotEndTag) else
  555. if (ETagName='TH') then DocTableHeaderItem(NotEndTag) else
  556. if (ETagName='TD') then DocTableItem(NotEndTag) else
  557. { Misc. tags }
  558. if (UTagName='META') then DocMETA else
  559. if (UTagName='IMG') then DocImage else
  560. if (UTagName='HR') then DocHorizontalRuler else
  561. DocUnknownTag;
  562. end;
  563. function THTMLParser.DocGetTagParam(Name: string; var Value: string): boolean;
  564. var Found: boolean;
  565. S: string;
  566. ParamName,ParamValue: string;
  567. InStr: boolean;
  568. I: sw_integer;
  569. begin
  570. Found:=false; Name:=UpcaseStr(Name);
  571. S:=TagParams;
  572. repeat
  573. InStr:=false;
  574. ParamName:=''; ParamValue:='';
  575. S:=Trim(S); I:=1;
  576. while (I<=length(S)) and (S[I]<>'=') do
  577. begin
  578. ParamName:=ParamName+S[I];
  579. Inc(I);
  580. end;
  581. ParamName:=Trim(ParamName);
  582. if S[I]='=' then
  583. begin
  584. Inc(I); InStr:=false;
  585. while (I<=length(S)) and (S[I]=' ') do
  586. Inc(I);
  587. if (I<=length(S)) and (S[I]='"') then
  588. begin
  589. InStr:=true;
  590. Inc(I);
  591. end;
  592. while (I<=length(S)) and ((InStr=true) or (S[I]<>' ')) do
  593. begin
  594. if S[I]='"' then
  595. begin
  596. InStr:=not InStr;
  597. if InStr=false then Break;
  598. end
  599. else
  600. ParamValue:=ParamValue+S[I];
  601. Inc(I);
  602. end;
  603. end;
  604. Found:=(Name=UpcaseStr(ParamName));
  605. if Found then Value:=ParamValue;
  606. Delete(S,1,I);
  607. until Found or (S='');
  608. DocGetTagParam:=Found;
  609. end;
  610. procedure THTMLParser.DocProcessComment(Comment: string);
  611. begin
  612. end;
  613. procedure THTMLParser.DocUnknownTag;
  614. begin
  615. end;
  616. procedure THTMLParser.DocTYPE;
  617. begin
  618. end;
  619. procedure THTMLParser.DocHTML(Entered: boolean);
  620. begin
  621. end;
  622. procedure THTMLParser.DocHEAD(Entered: boolean);
  623. begin
  624. end;
  625. procedure THTMLParser.DocMETA;
  626. begin
  627. end;
  628. procedure THTMLParser.DocTITLE(Entered: boolean);
  629. begin
  630. end;
  631. procedure THTMLParser.DocBODY(Entered: boolean);
  632. begin
  633. end;
  634. procedure THTMLParser.DocAnchor(Entered: boolean);
  635. begin
  636. end;
  637. procedure THTMLParser.DocHeading(Level: integer; Entered: boolean);
  638. begin
  639. end;
  640. procedure THTMLParser.DocParagraph(Entered: boolean);
  641. begin
  642. end;
  643. procedure THTMLParser.DocBreak;
  644. begin
  645. end;
  646. procedure THTMLParser.DocImage;
  647. begin
  648. end;
  649. procedure THTMLParser.DocBold(Entered: boolean);
  650. begin
  651. end;
  652. procedure THTMLParser.DocCite(Entered: boolean);
  653. begin
  654. end;
  655. procedure THTMLParser.DocCode(Entered: boolean);
  656. begin
  657. end;
  658. procedure THTMLParser.DocEmphasized(Entered: boolean);
  659. begin
  660. end;
  661. procedure THTMLParser.DocItalic(Entered: boolean);
  662. begin
  663. end;
  664. procedure THTMLParser.DocKbd(Entered: boolean);
  665. begin
  666. end;
  667. procedure THTMLParser.DocPreformatted(Entered: boolean);
  668. begin
  669. end;
  670. procedure THTMLParser.DocSample(Entered: boolean);
  671. begin
  672. end;
  673. procedure THTMLParser.DocStrong(Entered: boolean);
  674. begin
  675. end;
  676. procedure THTMLParser.DocTeleType(Entered: boolean);
  677. begin
  678. end;
  679. procedure THTMLParser.DocVariable(Entered: boolean);
  680. begin
  681. end;
  682. procedure THTMLParser.DocSpan(Entered: boolean);
  683. begin
  684. end;
  685. procedure THTMLParser.DocDiv(Entered: boolean);
  686. var
  687. S: String;
  688. begin
  689. if Entered then
  690. begin
  691. if DocGetTagParam('CLASS',S) then
  692. if S='crosslinks' then
  693. begin
  694. DisableCrossIndexing:=true;
  695. {$ifdef DEBUG}
  696. DebugMessage(GetFileName,'Crosslinks found',Line,LinePos);
  697. {$endif DEBUG}
  698. end;
  699. end
  700. else
  701. begin
  702. {$ifdef DEBUG}
  703. if DisableCrossIndexing then
  704. begin
  705. DebugMessage(GetFileName,'Crosslinks end found',Line,LinePos);
  706. end;
  707. {$endif DEBUG}
  708. DisableCrossIndexing:=false;
  709. end;
  710. end;
  711. procedure THTMLParser.DocList(Entered: boolean);
  712. begin
  713. end;
  714. procedure THTMLParser.DocOrderedList(Entered: boolean);
  715. begin
  716. end;
  717. procedure THTMLParser.DocListItem;
  718. begin
  719. end;
  720. procedure THTMLParser.DocDefList(Entered: boolean);
  721. begin
  722. end;
  723. procedure THTMLParser.DocDefTerm;
  724. begin
  725. end;
  726. procedure THTMLParser.DocDefExp;
  727. begin
  728. end;
  729. procedure THTMLParser.DocTable(Entered: boolean);
  730. begin
  731. end;
  732. procedure THTMLParser.DocTableRow(Entered: boolean);
  733. begin
  734. end;
  735. procedure THTMLParser.DocTableHeaderItem(Entered: boolean);
  736. begin
  737. end;
  738. procedure THTMLParser.DocTableItem(Entered: boolean);
  739. begin
  740. end;
  741. procedure THTMLParser.DocHorizontalRuler;
  742. begin
  743. end;
  744. END.