whtml.pas 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 1998 by Berczi Gabor
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. unit WHTML;
  11. {$I globdir.inc}
  12. interface
  13. uses Objects;
  14. type
  15. PTextFile = ^TTextFile;
  16. TTextFile = object(TObject)
  17. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  18. function GetFileName : string; virtual;
  19. end;
  20. PMemoryTextFile = ^TMemoryTextFile;
  21. TMemoryTextFile = object(TTextFile)
  22. constructor Init;
  23. procedure AddLine(const S: string); virtual;
  24. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  25. function GetFileName : string; virtual;
  26. function GetLineCount : sw_integer;
  27. destructor Done; virtual;
  28. private
  29. Lines : PUnsortedStrCollection;
  30. end;
  31. PDOSTextFile = ^TDOSTextFile;
  32. TDOSTextFile = object(TMemoryTextFile)
  33. constructor Init(AFileName: string);
  34. function GetFileName : string; virtual;
  35. private
  36. DosFileName : string;
  37. end;
  38. PSGMLParser = ^TSGMLParser;
  39. TSGMLParser = object(TObject)
  40. constructor Init;
  41. function Process(HTMLFile: PTextFile): boolean; virtual;
  42. function ProcessLine(LineText: string): boolean; virtual;
  43. destructor Done; virtual;
  44. public
  45. Line,LinePos: sw_integer;
  46. procedure DocSoftBreak; virtual;
  47. function GetFileName : string;
  48. function DocAddTextChar(C: char): boolean; virtual;
  49. procedure DocAddText(S: string); virtual;
  50. procedure DocProcessTag(Tag: string); virtual;
  51. procedure DocProcessComment(Comment: string); virtual;
  52. function DocDecodeNamedEntity(Name: string; var Entity: string): boolean; virtual;
  53. private
  54. CurTag: string;
  55. FileName : string;
  56. InTag,InComment,InString: boolean;
  57. end;
  58. PHTMLParser = ^THTMLParser;
  59. THTMLParser = object(TSGMLParser)
  60. procedure DocSoftBreak; virtual;
  61. function DocAddTextChar(C: char): boolean; virtual;
  62. procedure DocProcessTag(Tag: string); virtual;
  63. function DocGetTagParam(Name: string; var Value: string): boolean; virtual;
  64. procedure DocProcessComment(Comment: string); virtual;
  65. function DocDecodeNamedEntity(Name: string; var E: string): boolean; virtual;
  66. public
  67. TagName,TagParams: string;
  68. DisableCrossIndexing : boolean;
  69. procedure DocUnknownTag; virtual;
  70. procedure DocTYPE; virtual;
  71. procedure DocHTML(Entered: boolean); virtual;
  72. procedure DocHEAD(Entered: boolean); virtual;
  73. procedure DocMETA; virtual;
  74. procedure DocTITLE(Entered: boolean); virtual;
  75. procedure DocBODY(Entered: boolean); virtual;
  76. procedure DocAnchor(Entered: boolean); virtual;
  77. procedure DocHeading(Level: integer; Entered: boolean); virtual;
  78. procedure DocParagraph(Entered: boolean); virtual;
  79. procedure DocBreak; virtual;
  80. procedure DocImage; virtual;
  81. procedure DocBold(Entered: boolean); virtual;
  82. procedure DocCite(Entered: boolean); virtual;
  83. procedure DocCode(Entered: boolean); virtual;
  84. procedure DocEmphasized(Entered: boolean); virtual;
  85. procedure DocItalic(Entered: boolean); virtual;
  86. procedure DocKbd(Entered: boolean); virtual;
  87. procedure DocPreformatted(Entered: boolean); virtual;
  88. procedure DocSample(Entered: boolean); virtual;
  89. procedure DocStrong(Entered: boolean); virtual;
  90. procedure DocTeleType(Entered: boolean); virtual;
  91. procedure DocVariable(Entered: boolean); virtual;
  92. procedure DocSpan(Entered: boolean); virtual;
  93. procedure DocDiv(Entered: boolean); virtual;
  94. procedure DocList(Entered: boolean); virtual;
  95. procedure DocOrderedList(Entered: boolean); virtual;
  96. procedure DocListItem(Entered: boolean); virtual;
  97. procedure DocDefList(Entered: boolean); virtual;
  98. procedure DocDefTerm(Entered: boolean); virtual;
  99. procedure DocDefExp(Entered: boolean); virtual;
  100. procedure DocTable(Entered: boolean); virtual;
  101. procedure DocTableRow(Entered: boolean); virtual;
  102. procedure DocTableHeaderItem(Entered: boolean); virtual;
  103. procedure DocTableItem(Entered: boolean); virtual;
  104. procedure DocHorizontalRuler; virtual;
  105. end;
  106. implementation
  107. uses
  108. WUtils;
  109. function TTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  110. begin
  111. Abstract;
  112. GetLine:=false;
  113. end;
  114. function TTextFile.GetFileName : string;
  115. begin
  116. GetFileName:='unknown';
  117. end;
  118. constructor TMemoryTextFile.Init;
  119. begin
  120. inherited Init;
  121. New(Lines, Init(500,500));
  122. end;
  123. function TMemoryTextFile.GetFileName : string;
  124. begin
  125. GetFileName:='unknown';
  126. end;
  127. function TMemoryTextFile.GetLineCount : sw_integer;
  128. begin
  129. GetLineCount:=Lines^.Count;
  130. end;
  131. procedure TMemoryTextFile.AddLine(const S: string);
  132. begin
  133. Lines^.Insert(NewStr(S));
  134. end;
  135. function TMemoryTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  136. var OK: boolean;
  137. PS: PString;
  138. begin
  139. OK:=(Lines<>nil) and (Idx<Lines^.Count);
  140. if OK then
  141. begin
  142. PS:=Lines^.At(Idx);
  143. if PS=nil then S:='' else S:=PS^;
  144. end;
  145. GetLine:=OK;
  146. end;
  147. destructor TMemoryTextFile.Done;
  148. begin
  149. if Lines<>nil then
  150. Dispose(Lines, Done);
  151. Lines:=nil;
  152. inherited Done;
  153. end;
  154. constructor TDOSTextFile.Init(AFileName: string);
  155. (*{$ifdef TPUNIXLF}
  156. procedure readln(var t:text;var s:string);
  157. var
  158. c : char;
  159. i : longint;
  160. begin
  161. c:=#0;
  162. i:=0;
  163. while (not eof(t)) and (c<>#10) and (i<255) do
  164. begin
  165. read(t,c);
  166. if (i<255) and (c<>#10) then
  167. begin
  168. inc(i);
  169. s[i]:=c;
  170. end;
  171. end;
  172. if (i>0) and (s[i]=#13) then
  173. dec(i);
  174. s[0]:=chr(i);
  175. end;
  176. {$endif}*)
  177. var f: file;
  178. linecomplete,hasCR: boolean;
  179. S: string;
  180. begin
  181. inherited Init;
  182. if AFileName='' then Fail;
  183. {$I-}
  184. Assign(f,AFileName);
  185. Reset(f,1);
  186. if IOResult<>0 then Fail;
  187. DosFileName:=AFileName;
  188. Dispose(Lines,Done);
  189. New(Lines, Init(500,2000));
  190. while (Eof(f)=false) and (IOResult=0) do
  191. begin
  192. ReadlnFromFile(f,S,linecomplete,hasCR,true);
  193. AddLine(S);
  194. end;
  195. Close(f);
  196. {$I+}
  197. end;
  198. function TDosTextFile.GetFileName : string;
  199. begin
  200. GetFileName:=DosFileName;
  201. end;
  202. constructor TSGMLParser.Init;
  203. begin
  204. inherited Init;
  205. FileName:='';
  206. end;
  207. function TSGMLParser.GetFileName : string;
  208. begin
  209. GetFileName:=FileName;
  210. end;
  211. function TSGMLParser.Process(HTMLFile: PTextFile): boolean;
  212. var S: string;
  213. OK,LineOK: boolean;
  214. begin
  215. if HTMLFile=nil then Exit;
  216. InTag:=false; InComment:=false; InString:=false; CurTag:='';
  217. Line:=0; OK:=true;
  218. FileName:=HTMLFile^.GetFileName;
  219. repeat
  220. LineOK:=HTMLFile^.GetLine(Line,S);
  221. if LineOK then
  222. begin
  223. Inc(Line);
  224. OK:=ProcessLine(S);
  225. end;
  226. until (LineOK=false) or (OK=false);
  227. Process:=OK;
  228. end;
  229. function TSGMLParser.ProcessLine(LineText: string): boolean;
  230. var OK: boolean;
  231. C: char;
  232. NewInString: boolean;
  233. OldInComment: boolean;
  234. WasThereAnyText: boolean;
  235. Pos2: integer;
  236. Name,Entity: string;
  237. LiteralCode: boolean;
  238. LiteralStart,LiteralEnd,P: integer;
  239. const TabSize : integer = 8;
  240. Tab = #9;
  241. begin
  242. WasThereAnyText:=false;
  243. OK:=true; LinePos:=1;
  244. LiteralStart:=0; LiteralEnd:=0;
  245. repeat
  246. P:=Pos(TAB,LineText);
  247. if P>0 then
  248. LineText:=copy(LineText,1,P-1)+CharStr(' ',TabSize)+copy(LineText,P+1,255);
  249. until P=0;
  250. while (LinePos<=length(LineText)) and OK do
  251. begin
  252. LiteralCode:=false;
  253. NewInString:=InString; OldInComment:=InComment;
  254. C:=LineText[LinePos];
  255. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  256. if (LiteralCode=false) and (C='&') then
  257. begin
  258. LiteralStart:=0; LiteralEnd:=0;
  259. Name:=''; Pos2:=LinePos+1;
  260. while (Pos2<=length(LineText)) and (LineText[Pos2]<>';') do
  261. begin
  262. Name:=Name+LineText[Pos2];
  263. Inc(Pos2);
  264. end;
  265. Inc(Pos2);
  266. if DocDecodeNamedEntity(Name,Entity) then
  267. begin
  268. LineText:=copy(LineText,1,LinePos-1)+Entity+copy(LineText,Pos2,255);
  269. LiteralStart:=LinePos; LiteralEnd:=LiteralStart+length(Entity)-1;
  270. C:=LineText[LinePos];
  271. end;
  272. end;
  273. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  274. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=false) then
  275. NewInString:=true;
  276. if (LiteralCode=false) and (C='<') and (InTag=false) then
  277. InTag:=true;
  278. if InTag then CurTag:=CurTag+C else
  279. WasThereAnyText:=DocAddTextChar(C);
  280. if (LiteralCode=false) and InTag and (InString=false) and (CurTag='<!--') then
  281. InComment:=true;
  282. { A comment can be longer than 255 chars
  283. move the test to LineText string,
  284. This is why the Previous, Next and Up Tags where not working ... PM
  285. if (LiteralCode=false) and InTag and InComment and (InString=false) and (length(CurTag)>=3) and
  286. (copy(CurTag,length(CurTag)-2,3)='-->') then
  287. InComment:=false; }
  288. if (LiteralCode=false) and InTag and InComment and (InString=false) and (LinePos>=3) and
  289. (copy(LineText,LinePos-2,3)='-->') then
  290. InComment:=false;
  291. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=true) then
  292. NewInString:=false;
  293. if (LiteralCode=false) and (C='>') and (InTag=true) then
  294. begin
  295. InTag:=false;
  296. if OldInComment then
  297. DocProcessComment(CurTag)
  298. else
  299. DocProcessTag(CurTag);
  300. CurTag:='';
  301. end;
  302. InString:=NewInString;
  303. Inc(LinePos);
  304. end;
  305. { whtml does not depend on whelp,
  306. so I can not use hscLineBreak here. PM }
  307. if InTag then
  308. begin
  309. if InString then
  310. CurTag:=CurTag+#0;
  311. end
  312. else if WasThereAnyText then
  313. DocSoftBreak;
  314. ProcessLine:=true;
  315. end;
  316. procedure TSGMLParser.DocSoftBreak;
  317. begin
  318. Abstract;
  319. end;
  320. function TSGMLParser.DocAddTextChar(C: char): boolean;
  321. begin
  322. Abstract;
  323. DocAddTextChar:=false;
  324. end;
  325. procedure TSGMLParser.DocAddText(S: string);
  326. var I: sw_integer;
  327. begin
  328. for I:=1 to length(S) do
  329. DocAddTextChar(S[I]);
  330. end;
  331. function TSGMLParser.DocDecodeNamedEntity(Name: string; var Entity: string): boolean;
  332. begin
  333. DocDecodeNamedEntity:=false;
  334. end;
  335. procedure TSGMLParser.DocProcessTag(Tag: string);
  336. begin
  337. Abstract;
  338. end;
  339. procedure TSGMLParser.DocProcessComment(Comment: string);
  340. begin
  341. Abstract;
  342. end;
  343. destructor TSGMLParser.Done;
  344. begin
  345. inherited Done;
  346. end;
  347. procedure THTMLParser.DocSoftBreak;
  348. begin
  349. end;
  350. function THTMLParser.DocAddTextChar(C: char): boolean;
  351. begin
  352. { Abstract }
  353. DocAddTextChar:=false;
  354. end;
  355. function THTMLParser.DocDecodeNamedEntity(Name: string; var E: string): boolean;
  356. var Found: boolean;
  357. Code: word;
  358. CC: word;
  359. begin
  360. Found:=true; Code:=$ffff;
  361. Name:=LowCaseStr(Name);
  362. if copy(Name,1,1)='#' then
  363. begin
  364. if Name[2]='x' then
  365. Val('$'+copy(Name,3,255),Code,CC)
  366. else
  367. Val(copy(Name,2,255),Code,CC);
  368. if CC<>0 then
  369. begin
  370. {$ifdef DEBUG}
  371. DebugMessage(FileName,'NamedEntity '+Name+' not converted',1,1);
  372. {$endif DEBUG}
  373. Code:=$ffff;
  374. end;
  375. end;
  376. if (Code=$22) or (Name='quot') then E:='"' else { double quote sign }
  377. if (Code=$26) or (Name='amp') then E:='&' else { ampersand }
  378. if (Code=$3C) or (Name='lt') then E:='<' else { less-than sign }
  379. if (Code=$3E) or (Name='gt') then E:='>' else { greater-than sign }
  380. if (Code=$5B) then E:='[' else { [ }
  381. if (Code=$5C) then E:='\' else { \ }
  382. if (Code=$5D) then E:=']' else { ] }
  383. if (Code=$5E) then E:='^' else { ^ }
  384. if (Code=$5F) then E:='_' else { _ }
  385. if (Code=160) or (Name='nbsp') then E:=#255 else { no-break space }
  386. if (Code=161) or (Name='iexcl') then E:='­' else { inverted excalamation mark }
  387. if (Code=162) or (Name='cent') then E:='›' else { cent sign }
  388. if (Code=163) or (Name='pound') then E:='œ' else { pound sterling sign }
  389. if (Code=164) or (Name='curren') then E:='$' else { general currency sign }
  390. if (Code=165) or (Name='yen') then E:='�' else { yen sign }
  391. if (Code=166) or (Name='brvbar') then E:='|' else { broken vertical bar }
  392. (* if (Code=167) or (Name='sect') then E:=#255 else { section sign }*)
  393. (* if (Code=168) or (Name='uml') then E:=#255 else { umlaut (dieresis) }*)
  394. if (Code=169) or (Name='copy') then E:='(C)' else { copyright sign }
  395. (* if (Code=170) or (Name='ordf') then E:=#255 else { ordinal indicator, feminine }*)
  396. if (Code=171) or (Name='laquo') then E:='"' else { angle quotation mark -left }
  397. if (Code=172) or (Name='not') then E:='!' else { not sign }
  398. if (Code=173) or (Name='shy') then E:='-' else { soft hypen }
  399. if (Code=174) or (Name='reg') then E:='(R)' else { registered sign }
  400. (* if (Code=175) or (Name='macr') then E:='?' else { macron }*)
  401. if (Code=176) or (Name='deg') then E:='ø' else { degree sign }
  402. if (Code=177) or (Name='plusmn') then E:='ñ' else { plus-or-minus sign }
  403. if (Code=178) or (Name='sup2') then E:='ý' else { superscript 2 }
  404. if (Code=179) or (Name='sup3') then E:='^3' else { superscript 3 }
  405. if (Code=180) or (Name='acute') then E:='''' else { acute accent }
  406. if (Code=181) or (Name='micro') then E:='æ' else { micro sign }
  407. (* if (Code=182) or (Name='para') then E:='?' else { paragraph sign }*)
  408. if (Code=183) or (Name='middot') then E:='ù' else { middle dot }
  409. (* if (Code=184) or (Name='cedil') then E:='?' else { cedilla }*)
  410. if (Code=185) or (Name='sup1') then E:='^1' else { superscript 1 }
  411. (* if (Code=186) or (Name='ordm') then E:='?' else { ordinal indicator, masculine }*)
  412. if (Code=187) or (Name='raquo') then E:='"' else { angle quoatation mark -right }
  413. if (Code=188) or (Name='frac14') then E:='¬' else { fraction one-quarter }
  414. if (Code=189) or (Name='frac12') then E:='«' else { fraction one-half }
  415. if (Code=190) or (Name='frac34') then E:='3/4' else { fraction three-quarters }
  416. if (Code=191) or (Name='iquest') then E:='¨' else { inverted question mark }
  417. if (Code=192) or (Name='Agrave') then E:='A' else { capital A, grave accent }
  418. if (Code=193) or (Name='Aacute') then E:='A' else { capital A, acute accent }
  419. if (Code=194) or (Name='Acirc') then E:='A' else { capital A, circumflex accent }
  420. if (Code=195) or (Name='Atilde') then E:='A' else { capital A, tilde accent }
  421. if (Code=196) or (Name='Auml') then E:='Ž' else { capital A, dieresis or umlaut }
  422. if (Code=197) or (Name='Aring') then E:='�' else { capital A, ring }
  423. if (Code=198) or (Name='AElig') then E:='AE' else { capital AE diphthong }
  424. (* if (Code=199) or (Name='Ccedil') then E:='?' else { capital C, cedilla }*)
  425. if (Code=200) or (Name='Egrave') then E:='�' else { capital E, grave accent }
  426. if (Code=201) or (Name='Eacute') then E:='�' else { capital E, acute accent }
  427. if (Code=202) or (Name='Ecirc') then E:='E' else { capital E, circumflex accent }
  428. if (Code=203) or (Name='Euml') then E:='E' else { capital E, dieresis or umlaut }
  429. if (Code=204) or (Name='Igrave') then E:='I' else { capital I, grave accent }
  430. if (Code=205) or (Name='Iacute') then E:='I' else { capital I, acute accent }
  431. if (Code=206) or (Name='Icirc') then E:='I' else { capital I, circumflex accent }
  432. if (Code=207) or (Name='Iuml') then E:='I' else { capital I, dieresis or umlaut }
  433. (* if (Code=208) or (Name='ETH') then E:='?' else { capital Eth, Icelandic }*)
  434. if (Code=209) or (Name='Ntidle') then E:='¥' else { capital N, tilde }
  435. if (Code=210) or (Name='Ograve') then E:='O' else { capital O, grave accent }
  436. if (Code=211) or (Name='Oacute') then E:='O' else { capital O, acute accent }
  437. if (Code=212) or (Name='Ocirc') then E:='O' else { capital O, circumflex accent }
  438. if (Code=213) or (Name='Otilde') then E:='O' else { capital O, tilde }
  439. if (Code=214) or (Name='Ouml') then E:='™' else { capital O, dieresis or umlaut }
  440. if (Code=215) or (Name='times') then E:='*' else { multiply sign }
  441. if (Code=216) or (Name='Oslash') then E:='O' else { capital O, slash }
  442. if (Code=217) or (Name='Ugrave') then E:='U' else { capital U, grave accent }
  443. if (Code=218) or (Name='Uacute') then E:='U' else { capital U, acute accent }
  444. if (Code=219) or (Name='Ucirc') then E:='U' else { capital U, circumflex accent }
  445. if (Code=220) or (Name='Uuml') then E:='š' else { capital U, dieresis or umlaut }
  446. if (Code=221) or (Name='Yacute') then E:='Y' else { capital Y, acute accent }
  447. (* if (Code=222) or (Name='THORN') then E:='?' else { capital THORN, Icelandic }*)
  448. if (Code=223) or (Name='szlig') then E:='á' else { small sharp S, German }
  449. if (Code=224) or (Name='agrave') then E:='…' else { small a, grave accent }
  450. if (Code=225) or (Name='aacute') then E:=' ' else { small a, acute accent }
  451. if (Code=226) or (Name='acirc') then E:='ƒ' else { small a, circumflex accent }
  452. if (Code=227) or (Name='atilde') then E:='ƒ' else { small a, tilde }
  453. if (Code=228) or (Name='auml') then E:='„' else { small a, dieresis or umlaut }
  454. if (Code=229) or (Name='aring') then E:='†' else { small a, ring }
  455. if (Code=230) or (Name='aelig') then E:='ae' else { small ae, diphthong }
  456. (* if (Code=231) or (Name='ccedil') then E:='?' else { small c, cedilla }*)
  457. if (Code=232) or (Name='egrave') then E:='Š' else { small e, grave accent }
  458. if (Code=233) or (Name='eacute') then E:='‚' else { small e, acute accent }
  459. if (Code=234) or (Name='ecirc') then E:='ˆ' else { small e, circumflex accent }
  460. if (Code=235) or (Name='euml') then E:='‰' else { small e, dieresis or umlaut }
  461. if (Code=236) or (Name='igrave') then E:='�' else { small i, grave accent }
  462. if (Code=237) or (Name='iacute') then E:='¡' else { small i, acute accent }
  463. if (Code=238) or (Name='icirc') then E:='Œ' else { small i, circumflex accent }
  464. if (Code=239) or (Name='iuml') then E:='‹' else { small i, dieresis or umlaut }
  465. (* if (Code=240) or (Name='eth') then E:='?' else { small eth, Icelandic }*)
  466. if (Code=241) or (Name='ntilde') then E:='¤' else { small n, tilde }
  467. if (Code=242) or (Name='ograve') then E:='•' else { small o, grave accent }
  468. if (Code=243) or (Name='oacute') then E:='¢' else { small o, acute accent }
  469. if (Code=244) or (Name='ocirc') then E:='“' else { small o, circumflex accent }
  470. if (Code=245) or (Name='otilde') then E:='“' else { small o, tilde }
  471. if (Code=246) or (Name='ouml') then E:='”' else { small o, dieresis or umlaut }
  472. if (Code=247) or (Name='divide') then E:='/' else { divide sign }
  473. if (Code=248) or (Name='oslash') then E:='"' else { small o, slash }
  474. if (Code=249) or (Name='ugrave') then E:='—' else { small u, grave accent }
  475. if (Code=250) or (Name='uacute') then E:='£' else { small u, acute accent }
  476. if (Code=251) or (Name='ucirc') then E:='–' else { small u, circumflex accent }
  477. if (Code=252) or (Name='uuml') then E:='�' else { small u, dieresis or umlaut }
  478. if (Code=253) or (Name='yacute') then E:='y' else { small y, acute accent }
  479. (* if (Code=254) or (Name='thorn') then E:='?' else { small thorn, Icelandic }*)
  480. if (Code=255) or (Name='yuml') then E:='y' else { small y, dieresis or umlaut }
  481. { Special codes appearing in TeXH generated files }
  482. if (Code=8217) then E:='''' else { acute accent as generated by TeXH }
  483. if (code=$2c6) then E:='^' else { Modifier Letter Circumflex Accent }
  484. if (code=$2013) then E:='-' else { En dash }
  485. if (code=$2014) then E:='--' else { Em dash }
  486. if (code=$201D) then E:='``' else { right double quotation marks }
  487. if (Code=$FB00) then E:='ff' else { ff together }
  488. if (Code=$FB01) then E:='fi' else { fi together }
  489. if (Code=$FB02) then E:='fl' else { fl together }
  490. if (Code=$FB03) then E:='ffi' else { ffi together }
  491. Found:=false;
  492. DocDecodeNamedEntity:=Found;
  493. {$ifdef DEBUG}
  494. if (Code<>$ffff) and not found then
  495. begin
  496. DebugMessage(FileName,'NamedEntity '+Name+' not handled',1,1);
  497. end;
  498. {$endif DEBUG}
  499. end;
  500. procedure THTMLParser.DocProcessTag(Tag: string);
  501. var UTagName,ETagName: string[30];
  502. P: byte;
  503. NotEndTag: boolean;
  504. begin
  505. if copy(Tag,1,1)='<' then Delete(Tag,1,1);
  506. if copy(Tag,length(Tag),1)='>' then Delete(Tag,length(Tag),1);
  507. Tag:=Trim(Tag);
  508. P:=Pos(' ',Tag); if P=0 then P:=length(Tag)+1;
  509. TagName:=copy(Tag,1,P-1); TagParams:=copy(Tag,P+1,255);
  510. UTagName:=UpcaseStr(TagName);
  511. NotEndTag:=copy(TagName,1,1)<>'/';
  512. if NotEndTag then ETagName:=UTagName else ETagName:=copy(UTagName,2,255);
  513. { <BR/> is also a Break tag... }
  514. if Copy(ETagName,Length(ETagName),1)='/' then
  515. begin
  516. ETagName:=copy(ETagName,1,Length(ETagName)-1);
  517. NotEndTag:=false;
  518. end;
  519. if (UTagName='!DOCTYPE') then DocTYPE else
  520. { Section tags }
  521. if (ETagName='HTML') then DocHTML(NotEndTag) else
  522. if (ETagName='HEAD') then DocHEAD(NotEndTag) else
  523. if (ETagName='TITLE') then DocTITLE(NotEndTag) else
  524. if (ETagName='BODY') then DocBODY(NotEndTag) else
  525. { Anchor tags }
  526. if (ETagName='A') then DocAnchor(NotEndTag) else
  527. { Direct formatting directives }
  528. if (ETagName='H1') then DocHeading(1,NotEndTag) else
  529. if (ETagName='H2') then DocHeading(2,NotEndTag) else
  530. if (ETagName='H3') then DocHeading(3,NotEndTag) else
  531. if (ETagName='H4') then DocHeading(4,NotEndTag) else
  532. if (ETagName='H5') then DocHeading(5,NotEndTag) else
  533. if (ETagName='H6') then DocHeading(6,NotEndTag) else
  534. if (ETagName='P') then DocParagraph(NotEndTag) else
  535. if (ETagName='BR') then DocBreak else
  536. if (ETagName='B') then DocBold(NotEndTag) else
  537. if (ETagName='CITE') then DocCite(NotEndTag) else
  538. if (ETagName='CODE') then DocCode(NotEndTag) else
  539. if (ETagName='EM') then DocEmphasized(NotEndTag) else
  540. if (ETagName='I') then DocItalic(NotEndTag) else
  541. if (ETagName='KBD') then DocKbd(NotEndTag) else
  542. if (ETagName='PRE') then DocPreformatted(NotEndTag) else
  543. if (ETagName='SAMP') then DocSample(NotEndTag) else
  544. if (ETagName='STRONG') then DocStrong(NotEndTag) else
  545. if (ETagName='TT') then DocTeleType(NotEndTag) else
  546. if (ETagName='VAR') then DocVariable(NotEndTag) else
  547. if (ETagName='SPAN') then DocSpan(NotEndTag) else
  548. if (ETagName='DIV') then DocDiv(NotEndTag) else
  549. { Unordered & ordered lists }
  550. if (ETagName='UL') then DocList(NotEndTag) else
  551. if (ETagName='OL') then DocOrderedList(NotEndTag) else
  552. if (ETagName='LI') then DocListItem(NotEndTag) else
  553. { Definition list }
  554. if (ETagName='DL') then DocDefList(NotEndTag) else
  555. if (ETagName='DT') then DocDefTerm(NotEndTag) else
  556. if (ETagName='DD') then DocDefExp(NotEndTag) else
  557. { Table }
  558. if (ETagName='TABLE') then DocTable(NotEndTag) else
  559. if (ETagName='TR') then DocTableRow(NotEndTag) else
  560. if (ETagName='TH') then DocTableHeaderItem(NotEndTag) else
  561. if (ETagName='TD') then DocTableItem(NotEndTag) else
  562. { Misc. tags }
  563. if (UTagName='META') then DocMETA else
  564. if (UTagName='IMG') then DocImage else
  565. if (UTagName='HR') then DocHorizontalRuler else
  566. DocUnknownTag;
  567. end;
  568. function THTMLParser.DocGetTagParam(Name: string; var Value: string): boolean;
  569. var Found: boolean;
  570. S: string;
  571. ParamName,ParamValue: string;
  572. InStr: boolean;
  573. I: sw_integer;
  574. begin
  575. Found:=false;
  576. Name:=UpcaseStr(Name);
  577. Value:='';
  578. S:=TagParams;
  579. repeat
  580. InStr:=false;
  581. ParamName:=''; ParamValue:='';
  582. S:=Trim(S); I:=1;
  583. while (I<=length(S)) and (S[I]<>'=') do
  584. begin
  585. ParamName:=ParamName+S[I];
  586. Inc(I);
  587. end;
  588. ParamName:=Trim(ParamName);
  589. if S[I]='=' then
  590. begin
  591. Inc(I); InStr:=false;
  592. while (I<=length(S)) and (S[I]=' ') do
  593. Inc(I);
  594. if (I<=length(S)) and (S[I]='"') then
  595. begin
  596. InStr:=true;
  597. Inc(I);
  598. end;
  599. while (I<=length(S)) and ((InStr=true) or (S[I]<>' ')) do
  600. begin
  601. if S[I]='"' then
  602. begin
  603. InStr:=not InStr;
  604. if InStr=false then Break;
  605. end
  606. else
  607. ParamValue:=ParamValue+S[I];
  608. Inc(I);
  609. end;
  610. end;
  611. Found:=(Name=UpcaseStr(ParamName));
  612. if Found then Value:=ParamValue;
  613. Delete(S,1,I);
  614. until Found or (S='');
  615. DocGetTagParam:=Found;
  616. end;
  617. procedure THTMLParser.DocProcessComment(Comment: string);
  618. begin
  619. end;
  620. procedure THTMLParser.DocUnknownTag;
  621. begin
  622. end;
  623. procedure THTMLParser.DocTYPE;
  624. begin
  625. end;
  626. procedure THTMLParser.DocHTML(Entered: boolean);
  627. begin
  628. end;
  629. procedure THTMLParser.DocHEAD(Entered: boolean);
  630. begin
  631. end;
  632. procedure THTMLParser.DocMETA;
  633. begin
  634. end;
  635. procedure THTMLParser.DocTITLE(Entered: boolean);
  636. begin
  637. end;
  638. procedure THTMLParser.DocBODY(Entered: boolean);
  639. begin
  640. end;
  641. procedure THTMLParser.DocAnchor(Entered: boolean);
  642. begin
  643. end;
  644. procedure THTMLParser.DocHeading(Level: integer; Entered: boolean);
  645. begin
  646. end;
  647. procedure THTMLParser.DocParagraph(Entered: boolean);
  648. begin
  649. end;
  650. procedure THTMLParser.DocBreak;
  651. begin
  652. end;
  653. procedure THTMLParser.DocImage;
  654. begin
  655. end;
  656. procedure THTMLParser.DocBold(Entered: boolean);
  657. begin
  658. end;
  659. procedure THTMLParser.DocCite(Entered: boolean);
  660. begin
  661. end;
  662. procedure THTMLParser.DocCode(Entered: boolean);
  663. begin
  664. end;
  665. procedure THTMLParser.DocEmphasized(Entered: boolean);
  666. begin
  667. end;
  668. procedure THTMLParser.DocItalic(Entered: boolean);
  669. begin
  670. end;
  671. procedure THTMLParser.DocKbd(Entered: boolean);
  672. begin
  673. end;
  674. procedure THTMLParser.DocPreformatted(Entered: boolean);
  675. begin
  676. end;
  677. procedure THTMLParser.DocSample(Entered: boolean);
  678. begin
  679. end;
  680. procedure THTMLParser.DocStrong(Entered: boolean);
  681. begin
  682. end;
  683. procedure THTMLParser.DocTeleType(Entered: boolean);
  684. begin
  685. end;
  686. procedure THTMLParser.DocVariable(Entered: boolean);
  687. begin
  688. end;
  689. procedure THTMLParser.DocSpan(Entered: boolean);
  690. begin
  691. end;
  692. procedure THTMLParser.DocDiv(Entered: boolean);
  693. var
  694. S: String;
  695. begin
  696. if Entered then
  697. begin
  698. if DocGetTagParam('CLASS',S) then
  699. if S='crosslinks' then
  700. begin
  701. DisableCrossIndexing:=true;
  702. {$ifdef DEBUG}
  703. DebugMessage(GetFileName,'Crosslinks found',Line,LinePos);
  704. {$endif DEBUG}
  705. end;
  706. end
  707. else
  708. begin
  709. {$ifdef DEBUG}
  710. if DisableCrossIndexing then
  711. begin
  712. DebugMessage(GetFileName,'Crosslinks end found',Line,LinePos);
  713. end;
  714. {$endif DEBUG}
  715. DisableCrossIndexing:=false;
  716. end;
  717. end;
  718. procedure THTMLParser.DocList(Entered: boolean);
  719. begin
  720. end;
  721. procedure THTMLParser.DocOrderedList(Entered: boolean);
  722. begin
  723. end;
  724. procedure THTMLParser.DocListItem(Entered: boolean);
  725. begin
  726. end;
  727. procedure THTMLParser.DocDefList(Entered: boolean);
  728. begin
  729. end;
  730. procedure THTMLParser.DocDefTerm(Entered: boolean);
  731. begin
  732. end;
  733. procedure THTMLParser.DocDefExp(Entered: boolean);
  734. begin
  735. end;
  736. procedure THTMLParser.DocTable(Entered: boolean);
  737. var
  738. S: String;
  739. begin
  740. if Entered then
  741. begin
  742. if DocGetTagParam('CLASS',S) then
  743. if S='bar' then
  744. begin
  745. DisableCrossIndexing:=true;
  746. {$ifdef DEBUG}
  747. DebugMessage(GetFileName,'Bar table found, cross indexing disabled ',Line,LinePos);
  748. {$endif DEBUG}
  749. end;
  750. end
  751. else
  752. begin
  753. {$ifdef DEBUG}
  754. if DisableCrossIndexing then
  755. begin
  756. DebugMessage(GetFileName,'Bar table end found',Line,LinePos);
  757. end;
  758. {$endif DEBUG}
  759. DisableCrossIndexing:=false;
  760. end;
  761. end;
  762. procedure THTMLParser.DocTableRow(Entered: boolean);
  763. begin
  764. end;
  765. procedure THTMLParser.DocTableHeaderItem(Entered: boolean);
  766. begin
  767. end;
  768. procedure THTMLParser.DocTableItem(Entered: boolean);
  769. begin
  770. end;
  771. procedure THTMLParser.DocHorizontalRuler;
  772. begin
  773. end;
  774. END.