whtml.pas 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 1998 by Berczi Gabor
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. unit WHTML;
  11. {$I globdir.inc}
  12. interface
  13. uses Objects;
  14. type
  15. PTextFile = ^TTextFile;
  16. TTextFile = object(TObject)
  17. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  18. function GetFileName : string; virtual;
  19. end;
  20. PMemoryTextFile = ^TMemoryTextFile;
  21. TMemoryTextFile = object(TTextFile)
  22. constructor Init;
  23. procedure AddLine(const S: string); virtual;
  24. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  25. function GetFileName : string; virtual;
  26. function GetLineCount : sw_integer;
  27. destructor Done; virtual;
  28. private
  29. Lines : PUnsortedStrCollection;
  30. end;
  31. PDOSTextFile = ^TDOSTextFile;
  32. TDOSTextFile = object(TMemoryTextFile)
  33. constructor Init(AFileName: string);
  34. function GetFileName : string; virtual;
  35. private
  36. DosFileName : string;
  37. end;
  38. PSGMLParser = ^TSGMLParser;
  39. TSGMLParser = object(TObject)
  40. constructor Init;
  41. function Process(HTMLFile: PTextFile): boolean; virtual;
  42. function ProcessLine(LineText: string): boolean; virtual;
  43. destructor Done; virtual;
  44. public
  45. Line,LinePos: sw_integer;
  46. procedure DocSoftBreak; virtual;
  47. function GetFileName : string;
  48. function DocAddTextChar(C: char): boolean; virtual;
  49. procedure DocAddText(S: string); virtual;
  50. procedure DocProcessTag(Tag: string); virtual;
  51. procedure DocProcessComment(Comment: string); virtual;
  52. function DocDecodeNamedEntity(Name: string; var Entity: string): boolean; virtual;
  53. private
  54. CurTag: string;
  55. FileName : string;
  56. InTag,InComment,InString: boolean;
  57. end;
  58. PHTMLParser = ^THTMLParser;
  59. THTMLParser = object(TSGMLParser)
  60. procedure DocSoftBreak; virtual;
  61. function DocAddTextChar(C: char): boolean; virtual;
  62. procedure DocProcessTag(Tag: string); virtual;
  63. function DocGetTagParam(Name: string; var Value: string): boolean; virtual;
  64. procedure DocProcessComment(Comment: string); virtual;
  65. function DocDecodeNamedEntity(Name: string; var E: string): boolean; virtual;
  66. public
  67. TagName,TagParams: string;
  68. DisableCrossIndexing : boolean;
  69. procedure DocUnknownTag; virtual;
  70. procedure DocTYPE; virtual;
  71. procedure DocHTML(Entered: boolean); virtual;
  72. procedure DocHEAD(Entered: boolean); virtual;
  73. procedure DocMETA; virtual;
  74. procedure DocTITLE(Entered: boolean); virtual;
  75. procedure DocBODY(Entered: boolean); virtual;
  76. procedure DocAnchor(Entered: boolean); virtual;
  77. procedure DocHeading(Level: integer; Entered: boolean); virtual;
  78. procedure DocParagraph(Entered: boolean); virtual;
  79. procedure DocBreak; virtual;
  80. procedure DocImage; virtual;
  81. procedure DocBold(Entered: boolean); virtual;
  82. procedure DocCite(Entered: boolean); virtual;
  83. procedure DocCode(Entered: boolean); virtual;
  84. procedure DocEmphasized(Entered: boolean); virtual;
  85. procedure DocItalic(Entered: boolean); virtual;
  86. procedure DocKbd(Entered: boolean); virtual;
  87. procedure DocPreformatted(Entered: boolean); virtual;
  88. procedure DocSample(Entered: boolean); virtual;
  89. procedure DocStrong(Entered: boolean); virtual;
  90. procedure DocTeleType(Entered: boolean); virtual;
  91. procedure DocVariable(Entered: boolean); virtual;
  92. procedure DocSpan(Entered: boolean); virtual;
  93. procedure DocDiv(Entered: boolean); virtual;
  94. procedure DocList(Entered: boolean); virtual;
  95. procedure DocOrderedList(Entered: boolean); virtual;
  96. procedure DocListItem(Entered: boolean); virtual;
  97. procedure DocDefList(Entered: boolean); virtual;
  98. procedure DocDefTerm(Entered: boolean); virtual;
  99. procedure DocDefExp(Entered: boolean); virtual;
  100. procedure DocTable(Entered: boolean); virtual;
  101. procedure DocTableRow(Entered: boolean); virtual;
  102. procedure DocTableHeaderItem(Entered: boolean); virtual;
  103. procedure DocTableItem(Entered: boolean); virtual;
  104. procedure DocHorizontalRuler; virtual;
  105. end;
  106. implementation
  107. uses
  108. WUtils;
  109. function TTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  110. begin
  111. Abstract;
  112. GetLine:=false;
  113. end;
  114. function TTextFile.GetFileName : string;
  115. begin
  116. GetFileName:='unknown';
  117. end;
  118. constructor TMemoryTextFile.Init;
  119. begin
  120. inherited Init;
  121. New(Lines, Init(500,500));
  122. end;
  123. function TMemoryTextFile.GetFileName : string;
  124. begin
  125. GetFileName:='unknown';
  126. end;
  127. function TMemoryTextFile.GetLineCount : sw_integer;
  128. begin
  129. GetLineCount:=Lines^.Count;
  130. end;
  131. procedure TMemoryTextFile.AddLine(const S: string);
  132. begin
  133. Lines^.Insert(NewStr(S));
  134. end;
  135. function TMemoryTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  136. var OK: boolean;
  137. PS: PString;
  138. begin
  139. OK:=(Lines<>nil) and (Idx<Lines^.Count);
  140. if OK then
  141. begin
  142. PS:=Lines^.At(Idx);
  143. if PS=nil then S:='' else S:=PS^;
  144. end;
  145. GetLine:=OK;
  146. end;
  147. destructor TMemoryTextFile.Done;
  148. begin
  149. if Lines<>nil then
  150. Dispose(Lines, Done);
  151. Lines:=nil;
  152. inherited Done;
  153. end;
  154. constructor TDOSTextFile.Init(AFileName: string);
  155. var f: file;
  156. linecomplete,hasCR: boolean;
  157. S: string;
  158. OldFMode : Integer;
  159. begin
  160. inherited Init;
  161. if AFileName='' then Fail;
  162. {$I-}
  163. Assign(f,AFileName);
  164. OldFMode:= FileMode;
  165. FileMode:= 0;
  166. Reset(f,1);
  167. FileMode:= OldFMode;
  168. if IOResult<>0 then Fail;
  169. DosFileName:=AFileName;
  170. Dispose(Lines,Done);
  171. New(Lines, Init(500,2000));
  172. while (Eof(f)=false) and (IOResult=0) do
  173. begin
  174. ReadlnFromFile(f,S,linecomplete,hasCR,true);
  175. AddLine(S);
  176. end;
  177. Close(f);
  178. {$I+}
  179. end;
  180. function TDosTextFile.GetFileName : string;
  181. begin
  182. GetFileName:=DosFileName;
  183. end;
  184. constructor TSGMLParser.Init;
  185. begin
  186. inherited Init;
  187. FileName:='';
  188. end;
  189. function TSGMLParser.GetFileName : string;
  190. begin
  191. GetFileName:=FileName;
  192. end;
  193. function TSGMLParser.Process(HTMLFile: PTextFile): boolean;
  194. var S: string;
  195. OK,LineOK: boolean;
  196. begin
  197. if HTMLFile=nil then Exit;
  198. InTag:=false; InComment:=false; InString:=false; CurTag:='';
  199. Line:=0; OK:=true;
  200. FileName:=HTMLFile^.GetFileName;
  201. repeat
  202. LineOK:=HTMLFile^.GetLine(Line,S);
  203. if LineOK then
  204. begin
  205. Inc(Line);
  206. OK:=ProcessLine(S);
  207. end;
  208. until (LineOK=false) or (OK=false);
  209. Process:=OK;
  210. end;
  211. function TSGMLParser.ProcessLine(LineText: string): boolean;
  212. var OK: boolean;
  213. C: char;
  214. NewInString: boolean;
  215. OldInComment: boolean;
  216. WasThereAnyText: boolean;
  217. Pos2: integer;
  218. Name,Entity: string;
  219. LiteralCode: boolean;
  220. LiteralStart,LiteralEnd,P: integer;
  221. const TabSize : integer = 8;
  222. Tab = #9;
  223. begin
  224. WasThereAnyText:=false;
  225. OK:=true; LinePos:=1;
  226. LiteralStart:=0; LiteralEnd:=0;
  227. repeat
  228. P:=Pos(TAB,LineText);
  229. if P>0 then
  230. LineText:=copy(LineText,1,P-1)+CharStr(' ',TabSize)+copy(LineText,P+1,255);
  231. until P=0;
  232. while (LinePos<=length(LineText)) and OK do
  233. begin
  234. LiteralCode:=false;
  235. NewInString:=InString; OldInComment:=InComment;
  236. C:=LineText[LinePos];
  237. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  238. if (LiteralCode=false) and (C='&') then
  239. begin
  240. LiteralStart:=0; LiteralEnd:=0;
  241. Name:=''; Pos2:=LinePos+1;
  242. while (Pos2<=length(LineText)) and (LineText[Pos2]<>';') do
  243. begin
  244. Name:=Name+LineText[Pos2];
  245. Inc(Pos2);
  246. end;
  247. Inc(Pos2);
  248. if DocDecodeNamedEntity(Name,Entity) then
  249. begin
  250. LineText:=copy(LineText,1,LinePos-1)+Entity+copy(LineText,Pos2,255);
  251. LiteralStart:=LinePos; LiteralEnd:=LiteralStart+length(Entity)-1;
  252. C:=LineText[LinePos];
  253. end;
  254. end;
  255. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  256. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=false) then
  257. NewInString:=true;
  258. if (LiteralCode=false) and (C='<') and (InTag=false) then
  259. InTag:=true;
  260. if InTag then CurTag:=CurTag+C else
  261. WasThereAnyText:=DocAddTextChar(C);
  262. if (LiteralCode=false) and InTag and (InString=false) and (CurTag='<!--') then
  263. InComment:=true;
  264. { A comment can be longer than 255 chars
  265. move the test to LineText string,
  266. This is why the Previous, Next and Up Tags where not working ... PM
  267. if (LiteralCode=false) and InTag and InComment and (InString=false) and (length(CurTag)>=3) and
  268. (copy(CurTag,length(CurTag)-2,3)='-->') then
  269. InComment:=false; }
  270. if (LiteralCode=false) and InTag and InComment and (InString=false) and (LinePos>=3) and
  271. (copy(LineText,LinePos-2,3)='-->') then
  272. InComment:=false;
  273. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=true) then
  274. NewInString:=false;
  275. if (LiteralCode=false) and (C='>') and (InTag=true) then
  276. begin
  277. InTag:=false;
  278. if OldInComment then
  279. DocProcessComment(CurTag)
  280. else
  281. DocProcessTag(CurTag);
  282. CurTag:='';
  283. end;
  284. InString:=NewInString;
  285. Inc(LinePos);
  286. end;
  287. { whtml does not depend on whelp,
  288. so I can not use hscLineBreak here. PM }
  289. if InTag then
  290. begin
  291. if InString then
  292. CurTag:=CurTag+#0;
  293. end
  294. else if WasThereAnyText then
  295. DocSoftBreak;
  296. ProcessLine:=true;
  297. end;
  298. procedure TSGMLParser.DocSoftBreak;
  299. begin
  300. Abstract;
  301. end;
  302. function TSGMLParser.DocAddTextChar(C: char): boolean;
  303. begin
  304. Abstract;
  305. DocAddTextChar:=false;
  306. end;
  307. procedure TSGMLParser.DocAddText(S: string);
  308. var I: sw_integer;
  309. begin
  310. for I:=1 to length(S) do
  311. DocAddTextChar(S[I]);
  312. end;
  313. function TSGMLParser.DocDecodeNamedEntity(Name: string; var Entity: string): boolean;
  314. begin
  315. DocDecodeNamedEntity:=false;
  316. end;
  317. procedure TSGMLParser.DocProcessTag(Tag: string);
  318. begin
  319. Abstract;
  320. end;
  321. procedure TSGMLParser.DocProcessComment(Comment: string);
  322. begin
  323. Abstract;
  324. end;
  325. destructor TSGMLParser.Done;
  326. begin
  327. inherited Done;
  328. end;
  329. procedure THTMLParser.DocSoftBreak;
  330. begin
  331. end;
  332. function THTMLParser.DocAddTextChar(C: char): boolean;
  333. begin
  334. { Abstract }
  335. DocAddTextChar:=false;
  336. end;
  337. function THTMLParser.DocDecodeNamedEntity(Name: string; var E: string): boolean;
  338. var Found: boolean;
  339. Code: word;
  340. CC: word;
  341. begin
  342. Found:=true; Code:=$ffff;
  343. Name:=LowCaseStr(Name);
  344. if copy(Name,1,1)='#' then
  345. begin
  346. if Name[2]='x' then
  347. Val('$'+copy(Name,3,255),Code,CC)
  348. else
  349. Val(copy(Name,2,255),Code,CC);
  350. if CC<>0 then
  351. begin
  352. {$ifdef DEBUG}
  353. DebugMessage(FileName,'NamedEntity '+Name+' not converted',1,1);
  354. {$endif DEBUG}
  355. Code:=$ffff;
  356. end;
  357. end;
  358. { #0 to #127 is same for Unicode and Code page 437 }
  359. if (code<=127) then
  360. begin
  361. E:=chr(code);
  362. DocDecodeNamedEntity:=true;
  363. exit;
  364. end;
  365. if (Code=$22{34}) or (Name='quot') then E:='"' else { double quote sign }
  366. if (Code=$26{38}) or (Name='amp') then E:='&' else { ampersand }
  367. if (Code=$27{39}) or (Name='apos') then E:='''' else { apostrophe }
  368. if (Code=$3C{60}) or (Name='lt') then E:='<' else { less-than sign }
  369. if (Code=$3E{62}) or (Name='gt') then E:='>' else { greater-than sign }
  370. if (Code=$5B) then E:='[' else { [ }
  371. if (Code=$5C) then E:='\' else { \ }
  372. if (Code=$5D) then E:=']' else { ] }
  373. if (Code=$5E) then E:='^' else { ^ }
  374. if (Code=$5F) then E:='_' else { _ }
  375. if (Code=160) or (Name='nbsp') then E:=#255 else { no-break space }
  376. if (Code=161) or (Name='iexcl') then E:='­' else { inverted exclamation mark }
  377. if (Code=162) or (Name='cent') then E:='›' else { cent sign }
  378. if (Code=163) or (Name='pound') then E:='œ' else { pound sterling sign }
  379. if (Code=164) or (Name='curren') then E:='$' else { general currency sign }
  380. if (Code=165) or (Name='yen') then E:='' else { yen sign }
  381. if (Code=166) or (Name='brvbar') then E:='|' else { broken vertical bar }
  382. if (Code=167) or (Name='sect') then E:='' else { section sign }
  383. if (Code=168) or (Name='uml') then E:='"' else { umlaut (dieresis) }
  384. if (Code=169) or (Name='copy') then E:='(C)' else { copyright sign }
  385. (* if (Code=170) or (Name='ordf') then E:=#255 else { ordinal indicator, feminine }*)
  386. if (Code=171) or (Name='laquo') then E:='"' else { angle quotation mark -left }
  387. if (Code=172) or (Name='not') then E:='!' else { not sign }
  388. if (Code=173) or (Name='shy') then E:='-' else { soft hypen }
  389. if (Code=174) or (Name='reg') then E:='(R)' else { registered sign }
  390. (* if (Code=175) or (Name='macr') then E:='?' else { macron }*)
  391. if (Code=176) or (Name='deg') then E:='ø' else { degree sign }
  392. if (Code=177) or (Name='plusmn') then E:='ñ' else { plus-or-minus sign }
  393. if (Code=178) or (Name='sup2') then E:='ý' else { superscript 2 }
  394. if (Code=179) or (Name='sup3') then E:='^3' else { superscript 3 }
  395. if (Code=180) or (Name='acute') then E:='''' else { acute accent }
  396. if (Code=181) or (Name='micro') then E:='æ' else { micro sign }
  397. (* if (Code=182) or (Name='para') then E:='?' else { paragraph sign }*)
  398. if (Code=183) or (Name='middot') then E:='ù' else { middle dot }
  399. (* if (Code=184) or (Name='cedil') then E:='?' else { cedilla }*)
  400. if (Code=185) or (Name='sup1') then E:='^1' else { superscript 1 }
  401. (* if (Code=186) or (Name='ordm') then E:='?' else { ordinal indicator, masculine }*)
  402. if (Code=187) or (Name='raquo') then E:='"' else { angle quoatation mark -right }
  403. if (Code=188) or (Name='frac14') then E:='¬' else { fraction one-quarter }
  404. if (Code=189) or (Name='frac12') then E:='«' else { fraction one-half }
  405. if (Code=190) or (Name='frac34') then E:='3/4' else { fraction three-quarters }
  406. if (Code=191) or (Name='iquest') then E:='¨' else { inverted question mark }
  407. if (Code=192) or (Name='Agrave') then E:='A' else { capital A, grave accent }
  408. if (Code=193) or (Name='Aacute') then E:='A' else { capital A, acute accent }
  409. if (Code=194) or (Name='Acirc') then E:='A' else { capital A, circumflex accent }
  410. if (Code=195) or (Name='Atilde') then E:='A' else { capital A, tilde accent }
  411. if (Code=196) or (Name='Auml') then E:='Ž' else { capital A, dieresis or umlaut }
  412. if (Code=197) or (Name='Aring') then E:='' else { capital A, ring }
  413. if (Code=198) or (Name='AElig') then E:='’' else { capital AE diphthong }
  414. if (Code=199) or (Name='Ccedil') then E:='€' else { capital C, cedilla }
  415. if (Code=200) or (Name='Egrave') then E:='' else { capital E, grave accent }
  416. if (Code=201) or (Name='Eacute') then E:='' else { capital E, acute accent }
  417. if (Code=202) or (Name='Ecirc') then E:='E' else { capital E, circumflex accent }
  418. if (Code=203) or (Name='Euml') then E:='E' else { capital E, dieresis or umlaut }
  419. if (Code=204) or (Name='Igrave') then E:='I' else { capital I, grave accent }
  420. if (Code=205) or (Name='Iacute') then E:='I' else { capital I, acute accent }
  421. if (Code=206) or (Name='Icirc') then E:='I' else { capital I, circumflex accent }
  422. if (Code=207) or (Name='Iuml') then E:='I' else { capital I, dieresis or umlaut }
  423. (* if (Code=208) or (Name='ETH') then E:='?' else { capital Eth, Icelandic }*)
  424. if (Code=209) or (Name='Ntidle') then E:='¥' else { capital N, tilde }
  425. if (Code=210) or (Name='Ograve') then E:='O' else { capital O, grave accent }
  426. if (Code=211) or (Name='Oacute') then E:='O' else { capital O, acute accent }
  427. if (Code=212) or (Name='Ocirc') then E:='O' else { capital O, circumflex accent }
  428. if (Code=213) or (Name='Otilde') then E:='O' else { capital O, tilde }
  429. if (Code=214) or (Name='Ouml') then E:='™' else { capital O, dieresis or umlaut }
  430. if (Code=215) or (Name='times') then E:='*' else { multiply sign }
  431. if (Code=216) or (Name='Oslash') then E:='O' else { capital O, slash }
  432. if (Code=217) or (Name='Ugrave') then E:='U' else { capital U, grave accent }
  433. if (Code=218) or (Name='Uacute') then E:='U' else { capital U, acute accent }
  434. if (Code=219) or (Name='Ucirc') then E:='U' else { capital U, circumflex accent }
  435. if (Code=220) or (Name='Uuml') then E:='š' else { capital U, dieresis or umlaut }
  436. if (Code=221) or (Name='Yacute') then E:='Y' else { capital Y, acute accent }
  437. (* if (Code=222) or (Name='THORN') then E:='?' else { capital THORN, Icelandic }*)
  438. if (Code=223) or (Name='szlig') then E:='á' else { small sharp S, German }
  439. if (Code=224) or (Name='agrave') then E:='…' else { small a, grave accent }
  440. if (Code=225) or (Name='aacute') then E:=' ' else { small a, acute accent }
  441. if (Code=226) or (Name='acirc') then E:='ƒ' else { small a, circumflex accent }
  442. if (Code=227) or (Name='atilde') then E:='ƒ' else { small a, tilde }
  443. if (Code=228) or (Name='auml') then E:='„' else { small a, dieresis or umlaut }
  444. if (Code=229) or (Name='aring') then E:='†' else { small a, ring }
  445. if (Code=230) or (Name='aelig') then E:='ae' else { small ae, diphthong }
  446. if (Code=231) or (Name='ccedil') then E:='‡' else { small c, cedilla }
  447. if (Code=232) or (Name='egrave') then E:='Š' else { small e, grave accent }
  448. if (Code=233) or (Name='eacute') then E:='‚' else { small e, acute accent }
  449. if (Code=234) or (Name='ecirc') then E:='ˆ' else { small e, circumflex accent }
  450. if (Code=235) or (Name='euml') then E:='‰' else { small e, dieresis or umlaut }
  451. if (Code=236) or (Name='igrave') then E:='' else { small i, grave accent }
  452. if (Code=237) or (Name='iacute') then E:='¡' else { small i, acute accent }
  453. if (Code=238) or (Name='icirc') then E:='Œ' else { small i, circumflex accent }
  454. if (Code=239) or (Name='iuml') then E:='‹' else { small i, dieresis or umlaut }
  455. (* if (Code=240) or (Name='eth') then E:='?' else { small eth, Icelandic }*)
  456. if (Code=241) or (Name='ntilde') then E:='¤' else { small n, tilde }
  457. if (Code=242) or (Name='ograve') then E:='•' else { small o, grave accent }
  458. if (Code=243) or (Name='oacute') then E:='¢' else { small o, acute accent }
  459. if (Code=244) or (Name='ocirc') then E:='“' else { small o, circumflex accent }
  460. if (Code=245) or (Name='otilde') then E:='“' else { small o, tilde }
  461. if (Code=246) or (Name='ouml') then E:='”' else { small o, dieresis or umlaut }
  462. if (Code=247) or (Name='divide') then E:='/' else { divide sign }
  463. if (Code=248) or (Name='oslash') then E:='"' else { small o, slash }
  464. if (Code=249) or (Name='ugrave') then E:='—' else { small u, grave accent }
  465. if (Code=250) or (Name='uacute') then E:='£' else { small u, acute accent }
  466. if (Code=251) or (Name='ucirc') then E:='–' else { small u, circumflex accent }
  467. if (Code=252) or (Name='uuml') then E:='' else { small u, dieresis or umlaut }
  468. if (Code=253) or (Name='yacute') then E:='y' else { small y, acute accent }
  469. (* if (Code=254) or (Name='thorn') then E:='?' else { small thorn, Icelandic }*)
  470. if (Code=255) or (Name='yuml') then E:='y' else { small y, dieresis or umlaut }
  471. { Special codes appearing in TeXH generated files }
  472. if (code=$2c6{710}) or (Name='circ') then E:='^' else { Modifier Letter Circumflex Accent }
  473. if (code=$2dc{732}) or (Name='tilde') then E:='~' else { Small tilde }
  474. if (code=$2013{8211}) or (Name='endash') then E:='-' else { En dash }
  475. if (code=$2014{8212}) or (Name='emdash') then E:='--' else { Em dash }
  476. if (Code=$2018{8216}) or (Name='lsquo') then E:='`' else { Acute accent as generated by TeXH }
  477. if (Code=$2019{8217}) or (Name='rsquo') then E:='''' else { acute accent as generated by TeXH }
  478. if (code=$201C{8220}) or (Name='ldquo') then E:='''''' else { left double quotation marks }
  479. if (code=$201D{8221}) or (Name='rdquo') then E:='``' else { right double quotation marks }
  480. if (code=$2026{8230}) or (Name='hellip') then E:='...' else { horizontal ellipsis }
  481. if (Code=$FB00) then E:='ff' else { ff together }
  482. if (Code=$FB01) then E:='fi' else { fi together }
  483. if (Code=$FB02) then E:='fl' else { fl together }
  484. if (Code=$FB03) then E:='ffi' else { ffi together }
  485. if (Code=$FB04) then E:='ffl' else { ffl together }
  486. Found:=false;
  487. DocDecodeNamedEntity:=Found;
  488. {$ifdef DEBUG}
  489. if (Code<>$ffff) and not found then
  490. begin
  491. DebugMessage(FileName,'NamedEntity '+Name+' not handled',1,1);
  492. end;
  493. {$endif DEBUG}
  494. end;
  495. procedure THTMLParser.DocProcessTag(Tag: string);
  496. var UTagName,ETagName: string[30];
  497. P: byte;
  498. NotEndTag: boolean;
  499. begin
  500. if copy(Tag,1,1)='<' then Delete(Tag,1,1);
  501. if copy(Tag,length(Tag),1)='>' then Delete(Tag,length(Tag),1);
  502. Tag:=Trim(Tag);
  503. P:=Pos(' ',Tag); if P=0 then P:=length(Tag)+1;
  504. TagName:=copy(Tag,1,P-1); TagParams:=copy(Tag,P+1,255);
  505. UTagName:=UpcaseStr(TagName);
  506. NotEndTag:=copy(TagName,1,1)<>'/';
  507. if NotEndTag then ETagName:=UTagName else ETagName:=copy(UTagName,2,255);
  508. { <BR/> is also a Break tag... }
  509. if Copy(ETagName,Length(ETagName),1)='/' then
  510. begin
  511. ETagName:=copy(ETagName,1,Length(ETagName)-1);
  512. NotEndTag:=false;
  513. end;
  514. if (UTagName='!DOCTYPE') then DocTYPE else
  515. { Section tags }
  516. if (ETagName='HTML') then DocHTML(NotEndTag) else
  517. if (ETagName='HEAD') then DocHEAD(NotEndTag) else
  518. if (ETagName='TITLE') then DocTITLE(NotEndTag) else
  519. if (ETagName='BODY') then DocBODY(NotEndTag) else
  520. { Anchor tags }
  521. if (ETagName='A') then DocAnchor(NotEndTag) else
  522. { Direct formatting directives }
  523. if (ETagName='H1') then DocHeading(1,NotEndTag) else
  524. if (ETagName='H2') then DocHeading(2,NotEndTag) else
  525. if (ETagName='H3') then DocHeading(3,NotEndTag) else
  526. if (ETagName='H4') then DocHeading(4,NotEndTag) else
  527. if (ETagName='H5') then DocHeading(5,NotEndTag) else
  528. if (ETagName='H6') then DocHeading(6,NotEndTag) else
  529. if (ETagName='P') then DocParagraph(NotEndTag) else
  530. if (ETagName='BR') then DocBreak else
  531. if (ETagName='B') then DocBold(NotEndTag) else
  532. if (ETagName='CITE') then DocCite(NotEndTag) else
  533. if (ETagName='CODE') then DocCode(NotEndTag) else
  534. if (ETagName='EM') then DocEmphasized(NotEndTag) else
  535. if (ETagName='I') then DocItalic(NotEndTag) else
  536. if (ETagName='KBD') then DocKbd(NotEndTag) else
  537. if (ETagName='PRE') then DocPreformatted(NotEndTag) else
  538. if (ETagName='SAMP') then DocSample(NotEndTag) else
  539. if (ETagName='STRONG') then DocStrong(NotEndTag) else
  540. if (ETagName='TT') then DocTeleType(NotEndTag) else
  541. if (ETagName='VAR') then DocVariable(NotEndTag) else
  542. if (ETagName='SPAN') then DocSpan(NotEndTag) else
  543. if (ETagName='DIV') then DocDiv(NotEndTag) else
  544. { Unordered & ordered lists }
  545. if (ETagName='UL') then DocList(NotEndTag) else
  546. if (ETagName='OL') then DocOrderedList(NotEndTag) else
  547. if (ETagName='LI') then DocListItem(NotEndTag) else
  548. { Definition list }
  549. if (ETagName='DL') then DocDefList(NotEndTag) else
  550. if (ETagName='DT') then DocDefTerm(NotEndTag) else
  551. if (ETagName='DD') then DocDefExp(NotEndTag) else
  552. { Table }
  553. if (ETagName='TABLE') then DocTable(NotEndTag) else
  554. if (ETagName='TR') then DocTableRow(NotEndTag) else
  555. if (ETagName='TH') then DocTableHeaderItem(NotEndTag) else
  556. if (ETagName='TD') then DocTableItem(NotEndTag) else
  557. { Misc. tags }
  558. if (UTagName='META') then DocMETA else
  559. if (UTagName='IMG') then DocImage else
  560. if (UTagName='HR') then DocHorizontalRuler else
  561. DocUnknownTag;
  562. end;
  563. function THTMLParser.DocGetTagParam(Name: string; var Value: string): boolean;
  564. var Found: boolean;
  565. S: string;
  566. ParamName,ParamValue: string;
  567. InStr: boolean;
  568. I: sw_integer;
  569. begin
  570. Found:=false;
  571. Name:=UpcaseStr(Name);
  572. Value:='';
  573. S:=TagParams;
  574. repeat
  575. InStr:=false;
  576. ParamName:=''; ParamValue:='';
  577. S:=Trim(S); I:=1;
  578. while (I<=length(S)) and (S[I]<>'=') do
  579. begin
  580. if S[I]=' ' then
  581. ParamName:=''
  582. else
  583. ParamName:=ParamName+S[I];
  584. Inc(I);
  585. end;
  586. ParamName:=Trim(ParamName);
  587. if S[I]='=' then
  588. begin
  589. Inc(I); InStr:=false;
  590. while (I<=length(S)) and (S[I]=' ') do
  591. Inc(I);
  592. if (I<=length(S)) and (S[I]='"') then
  593. begin
  594. InStr:=true;
  595. Inc(I);
  596. end;
  597. while (I<=length(S)) and ((InStr=true) or (S[I]<>' ')) do
  598. begin
  599. if S[I]='"' then
  600. begin
  601. InStr:=not InStr;
  602. if InStr=false then Break;
  603. end
  604. else
  605. ParamValue:=ParamValue+S[I];
  606. Inc(I);
  607. end;
  608. end;
  609. Found:=(Name=UpcaseStr(ParamName));
  610. if Found then Value:=ParamValue;
  611. Delete(S,1,I);
  612. until Found or (S='');
  613. DocGetTagParam:=Found;
  614. end;
  615. procedure THTMLParser.DocProcessComment(Comment: string);
  616. begin
  617. end;
  618. procedure THTMLParser.DocUnknownTag;
  619. begin
  620. end;
  621. procedure THTMLParser.DocTYPE;
  622. begin
  623. end;
  624. procedure THTMLParser.DocHTML(Entered: boolean);
  625. begin
  626. end;
  627. procedure THTMLParser.DocHEAD(Entered: boolean);
  628. begin
  629. end;
  630. procedure THTMLParser.DocMETA;
  631. begin
  632. end;
  633. procedure THTMLParser.DocTITLE(Entered: boolean);
  634. begin
  635. end;
  636. procedure THTMLParser.DocBODY(Entered: boolean);
  637. begin
  638. end;
  639. procedure THTMLParser.DocAnchor(Entered: boolean);
  640. begin
  641. end;
  642. procedure THTMLParser.DocHeading(Level: integer; Entered: boolean);
  643. begin
  644. end;
  645. procedure THTMLParser.DocParagraph(Entered: boolean);
  646. begin
  647. end;
  648. procedure THTMLParser.DocBreak;
  649. begin
  650. end;
  651. procedure THTMLParser.DocImage;
  652. begin
  653. end;
  654. procedure THTMLParser.DocBold(Entered: boolean);
  655. begin
  656. end;
  657. procedure THTMLParser.DocCite(Entered: boolean);
  658. begin
  659. end;
  660. procedure THTMLParser.DocCode(Entered: boolean);
  661. begin
  662. end;
  663. procedure THTMLParser.DocEmphasized(Entered: boolean);
  664. begin
  665. end;
  666. procedure THTMLParser.DocItalic(Entered: boolean);
  667. begin
  668. end;
  669. procedure THTMLParser.DocKbd(Entered: boolean);
  670. begin
  671. end;
  672. procedure THTMLParser.DocPreformatted(Entered: boolean);
  673. begin
  674. end;
  675. procedure THTMLParser.DocSample(Entered: boolean);
  676. begin
  677. end;
  678. procedure THTMLParser.DocStrong(Entered: boolean);
  679. begin
  680. end;
  681. procedure THTMLParser.DocTeleType(Entered: boolean);
  682. begin
  683. end;
  684. procedure THTMLParser.DocVariable(Entered: boolean);
  685. begin
  686. end;
  687. procedure THTMLParser.DocSpan(Entered: boolean);
  688. begin
  689. end;
  690. procedure THTMLParser.DocDiv(Entered: boolean);
  691. var
  692. S: String;
  693. begin
  694. if Entered then
  695. begin
  696. if DocGetTagParam('CLASS',S) then
  697. if S='crosslinks' then
  698. begin
  699. DisableCrossIndexing:=true;
  700. {$ifdef DEBUG}
  701. DebugMessage(GetFileName,'Crosslinks found',Line,LinePos);
  702. {$endif DEBUG}
  703. end;
  704. end
  705. else
  706. begin
  707. {$ifdef DEBUG}
  708. if DisableCrossIndexing then
  709. begin
  710. DebugMessage(GetFileName,'Crosslinks end found',Line,LinePos);
  711. end;
  712. {$endif DEBUG}
  713. DisableCrossIndexing:=false;
  714. end;
  715. end;
  716. procedure THTMLParser.DocList(Entered: boolean);
  717. begin
  718. end;
  719. procedure THTMLParser.DocOrderedList(Entered: boolean);
  720. begin
  721. end;
  722. procedure THTMLParser.DocListItem(Entered: boolean);
  723. begin
  724. end;
  725. procedure THTMLParser.DocDefList(Entered: boolean);
  726. begin
  727. end;
  728. procedure THTMLParser.DocDefTerm(Entered: boolean);
  729. begin
  730. end;
  731. procedure THTMLParser.DocDefExp(Entered: boolean);
  732. begin
  733. end;
  734. procedure THTMLParser.DocTable(Entered: boolean);
  735. var
  736. S: String;
  737. begin
  738. if Entered then
  739. begin
  740. if DocGetTagParam('CLASS',S) then
  741. if S='bar' then
  742. begin
  743. DisableCrossIndexing:=true;
  744. {$ifdef DEBUG}
  745. DebugMessage(GetFileName,'Bar table found, cross indexing disabled ',Line,LinePos);
  746. {$endif DEBUG}
  747. end;
  748. end
  749. else
  750. begin
  751. {$ifdef DEBUG}
  752. if DisableCrossIndexing then
  753. begin
  754. DebugMessage(GetFileName,'Bar table end found',Line,LinePos);
  755. end;
  756. {$endif DEBUG}
  757. DisableCrossIndexing:=false;
  758. end;
  759. end;
  760. procedure THTMLParser.DocTableRow(Entered: boolean);
  761. begin
  762. end;
  763. procedure THTMLParser.DocTableHeaderItem(Entered: boolean);
  764. begin
  765. end;
  766. procedure THTMLParser.DocTableItem(Entered: boolean);
  767. begin
  768. end;
  769. procedure THTMLParser.DocHorizontalRuler;
  770. begin
  771. end;
  772. END.