whtml.pas 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 1998 by Berczi Gabor
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. unit WHTML;
  11. {$I globdir.inc}
  12. {$H-}
  13. interface
  14. uses Objects;
  15. type
  16. PTextFile = ^TTextFile;
  17. TTextFile = object(TObject)
  18. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  19. function GetFileName : string; virtual;
  20. end;
  21. PMemoryTextFile = ^TMemoryTextFile;
  22. TMemoryTextFile = object(TTextFile)
  23. constructor Init;
  24. procedure AddLine(const S: string); virtual;
  25. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  26. function GetFileName : string; virtual;
  27. function GetLineCount : sw_integer;
  28. destructor Done; virtual;
  29. private
  30. Lines : PUnsortedStrCollection;
  31. end;
  32. PDOSTextFile = ^TDOSTextFile;
  33. TDOSTextFile = object(TMemoryTextFile)
  34. constructor Init(AFileName: string);
  35. function GetFileName : string; virtual;
  36. private
  37. DosFileName : string;
  38. end;
  39. PSGMLParser = ^TSGMLParser;
  40. TSGMLParser = object(TObject)
  41. constructor Init;
  42. function Process(HTMLFile: PTextFile): boolean; virtual;
  43. function ProcessLine(LineText: string): boolean; virtual;
  44. destructor Done; virtual;
  45. public
  46. Line,LinePos: sw_integer;
  47. procedure DocSoftBreak; virtual;
  48. function GetFileName : string;
  49. function DocAddTextChar(C: AnsiChar): boolean; virtual;
  50. procedure DocAddText(S: string); virtual;
  51. procedure DocProcessTag(Tag: string); virtual;
  52. procedure DocProcessComment(Comment: string); virtual;
  53. function DocDecodeNamedEntity(Name: string; var Entity: string): boolean; virtual;
  54. private
  55. CurTag: string;
  56. FileName : string;
  57. InTag,InComment,InString: boolean;
  58. end;
  59. PHTMLParser = ^THTMLParser;
  60. THTMLParser = object(TSGMLParser)
  61. procedure DocSoftBreak; virtual;
  62. function DocAddTextChar(C: AnsiChar): boolean; virtual;
  63. procedure DocProcessTag(Tag: string); virtual;
  64. function DocGetTagParam(Name: string; var Value: string): boolean; virtual;
  65. procedure DocProcessComment(Comment: string); virtual;
  66. function DocDecodeNamedEntity(Name: string; var E: string): boolean; virtual;
  67. public
  68. TagName,TagParams: string;
  69. DisableCrossIndexing : boolean;
  70. procedure DocUnknownTag; virtual;
  71. procedure DocTYPE; virtual;
  72. procedure DocHTML(Entered: boolean); virtual;
  73. procedure DocHEAD(Entered: boolean); virtual;
  74. procedure DocMETA; virtual;
  75. procedure DocTITLE(Entered: boolean); virtual;
  76. procedure DocBODY(Entered: boolean); virtual;
  77. procedure DocAnchor(Entered: boolean); virtual;
  78. procedure DocHeading(Level: integer; Entered: boolean); virtual;
  79. procedure DocParagraph(Entered: boolean); virtual;
  80. procedure DocBreak; virtual;
  81. procedure DocImage; virtual;
  82. procedure DocBold(Entered: boolean); virtual;
  83. procedure DocCite(Entered: boolean); virtual;
  84. procedure DocCode(Entered: boolean); virtual;
  85. procedure DocEmphasized(Entered: boolean); virtual;
  86. procedure DocItalic(Entered: boolean); virtual;
  87. procedure DocKbd(Entered: boolean); virtual;
  88. procedure DocPreformatted(Entered: boolean); virtual;
  89. procedure DocSample(Entered: boolean); virtual;
  90. procedure DocStrong(Entered: boolean); virtual;
  91. procedure DocTeleType(Entered: boolean); virtual;
  92. procedure DocVariable(Entered: boolean); virtual;
  93. procedure DocSpan(Entered: boolean); virtual;
  94. procedure DocDiv(Entered: boolean); virtual;
  95. procedure DocList(Entered: boolean); virtual;
  96. procedure DocOrderedList(Entered: boolean); virtual;
  97. procedure DocListItem(Entered: boolean); virtual;
  98. procedure DocDefList(Entered: boolean); virtual;
  99. procedure DocDefTerm(Entered: boolean); virtual;
  100. procedure DocDefExp(Entered: boolean); virtual;
  101. procedure DocTable(Entered: boolean); virtual;
  102. procedure DocTableRow(Entered: boolean); virtual;
  103. procedure DocTableHeaderItem(Entered: boolean); virtual;
  104. procedure DocTableItem(Entered: boolean); virtual;
  105. procedure DocHorizontalRuler; virtual;
  106. end;
  107. Type
  108. PTopicLinkCollection = ^TTopicLinkCollection;
  109. TTopicLinkCollection = object(TStringCollection)
  110. procedure Insert(Item: Pointer); virtual;
  111. function At(Index: sw_Integer): PString;
  112. function AddItem(Item: string): sw_integer;
  113. end;
  114. function EncodeHTMLCtx(FileID: integer; LinkNo: word): longint;
  115. procedure DecodeHTMLCtx(Ctx: longint; var FileID: word; var LinkNo: word);
  116. implementation
  117. uses
  118. WUtils;
  119. function TTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  120. begin
  121. Abstract;
  122. GetLine:=false;
  123. end;
  124. function TTextFile.GetFileName : string;
  125. begin
  126. GetFileName:='unknown';
  127. end;
  128. constructor TMemoryTextFile.Init;
  129. begin
  130. inherited Init;
  131. New(Lines, Init(500,500));
  132. end;
  133. function TMemoryTextFile.GetFileName : string;
  134. begin
  135. GetFileName:='unknown';
  136. end;
  137. function TMemoryTextFile.GetLineCount : sw_integer;
  138. begin
  139. GetLineCount:=Lines^.Count;
  140. end;
  141. procedure TMemoryTextFile.AddLine(const S: string);
  142. begin
  143. Lines^.Insert(NewStr(S));
  144. end;
  145. function TMemoryTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  146. var OK: boolean;
  147. PS: PString;
  148. begin
  149. OK:=(Lines<>nil) and (Idx<Lines^.Count);
  150. if OK then
  151. begin
  152. PS:=Lines^.At(Idx);
  153. if PS=nil then S:='' else S:=PS^;
  154. end;
  155. GetLine:=OK;
  156. end;
  157. destructor TMemoryTextFile.Done;
  158. begin
  159. if Lines<>nil then
  160. Dispose(Lines, Done);
  161. Lines:=nil;
  162. inherited Done;
  163. end;
  164. constructor TDOSTextFile.Init(AFileName: string);
  165. var f: file;
  166. linecomplete,hasCR: boolean;
  167. S: string;
  168. OldFMode : Integer;
  169. begin
  170. inherited Init;
  171. if AFileName='' then Fail;
  172. {$I-}
  173. Assign(f,AFileName);
  174. OldFMode:= FileMode;
  175. FileMode:= 0;
  176. Reset(f,1);
  177. FileMode:= OldFMode;
  178. if IOResult<>0 then Fail;
  179. DosFileName:=AFileName;
  180. Dispose(Lines,Done);
  181. New(Lines, Init(500,2000));
  182. while (Eof(f)=false) and (IOResult=0) do
  183. begin
  184. ReadlnFromFile(f,S,linecomplete,hasCR,true);
  185. AddLine(S);
  186. end;
  187. Close(f);
  188. {$I+}
  189. end;
  190. function TDosTextFile.GetFileName : string;
  191. begin
  192. GetFileName:=DosFileName;
  193. end;
  194. constructor TSGMLParser.Init;
  195. begin
  196. inherited Init;
  197. FileName:='';
  198. end;
  199. function TSGMLParser.GetFileName : string;
  200. begin
  201. GetFileName:=FileName;
  202. end;
  203. function TSGMLParser.Process(HTMLFile: PTextFile): boolean;
  204. var S: string;
  205. OK,LineOK: boolean;
  206. begin
  207. if HTMLFile=nil then Exit;
  208. InTag:=false; InComment:=false; InString:=false; CurTag:='';
  209. Line:=0; OK:=true;
  210. FileName:=HTMLFile^.GetFileName;
  211. repeat
  212. LineOK:=HTMLFile^.GetLine(Line,S);
  213. if LineOK then
  214. begin
  215. Inc(Line);
  216. OK:=ProcessLine(S);
  217. end;
  218. until (LineOK=false) or (OK=false);
  219. Process:=OK;
  220. end;
  221. function TSGMLParser.ProcessLine(LineText: string): boolean;
  222. var OK: boolean;
  223. C: AnsiChar;
  224. NewInString: boolean;
  225. OldInComment: boolean;
  226. WasThereAnyText: boolean;
  227. Pos2: integer;
  228. Name,Entity: string;
  229. LiteralCode: boolean;
  230. LiteralStart,LiteralEnd,P: integer;
  231. const TabSize : integer = 8;
  232. Tab = #9;
  233. begin
  234. WasThereAnyText:=false;
  235. OK:=true; LinePos:=1;
  236. LiteralStart:=0; LiteralEnd:=0;
  237. repeat
  238. P:=Pos(TAB,LineText);
  239. if P>0 then
  240. LineText:=copy(LineText,1,P-1)+CharStr(' ',TabSize)+copy(LineText,P+1,255);
  241. until P=0;
  242. while (LinePos<=length(LineText)) and OK do
  243. begin
  244. LiteralCode:=false;
  245. NewInString:=InString; OldInComment:=InComment;
  246. C:=LineText[LinePos];
  247. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  248. if (LiteralCode=false) and (C='&') then
  249. begin
  250. LiteralStart:=0; LiteralEnd:=0;
  251. Name:=''; Pos2:=LinePos+1;
  252. while (Pos2<=length(LineText)) and (LineText[Pos2]<>';') do
  253. begin
  254. Name:=Name+LineText[Pos2];
  255. Inc(Pos2);
  256. end;
  257. Inc(Pos2);
  258. if DocDecodeNamedEntity(Name,Entity) then
  259. begin
  260. LineText:=copy(LineText,1,LinePos-1)+Entity+copy(LineText,Pos2,255);
  261. LiteralStart:=LinePos; LiteralEnd:=LiteralStart+length(Entity)-1;
  262. C:=LineText[LinePos];
  263. end;
  264. end;
  265. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  266. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=false) then
  267. NewInString:=true;
  268. if (LiteralCode=false) and (C='<') and (InTag=false) then
  269. InTag:=true;
  270. if InTag then CurTag:=CurTag+C else
  271. WasThereAnyText:=DocAddTextChar(C);
  272. if (LiteralCode=false) and InTag and (InString=false) and (CurTag='<!--') then
  273. InComment:=true;
  274. { A comment can be longer than 255 chars
  275. move the test to LineText string,
  276. This is why the Previous, Next and Up Tags where not working ... PM
  277. if (LiteralCode=false) and InTag and InComment and (InString=false) and (length(CurTag)>=3) and
  278. (copy(CurTag,length(CurTag)-2,3)='-->') then
  279. InComment:=false; }
  280. if (LiteralCode=false) and InTag and InComment and (InString=false) and (LinePos>=3) and
  281. (copy(LineText,LinePos-2,3)='-->') then
  282. InComment:=false;
  283. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=true) then
  284. NewInString:=false;
  285. if (LiteralCode=false) and (C='>') and (InTag=true) then
  286. begin
  287. InTag:=false;
  288. if OldInComment then
  289. DocProcessComment(CurTag)
  290. else
  291. DocProcessTag(CurTag);
  292. CurTag:='';
  293. end;
  294. InString:=NewInString;
  295. Inc(LinePos);
  296. end;
  297. { whtml does not depend on whelp,
  298. so I can not use hscLineBreak here. PM }
  299. if InTag then
  300. begin
  301. if InString then
  302. CurTag:=CurTag+#0;
  303. end
  304. else if WasThereAnyText then
  305. DocSoftBreak;
  306. ProcessLine:=true;
  307. end;
  308. procedure TSGMLParser.DocSoftBreak;
  309. begin
  310. Abstract;
  311. end;
  312. function TSGMLParser.DocAddTextChar(C: AnsiChar): boolean;
  313. begin
  314. Abstract;
  315. DocAddTextChar:=false;
  316. end;
  317. procedure TSGMLParser.DocAddText(S: string);
  318. var I: sw_integer;
  319. begin
  320. for I:=1 to length(S) do
  321. DocAddTextChar(S[I]);
  322. end;
  323. function TSGMLParser.DocDecodeNamedEntity(Name: string; var Entity: string): boolean;
  324. begin
  325. DocDecodeNamedEntity:=false;
  326. end;
  327. procedure TSGMLParser.DocProcessTag(Tag: string);
  328. begin
  329. Abstract;
  330. end;
  331. procedure TSGMLParser.DocProcessComment(Comment: string);
  332. begin
  333. Abstract;
  334. end;
  335. destructor TSGMLParser.Done;
  336. begin
  337. inherited Done;
  338. end;
  339. procedure THTMLParser.DocSoftBreak;
  340. begin
  341. end;
  342. function THTMLParser.DocAddTextChar(C: AnsiChar): boolean;
  343. begin
  344. { Abstract }
  345. DocAddTextChar:=false;
  346. end;
  347. function THTMLParser.DocDecodeNamedEntity(Name: string; var E: string): boolean;
  348. var Found: boolean;
  349. Code: word;
  350. CC: word;
  351. begin
  352. Found:=true; Code:=$ffff;
  353. Name:=LowCaseStr(Name);
  354. if copy(Name,1,1)='#' then
  355. begin
  356. if Name[2]='x' then
  357. Val('$'+copy(Name,3,255),Code,CC)
  358. else
  359. Val(copy(Name,2,255),Code,CC);
  360. if CC<>0 then
  361. begin
  362. {$ifdef DEBUG}
  363. DebugMessage(FileName,'NamedEntity '+Name+' not converted',1,1);
  364. {$endif DEBUG}
  365. Code:=$ffff;
  366. end;
  367. end;
  368. { #0 to #127 is same for Unicode and Code page 437 }
  369. if (code<=127) then
  370. begin
  371. E:=chr(code);
  372. DocDecodeNamedEntity:=true;
  373. exit;
  374. end;
  375. if (Code=$22{34}) or (Name='quot') then E:='"' else { double quote sign }
  376. if (Code=$26{38}) or (Name='amp') then E:='&' else { ampersand }
  377. if (Code=$27{39}) or (Name='apos') then E:='''' else { apostrophe }
  378. if (Code=$3C{60}) or (Name='lt') then E:='<' else { less-than sign }
  379. if (Code=$3E{62}) or (Name='gt') then E:='>' else { greater-than sign }
  380. if (Code=$5B) then E:='[' else { [ }
  381. if (Code=$5C) then E:='\' else { \ }
  382. if (Code=$5D) then E:=']' else { ] }
  383. if (Code=$5E) then E:='^' else { ^ }
  384. if (Code=$5F) then E:='_' else { _ }
  385. if (Code=160) or (Name='nbsp') then E:=#255 else { no-break space }
  386. if (Code=161) or (Name='iexcl') then E:='­' else { inverted exclamation mark }
  387. if (Code=162) or (Name='cent') then E:='›' else { cent sign }
  388. if (Code=163) or (Name='pound') then E:='œ' else { pound sterling sign }
  389. if (Code=164) or (Name='curren') then E:='$' else { general currency sign }
  390. if (Code=165) or (Name='yen') then E:='' else { yen sign }
  391. if (Code=166) or (Name='brvbar') then E:='|' else { broken vertical bar }
  392. if (Code=167) or (Name='sect') then E:='' else { section sign }
  393. if (Code=168) or (Name='uml') then E:='"' else { umlaut (dieresis) }
  394. if (Code=169) or (Name='copy') then E:='(C)' else { copyright sign }
  395. (* if (Code=170) or (Name='ordf') then E:=#255 else { ordinal indicator, feminine }*)
  396. if (Code=171) or (Name='laquo') then E:='"' else { angle quotation mark -left }
  397. if (Code=172) or (Name='not') then E:='!' else { not sign }
  398. if (Code=173) or (Name='shy') then E:='-' else { soft hypen }
  399. if (Code=174) or (Name='reg') then E:='(R)' else { registered sign }
  400. (* if (Code=175) or (Name='macr') then E:='?' else { macron }*)
  401. if (Code=176) or (Name='deg') then E:='ø' else { degree sign }
  402. if (Code=177) or (Name='plusmn') then E:='ñ' else { plus-or-minus sign }
  403. if (Code=178) or (Name='sup2') then E:='ý' else { superscript 2 }
  404. if (Code=179) or (Name='sup3') then E:='^3' else { superscript 3 }
  405. if (Code=180) or (Name='acute') then E:='''' else { acute accent }
  406. if (Code=181) or (Name='micro') then E:='æ' else { micro sign }
  407. (* if (Code=182) or (Name='para') then E:='?' else { paragraph sign }*)
  408. if (Code=183) or (Name='middot') then E:='ù' else { middle dot }
  409. (* if (Code=184) or (Name='cedil') then E:='?' else { cedilla }*)
  410. if (Code=185) or (Name='sup1') then E:='^1' else { superscript 1 }
  411. (* if (Code=186) or (Name='ordm') then E:='?' else { ordinal indicator, masculine }*)
  412. if (Code=187) or (Name='raquo') then E:='"' else { angle quoatation mark -right }
  413. if (Code=188) or (Name='frac14') then E:='¬' else { fraction one-quarter }
  414. if (Code=189) or (Name='frac12') then E:='«' else { fraction one-half }
  415. if (Code=190) or (Name='frac34') then E:='3/4' else { fraction three-quarters }
  416. if (Code=191) or (Name='iquest') then E:='¨' else { inverted question mark }
  417. if (Code=192) or (Name='Agrave') then E:='A' else { capital A, grave accent }
  418. if (Code=193) or (Name='Aacute') then E:='A' else { capital A, acute accent }
  419. if (Code=194) or (Name='Acirc') then E:='A' else { capital A, circumflex accent }
  420. if (Code=195) or (Name='Atilde') then E:='A' else { capital A, tilde accent }
  421. if (Code=196) or (Name='Auml') then E:='Ž' else { capital A, dieresis or umlaut }
  422. if (Code=197) or (Name='Aring') then E:='' else { capital A, ring }
  423. if (Code=198) or (Name='AElig') then E:='’' else { capital AE diphthong }
  424. if (Code=199) or (Name='Ccedil') then E:='€' else { capital C, cedilla }
  425. if (Code=200) or (Name='Egrave') then E:='' else { capital E, grave accent }
  426. if (Code=201) or (Name='Eacute') then E:='' else { capital E, acute accent }
  427. if (Code=202) or (Name='Ecirc') then E:='E' else { capital E, circumflex accent }
  428. if (Code=203) or (Name='Euml') then E:='E' else { capital E, dieresis or umlaut }
  429. if (Code=204) or (Name='Igrave') then E:='I' else { capital I, grave accent }
  430. if (Code=205) or (Name='Iacute') then E:='I' else { capital I, acute accent }
  431. if (Code=206) or (Name='Icirc') then E:='I' else { capital I, circumflex accent }
  432. if (Code=207) or (Name='Iuml') then E:='I' else { capital I, dieresis or umlaut }
  433. (* if (Code=208) or (Name='ETH') then E:='?' else { capital Eth, Icelandic }*)
  434. if (Code=209) or (Name='Ntidle') then E:='¥' else { capital N, tilde }
  435. if (Code=210) or (Name='Ograve') then E:='O' else { capital O, grave accent }
  436. if (Code=211) or (Name='Oacute') then E:='O' else { capital O, acute accent }
  437. if (Code=212) or (Name='Ocirc') then E:='O' else { capital O, circumflex accent }
  438. if (Code=213) or (Name='Otilde') then E:='O' else { capital O, tilde }
  439. if (Code=214) or (Name='Ouml') then E:='™' else { capital O, dieresis or umlaut }
  440. if (Code=215) or (Name='times') then E:='*' else { multiply sign }
  441. if (Code=216) or (Name='Oslash') then E:='O' else { capital O, slash }
  442. if (Code=217) or (Name='Ugrave') then E:='U' else { capital U, grave accent }
  443. if (Code=218) or (Name='Uacute') then E:='U' else { capital U, acute accent }
  444. if (Code=219) or (Name='Ucirc') then E:='U' else { capital U, circumflex accent }
  445. if (Code=220) or (Name='Uuml') then E:='š' else { capital U, dieresis or umlaut }
  446. if (Code=221) or (Name='Yacute') then E:='Y' else { capital Y, acute accent }
  447. (* if (Code=222) or (Name='THORN') then E:='?' else { capital THORN, Icelandic }*)
  448. if (Code=223) or (Name='szlig') then E:='á' else { small sharp S, German }
  449. if (Code=224) or (Name='agrave') then E:='…' else { small a, grave accent }
  450. if (Code=225) or (Name='aacute') then E:=' ' else { small a, acute accent }
  451. if (Code=226) or (Name='acirc') then E:='ƒ' else { small a, circumflex accent }
  452. if (Code=227) or (Name='atilde') then E:='ƒ' else { small a, tilde }
  453. if (Code=228) or (Name='auml') then E:='„' else { small a, dieresis or umlaut }
  454. if (Code=229) or (Name='aring') then E:='†' else { small a, ring }
  455. if (Code=230) or (Name='aelig') then E:='ae' else { small ae, diphthong }
  456. if (Code=231) or (Name='ccedil') then E:='‡' else { small c, cedilla }
  457. if (Code=232) or (Name='egrave') then E:='Š' else { small e, grave accent }
  458. if (Code=233) or (Name='eacute') then E:='‚' else { small e, acute accent }
  459. if (Code=234) or (Name='ecirc') then E:='ˆ' else { small e, circumflex accent }
  460. if (Code=235) or (Name='euml') then E:='‰' else { small e, dieresis or umlaut }
  461. if (Code=236) or (Name='igrave') then E:='' else { small i, grave accent }
  462. if (Code=237) or (Name='iacute') then E:='¡' else { small i, acute accent }
  463. if (Code=238) or (Name='icirc') then E:='Œ' else { small i, circumflex accent }
  464. if (Code=239) or (Name='iuml') then E:='‹' else { small i, dieresis or umlaut }
  465. (* if (Code=240) or (Name='eth') then E:='?' else { small eth, Icelandic }*)
  466. if (Code=241) or (Name='ntilde') then E:='¤' else { small n, tilde }
  467. if (Code=242) or (Name='ograve') then E:='•' else { small o, grave accent }
  468. if (Code=243) or (Name='oacute') then E:='¢' else { small o, acute accent }
  469. if (Code=244) or (Name='ocirc') then E:='“' else { small o, circumflex accent }
  470. if (Code=245) or (Name='otilde') then E:='“' else { small o, tilde }
  471. if (Code=246) or (Name='ouml') then E:='”' else { small o, dieresis or umlaut }
  472. if (Code=247) or (Name='divide') then E:='/' else { divide sign }
  473. if (Code=248) or (Name='oslash') then E:='"' else { small o, slash }
  474. if (Code=249) or (Name='ugrave') then E:='—' else { small u, grave accent }
  475. if (Code=250) or (Name='uacute') then E:='£' else { small u, acute accent }
  476. if (Code=251) or (Name='ucirc') then E:='–' else { small u, circumflex accent }
  477. if (Code=252) or (Name='uuml') then E:='' else { small u, dieresis or umlaut }
  478. if (Code=253) or (Name='yacute') then E:='y' else { small y, acute accent }
  479. (* if (Code=254) or (Name='thorn') then E:='?' else { small thorn, Icelandic }*)
  480. if (Code=255) or (Name='yuml') then E:='y' else { small y, dieresis or umlaut }
  481. { Special codes appearing in TeXH generated files }
  482. if (code=$2c6{710}) or (Name='circ') then E:='^' else { Modifier Letter Circumflex Accent }
  483. if (code=$2dc{732}) or (Name='tilde') then E:='~' else { Small tilde }
  484. if (code=$2013{8211}) or (Name='endash') then E:='-' else { En dash }
  485. if (code=$2014{8212}) or (Name='emdash') then E:='--' else { Em dash }
  486. if (Code=$2018{8216}) or (Name='lsquo') then E:='`' else { Acute accent as generated by TeXH }
  487. if (Code=$2019{8217}) or (Name='rsquo') then E:='''' else { acute accent as generated by TeXH }
  488. if (code=$201C{8220}) or (Name='ldquo') then E:='''''' else { left double quotation marks }
  489. if (code=$201D{8221}) or (Name='rdquo') then E:='``' else { right double quotation marks }
  490. if (code=$2026{8230}) or (Name='hellip') then E:='...' else { horizontal ellipsis }
  491. if (Code=$FB00) then E:='ff' else { ff together }
  492. if (Code=$FB01) then E:='fi' else { fi together }
  493. if (Code=$FB02) then E:='fl' else { fl together }
  494. if (Code=$FB03) then E:='ffi' else { ffi together }
  495. if (Code=$FB04) then E:='ffl' else { ffl together }
  496. Found:=false;
  497. DocDecodeNamedEntity:=Found;
  498. {$ifdef DEBUG}
  499. if (Code<>$ffff) and not found then
  500. begin
  501. DebugMessage(FileName,'NamedEntity '+Name+' not handled',1,1);
  502. end;
  503. {$endif DEBUG}
  504. end;
  505. procedure THTMLParser.DocProcessTag(Tag: string);
  506. var UTagName,ETagName: string[30];
  507. P: byte;
  508. NotEndTag: boolean;
  509. begin
  510. if copy(Tag,1,1)='<' then Delete(Tag,1,1);
  511. if copy(Tag,length(Tag),1)='>' then Delete(Tag,length(Tag),1);
  512. Tag:=Trim(Tag);
  513. P:=Pos(' ',Tag); if P=0 then P:=length(Tag)+1;
  514. TagName:=copy(Tag,1,P-1); TagParams:=copy(Tag,P+1,255);
  515. UTagName:=UpcaseStr(TagName);
  516. NotEndTag:=copy(TagName,1,1)<>'/';
  517. if NotEndTag then ETagName:=UTagName else ETagName:=copy(UTagName,2,255);
  518. { <BR/> is also a Break tag... }
  519. if Copy(ETagName,Length(ETagName),1)='/' then
  520. begin
  521. ETagName:=copy(ETagName,1,Length(ETagName)-1);
  522. NotEndTag:=false;
  523. end;
  524. if (UTagName='!DOCTYPE') then DocTYPE else
  525. { Section tags }
  526. if (ETagName='HTML') then DocHTML(NotEndTag) else
  527. if (ETagName='HEAD') then DocHEAD(NotEndTag) else
  528. if (ETagName='TITLE') then DocTITLE(NotEndTag) else
  529. if (ETagName='BODY') then DocBODY(NotEndTag) else
  530. { Anchor tags }
  531. if (ETagName='A') then DocAnchor(NotEndTag) else
  532. { Direct formatting directives }
  533. if (ETagName='H1') then DocHeading(1,NotEndTag) else
  534. if (ETagName='H2') then DocHeading(2,NotEndTag) else
  535. if (ETagName='H3') then DocHeading(3,NotEndTag) else
  536. if (ETagName='H4') then DocHeading(4,NotEndTag) else
  537. if (ETagName='H5') then DocHeading(5,NotEndTag) else
  538. if (ETagName='H6') then DocHeading(6,NotEndTag) else
  539. if (ETagName='P') then DocParagraph(NotEndTag) else
  540. if (ETagName='BR') then DocBreak else
  541. if (ETagName='B') then DocBold(NotEndTag) else
  542. if (ETagName='CITE') then DocCite(NotEndTag) else
  543. if (ETagName='CODE') then DocCode(NotEndTag) else
  544. if (ETagName='EM') then DocEmphasized(NotEndTag) else
  545. if (ETagName='I') then DocItalic(NotEndTag) else
  546. if (ETagName='KBD') then DocKbd(NotEndTag) else
  547. if (ETagName='PRE') then DocPreformatted(NotEndTag) else
  548. if (ETagName='SAMP') then DocSample(NotEndTag) else
  549. if (ETagName='STRONG') then DocStrong(NotEndTag) else
  550. if (ETagName='TT') then DocTeleType(NotEndTag) else
  551. if (ETagName='VAR') then DocVariable(NotEndTag) else
  552. if (ETagName='SPAN') then DocSpan(NotEndTag) else
  553. if (ETagName='DIV') then DocDiv(NotEndTag) else
  554. { Unordered & ordered lists }
  555. if (ETagName='UL') then DocList(NotEndTag) else
  556. if (ETagName='OL') then DocOrderedList(NotEndTag) else
  557. if (ETagName='LI') then DocListItem(NotEndTag) else
  558. { Definition list }
  559. if (ETagName='DL') then DocDefList(NotEndTag) else
  560. if (ETagName='DT') then DocDefTerm(NotEndTag) else
  561. if (ETagName='DD') then DocDefExp(NotEndTag) else
  562. { Table }
  563. if (ETagName='TABLE') then DocTable(NotEndTag) else
  564. if (ETagName='TR') then DocTableRow(NotEndTag) else
  565. if (ETagName='TH') then DocTableHeaderItem(NotEndTag) else
  566. if (ETagName='TD') then DocTableItem(NotEndTag) else
  567. { Misc. tags }
  568. if (UTagName='META') then DocMETA else
  569. if (UTagName='IMG') then DocImage else
  570. if (UTagName='HR') then DocHorizontalRuler else
  571. DocUnknownTag;
  572. end;
  573. function THTMLParser.DocGetTagParam(Name: string; var Value: string): boolean;
  574. var Found: boolean;
  575. S: string;
  576. ParamName,ParamValue: string;
  577. InStr: boolean;
  578. I: sw_integer;
  579. begin
  580. Found:=false;
  581. Name:=UpcaseStr(Name);
  582. Value:='';
  583. S:=TagParams;
  584. repeat
  585. InStr:=false;
  586. ParamName:=''; ParamValue:='';
  587. S:=Trim(S); I:=1;
  588. while (I<=length(S)) and (S[I]<>'=') do
  589. begin
  590. if S[I]=' ' then
  591. ParamName:=''
  592. else
  593. ParamName:=ParamName+S[I];
  594. Inc(I);
  595. end;
  596. ParamName:=Trim(ParamName);
  597. if S[I]='=' then
  598. begin
  599. Inc(I); InStr:=false;
  600. while (I<=length(S)) and (S[I]=' ') do
  601. Inc(I);
  602. if (I<=length(S)) and (S[I]='"') then
  603. begin
  604. InStr:=true;
  605. Inc(I);
  606. end;
  607. while (I<=length(S)) and ((InStr=true) or (S[I]<>' ')) do
  608. begin
  609. if S[I]='"' then
  610. begin
  611. InStr:=not InStr;
  612. if InStr=false then Break;
  613. end
  614. else
  615. ParamValue:=ParamValue+S[I];
  616. Inc(I);
  617. end;
  618. end;
  619. Found:=(Name=UpcaseStr(ParamName));
  620. if Found then Value:=ParamValue;
  621. Delete(S,1,I);
  622. until Found or (S='');
  623. DocGetTagParam:=Found;
  624. end;
  625. procedure THTMLParser.DocProcessComment(Comment: string);
  626. begin
  627. end;
  628. procedure THTMLParser.DocUnknownTag;
  629. begin
  630. end;
  631. procedure THTMLParser.DocTYPE;
  632. begin
  633. end;
  634. procedure THTMLParser.DocHTML(Entered: boolean);
  635. begin
  636. end;
  637. procedure THTMLParser.DocHEAD(Entered: boolean);
  638. begin
  639. end;
  640. procedure THTMLParser.DocMETA;
  641. begin
  642. end;
  643. procedure THTMLParser.DocTITLE(Entered: boolean);
  644. begin
  645. end;
  646. procedure THTMLParser.DocBODY(Entered: boolean);
  647. begin
  648. end;
  649. procedure THTMLParser.DocAnchor(Entered: boolean);
  650. begin
  651. end;
  652. procedure THTMLParser.DocHeading(Level: integer; Entered: boolean);
  653. begin
  654. end;
  655. procedure THTMLParser.DocParagraph(Entered: boolean);
  656. begin
  657. end;
  658. procedure THTMLParser.DocBreak;
  659. begin
  660. end;
  661. procedure THTMLParser.DocImage;
  662. begin
  663. end;
  664. procedure THTMLParser.DocBold(Entered: boolean);
  665. begin
  666. end;
  667. procedure THTMLParser.DocCite(Entered: boolean);
  668. begin
  669. end;
  670. procedure THTMLParser.DocCode(Entered: boolean);
  671. begin
  672. end;
  673. procedure THTMLParser.DocEmphasized(Entered: boolean);
  674. begin
  675. end;
  676. procedure THTMLParser.DocItalic(Entered: boolean);
  677. begin
  678. end;
  679. procedure THTMLParser.DocKbd(Entered: boolean);
  680. begin
  681. end;
  682. procedure THTMLParser.DocPreformatted(Entered: boolean);
  683. begin
  684. end;
  685. procedure THTMLParser.DocSample(Entered: boolean);
  686. begin
  687. end;
  688. procedure THTMLParser.DocStrong(Entered: boolean);
  689. begin
  690. end;
  691. procedure THTMLParser.DocTeleType(Entered: boolean);
  692. begin
  693. end;
  694. procedure THTMLParser.DocVariable(Entered: boolean);
  695. begin
  696. end;
  697. procedure THTMLParser.DocSpan(Entered: boolean);
  698. begin
  699. end;
  700. procedure THTMLParser.DocDiv(Entered: boolean);
  701. var
  702. S: String;
  703. begin
  704. if Entered then
  705. begin
  706. if DocGetTagParam('CLASS',S) then
  707. if S='crosslinks' then
  708. begin
  709. DisableCrossIndexing:=true;
  710. {$ifdef DEBUG}
  711. DebugMessage(GetFileName,'Crosslinks found',Line,LinePos);
  712. {$endif DEBUG}
  713. end;
  714. end
  715. else
  716. begin
  717. {$ifdef DEBUG}
  718. if DisableCrossIndexing then
  719. begin
  720. DebugMessage(GetFileName,'Crosslinks end found',Line,LinePos);
  721. end;
  722. {$endif DEBUG}
  723. DisableCrossIndexing:=false;
  724. end;
  725. end;
  726. procedure THTMLParser.DocList(Entered: boolean);
  727. begin
  728. end;
  729. procedure THTMLParser.DocOrderedList(Entered: boolean);
  730. begin
  731. end;
  732. procedure THTMLParser.DocListItem(Entered: boolean);
  733. begin
  734. end;
  735. procedure THTMLParser.DocDefList(Entered: boolean);
  736. begin
  737. end;
  738. procedure THTMLParser.DocDefTerm(Entered: boolean);
  739. begin
  740. end;
  741. procedure THTMLParser.DocDefExp(Entered: boolean);
  742. begin
  743. end;
  744. procedure THTMLParser.DocTable(Entered: boolean);
  745. var
  746. S: String;
  747. begin
  748. if Entered then
  749. begin
  750. if DocGetTagParam('CLASS',S) then
  751. if S='bar' then
  752. begin
  753. DisableCrossIndexing:=true;
  754. {$ifdef DEBUG}
  755. DebugMessage(GetFileName,'Bar table found, cross indexing disabled ',Line,LinePos);
  756. {$endif DEBUG}
  757. end;
  758. end
  759. else
  760. begin
  761. {$ifdef DEBUG}
  762. if DisableCrossIndexing then
  763. begin
  764. DebugMessage(GetFileName,'Bar table end found',Line,LinePos);
  765. end;
  766. {$endif DEBUG}
  767. DisableCrossIndexing:=false;
  768. end;
  769. end;
  770. procedure THTMLParser.DocTableRow(Entered: boolean);
  771. begin
  772. end;
  773. procedure THTMLParser.DocTableHeaderItem(Entered: boolean);
  774. begin
  775. end;
  776. procedure THTMLParser.DocTableItem(Entered: boolean);
  777. begin
  778. end;
  779. procedure THTMLParser.DocHorizontalRuler;
  780. begin
  781. end;
  782. function EncodeHTMLCtx(FileID: integer; LinkNo: word): longint;
  783. var Ctx: longint;
  784. begin
  785. Ctx:=(longint(FileID) shl 16)+LinkNo;
  786. EncodeHTMLCtx:=Ctx;
  787. end;
  788. procedure DecodeHTMLCtx(Ctx: longint; var FileID: word; var LinkNo: word);
  789. begin
  790. if (Ctx shr 16)=0 then
  791. begin
  792. FileID:=$ffff; LinkNo:=0;
  793. end
  794. else
  795. begin
  796. FileID:=Ctx shr 16; LinkNo:=Ctx and $ffff;
  797. end;
  798. end;
  799. procedure TTopicLinkCollection.Insert(Item: Pointer);
  800. begin
  801. AtInsert(Count,Item);
  802. end;
  803. function TTopicLinkCollection.At(Index: sw_Integer): PString;
  804. begin
  805. At:=inherited At(Index);
  806. end;
  807. function TTopicLinkCollection.AddItem(Item: string): sw_integer;
  808. var Idx: sw_integer;
  809. begin
  810. if Item='' then Idx:=-1 else
  811. if Search(@Item,Idx)=false then
  812. begin
  813. AtInsert(Count,NewStr(Item));
  814. Idx:=Count-1;
  815. end;
  816. AddItem:=Idx;
  817. end;
  818. END.