2
0

whtml.pas 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 1998 by Berczi Gabor
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. unit WHTML;
  11. {$I globdir.inc}
  12. interface
  13. uses Objects;
  14. type
  15. PTextFile = ^TTextFile;
  16. TTextFile = object(TObject)
  17. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  18. function GetFileName : string; virtual;
  19. end;
  20. PMemoryTextFile = ^TMemoryTextFile;
  21. TMemoryTextFile = object(TTextFile)
  22. constructor Init;
  23. procedure AddLine(const S: string); virtual;
  24. function GetLine(Idx: sw_integer; var S: string): boolean; virtual;
  25. function GetFileName : string; virtual;
  26. function GetLineCount : sw_integer;
  27. destructor Done; virtual;
  28. private
  29. Lines : PUnsortedStrCollection;
  30. end;
  31. PDOSTextFile = ^TDOSTextFile;
  32. TDOSTextFile = object(TMemoryTextFile)
  33. constructor Init(AFileName: string);
  34. function GetFileName : string; virtual;
  35. private
  36. DosFileName : string;
  37. end;
  38. PSGMLParser = ^TSGMLParser;
  39. TSGMLParser = object(TObject)
  40. constructor Init;
  41. function Process(HTMLFile: PTextFile): boolean; virtual;
  42. function ProcessLine(LineText: string): boolean; virtual;
  43. destructor Done; virtual;
  44. public
  45. Line,LinePos: sw_integer;
  46. procedure DocSoftBreak; virtual;
  47. function GetFileName : string;
  48. function DocAddTextChar(C: char): boolean; virtual;
  49. procedure DocAddText(S: string); virtual;
  50. procedure DocProcessTag(Tag: string); virtual;
  51. procedure DocProcessComment(Comment: string); virtual;
  52. function DocDecodeNamedEntity(Name: string; var Entity: string): boolean; virtual;
  53. private
  54. CurTag: string;
  55. FileName : string;
  56. InTag,InComment,InString: boolean;
  57. end;
  58. PHTMLParser = ^THTMLParser;
  59. THTMLParser = object(TSGMLParser)
  60. procedure DocSoftBreak; virtual;
  61. function DocAddTextChar(C: char): boolean; virtual;
  62. procedure DocProcessTag(Tag: string); virtual;
  63. function DocGetTagParam(Name: string; var Value: string): boolean; virtual;
  64. procedure DocProcessComment(Comment: string); virtual;
  65. function DocDecodeNamedEntity(Name: string; var E: string): boolean; virtual;
  66. public
  67. TagName,TagParams: string;
  68. DisableCrossIndexing : boolean;
  69. procedure DocUnknownTag; virtual;
  70. procedure DocTYPE; virtual;
  71. procedure DocHTML(Entered: boolean); virtual;
  72. procedure DocHEAD(Entered: boolean); virtual;
  73. procedure DocMETA; virtual;
  74. procedure DocTITLE(Entered: boolean); virtual;
  75. procedure DocBODY(Entered: boolean); virtual;
  76. procedure DocAnchor(Entered: boolean); virtual;
  77. procedure DocHeading(Level: integer; Entered: boolean); virtual;
  78. procedure DocParagraph(Entered: boolean); virtual;
  79. procedure DocBreak; virtual;
  80. procedure DocImage; virtual;
  81. procedure DocBold(Entered: boolean); virtual;
  82. procedure DocCite(Entered: boolean); virtual;
  83. procedure DocCode(Entered: boolean); virtual;
  84. procedure DocEmphasized(Entered: boolean); virtual;
  85. procedure DocItalic(Entered: boolean); virtual;
  86. procedure DocKbd(Entered: boolean); virtual;
  87. procedure DocPreformatted(Entered: boolean); virtual;
  88. procedure DocSample(Entered: boolean); virtual;
  89. procedure DocStrong(Entered: boolean); virtual;
  90. procedure DocTeleType(Entered: boolean); virtual;
  91. procedure DocVariable(Entered: boolean); virtual;
  92. procedure DocSpan(Entered: boolean); virtual;
  93. procedure DocDiv(Entered: boolean); virtual;
  94. procedure DocList(Entered: boolean); virtual;
  95. procedure DocOrderedList(Entered: boolean); virtual;
  96. procedure DocListItem(Entered: boolean); virtual;
  97. procedure DocDefList(Entered: boolean); virtual;
  98. procedure DocDefTerm(Entered: boolean); virtual;
  99. procedure DocDefExp(Entered: boolean); virtual;
  100. procedure DocTable(Entered: boolean); virtual;
  101. procedure DocTableRow(Entered: boolean); virtual;
  102. procedure DocTableHeaderItem(Entered: boolean); virtual;
  103. procedure DocTableItem(Entered: boolean); virtual;
  104. procedure DocHorizontalRuler; virtual;
  105. end;
  106. Type
  107. PTopicLinkCollection = ^TTopicLinkCollection;
  108. TTopicLinkCollection = object(TStringCollection)
  109. procedure Insert(Item: Pointer); virtual;
  110. function At(Index: sw_Integer): PString;
  111. function AddItem(Item: string): sw_integer;
  112. end;
  113. function EncodeHTMLCtx(FileID: integer; LinkNo: word): longint;
  114. procedure DecodeHTMLCtx(Ctx: longint; var FileID: word; var LinkNo: word);
  115. implementation
  116. uses
  117. WUtils;
  118. function TTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  119. begin
  120. Abstract;
  121. GetLine:=false;
  122. end;
  123. function TTextFile.GetFileName : string;
  124. begin
  125. GetFileName:='unknown';
  126. end;
  127. constructor TMemoryTextFile.Init;
  128. begin
  129. inherited Init;
  130. New(Lines, Init(500,500));
  131. end;
  132. function TMemoryTextFile.GetFileName : string;
  133. begin
  134. GetFileName:='unknown';
  135. end;
  136. function TMemoryTextFile.GetLineCount : sw_integer;
  137. begin
  138. GetLineCount:=Lines^.Count;
  139. end;
  140. procedure TMemoryTextFile.AddLine(const S: string);
  141. begin
  142. Lines^.Insert(NewStr(S));
  143. end;
  144. function TMemoryTextFile.GetLine(Idx: sw_integer; var S: string): boolean;
  145. var OK: boolean;
  146. PS: PString;
  147. begin
  148. OK:=(Lines<>nil) and (Idx<Lines^.Count);
  149. if OK then
  150. begin
  151. PS:=Lines^.At(Idx);
  152. if PS=nil then S:='' else S:=PS^;
  153. end;
  154. GetLine:=OK;
  155. end;
  156. destructor TMemoryTextFile.Done;
  157. begin
  158. if Lines<>nil then
  159. Dispose(Lines, Done);
  160. Lines:=nil;
  161. inherited Done;
  162. end;
  163. constructor TDOSTextFile.Init(AFileName: string);
  164. var f: file;
  165. linecomplete,hasCR: boolean;
  166. S: string;
  167. OldFMode : Integer;
  168. begin
  169. inherited Init;
  170. if AFileName='' then Fail;
  171. {$I-}
  172. Assign(f,AFileName);
  173. OldFMode:= FileMode;
  174. FileMode:= 0;
  175. Reset(f,1);
  176. FileMode:= OldFMode;
  177. if IOResult<>0 then Fail;
  178. DosFileName:=AFileName;
  179. Dispose(Lines,Done);
  180. New(Lines, Init(500,2000));
  181. while (Eof(f)=false) and (IOResult=0) do
  182. begin
  183. ReadlnFromFile(f,S,linecomplete,hasCR,true);
  184. AddLine(S);
  185. end;
  186. Close(f);
  187. {$I+}
  188. end;
  189. function TDosTextFile.GetFileName : string;
  190. begin
  191. GetFileName:=DosFileName;
  192. end;
  193. constructor TSGMLParser.Init;
  194. begin
  195. inherited Init;
  196. FileName:='';
  197. end;
  198. function TSGMLParser.GetFileName : string;
  199. begin
  200. GetFileName:=FileName;
  201. end;
  202. function TSGMLParser.Process(HTMLFile: PTextFile): boolean;
  203. var S: string;
  204. OK,LineOK: boolean;
  205. begin
  206. if HTMLFile=nil then Exit;
  207. InTag:=false; InComment:=false; InString:=false; CurTag:='';
  208. Line:=0; OK:=true;
  209. FileName:=HTMLFile^.GetFileName;
  210. repeat
  211. LineOK:=HTMLFile^.GetLine(Line,S);
  212. if LineOK then
  213. begin
  214. Inc(Line);
  215. OK:=ProcessLine(S);
  216. end;
  217. until (LineOK=false) or (OK=false);
  218. Process:=OK;
  219. end;
  220. function TSGMLParser.ProcessLine(LineText: string): boolean;
  221. var OK: boolean;
  222. C: char;
  223. NewInString: boolean;
  224. OldInComment: boolean;
  225. WasThereAnyText: boolean;
  226. Pos2: integer;
  227. Name,Entity: string;
  228. LiteralCode: boolean;
  229. LiteralStart,LiteralEnd,P: integer;
  230. const TabSize : integer = 8;
  231. Tab = #9;
  232. begin
  233. WasThereAnyText:=false;
  234. OK:=true; LinePos:=1;
  235. LiteralStart:=0; LiteralEnd:=0;
  236. repeat
  237. P:=Pos(TAB,LineText);
  238. if P>0 then
  239. LineText:=copy(LineText,1,P-1)+CharStr(' ',TabSize)+copy(LineText,P+1,255);
  240. until P=0;
  241. while (LinePos<=length(LineText)) and OK do
  242. begin
  243. LiteralCode:=false;
  244. NewInString:=InString; OldInComment:=InComment;
  245. C:=LineText[LinePos];
  246. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  247. if (LiteralCode=false) and (C='&') then
  248. begin
  249. LiteralStart:=0; LiteralEnd:=0;
  250. Name:=''; Pos2:=LinePos+1;
  251. while (Pos2<=length(LineText)) and (LineText[Pos2]<>';') do
  252. begin
  253. Name:=Name+LineText[Pos2];
  254. Inc(Pos2);
  255. end;
  256. Inc(Pos2);
  257. if DocDecodeNamedEntity(Name,Entity) then
  258. begin
  259. LineText:=copy(LineText,1,LinePos-1)+Entity+copy(LineText,Pos2,255);
  260. LiteralStart:=LinePos; LiteralEnd:=LiteralStart+length(Entity)-1;
  261. C:=LineText[LinePos];
  262. end;
  263. end;
  264. LiteralCode:=(LiteralStart<=LinePos) and (LinePos<=LiteralEnd);
  265. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=false) then
  266. NewInString:=true;
  267. if (LiteralCode=false) and (C='<') and (InTag=false) then
  268. InTag:=true;
  269. if InTag then CurTag:=CurTag+C else
  270. WasThereAnyText:=DocAddTextChar(C);
  271. if (LiteralCode=false) and InTag and (InString=false) and (CurTag='<!--') then
  272. InComment:=true;
  273. { A comment can be longer than 255 chars
  274. move the test to LineText string,
  275. This is why the Previous, Next and Up Tags where not working ... PM
  276. if (LiteralCode=false) and InTag and InComment and (InString=false) and (length(CurTag)>=3) and
  277. (copy(CurTag,length(CurTag)-2,3)='-->') then
  278. InComment:=false; }
  279. if (LiteralCode=false) and InTag and InComment and (InString=false) and (LinePos>=3) and
  280. (copy(LineText,LinePos-2,3)='-->') then
  281. InComment:=false;
  282. if (LiteralCode=false) and (C='"') and (InTag=true) and (InString=true) then
  283. NewInString:=false;
  284. if (LiteralCode=false) and (C='>') and (InTag=true) then
  285. begin
  286. InTag:=false;
  287. if OldInComment then
  288. DocProcessComment(CurTag)
  289. else
  290. DocProcessTag(CurTag);
  291. CurTag:='';
  292. end;
  293. InString:=NewInString;
  294. Inc(LinePos);
  295. end;
  296. { whtml does not depend on whelp,
  297. so I can not use hscLineBreak here. PM }
  298. if InTag then
  299. begin
  300. if InString then
  301. CurTag:=CurTag+#0;
  302. end
  303. else if WasThereAnyText then
  304. DocSoftBreak;
  305. ProcessLine:=true;
  306. end;
  307. procedure TSGMLParser.DocSoftBreak;
  308. begin
  309. Abstract;
  310. end;
  311. function TSGMLParser.DocAddTextChar(C: char): boolean;
  312. begin
  313. Abstract;
  314. DocAddTextChar:=false;
  315. end;
  316. procedure TSGMLParser.DocAddText(S: string);
  317. var I: sw_integer;
  318. begin
  319. for I:=1 to length(S) do
  320. DocAddTextChar(S[I]);
  321. end;
  322. function TSGMLParser.DocDecodeNamedEntity(Name: string; var Entity: string): boolean;
  323. begin
  324. DocDecodeNamedEntity:=false;
  325. end;
  326. procedure TSGMLParser.DocProcessTag(Tag: string);
  327. begin
  328. Abstract;
  329. end;
  330. procedure TSGMLParser.DocProcessComment(Comment: string);
  331. begin
  332. Abstract;
  333. end;
  334. destructor TSGMLParser.Done;
  335. begin
  336. inherited Done;
  337. end;
  338. procedure THTMLParser.DocSoftBreak;
  339. begin
  340. end;
  341. function THTMLParser.DocAddTextChar(C: char): boolean;
  342. begin
  343. { Abstract }
  344. DocAddTextChar:=false;
  345. end;
  346. function THTMLParser.DocDecodeNamedEntity(Name: string; var E: string): boolean;
  347. var Found: boolean;
  348. Code: word;
  349. CC: word;
  350. begin
  351. Found:=true; Code:=$ffff;
  352. Name:=LowCaseStr(Name);
  353. if copy(Name,1,1)='#' then
  354. begin
  355. if Name[2]='x' then
  356. Val('$'+copy(Name,3,255),Code,CC)
  357. else
  358. Val(copy(Name,2,255),Code,CC);
  359. if CC<>0 then
  360. begin
  361. {$ifdef DEBUG}
  362. DebugMessage(FileName,'NamedEntity '+Name+' not converted',1,1);
  363. {$endif DEBUG}
  364. Code:=$ffff;
  365. end;
  366. end;
  367. { #0 to #127 is same for Unicode and Code page 437 }
  368. if (code<=127) then
  369. begin
  370. E:=chr(code);
  371. DocDecodeNamedEntity:=true;
  372. exit;
  373. end;
  374. if (Code=$22{34}) or (Name='quot') then E:='"' else { double quote sign }
  375. if (Code=$26{38}) or (Name='amp') then E:='&' else { ampersand }
  376. if (Code=$27{39}) or (Name='apos') then E:='''' else { apostrophe }
  377. if (Code=$3C{60}) or (Name='lt') then E:='<' else { less-than sign }
  378. if (Code=$3E{62}) or (Name='gt') then E:='>' else { greater-than sign }
  379. if (Code=$5B) then E:='[' else { [ }
  380. if (Code=$5C) then E:='\' else { \ }
  381. if (Code=$5D) then E:=']' else { ] }
  382. if (Code=$5E) then E:='^' else { ^ }
  383. if (Code=$5F) then E:='_' else { _ }
  384. if (Code=160) or (Name='nbsp') then E:=#255 else { no-break space }
  385. if (Code=161) or (Name='iexcl') then E:='­' else { inverted exclamation mark }
  386. if (Code=162) or (Name='cent') then E:='›' else { cent sign }
  387. if (Code=163) or (Name='pound') then E:='œ' else { pound sterling sign }
  388. if (Code=164) or (Name='curren') then E:='$' else { general currency sign }
  389. if (Code=165) or (Name='yen') then E:='' else { yen sign }
  390. if (Code=166) or (Name='brvbar') then E:='|' else { broken vertical bar }
  391. if (Code=167) or (Name='sect') then E:='' else { section sign }
  392. if (Code=168) or (Name='uml') then E:='"' else { umlaut (dieresis) }
  393. if (Code=169) or (Name='copy') then E:='(C)' else { copyright sign }
  394. (* if (Code=170) or (Name='ordf') then E:=#255 else { ordinal indicator, feminine }*)
  395. if (Code=171) or (Name='laquo') then E:='"' else { angle quotation mark -left }
  396. if (Code=172) or (Name='not') then E:='!' else { not sign }
  397. if (Code=173) or (Name='shy') then E:='-' else { soft hypen }
  398. if (Code=174) or (Name='reg') then E:='(R)' else { registered sign }
  399. (* if (Code=175) or (Name='macr') then E:='?' else { macron }*)
  400. if (Code=176) or (Name='deg') then E:='ø' else { degree sign }
  401. if (Code=177) or (Name='plusmn') then E:='ñ' else { plus-or-minus sign }
  402. if (Code=178) or (Name='sup2') then E:='ý' else { superscript 2 }
  403. if (Code=179) or (Name='sup3') then E:='^3' else { superscript 3 }
  404. if (Code=180) or (Name='acute') then E:='''' else { acute accent }
  405. if (Code=181) or (Name='micro') then E:='æ' else { micro sign }
  406. (* if (Code=182) or (Name='para') then E:='?' else { paragraph sign }*)
  407. if (Code=183) or (Name='middot') then E:='ù' else { middle dot }
  408. (* if (Code=184) or (Name='cedil') then E:='?' else { cedilla }*)
  409. if (Code=185) or (Name='sup1') then E:='^1' else { superscript 1 }
  410. (* if (Code=186) or (Name='ordm') then E:='?' else { ordinal indicator, masculine }*)
  411. if (Code=187) or (Name='raquo') then E:='"' else { angle quoatation mark -right }
  412. if (Code=188) or (Name='frac14') then E:='¬' else { fraction one-quarter }
  413. if (Code=189) or (Name='frac12') then E:='«' else { fraction one-half }
  414. if (Code=190) or (Name='frac34') then E:='3/4' else { fraction three-quarters }
  415. if (Code=191) or (Name='iquest') then E:='¨' else { inverted question mark }
  416. if (Code=192) or (Name='Agrave') then E:='A' else { capital A, grave accent }
  417. if (Code=193) or (Name='Aacute') then E:='A' else { capital A, acute accent }
  418. if (Code=194) or (Name='Acirc') then E:='A' else { capital A, circumflex accent }
  419. if (Code=195) or (Name='Atilde') then E:='A' else { capital A, tilde accent }
  420. if (Code=196) or (Name='Auml') then E:='Ž' else { capital A, dieresis or umlaut }
  421. if (Code=197) or (Name='Aring') then E:='' else { capital A, ring }
  422. if (Code=198) or (Name='AElig') then E:='’' else { capital AE diphthong }
  423. if (Code=199) or (Name='Ccedil') then E:='€' else { capital C, cedilla }
  424. if (Code=200) or (Name='Egrave') then E:='' else { capital E, grave accent }
  425. if (Code=201) or (Name='Eacute') then E:='' else { capital E, acute accent }
  426. if (Code=202) or (Name='Ecirc') then E:='E' else { capital E, circumflex accent }
  427. if (Code=203) or (Name='Euml') then E:='E' else { capital E, dieresis or umlaut }
  428. if (Code=204) or (Name='Igrave') then E:='I' else { capital I, grave accent }
  429. if (Code=205) or (Name='Iacute') then E:='I' else { capital I, acute accent }
  430. if (Code=206) or (Name='Icirc') then E:='I' else { capital I, circumflex accent }
  431. if (Code=207) or (Name='Iuml') then E:='I' else { capital I, dieresis or umlaut }
  432. (* if (Code=208) or (Name='ETH') then E:='?' else { capital Eth, Icelandic }*)
  433. if (Code=209) or (Name='Ntidle') then E:='¥' else { capital N, tilde }
  434. if (Code=210) or (Name='Ograve') then E:='O' else { capital O, grave accent }
  435. if (Code=211) or (Name='Oacute') then E:='O' else { capital O, acute accent }
  436. if (Code=212) or (Name='Ocirc') then E:='O' else { capital O, circumflex accent }
  437. if (Code=213) or (Name='Otilde') then E:='O' else { capital O, tilde }
  438. if (Code=214) or (Name='Ouml') then E:='™' else { capital O, dieresis or umlaut }
  439. if (Code=215) or (Name='times') then E:='*' else { multiply sign }
  440. if (Code=216) or (Name='Oslash') then E:='O' else { capital O, slash }
  441. if (Code=217) or (Name='Ugrave') then E:='U' else { capital U, grave accent }
  442. if (Code=218) or (Name='Uacute') then E:='U' else { capital U, acute accent }
  443. if (Code=219) or (Name='Ucirc') then E:='U' else { capital U, circumflex accent }
  444. if (Code=220) or (Name='Uuml') then E:='š' else { capital U, dieresis or umlaut }
  445. if (Code=221) or (Name='Yacute') then E:='Y' else { capital Y, acute accent }
  446. (* if (Code=222) or (Name='THORN') then E:='?' else { capital THORN, Icelandic }*)
  447. if (Code=223) or (Name='szlig') then E:='á' else { small sharp S, German }
  448. if (Code=224) or (Name='agrave') then E:='…' else { small a, grave accent }
  449. if (Code=225) or (Name='aacute') then E:=' ' else { small a, acute accent }
  450. if (Code=226) or (Name='acirc') then E:='ƒ' else { small a, circumflex accent }
  451. if (Code=227) or (Name='atilde') then E:='ƒ' else { small a, tilde }
  452. if (Code=228) or (Name='auml') then E:='„' else { small a, dieresis or umlaut }
  453. if (Code=229) or (Name='aring') then E:='†' else { small a, ring }
  454. if (Code=230) or (Name='aelig') then E:='ae' else { small ae, diphthong }
  455. if (Code=231) or (Name='ccedil') then E:='‡' else { small c, cedilla }
  456. if (Code=232) or (Name='egrave') then E:='Š' else { small e, grave accent }
  457. if (Code=233) or (Name='eacute') then E:='‚' else { small e, acute accent }
  458. if (Code=234) or (Name='ecirc') then E:='ˆ' else { small e, circumflex accent }
  459. if (Code=235) or (Name='euml') then E:='‰' else { small e, dieresis or umlaut }
  460. if (Code=236) or (Name='igrave') then E:='' else { small i, grave accent }
  461. if (Code=237) or (Name='iacute') then E:='¡' else { small i, acute accent }
  462. if (Code=238) or (Name='icirc') then E:='Œ' else { small i, circumflex accent }
  463. if (Code=239) or (Name='iuml') then E:='‹' else { small i, dieresis or umlaut }
  464. (* if (Code=240) or (Name='eth') then E:='?' else { small eth, Icelandic }*)
  465. if (Code=241) or (Name='ntilde') then E:='¤' else { small n, tilde }
  466. if (Code=242) or (Name='ograve') then E:='•' else { small o, grave accent }
  467. if (Code=243) or (Name='oacute') then E:='¢' else { small o, acute accent }
  468. if (Code=244) or (Name='ocirc') then E:='“' else { small o, circumflex accent }
  469. if (Code=245) or (Name='otilde') then E:='“' else { small o, tilde }
  470. if (Code=246) or (Name='ouml') then E:='”' else { small o, dieresis or umlaut }
  471. if (Code=247) or (Name='divide') then E:='/' else { divide sign }
  472. if (Code=248) or (Name='oslash') then E:='"' else { small o, slash }
  473. if (Code=249) or (Name='ugrave') then E:='—' else { small u, grave accent }
  474. if (Code=250) or (Name='uacute') then E:='£' else { small u, acute accent }
  475. if (Code=251) or (Name='ucirc') then E:='–' else { small u, circumflex accent }
  476. if (Code=252) or (Name='uuml') then E:='' else { small u, dieresis or umlaut }
  477. if (Code=253) or (Name='yacute') then E:='y' else { small y, acute accent }
  478. (* if (Code=254) or (Name='thorn') then E:='?' else { small thorn, Icelandic }*)
  479. if (Code=255) or (Name='yuml') then E:='y' else { small y, dieresis or umlaut }
  480. { Special codes appearing in TeXH generated files }
  481. if (code=$2c6{710}) or (Name='circ') then E:='^' else { Modifier Letter Circumflex Accent }
  482. if (code=$2dc{732}) or (Name='tilde') then E:='~' else { Small tilde }
  483. if (code=$2013{8211}) or (Name='endash') then E:='-' else { En dash }
  484. if (code=$2014{8212}) or (Name='emdash') then E:='--' else { Em dash }
  485. if (Code=$2018{8216}) or (Name='lsquo') then E:='`' else { Acute accent as generated by TeXH }
  486. if (Code=$2019{8217}) or (Name='rsquo') then E:='''' else { acute accent as generated by TeXH }
  487. if (code=$201C{8220}) or (Name='ldquo') then E:='''''' else { left double quotation marks }
  488. if (code=$201D{8221}) or (Name='rdquo') then E:='``' else { right double quotation marks }
  489. if (code=$2026{8230}) or (Name='hellip') then E:='...' else { horizontal ellipsis }
  490. if (Code=$FB00) then E:='ff' else { ff together }
  491. if (Code=$FB01) then E:='fi' else { fi together }
  492. if (Code=$FB02) then E:='fl' else { fl together }
  493. if (Code=$FB03) then E:='ffi' else { ffi together }
  494. if (Code=$FB04) then E:='ffl' else { ffl together }
  495. Found:=false;
  496. DocDecodeNamedEntity:=Found;
  497. {$ifdef DEBUG}
  498. if (Code<>$ffff) and not found then
  499. begin
  500. DebugMessage(FileName,'NamedEntity '+Name+' not handled',1,1);
  501. end;
  502. {$endif DEBUG}
  503. end;
  504. procedure THTMLParser.DocProcessTag(Tag: string);
  505. var UTagName,ETagName: string[30];
  506. P: byte;
  507. NotEndTag: boolean;
  508. begin
  509. if copy(Tag,1,1)='<' then Delete(Tag,1,1);
  510. if copy(Tag,length(Tag),1)='>' then Delete(Tag,length(Tag),1);
  511. Tag:=Trim(Tag);
  512. P:=Pos(' ',Tag); if P=0 then P:=length(Tag)+1;
  513. TagName:=copy(Tag,1,P-1); TagParams:=copy(Tag,P+1,255);
  514. UTagName:=UpcaseStr(TagName);
  515. NotEndTag:=copy(TagName,1,1)<>'/';
  516. if NotEndTag then ETagName:=UTagName else ETagName:=copy(UTagName,2,255);
  517. { <BR/> is also a Break tag... }
  518. if Copy(ETagName,Length(ETagName),1)='/' then
  519. begin
  520. ETagName:=copy(ETagName,1,Length(ETagName)-1);
  521. NotEndTag:=false;
  522. end;
  523. if (UTagName='!DOCTYPE') then DocTYPE else
  524. { Section tags }
  525. if (ETagName='HTML') then DocHTML(NotEndTag) else
  526. if (ETagName='HEAD') then DocHEAD(NotEndTag) else
  527. if (ETagName='TITLE') then DocTITLE(NotEndTag) else
  528. if (ETagName='BODY') then DocBODY(NotEndTag) else
  529. { Anchor tags }
  530. if (ETagName='A') then DocAnchor(NotEndTag) else
  531. { Direct formatting directives }
  532. if (ETagName='H1') then DocHeading(1,NotEndTag) else
  533. if (ETagName='H2') then DocHeading(2,NotEndTag) else
  534. if (ETagName='H3') then DocHeading(3,NotEndTag) else
  535. if (ETagName='H4') then DocHeading(4,NotEndTag) else
  536. if (ETagName='H5') then DocHeading(5,NotEndTag) else
  537. if (ETagName='H6') then DocHeading(6,NotEndTag) else
  538. if (ETagName='P') then DocParagraph(NotEndTag) else
  539. if (ETagName='BR') then DocBreak else
  540. if (ETagName='B') then DocBold(NotEndTag) else
  541. if (ETagName='CITE') then DocCite(NotEndTag) else
  542. if (ETagName='CODE') then DocCode(NotEndTag) else
  543. if (ETagName='EM') then DocEmphasized(NotEndTag) else
  544. if (ETagName='I') then DocItalic(NotEndTag) else
  545. if (ETagName='KBD') then DocKbd(NotEndTag) else
  546. if (ETagName='PRE') then DocPreformatted(NotEndTag) else
  547. if (ETagName='SAMP') then DocSample(NotEndTag) else
  548. if (ETagName='STRONG') then DocStrong(NotEndTag) else
  549. if (ETagName='TT') then DocTeleType(NotEndTag) else
  550. if (ETagName='VAR') then DocVariable(NotEndTag) else
  551. if (ETagName='SPAN') then DocSpan(NotEndTag) else
  552. if (ETagName='DIV') then DocDiv(NotEndTag) else
  553. { Unordered & ordered lists }
  554. if (ETagName='UL') then DocList(NotEndTag) else
  555. if (ETagName='OL') then DocOrderedList(NotEndTag) else
  556. if (ETagName='LI') then DocListItem(NotEndTag) else
  557. { Definition list }
  558. if (ETagName='DL') then DocDefList(NotEndTag) else
  559. if (ETagName='DT') then DocDefTerm(NotEndTag) else
  560. if (ETagName='DD') then DocDefExp(NotEndTag) else
  561. { Table }
  562. if (ETagName='TABLE') then DocTable(NotEndTag) else
  563. if (ETagName='TR') then DocTableRow(NotEndTag) else
  564. if (ETagName='TH') then DocTableHeaderItem(NotEndTag) else
  565. if (ETagName='TD') then DocTableItem(NotEndTag) else
  566. { Misc. tags }
  567. if (UTagName='META') then DocMETA else
  568. if (UTagName='IMG') then DocImage else
  569. if (UTagName='HR') then DocHorizontalRuler else
  570. DocUnknownTag;
  571. end;
  572. function THTMLParser.DocGetTagParam(Name: string; var Value: string): boolean;
  573. var Found: boolean;
  574. S: string;
  575. ParamName,ParamValue: string;
  576. InStr: boolean;
  577. I: sw_integer;
  578. begin
  579. Found:=false;
  580. Name:=UpcaseStr(Name);
  581. Value:='';
  582. S:=TagParams;
  583. repeat
  584. InStr:=false;
  585. ParamName:=''; ParamValue:='';
  586. S:=Trim(S); I:=1;
  587. while (I<=length(S)) and (S[I]<>'=') do
  588. begin
  589. if S[I]=' ' then
  590. ParamName:=''
  591. else
  592. ParamName:=ParamName+S[I];
  593. Inc(I);
  594. end;
  595. ParamName:=Trim(ParamName);
  596. if S[I]='=' then
  597. begin
  598. Inc(I); InStr:=false;
  599. while (I<=length(S)) and (S[I]=' ') do
  600. Inc(I);
  601. if (I<=length(S)) and (S[I]='"') then
  602. begin
  603. InStr:=true;
  604. Inc(I);
  605. end;
  606. while (I<=length(S)) and ((InStr=true) or (S[I]<>' ')) do
  607. begin
  608. if S[I]='"' then
  609. begin
  610. InStr:=not InStr;
  611. if InStr=false then Break;
  612. end
  613. else
  614. ParamValue:=ParamValue+S[I];
  615. Inc(I);
  616. end;
  617. end;
  618. Found:=(Name=UpcaseStr(ParamName));
  619. if Found then Value:=ParamValue;
  620. Delete(S,1,I);
  621. until Found or (S='');
  622. DocGetTagParam:=Found;
  623. end;
  624. procedure THTMLParser.DocProcessComment(Comment: string);
  625. begin
  626. end;
  627. procedure THTMLParser.DocUnknownTag;
  628. begin
  629. end;
  630. procedure THTMLParser.DocTYPE;
  631. begin
  632. end;
  633. procedure THTMLParser.DocHTML(Entered: boolean);
  634. begin
  635. end;
  636. procedure THTMLParser.DocHEAD(Entered: boolean);
  637. begin
  638. end;
  639. procedure THTMLParser.DocMETA;
  640. begin
  641. end;
  642. procedure THTMLParser.DocTITLE(Entered: boolean);
  643. begin
  644. end;
  645. procedure THTMLParser.DocBODY(Entered: boolean);
  646. begin
  647. end;
  648. procedure THTMLParser.DocAnchor(Entered: boolean);
  649. begin
  650. end;
  651. procedure THTMLParser.DocHeading(Level: integer; Entered: boolean);
  652. begin
  653. end;
  654. procedure THTMLParser.DocParagraph(Entered: boolean);
  655. begin
  656. end;
  657. procedure THTMLParser.DocBreak;
  658. begin
  659. end;
  660. procedure THTMLParser.DocImage;
  661. begin
  662. end;
  663. procedure THTMLParser.DocBold(Entered: boolean);
  664. begin
  665. end;
  666. procedure THTMLParser.DocCite(Entered: boolean);
  667. begin
  668. end;
  669. procedure THTMLParser.DocCode(Entered: boolean);
  670. begin
  671. end;
  672. procedure THTMLParser.DocEmphasized(Entered: boolean);
  673. begin
  674. end;
  675. procedure THTMLParser.DocItalic(Entered: boolean);
  676. begin
  677. end;
  678. procedure THTMLParser.DocKbd(Entered: boolean);
  679. begin
  680. end;
  681. procedure THTMLParser.DocPreformatted(Entered: boolean);
  682. begin
  683. end;
  684. procedure THTMLParser.DocSample(Entered: boolean);
  685. begin
  686. end;
  687. procedure THTMLParser.DocStrong(Entered: boolean);
  688. begin
  689. end;
  690. procedure THTMLParser.DocTeleType(Entered: boolean);
  691. begin
  692. end;
  693. procedure THTMLParser.DocVariable(Entered: boolean);
  694. begin
  695. end;
  696. procedure THTMLParser.DocSpan(Entered: boolean);
  697. begin
  698. end;
  699. procedure THTMLParser.DocDiv(Entered: boolean);
  700. var
  701. S: String;
  702. begin
  703. if Entered then
  704. begin
  705. if DocGetTagParam('CLASS',S) then
  706. if S='crosslinks' then
  707. begin
  708. DisableCrossIndexing:=true;
  709. {$ifdef DEBUG}
  710. DebugMessage(GetFileName,'Crosslinks found',Line,LinePos);
  711. {$endif DEBUG}
  712. end;
  713. end
  714. else
  715. begin
  716. {$ifdef DEBUG}
  717. if DisableCrossIndexing then
  718. begin
  719. DebugMessage(GetFileName,'Crosslinks end found',Line,LinePos);
  720. end;
  721. {$endif DEBUG}
  722. DisableCrossIndexing:=false;
  723. end;
  724. end;
  725. procedure THTMLParser.DocList(Entered: boolean);
  726. begin
  727. end;
  728. procedure THTMLParser.DocOrderedList(Entered: boolean);
  729. begin
  730. end;
  731. procedure THTMLParser.DocListItem(Entered: boolean);
  732. begin
  733. end;
  734. procedure THTMLParser.DocDefList(Entered: boolean);
  735. begin
  736. end;
  737. procedure THTMLParser.DocDefTerm(Entered: boolean);
  738. begin
  739. end;
  740. procedure THTMLParser.DocDefExp(Entered: boolean);
  741. begin
  742. end;
  743. procedure THTMLParser.DocTable(Entered: boolean);
  744. var
  745. S: String;
  746. begin
  747. if Entered then
  748. begin
  749. if DocGetTagParam('CLASS',S) then
  750. if S='bar' then
  751. begin
  752. DisableCrossIndexing:=true;
  753. {$ifdef DEBUG}
  754. DebugMessage(GetFileName,'Bar table found, cross indexing disabled ',Line,LinePos);
  755. {$endif DEBUG}
  756. end;
  757. end
  758. else
  759. begin
  760. {$ifdef DEBUG}
  761. if DisableCrossIndexing then
  762. begin
  763. DebugMessage(GetFileName,'Bar table end found',Line,LinePos);
  764. end;
  765. {$endif DEBUG}
  766. DisableCrossIndexing:=false;
  767. end;
  768. end;
  769. procedure THTMLParser.DocTableRow(Entered: boolean);
  770. begin
  771. end;
  772. procedure THTMLParser.DocTableHeaderItem(Entered: boolean);
  773. begin
  774. end;
  775. procedure THTMLParser.DocTableItem(Entered: boolean);
  776. begin
  777. end;
  778. procedure THTMLParser.DocHorizontalRuler;
  779. begin
  780. end;
  781. function EncodeHTMLCtx(FileID: integer; LinkNo: word): longint;
  782. var Ctx: longint;
  783. begin
  784. Ctx:=(longint(FileID) shl 16)+LinkNo;
  785. EncodeHTMLCtx:=Ctx;
  786. end;
  787. procedure DecodeHTMLCtx(Ctx: longint; var FileID: word; var LinkNo: word);
  788. begin
  789. if (Ctx shr 16)=0 then
  790. begin
  791. FileID:=$ffff; LinkNo:=0;
  792. end
  793. else
  794. begin
  795. FileID:=Ctx shr 16; LinkNo:=Ctx and $ffff;
  796. end;
  797. end;
  798. procedure TTopicLinkCollection.Insert(Item: Pointer);
  799. begin
  800. AtInsert(Count,Item);
  801. end;
  802. function TTopicLinkCollection.At(Index: sw_Integer): PString;
  803. begin
  804. At:=inherited At(Index);
  805. end;
  806. function TTopicLinkCollection.AddItem(Item: string): sw_integer;
  807. var Idx: sw_integer;
  808. begin
  809. if Item='' then Idx:=-1 else
  810. if Search(@Item,Idx)=false then
  811. begin
  812. AtInsert(Count,NewStr(Item));
  813. Idx:=Count-1;
  814. end;
  815. AddItem:=Idx;
  816. end;
  817. END.