htmldefs.pp 29 KB


  1. {
  2. $Id: htmldefs.pp,v 1.2 2006/01/03 23:33:23 lukvdl Exp $
  3. This file is part of the Free Component Library
  4. HTML definitions and utility functions
  5. Copyright (c) 2000-2002 by
  6. Areca Systems GmbH / Sebastian Guenther, [email protected]
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. {$IFNDEF FPC_DOTTEDUNITS}
  14. unit HTMLDefs;
  15. {$ENDIF FPC_DOTTEDUNITS}
  16. {$MODE objfpc}
  17. {$H+}
  18. interface
  19. type
  20. THTMLCData = string;
  21. THTMLID = string;
  22. THTMLName = string;
  23. THTMLIDRef = string;
  24. THTMLIDRefs = string;
  25. THTMLNumber = longint;
  26. THTMLText = THTMLCData;
  27. THTMLCharsets = THTMLCData;
  28. THTMLContentTypes = THTMLCData;
  29. THTMLURI = string;
  30. THTMLCharacter = AnsiChar;
  31. THTMLDir = (dirEmpty,dirLeftToRight,dirRightToLeft);
  32. THTMLalign = (alEmpty,alleft,alcenter,alright,aljustify,alchar);
  33. THTMLvalign = (vaEmpty,vatop,vamiddle,vabottom,vabaseline);
  34. THTMLframe = (frEmpty,frvoid,frabove,frbelow,frhsides,frvsides,frlefthandsise,frrighthandside,frbox,frborder);
  35. THTMLrules = (ruEmpty,runone,rugroups,rurows,rucols,ruall);
  36. THTMLvaluetype = (vtEmpty,vtdata,vtref,vtobject);
  37. THTMLshape = (shEmpty,shdefault,shrect,shcircle,shpoly);
  38. THTMLinputtype = (itEmpty,ittext,itpassword,itcheckbox,itradio,itsubmit,itreset,itfile,ithidden,itimage,itbutton);
  39. THTMLbuttontype = (btEmpty,btsubmit,btreset,btbutton);
  40. THTMLColor = (
  41. clHTMLBlack, clHTMLSilver, clHTMLGray, clHTMLWhite, clHTMLMaroon,
  42. // #000000 #C0C0C0 #808080 #FFFFFF #800000
  43. clHTMLRed, clHTMLPurple, clHTMLFuchsia,clHTMLGreen, clHTMLLime, clHTMLOlive,
  44. // #FF0000 #800080 #FF00FF #008000 #00FF00 #808000
  45. clHTMLYellow,clHTMLNavy, clHTMLBlue, clHTMLTeal, clHTMLAqua
  46. // #FFFF00 #000080 #0000FF #008080 #00FFFF
  47. );
  48. THTMLAttributeTag = (
  49. atabbr, atalink, atacceptcharset, ataccept, ataccesskey, ataction, atalign, atalt, atarchive,
  50. ataxis, atbackground, atbgcolor, atborder, atcellpadding, atcellspacing, atchar, atcharoff, atcharset,
  51. atchecked, atcite, atclass, atclassid, atclear, atcode, atcodebase, atcodetype, atcolor, atcols,
  52. atcolspan, atcompact, atcontent, atcoords, atdata, atdatetime, atdeclare,atdefer,
  53. atdir, atdisabled, atenctype, atface, atfor, atframe, atframeborder, atheaders,
  54. atheight, athref, athreflang, athspace, athttpequiv, atid, atismap, atlabel, atlang, atlink,
  55. atlongdesc, atmarginheight, atmarginwidth, atmaxlength, atmedia, atmethod,
  56. atmultiple, atname, atnohref, atnoresize, atnoshade, atnowrap, atobject, atonblur, atonchange, atonclick,
  57. atondblclick, atonfocus, atonkeydown, atonkeypress, atonkeyup, atonload,
  58. atonmousedown, atonmousemove, atonmouseout, atonmouseover, atonmouseup,
  59. atonreset, atonselect, atonsubmit, atonunload, atprofile, atprompt, atreadonly,
  60. atrel, atrev, atrows, atrowspan, atrules, atscheme, atscope, atscrolling,
  61. atselected, atshape, atsize, atspan, atsrc, atstandby, atstart, atstyle, atsummary,
  62. attabindex, attarget, attext, attitle, attype, atusemap, atvalign, atvalue,
  63. atvaluetype, atversion, atvlink, atvspace, atwidth
  64. );
  65. THTMLAttributeSet = set of THTMLAttributeTag;
  66. THTMLElementTag = (
  67. eta, etabbr, etacronym, etaddress, etapplet, etarea, etb, etbase,
  68. etbasefont, etbdo, etbig, etblockquote, etbody, etbr, etbutton,
  69. etcaption, etcenter, etcite, etcode, etcol, etcolgroup, etdd, etdel,
  70. etdfn, etdir, etdiv, etdl, etdt, etem, etfieldset, etfont, etform,
  71. etframe, etframeset, eth1, eth2, eth3, eth4, eth5, eth6, ethead, ethr,
  72. ethtml, eti, etiframe, etimg, etinput, etins, etisindex, etkbd, etlabel,
  73. etlegend, etli, etlink, etmap, etmenu, etmeta, etnoframes, etnoscript,
  74. etobject, etol, etoptgroup, etoption, etp, etparam, etpre, etq, ets,
  75. etsamp, etscript, etselect, etsmall, etspan, etstrike, etstrong,
  76. etstyle, etsub, etsup, ettable, ettbody, ettd, ettextarea, ettfoot,
  77. etth, etthead, ettitle, ettr, ettt, etu, etul, etvar,
  78. etText, etUnknown
  79. );
  80. THTMLElementTagSet = set of THTMLElementTag;
  { Flags describing one HTML element; a set of them is stored in the Flags
    field of every HTMLElementProps entry. }
  81. THTMLElementFlag = (
  82. efSubelementContent, // may have subelements
  83. efPCDATAContent, // may have PCDATA content
  84. efPreserveWhitespace, // preserve all whitespace
  85. efDeprecated, // can be dropped in future versions
  86. efNoChecks, // Checks (attributes,subtags,...) can only be implemented in descendants
  87. efEndTagOptional // set in HTMLElementProps for elements such as p, li, dd, dt, option, td, th and tr
  88. );
  89. THTMLElementFlags = set of THTMLElementFlag;
  { Static description of one HTML element. The HTMLElementProps table holds
    one such record per THTMLElementTag value. }
  90. PHTMLElementProps = ^THTMLElementProps;
  91. THTMLElementProps = record
  92. Name: String; // element name as written in markup, e.g. 'a' or 'blockquote'
  93. Flags: THTMLElementFlags; // flags for this element, see THTMLElementFlag
  94. Attributes: THTMLAttributeSet; // attributes listed for this element
  95. end;
  96. const
  97. BooleanAttributes = [atchecked,atdeclare,atdefer,atdisabled,atnohref,atnoresize,
  98. atmultiple,atreadonly,atselected];
  99. DeprecatedAttributes = [atalink, atbackground, atbgcolor, atclear, atcode, atcolor,
  100. atcompact, atface, athspace, atlink, atnoshade, atnowrap, atobject, atprompt,
  101. atstart, attext, atvlink, atversion, atvspace];
  102. efSubcontent = [efSubelementContent, efPCDATAContent];
  103. atsi18n = [atlang, atdir];
  104. atscoreattrs = [atid,atclass,atstyle,attitle];
  105. atsevents = [atonclick,atondblclick,atonmousedown,atonmouseup,atonmouseover,
  106. atonmousemove,atonmouseout,atonkeypress,atonkeydown,atonkeyup];
  107. atsattrs = atsevents + atscoreattrs + atsi18n;
  108. atscellhalign = [atalign, atchar, atcharoff];
  109. { etsStructured := [];
  110. etsDivisions := [];
  111. etsLists := [];
  112. etsLinks := [];
  113. etsObjects := [etImg, etObject, etApplet, etMap, etArea];
  114. etsForms := [etForm];
  115. etsText = etsStructured + etsDivisions + etsLists + etsLinks + etsObjects +
  116. etsForms +
  117. etTable + etText + etScript + ; }
  118. HTMLElementProps: array[THTMLElementTag] of THTMLElementProps = (
  119. (Name: 'a'; Flags: efSubcontent;
  120. Attributes: atsattrs+[atcharset,attype,atname,athref,athreflang,atrel,atrev,
  121. ataccesskey,atshape,atcoords,attabindex,atonfocus,atonblur]),
  122. (Name: 'abbr'; Flags: efSubcontent; Attributes: atsattrs),
  123. (Name: 'acronym'; Flags: efSubcontent; Attributes: atsattrs),
  124. (Name: 'address'; Flags: efSubcontent; Attributes: atsattrs),
  125. (Name: 'applet'; Flags: efSubcontent+[efDeprecated];
  126. Attributes: atscoreattrs+[atcodebase,atarchive,atalt,atname,atwidth,atheight]),
  127. (Name: 'area'; Flags: [];
  128. Attributes: atsattrs+[atshape,atcoords,athref,atnohref,atalt,attabindex,
  129. ataccesskey,atonfocus,atonblur]),
  130. (Name: 'b'; Flags: efSubcontent; Attributes: atsattrs),
  131. (Name: 'base'; Flags: []; Attributes: [athref]),
  132. (Name: 'basefont'; Flags: [efDeprecated]; Attributes: [atid]),
  133. (Name: 'bdo'; Flags: efSubcontent; Attributes: atscoreattrs+[atlang,atdir]),
  134. (Name: 'big'; Flags: efSubcontent; Attributes: atsattrs),
  135. (Name: 'blockquote';Flags: [efSubelementContent]; Attributes: atsattrs+[atcite]),
  136. (Name: 'body'; Flags: [efSubelementContent];
  137. Attributes: atsAttrs+[atonload, atonunload]),
  138. (Name: 'br'; Flags: []; Attributes: atscoreattrs),
  139. (Name: 'button'; Flags: efSubcontent;
  140. Attributes: atsattrs+[atname,atvalue,attype,atdisabled,attabindex,
  141. ataccesskey,atonfocus,atonblur]),
  142. (Name: 'caption'; Flags: efSubcontent; Attributes: atsattrs),
  143. (Name: 'center'; Flags: [efSubelementContent,efDeprecated]; Attributes: []),
  144. (Name: 'cite'; Flags: efSubcontent; Attributes: atsattrs),
  145. (Name: 'code'; Flags: efSubcontent; Attributes: atsattrs),
  146. (Name: 'col'; Flags: [];
  147. Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
  148. (Name: 'colgroup'; Flags: [efSubelementContent, efEndTagOptional];
  149. Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
  150. (Name: 'dd'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
  151. (Name: 'del'; Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),
  152. (Name: 'dfn'; Flags: efSubcontent; Attributes: atsattrs),
  153. (Name: 'dir'; Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),
  154. (Name: 'div'; Flags: efSubContent; Attributes: atsattrs),
  155. (Name: 'dl'; Flags: [efSubelementContent]; Attributes: atsattrs),
  156. (Name: 'dt'; Flags: [efPCDataContent, efEndTagOptional]; Attributes: atsattrs),
  157. (Name: 'em'; Flags: efSubcontent; Attributes: atsattrs),
  158. (Name: 'fieldset'; Flags: efSubcontent; Attributes: atsattrs),
  159. (Name: 'font'; Flags: efSubcontent+[efDeprecated]; Attributes: atscoreattrs+atsi18n),
  160. (Name: 'form'; Flags: [efSubelementContent];
  161. Attributes: atsattrs+[ataction,atmethod,atenctype,atonsubmit,atonreset,atacceptcharset]),
  162. (Name: 'frame'; Flags: [];
  163. Attributes: atscoreattrs+[atlongdesc,atname,atsrc,atframeborder,
  164. atmarginwidth,atmarginheight,atnoresize,atscrolling]),
  165. (Name: 'frameset'; Flags: efSubcontent;
  166. Attributes: atsCoreattrs+[atrows,atcols,atonload,atonunload]),
  167. (Name: 'h1'; Flags: efSubcontent; Attributes: atsattrs),
  168. (Name: 'h2'; Flags: efSubcontent; Attributes: atsattrs),
  169. (Name: 'h3'; Flags: efSubcontent; Attributes: atsattrs),
  170. (Name: 'h4'; Flags: efSubcontent; Attributes: atsattrs),
  171. (Name: 'h5'; Flags: efSubcontent; Attributes: atsattrs),
  172. (Name: 'h6'; Flags: efSubcontent; Attributes: atsattrs),
  173. (Name: 'head'; Flags: [efSubelementContent]; Attributes: atsi18n+[atprofile]),
  174. (Name: 'hr'; Flags: []; Attributes: atscoreattrs+atsevents),
  175. (Name: 'html'; Flags: [efSubelementContent]; Attributes: atsi18n),
  176. (Name: 'i'; Flags: efSubcontent; Attributes: atsattrs),
  177. (Name: 'iframe'; Flags: [efSubelementContent];
  178. Attributes: atscoreattrs+[atlongdesc,atname,atsrc,atframeborder,atmarginwidth,
  179. atmarginheight,atscrolling,atalign,atheight,atwidth]),
  180. (Name: 'img'; Flags: [];
  181. Attributes: atsattrs+[atsrc,atalt,atlongdesc,atheight,atwidth,atusemap,atismap]),
  182. (Name: 'input'; Flags: [];
  183. Attributes: atsattrs+[attype,atname,atvalue,atchecked,atdisabled,
  184. atreadonly,atsize,atmaxlength,atsrc,atalt,atusemap,attabindex,
  185. ataccesskey,atonfocus,atonblur,atonselect,atonchange,ataccept]),
  186. (Name: 'ins'; Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),
  187. (Name: 'isindex'; Flags: [efDeprecated]; Attributes: atscoreattrs+atsi18n),
  188. (Name: 'kbd'; Flags: efSubcontent; Attributes: atsattrs),
  189. (Name: 'label'; Flags: efSubcontent;
  190. Attributes: atsattrs+[atfor,ataccesskey,atonfocus,atonblur]),
  191. (Name: 'legend'; Flags: efSubcontent; Attributes: atsattrs+[ataccesskey]),
  192. (Name: 'li'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
  193. (Name: 'link'; Flags: [];
  194. Attributes: atsattrs+[atcharset,athref,athreflang,attype,atrel,atrev,atmedia]),
  195. (Name: 'map'; Flags: [efSubelementContent]; Attributes: atsattrs+[atname]),
  196. (Name: 'menu'; Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),
  197. (Name: 'meta'; Flags: []; Attributes: atsi18n+[athttpequiv,atname,atcontent,atscheme]),
  198. (Name: 'noframes'; Flags: efSubcontent; Attributes: atsattrs),
  199. (Name: 'noscript'; Flags: efSubcontent; Attributes: atsattrs),
  200. (Name: 'object'; Flags: efSubcontent;
  201. Attributes: atsattrs+[atdeclare,atclassid,atcodebase,atdata,attype,atcodetype,
  202. atarchive,atstandby,atheight,atwidth,atusemap,atname,attabindex]),
  203. (Name: 'ol'; Flags: [efSubelementContent]; Attributes: atsattrs),
  204. (Name: 'optgroup'; Flags: efSubcontent; Attributes: atsattrs+[atdisabled,atlabel]),
  205. (Name: 'option'; Flags: efSubcontent+[efEndTagOptional];
  206. Attributes: atsattrs+[atselected,atdisabled,atlabel,atvalue]),
  207. (Name: 'p'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
  208. (Name: 'param'; Flags: []; Attributes: [atid,atname,atvalue,atvaluetype,attype]),
  209. (Name: 'pre'; Flags: efSubcontent + [efPreserveWhitespace]; Attributes: atsattrs),
  210. (Name: 'q'; Flags: efSubcontent; Attributes: atsattrs+[atcite]),
  211. (Name: 's'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
  212. (Name: 'samp'; Flags: efSubcontent; Attributes: atsattrs),
  213. (Name: 'script'; Flags: [efPCDATAContent]; Attributes: [atcharset,attype,atsrc,atdefer]),
  214. (Name: 'select'; Flags: [efSubelementContent];
  215. Attributes: atsattrs+[atname,atsize,atmultiple,atdisabled,attabindex,atonfocus,
  216. atonblur,atonchange]),
  217. (Name: 'small'; Flags: efSubcontent; Attributes: atsattrs),
  218. (Name: 'span'; Flags: efSubcontent; Attributes: atsattrs),
  219. (Name: 'strike'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
  220. (Name: 'strong'; Flags: efSubcontent; Attributes: atsattrs),
  221. (Name: 'style'; Flags: [efPCDATAContent];
  222. Attributes: atsi18n+[attype,atmedia,attitle]),
  223. (Name: 'sub'; Flags: efSubcontent; Attributes: atsattrs),
  224. (Name: 'sup'; Flags: efSubcontent; Attributes: atsattrs),
  225. (Name: 'table'; Flags: [efSubelementContent];
  226. Attributes: atsattrs+[atsummary,atwidth,atborder,atframe,atrules,atcellspacing,atcellpadding]),
  227. (Name: 'tbody'; Flags: [efSubelementContent]; Attributes: atsattrs+atscellhalign+[atvalign]),
  228. (Name: 'td'; Flags: efSubcontent+[efEndTagOptional];
  229. Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
  230. (Name: 'textarea'; Flags: [efPCDATAContent];
  231. Attributes: atsattrs+[atname,atrows,atcols,atdisabled,atreadonly,attabindex,
  232. ataccesskey,atonfocus,atonblur,atonselect,atonchange]),
  233. (Name: 'tfoot'; Flags: [efSubelementContent,efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
  234. (Name: 'th'; Flags: efSubcontent+[efEndTagOptional];
  235. Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
  236. (Name: 'thead'; Flags: [efSubelementContent, efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
  237. (Name: 'title'; Flags: efSubcontent; Attributes: atsi18n),
  238. (Name: 'tr'; Flags: [efSubelementContent, efEndTagOptional];
  239. Attributes: atsattrs+atscellhalign+[atvalign]),
  240. (Name: 'tt'; Flags: efSubcontent; Attributes: atsattrs),
  241. (Name: 'u'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
  242. (Name: 'ul'; Flags: [efSubelementContent]; Attributes: atsattrs),
  243. (Name: 'var'; Flags: efSubcontent; Attributes: atsattrs),
  244. (Name: 'text'; Flags: efSubcontent; Attributes: []),
  245. (Name: 'unknown'; Flags: efSubcontent+[efNoChecks]; Attributes: [])
  246. );
  { Attribute names as they are written in HTML markup, indexed by
    THTMLAttributeTag. The entries must stay in exactly the same order as the
    members of the enumeration.
    Note: the name for atchar is 'char' (the alignment-character attribute of
    table cells); it must not be confused with the Pascal type name. }
  HTMLAttributeTag : array [THTMLAttributeTag] of String = (
    'abbr', 'alink', 'accept-charset', 'accept', 'accesskey', 'action', 'align', 'alt', 'archive',
    'axis', 'background', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 'charset',
    'checked', 'cite', 'class', 'classid', 'clear', 'code', 'codebase', 'codetype', 'color', 'cols',
    'colspan', 'compact', 'content', 'coords', 'data', 'datetime', 'declare', 'defer',
    'dir', 'disabled', 'enctype', 'face', 'for', 'frame', 'frameborder', 'headers',
    'height', 'href', 'hreflang', 'hspace', 'http-equiv', 'id', 'ismap', 'label', 'lang', 'link',
    'longdesc', 'marginheight', 'marginwidth', 'maxlength', 'media', 'method',
    'multiple', 'name', 'nohref', 'noresize', 'noshade', 'nowrap', 'object', 'onblur', 'onchange', 'onclick',
    'ondblclick', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload',
    'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup',
    'onreset', 'onselect', 'onsubmit', 'onunload', 'profile', 'prompt', 'readonly',
    'rel', 'rev', 'rows', 'rowspan', 'rules', 'scheme', 'scope', 'scrolling',
    'selected', 'shape', 'size', 'span', 'src', 'standby', 'start', 'style', 'summary',
    'tabindex', 'target', 'text', 'title', 'type', 'usemap', 'valign', 'value',
    'valuetype', 'version', 'vlink', 'vspace', 'width');
  { Textual forms of the enumerated attribute values declared in the
    interface. Each table is indexed by its enumeration and must list the
    strings in the same order as the enumeration members; the "empty" member
    of every enumeration maps to an empty string. }

  { The sixteen colour names, in THTMLColor order. }
  HTMLColor : array [THTMLColor] of string =
    ('Black', 'Silver', 'Gray', 'White', 'Maroon', 'Red', 'Purple', 'Fuchsia',
     'Green', 'Lime', 'Olive', 'Yellow', 'Navy', 'Blue', 'Teal', 'Aqua');
  HTMLDir : array [THTMLDir] of string = ('','LTR','RTL');
  { alchar is written as 'char' in markup (align="char"). }
  HTMLAlign : array [THTMLalign] of string = ('','left','center','right','justify','char');
  HTMLvalign : array [THTMLvalign] of string = ('','top','middle','bottom','baseline');
  HTMLframe : array [THTMLframe] of string =
    ('','void','above','below','hsides','vsides','lhs','rhs','box','border');
  HTMLrules : array [THTMLrules] of string = ('','none','groups','rows','cols','all');
  HTMLvaluetype : array [THTMLvaluetype] of string = ('','data','ref','object');
  HTMLshape : array [THTMLshape] of string = ('','default','rect','circle','poly');
  HTMLinputtype : array [THTMLinputtype] of string = ('','text','password','checkbox',
    'radio','submit','reset','file','hidden','image','button');
  HTMLbuttontype : array [THTMLbuttontype] of string = ('','submit','reset','button');
  277. function ResolveHTMLEntityReference(const Name: WideString;
  278. var Entity: WideChar): Boolean;
  279. function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
  280. implementation
  281. {$IFDEF FPC_DOTTEDUNITS}
  282. uses System.SysUtils;
  283. {$ELSE FPC_DOTTEDUNITS}
  284. uses SysUtils;
  285. {$ENDIF FPC_DOTTEDUNITS}
  286. { Define which elements auto-close other elements, modelled after libxml2.
  287. This is an array of variable-length lists, each terminated by etUnknown.
  288. Indices to first element of each list are provided by AutoCloseIndex array,
  289. which *must* be updated after any change. }
  290. const
  291. AutoCloseTab: array[0..277] of THTMLElementTag = (
  292. etform, etform, etp, ethr, eth1, eth2, eth3, eth4, eth5, eth6,
  293. etdl, etul, etol, etmenu, etdir, etaddress, etpre,
  294. ethead, etUnknown,
  295. ethead, etp, etUnknown,
  296. ettitle, etp, etUnknown,
  297. etbody, ethead, etstyle, etlink, ettitle, etp, etUnknown,
  298. etframeset, ethead, etstyle, etlink, ettitle, etp, etUnknown,
  299. etli, etp, eth1, eth2, eth3, eth4, eth5, eth6, etdl, etaddress,
  300. etpre, ethead, etli, etUnknown,
  301. ethr, etp, ethead, etUnknown,
  302. eth1, etp, ethead, etUnknown,
  303. eth2, etp, ethead, etUnknown,
  304. eth3, etp, ethead, etUnknown,
  305. eth4, etp, ethead, etUnknown,
  306. eth5, etp, ethead, etUnknown,
  307. eth6, etp, ethead, etUnknown,
  308. etdir, etp, ethead, etUnknown,
  309. etaddress, etp, ethead, etul, etUnknown,
  310. etpre, etp, ethead, etul, etUnknown,
  311. etblockquote, etp, ethead, etUnknown,
  312. etdl, etp, etdt, etmenu, etdir, etaddress, etpre,
  313. ethead, etUnknown,
  314. etdt, etp, etmenu, etdir, etaddress, etpre,
  315. ethead, etdd, etUnknown,
  316. etdd, etp, etmenu, etdir, etaddress, etpre,
  317. ethead, etdt, etUnknown,
  318. etul, etp, ethead, etol, etmenu, etdir, etaddress, etpre, etUnknown,
  319. etol, etp, ethead, etul, etUnknown,
  320. etmenu, etp, ethead, etul, etUnknown,
  321. etp, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etUnknown,
  322. etdiv, etp, ethead, etUnknown,
  323. etnoscript, etp, ethead, etUnknown,
  324. etcenter, etfont, etb, eti, etp, ethead, etUnknown,
  325. eta, eta, etUnknown,
  326. etcaption, etp, etUnknown,
  327. etcolgroup, etcaption, etcolgroup, etcol, etp, etUnknown,
  328. etcol, etcaption, etcol, etp, etUnknown,
  329. ettable, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etpre,
  330. eta, etUnknown,
  331. etth, etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
  332. ettd, etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
  333. ettr, etth, ettd, ettr, etcaption, etcol, etcolgroup, etp, etUnknown,
  334. etthead, etcaption, etcol, etcolgroup, etUnknown,
  335. ettfoot, etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
  336. ettbody, etp, etUnknown,
  337. ettbody, etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
  338. ettfoot, ettbody, etp, etUnknown,
  339. etoptgroup, etoption, etUnknown,
  340. etoption, etoption, etUnknown,
  341. etfieldset, etlegend, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6,
  342. etpre, eta, etUnknown,
  343. etUnknown);
  344. AutoCloseIndex: array[0..40] of Integer = (
  345. 0, 19, 22, 25, 32, 39, 53, 57, 61, 65, 69,
  346. 73, 77, 81, 85, 90, 95, 99, 108, 117, 126,
  347. 135, 140, 145, 155, 159, 163, 170, 173, 176,
  348. 182, 187, 199, 210, 221, 230, 235, 246, 258,
  349. 261, 264
  350. );
{ HTML entity names, each preceded by its character code stored as two bytes
  (high byte first). There is a separate list for each name length, and each
  list is sorted by entity name in ascending character-code order, because
  BSearch performs a binary search on the names.
  The sole purpose of using AnsiString here is staying compatible with Delphi 7,
  which is totally broken with respect to handling wide literals.
}
  356. ent_2 =
  357. #3#$9C + 'Mu'+
  358. #3#$9D + 'Nu'+
  359. #3#$A0 + 'Pi'+
  360. #3#$9E + 'Xi'+
  361. #$22#$65+ 'ge'+
  362. #0#62 + 'gt'+
  363. #$22#$64+ 'le'+
  364. #0#60 + 'lt'+
  365. #3#$BC + 'mu'+
  366. #$22#$60+ 'ne'+
  367. #$22#$0B+ 'ni'+
  368. #3#$BD + 'nu'+
  369. #$22#$28+ 'or'+
  370. #3#$C0 + 'pi'+
  371. #3#$BE + 'xi';
  372. ent_3 =
  373. #3#$A7 + 'Chi'+
  374. #0#208 + 'ETH'+
  375. #3#$97 + 'Eta'+
  376. #3#$A6 + 'Phi'+
  377. #3#$A8 + 'Psi'+
  378. #3#$A1 + 'Rho'+
  379. #3#$A4 + 'Tau'+
  380. #0#38 + 'amp'+
  381. #$22#$27+ 'and'+
  382. #$22#$20+ 'ang'+
  383. #$22#$29+ 'cap'+
  384. #3#$C7 + 'chi'+
  385. #$22#$2A+ 'cup'+
  386. #0#176 + 'deg'+
  387. #3#$B7 + 'eta'+
  388. #0#240 + 'eth'+
  389. #$22#$2B+ 'int'+
  390. #$25#$CA+ 'loz'+
  391. #$20#$0E+ 'lrm'+
  392. #0#172 + 'not'+
  393. #3#$C6 + 'phi'+
  394. #3#$D6 + 'piv'+
  395. #3#$C8 + 'psi'+
  396. #0#174 + 'reg'+
  397. #3#$C1 + 'rho'+
  398. #$20#$0F+ 'rlm'+
  399. #0#173 + 'shy'+
  400. #$22#$3C+ 'sim'+
  401. #$22#$82+ 'sub'+
  402. #$22#$11+ 'sum'+
  403. #$22#$83+ 'sup'+
  404. #3#$C4 + 'tau'+
  405. #0#168 + 'uml'+
  406. #0#165 + 'yen'+
  407. #$20#$0D+ 'zwj';
  408. ent_4 =
  409. #0#196 + 'Auml'+
  410. #3#$92 + 'Beta'+
  411. #0#203 + 'Euml'+
  412. #3#$99 + 'Iota'+
  413. #0#207 + 'Iuml'+
  414. #0#214 + 'Ouml'+
  415. #0#220 + 'Uuml'+
  416. #1#$78 + 'Yuml'+
  417. #3#$96 + 'Zeta'+
  418. #0#228 + 'auml'+
  419. #3#$B2 + 'beta'+
  420. #$20#$22+ 'bull'+
  421. #0#162 + 'cent'+
  422. #2#$C6 + 'circ'+
  423. #$22#$45+ 'cong'+
  424. #0#169 + 'copy'+
  425. #$21#$D3+ 'dArr'+
  426. #$21#$93+ 'darr'+
  427. #$20#$03+ 'emsp'+
  428. #$20#$02+ 'ensp'+
  429. #0#235 + 'euml'+
  430. #$20#$AC+ 'euro'+
  431. #1#$92 + 'fnof'+
  432. #$21#$D4+ 'hArr'+
  433. #$21#$94+ 'harr'+
  434. #3#$B9 + 'iota'+
  435. #$22#$08+ 'isin'+
  436. #0#239 + 'iuml'+
  437. #$21#$D0+ 'lArr'+
  438. #$23#$29+ 'lang'+
  439. #$21#$90+ 'larr'+
  440. #0#175 + 'macr'+
  441. #0#160 + 'nbsp'+
  442. #$22#$84+ 'nsub'+
  443. #0#170 + 'ordf'+
  444. #0#186 + 'ordm'+
  445. #0#246 + 'ouml'+
  446. #0#182 + 'para'+
  447. #$22#$02+ 'part'+
  448. #$22#$A5+ 'perp'+
  449. #$22#$0F+ 'prod'+
  450. #$22#$1D+ 'prop'+
  451. #0#34 + 'quot'+
  452. #$21#$D2+ 'rArr'+
  453. #$23#$2A+ 'rang'+
  454. #$21#$92+ 'rarr'+
  455. #$21#$1C+ 'real'+
  456. #$22#$C5+ 'sdot'+
  457. #0#167 + 'sect'+
  458. #$22#$86+ 'sube'+
  459. #0#185 + 'sup1'+
  460. #0#178 + 'sup2'+
  461. #0#179 + 'sup3'+
  462. #$22#$87+ 'supe'+
  463. #$21#$D1+ 'uArr'+
  464. #$21#$91+ 'uarr'+
  465. #0#252 + 'uuml'+
  466. #0#255 + 'yuml'+
  467. #3#$B6 + 'zeta'+
  468. #$20#$0C+ 'zwnj';
  469. ent_5 =
  470. #0#198 + 'AElig'+
  471. #0#194 + 'Acirc'+
  472. #3#$91 + 'Alpha'+
  473. #0#197 + 'Aring'+
  474. #3#$94 + 'Delta'+
  475. #0#202 + 'Ecirc'+
  476. #3#$93 + 'Gamma'+
  477. #0#206 + 'Icirc'+
  478. #3#$9A + 'Kappa'+
  479. #1#$52 + 'OElig'+
  480. #0#212 + 'Ocirc'+
  481. #3#$A9 + 'Omega'+
  482. #$20#$33+ 'Prime'+
  483. #3#$A3 + 'Sigma'+
  484. #0#222 + 'THORN'+
  485. #3#$98 + 'Theta'+
  486. #0#219 + 'Ucirc'+
  487. #0#226 + 'acirc'+
  488. #0#180 + 'acute'+
  489. #0#230 + 'aelig'+
  490. #3#$B1 + 'alpha'+
  491. #0#229 + 'aring'+
  492. #$22#$48+ 'asymp'+
  493. #$20#$1E+ 'bdquo'+
  494. #0#184 + 'cedil'+
  495. #$26#$63+ 'clubs'+
  496. #$21#$B5+ 'crarr'+
  497. #3#$B4 + 'delta'+
  498. #$26#$66+ 'diams'+
  499. #0#234 + 'ecirc'+
  500. #$22#$05+ 'empty'+
  501. #$22#$61+ 'equiv'+
  502. #$22#$03+ 'exist'+
  503. #$20#$44+ 'frasl'+
  504. #3#$B3 + 'gamma'+
  505. #0#238 + 'icirc'+
  506. #0#161 + 'iexcl'+
  507. #$21#$11+ 'image'+
  508. #$22#$1E+ 'infin'+
  509. #3#$BA + 'kappa'+
  510. #0#171 + 'laquo'+
  511. #$23#$08+ 'lceil'+
  512. #$20#$1C+ 'ldquo'+
  513. #$20#$18+ 'lsquo'+
  514. #$20#$14+ 'mdash'+
  515. #0#181 + 'micro'+
  516. #$22#$12+ 'minus'+
  517. #$22#$07+ 'nabla'+
  518. #$20#$13+ 'ndash'+
  519. #$22#$09+ 'notin'+
  520. #0#244 + 'ocirc'+
  521. #1#$53 + 'oelig'+
  522. #$20#$3E+ 'oline'+
  523. #3#$C9 + 'omega'+
  524. #$22#$95+ 'oplus'+
  525. #0#163 + 'pound'+
  526. #$20#$32+ 'prime'+
  527. #$22#$1A+ 'radic'+
  528. #0#187 + 'raquo'+
  529. #$23#$09+ 'rceil'+
  530. #$20#$1D+ 'rdquo'+
  531. #$20#$19+ 'rsquo'+
  532. #$20#$1A+ 'sbquo'+
  533. #3#$C3 + 'sigma'+
  534. #0#223 + 'szlig'+
  535. #3#$B8 + 'theta'+
  536. #0#254 + 'thorn'+
  537. #2#$DC + 'tilde'+
  538. #0#215 + 'times'+
  539. #$21#$22+ 'trade'+
  540. #0#251 + 'ucirc'+
  541. #3#$D2 + 'upsih';
  542. ent_6 =
  543. #0#193 + 'Aacute'+
  544. #0#192 + 'Agrave'+
  545. #0#195 + 'Atilde'+
  546. #0#199 + 'Ccedil'+
  547. #$20#$21+ 'Dagger'+
  548. #0#201 + 'Eacute'+
  549. #0#200 + 'Egrave'+
  550. #0#205 + 'Iacute'+
  551. #0#204 + 'Igrave'+
  552. #3#$9B + 'Lambda'+
  553. #0#209 + 'Ntilde'+
  554. #0#211 + 'Oacute'+
  555. #0#210 + 'Ograve'+
  556. #0#216 + 'Oslash'+
  557. #0#213 + 'Otilde'+
  558. #1#$60 + 'Scaron'+
  559. #0#218 + 'Uacute'+
  560. #0#217 + 'Ugrave'+
  561. #0#221 + 'Yacute'+
  562. #0#225 + 'aacute'+
  563. #0#224 + 'agrave'+
  564. #0#227 + 'atilde'+
  565. #0#166 + 'brvbar'+
  566. #0#231 + 'ccedil'+
  567. #0#164 + 'curren'+
  568. #$20#$20+ 'dagger'+
  569. #0#247 + 'divide'+
  570. #0#233 + 'eacute'+
  571. #0#232 + 'egrave'+
  572. #$22#$00+ 'forall'+
  573. #0#189 + 'frac12'+
  574. #0#188 + 'frac14'+
  575. #0#190 + 'frac34'+
  576. #$26#$65+ 'hearts'+
  577. #$20#$26+ 'hellip'+
  578. #0#237 + 'iacute'+
  579. #0#236 + 'igrave'+
  580. #0#191 + 'iquest'+
  581. #3#$BB + 'lambda'+
  582. #$23#$0A+ 'lfloor'+
  583. #$22#$17+ 'lowast'+
  584. #$20#$39+ 'lsaquo'+
  585. #0#183 + 'middot'+
  586. #0#241 + 'ntilde'+
  587. #0#243 + 'oacute'+
  588. #0#242 + 'ograve'+
  589. #0#248 + 'oslash'+
  590. #0#245 + 'otilde'+
  591. #$22#$97+ 'otimes'+
  592. #$20#$30+ 'permil'+
  593. #0#177 + 'plusmn'+
  594. #$23#$0B+ 'rfloor'+
  595. #$20#$3A+ 'rsaquo'+
  596. #1#$61 + 'scaron'+
  597. #3#$C2 + 'sigmaf'+
  598. #$26#$60+ 'spades'+
  599. #$22#$34+ 'there4'+
  600. #$20#$09+ 'thinsp'+
  601. #0#250 + 'uacute'+
  602. #0#249 + 'ugrave'+
  603. #$21#$18+ 'weierp'+
  604. #0#253 + 'yacute';
  605. ent_7 =
  606. #3#$95 + 'Epsilon'+
  607. #3#$9F + 'Omicron'+
  608. #3#$A5 + 'Upsilon'+
  609. #$21#$35+ 'alefsym'+
  610. #3#$B5 + 'epsilon'+
  611. #3#$BF + 'omicron'+
  612. #3#$C5 + 'upsilon';
  613. ent_8 =
  614. #3#$D1 + 'thetasym';
  615. strs: array[2..8] of string = (
  616. ent_2, ent_3, ent_4, ent_5, ent_6, ent_7, ent_8
  617. );
  618. function BSearch(P: PWideChar; Len: Integer; const data: string): WideChar;
  619. var
  620. L, H, mid, J, C: Integer;
  621. begin
  622. Result := #0;
  623. L := 0;
  624. H := (Length(data)+1) div (Len+2);
  625. while L <= H do
  626. begin
  627. mid := L + ((H - L) shr 1);
  628. J := 0;
  629. repeat
  630. C := ord(P[J]) - ord(data[mid*(Len+2)+3+J]);
  631. Inc(J);
  632. until (C <> 0) or (J >= Len);
  633. if C > 0 then L := mid + 1 else
  634. begin
  635. H := mid - 1;
  636. if C = 0 then
  637. begin
  638. Result := WideChar((ord(data[mid*(Len+2)+1]) shl 8) or ord(data[mid*(Len+2)+2]));
  639. Exit;
  640. end;
  641. end;
  642. end;
  643. end;
  644. {
  645. Remaining issues:
  646. 1) UTF-16 surrogate pairs
  647. 2) HTML accepts uppercase 'X' for hex notation, but XML does not.
  648. 3) 'apos' is used in xml/xhtml, but not in HTML 4.01
  649. }
  650. function ResolveHTMLEntityReference(const Name: WideString;
  651. var Entity: WideChar): Boolean;
  652. var
  653. i, L: Integer;
  654. value: Integer;
  655. begin
  656. L := Length(Name);
  657. if (L > 1) and (Name[1] = '#') then
  658. begin
  659. value := 0;
  660. if (Name[2] = 'x') or (Name[2] = 'X') then
  661. begin
  662. i := 3;
  663. while i <= L do
  664. begin
  665. case Name[i] of
  666. '0'..'9': Value := Value * 16 + Ord(Name[i]) - Ord('0');
  667. 'a'..'f': Value := Value * 16 + Ord(Name[i]) - (Ord('a') - 10);
  668. 'A'..'F': Value := Value * 16 + Ord(Name[i]) - (Ord('A') - 10);
  669. else
  670. Break;
  671. end;
  672. Inc(i);
  673. end;
  674. end
  675. else
  676. begin
  677. i := 2;
  678. while i <= L do
  679. begin
  680. case Name[i] of
  681. '0'..'9': Value := Value * 10 + Ord(Name[i]) - Ord('0');
  682. else
  683. Break;
  684. end;
  685. Inc(i);
  686. end;
  687. end;
  688. Result := (i = L+1);
  689. if Result then
  690. Entity := WideChar(Value);
  691. end
  692. else
  693. begin
  694. case L of
  695. 2..8: Entity := BSearch(PWideChar(Name), L, strs[L]);
  696. else
  697. Entity := #0;
  698. end;
  699. Result := (Entity <> #0);
  700. end;
  701. end;
  702. function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
  703. var
  704. i, j: Integer;
  705. begin
  706. Result := False;
  707. for i := 0 to high(AutoCloseIndex) do
  708. if NewTag = AutoCloseTab[AutoCloseIndex[i]] then
  709. begin
  710. j := AutoCloseIndex[i]+1;
  711. while AutoCloseTab[j] <> etUnknown do
  712. begin
  713. if AutoCloseTab[j] = OldTag then
  714. begin
  715. Result := True;
  716. Exit;
  717. end;
  718. Inc(j);
  719. end;
  720. Exit;
  721. end;
  722. end;
  723. end.