htmldefs.pp 29 KB


  1. {
  2. $Id: htmldefs.pp,v 1.2 2006/01/03 23:33:23 lukvdl Exp $
  3. This file is part of the Free Component Library
  4. HTML definitions and utility functions
  5. Copyright (c) 2000-2002 by
  6. Areca Systems GmbH / Sebastian Guenther, [email protected]
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. unit HTMLDefs;
  14. {$MODE objfpc}
  15. {$H+}
  16. interface
  // Types used to represent HTML attribute values on the Pascal side.
  17. type
  // Plain aliases: they only name the intended kind of value; no validation
  // is attached to any of them in this unit.
  18. THTMLCData = string;
  19. THTMLID = string;
  20. THTMLName = string;
  21. THTMLIDRef = string;
  22. THTMLIDRefs = string;
  23. THTMLNumber = longint;
  24. THTMLText = THTMLCData;
  25. THTMLCharsets = THTMLCData;
  26. THTMLContentTypes = THTMLCData;
  27. THTMLURI = string;
  28. THTMLCharacter = char;
  // Enumerated attribute values. Every enum below starts with an xxEmpty
  // member; the matching name table (HTMLDir, HTMLAlign, HTMLvalign, ...)
  // maps that member to the empty string. Member order must match the
  // corresponding name table, which is indexed by the enum.
  29. THTMLDir = (dirEmpty,dirLeftToRight,dirRightToLeft);
  30. THTMLalign = (alEmpty,alleft,alcenter,alright,aljustify,alchar);
  31. THTMLvalign = (vaEmpty,vatop,vamiddle,vabottom,vabaseline);
  // NOTE(review): 'frlefthandsise' looks like a misspelling of
  // 'frlefthandside'. It is a public identifier, so renaming it could break
  // callers; the HTMLframe table maps it to 'lhs'.
  32. THTMLframe = (frEmpty,frvoid,frabove,frbelow,frhsides,frvsides,frlefthandsise,frrighthandside,frbox,frborder);
  33. THTMLrules = (ruEmpty,runone,rugroups,rurows,rucols,ruall);
  34. THTMLvaluetype = (vtEmpty,vtdata,vtref,vtobject);
  35. THTMLshape = (shEmpty,shdefault,shrect,shcircle,shpoly);
  36. THTMLinputtype = (itEmpty,ittext,itpassword,itcheckbox,itradio,itsubmit,itreset,itfile,ithidden,itimage,itbutton);
  37. THTMLbuttontype = (btEmpty,btsubmit,btreset,btbutton);
  // The sixteen named colors. The comment under each row of identifiers
  // gives the #RRGGBB value of every name in that row, in the same order.
  // Member order must match the HTMLColor name table, which is indexed by
  // this enum.
  38. THTMLColor = (
  39. clHTMLBlack, clHTMLSilver, clHTMLGray, clHTMLWhite, clHTMLMaroon,
  40. // #000000 #C0C0C0 #808080 #FFFFFF #800000
  41. clHTMLRed, clHTMLPurple, clHTMLFuchsia,clHTMLGreen, clHTMLLime, clHTMLOlive,
  42. // #FF0000 #800080 #FF00FF #008000 #00FF00 #808000
  43. clHTMLYellow,clHTMLNavy, clHTMLBlue, clHTMLTeal, clHTMLAqua
  44. // #FFFF00 #000080 #0000FF #008080 #00FFFF
  45. );
  // One member per known attribute name: 'at' followed by the attribute
  // name with hyphens removed (atacceptcharset = 'accept-charset',
  // athttpequiv = 'http-equiv'). The HTMLAttributeTag string table is
  // indexed by this enum, so members must stay in exactly the same order as
  // the strings in that table.
  46. THTMLAttributeTag = (
  47. atabbr, atalink, atacceptcharset, ataccept, ataccesskey, ataction, atalign, atalt, atarchive,
  48. ataxis, atbackground, atbgcolor, atborder, atcellpadding, atcellspacing, atchar, atcharoff, atcharset,
  49. atchecked, atcite, atclass, atclassid, atclear, atcode, atcodebase, atcodetype, atcolor, atcols,
  50. atcolspan, atcompact, atcontent, atcoords, atdata, atdatetime, atdeclare,atdefer,
  51. atdir, atdisabled, atenctype, atface, atfor, atframe, atframeborder, atheaders,
  52. atheight, athref, athreflang, athspace, athttpequiv, atid, atismap, atlabel, atlang, atlink,
  53. atlongdesc, atmarginheight, atmarginwidth, atmaxlength, atmedia, atmethod,
  54. atmultiple, atname, atnohref, atnoresize, atnoshade, atnowrap, atobject, atonblur, atonchange, atonclick,
  55. atondblclick, atonfocus, atonkeydown, atonkeypress, atonkeyup, atonload,
  56. atonmousedown, atonmousemove, atonmouseout, atonmouseover, atonmouseup,
  57. atonreset, atonselect, atonsubmit, atonunload, atprofile, atprompt, atreadonly,
  58. atrel, atrev, atrows, atrowspan, atrules, atscheme, atscope, atscrolling,
  59. atselected, atshape, atsize, atspan, atsrc, atstandby, atstart, atstyle, atsummary,
  60. attabindex, attarget, attext, attitle, attype, atusemap, atvalign, atvalue,
  61. atvaluetype, atversion, atvlink, atvspace, atwidth
  62. );
  // Set type used for the per-element attribute lists and the at* set
  // constants declared further down.
  63. THTMLAttributeSet = set of THTMLAttributeTag;
  // One member per known element: 'et' followed by the tag name. The
  // HTMLElementProps table is indexed by this enum, so members must stay in
  // the same order as the entries of that table.
  // The last two members are not real tags: the table names them 'text' and
  // 'unknown'. etUnknown is additionally used as the list terminator inside
  // AutoCloseTab in the implementation section.
  64. THTMLElementTag = (
  65. eta, etabbr, etacronym, etaddress, etapplet, etarea, etb, etbase,
  66. etbasefont, etbdo, etbig, etblockquote, etbody, etbr, etbutton,
  67. etcaption, etcenter, etcite, etcode, etcol, etcolgroup, etdd, etdel,
  68. etdfn, etdir, etdiv, etdl, etdt, etem, etfieldset, etfont, etform,
  69. etframe, etframeset, eth1, eth2, eth3, eth4, eth5, eth6, ethead, ethr,
  70. ethtml, eti, etiframe, etimg, etinput, etins, etisindex, etkbd, etlabel,
  71. etlegend, etli, etlink, etmap, etmenu, etmeta, etnoframes, etnoscript,
  72. etobject, etol, etoptgroup, etoption, etp, etparam, etpre, etq, ets,
  73. etsamp, etscript, etselect, etsmall, etspan, etstrike, etstrong,
  74. etstyle, etsub, etsup, ettable, ettbody, ettd, ettextarea, ettfoot,
  75. etth, etthead, ettitle, ettr, ettt, etu, etul, etvar,
  76. etText, etUnknown
  77. );
  78. THTMLElementTagSet = set of THTMLElementTag;
  // Per-element flags stored in THTMLElementProps.Flags.
  79. THTMLElementFlag = (
  80. efSubelementContent, // may have subelements
  81. efPCDATAContent, // may have PCDATA content
  82. efPreserveWhitespace, // preserve all whitespace
  83. efDeprecated, // can be dropped in future versions
  84. efNoChecks, // Checks (attributes,subtags,...) can only be implemented in descendants
  // Presumably: the element's end tag may be omitted in the document. The
  // HTMLElementProps table sets it for colgroup, dd, dt, li, option, p, td,
  // tfoot, th, thead and tr.
  85. efEndTagOptional
  86. );
  87. THTMLElementFlags = set of THTMLElementFlag;
  88. PHTMLElementProps = ^THTMLElementProps;
  // Static description of one element; one record per THTMLElementTag is
  // stored in the HTMLElementProps table.
  89. THTMLElementProps = record
  // Tag name as written in the table (lower case there).
  90. Name: String;
  // Content and handling flags, see THTMLElementFlag.
  91. Flags: THTMLElementFlags;
  // Attributes listed for this element.
  92. Attributes: THTMLAttributeSet;
  93. end;
  // Named attribute/flag sets shared by the HTMLElementProps table below.
  94. const
  // Attributes named as boolean by this unit.
  // presumably written without a value in HTML - confirm against the writer code
  95. BooleanAttributes = [atchecked,atdeclare,atdefer,atdisabled,atnohref,atnoresize,
  96. atmultiple,atreadonly,atselected];
  // Attributes this unit classifies as deprecated.
  97. DeprecatedAttributes = [atalink, atbackground, atbgcolor, atclear, atcode, atcolor,
  98. atcompact, atface, athspace, atlink, atnoshade, atnowrap, atobject, atprompt,
  99. atstart, attext, atvlink, atversion, atvspace];
  // Flag combination for elements that accept both subelements and PCDATA.
  100. efSubcontent = [efSubelementContent, efPCDATAContent];
  // Internationalisation attributes.
  101. atsi18n = [atlang, atdir];
  // Core attributes.
  102. atscoreattrs = [atid,atclass,atstyle,attitle];
  // Mouse and keyboard event handler attributes.
  103. atsevents = [atonclick,atondblclick,atonmousedown,atonmouseup,atonmouseover,
  104. atonmousemove,atonmouseout,atonkeypress,atonkeydown,atonkeyup];
  // Union of the three sets above; used by most table entries.
  105. atsattrs = atsevents + atscoreattrs + atsi18n;
  // Horizontal alignment attributes used by the table-related elements.
  106. atscellhalign = [atalign, atchar, atcharoff];
  // The block below is commented out in the original source and is not
  // valid Pascal as written (':=' in a const section, dangling '+').
  107. { etsStructured := [];
  108. etsDivisions := [];
  109. etsLists := [];
  110. etsLinks := [];
  111. etsObjects := [etImg, etObject, etApplet, etMap, etArea];
  112. etsForms := [etForm];
  113. etsText = etsStructured + etsDivisions + etsLists + etsLinks + etsObjects +
  114. etsForms +
  115. etTable + etText + etScript + ; }
  // Static description of every element, indexed by THTMLElementTag: tag
  // name, content/handling flags and the set of attributes listed for it.
  // Entries MUST appear in exactly the same order as the members of
  // THTMLElementTag. The final two entries ('text', 'unknown') belong to the
  // pseudo-tags etText and etUnknown and list no attributes.
  116. HTMLElementProps: array[THTMLElementTag] of THTMLElementProps = (
  117. (Name: 'a'; Flags: efSubcontent;
  118. Attributes: atsattrs+[atcharset,attype,atname,athref,athreflang,atrel,atrev,
  119. ataccesskey,atshape,atcoords,attabindex,atonfocus,atonblur]),
  120. (Name: 'abbr'; Flags: efSubcontent; Attributes: atsattrs),
  121. (Name: 'acronym'; Flags: efSubcontent; Attributes: atsattrs),
  122. (Name: 'address'; Flags: efSubcontent; Attributes: atsattrs),
  123. (Name: 'applet'; Flags: efSubcontent+[efDeprecated];
  124. Attributes: atscoreattrs+[atcodebase,atarchive,atalt,atname,atwidth,atheight]),
  125. (Name: 'area'; Flags: [];
  126. Attributes: atsattrs+[atshape,atcoords,athref,atnohref,atalt,attabindex,
  127. ataccesskey,atonfocus,atonblur]),
  128. (Name: 'b'; Flags: efSubcontent; Attributes: atsattrs),
  129. (Name: 'base'; Flags: []; Attributes: [athref]),
  130. (Name: 'basefont'; Flags: [efDeprecated]; Attributes: [atid]),
  131. (Name: 'bdo'; Flags: efSubcontent; Attributes: atscoreattrs+[atlang,atdir]),
  132. (Name: 'big'; Flags: efSubcontent; Attributes: atsattrs),
  133. (Name: 'blockquote';Flags: [efSubelementContent]; Attributes: atsattrs+[atcite]),
  134. (Name: 'body'; Flags: [efSubelementContent];
  135. Attributes: atsAttrs+[atonload, atonunload]),
  136. (Name: 'br'; Flags: []; Attributes: atscoreattrs),
  137. (Name: 'button'; Flags: efSubcontent;
  138. Attributes: atsattrs+[atname,atvalue,attype,atdisabled,attabindex,
  139. ataccesskey,atonfocus,atonblur]),
  140. (Name: 'caption'; Flags: efSubcontent; Attributes: atsattrs),
  141. (Name: 'center'; Flags: [efSubelementContent,efDeprecated]; Attributes: []),
  142. (Name: 'cite'; Flags: efSubcontent; Attributes: atsattrs),
  143. (Name: 'code'; Flags: efSubcontent; Attributes: atsattrs),
  144. (Name: 'col'; Flags: [];
  145. Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
  146. (Name: 'colgroup'; Flags: [efSubelementContent, efEndTagOptional];
  147. Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
  148. (Name: 'dd'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
  149. (Name: 'del'; Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),
  150. (Name: 'dfn'; Flags: efSubcontent; Attributes: atsattrs),
  151. (Name: 'dir'; Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),
  152. (Name: 'div'; Flags: efSubContent; Attributes: atsattrs),
  153. (Name: 'dl'; Flags: [efSubelementContent]; Attributes: atsattrs),
  154. (Name: 'dt'; Flags: [efPCDataContent, efEndTagOptional]; Attributes: atsattrs),
  155. (Name: 'em'; Flags: efSubcontent; Attributes: atsattrs),
  156. (Name: 'fieldset'; Flags: efSubcontent; Attributes: atsattrs),
  157. (Name: 'font'; Flags: efSubcontent+[efDeprecated]; Attributes: atscoreattrs+atsi18n),
  158. (Name: 'form'; Flags: [efSubelementContent];
  159. Attributes: atsattrs+[ataction,atmethod,atenctype,atonsubmit,atonreset,atacceptcharset]),
  160. (Name: 'frame'; Flags: [];
  161. Attributes: atscoreattrs+[atlongdesc,atname,atsrc,atframeborder,
  162. atmarginwidth,atmarginheight,atnoresize,atscrolling]),
  163. (Name: 'frameset'; Flags: efSubcontent;
  164. Attributes: atsCoreattrs+[atrows,atcols,atonload,atonunload]),
  165. (Name: 'h1'; Flags: efSubcontent; Attributes: atsattrs),
  166. (Name: 'h2'; Flags: efSubcontent; Attributes: atsattrs),
  167. (Name: 'h3'; Flags: efSubcontent; Attributes: atsattrs),
  168. (Name: 'h4'; Flags: efSubcontent; Attributes: atsattrs),
  169. (Name: 'h5'; Flags: efSubcontent; Attributes: atsattrs),
  170. (Name: 'h6'; Flags: efSubcontent; Attributes: atsattrs),
  171. (Name: 'head'; Flags: [efSubelementContent]; Attributes: atsi18n+[atprofile]),
  172. (Name: 'hr'; Flags: []; Attributes: atscoreattrs+atsevents),
  173. (Name: 'html'; Flags: [efSubelementContent]; Attributes: atsi18n),
  174. (Name: 'i'; Flags: efSubcontent; Attributes: atsattrs),
  175. (Name: 'iframe'; Flags: [efSubelementContent];
  176. Attributes: atscoreattrs+[atlongdesc,atname,atsrc,atframeborder,atmarginwidth,
  177. atmarginheight,atscrolling,atalign,atheight,atwidth]),
  178. (Name: 'img'; Flags: [];
  179. Attributes: atsattrs+[atsrc,atalt,atlongdesc,atheight,atwidth,atusemap,atismap]),
  180. (Name: 'input'; Flags: [];
  181. Attributes: atsattrs+[attype,atname,atvalue,atchecked,atdisabled,
  182. atreadonly,atsize,atmaxlength,atsrc,atalt,atusemap,attabindex,
  183. ataccesskey,atonfocus,atonblur,atonselect,atonchange,ataccept]),
  184. (Name: 'ins'; Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),
  185. (Name: 'isindex'; Flags: [efDeprecated]; Attributes: atscoreattrs+atsi18n),
  186. (Name: 'kbd'; Flags: efSubcontent; Attributes: atsattrs),
  187. (Name: 'label'; Flags: efSubcontent;
  188. Attributes: atsattrs+[atfor,ataccesskey,atonfocus,atonblur]),
  189. (Name: 'legend'; Flags: efSubcontent; Attributes: atsattrs+[ataccesskey]),
  190. (Name: 'li'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
  191. (Name: 'link'; Flags: [];
  192. Attributes: atsattrs+[atcharset,athref,athreflang,attype,atrel,atrev,atmedia]),
  193. (Name: 'map'; Flags: [efSubelementContent]; Attributes: atsattrs+[atname]),
  194. (Name: 'menu'; Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),
  195. (Name: 'meta'; Flags: []; Attributes: atsi18n+[athttpequiv,atname,atcontent,atscheme]),
  196. (Name: 'noframes'; Flags: efSubcontent; Attributes: atsattrs),
  197. (Name: 'noscript'; Flags: efSubcontent; Attributes: atsattrs),
  198. (Name: 'object'; Flags: efSubcontent;
  199. Attributes: atsattrs+[atdeclare,atclassid,atcodebase,atdata,attype,atcodetype,
  200. atarchive,atstandby,atheight,atwidth,atusemap,atname,attabindex]),
  201. (Name: 'ol'; Flags: [efSubelementContent]; Attributes: atsattrs),
  202. (Name: 'optgroup'; Flags: efSubcontent; Attributes: atsattrs+[atdisabled,atlabel]),
  203. (Name: 'option'; Flags: efSubcontent+[efEndTagOptional];
  204. Attributes: atsattrs+[atselected,atdisabled,atlabel,atvalue]),
  205. (Name: 'p'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
  206. (Name: 'param'; Flags: []; Attributes: [atid,atname,atvalue,atvaluetype,attype]),
  207. (Name: 'pre'; Flags: efSubcontent + [efPreserveWhitespace]; Attributes: atsattrs),
  208. (Name: 'q'; Flags: efSubcontent; Attributes: atsattrs+[atcite]),
  209. (Name: 's'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
  210. (Name: 'samp'; Flags: efSubcontent; Attributes: atsattrs),
  211. (Name: 'script'; Flags: [efPCDATAContent]; Attributes: [atcharset,attype,atsrc,atdefer]),
  212. (Name: 'select'; Flags: [efSubelementContent];
  213. Attributes: atsattrs+[atname,atsize,atmultiple,atdisabled,attabindex,atonfocus,
  214. atonblur,atonchange]),
  215. (Name: 'small'; Flags: efSubcontent; Attributes: atsattrs),
  216. (Name: 'span'; Flags: efSubcontent; Attributes: atsattrs),
  217. (Name: 'strike'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
  218. (Name: 'strong'; Flags: efSubcontent; Attributes: atsattrs),
  219. (Name: 'style'; Flags: [efPCDATAContent];
  220. Attributes: atsi18n+[attype,atmedia,attitle]),
  221. (Name: 'sub'; Flags: efSubcontent; Attributes: atsattrs),
  222. (Name: 'sup'; Flags: efSubcontent; Attributes: atsattrs),
  223. (Name: 'table'; Flags: [efSubelementContent];
  224. Attributes: atsattrs+[atsummary,atwidth,atborder,atframe,atrules,atcellspacing,atcellpadding]),
  225. (Name: 'tbody'; Flags: [efSubelementContent]; Attributes: atsattrs+atscellhalign+[atvalign]),
  226. (Name: 'td'; Flags: efSubcontent+[efEndTagOptional];
  227. Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
  228. (Name: 'textarea'; Flags: [efPCDATAContent];
  229. Attributes: atsattrs+[atname,atrows,atcols,atdisabled,atreadonly,attabindex,
  230. ataccesskey,atonfocus,atonblur,atonselect,atonchange]),
  231. (Name: 'tfoot'; Flags: [efSubelementContent,efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
  232. (Name: 'th'; Flags: efSubcontent+[efEndTagOptional];
  233. Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
  234. (Name: 'thead'; Flags: [efSubelementContent, efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
  235. (Name: 'title'; Flags: efSubcontent; Attributes: atsi18n),
  236. (Name: 'tr'; Flags: [efSubelementContent, efEndTagOptional];
  237. Attributes: atsattrs+atscellhalign+[atvalign]),
  238. (Name: 'tt'; Flags: efSubcontent; Attributes: atsattrs),
  239. (Name: 'u'; Flags: efSubcontent+[efDeprecated]; Attributes: atsattrs),
  240. (Name: 'ul'; Flags: [efSubelementContent]; Attributes: atsattrs),
  241. (Name: 'var'; Flags: efSubcontent; Attributes: atsattrs),
  242. (Name: 'text'; Flags: efSubcontent; Attributes: []),
  243. (Name: 'unknown'; Flags: efSubcontent+[efNoChecks]; Attributes: [])
  244. );
  // Attribute names as written in markup, indexed by THTMLAttributeTag.
  // The strings must stay in the same order as the enum members.
  245. HTMLAttributeTag : array [THTMLAttributeTag] of String = (
  246. 'abbr', 'alink', 'accept-charset', 'accept', 'accesskey', 'action', 'align', 'alt', 'archive',
  247. 'axis', 'background', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 'charset',
  248. 'checked', 'cite', 'class', 'classid', 'clear', 'code', 'codebase', 'codetype', 'color', 'cols',
  249. 'colspan', 'compact', 'content', 'coords', 'data', 'datetime', 'declare', 'defer',
  250. 'dir', 'disabled', 'enctype', 'face', 'for', 'frame', 'frameborder', 'headers',
  251. 'height', 'href', 'hreflang', 'hspace', 'http-equiv', 'id', 'ismap', 'label', 'lang', 'link',
  252. 'longdesc', 'marginheight', 'marginwidth', 'maxlength', 'media', 'method',
  253. 'multiple', 'name', 'nohref', 'noresize', 'noshade', 'nowrap', 'object', 'onblur', 'onchange', 'onclick',
  254. 'ondblclick', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload',
  255. 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup',
  256. 'onreset', 'onselect', 'onsubmit', 'onunload', 'profile', 'prompt', 'readonly',
  257. 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scheme', 'scope', 'scrolling',
  258. 'selected', 'shape', 'size', 'span', 'src', 'standby', 'start', 'style', 'summary',
  259. 'tabindex', 'target', 'text', 'title', 'type', 'usemap', 'valign', 'value',
  260. 'valuetype', 'version', 'vlink', 'vspace', 'width');
  // Color names indexed by THTMLColor.
  261. HTMLColor : array [THTMLColor] of string =
  262. ('Black', 'Silver', 'Gray', 'White', 'Maroon', 'Red', 'Purple', 'Fuchsia',
  263. 'Green', 'Lime', 'Olive', 'Yellow', 'Navy', 'Blue', 'Teal', 'Aqua');
  // Attribute value strings for the enumerated value types. In every table
  // the first entry, belonging to the xxEmpty member, is the empty string.
  264. HTMLDir : array [THTMLDir] of string = ('','LTR','RTL');
  265. HTMLAlign : array [THTMLalign] of string = ('','left','center','right','justify','char');
  266. HTMLvalign : array [THTMLvalign] of string = ('','top','middle','bottom','baseline');
  267. HTMLframe : array [THTMLframe] of string =
  268. ('','void','above','below','hsides','vsides','lhs','rhs','box','border');
  269. HTMLrules : array [THTMLrules] of string = ('','none','groups','rows','cols','all');
  270. HTMLvaluetype : array [THTMLvaluetype] of string = ('','data','ref','object');
  271. HTMLshape : array [THTMLshape] of string = ('','default','rect','circle','poly');
  272. HTMLinputtype : array [THTMLinputtype] of string = ('','text','password','checkbox',
  273. 'radio','submit','reset','file','hidden','image','button');
  274. HTMLbuttontype : array [THTMLbuttontype] of string = ('','submit','reset','button');
  // Resolves the text of an entity reference to a single character.
  // Name is the reference without the leading '&' and trailing ';': either
  // a numeric form ('#' + decimal digits, or '#x'/'#X' + hex digits) or an
  // entity name of 2..8 characters from the built-in tables.
  // Returns True and stores the character in Entity when the reference is
  // resolved; returns False otherwise.
  275. function ResolveHTMLEntityReference(const Name: WideString;
  276. var Entity: WideChar): Boolean;
  // Returns True when opening NewTag implicitly closes a currently open
  // OldTag, according to the AutoCloseTab table in the implementation.
  277. function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
  278. implementation
  279. uses SysUtils;
  280. { Define which elements auto-close other elements, modelled after libxml2.
  281. This is an array of variable-length lists, each terminated by etUnknown.
  282. Indices to first element of each list are provided by AutoCloseIndex array,
  283. which *must* be updated after any change. }
  284. const
  // Layout of each list, as read by IsAutoClose: the first element is the
  // tag being opened, the following elements (up to the etUnknown
  // terminator) are the open tags it closes.
  285. AutoCloseTab: array[0..277] of THTMLElementTag = (
  286. etform, etform, etp, ethr, eth1, eth2, eth3, eth4, eth5, eth6,
  287. etdl, etul, etol, etmenu, etdir, etaddress, etpre,
  288. ethead, etUnknown,
  289. ethead, etp, etUnknown,
  290. ettitle, etp, etUnknown,
  291. etbody, ethead, etstyle, etlink, ettitle, etp, etUnknown,
  292. etframeset, ethead, etstyle, etlink, ettitle, etp, etUnknown,
  293. etli, etp, eth1, eth2, eth3, eth4, eth5, eth6, etdl, etaddress,
  294. etpre, ethead, etli, etUnknown,
  295. ethr, etp, ethead, etUnknown,
  296. eth1, etp, ethead, etUnknown,
  297. eth2, etp, ethead, etUnknown,
  298. eth3, etp, ethead, etUnknown,
  299. eth4, etp, ethead, etUnknown,
  300. eth5, etp, ethead, etUnknown,
  301. eth6, etp, ethead, etUnknown,
  302. etdir, etp, ethead, etUnknown,
  303. etaddress, etp, ethead, etul, etUnknown,
  304. etpre, etp, ethead, etul, etUnknown,
  305. etblockquote, etp, ethead, etUnknown,
  306. etdl, etp, etdt, etmenu, etdir, etaddress, etpre,
  307. ethead, etUnknown,
  308. etdt, etp, etmenu, etdir, etaddress, etpre,
  309. ethead, etdd, etUnknown,
  310. etdd, etp, etmenu, etdir, etaddress, etpre,
  311. ethead, etdt, etUnknown,
  312. etul, etp, ethead, etol, etmenu, etdir, etaddress, etpre, etUnknown,
  313. etol, etp, ethead, etul, etUnknown,
  314. etmenu, etp, ethead, etul, etUnknown,
  315. etp, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etUnknown,
  316. etdiv, etp, ethead, etUnknown,
  317. etnoscript, etp, ethead, etUnknown,
  318. etcenter, etfont, etb, eti, etp, ethead, etUnknown,
  319. eta, eta, etUnknown,
  320. etcaption, etp, etUnknown,
  321. etcolgroup, etcaption, etcolgroup, etcol, etp, etUnknown,
  322. etcol, etcaption, etcol, etp, etUnknown,
  323. ettable, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etpre,
  324. eta, etUnknown,
  325. etth, etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
  326. ettd, etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
  327. ettr, etth, ettd, ettr, etcaption, etcol, etcolgroup, etp, etUnknown,
  328. etthead, etcaption, etcol, etcolgroup, etUnknown,
  329. ettfoot, etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
  330. ettbody, etp, etUnknown,
  331. ettbody, etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
  332. ettfoot, ettbody, etp, etUnknown,
  333. etoptgroup, etoption, etUnknown,
  334. etoption, etoption, etUnknown,
  335. etfieldset, etlegend, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6,
  336. etpre, eta, etUnknown,
  // The lone etUnknown below (index 277) is not the start of any list in
  // AutoCloseIndex; presumably an end-of-table marker.
  337. etUnknown);
  // Offset in AutoCloseTab of the first element of each list (41 lists).
  // IsAutoClose scans these offsets linearly to find the list whose first
  // element equals the tag being opened.
  338. AutoCloseIndex: array[0..40] of Integer = (
  339. 0, 19, 22, 25, 32, 39, 53, 57, 61, 65, 69,
  340. 73, 77, 81, 85, 90, 95, 99, 108, 117, 126,
  341. 135, 140, 145, 155, 159, 163, 170, 173, 176,
  342. 182, 187, 199, 210, 221, 230, 235, 246, 258,
  343. 261, 264
  344. );
  345. { HTML entities, each preceded with its code (two bytes: high byte of the
  character code first, then the low byte). There is a separate list for
  346. each entity length, and each list is sorted by entity name (ascending
  character codes of the name), which is what the binary search in BSearch
  relies on.
  347. The sole purpose of using AnsiString here is staying compatible with Delphi 7,
  348. which is totally broken with respect to handling wide literals.
  349. }
  // Entities with 2-character names; every record is 2 + 2 = 4 bytes long.
  350. ent_2 =
  351. #3#$9C + 'Mu'+
  352. #3#$9D + 'Nu'+
  353. #3#$A0 + 'Pi'+
  354. #3#$9E + 'Xi'+
  355. #$22#$65+ 'ge'+
  356. #0#62 + 'gt'+
  357. #$22#$64+ 'le'+
  358. #0#60 + 'lt'+
  359. #3#$BC + 'mu'+
  360. #$22#$60+ 'ne'+
  361. #$22#$0B+ 'ni'+
  362. #3#$BD + 'nu'+
  363. #$22#$28+ 'or'+
  364. #3#$C0 + 'pi'+
  365. #3#$BE + 'xi';
  // Entities with 3-character names. Each record is two code bytes (high
  // byte, low byte) followed by the name; records must stay sorted by name
  // because BSearch does a binary search over them.
  366. ent_3 =
  367. #3#$A7 + 'Chi'+
  368. #0#208 + 'ETH'+
  369. #3#$97 + 'Eta'+
  370. #3#$A6 + 'Phi'+
  371. #3#$A8 + 'Psi'+
  372. #3#$A1 + 'Rho'+
  373. #3#$A4 + 'Tau'+
  374. #0#38 + 'amp'+
  375. #$22#$27+ 'and'+
  376. #$22#$20+ 'ang'+
  377. #$22#$29+ 'cap'+
  378. #3#$C7 + 'chi'+
  379. #$22#$2A+ 'cup'+
  380. #0#176 + 'deg'+
  381. #3#$B7 + 'eta'+
  382. #0#240 + 'eth'+
  383. #$22#$2B+ 'int'+
  384. #$25#$CA+ 'loz'+
  385. #$20#$0E+ 'lrm'+
  386. #0#172 + 'not'+
  387. #3#$C6 + 'phi'+
  388. #3#$D6 + 'piv'+
  389. #3#$C8 + 'psi'+
  390. #0#174 + 'reg'+
  391. #3#$C1 + 'rho'+
  392. #$20#$0F+ 'rlm'+
  393. #0#173 + 'shy'+
  394. #$22#$3C+ 'sim'+
  395. #$22#$82+ 'sub'+
  396. #$22#$11+ 'sum'+
  397. #$22#$83+ 'sup'+
  398. #3#$C4 + 'tau'+
  399. #0#168 + 'uml'+
  400. #0#165 + 'yen'+
  401. #$20#$0D+ 'zwj';
  // Entities with 4-character names. Each record is two code bytes (high
  // byte, low byte) followed by the name; records must stay sorted by name
  // because BSearch does a binary search over them. Upper-case letters sort
  // before lower-case ones (e.g. 'lArr' before 'lang').
  402. ent_4 =
  403. #0#196 + 'Auml'+
  404. #3#$92 + 'Beta'+
  405. #0#203 + 'Euml'+
  406. #3#$99 + 'Iota'+
  407. #0#207 + 'Iuml'+
  408. #0#214 + 'Ouml'+
  409. #0#220 + 'Uuml'+
  410. #1#$78 + 'Yuml'+
  411. #3#$96 + 'Zeta'+
  412. #0#228 + 'auml'+
  413. #3#$B2 + 'beta'+
  414. #$20#$22+ 'bull'+
  415. #0#162 + 'cent'+
  416. #2#$C6 + 'circ'+
  417. #$22#$45+ 'cong'+
  418. #0#169 + 'copy'+
  419. #$21#$D3+ 'dArr'+
  420. #$21#$93+ 'darr'+
  421. #$20#$03+ 'emsp'+
  422. #$20#$02+ 'ensp'+
  423. #0#235 + 'euml'+
  424. #$20#$AC+ 'euro'+
  425. #1#$92 + 'fnof'+
  426. #$21#$D4+ 'hArr'+
  427. #$21#$94+ 'harr'+
  428. #3#$B9 + 'iota'+
  429. #$22#$08+ 'isin'+
  430. #0#239 + 'iuml'+
  431. #$21#$D0+ 'lArr'+
  432. #$23#$29+ 'lang'+
  433. #$21#$90+ 'larr'+
  434. #0#175 + 'macr'+
  435. #0#160 + 'nbsp'+
  436. #$22#$84+ 'nsub'+
  437. #0#170 + 'ordf'+
  438. #0#186 + 'ordm'+
  439. #0#246 + 'ouml'+
  440. #0#182 + 'para'+
  441. #$22#$02+ 'part'+
  442. #$22#$A5+ 'perp'+
  443. #$22#$0F+ 'prod'+
  444. #$22#$1D+ 'prop'+
  445. #0#34 + 'quot'+
  446. #$21#$D2+ 'rArr'+
  447. #$23#$2A+ 'rang'+
  448. #$21#$92+ 'rarr'+
  449. #$21#$1C+ 'real'+
  450. #$22#$C5+ 'sdot'+
  451. #0#167 + 'sect'+
  452. #$22#$86+ 'sube'+
  453. #0#185 + 'sup1'+
  454. #0#178 + 'sup2'+
  455. #0#179 + 'sup3'+
  456. #$22#$87+ 'supe'+
  457. #$21#$D1+ 'uArr'+
  458. #$21#$91+ 'uarr'+
  459. #0#252 + 'uuml'+
  460. #0#255 + 'yuml'+
  461. #3#$B6 + 'zeta'+
  462. #$20#$0C+ 'zwnj';
  // Entities with 5-character names. Each record is two code bytes (high
  // byte, low byte) followed by the name; records must stay sorted by name
  // because BSearch does a binary search over them.
  463. ent_5 =
  464. #0#198 + 'AElig'+
  465. #0#194 + 'Acirc'+
  466. #3#$91 + 'Alpha'+
  467. #0#197 + 'Aring'+
  468. #3#$94 + 'Delta'+
  469. #0#202 + 'Ecirc'+
  470. #3#$93 + 'Gamma'+
  471. #0#206 + 'Icirc'+
  472. #3#$9A + 'Kappa'+
  473. #1#$52 + 'OElig'+
  474. #0#212 + 'Ocirc'+
  475. #3#$A9 + 'Omega'+
  476. #$20#$33+ 'Prime'+
  477. #3#$A3 + 'Sigma'+
  478. #0#222 + 'THORN'+
  479. #3#$98 + 'Theta'+
  480. #0#219 + 'Ucirc'+
  481. #0#226 + 'acirc'+
  482. #0#180 + 'acute'+
  483. #0#230 + 'aelig'+
  484. #3#$B1 + 'alpha'+
  485. #0#229 + 'aring'+
  486. #$22#$48+ 'asymp'+
  487. #$20#$1E+ 'bdquo'+
  488. #0#184 + 'cedil'+
  489. #$26#$63+ 'clubs'+
  490. #$21#$B5+ 'crarr'+
  491. #3#$B4 + 'delta'+
  492. #$26#$66+ 'diams'+
  493. #0#234 + 'ecirc'+
  494. #$22#$05+ 'empty'+
  495. #$22#$61+ 'equiv'+
  496. #$22#$03+ 'exist'+
  497. #$20#$44+ 'frasl'+
  498. #3#$B3 + 'gamma'+
  499. #0#238 + 'icirc'+
  500. #0#161 + 'iexcl'+
  501. #$21#$11+ 'image'+
  502. #$22#$1E+ 'infin'+
  503. #3#$BA + 'kappa'+
  504. #0#171 + 'laquo'+
  505. #$23#$08+ 'lceil'+
  506. #$20#$1C+ 'ldquo'+
  507. #$20#$18+ 'lsquo'+
  508. #$20#$14+ 'mdash'+
  509. #0#181 + 'micro'+
  510. #$22#$12+ 'minus'+
  511. #$22#$07+ 'nabla'+
  512. #$20#$13+ 'ndash'+
  513. #$22#$09+ 'notin'+
  514. #0#244 + 'ocirc'+
  515. #1#$53 + 'oelig'+
  516. #$20#$3E+ 'oline'+
  517. #3#$C9 + 'omega'+
  518. #$22#$95+ 'oplus'+
  519. #0#163 + 'pound'+
  520. #$20#$32+ 'prime'+
  521. #$22#$1A+ 'radic'+
  522. #0#187 + 'raquo'+
  523. #$23#$09+ 'rceil'+
  524. #$20#$1D+ 'rdquo'+
  525. #$20#$19+ 'rsquo'+
  526. #$20#$1A+ 'sbquo'+
  527. #3#$C3 + 'sigma'+
  528. #0#223 + 'szlig'+
  529. #3#$B8 + 'theta'+
  530. #0#254 + 'thorn'+
  531. #2#$DC + 'tilde'+
  532. #0#215 + 'times'+
  533. #$21#$22+ 'trade'+
  534. #0#251 + 'ucirc'+
  535. #3#$D2 + 'upsih';
  // Entities with 6-character names. Each record is two code bytes (high
  // byte, low byte) followed by the name; records must stay sorted by name
  // because BSearch does a binary search over them.
  536. ent_6 =
  537. #0#193 + 'Aacute'+
  538. #0#192 + 'Agrave'+
  539. #0#195 + 'Atilde'+
  540. #0#199 + 'Ccedil'+
  541. #$20#$21+ 'Dagger'+
  542. #0#201 + 'Eacute'+
  543. #0#200 + 'Egrave'+
  544. #0#205 + 'Iacute'+
  545. #0#204 + 'Igrave'+
  546. #3#$9B + 'Lambda'+
  547. #0#209 + 'Ntilde'+
  548. #0#211 + 'Oacute'+
  549. #0#210 + 'Ograve'+
  550. #0#216 + 'Oslash'+
  551. #0#213 + 'Otilde'+
  552. #1#$60 + 'Scaron'+
  553. #0#218 + 'Uacute'+
  554. #0#217 + 'Ugrave'+
  555. #0#221 + 'Yacute'+
  556. #0#225 + 'aacute'+
  557. #0#224 + 'agrave'+
  558. #0#227 + 'atilde'+
  559. #0#166 + 'brvbar'+
  560. #0#231 + 'ccedil'+
  561. #0#164 + 'curren'+
  562. #$20#$20+ 'dagger'+
  563. #0#247 + 'divide'+
  564. #0#233 + 'eacute'+
  565. #0#232 + 'egrave'+
  566. #$22#$00+ 'forall'+
  567. #0#189 + 'frac12'+
  568. #0#188 + 'frac14'+
  569. #0#190 + 'frac34'+
  570. #$26#$65+ 'hearts'+
  571. #$20#$26+ 'hellip'+
  572. #0#237 + 'iacute'+
  573. #0#236 + 'igrave'+
  574. #0#191 + 'iquest'+
  575. #3#$BB + 'lambda'+
  576. #$23#$0A+ 'lfloor'+
  577. #$22#$17+ 'lowast'+
  578. #$20#$39+ 'lsaquo'+
  579. #0#183 + 'middot'+
  580. #0#241 + 'ntilde'+
  581. #0#243 + 'oacute'+
  582. #0#242 + 'ograve'+
  583. #0#248 + 'oslash'+
  584. #0#245 + 'otilde'+
  585. #$22#$97+ 'otimes'+
  586. #$20#$30+ 'permil'+
  587. #0#177 + 'plusmn'+
  588. #$23#$0B+ 'rfloor'+
  589. #$20#$3A+ 'rsaquo'+
  590. #1#$61 + 'scaron'+
  591. #3#$C2 + 'sigmaf'+
  592. #$26#$60+ 'spades'+
  593. #$22#$34+ 'there4'+
  594. #$20#$09+ 'thinsp'+
  595. #0#250 + 'uacute'+
  596. #0#249 + 'ugrave'+
  597. #$21#$18+ 'weierp'+
  598. #0#253 + 'yacute';
  // Entities with 7-character names; same record layout as the shorter
  // lists (two code bytes, then the name), sorted by name.
  599. ent_7 =
  600. #3#$95 + 'Epsilon'+
  601. #3#$9F + 'Omicron'+
  602. #3#$A5 + 'Upsilon'+
  603. #$21#$35+ 'alefsym'+
  604. #3#$B5 + 'epsilon'+
  605. #3#$BF + 'omicron'+
  606. #3#$C5 + 'upsilon';
  // The only entity with an 8-character name.
  607. ent_8 =
  608. #3#$D1 + 'thetasym';
  // Entity lists indexed by name length (2..8), so that
  // ResolveHTMLEntityReference can pick the list with strs[Length(Name)].
  609. strs: array[2..8] of string = (
  610. ent_2, ent_3, ent_4, ent_5, ent_6, ent_7, ent_8
  611. );
  { Binary search for an entity name in one packed entity table.

    P    - points to the first character of the name to look up; at least
           Len characters must be readable.
    Len  - length of the name, which is also the name length of every
           record in data.
    data - concatenation of fixed-size records of Len+2 bytes each: the
           high byte of the character code, the low byte, then the Len
           name characters. Records must be sorted by name in ascending
           character-code order.

    Returns the character code of the matching record, or #0 when the name
    is not present (or when Len is not positive). }
  function BSearch(P: PWideChar; Len: Integer; const data: string): WideChar;
  var
    L, H, mid, base, J, C: Integer;
  begin
    Result := #0;
    if Len <= 0 then
      Exit;
    L := 0;
    { Index of the LAST record. The previous bound,
      (Length(data)+1) div (Len+2), was the record count, so a name sorting
      after the final entry made the search probe one record past the end
      of data. }
    H := (Length(data) div (Len+2)) - 1;
    while L <= H do
    begin
      mid := L + ((H - L) shr 1);
      base := mid*(Len+2);   { 0-based offset of record 'mid' inside data }
      J := 0;
      { Compare the names character by character; C keeps the sign of the
        first difference, or 0 when all Len characters match. }
      repeat
        C := ord(P[J]) - ord(data[base+3+J]);
        Inc(J);
      until (C <> 0) or (J >= Len);
      if C > 0 then
        L := mid + 1
      else if C < 0 then
        H := mid - 1
      else
      begin
        { Found: rebuild the 16-bit code from its two bytes. }
        Result := WideChar((ord(data[base+1]) shl 8) or ord(data[base+2]));
        Exit;
      end;
    end;
  end;
  {
    Resolves the text of an HTML entity reference to a single character.

    Name   - the reference without the leading '&' and the trailing ';'.
             Accepted forms: '#' followed by decimal digits, '#x' or '#X'
             followed by hexadecimal digits, or an entity name of 2..8
             characters found in the built-in tables.
    Entity - receives the resolved character. It is left untouched when a
             numeric reference is rejected and set to #0 when a name is
             not found.

    Returns True when the reference was resolved.

    A numeric reference must contain at least one digit ('#x' alone is
    rejected) and must not exceed $FFFF, because the result is a single
    WideChar. Larger values used to be truncated silently and could
    overflow the accumulator.

    Remaining issues:
    1) UTF-16 surrogate pairs: code points above U+FFFF are rejected
       instead of being returned as a pair.
    2) HTML accepts uppercase 'X' for hex notation, but XML does not.
    3) 'apos' is used in xml/xhtml, but not in HTML 4.01
  }
  function ResolveHTMLEntityReference(const Name: WideString;
    var Entity: WideChar): Boolean;
  var
    i, L, first: Integer;
    value, digit, radix: Integer;
  begin
    L := Length(Name);
    if (L > 1) and (Name[1] = '#') then
    begin
      { Numeric character reference: pick the radix and first digit position. }
      if (Name[2] = 'x') or (Name[2] = 'X') then
      begin
        radix := 16;
        first := 3;
      end
      else
      begin
        radix := 10;
        first := 2;
      end;
      value := 0;
      i := first;
      while i <= L do
      begin
        case Name[i] of
          '0'..'9': digit := Ord(Name[i]) - Ord('0');
          'a'..'f': digit := Ord(Name[i]) - (Ord('a') - 10);
          'A'..'F': digit := Ord(Name[i]) - (Ord('A') - 10);
        else
          digit := -1;
        end;
        { Stop at anything that is not a digit of the current radix. }
        if (digit < 0) or (digit >= radix) then
          Break;
        value := value * radix + digit;
        { Not representable in one WideChar; stopping here also keeps
          value from overflowing on very long digit strings. }
        if value > $FFFF then
          Break;
        Inc(i);
      end;
      { Success requires that every character was consumed and that there
        was at least one digit. }
      Result := (i = L+1) and (L >= first);
      if Result then
        Entity := WideChar(value);
    end
    else
    begin
      { Named reference: the tables are split by name length. }
      case L of
        2..8: Entity := BSearch(PWideChar(Name), L, strs[L]);
      else
        Entity := #0;
      end;
      Result := (Entity <> #0);
    end;
  end;
  { Tells whether opening NewTag implicitly closes an open OldTag.

    The list headed by NewTag is located through AutoCloseIndex; the tags
    following the head in AutoCloseTab, up to the etUnknown terminator, are
    the ones NewTag closes. Returns False when NewTag heads no list or when
    OldTag is not in NewTag's list. }
  function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
  var
    slot, start, cursor: Integer;
  begin
    Result := False;

    { Find the first list whose head element is NewTag. }
    start := -1;
    for slot := Low(AutoCloseIndex) to High(AutoCloseIndex) do
      if AutoCloseTab[AutoCloseIndex[slot]] = NewTag then
      begin
        start := AutoCloseIndex[slot];
        Break;
      end;
    if start < 0 then
      Exit;

    { Walk the members of that list until a match or the terminator. }
    cursor := start + 1;
    while (not Result) and (AutoCloseTab[cursor] <> etUnknown) do
    begin
      Result := (AutoCloseTab[cursor] = OldTag);
      Inc(cursor);
    end;
  end;
  717. end.