|
@@ -390,174 +390,7 @@ const
|
|
|
'radio','submit','reset','file','hidden','image','button');
|
|
|
HTMLbuttontype : array [THTMLbuttontype] of string = ('','submit','reset','button');
|
|
|
|
|
|
-
|
|
|
- // ISO8859-1 mapping:
|
|
|
- HTMLEntities: array[#160..#255] of String = (
|
|
|
- // 160-191
|
|
|
- 'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
|
|
|
- 'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
|
|
|
- 'deg', 'plusmn', 'sup2', 'sup3', 'acute', 'micro', 'para', 'middot',
|
|
|
- 'cedil', 'sup1', 'ordm', 'raquo', 'frac14', 'frac12', 'frac34', 'iquest',
|
|
|
- // 192-223
|
|
|
- 'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
|
|
|
- 'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
|
|
|
- 'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
|
|
|
- 'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',
|
|
|
- // 224-255
|
|
|
- 'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
|
|
|
- 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
|
|
|
- 'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
|
|
|
- 'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml');
|
|
|
-
|
|
|
-
|
|
|
- UnicodeHTMLEntities: array[0..141] of String = (
|
|
|
- 'Alpha', // #913
|
|
|
- 'Beta', // #914
|
|
|
- 'Gamma', // #915
|
|
|
- 'Delta', // #916
|
|
|
- 'Epsilon', // #917
|
|
|
- 'Zeta', // #918
|
|
|
- 'Eta', // #919
|
|
|
- 'Theta', // #920
|
|
|
- 'Iota', // #921
|
|
|
- 'Kappa', // #922
|
|
|
- 'Lambda', // #923
|
|
|
- 'Mu', // #924
|
|
|
- 'Nu', // #925
|
|
|
- 'Xi', // #926
|
|
|
- 'Omicron', // #927
|
|
|
- 'Pi', // #928
|
|
|
- 'Rho', // #929
|
|
|
- 'Sigma', // #931
|
|
|
- 'Tau', // #932
|
|
|
- 'Upsilon', // #933
|
|
|
- 'Phi', // #934
|
|
|
- 'Chi', // #935
|
|
|
- 'Psi', // #936
|
|
|
- 'Omega', // #937
|
|
|
- 'alpha', // #945
|
|
|
- 'beta', // #946
|
|
|
- 'gamma', // #947
|
|
|
- 'delta', // #948
|
|
|
- 'epsilon', // #949
|
|
|
- 'zeta', // #950
|
|
|
- 'eta', // #951
|
|
|
- 'theta', // #952
|
|
|
- 'iota', // #953
|
|
|
- 'kappa', // #954
|
|
|
- 'lambda', // #955
|
|
|
- 'mu', // #956
|
|
|
- 'nu', // #957
|
|
|
- 'xi', // #958
|
|
|
- 'omicron', // #959
|
|
|
- 'pi', // #960
|
|
|
- 'rho', // #961
|
|
|
- 'sigmaf', // #962
|
|
|
- 'sigma', // #963
|
|
|
- 'tau', // #964
|
|
|
- 'upsilon', // #965
|
|
|
- 'phi', // #966
|
|
|
- 'chi', // #967
|
|
|
- 'psi', // #968
|
|
|
- 'omega', // #969
|
|
|
- 'thetasym', // #977
|
|
|
- 'upsih', // #978
|
|
|
- 'piv', // #982
|
|
|
- 'ensp', // #8194
|
|
|
- 'emsp', // #8195
|
|
|
- 'thinsp', // #8201
|
|
|
- 'zwnj', // #8204
|
|
|
- 'zwj', // #8205
|
|
|
- 'lrm', // #8206
|
|
|
- 'rlm', // #8207
|
|
|
- 'ndash', // #8211
|
|
|
- 'mdash', // #8212
|
|
|
- 'lsquo', // #8216
|
|
|
- 'rsquo', // #8217
|
|
|
- 'sbquo', // #8218
|
|
|
- 'ldquo', // #8220
|
|
|
- 'rdquo', // #8221
|
|
|
- 'bdquo', // #8222
|
|
|
- 'dagger', // #8224
|
|
|
- 'Dagger', // #8225
|
|
|
- 'bull', // #8226
|
|
|
- 'hellip', // #8230
|
|
|
- 'permil', // #8240
|
|
|
- 'prime', // #8242
|
|
|
- 'lsaquo', // #8249
|
|
|
- 'rsaquo', // #8250
|
|
|
- 'oline', // #8254
|
|
|
- 'frasl', // #8260
|
|
|
- 'image', // #8465
|
|
|
- 'weierp', // #8472
|
|
|
- 'real', // #8476
|
|
|
- 'trade', // #8482
|
|
|
- 'alefsym', // #8501
|
|
|
- 'larr', // #8592
|
|
|
- 'uarr', // #8593
|
|
|
- 'rarr', // #8594
|
|
|
- 'darr', // #8595
|
|
|
- 'harr', // #8596
|
|
|
- 'crarr', // #8629
|
|
|
- 'lArr', // #8656
|
|
|
- 'uArr', // #8657
|
|
|
- 'rArr', // #8658
|
|
|
- 'dArr', // #8659
|
|
|
- 'hArr', // #8660
|
|
|
- 'forall', // #8704
|
|
|
- 'part', // #8706
|
|
|
- 'exist', // #8707
|
|
|
- 'empty', // #8709
|
|
|
- 'nabla', // #8711
|
|
|
- 'isin', // #8712
|
|
|
- 'notin', // #8713
|
|
|
- 'ni', // #8715
|
|
|
- 'prod', // #8719
|
|
|
- 'sum', // #8721
|
|
|
- 'minus', // #8722
|
|
|
- 'lowast', // #8727
|
|
|
- 'radic', // #8730
|
|
|
- 'prop', // #8733
|
|
|
- 'infin', // #8734
|
|
|
- 'ang', // #8736
|
|
|
- 'and', // #8743
|
|
|
- 'or', // #8744
|
|
|
- 'cap', // #8745
|
|
|
- 'cup', // #8746
|
|
|
- 'int', // #8747
|
|
|
- 'there4', // #8756
|
|
|
- 'sim', // #8764
|
|
|
- 'cong', // #8773
|
|
|
- 'asymp', // #8776
|
|
|
- 'ne', // #8800
|
|
|
- 'equiv', // #8801
|
|
|
- 'le', // #8804
|
|
|
- 'ge', // #8805
|
|
|
- 'sub', // #8834
|
|
|
- 'sup', // #8835
|
|
|
- 'nsub', // #8836
|
|
|
- 'sube', // #8838
|
|
|
- 'supe', // #8839
|
|
|
- 'oplus', // #8853
|
|
|
- 'otimes', // #8855
|
|
|
- 'perp', // #8869
|
|
|
- 'sdot', // #8901
|
|
|
- 'lceil', // #8968
|
|
|
- 'rceil', // #8969
|
|
|
- 'lfloor', // #8970
|
|
|
- 'rfloor', // #8971
|
|
|
- 'lang', // #9001
|
|
|
- 'rang', // #9002
|
|
|
- 'loz', // #9674
|
|
|
- 'spades', // #9824
|
|
|
- 'clubs', // #9827
|
|
|
- 'hearts', // #9829
|
|
|
- 'diams' // #9830
|
|
|
- );
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-function ResolveHTMLEntityReference(const Name: String;
|
|
|
+function ResolveHTMLEntityReference(const Name: WideString;
|
|
|
var Entity: WideChar): Boolean;
|
|
|
|
|
|
function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
|
|
@@ -635,40 +468,333 @@ const
|
|
|
261, 264
|
|
|
);
|
|
|
|
|
|
-function ResolveHTMLEntityReference(const Name: String;
|
|
|
+{ HTML entities, each preceded by its 16-bit character code (two chars, high byte first). There is a separate list for
|
|
|
+ each entity length, and each list is sorted by entity name in character-code order (so uppercase precedes lowercase).
|
|
|
+ The sole purpose of using AnsiString here is staying compatible with Delphi 7,
|
|
|
+ which is totally broken with respect to handling wide literals.
|
|
|
+}
|
|
|
+
|
|
|
+ ent_2 =
|
|
|
+ #3#$9C + 'Mu'+
|
|
|
+ #3#$9D + 'Nu'+
|
|
|
+ #3#$A0 + 'Pi'+
|
|
|
+ #3#$9E + 'Xi'+
|
|
|
+ #$22#$65+ 'ge'+
|
|
|
+ #0#62 + 'gt'+
|
|
|
+ #$22#$64+ 'le'+
|
|
|
+ #0#60 + 'lt'+
|
|
|
+ #3#$BC + 'mu'+
|
|
|
+ #$22#$60+ 'ne'+
|
|
|
+ #$22#$0B+ 'ni'+
|
|
|
+ #3#$BD + 'nu'+
|
|
|
+ #$22#$28+ 'or'+
|
|
|
+ #3#$C0 + 'pi'+
|
|
|
+ #3#$BE + 'xi';
|
|
|
+
|
|
|
+ ent_3 =
|
|
|
+ #3#$A7 + 'Chi'+
|
|
|
+ #0#208 + 'ETH'+
|
|
|
+ #3#$97 + 'Eta'+
|
|
|
+ #3#$A6 + 'Phi'+
|
|
|
+ #3#$A8 + 'Psi'+
|
|
|
+ #3#$A1 + 'Rho'+
|
|
|
+ #3#$A4 + 'Tau'+
|
|
|
+ #0#38 + 'amp'+
|
|
|
+ #$22#$27+ 'and'+
|
|
|
+ #$22#$20+ 'ang'+
|
|
|
+ #$22#$29+ 'cap'+
|
|
|
+ #3#$C7 + 'chi'+
|
|
|
+ #$22#$2A+ 'cup'+
|
|
|
+ #0#176 + 'deg'+
|
|
|
+ #3#$B7 + 'eta'+
|
|
|
+ #0#240 + 'eth'+
|
|
|
+ #$22#$2B+ 'int'+
|
|
|
+ #$25#$CA+ 'loz'+
|
|
|
+ #$20#$0E+ 'lrm'+
|
|
|
+ #0#172 + 'not'+
|
|
|
+ #3#$C6 + 'phi'+
|
|
|
+ #3#$D6 + 'piv'+
|
|
|
+ #3#$C8 + 'psi'+
|
|
|
+ #0#174 + 'reg'+
|
|
|
+ #3#$C1 + 'rho'+
|
|
|
+ #$20#$0F+ 'rlm'+
|
|
|
+ #0#173 + 'shy'+
|
|
|
+ #$22#$3C+ 'sim'+
|
|
|
+ #$22#$82+ 'sub'+
|
|
|
+ #$22#$11+ 'sum'+
|
|
|
+ #$22#$83+ 'sup'+
|
|
|
+ #3#$C4 + 'tau'+
|
|
|
+ #0#168 + 'uml'+
|
|
|
+ #0#165 + 'yen'+
|
|
|
+ #$20#$0D+ 'zwj';
|
|
|
+
|
|
|
+ ent_4 =
|
|
|
+ #0#196 + 'Auml'+
|
|
|
+ #3#$92 + 'Beta'+
|
|
|
+ #0#203 + 'Euml'+
|
|
|
+ #3#$99 + 'Iota'+
|
|
|
+ #0#207 + 'Iuml'+
|
|
|
+ #0#214 + 'Ouml'+
|
|
|
+ #0#220 + 'Uuml'+
|
|
|
+ #1#$78 + 'Yuml'+
|
|
|
+ #3#$96 + 'Zeta'+
|
|
|
+
|
|
|
+ #0#228 + 'auml'+
|
|
|
+ #3#$B2 + 'beta'+
|
|
|
+ #$20#$22+ 'bull'+
|
|
|
+ #0#162 + 'cent'+
|
|
|
+ #2#$C6 + 'circ'+
|
|
|
+ #$22#$45+ 'cong'+
|
|
|
+ #0#169 + 'copy'+
|
|
|
+ #$21#$D3+ 'dArr'+
|
|
|
+ #$21#$93+ 'darr'+
|
|
|
+ #$20#$03+ 'emsp'+
|
|
|
+ #$20#$02+ 'ensp'+
|
|
|
+ #0#235 + 'euml'+
|
|
|
+ #$20#$AC+ 'euro'+
|
|
|
+ #1#$92 + 'fnof'+
|
|
|
+ #$21#$D4+ 'hArr'+
|
|
|
+ #$21#$94+ 'harr'+
|
|
|
+ #3#$B9 + 'iota'+
|
|
|
+ #$22#$08+ 'isin'+
|
|
|
+ #0#239 + 'iuml'+
|
|
|
+ #$21#$D0+ 'lArr'+
|
|
|
+ #$23#$29+ 'lang'+
|
|
|
+ #$21#$90+ 'larr'+
|
|
|
+ #0#175 + 'macr'+
|
|
|
+ #0#160 + 'nbsp'+
|
|
|
+ #$22#$84+ 'nsub'+
|
|
|
+ #0#170 + 'ordf'+
|
|
|
+ #0#186 + 'ordm'+
|
|
|
+ #0#246 + 'ouml'+
|
|
|
+ #0#182 + 'para'+
|
|
|
+ #$22#$02+ 'part'+
|
|
|
+ #$22#$A5+ 'perp'+
|
|
|
+ #$22#$0F+ 'prod'+
|
|
|
+ #$22#$1D+ 'prop'+
|
|
|
+ #0#34 + 'quot'+
|
|
|
+ #$21#$D2+ 'rArr'+
|
|
|
+ #$23#$2A+ 'rang'+
|
|
|
+ #$21#$92+ 'rarr'+
|
|
|
+ #$21#$1C+ 'real'+
|
|
|
+ #$22#$C5+ 'sdot'+
|
|
|
+ #0#167 + 'sect'+
|
|
|
+ #$22#$86+ 'sube'+
|
|
|
+ #0#185 + 'sup1'+
|
|
|
+ #0#178 + 'sup2'+
|
|
|
+ #0#179 + 'sup3'+
|
|
|
+ #$22#$87+ 'supe'+
|
|
|
+ #$21#$D1+ 'uArr'+
|
|
|
+ #$21#$91+ 'uarr'+
|
|
|
+ #0#252 + 'uuml'+
|
|
|
+ #0#255 + 'yuml'+
|
|
|
+ #3#$B6 + 'zeta'+
|
|
|
+ #$20#$0C+ 'zwnj';
|
|
|
+
|
|
|
+ ent_5 =
|
|
|
+ #0#198 + 'AElig'+
|
|
|
+ #0#194 + 'Acirc'+
|
|
|
+ #3#$91 + 'Alpha'+
|
|
|
+ #0#197 + 'Aring'+
|
|
|
+ #3#$94 + 'Delta'+
|
|
|
+ #0#202 + 'Ecirc'+
|
|
|
+ #3#$93 + 'Gamma'+
|
|
|
+ #0#206 + 'Icirc'+
|
|
|
+ #3#$9A + 'Kappa'+
|
|
|
+ #1#$52 + 'OElig'+
|
|
|
+ #0#212 + 'Ocirc'+
|
|
|
+ #3#$A9 + 'Omega'+
|
|
|
+ #$20#$33+ 'Prime'+
|
|
|
+ #3#$A3 + 'Sigma'+
|
|
|
+ #0#222 + 'THORN'+
|
|
|
+ #3#$98 + 'Theta'+
|
|
|
+ #0#219 + 'Ucirc'+
|
|
|
+
|
|
|
+ #0#226 + 'acirc'+
|
|
|
+ #0#180 + 'acute'+
|
|
|
+ #0#230 + 'aelig'+
|
|
|
+ #3#$B1 + 'alpha'+
|
|
|
+ #0#229 + 'aring'+
|
|
|
+ #$22#$48+ 'asymp'+
|
|
|
+ #$20#$1E+ 'bdquo'+
|
|
|
+ #0#184 + 'cedil'+
|
|
|
+ #$26#$63+ 'clubs'+
|
|
|
+ #$21#$B5+ 'crarr'+
|
|
|
+ #3#$B4 + 'delta'+
|
|
|
+ #$26#$66+ 'diams'+
|
|
|
+ #0#234 + 'ecirc'+
|
|
|
+ #$22#$05+ 'empty'+
|
|
|
+ #$22#$61+ 'equiv'+
|
|
|
+ #$22#$03+ 'exist'+
|
|
|
+ #$20#$44+ 'frasl'+
|
|
|
+ #3#$B3 + 'gamma'+
|
|
|
+ #0#238 + 'icirc'+
|
|
|
+ #0#161 + 'iexcl'+
|
|
|
+ #$21#$11+ 'image'+
|
|
|
+ #$22#$1E+ 'infin'+
|
|
|
+ #3#$BA + 'kappa'+
|
|
|
+ #0#171 + 'laquo'+
|
|
|
+ #$23#$08+ 'lceil'+
|
|
|
+ #$20#$1C+ 'ldquo'+
|
|
|
+ #$20#$18+ 'lsquo'+
|
|
|
+ #$20#$14+ 'mdash'+
|
|
|
+ #0#181 + 'micro'+
|
|
|
+ #$22#$12+ 'minus'+
|
|
|
+ #$22#$07+ 'nabla'+
|
|
|
+ #$20#$13+ 'ndash'+
|
|
|
+ #$22#$09+ 'notin'+
|
|
|
+ #0#244 + 'ocirc'+
|
|
|
+ #1#$53 + 'oelig'+
|
|
|
+ #$20#$3E+ 'oline'+
|
|
|
+ #3#$C9 + 'omega'+
|
|
|
+ #$22#$95+ 'oplus'+
|
|
|
+ #0#163 + 'pound'+
|
|
|
+ #$20#$32+ 'prime'+
|
|
|
+ #$22#$1A+ 'radic'+
|
|
|
+ #0#187 + 'raquo'+
|
|
|
+ #$23#$09+ 'rceil'+
|
|
|
+ #$20#$1D+ 'rdquo'+
|
|
|
+ #$20#$19+ 'rsquo'+
|
|
|
+ #$20#$1A+ 'sbquo'+
|
|
|
+ #3#$C3 + 'sigma'+
|
|
|
+ #0#223 + 'szlig'+
|
|
|
+ #3#$B8 + 'theta'+
|
|
|
+ #0#254 + 'thorn'+
|
|
|
+ #2#$DC + 'tilde'+
|
|
|
+ #0#215 + 'times'+
|
|
|
+ #$21#$22+ 'trade'+
|
|
|
+ #0#251 + 'ucirc'+
|
|
|
+ #3#$D2 + 'upsih';
|
|
|
+
|
|
|
+ ent_6 =
|
|
|
+ #0#193 + 'Aacute'+
|
|
|
+ #0#192 + 'Agrave'+
|
|
|
+ #0#195 + 'Atilde'+
|
|
|
+ #0#199 + 'Ccedil'+
|
|
|
+ #$20#$21+ 'Dagger'+
|
|
|
+ #0#201 + 'Eacute'+
|
|
|
+ #0#200 + 'Egrave'+
|
|
|
+ #0#205 + 'Iacute'+
|
|
|
+ #0#204 + 'Igrave'+
|
|
|
+ #3#$9B + 'Lambda'+
|
|
|
+ #0#209 + 'Ntilde'+
|
|
|
+ #0#211 + 'Oacute'+
|
|
|
+ #0#210 + 'Ograve'+
|
|
|
+ #0#216 + 'Oslash'+
|
|
|
+ #0#213 + 'Otilde'+
|
|
|
+ #1#$60 + 'Scaron'+
|
|
|
+ #0#218 + 'Uacute'+
|
|
|
+ #0#217 + 'Ugrave'+
|
|
|
+ #0#221 + 'Yacute'+
|
|
|
+
|
|
|
+ #0#225 + 'aacute'+
|
|
|
+ #0#224 + 'agrave'+
|
|
|
+ #0#227 + 'atilde'+
|
|
|
+ #0#166 + 'brvbar'+
|
|
|
+ #0#231 + 'ccedil'+
|
|
|
+ #0#164 + 'curren'+
|
|
|
+ #$20#$20+ 'dagger'+
|
|
|
+ #0#247 + 'divide'+
|
|
|
+ #0#233 + 'eacute'+
|
|
|
+ #0#232 + 'egrave'+
|
|
|
+ #$22#$00+ 'forall'+
|
|
|
+ #0#189 + 'frac12'+
|
|
|
+ #0#188 + 'frac14'+
|
|
|
+ #0#190 + 'frac34'+
|
|
|
+ #$26#$65+ 'hearts'+
|
|
|
+ #$20#$26+ 'hellip'+
|
|
|
+ #0#237 + 'iacute'+
|
|
|
+ #0#236 + 'igrave'+
|
|
|
+ #0#191 + 'iquest'+
|
|
|
+ #3#$BB + 'lambda'+
|
|
|
+ #$23#$0A+ 'lfloor'+
|
|
|
+ #$22#$17+ 'lowast'+
|
|
|
+ #$20#$39+ 'lsaquo'+
|
|
|
+ #0#183 + 'middot'+
|
|
|
+ #0#241 + 'ntilde'+
|
|
|
+ #0#243 + 'oacute'+
|
|
|
+ #0#242 + 'ograve'+
|
|
|
+ #0#248 + 'oslash'+
|
|
|
+ #0#245 + 'otilde'+
|
|
|
+ #$22#$97+ 'otimes'+
|
|
|
+ #$20#$30+ 'permil'+
|
|
|
+ #0#177 + 'plusmn'+
|
|
|
+ #$23#$0B+ 'rfloor'+
|
|
|
+ #$20#$3A+ 'rsaquo'+
|
|
|
+ #1#$61 + 'scaron'+
|
|
|
+ #3#$C2 + 'sigmaf'+
|
|
|
+ #$26#$60+ 'spades'+
|
|
|
+ #$22#$34+ 'there4'+
|
|
|
+ #$20#$09+ 'thinsp'+
|
|
|
+ #0#250 + 'uacute'+
|
|
|
+ #0#249 + 'ugrave'+
|
|
|
+ #$21#$18+ 'weierp'+
|
|
|
+ #0#253 + 'yacute';
|
|
|
+
|
|
|
+ ent_7 =
|
|
|
+ #3#$95 + 'Epsilon'+
|
|
|
+ #3#$9F + 'Omicron'+
|
|
|
+ #3#$A5 + 'Upsilon'+
|
|
|
+ #$21#$35+ 'alefsym'+
|
|
|
+ #3#$B5 + 'epsilon'+
|
|
|
+ #3#$BF + 'omicron'+
|
|
|
+ #3#$C5 + 'upsilon';
|
|
|
+
|
|
|
+ ent_8 =
|
|
|
+ #3#$D1 + 'thetasym';
|
|
|
+
|
|
|
+ { Entity tables indexed by entity-name length: strs[N] is the packed table
+   holding all N-character entity names (N = 2..8). }
+ strs: array[2..8] of string = (
|
|
|
+ ent_2, ent_3, ent_4, ent_5, ent_6, ent_7, ent_8
|
|
|
+ );
|
|
|
+
|
|
|
+{ Binary search for an entity name in a packed table of fixed-width records.
+
+  P    - points to the first character of the name; exactly Len characters
+         are read from it.
+  Len  - length of the name; every record in data must hold a name of exactly
+         this length.
+  data - concatenation of records of Len+2 chars each: the high byte and the
+         low byte of the character code, followed by the Len-character name.
+         Records must be sorted by name in ascending character-code order.
+
+  Returns the character code stored in the matching record, or #0 when the
+  name is not present (or when P is nil / Len is not positive). }
+function BSearch(P: PWideChar; Len: Integer; const data: string): WideChar;
+var
+  L, H, mid, J, C, Base: Integer;
+begin
+  Result := #0;
+  if (P = nil) or (Len <= 0) then
+    Exit;
+  L := 0;
+  { Index of the LAST record. The former bound, (Length(data)+1) div (Len+2),
+    evaluated to the record count, so a name sorting after every entry made
+    the search probe one record past the end of data. }
+  H := Length(data) div (Len+2) - 1;
+  while L <= H do
+  begin
+    mid := L + ((H - L) shr 1);
+    Base := mid*(Len+2);  // zero-based offset of record 'mid' inside data
+    J := 0;
+    repeat
+      // data is 1-based and the name follows the two code bytes, hence +3
+      C := ord(P[J]) - ord(data[Base+3+J]);
+      Inc(J);
+    until (C <> 0) or (J >= Len);
+    if C > 0 then
+      L := mid + 1
+    else if C < 0 then
+      H := mid - 1
+    else
+    begin
+      // reassemble the 16-bit code from its high and low bytes
+      Result := WideChar((ord(data[Base+1]) shl 8) or ord(data[Base+2]));
+      Exit;
+    end;
+  end;
+end;
|
|
|
+
|
|
|
+{
|
|
|
+ Remaining issues:
|
|
|
+ 1) UTF-16 surrogate pairs: a character reference above #xFFFF cannot be returned in a single WideChar.
|
|
|
+ 2) HTML accepts uppercase 'X' for hex notation, but XML does not.
|
|
|
+ 3) 'apos' is used in xml/xhtml, but not in HTML 4.01
|
|
|
+}
|
|
|
+
|
|
|
+function ResolveHTMLEntityReference(const Name: WideString;
|
|
|
var Entity: WideChar): Boolean;
|
|
|
var
|
|
|
- Ent: WideChar;
|
|
|
- i: Integer;
|
|
|
+ i, L: Integer;
|
|
|
value: Integer;
|
|
|
begin
|
|
|
- if Name = 'quot' then
|
|
|
- begin
|
|
|
- Entity := '"';
|
|
|
- Result := True;
|
|
|
- end else if Name = 'apos' then
|
|
|
- begin
|
|
|
- Entity := '''';
|
|
|
- Result := True;
|
|
|
- end else if Name = 'amp' then
|
|
|
- begin
|
|
|
- Entity := '&';
|
|
|
- Result := True;
|
|
|
- end else if Name = 'lt' then
|
|
|
- begin
|
|
|
- Entity := '<';
|
|
|
- Result := True;
|
|
|
- end else if Name = 'gt' then
|
|
|
- begin
|
|
|
- Entity := '>';
|
|
|
- Result := True;
|
|
|
- end else if (Length(Name) > 1) and (Name[1] = '#') then
|
|
|
+ L := Length(Name);
|
|
|
+ if (L > 1) and (Name[1] = '#') then
|
|
|
begin
|
|
|
value := 0;
|
|
|
- if Name[2] in ['x', 'X'] then
|
|
|
+ if (Name[2] = 'x') or (Name[2] = 'X') then
|
|
|
begin
|
|
|
i := 3;
|
|
|
- while i <= Length(Name) do
|
|
|
+ while i <= L do
|
|
|
begin
|
|
|
case Name[i] of
|
|
|
'0'..'9': Value := Value * 16 + Ord(Name[i]) - Ord('0');
|
|
@@ -683,7 +809,7 @@ begin
|
|
|
else
|
|
|
begin
|
|
|
i := 2;
|
|
|
- while i <= Length(Name) do
|
|
|
+ while i <= L do
|
|
|
begin
|
|
|
case Name[i] of
|
|
|
'0'..'9': Value := Value * 10 + Ord(Name[i]) - Ord('0');
|
|
@@ -693,19 +819,18 @@ begin
|
|
|
Inc(i);
|
|
|
end;
|
|
|
end;
|
|
|
- Result := (i = Length(Name)+1);
|
|
|
+ Result := (i = L+1);
|
|
|
if Result then
|
|
|
Entity := WideChar(Value);
|
|
|
- end else
|
|
|
+ end
|
|
|
+ else
|
|
|
begin
|
|
|
- for Ent := Low(HTMLEntities) to High(HTMLEntities) do
|
|
|
- if HTMLEntities[Ent] = Name then
|
|
|
- begin
|
|
|
- Entity := Ent;
|
|
|
- Result := True;
|
|
|
- exit;
|
|
|
- end;
|
|
|
- Result := False;
|
|
|
+ case L of
|
|
|
+ 2..8: Entity := BSearch(PWideChar(Name), L, strs[L]);
|
|
|
+ else
|
|
|
+ Entity := #0;
|
|
|
+ end;
|
|
|
+ Result := (Entity <> #0);
|
|
|
end;
|
|
|
end;
|
|
|
|