htmlutil.pas 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. { Copyright (C) <2005> <Andrew Haines> htmlutil.pas
  2. This library is free software; you can redistribute it and/or modify it
  3. under the terms of the GNU Library General Public License as published by
  4. the Free Software Foundation; either version 2 of the License, or (at your
  5. option) any later version.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  8. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
  9. for more details.
  10. You should have received a copy of the GNU Library General Public License
  11. along with this library; if not, write to the Free Software Foundation,
  12. Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  13. }
  14. {
  15. See the file COPYING.FPC, included in this distribution,
  16. for details about the copyright.
  17. }
  18. { modified from jsFastHtmlParser for use with freepascal
  19. Original Author:
  20. James Azarja
  21. Contributor:
  22. Lars aka L505
  23. http://z505.com
  24. Note: this isn't perfect, it needs to be improved.. see comments }
  25. {$IFNDEF FPC_DOTTEDUNITS}
  26. unit HTMLUtil;
  27. {$ENDIF FPC_DOTTEDUNITS}
  28. {$ifdef fpc}
  29. {$MODE Delphi}
  30. {$H+}
  31. {$endif}
  32. interface
  33. {$IFDEF FPC_DOTTEDUNITS}
  34. uses
  35. System.SysUtils, System.StrUtils;
  36. {$ELSE FPC_DOTTEDUNITS}
  37. uses
  38. SysUtils, strutils;
  39. {$ENDIF FPC_DOTTEDUNITS}
  40. { most commonly used }
  41. function GetVal(const tag, attribname_ci: string): string;
  42. function GetTagName(const Tag: string): string;
  43. { less commonly used, but useful }
  44. function GetUpTagName(const tag: string): string;
  45. function GetNameValPair(const tag, attribname_ci: string): string;
  46. function GetValFromNameVal(const namevalpair: string): string;
  47. { old buggy code}
  48. function GetVal_JAMES(tag, attribname_ci: string): string;
  49. function GetNameValPair_JAMES(tag, attribname_ci: string): string;
  50. { rarely needed NAME= case sensitivity }
  51. function GetNameValPair_cs(tag, attribname: string): string;
  52. implementation
  { Copy at most Len characters starting at StartIndex into a new string.
    StartIndex : first character to copy; nil yields an empty string.
    Len        : maximum number of characters; values <= 0 yield an empty string.
    Returns the copied text. Copying stops early at a #0 terminator, and the
    result is trimmed to the number of characters actually copied. }
  function CopyBuffer(StartIndex: PAnsiChar; Len: integer): string;
  begin
    Result := '';
    // Guard first: taking @Result[1] on an empty string is invalid.
    if (StartIndex = nil) or (Len <= 0) then
      Exit;
    SetLength(Result, Len);
    // StrLCopy writes up to Len characters plus a terminating #0; the string
    // buffer always has room for that terminator.
    StrLCopy(@Result[1], StartIndex, Len);
    // If the source ended before Len, drop the uninitialised tail.
    SetLength(Result, StrLen(PAnsiChar(Result)));
  end;
  { Return the tag name with its case preserved.
    Tag : the tag text, e.g. <a href="x">.
    Leading '<' and whitespace are skipped; the name then runs up to the next
    whitespace character, '>' or end of string. A leading '/' of a closing tag
    is kept as part of the name. Returns '' when no name is present.
    Tab, CR and LF count as whitespace on both sides of the name, so a tag
    whose attributes start on a new line still yields only the name. }
  function GetTagName(const Tag: string): string;
  const
    LeadChars = ['<', ' ', #9, #10, #13];
    StopChars = [' ', #9, #10, #13, '>', #0];
  var
    P : PAnsiChar;
    S : PAnsiChar;
  begin
    Result := '';
    P := PAnsiChar(Tag);
    // #0 is not in LeadChars, so this stops at the end of the string.
    while P^ in LeadChars do
      inc(P);
    S := P;
    while not (P^ in StopChars) do
      inc(P);
    if P > S then
      SetString(Result, S, P - S);
  end;
  { Return the tag name converted to uppercase.
    tag : the tag text, e.g. <a href="x">.
    Leading '<' and whitespace are skipped; the name then runs up to the next
    whitespace character, '>' or end of string. A leading '/' of a closing tag
    is kept. Returns '' when no name is present.
    Tab, CR and LF count as whitespace on both sides of the name. }
  function GetUpTagName(const tag: string): string;
  const
    LeadChars = ['<', ' ', #9, #10, #13];
    StopChars = [' ', #9, #10, #13, '>', #0];
  var
    Upper : string;
    P : PAnsiChar;
    S : PAnsiChar;
  begin
    Result := '';
    // Hold the uppercase copy in a named local so the pointers below refer to
    // a string whose lifetime is explicit, not to an unnamed temporary.
    Upper := UpperCase(tag);
    P := PAnsiChar(Upper);
    while P^ in LeadChars do
      inc(P);
    S := P;
    while not (P^ in StopChars) do
      inc(P);
    if P > S then
      SetString(Result, S, P - S);
  end;
  { Return the name=value pair of an attribute, ignoring the case of NAME and
    preserving the case of the returned text.
    tag           : the tag text, e.g. <a HREF="x" id=Main>.
    attribname_ci : attribute name to look for, compared case-insensitively.
    Returns the pair exactly as written in tag (a quoted value keeps its
    quotes), just the name for an attribute that has no '=', or '' when the
    attribute is not present or attribname_ci is empty.
    The attribute must be preceded by whitespace and must match the whole
    name: looking for ID finds neither WIDTH nor IDX.
    Limitation: the search does not know about quoting, so a matching word
    preceded by a space inside another attribute's quoted value is still
    treated as a hit. }
  function GetNameValPair(const tag, attribname_ci: string): string;
  var
    P : PAnsiChar;
    S : PAnsiChar;
    Base : PAnsiChar;
    NameEnd : PAnsiChar;
    UpperTag,
    UpperAttrib,
    Needle : string;
    NameLen : integer;
    I : integer;
    C : AnsiChar;
  begin
    Result := '';
    if attribname_ci = '' then
      Exit;
    UpperAttrib := UpperCase(attribname_ci);
    NameLen := Length(UpperAttrib);
    // must be space before case insensitive NAME, i.e. <a HREF="" STYLE=""
    Needle := ' ' + UpperAttrib;
    UpperTag := UpperCase(tag);
    // Fold tab/CR/LF to spaces in the search copy only. The length does not
    // change, so offsets still map one-to-one onto the original tag.
    for I := 1 to Length(UpperTag) do
      if UpperTag[I] in [#9, #10, #13] then
        UpperTag[I] := ' ';
    Base := PAnsiChar(UpperTag);

    S := StrPos(Base, PAnsiChar(Needle));
    while S <> nil do
    begin
      inc(S); // skip space, S is now on the first character of the name
      // Accept only a whole-name match. A caller-supplied name that already
      // ends in '=' or ' ' carries its own boundary.
      if (UpperAttrib[NameLen] in ['=', ' ']) or
         ((S + NameLen)^ in ['=', ' ', '>', #0]) then
        Break;
      S := StrPos(S, PAnsiChar(Needle));
    end;
    if S = nil then
      Exit;

    // Skip attribute name
    P := S;
    while not (P^ in ['=', ' ', '>', #0]) do
      inc(P);
    NameEnd := P;
    while P^ = ' ' do
      inc(P);
    if P^ = '=' then
    begin
      // Skip '=' and any spaces around it
      while P^ in ['=', ' '] do
        inc(P);
      if not (P^ in ['>', #0]) then
      begin
        if P^ in ['"', ''''] then
        begin
          C := P^;
          inc(P); // skip opening quote
        end else
          C := ' ';
        while not (P^ in [C, '>', #0]) do
          inc(P);
        // Step over the closing quote (or the delimiting space), but never
        // over '>' or the string terminator.
        if not (P^ in ['>', #0]) then
          inc(P);
      end;
    end else
      // No '=' follows: the attribute has no value, so the pair is the name
      // alone and must not run into the next attribute.
      P := NameEnd;

    // Take the same span from the original tag to preserve its case.
    Result := Copy(tag, (S - Base) + 1, P - S);
  end;
  { Extract the value part of a name=value pair, e.g. WIDTH=36 gives 36.
    namevalpair : text containing name=value; the value may be wrapped in
                  single or double quotes.
    Returns the value with its case untouched and without the surrounding
    quotes, or '' when there is no '=' or the value is empty. An unquoted
    value ends at the first space; a quoted value ends at the matching quote. }
  function GetValFromNameVal(const namevalpair: string): string;
  var
    Cursor : PAnsiChar;
    ValueStart : PAnsiChar;
    Terminator : AnsiChar;
  begin
    Result := '';
    Cursor := StrPos(PAnsiChar(namevalpair), '=');
    if Cursor = nil then
      Exit;

    // Step over the '=' itself, then over any spaces that follow it.
    repeat
      inc(Cursor);
    until Cursor^ <> ' ';

    // A leading quote selects that quote as the terminator; otherwise the
    // value runs up to the next space.
    Terminator := ' ';
    if (Cursor^ = '"') or (Cursor^ = '''') then
    begin
      Terminator := Cursor^;
      inc(Cursor);
    end;

    ValueStart := Cursor;
    while (Cursor^ <> Terminator) and (Cursor^ <> #0) do
      inc(Cursor);

    if Cursor <> ValueStart then
      SetString(Result, ValueStart, Cursor - ValueStart);
  end;
  { Return the value of attribute attribname_ci within tag.
    tag           : the tag text to search.
    attribname_ci : attribute name; its case is ignored.
    The name=value pair is located with GetNameValPair and its value part is
    then extracted with GetValFromNameVal, so the case of the returned value
    is preserved. }
  function GetVal(const tag, attribname_ci: string): string;
  begin
    Result := GetValFromNameVal(GetNameValPair(tag, attribname_ci));
  end;
  180. { ----------------------------------------------------------------------------
  181. BELOW FUNCTIONS ARE OBSOLETE OR RARELY NEEDED SINCE THEY EITHER CONTAIN BUGS
  182. OR THEY ARE TOO CASE SENSITIVE (FOR THE TAG NAME PORTION OF THE ATTRIBUTE) }
  { James' old matching code, kept for testing purposes.
    Known bug, intentionally preserved: the attribute name is located with a
    plain substring search, so looking for 'ID' also hits the 'id' inside
    "width".
    tag           : the tag text to search.
    attribname_ci : attribute name; its case is ignored.
    Returns the matched span taken from tag with its original case, or ''
    when the name does not occur in tag. }
  function GetNameValPair_JAMES(tag, attribname_ci: string): string;
  var
    P : PAnsiChar;
    S : PAnsiChar;
    Base : PAnsiChar;
    UT,
    UA : string;
    C : AnsiChar;
  begin
    Result := '';
    UA := UpperCase(attribname_ci);
    UT := UpperCase(tag);
    Base := PAnsiChar(UT);
    S := StrPos(Base, PAnsiChar(UA));
    if S = nil then
      Exit;

    P := S;
    // Skip attribute name
    while not (P^ in ['=', ' ', '>', #0]) do
      inc(P);
    if P^ = '=' then
      inc(P);
    if not (P^ in [' ', '>', #0]) then
    begin
      if P^ in ['"', ''''] then
      begin
        C := P^;
        inc(P); // skip opening quote
      end else
        C := ' ';
      while not (P^ in [C, '>', #0]) do
        inc(P);
      // Step over the closing quote or delimiting space, but never over '>'
      // or the string terminator.
      if not (P^ in ['>', #0]) then
        inc(P);
    end;

    // Take the same span from the original tag to preserve its case; an
    // empty span leaves the result empty.
    if P > S then
      Result := Copy(tag, (S - Base) + 1, P - S);
  end;
  { James' old lookup, kept for testing purposes: finds the pair with
    GetNameValPair_JAMES and returns its value part via GetValFromNameVal. }
  function GetVal_JAMES(tag, attribname_ci: string): string;
  begin
    Result := GetValFromNameVal(GetNameValPair_JAMES(tag, attribname_ci));
  end;
  { Return the name=value portion of an attribute; the name is matched case
    sensitively and the returned text keeps its case.
    Tag        : the tag text to search.
    attribname : attribute name, compared exactly as given.
    Returns the pair as written in Tag (a quoted value keeps its quotes), or
    '' when the attribute is not present or attribname is empty.
    A hit only counts when it starts at the beginning of Tag or directly after
    a space, and when it is not merely the prefix of a longer name; searching
    for 'id' therefore no longer matches inside 'width'. }
  function GetNameValPair_cs(Tag, attribname: string): string;
  var
    P : PAnsiChar;
    S : PAnsiChar;
    Base : PAnsiChar;
    NameLen : integer;
    C : AnsiChar;
  begin
    Result := '';
    if attribname = '' then
      Exit;
    NameLen := Length(attribname);
    Base := PAnsiChar(Tag);

    S := StrPos(Base, PAnsiChar(attribname));
    while S <> nil do
    begin
      // Leading boundary: start of text or a preceding space.
      // Trailing boundary: the name ends the token, unless the caller's name
      // already ends in '=' or ' ' and so carries its own boundary.
      if ((S = Base) or ((S - 1)^ = ' ')) and
         ((attribname[NameLen] in ['=', ' ']) or
          ((S + NameLen)^ in ['=', ' ', '>', #0])) then
        Break;
      S := StrPos(S + 1, PAnsiChar(attribname));
    end;
    if S = nil then
      Exit;

    P := S;
    // Skip attribute name
    while not (P^ in ['=', ' ', '>', #0]) do
      inc(P);
    if P^ = '=' then
      inc(P);
    if not (P^ in [' ', '>', #0]) then
    begin
      if P^ in ['"', ''''] then
      begin
        C := P^;
        inc(P); // skip opening quote
      end else
        C := ' ';
      while not (P^ in [C, '>', #0]) do
        inc(P);
      // Step over the closing quote or delimiting space, but never over '>'
      // or the string terminator.
      if not (P^ in ['>', #0]) then
        inc(P);
    end;

    if P > S then
      SetString(Result, S, P - S);
  end;
  274. end.
  275. (* alternative, not needed
  276. { return value (case preserved) from a name=value pair, ignores case in given NAME= portion }
  277. function GetValFromNameVal(namevalpair: string): string;
  278. type
  279. TAttribPos = record
  280. startpos: longword; // start pos of value
  281. len: longword; // length of value
  282. end;
  283. { returns case insensitive start position and length of just the value
  284. substring in name=value pair}
  285. function ReturnPos(attribute: string): TAttribPos;
  286. var
  287. P : PAnsiChar;
  288. S : PAnsiChar;
  289. C : AnsiChar;
  290. begin
  291. result.startpos:= 0;
  292. result.len:= 0;
  293. P:= PAnsiChar(uppercase(Attribute));
  294. // get substring including and everything after equal
  295. S:= StrPos(P, '=');
  296. result.startpos:= pos('=', P);
  297. if S <> nil then
  298. begin
  299. inc(S);
  300. // set to character after =
  301. inc(result.startpos);
  302. P:= S;
  303. if (P^ in ['"','''']) then
  304. begin
  305. C:= P^;
  306. // skip quote
  307. inc(P);
  308. inc(result.startpos);
  309. end else
  310. C:= ' ';
  311. S:= P;
  312. // go to end quote or end of value
  313. while not (P^ in [C, #0]) do
  314. inc(P);
  315. if (P <> S) then
  316. begin
  317. result.len:= p - s;
  318. end;
  319. end;
  320. end;
  321. var
  322. found: TAttribPos;
  323. begin
  324. found:= ReturnPos(namevalpair);
  325. // extract using coordinates
  326. result:= MidStr(namevalpair, found.startpos, found.len);
  327. end;
  328. *)