htmlutil.pas 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. { Copyright (C) <2005> <Andrew Haines> htmlutil.pas
  2. This library is free software; you can redistribute it and/or modify it
  3. under the terms of the GNU Library General Public License as published by
  4. the Free Software Foundation; either version 2 of the License, or (at your
  5. option) any later version.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  8. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
  9. for more details.
  10. You should have received a copy of the GNU Library General Public License
  11. along with this library; if not, write to the Free Software Foundation,
  12. Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  13. }
  14. {
  15. See the file COPYING.FPC, included in this distribution,
  16. for details about the copyright.
  17. }
  18. { modified from jsFastHtmlParser for use with freepascal
  19. Original Author:
  20. James Azarja
  21. Contributor:
  22. Lars aka L505
  23. http://z505.com
  24. Note: this isn't perfect, it needs to be improved.. see comments }
  25. unit HTMLUtil; {$ifdef fpc} {$MODE Delphi} {$H+}{$endif}
  26. interface
  27. uses
  28. SysUtils, strutils;
  29. { most commonly used }
  30. function GetVal(const tag, attribname_ci: string): string;
  31. function GetTagName(const Tag: string): string;
  32. { less commonly used, but useful }
  33. function GetUpTagName(const tag: string): string;
  34. function GetNameValPair(const tag, attribname_ci: string): string;
  35. function GetValFromNameVal(const namevalpair: string): string;
  36. { old buggy code}
  37. function GetVal_JAMES(tag, attribname_ci: string): string;
  38. function GetNameValPair_JAMES(tag, attribname_ci: string): string;
  39. { rarely needed NAME= case sensitivity }
  40. function GetNameValPair_cs(tag, attribname: string): string;
  41. implementation
  42. function CopyBuffer(StartIndex: PAnsiChar; Len: integer): string;
  43. var s : String;
  44. begin
  45. SetLength(s, Len);
  46. StrLCopy(@s[1], StartIndex, Len);
  47. result:= s;
  48. end;
  49. { Return tag name, case preserved }
  50. function GetTagName(const Tag: string): string;
  51. var
  52. P : PAnsiChar;
  53. S : PAnsiChar;
  54. begin
  55. P := PAnsiChar(Tag);
  56. while P^ in ['<',' ',#9] do
  57. inc(P);
  58. S := P;
  59. while Not (P^ in [' ','>',#0]) do
  60. inc(P);
  61. if P > S then
  62. Result := CopyBuffer( S, P-S)
  63. else
  64. Result := '';
  65. end;
  66. { Return tag name in uppercase }
  67. function GetUpTagName(const tag: string): string;
  68. var
  69. P : PAnsiChar;
  70. S : PAnsiChar;
  71. begin
  72. P := PAnsiChar(uppercase(Tag));
  73. while P^ in ['<',' ',#9] do
  74. inc(P);
  75. S := P;
  76. while Not (P^ in [' ','>',#0]) do
  77. inc(P);
  78. if P > S then
  79. Result := CopyBuffer( S, P-S)
  80. else
  81. Result := '';
  82. end;
  83. { Return name=value pair ignoring case of NAME, preserving case of VALUE
  84. Lars' fixed version }
  85. function GetNameValPair(const tag, attribname_ci: string): string;
  86. var
  87. P : PAnsiChar;
  88. S : PAnsiChar;
  89. UpperTag,
  90. UpperAttrib : string;
  91. Start: integer;
  92. L : integer;
  93. C : AnsiChar;
  94. begin
  95. // must be space before case insensitive NAME, i.e. <a HREF="" STYLE=""
  96. UpperAttrib:= ' ' + Uppercase(attribname_ci);
  97. UpperTag:= Uppercase(Tag);
  98. P:= PAnsiChar(UpperTag);
  99. S:= StrPos(P, PAnsiChar(UpperAttrib));
  100. if S <> nil then
  101. begin
  102. inc(S); // skip space
  103. P:= S;
  104. // Skip tag name
  105. while not (P^ in ['=', ' ', '>', #0]) do
  106. inc(P);
  107. // Skip spaces and '='
  108. while (P^ in ['=', ' ']) do
  109. inc(P);
  110. while not (P^ in [' ','>',#0]) do
  111. begin
  112. if (P^ in ['"','''']) then
  113. begin
  114. C:= P^;
  115. inc(P); { Skip quote }
  116. end else
  117. C:= ' ';
  118. { thanks to Dmitry [[email protected]] }
  119. while not (P^ in [C, '>', #0]) do
  120. Inc(P);
  121. if (P^ <> '>') then inc(P); { Skip current character, except '>' }
  122. break;
  123. end;
  124. L:= P - S;
  125. Start:= S - PAnsiChar(UpperTag);
  126. P:= PAnsiChar(Tag);
  127. S:= P;
  128. inc(S, Start);
  129. result:= CopyBuffer(S, L);
  130. end;
  131. end;
  132. { Get value of attribute, e.g WIDTH=36 -return-> 36, preserves case sensitive }
  133. function GetValFromNameVal(const namevalpair: string): string;
  134. var
  135. P: PAnsiChar;
  136. S: PAnsiChar;
  137. C: AnsiChar;
  138. begin
  139. Result := '';
  140. P:= PAnsiChar(namevalpair);
  141. S:= StrPos(P, '=');
  142. if S <> nil then
  143. begin
  144. inc(S); // skip equal
  145. while S^ = ' ' do inc(S); // skip any spaces after =
  146. P:= S; // set P to a character after =
  147. if (P^ in ['"','''']) then
  148. begin
  149. C:= P^;
  150. Inc(P); { Skip current character }
  151. end else
  152. C:= ' ';
  153. S:= P;
  154. while not (P^ in [C, #0]) do
  155. inc(P);
  156. if (P <> S) then { Thanks to Dave Keighan ([email protected]) }
  157. Result:= CopyBuffer(S, P - S);
  158. end;
  159. end;
  160. { return value of an attribute (attribname_ci), case ignored for NAME portion, but return value case is preserved }
  161. function GetVal(const tag, attribname_ci: string): string;
  162. var namevalpair: string;
  163. begin
  164. // returns full name=value pair
  165. namevalpair:= GetNameValPair(tag, attribname_ci);
  166. // extracts value portion only
  167. result:= GetValFromNameVal(namevalpair);
  168. end;
  169. { ----------------------------------------------------------------------------
  170. BELOW FUNCTIONS ARE OBSOLETE OR RARELY NEEDED SINCE THEY EITHER CONTAIN BUGS
  171. OR THEY ARE TOO CASE SENSITIVE (FOR THE TAG NAME PORTION OF THE ATTRIBUTE }
  172. { James old buggy code for testing purposes.
  173. Bug: when finding 'ID', function finds "width", even though width <> "id" }
  174. function GetNameValPair_JAMES(tag, attribname_ci: string): string;
  175. var
  176. P : PAnsiChar;
  177. S : PAnsiChar;
  178. UT,
  179. UA : string;
  180. Start: integer;
  181. L : integer;
  182. C : AnsiChar;
  183. begin
  184. UA:= Uppercase(attribname_ci);
  185. UT:= Uppercase(Tag);
  186. P:= PAnsiChar(UT);
  187. S:= StrPos(P, PAnsiChar(UA));
  188. if S <> nil then
  189. begin
  190. P := S;
  191. // Skip attribute name
  192. while not (P^ in ['=',' ','>',#0]) do
  193. inc(P);
  194. if (P^ = '=') then
  195. inc(P);
  196. while not (P^ in [' ','>',#0]) do
  197. begin
  198. if (P^ in ['"','''']) then
  199. begin
  200. C:= P^;
  201. inc(P); { Skip current character }
  202. end else
  203. C:= ' ';
  204. { thanks to Dmitry [[email protected]] }
  205. while not (P^ in [C, '>', #0]) do
  206. Inc(P);
  207. if (P^ <> '>') then inc(P); { Skip current character, except '>' }
  208. break;
  209. end;
  210. L:= P - S;
  211. Start:= S - PAnsiChar(UT);
  212. P:= PAnsiChar(Tag);
  213. S:= P;
  214. inc(S, Start);
  215. result:= CopyBuffer(S, L);
  216. end;
  217. end;
  218. { James old buggy code for testing purposes }
  219. function GetVal_JAMES(tag, attribname_ci: string): string;
  220. var namevalpair: string;
  221. begin
  222. namevalpair:= GetNameValPair_JAMES(tag, attribname_ci);
  223. result:= GetValFromNameVal(namevalpair);
  224. end;
  225. { return name=value portion, case sensitive, case preserved }
  226. function GetNameValPair_cs(Tag, attribname: string): string;
  227. var
  228. P : PAnsiChar;
  229. S : PAnsiChar;
  230. C : AnsiChar;
  231. begin
  232. P := PAnsiChar(Tag);
  233. S := StrPos(P, PAnsiChar(attribname));
  234. if S<>nil then
  235. begin
  236. P := S;
  237. // Skip attribute name
  238. while not (P^ in ['=',' ','>',#0]) do
  239. inc(P);
  240. if (P^ = '=') then
  241. inc(P);
  242. while not (P^ in [' ','>',#0]) do
  243. begin
  244. if (P^ in ['"','''']) then
  245. begin
  246. C:= P^;
  247. inc(P); { Skip current character }
  248. end else
  249. C:= ' ';
  250. { thanks to Dmitry [[email protected]] }
  251. while not (P^ in [C, '>', #0]) do
  252. inc(P);
  253. if (P^<>'>') then
  254. inc(P); { Skip current character, except '>' }
  255. break;
  256. end;
  257. if P > S then
  258. Result:= CopyBuffer(S, P - S)
  259. else
  260. Result:= '';
  261. end;
  262. end;
  263. end.
  264. (* alternative, not needed
  265. { return value (case preserved) from a name=value pair, ignores case in given NAME= portion }
  266. function GetValFromNameVal(namevalpair: string): string;
  267. type
  268. TAttribPos = record
  269. startpos: longword; // start pos of value
  270. len: longword; // length of value
  271. end;
  272. { returns case insensitive start position and length of just the value
  273. substring in name=value pair}
  274. function ReturnPos(attribute: string): TAttribPos;
  275. var
  276. P : PAnsiChar;
  277. S : PAnsiChar;
  278. C : AnsiChar;
  279. begin
  280. result.startpos:= 0;
  281. result.len:= 0;
  282. P:= PAnsiChar(uppercase(Attribute));
  283. // get substring including and everything after equal
  284. S:= StrPos(P, '=');
  285. result.startpos:= pos('=', P);
  286. if S <> nil then
  287. begin
  288. inc(S);
  289. // set to character after =
  290. inc(result.startpos);
  291. P:= S;
  292. if (P^ in ['"','''']) then
  293. begin
  294. C:= P^;
  295. // skip quote
  296. inc(P);
  297. inc(result.startpos);
  298. end else
  299. C:= ' ';
  300. S:= P;
  301. // go to end quote or end of value
  302. while not (P^ in [C, #0]) do
  303. inc(P);
  304. if (P <> S) then
  305. begin
  306. result.len:= p - s;
  307. end;
  308. end;
  309. end;
  310. var
  311. found: TAttribPos;
  312. begin
  313. found:= ReturnPos(namevalpair);
  314. // extract using coordinates
  315. result:= MidStr(namevalpair, found.startpos, found.len);
  316. end;
  317. *)