htmlutil.pas 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. { Copyright (C) <2005> <Andrew Haines> htmlutil.pas
  2. This library is free software; you can redistribute it and/or modify it
  3. under the terms of the GNU Library General Public License as published by
  4. the Free Software Foundation; either version 2 of the License, or (at your
  5. option) any later version.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  8. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
  9. for more details.
  10. You should have received a copy of the GNU Library General Public License
  11. along with this library; if not, write to the Free Software Foundation,
  12. Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  13. }
  14. {
  15. See the file COPYING.FPC, included in this distribution,
  16. for details about the copyright.
  17. }
  18. { modified from jsFastHtmlParser for use with freepascal
  19. Original Author:
  20. James Azarja
  21. Contributor:
  22. Lars aka L505
  23. http://z505.com
  24. Note: this isn't perfect, it needs to be improved.. see comments }
  25. unit HTMLUtil; {$ifdef fpc} {$MODE Delphi} {$H+}{$endif}
  26. interface
  27. uses
  28. SysUtils, strutils;
  29. { most commonly used }
  30. function GetVal(tag, attribname_ci: string): string;
  31. function GetTagName(Tag: string): string;
  32. { less commonly used, but useful }
  33. function GetUpTagName(tag: string): string;
  34. function GetNameValPair(tag, attribname_ci: string): string;
  35. function GetValFromNameVal(namevalpair: string): string;
  36. { old buggy code}
  37. function GetVal_JAMES(tag, attribname_ci: string): string;
  38. function GetNameValPair_JAMES(tag, attribname_ci: string): string;
  39. { rarely needed NAME= case sensitivity }
  40. function GetNameValPair_cs(tag, attribname: string): string;
  41. implementation
  42. function CopyBuffer(StartIndex: PChar; Len: integer): string;
  43. var s : String;
  44. begin
  45. SetLength(s, Len);
  46. StrLCopy(@s[1], StartIndex, Len);
  47. result:= s;
  48. end;
  49. { Return tag name, case preserved }
  50. function GetTagName(Tag: string): string;
  51. var
  52. P : Pchar;
  53. S : Pchar;
  54. begin
  55. P := Pchar(Tag);
  56. while P^ in ['<',' ',#9] do inc(P);
  57. S := P;
  58. while Not (P^ in [' ','>',#0]) do inc(P);
  59. if P > S then
  60. Result := CopyBuffer( S, P-S)
  61. else
  62. Result := '';
  63. end;
  64. { Return tag name in uppercase }
  65. function GetUpTagName(tag: string): string;
  66. var
  67. P : Pchar;
  68. S : Pchar;
  69. begin
  70. P := Pchar(uppercase(Tag));
  71. while P^ in ['<',' ',#9] do inc(P);
  72. S := P;
  73. while Not (P^ in [' ','>',#0]) do inc(P);
  74. if P > S then
  75. Result := CopyBuffer( S, P-S)
  76. else
  77. Result := '';
  78. end;
  79. { Return name=value pair ignoring case of NAME, preserving case of VALUE
  80. Lars' fixed version }
  81. function GetNameValPair(tag, attribname_ci: string): string;
  82. var
  83. P : Pchar;
  84. S : Pchar;
  85. UpperTag,
  86. UpperAttrib : string;
  87. Start: integer;
  88. L : integer;
  89. C : char;
  90. begin
  91. // must be space before case insensitive NAME, i.e. <a HREF="" STYLE=""
  92. UpperAttrib:= ' ' + Uppercase(attribname_ci);
  93. UpperTag:= Uppercase(Tag);
  94. P:= Pchar(UpperTag);
  95. S:= StrPos(P, Pchar(UpperAttrib));
  96. if S <> nil then
  97. begin
  98. inc(S); // skip space
  99. P:= S;
  100. // Skip
  101. while not (P^ in ['=', ' ', '>', #0]) do
  102. inc(P);
  103. if (P^ = '=') then inc(P);
  104. while not (P^ in [' ','>',#0]) do
  105. begin
  106. if (P^ in ['"','''']) then
  107. begin
  108. C:= P^;
  109. inc(P); { Skip quote }
  110. end else
  111. C:= ' ';
  112. { thanks to Dmitry [[email protected]] }
  113. while not (P^ in [C, '>', #0]) do
  114. Inc(P);
  115. if (P^ <> '>') then inc(P); { Skip current character, except '>' }
  116. break;
  117. end;
  118. L:= P - S;
  119. Start:= S - Pchar(UpperTag);
  120. P:= Pchar(Tag);
  121. S:= P;
  122. inc(S, Start);
  123. result:= CopyBuffer(S, L);
  124. end;
  125. end;
  126. { Get value of attribute, e.g WIDTH=36 -return-> 36, preserves case sensitive }
  127. function GetValFromNameVal(namevalpair: string): string;
  128. var
  129. P: Pchar;
  130. S: Pchar;
  131. C: Char;
  132. begin
  133. P:= Pchar(namevalpair);
  134. S:= StrPos(P, '=');
  135. if S <> nil then
  136. begin
  137. inc(S); // skip equal
  138. P:= S; // set P to a character after =
  139. if (P^ in ['"','''']) then
  140. begin
  141. C:= P^;
  142. Inc(P); { Skip current character }
  143. end else
  144. C:= ' ';
  145. S:= P;
  146. while not (P^ in [C, #0]) do
  147. inc(P);
  148. if (P <> S) then { Thanks to Dave Keighan ([email protected]) }
  149. Result:= CopyBuffer(S, P - S)
  150. else
  151. Result:= '';
  152. end;
  153. end;
  154. { return value of an attribute (attribname_ci), case ignored for NAME portion, but return value case is preserved }
  155. function GetVal(tag, attribname_ci: string): string;
  156. var namevalpair: string;
  157. begin
  158. // returns full name=value pair
  159. namevalpair:= GetNameValPair(tag, attribname_ci);
  160. // extracts value portion only
  161. result:= GetValFromNameVal(namevalpair);
  162. end;
  163. { ----------------------------------------------------------------------------
  164. BELOW FUNCTIONS ARE OBSOLETE OR RARELY NEEDED SINCE THEY EITHER CONTAIN BUGS
  165. OR THEY ARE TOO CASE SENSITIVE (FOR THE TAG NAME PORTION OF THE ATTRIBUTE }
  166. { James old buggy code for testing purposes.
  167. Bug: when finding 'ID', function finds "width", even though width <> "id" }
  168. function GetNameValPair_JAMES(tag, attribname_ci: string): string;
  169. var
  170. P : Pchar;
  171. S : Pchar;
  172. UT,
  173. UA : string;
  174. Start: integer;
  175. L : integer;
  176. C : char;
  177. begin
  178. UA:= Uppercase(attribname_ci);
  179. UT:= Uppercase(Tag);
  180. P:= Pchar(UT);
  181. S:= StrPos(P, Pchar(UA));
  182. if S <> nil then
  183. begin
  184. P := S;
  185. // Skip attribute name
  186. while not (P^ in ['=',' ','>',#0]) do
  187. inc(P);
  188. if (P^ = '=') then inc(P);
  189. while not (P^ in [' ','>',#0]) do
  190. begin
  191. if (P^ in ['"','''']) then
  192. begin
  193. C:= P^;
  194. inc(P); { Skip current character }
  195. end else
  196. C:= ' ';
  197. { thanks to Dmitry [[email protected]] }
  198. while not (P^ in [C, '>', #0]) do
  199. Inc(P);
  200. if (P^ <> '>') then inc(P); { Skip current character, except '>' }
  201. break;
  202. end;
  203. L:= P - S;
  204. Start:= S - Pchar(UT);
  205. P:= Pchar(Tag);
  206. S:= P;
  207. inc(S, Start);
  208. result:= CopyBuffer(S, L);
  209. end;
  210. end;
  211. { James old buggy code for testing purposes }
  212. function GetVal_JAMES(tag, attribname_ci: string): string;
  213. var namevalpair: string;
  214. begin
  215. namevalpair:= GetNameValPair_JAMES(tag, attribname_ci);
  216. result:= GetValFromNameVal(namevalpair);
  217. end;
  218. { return name=value portion, case sensitive, case preserved }
  219. function GetNameValPair_cs(Tag, attribname: string): string;
  220. var
  221. P : Pchar;
  222. S : Pchar;
  223. C : Char;
  224. begin
  225. P := Pchar(Tag);
  226. S := StrPos(P, Pchar(attribname));
  227. if S<>nil then
  228. begin
  229. P := S;
  230. // Skip attribute name
  231. while not (P^ in ['=',' ','>',#0]) do
  232. inc(P);
  233. if (P^ = '=') then inc(P);
  234. while not (P^ in [' ','>',#0]) do
  235. begin
  236. if (P^ in ['"','''']) then
  237. begin
  238. C:= P^;
  239. inc(P); { Skip current character }
  240. end else
  241. C:= ' ';
  242. { thanks to Dmitry [[email protected]] }
  243. while not (P^ in [C, '>', #0]) do
  244. inc(P);
  245. if (P^<>'>') then inc(P); { Skip current character, except '>' }
  246. break;
  247. end;
  248. if P > S then
  249. Result:= CopyBuffer(S, P - S)
  250. else
  251. Result:= '';
  252. end;
  253. end;
  254. end.
  255. (* alternative, not needed
  256. { return value (case preserved) from a name=value pair, ignores case in given NAME= portion }
  257. function GetValFromNameVal(namevalpair: string): string;
  258. type
  259. TAttribPos = record
  260. startpos: longword; // start pos of value
  261. len: longword; // length of value
  262. end;
  263. { returns case insensitive start position and length of just the value
  264. substring in name=value pair}
  265. function ReturnPos(attribute: string): TAttribPos;
  266. var
  267. P : Pchar;
  268. S : Pchar;
  269. C : Char;
  270. begin
  271. result.startpos:= 0;
  272. result.len:= 0;
  273. P:= Pchar(uppercase(Attribute));
  274. // get substring including and everything after equal
  275. S:= StrPos(P, '=');
  276. result.startpos:= pos('=', P);
  277. if S <> nil then
  278. begin
  279. inc(S);
  280. // set to character after =
  281. inc(result.startpos);
  282. P:= S;
  283. if (P^ in ['"','''']) then
  284. begin
  285. C:= P^;
  286. // skip quote
  287. inc(P);
  288. inc(result.startpos);
  289. end else
  290. C:= ' ';
  291. S:= P;
  292. // go to end quote or end of value
  293. while not (P^ in [C, #0]) do
  294. inc(P);
  295. if (P <> S) then
  296. begin
  297. result.len:= p - s;
  298. end;
  299. end;
  300. end;
  301. var
  302. found: TAttribPos;
  303. begin
  304. found:= ReturnPos(namevalpair);
  305. // extract using coordinates
  306. result:= MidStr(namevalpair, found.startpos, found.len);
  307. end;
  308. *)