htmlutil.pas 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. { Copyright (C) <2005> <Andrew Haines> htmlutil.pas
  2. This library is free software; you can redistribute it and/or modify it
  3. under the terms of the GNU Library General Public License as published by
  4. the Free Software Foundation; either version 2 of the License, or (at your
  5. option) any later version.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  8. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
  9. for more details.
  10. You should have received a copy of the GNU Library General Public License
  11. along with this library; if not, write to the Free Software Foundation,
  12. Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  13. }
  14. {
  15. See the file COPYING.FPC, included in this distribution,
  16. for details about the copyright.
  17. }
  18. { modified from jsFastHtmlParser for use with freepascal
  19. Original Author:
  20. James Azarja
  21. Contributor:
  22. Lars aka L505
  23. http://z505.com
  24. Note: this isn't perfect, it needs to be improved.. see comments }
  25. unit HTMLUtil; {$ifdef fpc} {$MODE Delphi} {$H+}{$endif}
  26. interface
  27. uses
  28. SysUtils, strutils;
  29. { most commonly used }
  30. function GetVal(tag, attribname_ci: string): string;
  31. function GetTagName(Tag: string): string;
  32. { less commonly used, but useful }
  33. function GetUpTagName(tag: string): string;
  34. function GetNameValPair(tag, attribname_ci: string): string;
  35. function GetValFromNameVal(namevalpair: string): string;
  36. { old buggy code}
  37. function GetVal_JAMES(tag, attribname_ci: string): string;
  38. function GetNameValPair_JAMES(tag, attribname_ci: string): string;
  39. { rarely needed NAME= case sensitivity }
  40. function GetNameValPair_cs(tag, attribname: string): string;
  41. implementation
  42. function CopyBuffer(StartIndex: PChar; Len: integer): string;
  43. var s : String;
  44. begin
  45. SetLength(s, Len);
  46. StrLCopy(@s[1], StartIndex, Len);
  47. result:= s;
  48. end;
  49. { Return tag name, case preserved }
  50. function GetTagName(Tag: string): string;
  51. var
  52. P : Pchar;
  53. S : Pchar;
  54. begin
  55. P := Pchar(Tag);
  56. while P^ in ['<',' ',#9] do inc(P);
  57. S := P;
  58. while Not (P^ in [' ','>',#0]) do inc(P);
  59. if P > S then
  60. Result := CopyBuffer( S, P-S)
  61. else
  62. Result := '';
  63. end;
  64. { Return tag name in uppercase }
  65. function GetUpTagName(tag: string): string;
  66. var
  67. P : Pchar;
  68. S : Pchar;
  69. begin
  70. P := Pchar(uppercase(Tag));
  71. while P^ in ['<',' ',#9] do inc(P);
  72. S := P;
  73. while Not (P^ in [' ','>',#0]) do inc(P);
  74. if P > S then
  75. Result := CopyBuffer( S, P-S)
  76. else
  77. Result := '';
  78. end;
  79. { Return name=value pair ignoring case of NAME, preserving case of VALUE
  80. Lars' fixed version }
  81. function GetNameValPair(tag, attribname_ci: string): string;
  82. var
  83. P : Pchar;
  84. S : Pchar;
  85. UpperTag,
  86. UpperAttrib : string;
  87. Start: integer;
  88. L : integer;
  89. C : char;
  90. begin
  91. // must be space before case insensitive NAME, i.e. <a HREF="" STYLE=""
  92. UpperAttrib:= ' ' + Uppercase(attribname_ci);
  93. UpperTag:= Uppercase(Tag);
  94. P:= Pchar(UpperTag);
  95. S:= StrPos(P, Pchar(UpperAttrib));
  96. if S <> nil then
  97. begin
  98. inc(S); // skip space
  99. P:= S;
  100. // Skip tag name
  101. while not (P^ in ['=', ' ', '>', #0]) do
  102. inc(P);
  103. // Skip spaces and '='
  104. while (P^ in ['=', ' ']) do
  105. inc(P);
  106. while not (P^ in [' ','>',#0]) do
  107. begin
  108. if (P^ in ['"','''']) then
  109. begin
  110. C:= P^;
  111. inc(P); { Skip quote }
  112. end else
  113. C:= ' ';
  114. { thanks to Dmitry [[email protected]] }
  115. while not (P^ in [C, '>', #0]) do
  116. Inc(P);
  117. if (P^ <> '>') then inc(P); { Skip current character, except '>' }
  118. break;
  119. end;
  120. L:= P - S;
  121. Start:= S - Pchar(UpperTag);
  122. P:= Pchar(Tag);
  123. S:= P;
  124. inc(S, Start);
  125. result:= CopyBuffer(S, L);
  126. end;
  127. end;
  128. { Get value of attribute, e.g WIDTH=36 -return-> 36, preserves case sensitive }
  129. function GetValFromNameVal(namevalpair: string): string;
  130. var
  131. P: Pchar;
  132. S: Pchar;
  133. C: Char;
  134. begin
  135. Result := '';
  136. P:= Pchar(namevalpair);
  137. S:= StrPos(P, '=');
  138. if S <> nil then
  139. begin
  140. inc(S); // skip equal
  141. while S^ = ' ' do inc(S); // skip any spaces after =
  142. P:= S; // set P to a character after =
  143. if (P^ in ['"','''']) then
  144. begin
  145. C:= P^;
  146. Inc(P); { Skip current character }
  147. end else
  148. C:= ' ';
  149. S:= P;
  150. while not (P^ in [C, #0]) do
  151. inc(P);
  152. if (P <> S) then { Thanks to Dave Keighan ([email protected]) }
  153. Result:= CopyBuffer(S, P - S);
  154. end;
  155. end;
  156. { return value of an attribute (attribname_ci), case ignored for NAME portion, but return value case is preserved }
  157. function GetVal(tag, attribname_ci: string): string;
  158. var namevalpair: string;
  159. begin
  160. // returns full name=value pair
  161. namevalpair:= GetNameValPair(tag, attribname_ci);
  162. // extracts value portion only
  163. result:= GetValFromNameVal(namevalpair);
  164. end;
  165. { ----------------------------------------------------------------------------
  166. BELOW FUNCTIONS ARE OBSOLETE OR RARELY NEEDED SINCE THEY EITHER CONTAIN BUGS
  167. OR THEY ARE TOO CASE SENSITIVE (FOR THE TAG NAME PORTION OF THE ATTRIBUTE }
  168. { James old buggy code for testing purposes.
  169. Bug: when finding 'ID', function finds "width", even though width <> "id" }
  170. function GetNameValPair_JAMES(tag, attribname_ci: string): string;
  171. var
  172. P : Pchar;
  173. S : Pchar;
  174. UT,
  175. UA : string;
  176. Start: integer;
  177. L : integer;
  178. C : char;
  179. begin
  180. UA:= Uppercase(attribname_ci);
  181. UT:= Uppercase(Tag);
  182. P:= Pchar(UT);
  183. S:= StrPos(P, Pchar(UA));
  184. if S <> nil then
  185. begin
  186. P := S;
  187. // Skip attribute name
  188. while not (P^ in ['=',' ','>',#0]) do
  189. inc(P);
  190. if (P^ = '=') then inc(P);
  191. while not (P^ in [' ','>',#0]) do
  192. begin
  193. if (P^ in ['"','''']) then
  194. begin
  195. C:= P^;
  196. inc(P); { Skip current character }
  197. end else
  198. C:= ' ';
  199. { thanks to Dmitry [[email protected]] }
  200. while not (P^ in [C, '>', #0]) do
  201. Inc(P);
  202. if (P^ <> '>') then inc(P); { Skip current character, except '>' }
  203. break;
  204. end;
  205. L:= P - S;
  206. Start:= S - Pchar(UT);
  207. P:= Pchar(Tag);
  208. S:= P;
  209. inc(S, Start);
  210. result:= CopyBuffer(S, L);
  211. end;
  212. end;
  213. { James old buggy code for testing purposes }
  214. function GetVal_JAMES(tag, attribname_ci: string): string;
  215. var namevalpair: string;
  216. begin
  217. namevalpair:= GetNameValPair_JAMES(tag, attribname_ci);
  218. result:= GetValFromNameVal(namevalpair);
  219. end;
  220. { return name=value portion, case sensitive, case preserved }
  221. function GetNameValPair_cs(Tag, attribname: string): string;
  222. var
  223. P : Pchar;
  224. S : Pchar;
  225. C : Char;
  226. begin
  227. P := Pchar(Tag);
  228. S := StrPos(P, Pchar(attribname));
  229. if S<>nil then
  230. begin
  231. P := S;
  232. // Skip attribute name
  233. while not (P^ in ['=',' ','>',#0]) do
  234. inc(P);
  235. if (P^ = '=') then inc(P);
  236. while not (P^ in [' ','>',#0]) do
  237. begin
  238. if (P^ in ['"','''']) then
  239. begin
  240. C:= P^;
  241. inc(P); { Skip current character }
  242. end else
  243. C:= ' ';
  244. { thanks to Dmitry [[email protected]] }
  245. while not (P^ in [C, '>', #0]) do
  246. inc(P);
  247. if (P^<>'>') then inc(P); { Skip current character, except '>' }
  248. break;
  249. end;
  250. if P > S then
  251. Result:= CopyBuffer(S, P - S)
  252. else
  253. Result:= '';
  254. end;
  255. end;
  256. end.
  257. (* alternative, not needed
  258. { return value (case preserved) from a name=value pair, ignores case in given NAME= portion }
  259. function GetValFromNameVal(namevalpair: string): string;
  260. type
  261. TAttribPos = record
  262. startpos: longword; // start pos of value
  263. len: longword; // length of value
  264. end;
  265. { returns case insensitive start position and length of just the value
  266. substring in name=value pair}
  267. function ReturnPos(attribute: string): TAttribPos;
  268. var
  269. P : Pchar;
  270. S : Pchar;
  271. C : Char;
  272. begin
  273. result.startpos:= 0;
  274. result.len:= 0;
  275. P:= Pchar(uppercase(Attribute));
  276. // get substring including and everything after equal
  277. S:= StrPos(P, '=');
  278. result.startpos:= pos('=', P);
  279. if S <> nil then
  280. begin
  281. inc(S);
  282. // set to character after =
  283. inc(result.startpos);
  284. P:= S;
  285. if (P^ in ['"','''']) then
  286. begin
  287. C:= P^;
  288. // skip quote
  289. inc(P);
  290. inc(result.startpos);
  291. end else
  292. C:= ' ';
  293. S:= P;
  294. // go to end quote or end of value
  295. while not (P^ in [C, #0]) do
  296. inc(P);
  297. if (P <> S) then
  298. begin
  299. result.len:= p - s;
  300. end;
  301. end;
  302. end;
  303. var
  304. found: TAttribPos;
  305. begin
  306. found:= ReturnPos(namevalpair);
  307. // extract using coordinates
  308. result:= MidStr(namevalpair, found.startpos, found.len);
  309. end;
  310. *)