123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- { Copyright (C) <2005> <Andrew Haines> htmlutil.pas
- This library is free software; you can redistribute it and/or modify it
- under the terms of the GNU Library General Public License as published by
- the Free Software Foundation; either version 2 of the License, or (at your
- option) any later version.
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
- for more details.
- You should have received a copy of the GNU Library General Public License
- along with this library; if not, write to the Free Software Foundation,
- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- }
- {
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- }
- { modified from jsFastHtmlParser for use with freepascal
-
- Original Author:
- James Azarja
- Contributor:
- Lars aka L505
- http://z505.com
- Note: this isn't perfect, it needs to be improved.. see comments }
-
- unit HTMLUtil; {$ifdef fpc} {$MODE Delphi} {$H+}{$endif}
- interface
- uses
- SysUtils, strutils;
- { most commonly used }
- function GetVal(tag, attribname_ci: string): string;
- function GetTagName(Tag: string): string;
- { less commonly used, but useful }
- function GetUpTagName(tag: string): string;
- function GetNameValPair(tag, attribname_ci: string): string;
- function GetValFromNameVal(namevalpair: string): string;
- { old buggy code}
- function GetVal_JAMES(tag, attribname_ci: string): string;
- function GetNameValPair_JAMES(tag, attribname_ci: string): string;
- { rarely needed NAME= case sensitivity }
- function GetNameValPair_cs(tag, attribname: string): string;
- implementation
- function CopyBuffer(StartIndex: PChar; Len: integer): string;
- var s : String;
- begin
- SetLength(s, Len);
- StrLCopy(@s[1], StartIndex, Len);
- result:= s;
- end;
- { Return tag name, case preserved }
- function GetTagName(Tag: string): string;
- var
- P : Pchar;
- S : Pchar;
- begin
- P := Pchar(Tag);
- while P^ in ['<',' ',#9] do inc(P);
- S := P;
- while Not (P^ in [' ','>',#0]) do inc(P);
- if P > S then
- Result := CopyBuffer( S, P-S)
- else
- Result := '';
- end;
- { Return tag name in uppercase }
- function GetUpTagName(tag: string): string;
- var
- P : Pchar;
- S : Pchar;
- begin
- P := Pchar(uppercase(Tag));
- while P^ in ['<',' ',#9] do inc(P);
- S := P;
- while Not (P^ in [' ','>',#0]) do inc(P);
- if P > S then
- Result := CopyBuffer( S, P-S)
- else
- Result := '';
- end;
- { Return name=value pair ignoring case of NAME, preserving case of VALUE
- Lars' fixed version }
- function GetNameValPair(tag, attribname_ci: string): string;
- var
- P : Pchar;
- S : Pchar;
- UpperTag,
- UpperAttrib : string;
- Start: integer;
- L : integer;
- C : char;
- begin
- // must be space before case insensitive NAME, i.e. <a HREF="" STYLE=""
- UpperAttrib:= ' ' + Uppercase(attribname_ci);
- UpperTag:= Uppercase(Tag);
- P:= Pchar(UpperTag);
- S:= StrPos(P, Pchar(UpperAttrib));
- if S <> nil then
- begin
- inc(S); // skip space
- P:= S;
- // Skip
- while not (P^ in ['=', ' ', '>', #0]) do
- inc(P);
- if (P^ = '=') then inc(P);
-
- while not (P^ in [' ','>',#0]) do
- begin
- if (P^ in ['"','''']) then
- begin
- C:= P^;
- inc(P); { Skip quote }
- end else
- C:= ' ';
- { thanks to Dmitry [[email protected]] }
- while not (P^ in [C, '>', #0]) do
- Inc(P);
- if (P^ <> '>') then inc(P); { Skip current character, except '>' }
- break;
- end;
- L:= P - S;
- Start:= S - Pchar(UpperTag);
- P:= Pchar(Tag);
- S:= P;
- inc(S, Start);
-
- result:= CopyBuffer(S, L);
- end;
- end;
- { Get value of attribute, e.g WIDTH=36 -return-> 36, preserves case sensitive }
- function GetValFromNameVal(namevalpair: string): string;
- var
- P: Pchar;
- S: Pchar;
- C: Char;
- begin
- P:= Pchar(namevalpair);
- S:= StrPos(P, '=');
- if S <> nil then
- begin
- inc(S); // skip equal
- P:= S; // set P to a character after =
- if (P^ in ['"','''']) then
- begin
- C:= P^;
- Inc(P); { Skip current character }
- end else
- C:= ' ';
- S:= P;
- while not (P^ in [C, #0]) do
- inc(P);
- if (P <> S) then { Thanks to Dave Keighan ([email protected]) }
- Result:= CopyBuffer(S, P - S)
- else
- Result:= '';
- end;
- end;
- { return value of an attribute (attribname_ci), case ignored for NAME portion, but return value case is preserved }
- function GetVal(tag, attribname_ci: string): string;
- var namevalpair: string;
- begin
- // returns full name=value pair
- namevalpair:= GetNameValPair(tag, attribname_ci);
- // extracts value portion only
- result:= GetValFromNameVal(namevalpair);
- end;
- { ----------------------------------------------------------------------------
- BELOW FUNCTIONS ARE OBSOLETE OR RARELY NEEDED SINCE THEY EITHER CONTAIN BUGS
- OR THEY ARE TOO CASE SENSITIVE (FOR THE TAG NAME PORTION OF THE ATTRIBUTE }
- { James old buggy code for testing purposes.
- Bug: when finding 'ID', function finds "width", even though width <> "id" }
- function GetNameValPair_JAMES(tag, attribname_ci: string): string;
- var
- P : Pchar;
- S : Pchar;
- UT,
- UA : string;
- Start: integer;
- L : integer;
- C : char;
- begin
- UA:= Uppercase(attribname_ci);
- UT:= Uppercase(Tag);
- P:= Pchar(UT);
- S:= StrPos(P, Pchar(UA));
- if S <> nil then
- begin
- P := S;
- // Skip attribute name
- while not (P^ in ['=',' ','>',#0]) do
- inc(P);
- if (P^ = '=') then inc(P);
-
- while not (P^ in [' ','>',#0]) do
- begin
- if (P^ in ['"','''']) then
- begin
- C:= P^;
- inc(P); { Skip current character }
- end else
- C:= ' ';
- { thanks to Dmitry [[email protected]] }
- while not (P^ in [C, '>', #0]) do
- Inc(P);
- if (P^ <> '>') then inc(P); { Skip current character, except '>' }
- break;
- end;
- L:= P - S;
- Start:= S - Pchar(UT);
- P:= Pchar(Tag);
- S:= P;
- inc(S, Start);
- result:= CopyBuffer(S, L);
- end;
- end;
- { James old buggy code for testing purposes }
- function GetVal_JAMES(tag, attribname_ci: string): string;
- var namevalpair: string;
- begin
- namevalpair:= GetNameValPair_JAMES(tag, attribname_ci);
- result:= GetValFromNameVal(namevalpair);
- end;
- { return name=value portion, case sensitive, case preserved }
- function GetNameValPair_cs(Tag, attribname: string): string;
- var
- P : Pchar;
- S : Pchar;
- C : Char;
- begin
- P := Pchar(Tag);
- S := StrPos(P, Pchar(attribname));
- if S<>nil then
- begin
- P := S;
- // Skip attribute name
- while not (P^ in ['=',' ','>',#0]) do
- inc(P);
- if (P^ = '=') then inc(P);
-
- while not (P^ in [' ','>',#0]) do
- begin
- if (P^ in ['"','''']) then
- begin
- C:= P^;
- inc(P); { Skip current character }
- end else
- C:= ' ';
- { thanks to Dmitry [[email protected]] }
- while not (P^ in [C, '>', #0]) do
- inc(P);
- if (P^<>'>') then inc(P); { Skip current character, except '>' }
- break;
- end;
- if P > S then
- Result:= CopyBuffer(S, P - S)
- else
- Result:= '';
- end;
- end;
- end.
- (* alternative, not needed
- { return value (case preserved) from a name=value pair, ignores case in given NAME= portion }
- function GetValFromNameVal(namevalpair: string): string;
- type
- TAttribPos = record
- startpos: longword; // start pos of value
- len: longword; // length of value
- end;
- { returns case insensitive start position and length of just the value
- substring in name=value pair}
- function ReturnPos(attribute: string): TAttribPos;
- var
- P : Pchar;
- S : Pchar;
- C : Char;
- begin
- result.startpos:= 0;
- result.len:= 0;
- P:= Pchar(uppercase(Attribute));
- // get substring including and everything after equal
- S:= StrPos(P, '=');
- result.startpos:= pos('=', P);
- if S <> nil then
- begin
- inc(S);
- // set to character after =
- inc(result.startpos);
- P:= S;
- if (P^ in ['"','''']) then
- begin
- C:= P^;
- // skip quote
- inc(P);
- inc(result.startpos);
- end else
- C:= ' ';
- S:= P;
- // go to end quote or end of value
- while not (P^ in [C, #0]) do
- inc(P);
- if (P <> S) then
- begin
- result.len:= p - s;
- end;
- end;
- end;
- var
- found: TAttribPos;
- begin
- found:= ReturnPos(namevalpair);
- // extract using coordinates
- result:= MidStr(namevalpair, found.startpos, found.len);
- end;
- *)
|