| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425 | {    This file is part of the Free Pascal run time library.    Copyright (c) 2003 by the Free Pascal development team    Original author: Sebastian Guenther    Unit to parse complete URI in its parts or to reassemble an URI    See the file COPYING.FPC, included in this distribution,    for details about the copyright.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. **********************************************************************}{$IFDEF FPC}{$MODE objfpc}{$H+}{$ENDIF}unit URIParser;interfacetype  TURI = record    Protocol: String;    Username: String;    Password: String;    Host: String;    Port: Word;    Path: String;    Document: String;    Params: String;    Bookmark: String;    HasAuthority: Boolean;  end;function EncodeURI(const URI: TURI): String;function ParseURI(const URI: String):  TURI; overload;function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word):  TURI; overload;function ResolveRelativeURI(const BaseUri, RelUri: WideString;  out ResultUri: WideString): Boolean; overload;function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;  out ResultUri: UTF8String): Boolean; overload;function URIToFilename(const URI: string; out Filename: string): Boolean;function FilenameToURI(const Filename: string): string;function IsAbsoluteURI(const UriReference: string): Boolean;implementationuses SysUtils;const  GenDelims = [':', '/', '?', '#', '[', ']', '@'];  SubDelims = ['!', '$', '&', '''', '(', ')', '*', '+', ',', ';', '='];  ALPHA = ['A'..'Z', 'a'..'z'];  DIGIT = ['0'..'9'];  Unreserved = ALPHA + DIGIT + ['-', '.', '_', '~'];  ValidPathChars = Unreserved + SubDelims + ['@', ':', '/'];function Escape(const s: String; const Allowed: TSysCharSet): String;var  i: Integer;begin  SetLength(Result, 0);  for i := 1 to Length(s) do    if not (s[i] in Allowed) then      Result := Result + '%' + IntToHex(ord(s[i]), 2)    else      Result := Result + s[i];end;function EncodeURI(const URI: TURI): String;// ! if no scheme then first colon in path should be escapedbegin  SetLength(Result, 0);  if Length(URI.Protocol) > 0 then    Result := LowerCase(URI.Protocol) + ':';  if URI.HasAuthority then  begin    Result := Result + '//';    if Length(URI.Username) > 0 then    begin      Result := Result + URI.Username;      if Length(URI.Password) > 0 then        Result := Result + ':' + URI.Password;      Result := Result + '@';    end;    Result := Result + URI.Host;  end;  if URI.Port <> 0 then    Result := Result + ':' + IntToStr(URI.Port);  Result := Result + Escape(URI.Path, ValidPathChars);  if Length(URI.Document) > 0 then  begin    if (Length(URI.Path) > 0) and ((Length(Result) = 0) or (Result[Length(Result)] <> '/')) then      Result := Result + '/';    Result := Result + Escape(URI.Document, ValidPathChars);  end;  if Length(URI.Params) > 0 then    Result := Result + '?' + Escape(URI.Params, ValidPathChars);  if Length(URI.Bookmark) > 0 then    Result := Result + '#' + Escape(URI.Bookmark, ValidPathChars);end;function ParseURI(const URI: String):  TURI;begin  Result := ParseURI(URI, '', 0);end;function HexValue(c: Char): Integer;begin  case c of    '0'..'9': Result := ord(c) - ord('0');    'A'..'F': Result := ord(c) - (ord('A') - 10);    'a'..'f': Result := ord(c) - (ord('a') - 10);  else    Result := 0;  end;end;function Unescape(const s: String): String;var  i, RealLength: Integer;begin  SetLength(Result, Length(s));  i := 1;  RealLength := 0;  while i <= Length(s) do  begin    Inc(RealLength);    if s[i] = '%' then    begin      Result[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));      Inc(i, 3);    end else    begin      Result[RealLength] := s[i];      Inc(i);    end;  end;  SetLength(Result, RealLength);end;function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word):  TURI;var  s, Authority: String;  i: Integer;begin  Result.Protocol := LowerCase(DefaultProtocol);  Result.Port := DefaultPort;  s := URI;  // Extract scheme  for i := 1 to Length(s) do    if s[i] = ':' then    begin      Result.Protocol := Copy(s, 1, i - 1);      s := Copy(s, i + 1, MaxInt);      break;    end    else      if not (((i=1) and (s[i] in ALPHA)) or (s[i] in ALPHA + DIGIT + ['+', '-', '.'])) then        break;  // Extract the bookmark  i := LastDelimiter('#', s);  if i > 0 then  begin    Result.Bookmark := Unescape(Copy(s, i + 1, MaxInt));    s := Copy(s, 1, i - 1);  end;  // Extract the params  i := LastDelimiter('?', s);  if i > 0 then  begin    Result.Params := Unescape(Copy(s, i + 1, MaxInt));    s := Copy(s, 1, i - 1);  end;  // extract authority  if (Length(s) > 1) and (s[1] = '/') and (s[2] = '/') then  begin    i := 3;    while (i <= Length(s)) and (s[i] <> '/') do      Inc(i);    Authority := Copy(s, 3, i-3);    s := Copy(s, i, MaxInt);    Result.HasAuthority := True;    // even if Authority is empty  end  else  begin    Result.HasAuthority := False;    Authority := '';  end;  // now s is 'hier-part' per RFC3986  // Extract the document name (nasty...)  for i := Length(s) downto 1 do    if s[i] = '/' then    begin      Result.Document := Unescape(Copy(s, i + 1, Length(s)));      if (Result.Document <> '.') and (Result.Document <> '..') then        s := Copy(s, 1, i)      else        Result.Document := '';      break;    end else if s[i] = ':' then      break    else if i = 1 then    begin      Result.Document := Unescape(s);      if (Result.Document <> '.') and (Result.Document <> '..') then        s := ''      else        Result.Document := '';      // break - not needed, last iteration    end;  // Everything left is a path  Result.Path := Unescape(s);  // Extract the port number  i := LastDelimiter(':@', Authority);  if (i > 0) and (Authority[i] = ':') then  begin    Result.Port := StrToInt(Copy(Authority, i + 1, MaxInt));    Authority := Copy(Authority, 1, i - 1);  end;  // Extract the hostname  i := Pos('@', Authority);  if i > 0 then  begin    Result.Host := Copy(Authority, i+1, MaxInt);    Delete(Authority, i, MaxInt);    // Extract username and password    if Length(Authority) > 0 then    begin      i := Pos(':', Authority);      if i = 0 then        Result.Username := Authority      else      begin        Result.Username := Copy(Authority, 1, i - 1);        Result.Password := Copy(Authority, i + 1, MaxInt);      end;    end;  end  else    Result.Host := Authority;end;procedure RemoveDotSegments(var s: string);var  Cur, Prev: Integer;begin  Prev := Pos('/', s);  while (Prev > 0) and (Prev < Length(s)) do  begin    Cur := Prev+1;    while (Cur <= Length(s)) and (s[Cur] <> '/') do      Inc(Cur);    if (Cur - Prev = 2) and (s[Prev+1] = '.') then      Delete(s, Prev+1, 2)    else if (Cur - Prev = 3) and (s[Prev+1] = '.') and (s[Prev+2] = '.') then    begin      while (Prev > 1) and (s[Prev-1] <> '/') do        Dec(Prev);      if Prev > 1 then        Dec(Prev);      Delete(s, Prev+1, Cur-Prev);    end    else      Prev := Cur;  end;end;// TODO: this probably must NOT percent-encode the result...function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;  out ResultUri: UTF8String): Boolean;var  Base, Rel: TUri;begin  Base := ParseUri(BaseUri);  Rel := ParseUri(RelUri);  Result := (Base.Protocol <> '') or (Rel.Protocol <> '');  if not Result then    Exit;  with Rel do  begin    if (Path = '') and (Document = '') then    begin      if (Protocol = '') and (Host = '') then      begin        if Params <> '' then          Base.Params := Params;        Base.Bookmark := Bookmark;        ResultUri := EncodeUri(Base);        Exit;      end;    end;    if (Protocol <> '') then  // RelURI is absolute - return it...    begin      ResultUri := RelUri;      Exit;    end;    // Inherit protocol    Protocol := Base.Protocol;    if (Host = '') then   // TODO: or "not HasAuthority"?    begin      // Inherit Authority (host, port, username, password)      Host := Base.Host;      Port := Base.Port;      Username := Base.Username;      Password := Base.Password;      HasAuthority := Base.HasAuthority;      if (Path = '') or (Path[1] <> '/') then  // path is empty or relative        Path := Base.Path + Path;      RemoveDotSegments(Path);          end;  end; // with  ResultUri := EncodeUri(Rel);end;function ResolveRelativeURI(const BaseUri, RelUri: WideString;  out ResultUri: WideString): Boolean;var  rslt: UTF8String;begin  Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), rslt);  if Result then    ResultURI := UTF8Decode(rslt);end;function URIToFilename(const URI: string; out Filename: string): Boolean;var  U: TURI;  I: Integer;begin  Result := False;  U := ParseURI(URI);  if SameText(U.Protocol, 'file') then  begin    if (Length(U.Path) > 2) and (U.Path[1] = '/') and (U.Path[3] = ':') then      Filename := Copy(U.Path, 2, MaxInt)    else      Filename := U.Path;    Filename := Filename + U.Document;    Result := True;  end  else    if U.Protocol = '' then  // fire and pray?    begin      Filename := U.Path + U.Document;      Result := True;    end;  if PathDelim <> '/' then  begin    I := Pos('/', Filename);    while I > 0 do    begin      Filename[I] := PathDelim;      I := Pos('/', Filename);    end;  end;end;function FilenameToURI(const Filename: string): string;var  I: Integer;begin  // TODO: seems implemented, but not tested well  Result := 'file://';  if (Length(Filename) > 2) and (Filename[1] <> PathDelim) and (Filename[2] = ':') then    Result := Result + '/';  Result := Result + Filename;  if PathDelim <> '/' then  begin    I := Pos(PathDelim, Result);    while I <> 0 do    begin      Result[I] := '/';      I := Pos(PathDelim, Result);    end;  end;end;function IsAbsoluteURI(const UriReference: string): Boolean;var  I: Integer;begin  Result := True;  for I := 1 to Length(UriReference) do  begin    if UriReference[I] = ':' then      Exit    else      if not (((I=1) and (UriReference[I] in ALPHA)) or         (UriReference[i] in ALPHA + DIGIT + ['+', '-', '.'])) then      Break;  end;  Result := False;end;end.
 |