123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 2003 by the Free Pascal development team
- Original author: Sebastian Guenther
- Unit to parse complete URI in its parts or to reassemble an URI
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$IFDEF FPC}
- {$MODE objfpc}
- {$H+}
- {$ENDIF}
- unit URIParser;
- interface
- type
- TURI = record
- Protocol: String;
- Username: String;
- Password: String;
- Host: String;
- Port: Word;
- Path: String;
- Document: String;
- Params: String;
- Bookmark: String;
- HasAuthority: Boolean;
- end;
- function EncodeURI(const URI: TURI): String;
- function ParseURI(const URI: String): TURI; overload;
- function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word): TURI; overload;
- function ResolveRelativeURI(const BaseUri, RelUri: WideString;
- out ResultUri: WideString): Boolean; overload;
- function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
- out ResultUri: UTF8String): Boolean; overload;
- function URIToFilename(const URI: string; out Filename: string): Boolean;
- function FilenameToURI(const Filename: string): string;
- function IsAbsoluteURI(const UriReference: string): Boolean;
- implementation
- uses SysUtils;
- const
- GenDelims = [':', '/', '?', '#', '[', ']', '@'];
- SubDelims = ['!', '$', '&', '''', '(', ')', '*', '+', ',', ';', '='];
- ALPHA = ['A'..'Z', 'a'..'z'];
- DIGIT = ['0'..'9'];
- Unreserved = ALPHA + DIGIT + ['-', '.', '_', '~'];
- ValidPathChars = Unreserved + SubDelims + ['@', ':', '/'];
- function Escape(const s: String; const Allowed: TSysCharSet): String;
- var
- i: Integer;
- begin
- SetLength(Result, 0);
- for i := 1 to Length(s) do
- if not (s[i] in Allowed) then
- Result := Result + '%' + IntToHex(ord(s[i]), 2)
- else
- Result := Result + s[i];
- end;
- function EncodeURI(const URI: TURI): String;
- // ! if no scheme then first colon in path should be escaped
- begin
- SetLength(Result, 0);
- if Length(URI.Protocol) > 0 then
- Result := LowerCase(URI.Protocol) + ':';
- if URI.HasAuthority then
- begin
- Result := Result + '//';
- if Length(URI.Username) > 0 then
- begin
- Result := Result + URI.Username;
- if Length(URI.Password) > 0 then
- Result := Result + ':' + URI.Password;
- Result := Result + '@';
- end;
- Result := Result + URI.Host;
- end;
- if URI.Port <> 0 then
- Result := Result + ':' + IntToStr(URI.Port);
- Result := Result + Escape(URI.Path, ValidPathChars);
- if Length(URI.Document) > 0 then
- begin
- if (Length(URI.Path) > 0) and ((Length(Result) = 0) or (Result[Length(Result)] <> '/')) then
- Result := Result + '/';
- Result := Result + Escape(URI.Document, ValidPathChars);
- end;
- if Length(URI.Params) > 0 then
- Result := Result + '?' + Escape(URI.Params, ValidPathChars);
- if Length(URI.Bookmark) > 0 then
- Result := Result + '#' + Escape(URI.Bookmark, ValidPathChars);
- end;
- function ParseURI(const URI: String): TURI;
- begin
- Result := ParseURI(URI, '', 0);
- end;
- function HexValue(c: Char): Integer;
- begin
- case c of
- '0'..'9': Result := ord(c) - ord('0');
- 'A'..'F': Result := ord(c) - (ord('A') - 10);
- 'a'..'f': Result := ord(c) - (ord('a') - 10);
- else
- Result := 0;
- end;
- end;
- function Unescape(const s: String): String;
- var
- i, RealLength: Integer;
- begin
- SetLength(Result, Length(s));
- i := 1;
- RealLength := 0;
- while i <= Length(s) do
- begin
- Inc(RealLength);
- if s[i] = '%' then
- begin
- Result[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));
- Inc(i, 3);
- end else
- begin
- Result[RealLength] := s[i];
- Inc(i);
- end;
- end;
- SetLength(Result, RealLength);
- end;
- function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word): TURI;
- var
- s, Authority: String;
- i: Integer;
- begin
- Result.Protocol := LowerCase(DefaultProtocol);
- Result.Port := DefaultPort;
- s := URI;
- // Extract scheme
- for i := 1 to Length(s) do
- if s[i] = ':' then
- begin
- Result.Protocol := Copy(s, 1, i - 1);
- s := Copy(s, i + 1, MaxInt);
- break;
- end
- else
- if not (((i=1) and (s[i] in ALPHA)) or (s[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
- break;
- // Extract the bookmark
- i := LastDelimiter('#', s);
- if i > 0 then
- begin
- Result.Bookmark := Unescape(Copy(s, i + 1, MaxInt));
- s := Copy(s, 1, i - 1);
- end;
- // Extract the params
- i := LastDelimiter('?', s);
- if i > 0 then
- begin
- Result.Params := Unescape(Copy(s, i + 1, MaxInt));
- s := Copy(s, 1, i - 1);
- end;
- // extract authority
- if (Length(s) > 1) and (s[1] = '/') and (s[2] = '/') then
- begin
- i := 3;
- while (i <= Length(s)) and (s[i] <> '/') do
- Inc(i);
- Authority := Copy(s, 3, i-3);
- s := Copy(s, i, MaxInt);
- Result.HasAuthority := True; // even if Authority is empty
- end
- else
- begin
- Result.HasAuthority := False;
- Authority := '';
- end;
- // now s is 'hier-part' per RFC3986
- // Extract the document name (nasty...)
- for i := Length(s) downto 1 do
- if s[i] = '/' then
- begin
- Result.Document := Unescape(Copy(s, i + 1, Length(s)));
- if (Result.Document <> '.') and (Result.Document <> '..') then
- s := Copy(s, 1, i)
- else
- Result.Document := '';
- break;
- end else if s[i] = ':' then
- break
- else if i = 1 then
- begin
- Result.Document := Unescape(s);
- if (Result.Document <> '.') and (Result.Document <> '..') then
- s := ''
- else
- Result.Document := '';
- // break - not needed, last iteration
- end;
- // Everything left is a path
- Result.Path := Unescape(s);
- // Extract the port number
- i := LastDelimiter(':@', Authority);
- if (i > 0) and (Authority[i] = ':') then
- begin
- Result.Port := StrToInt(Copy(Authority, i + 1, MaxInt));
- Authority := Copy(Authority, 1, i - 1);
- end;
- // Extract the hostname
- i := Pos('@', Authority);
- if i > 0 then
- begin
- Result.Host := Copy(Authority, i+1, MaxInt);
- Delete(Authority, i, MaxInt);
- // Extract username and password
- if Length(Authority) > 0 then
- begin
- i := Pos(':', Authority);
- if i = 0 then
- Result.Username := Authority
- else
- begin
- Result.Username := Copy(Authority, 1, i - 1);
- Result.Password := Copy(Authority, i + 1, MaxInt);
- end;
- end;
- end
- else
- Result.Host := Authority;
- end;
- procedure RemoveDotSegments(var s: string);
- var
- Cur, Prev: Integer;
- begin
- Prev := Pos('/', s);
- while (Prev > 0) and (Prev < Length(s)) do
- begin
- Cur := Prev+1;
- while (Cur <= Length(s)) and (s[Cur] <> '/') do
- Inc(Cur);
- if (Cur - Prev = 2) and (s[Prev+1] = '.') then
- Delete(s, Prev+1, 2)
- else if (Cur - Prev = 3) and (s[Prev+1] = '.') and (s[Prev+2] = '.') then
- begin
- while (Prev > 1) and (s[Prev-1] <> '/') do
- Dec(Prev);
- if Prev > 1 then
- Dec(Prev);
- Delete(s, Prev+1, Cur-Prev);
- end
- else
- Prev := Cur;
- end;
- end;
- // TODO: this probably must NOT percent-encode the result...
- function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
- out ResultUri: UTF8String): Boolean;
- var
- Base, Rel: TUri;
- begin
- Base := ParseUri(BaseUri);
- Rel := ParseUri(RelUri);
- Result := (Base.Protocol <> '') or (Rel.Protocol <> '');
- if not Result then
- Exit;
- with Rel do
- begin
- if (Path = '') and (Document = '') then
- begin
- if (Protocol = '') and (Host = '') then
- begin
- if Params <> '' then
- Base.Params := Params;
- Base.Bookmark := Bookmark;
- ResultUri := EncodeUri(Base);
- Exit;
- end;
- end;
- if (Protocol <> '') then // RelURI is absolute - return it...
- begin
- ResultUri := RelUri;
- Exit;
- end;
- // Inherit protocol
- Protocol := Base.Protocol;
- if (Host = '') then // TODO: or "not HasAuthority"?
- begin
- // Inherit Authority (host, port, username, password)
- Host := Base.Host;
- Port := Base.Port;
- Username := Base.Username;
- Password := Base.Password;
- HasAuthority := Base.HasAuthority;
- if (Path = '') or (Path[1] <> '/') then // path is empty or relative
- Path := Base.Path + Path;
- RemoveDotSegments(Path);
- end;
- end; // with
- ResultUri := EncodeUri(Rel);
- end;
- function ResolveRelativeURI(const BaseUri, RelUri: WideString;
- out ResultUri: WideString): Boolean;
- var
- rslt: UTF8String;
- begin
- Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), rslt);
- if Result then
- ResultURI := UTF8Decode(rslt);
- end;
- function URIToFilename(const URI: string; out Filename: string): Boolean;
- var
- U: TURI;
- I: Integer;
- begin
- Result := False;
- U := ParseURI(URI);
- if SameText(U.Protocol, 'file') then
- begin
- if (Length(U.Path) > 2) and (U.Path[1] = '/') and (U.Path[3] = ':') then
- Filename := Copy(U.Path, 2, MaxInt)
- else
- Filename := U.Path;
- Filename := Filename + U.Document;
- Result := True;
- end
- else
- if U.Protocol = '' then // fire and pray?
- begin
- Filename := U.Path + U.Document;
- Result := True;
- end;
- if PathDelim <> '/' then
- begin
- I := Pos('/', Filename);
- while I > 0 do
- begin
- Filename[I] := PathDelim;
- I := Pos('/', Filename);
- end;
- end;
- end;
- function FilenameToURI(const Filename: string): string;
- var
- I: Integer;
- begin
- // TODO: seems implemented, but not tested well
- Result := 'file://';
- if (Length(Filename) > 2) and (Filename[1] <> PathDelim) and (Filename[2] = ':') then
- Result := Result + '/';
- Result := Result + Filename;
- if PathDelim <> '/' then
- begin
- I := Pos(PathDelim, Result);
- while I <> 0 do
- begin
- Result[I] := '/';
- I := Pos(PathDelim, Result);
- end;
- end;
- end;
- function IsAbsoluteURI(const UriReference: string): Boolean;
- var
- I: Integer;
- begin
- Result := True;
- for I := 1 to Length(UriReference) do
- begin
- if UriReference[I] = ':' then
- Exit
- else
- if not (((I=1) and (UriReference[I] in ALPHA)) or
- (UriReference[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
- Break;
- end;
- Result := False;
- end;
- end.
|