Browse Source

* new uriparser

git-svn-id: trunk@6477 -
peter 18 years ago
parent
commit
44a78590ec
2 changed files with 426 additions and 0 deletions
  1. 1 0
      .gitattributes
  2. 425 0
      utils/fppkg/fcl20/uriparser.pp

+ 1 - 0
.gitattributes

@@ -8220,6 +8220,7 @@ utils/fppkg/Makefile.fpc svneol=native#text/plain
 utils/fppkg/README svneol=native#text/plain
 utils/fppkg/README svneol=native#text/plain
 utils/fppkg/fcl20/contnrs.pp svneol=native#text/plain
 utils/fppkg/fcl20/contnrs.pp svneol=native#text/plain
 utils/fppkg/fcl20/streamcoll.pp svneol=native#text/plain
 utils/fppkg/fcl20/streamcoll.pp svneol=native#text/plain
+utils/fppkg/fcl20/uriparser.pp svneol=native#text/plain
 utils/fppkg/fcl20/zipper.pp svneol=native#text/plain
 utils/fppkg/fcl20/zipper.pp svneol=native#text/plain
 utils/fppkg/fcl20/zstream.pp svneol=native#text/plain
 utils/fppkg/fcl20/zstream.pp svneol=native#text/plain
 utils/fppkg/fpmkcnst.inc svneol=native#text/plain
 utils/fppkg/fpmkcnst.inc svneol=native#text/plain

+ 425 - 0
utils/fppkg/fcl20/uriparser.pp

@@ -0,0 +1,425 @@
+{
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2003 by the Free Pascal development team
+    Original author: Sebastian Guenther
+
+    Unit to parse complete URI in its parts or to reassemble an URI
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+{$IFDEF FPC}
+{$MODE objfpc}
+{$H+}
+{$ENDIF}
+
+unit URIParser;
+
+interface
+
+type
+  TURI = record
+    Protocol: String;
+    Username: String;
+    Password: String;
+    Host: String;
+    Port: Word;
+    Path: String;
+    Document: String;
+    Params: String;
+    Bookmark: String;
+    HasAuthority: Boolean;
+  end;
+
+function EncodeURI(const URI: TURI): String;
+function ParseURI(const URI: String):  TURI; overload;
+function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word):  TURI; overload;
+
+function ResolveRelativeURI(const BaseUri, RelUri: WideString;
+  out ResultUri: WideString): Boolean; overload;
+
+function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
+  out ResultUri: UTF8String): Boolean; overload;
+
+function URIToFilename(const URI: string; out Filename: string): Boolean;
+function FilenameToURI(const Filename: string): string;
+
+function IsAbsoluteURI(const UriReference: string): Boolean;
+
+implementation
+
+uses SysUtils;
+
+const
+  GenDelims = [':', '/', '?', '#', '[', ']', '@'];
+  SubDelims = ['!', '$', '&', '''', '(', ')', '*', '+', ',', ';', '='];
+  ALPHA = ['A'..'Z', 'a'..'z'];
+  DIGIT = ['0'..'9'];
+  Unreserved = ALPHA + DIGIT + ['-', '.', '_', '~'];
+  ValidPathChars = Unreserved + SubDelims + ['@', ':', '/'];
+
+function Escape(const s: String; const Allowed: TSysCharSet): String;
+var
+  i: Integer;
+begin
+  SetLength(Result, 0);
+  for i := 1 to Length(s) do
+    if not (s[i] in Allowed) then
+      Result := Result + '%' + IntToHex(ord(s[i]), 2)
+    else
+      Result := Result + s[i];
+end;
+
+function EncodeURI(const URI: TURI): String;
+// ! if no scheme then first colon in path should be escaped
+begin
+  SetLength(Result, 0);
+  if Length(URI.Protocol) > 0 then
+    Result := LowerCase(URI.Protocol) + ':';
+  if URI.HasAuthority then
+  begin
+    Result := Result + '//';
+    if Length(URI.Username) > 0 then
+    begin
+      Result := Result + URI.Username;
+      if Length(URI.Password) > 0 then
+        Result := Result + ':' + URI.Password;
+      Result := Result + '@';
+    end;
+    Result := Result + URI.Host;
+  end;
+  if URI.Port <> 0 then
+    Result := Result + ':' + IntToStr(URI.Port);
+  Result := Result + Escape(URI.Path, ValidPathChars);
+  if Length(URI.Document) > 0 then
+  begin
+    if (Length(URI.Path) > 0) and ((Length(Result) = 0) or (Result[Length(Result)] <> '/')) then
+      Result := Result + '/';
+    Result := Result + Escape(URI.Document, ValidPathChars);
+  end;
+  if Length(URI.Params) > 0 then
+    Result := Result + '?' + Escape(URI.Params, ValidPathChars);
+  if Length(URI.Bookmark) > 0 then
+    Result := Result + '#' + Escape(URI.Bookmark, ValidPathChars);
+end;
+
+function ParseURI(const URI: String):  TURI;
+begin
+  Result := ParseURI(URI, '', 0);
+end;
+
+function HexValue(c: Char): Integer;
+begin
+  case c of
+    '0'..'9': Result := ord(c) - ord('0');
+    'A'..'F': Result := ord(c) - (ord('A') - 10);
+    'a'..'f': Result := ord(c) - (ord('a') - 10);
+  else
+    Result := 0;
+  end;
+end;
+
+function Unescape(const s: String): String;
+var
+  i, RealLength: Integer;
+begin
+  SetLength(Result, Length(s));
+  i := 1;
+  RealLength := 0;
+  while i <= Length(s) do
+  begin
+    Inc(RealLength);
+    if s[i] = '%' then
+    begin
+      Result[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));
+      Inc(i, 3);
+    end else
+    begin
+      Result[RealLength] := s[i];
+      Inc(i);
+    end;
+  end;
+  SetLength(Result, RealLength);
+end;
+
+function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word):  TURI;
+var
+  s, Authority: String;
+  i: Integer;
+begin
+  Result.Protocol := LowerCase(DefaultProtocol);
+  Result.Port := DefaultPort;
+
+  s := URI;
+
+  // Extract scheme
+
+  for i := 1 to Length(s) do
+    if s[i] = ':' then
+    begin
+      Result.Protocol := Copy(s, 1, i - 1);
+      s := Copy(s, i + 1, MaxInt);
+      break;
+    end
+    else
+      if not (((i=1) and (s[i] in ALPHA)) or (s[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
+        break;
+
+  // Extract the bookmark
+
+  i := LastDelimiter('#', s);
+  if i > 0 then
+  begin
+    Result.Bookmark := Unescape(Copy(s, i + 1, MaxInt));
+    s := Copy(s, 1, i - 1);
+  end;
+
+  // Extract the params
+
+  i := LastDelimiter('?', s);
+  if i > 0 then
+  begin
+    Result.Params := Unescape(Copy(s, i + 1, MaxInt));
+    s := Copy(s, 1, i - 1);
+  end;
+
+  // extract authority
+
+  if (Length(s) > 1) and (s[1] = '/') and (s[2] = '/') then
+  begin
+    i := 3;
+    while (i <= Length(s)) and (s[i] <> '/') do
+      Inc(i);
+    Authority := Copy(s, 3, i-3);
+    s := Copy(s, i, MaxInt);
+    Result.HasAuthority := True;    // even if Authority is empty
+  end
+  else
+  begin
+    Result.HasAuthority := False;
+    Authority := '';
+  end;
+
+  // now s is 'hier-part' per RFC3986
+  // Extract the document name (nasty...)
+
+  for i := Length(s) downto 1 do
+    if s[i] = '/' then
+    begin
+      Result.Document := Unescape(Copy(s, i + 1, Length(s)));
+      if (Result.Document <> '.') and (Result.Document <> '..') then
+        s := Copy(s, 1, i)
+      else
+        Result.Document := '';
+      break;
+    end else if s[i] = ':' then
+      break
+    else if i = 1 then
+    begin
+      Result.Document := Unescape(s);
+      if (Result.Document <> '.') and (Result.Document <> '..') then
+        s := ''
+      else
+        Result.Document := '';
+      // break - not needed, last iteration
+    end;
+
+  // Everything left is a path
+
+  Result.Path := Unescape(s);
+
+  // Extract the port number
+
+  i := LastDelimiter(':@', Authority);
+  if (i > 0) and (Authority[i] = ':') then
+  begin
+    Result.Port := StrToInt(Copy(Authority, i + 1, MaxInt));
+    Authority := Copy(Authority, 1, i - 1);
+  end;
+
+  // Extract the hostname
+
+  i := Pos('@', Authority);
+  if i > 0 then
+  begin
+    Result.Host := Copy(Authority, i+1, MaxInt);
+    Delete(Authority, i, MaxInt);
+
+    // Extract username and password
+    if Length(Authority) > 0 then
+    begin
+      i := Pos(':', Authority);
+      if i = 0 then
+        Result.Username := Authority
+      else
+      begin
+        Result.Username := Copy(Authority, 1, i - 1);
+        Result.Password := Copy(Authority, i + 1, MaxInt);
+      end;
+    end;
+  end
+  else
+    Result.Host := Authority;
+end;
+
+procedure RemoveDotSegments(var s: string);
+var
+  Cur, Prev: Integer;
+begin
+  Prev := Pos('/', s);
+  while (Prev > 0) and (Prev < Length(s)) do
+  begin
+    Cur := Prev+1;
+    while (Cur <= Length(s)) and (s[Cur] <> '/') do
+      Inc(Cur);
+    if (Cur - Prev = 2) and (s[Prev+1] = '.') then
+      Delete(s, Prev+1, 2)
+    else if (Cur - Prev = 3) and (s[Prev+1] = '.') and (s[Prev+2] = '.') then
+    begin
+      while (Prev > 1) and (s[Prev-1] <> '/') do
+        Dec(Prev);
+      if Prev > 1 then
+        Dec(Prev);
+      Delete(s, Prev+1, Cur-Prev);
+    end
+    else
+      Prev := Cur;
+  end;
+end;
+
+// TODO: this probably must NOT percent-encode the result...
+function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
+  out ResultUri: UTF8String): Boolean;
+var
+  Base, Rel: TUri;
+begin
+  Base := ParseUri(BaseUri);
+  Rel := ParseUri(RelUri);
+
+  Result := (Base.Protocol <> '') or (Rel.Protocol <> '');
+  if not Result then
+    Exit;
+  with Rel do
+  begin
+    if (Path = '') and (Document = '') then
+    begin
+      if (Protocol = '') and (Host = '') then
+      begin
+        if Params <> '' then
+          Base.Params := Params;
+        Base.Bookmark := Bookmark;
+        ResultUri := EncodeUri(Base);
+        Exit;
+      end;
+    end;
+    if (Protocol <> '') then  // RelURI is absolute - return it...
+    begin
+      ResultUri := RelUri;
+      Exit;
+    end;
+    // Inherit protocol
+    Protocol := Base.Protocol;
+    if (Host = '') then   // TODO: or "not HasAuthority"?
+    begin
+      // Inherit Authority (host, port, username, password)
+      Host := Base.Host;
+      Port := Base.Port;
+      Username := Base.Username;
+      Password := Base.Password;
+      HasAuthority := Base.HasAuthority;
+      if (Path = '') or (Path[1] <> '/') then  // path is empty or relative
+        Path := Base.Path + Path;
+      RemoveDotSegments(Path);      
+    end;
+  end; // with
+  ResultUri := EncodeUri(Rel);
+end;
+
+function ResolveRelativeURI(const BaseUri, RelUri: WideString;
+  out ResultUri: WideString): Boolean;
+var
+  rslt: UTF8String;
+begin
+  Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), rslt);
+  if Result then
+    ResultURI := UTF8Decode(rslt);
+end;
+
+function URIToFilename(const URI: string; out Filename: string): Boolean;
+var
+  U: TURI;
+  I: Integer;
+begin
+  Result := False;
+  U := ParseURI(URI);
+  if SameText(U.Protocol, 'file') then
+  begin
+    if (Length(U.Path) > 2) and (U.Path[1] = '/') and (U.Path[3] = ':') then
+      Filename := Copy(U.Path, 2, MaxInt)
+    else
+      Filename := U.Path;
+    Filename := Filename + U.Document;
+    Result := True;
+  end
+  else
+    if U.Protocol = '' then  // fire and pray?
+    begin
+      Filename := U.Path + U.Document;
+      Result := True;
+    end;
+  if PathDelim <> '/' then
+  begin
+    I := Pos('/', Filename);
+    while I > 0 do
+    begin
+      Filename[I] := PathDelim;
+      I := Pos('/', Filename);
+    end;
+  end;
+end;
+
+function FilenameToURI(const Filename: string): string;
+var
+  I: Integer;
+begin
+  // TODO: seems implemented, but not tested well
+  Result := 'file://';
+  if (Length(Filename) > 2) and (Filename[1] <> PathDelim) and (Filename[2] = ':') then
+    Result := Result + '/';
+  Result := Result + Filename;
+  if PathDelim <> '/' then
+  begin
+    I := Pos(PathDelim, Result);
+    while I <> 0 do
+    begin
+      Result[I] := '/';
+      I := Pos(PathDelim, Result);
+    end;
+  end;
+end;
+
+
+function IsAbsoluteURI(const UriReference: string): Boolean;
+var
+  I: Integer;
+begin
+  Result := True;
+  for I := 1 to Length(UriReference) do
+  begin
+    if UriReference[I] = ':' then
+      Exit
+    else
+      if not (((I=1) and (UriReference[I] in ALPHA)) or
+         (UriReference[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
+      Break;
+  end;
+  Result := False;
+end;
+
+
+end.