uriparser.pp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team
  4. Original author: Sebastian Guenther
  5. Unit to parse complete URI in its parts or to reassemble an URI
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$IFDEF FPC}
  13. {$MODE objfpc}
  14. {$H+}
  15. {$ENDIF}
  16. unit URIParser;
  17. interface
  18. type
  19. TURI = record
  20. Protocol: String;
  21. Username: String;
  22. Password: String;
  23. Host: String;
  24. Port: Word;
  25. Path: String;
  26. Document: String;
  27. Params: String;
  28. Bookmark: String;
  29. HasAuthority: Boolean;
  30. end;
  31. function EncodeURI(const URI: TURI): String;
  32. function ParseURI(const URI: String): TURI; overload;
  33. function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word): TURI; overload;
  34. function ResolveRelativeURI(const BaseUri, RelUri: WideString;
  35. out ResultUri: WideString): Boolean; overload;
  36. function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
  37. out ResultUri: UTF8String): Boolean; overload;
  38. function URIToFilename(const URI: string; out Filename: string): Boolean;
  39. function FilenameToURI(const Filename: string): string;
  40. function IsAbsoluteURI(const UriReference: string): Boolean;
  41. implementation
  42. uses SysUtils;
  43. const
  44. GenDelims = [':', '/', '?', '#', '[', ']', '@'];
  45. SubDelims = ['!', '$', '&', '''', '(', ')', '*', '+', ',', ';', '='];
  46. ALPHA = ['A'..'Z', 'a'..'z'];
  47. DIGIT = ['0'..'9'];
  48. Unreserved = ALPHA + DIGIT + ['-', '.', '_', '~'];
  49. ValidPathChars = Unreserved + SubDelims + ['@', ':', '/'];
  50. function Escape(const s: String; const Allowed: TSysCharSet): String;
  51. var
  52. i: Integer;
  53. begin
  54. SetLength(Result, 0);
  55. for i := 1 to Length(s) do
  56. if not (s[i] in Allowed) then
  57. Result := Result + '%' + IntToHex(ord(s[i]), 2)
  58. else
  59. Result := Result + s[i];
  60. end;
  61. function EncodeURI(const URI: TURI): String;
  62. // ! if no scheme then first colon in path should be escaped
  63. begin
  64. SetLength(Result, 0);
  65. if Length(URI.Protocol) > 0 then
  66. Result := LowerCase(URI.Protocol) + ':';
  67. if URI.HasAuthority then
  68. begin
  69. Result := Result + '//';
  70. if Length(URI.Username) > 0 then
  71. begin
  72. Result := Result + URI.Username;
  73. if Length(URI.Password) > 0 then
  74. Result := Result + ':' + URI.Password;
  75. Result := Result + '@';
  76. end;
  77. Result := Result + URI.Host;
  78. end;
  79. if URI.Port <> 0 then
  80. Result := Result + ':' + IntToStr(URI.Port);
  81. Result := Result + Escape(URI.Path, ValidPathChars);
  82. if Length(URI.Document) > 0 then
  83. begin
  84. if (Length(URI.Path) > 0) and ((Length(Result) = 0) or (Result[Length(Result)] <> '/')) then
  85. Result := Result + '/';
  86. Result := Result + Escape(URI.Document, ValidPathChars);
  87. end;
  88. if Length(URI.Params) > 0 then
  89. Result := Result + '?' + Escape(URI.Params, ValidPathChars);
  90. if Length(URI.Bookmark) > 0 then
  91. Result := Result + '#' + Escape(URI.Bookmark, ValidPathChars);
  92. end;
  93. function ParseURI(const URI: String): TURI;
  94. begin
  95. Result := ParseURI(URI, '', 0);
  96. end;
  97. function HexValue(c: Char): Integer;
  98. begin
  99. case c of
  100. '0'..'9': Result := ord(c) - ord('0');
  101. 'A'..'F': Result := ord(c) - (ord('A') - 10);
  102. 'a'..'f': Result := ord(c) - (ord('a') - 10);
  103. else
  104. Result := 0;
  105. end;
  106. end;
  107. function Unescape(const s: String): String;
  108. var
  109. i, RealLength: Integer;
  110. begin
  111. SetLength(Result, Length(s));
  112. i := 1;
  113. RealLength := 0;
  114. while i <= Length(s) do
  115. begin
  116. Inc(RealLength);
  117. if s[i] = '%' then
  118. begin
  119. Result[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));
  120. Inc(i, 3);
  121. end else
  122. begin
  123. Result[RealLength] := s[i];
  124. Inc(i);
  125. end;
  126. end;
  127. SetLength(Result, RealLength);
  128. end;
  129. function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word): TURI;
  130. var
  131. s, Authority: String;
  132. i: Integer;
  133. begin
  134. Result.Protocol := LowerCase(DefaultProtocol);
  135. Result.Port := DefaultPort;
  136. s := URI;
  137. // Extract scheme
  138. for i := 1 to Length(s) do
  139. if s[i] = ':' then
  140. begin
  141. Result.Protocol := Copy(s, 1, i - 1);
  142. s := Copy(s, i + 1, MaxInt);
  143. break;
  144. end
  145. else
  146. if not (((i=1) and (s[i] in ALPHA)) or (s[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
  147. break;
  148. // Extract the bookmark
  149. i := LastDelimiter('#', s);
  150. if i > 0 then
  151. begin
  152. Result.Bookmark := Unescape(Copy(s, i + 1, MaxInt));
  153. s := Copy(s, 1, i - 1);
  154. end;
  155. // Extract the params
  156. i := LastDelimiter('?', s);
  157. if i > 0 then
  158. begin
  159. Result.Params := Unescape(Copy(s, i + 1, MaxInt));
  160. s := Copy(s, 1, i - 1);
  161. end;
  162. // extract authority
  163. if (Length(s) > 1) and (s[1] = '/') and (s[2] = '/') then
  164. begin
  165. i := 3;
  166. while (i <= Length(s)) and (s[i] <> '/') do
  167. Inc(i);
  168. Authority := Copy(s, 3, i-3);
  169. s := Copy(s, i, MaxInt);
  170. Result.HasAuthority := True; // even if Authority is empty
  171. end
  172. else
  173. begin
  174. Result.HasAuthority := False;
  175. Authority := '';
  176. end;
  177. // now s is 'hier-part' per RFC3986
  178. // Extract the document name (nasty...)
  179. for i := Length(s) downto 1 do
  180. if s[i] = '/' then
  181. begin
  182. Result.Document := Unescape(Copy(s, i + 1, Length(s)));
  183. if (Result.Document <> '.') and (Result.Document <> '..') then
  184. s := Copy(s, 1, i)
  185. else
  186. Result.Document := '';
  187. break;
  188. end else if s[i] = ':' then
  189. break
  190. else if i = 1 then
  191. begin
  192. Result.Document := Unescape(s);
  193. if (Result.Document <> '.') and (Result.Document <> '..') then
  194. s := ''
  195. else
  196. Result.Document := '';
  197. // break - not needed, last iteration
  198. end;
  199. // Everything left is a path
  200. Result.Path := Unescape(s);
  201. // Extract the port number
  202. i := LastDelimiter(':@', Authority);
  203. if (i > 0) and (Authority[i] = ':') then
  204. begin
  205. Result.Port := StrToInt(Copy(Authority, i + 1, MaxInt));
  206. Authority := Copy(Authority, 1, i - 1);
  207. end;
  208. // Extract the hostname
  209. i := Pos('@', Authority);
  210. if i > 0 then
  211. begin
  212. Result.Host := Copy(Authority, i+1, MaxInt);
  213. Delete(Authority, i, MaxInt);
  214. // Extract username and password
  215. if Length(Authority) > 0 then
  216. begin
  217. i := Pos(':', Authority);
  218. if i = 0 then
  219. Result.Username := Authority
  220. else
  221. begin
  222. Result.Username := Copy(Authority, 1, i - 1);
  223. Result.Password := Copy(Authority, i + 1, MaxInt);
  224. end;
  225. end;
  226. end
  227. else
  228. Result.Host := Authority;
  229. end;
  230. procedure RemoveDotSegments(var s: string);
  231. var
  232. Cur, Prev: Integer;
  233. begin
  234. Prev := Pos('/', s);
  235. while (Prev > 0) and (Prev < Length(s)) do
  236. begin
  237. Cur := Prev+1;
  238. while (Cur <= Length(s)) and (s[Cur] <> '/') do
  239. Inc(Cur);
  240. if (Cur - Prev = 2) and (s[Prev+1] = '.') then
  241. Delete(s, Prev+1, 2)
  242. else if (Cur - Prev = 3) and (s[Prev+1] = '.') and (s[Prev+2] = '.') then
  243. begin
  244. while (Prev > 1) and (s[Prev-1] <> '/') do
  245. Dec(Prev);
  246. if Prev > 1 then
  247. Dec(Prev);
  248. Delete(s, Prev+1, Cur-Prev);
  249. end
  250. else
  251. Prev := Cur;
  252. end;
  253. end;
  254. // TODO: this probably must NOT percent-encode the result...
  255. function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
  256. out ResultUri: UTF8String): Boolean;
  257. var
  258. Base, Rel: TUri;
  259. begin
  260. Base := ParseUri(BaseUri);
  261. Rel := ParseUri(RelUri);
  262. Result := (Base.Protocol <> '') or (Rel.Protocol <> '');
  263. if not Result then
  264. Exit;
  265. with Rel do
  266. begin
  267. if (Path = '') and (Document = '') then
  268. begin
  269. if (Protocol = '') and (Host = '') then
  270. begin
  271. if Params <> '' then
  272. Base.Params := Params;
  273. Base.Bookmark := Bookmark;
  274. ResultUri := EncodeUri(Base);
  275. Exit;
  276. end;
  277. end;
  278. if (Protocol <> '') then // RelURI is absolute - return it...
  279. begin
  280. ResultUri := RelUri;
  281. Exit;
  282. end;
  283. // Inherit protocol
  284. Protocol := Base.Protocol;
  285. if (Host = '') then // TODO: or "not HasAuthority"?
  286. begin
  287. // Inherit Authority (host, port, username, password)
  288. Host := Base.Host;
  289. Port := Base.Port;
  290. Username := Base.Username;
  291. Password := Base.Password;
  292. HasAuthority := Base.HasAuthority;
  293. if (Path = '') or (Path[1] <> '/') then // path is empty or relative
  294. Path := Base.Path + Path;
  295. RemoveDotSegments(Path);
  296. end;
  297. end; // with
  298. ResultUri := EncodeUri(Rel);
  299. end;
  300. function ResolveRelativeURI(const BaseUri, RelUri: WideString;
  301. out ResultUri: WideString): Boolean;
  302. var
  303. rslt: UTF8String;
  304. begin
  305. Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), rslt);
  306. if Result then
  307. ResultURI := UTF8Decode(rslt);
  308. end;
  309. function URIToFilename(const URI: string; out Filename: string): Boolean;
  310. var
  311. U: TURI;
  312. I: Integer;
  313. begin
  314. Result := False;
  315. U := ParseURI(URI);
  316. if SameText(U.Protocol, 'file') then
  317. begin
  318. if (Length(U.Path) > 2) and (U.Path[1] = '/') and (U.Path[3] = ':') then
  319. Filename := Copy(U.Path, 2, MaxInt)
  320. else
  321. Filename := U.Path;
  322. Filename := Filename + U.Document;
  323. Result := True;
  324. end
  325. else
  326. if U.Protocol = '' then // fire and pray?
  327. begin
  328. Filename := U.Path + U.Document;
  329. Result := True;
  330. end;
  331. if PathDelim <> '/' then
  332. begin
  333. I := Pos('/', Filename);
  334. while I > 0 do
  335. begin
  336. Filename[I] := PathDelim;
  337. I := Pos('/', Filename);
  338. end;
  339. end;
  340. end;
  341. function FilenameToURI(const Filename: string): string;
  342. var
  343. I: Integer;
  344. begin
  345. // TODO: seems implemented, but not tested well
  346. Result := 'file://';
  347. if (Length(Filename) > 2) and (Filename[1] <> PathDelim) and (Filename[2] = ':') then
  348. Result := Result + '/';
  349. Result := Result + Filename;
  350. if PathDelim <> '/' then
  351. begin
  352. I := Pos(PathDelim, Result);
  353. while I <> 0 do
  354. begin
  355. Result[I] := '/';
  356. I := Pos(PathDelim, Result);
  357. end;
  358. end;
  359. end;
  360. function IsAbsoluteURI(const UriReference: string): Boolean;
  361. var
  362. I: Integer;
  363. begin
  364. Result := True;
  365. for I := 1 to Length(UriReference) do
  366. begin
  367. if UriReference[I] = ':' then
  368. Exit
  369. else
  370. if not (((I=1) and (UriReference[I] in ALPHA)) or
  371. (UriReference[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
  372. Break;
  373. end;
  374. Result := False;
  375. end;
  376. end.