uriparser.pp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team
  4. Original author: Sebastian Guenther
  5. Unit to parse complete URI in its parts or to reassemble an URI
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$IFDEF FPC}
  13. {$MODE objfpc}
  14. {$H+}
  15. {$ENDIF}
  16. unit URIParser;
  17. interface
  18. type
  19. TURI = record
  20. Protocol: String;
  21. Username: String;
  22. Password: String;
  23. Host: String;
  24. Port: Word;
  25. Path: String;
  26. Document: String;
  27. Params: String;
  28. Bookmark: String;
  29. HasAuthority: Boolean;
  30. end;
  31. function EncodeURI(const URI: TURI): String;
  32. function ParseURI(const URI: String; Decode : Boolean = True): TURI; overload;
  33. function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word; Decode : Boolean = True): TURI; overload;
  34. function ResolveRelativeURI(const BaseUri, RelUri: UnicodeString;out ResultUri: UnicodeString): Boolean; overload;
  35. {$ifdef WINDOWS}
  36. function ResolveRelativeURI(const BaseUri, RelUri: WideString; out ResultUri: WideString): Boolean; overload;
  37. {$ENDIF}
  38. function ResolveRelativeURI(const BaseUri, RelUri: AnsiString; out ResultUri: AnsiString): Boolean; overload;
  39. function URIToFilename(const URI: string; out Filename: string): Boolean;
  40. function FilenameToURI(const Filename: string; Encode : Boolean = True): string;
  41. function IsAbsoluteURI(const UriReference: string): Boolean;
  42. implementation
  43. uses SysUtils;
  44. const
  45. GenDelims = [':', '/', '?', '#', '[', ']', '@'];
  46. SubDelims = ['!', '$', '&', '''', '(', ')', '*', '+', ',', ';', '='];
  47. ALPHA = ['A'..'Z', 'a'..'z'];
  48. DIGIT = ['0'..'9'];
  49. Unreserved = ALPHA + DIGIT + ['-', '.', '_', '~'];
  50. ValidPathChars = Unreserved + SubDelims + ['@', ':', '/'];
  51. function Escape(const s: String; const Allowed: TSysCharSet): String;
  52. var
  53. i, L: Integer;
  54. P: PChar;
  55. begin
  56. L := Length(s);
  57. for i := 1 to Length(s) do
  58. if not (s[i] in Allowed) then Inc(L,2);
  59. if L = Length(s) then
  60. begin
  61. Result := s;
  62. Exit;
  63. end;
  64. SetLength(Result, L);
  65. P := @Result[1];
  66. for i := 1 to Length(s) do
  67. begin
  68. if not (s[i] in Allowed) then
  69. begin
  70. P^ := '%'; Inc(P);
  71. StrFmt(P, '%.2x', [ord(s[i])]); Inc(P);
  72. end
  73. else
  74. P^ := s[i];
  75. Inc(P);
  76. end;
  77. end;
  78. function EncodeURI(const URI: TURI): String;
  79. // ! if no scheme then first colon in path should be escaped
  80. begin
  81. SetLength(Result, 0);
  82. if Length(URI.Protocol) > 0 then
  83. Result := LowerCase(URI.Protocol) + ':';
  84. if URI.HasAuthority then
  85. begin
  86. Result := Result + '//';
  87. if Length(URI.Username) > 0 then
  88. begin
  89. Result := Result + URI.Username;
  90. if Length(URI.Password) > 0 then
  91. Result := Result + ':' + URI.Password;
  92. Result := Result + '@';
  93. end;
  94. Result := Result + URI.Host;
  95. end;
  96. if URI.Port <> 0 then
  97. Result := Result + ':' + IntToStr(URI.Port);
  98. Result := Result + Escape(URI.Path, ValidPathChars);
  99. if Length(URI.Document) > 0 then
  100. begin
  101. if (Length(URI.Path) > 0) and ((Length(Result) = 0) or (Result[Length(Result)] <> '/')) then
  102. Result := Result + '/';
  103. Result := Result + Escape(URI.Document, ValidPathChars);
  104. end;
  105. if Length(URI.Params) > 0 then
  106. Result := Result + '?' + Escape(URI.Params, ValidPathChars);
  107. if Length(URI.Bookmark) > 0 then
  108. Result := Result + '#' + Escape(URI.Bookmark, ValidPathChars);
  109. end;
  110. function ParseURI(const URI: String; Decode : Boolean = True): TURI;
  111. begin
  112. Result := ParseURI(URI, '', 0, Decode);
  113. end;
  114. function HexValue(c: Char): Integer;
  115. begin
  116. case c of
  117. '0'..'9': Result := ord(c) - ord('0');
  118. 'A'..'F': Result := ord(c) - (ord('A') - 10);
  119. 'a'..'f': Result := ord(c) - (ord('a') - 10);
  120. else
  121. Result := 0;
  122. end;
  123. end;
  124. function Unescape(const s: String): String;
  125. var
  126. i, RealLength: Integer;
  127. P: PChar;
  128. begin
  129. SetLength(Result, Length(s));
  130. i := 1;
  131. P := PChar(Result); { use PChar to prevent numerous calls to UniqueString }
  132. RealLength := 0;
  133. while i <= Length(s) do
  134. begin
  135. if s[i] = '%' then
  136. begin
  137. P[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));
  138. Inc(i, 3);
  139. end else
  140. begin
  141. P[RealLength] := s[i];
  142. Inc(i);
  143. end;
  144. Inc(RealLength);
  145. end;
  146. SetLength(Result, RealLength);
  147. end;
  148. function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word;Decode : Boolean = True): TURI;
  149. var
  150. s, Authority: String;
  151. i,j: Integer;
  152. PortValid: Boolean;
  153. begin
  154. Result:=Default(TURI);
  155. Result.Protocol := LowerCase(DefaultProtocol);
  156. Result.Port := DefaultPort;
  157. s := URI;
  158. // Extract scheme
  159. for i := 1 to Length(s) do
  160. if s[i] = ':' then
  161. begin
  162. Result.Protocol := Copy(s, 1, i - 1);
  163. s := Copy(s, i + 1, MaxInt);
  164. break;
  165. end
  166. else
  167. if not (((i=1) and (s[i] in ALPHA)) or (s[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
  168. break;
  169. // Extract the bookmark
  170. i := LastDelimiter('#', s);
  171. if i > 0 then
  172. begin
  173. Result.Bookmark := Copy(s, i + 1, MaxInt);
  174. if Decode then
  175. Result.Bookmark:=Unescape(Result.Bookmark);
  176. s := Copy(s, 1, i - 1);
  177. end;
  178. // Extract the params
  179. i := LastDelimiter('?', s);
  180. if i > 0 then
  181. begin
  182. Result.Params := Copy(s, i + 1, MaxInt);
  183. if Decode then
  184. Result.Params:=Unescape(Result.Params);
  185. s := Copy(s, 1, i - 1);
  186. end;
  187. // extract authority
  188. if (Length(s) > 1) and (s[1] = '/') and (s[2] = '/') then
  189. begin
  190. i := 3;
  191. while (i <= Length(s)) and (s[i] <> '/') do
  192. Inc(i);
  193. Authority := Copy(s, 3, i-3);
  194. s := Copy(s, i, MaxInt);
  195. Result.HasAuthority := True; // even if Authority is empty
  196. end
  197. else
  198. begin
  199. Result.HasAuthority := False;
  200. Authority := '';
  201. end;
  202. // now s is 'hier-part' per RFC3986
  203. // Extract the document name (nasty...)
  204. for i := Length(s) downto 1 do
  205. if s[i] = '/' then
  206. begin
  207. Result.Document :=Copy(s, i + 1, Length(s));
  208. if Decode then
  209. Result.Document:=Unescape(Result.Document);
  210. if (Result.Document <> '.') and (Result.Document <> '..') then
  211. s := Copy(s, 1, i)
  212. else
  213. Result.Document := '';
  214. break;
  215. end else if s[i] = ':' then
  216. break
  217. else if i = 1 then
  218. begin
  219. Result.Document :=s;
  220. if Decode then
  221. Result.Document:=Unescape(Result.Document);
  222. if (Result.Document <> '.') and (Result.Document <> '..') then
  223. s := ''
  224. else
  225. Result.Document := '';
  226. // break - not needed, last iteration
  227. end;
  228. // Everything left is a path
  229. Result.Path := s;
  230. if Decode then
  231. Result.Path:=Unescape(Result.Path);
  232. // Extract the port number
  233. i := LastDelimiter(':@', Authority);
  234. if (i > 0) and (i < Length(Authority)) and (Authority[i] = ':') then
  235. begin
  236. PortValid := true;
  237. for j:=i+1 to Length(Authority) do
  238. if not (Authority[j] in ['0'..'9']) then
  239. begin
  240. PortValid := false;
  241. break;
  242. end;
  243. if PortValid then
  244. begin
  245. Result.Port := StrToInt(Copy(Authority, i + 1, MaxInt));
  246. Authority := Copy(Authority, 1, i - 1);
  247. end;
  248. end;
  249. // Extract the hostname
  250. i := Pos('@', Authority);
  251. if i > 0 then
  252. begin
  253. Result.Host := Copy(Authority, i+1, MaxInt);
  254. Delete(Authority, i, MaxInt);
  255. // Extract username and password
  256. if Length(Authority) > 0 then
  257. begin
  258. i := Pos(':', Authority);
  259. if i = 0 then
  260. Result.Username := Authority
  261. else
  262. begin
  263. Result.Username := Copy(Authority, 1, i - 1);
  264. Result.Password := Copy(Authority, i + 1, MaxInt);
  265. end;
  266. end;
  267. end
  268. else
  269. Result.Host := Authority;
  270. end;
  271. procedure RemoveDotSegments(var s: string);
  272. var
  273. Cur, Prev: Integer;
  274. begin
  275. Prev := Pos('/', s);
  276. while (Prev > 0) and (Prev < Length(s)) do
  277. begin
  278. Cur := Prev+1;
  279. while (Cur <= Length(s)) and (s[Cur] <> '/') do
  280. Inc(Cur);
  281. if (Cur - Prev = 2) and (s[Prev+1] = '.') then
  282. Delete(s, Prev+1, 2)
  283. else if (Cur - Prev = 3) and (s[Prev+1] = '.') and (s[Prev+2] = '.') then
  284. begin
  285. while (Prev > 1) and (s[Prev-1] <> '/') do
  286. Dec(Prev);
  287. if Prev > 1 then
  288. Dec(Prev);
  289. Delete(s, Prev+1, Cur-Prev);
  290. end
  291. else
  292. Prev := Cur;
  293. end;
  294. end;
  295. function ResolveRelativeURI(const BaseUri, RelUri: AnsiString; out ResultUri: AnsiString): Boolean;
  296. var
  297. Base, Rel: TUri;
  298. begin
  299. Base := ParseUri(BaseUri);
  300. Rel := ParseUri(RelUri);
  301. Result := (Base.Protocol <> '') or (Rel.Protocol <> '');
  302. if not Result then
  303. Exit;
  304. with Rel do
  305. begin
  306. if (Path = '') and (Document = '') then
  307. begin
  308. if (Protocol = '') and (Host = '') then
  309. begin
  310. if Params <> '' then
  311. Base.Params := Params;
  312. Base.Bookmark := Bookmark;
  313. ResultUri := EncodeUri(Base);
  314. Exit;
  315. end;
  316. end;
  317. if (Protocol <> '') then // RelURI is absolute - return it...
  318. begin
  319. ResultUri := RelUri;
  320. Exit;
  321. end;
  322. // Inherit protocol
  323. Protocol := Base.Protocol;
  324. if (Host = '') then // TODO: or "not HasAuthority"?
  325. begin
  326. // Inherit Authority (host, port, username, password)
  327. Host := Base.Host;
  328. Port := Base.Port;
  329. Username := Base.Username;
  330. Password := Base.Password;
  331. HasAuthority := Base.HasAuthority;
  332. if (Path = '') or (Path[1] <> '/') then // path is empty or relative
  333. Path := Base.Path + Path;
  334. RemoveDotSegments(Path);
  335. end;
  336. end; // with
  337. // EncodeUri percent-encodes the result, and that's good
  338. ResultUri := EncodeUri(Rel);
  339. end;
  340. {$IFDEF WINDOWS}
  341. function ResolveRelativeURI(const BaseUri, RelUri: WideString; out ResultUri: WideString): Boolean;
  342. Var
  343. Res : AnsiString;
  344. begin
  345. Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), Res);
  346. if Result then
  347. ResultURI := UTF8Decode(res);
  348. end;
  349. {$ENDIF}
  350. function ResolveRelativeURI(const BaseUri, RelUri: UnicodeString;
  351. out ResultUri: UnicodeString): Boolean;
  352. var
  353. rslt: AnsiString;
  354. begin
  355. Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), rslt);
  356. if Result then
  357. ResultURI := UTF8Decode(rslt);
  358. end;
  359. function URIToFilename(const URI: string; out Filename: string): Boolean;
  360. var
  361. U: TURI;
  362. I: Integer;
  363. begin
  364. Result := False;
  365. U := ParseURI(URI);
  366. if SameText(U.Protocol, 'file') then
  367. begin
  368. if (Length(U.Path) > 2) and (U.Path[1] = '/') and (U.Path[3] = ':') then
  369. Filename := Copy(U.Path, 2, MaxInt)
  370. else
  371. Filename := U.Path;
  372. Filename := Filename + U.Document;
  373. Result := True;
  374. end
  375. else
  376. if U.Protocol = '' then // fire and pray?
  377. begin
  378. Filename := U.Path + U.Document;
  379. Result := True;
  380. end;
  381. if PathDelim <> '/' then
  382. begin
  383. I := Pos('/', Filename);
  384. while I > 0 do
  385. begin
  386. Filename[I] := PathDelim;
  387. I := Pos('/', Filename);
  388. end;
  389. end;
  390. end;
  391. function FilenameToURI(const Filename: string; Encode : Boolean = True): string;
  392. var
  393. I: Integer;
  394. IsAbsFilename: Boolean;
  395. FilenamePart: string;
  396. begin
  397. IsAbsFilename := ((Filename <> '') and (Filename[1] = PathDelim)) or
  398. ((Length(Filename) > 2) and (Filename[1] in ['A'..'Z', 'a'..'z']) and (Filename[2] = ':'));
  399. Result := 'file:';
  400. if IsAbsFilename then
  401. begin
  402. if Filename[1] <> PathDelim then
  403. Result := Result + '///'
  404. else
  405. Result := Result + '//';
  406. end;
  407. FilenamePart := Filename;
  408. { unreachable code warning is ok here }
  409. if PathDelim <> '/' then
  410. begin
  411. I := Pos(PathDelim, FilenamePart);
  412. while I <> 0 do
  413. begin
  414. FilenamePart[I] := '/';
  415. I := Pos(PathDelim, FilenamePart);
  416. end;
  417. end;
  418. if Encode then
  419. FilenamePart := Escape(FilenamePart, ValidPathChars);
  420. Result := Result + FilenamePart;
  421. end;
  422. function IsAbsoluteURI(const UriReference: string): Boolean;
  423. var
  424. I: Integer;
  425. begin
  426. Result := True;
  427. for I := 1 to Length(UriReference) do
  428. begin
  429. if UriReference[I] = ':' then
  430. Exit
  431. else
  432. if not (((I=1) and (UriReference[I] in ALPHA)) or
  433. (UriReference[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
  434. Break;
  435. end;
  436. Result := False;
  437. end;
  438. end.