uriparser.pp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2003 by the Free Pascal development team
  4. Original author: Sebastian Guenther
  5. Unit to parse complete URI in its parts or to reassemble an URI
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$IFDEF FPC}
  13. {$MODE objfpc}
  14. {$H+}
  15. {$ENDIF}
  16. {$IFNDEF FPC_DOTTEDUNITS}
  17. unit URIParser;
  18. {$ENDIF FPC_DOTTEDUNITS}
  19. interface
  20. type
  21. TURI = record
  22. Protocol: String;
  23. Username: String;
  24. Password: String;
  25. Host: String;
  26. Port: Word;
  27. Path: String;
  28. Document: String;
  29. Params: String;
  30. Bookmark: String;
  31. HasAuthority: Boolean;
  32. end;
  33. function EncodeURI(const URI: TURI): String;
  34. function ParseURI(const URI: String; Decode : Boolean = True): TURI; overload;
  35. function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word; Decode : Boolean = True): TURI; overload;
  36. function ResolveRelativeURI(const BaseUri, RelUri: UnicodeString;out ResultUri: UnicodeString): Boolean; overload;
  37. {$ifdef WINDOWS}
  38. function ResolveRelativeURI(const BaseUri, RelUri: WideString; out ResultUri: WideString): Boolean; overload;
  39. {$ENDIF}
  40. function ResolveRelativeURI(const BaseUri, RelUri: AnsiString; out ResultUri: AnsiString): Boolean; overload;
  41. function URIToFilename(const URI: string; out Filename: string): Boolean;
  42. function FilenameToURI(const Filename: string; Encode : Boolean = True): string;
  43. function IsAbsoluteURI(const UriReference: string): Boolean;
  44. implementation
  45. {$IFDEF FPC_DOTTEDUNITS}
  46. uses System.SysUtils;
  47. {$ELSE FPC_DOTTEDUNITS}
  48. uses SysUtils;
  49. {$ENDIF FPC_DOTTEDUNITS}
  50. const
  51. GenDelims = [':', '/', '?', '#', '[', ']', '@'];
  52. SubDelims = ['!', '$', '&', '''', '(', ')', '*', '+', ',', ';', '='];
  53. ALPHA = ['A'..'Z', 'a'..'z'];
  54. DIGIT = ['0'..'9'];
  55. Unreserved = ALPHA + DIGIT + ['-', '.', '_', '~'];
  56. ValidPathChars = Unreserved + SubDelims + ['@', ':', '/'];
  57. function Escape(const s: String; const Allowed: TSysCharSet): String;
  58. var
  59. i, L: Integer;
  60. P: PChar;
  61. begin
  62. L := Length(s);
  63. for i := 1 to Length(s) do
  64. if not (s[i] in Allowed) then Inc(L,2);
  65. if L = Length(s) then
  66. begin
  67. Result := s;
  68. Exit;
  69. end;
  70. SetLength(Result, L);
  71. P := @Result[1];
  72. for i := 1 to Length(s) do
  73. begin
  74. if not (s[i] in Allowed) then
  75. begin
  76. P^ := '%'; Inc(P);
  77. StrFmt(P, '%.2x', [ord(s[i])]); Inc(P);
  78. end
  79. else
  80. P^ := s[i];
  81. Inc(P);
  82. end;
  83. end;
  84. function EncodeURI(const URI: TURI): String;
  85. // ! if no scheme then first colon in path should be escaped
  86. begin
  87. Result := '';
  88. if URI.Protocol <> '' then
  89. Result := LowerCase(URI.Protocol) + ':';
  90. if URI.HasAuthority then
  91. begin
  92. Result := Result + '//';
  93. if URI.Username <> '' then
  94. begin
  95. Result := Result + URI.Username;
  96. if URI.Password <> '' then
  97. Result := Result + ':' + URI.Password;
  98. Result := Result + '@';
  99. end;
  100. Result := Result + URI.Host;
  101. end;
  102. if URI.Port <> 0 then
  103. Result := Result + ':' + IntToStr(URI.Port);
  104. Result := Result + Escape(URI.Path, ValidPathChars);
  105. if URI.Document <> '' then
  106. begin
  107. if (URI.Path <> '') and ((Result = '') or (Result[Length(Result)] <> '/')) then
  108. Result := Result + '/';
  109. Result := Result + Escape(URI.Document, ValidPathChars);
  110. end;
  111. if URI.Params <> '' then
  112. Result := Result + '?' + Escape(URI.Params, ValidPathChars);
  113. if URI.Bookmark <> '' then
  114. Result := Result + '#' + Escape(URI.Bookmark, ValidPathChars);
  115. end;
  116. function ParseURI(const URI: String; Decode : Boolean = True): TURI;
  117. begin
  118. Result := ParseURI(URI, '', 0, Decode);
  119. end;
  120. function HexValue(c: Char): Integer;
  121. begin
  122. case c of
  123. '0'..'9': Result := ord(c) - ord('0');
  124. 'A'..'F': Result := ord(c) - (ord('A') - 10);
  125. 'a'..'f': Result := ord(c) - (ord('a') - 10);
  126. else
  127. Result := 0;
  128. end;
  129. end;
  130. function Unescape(const s: String): String;
  131. var
  132. i, RealLength: Integer;
  133. P: PChar;
  134. begin
  135. SetLength(Result, Length(s));
  136. i := 1;
  137. P := PChar(Result); { use PChar to prevent numerous calls to UniqueString }
  138. RealLength := 0;
  139. while i <= Length(s) do
  140. begin
  141. if s[i] = '%' then
  142. begin
  143. P[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));
  144. Inc(i, 3);
  145. end else
  146. begin
  147. P[RealLength] := s[i];
  148. Inc(i);
  149. end;
  150. Inc(RealLength);
  151. end;
  152. SetLength(Result, RealLength);
  153. end;
  154. function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word;Decode : Boolean = True): TURI;
  155. var
  156. s, Authority: String;
  157. i,j: Integer;
  158. PortValid: Boolean;
  159. begin
  160. Result:=Default(TURI);
  161. Result.Protocol := LowerCase(DefaultProtocol);
  162. Result.Port := DefaultPort;
  163. s := URI;
  164. // Extract scheme
  165. for i := 1 to Length(s) do
  166. if s[i] = ':' then
  167. begin
  168. Result.Protocol := Copy(s, 1, i - 1);
  169. s := Copy(s, i + 1, MaxInt);
  170. break;
  171. end
  172. else
  173. if not (((i=1) and (s[i] in ALPHA)) or (s[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
  174. break;
  175. // Extract the bookmark
  176. i := LastDelimiter('#', s);
  177. if i > 0 then
  178. begin
  179. Result.Bookmark := Copy(s, i + 1, MaxInt);
  180. if Decode then
  181. Result.Bookmark:=Unescape(Result.Bookmark);
  182. SetLength(s, i - 1);
  183. end;
  184. // Extract the params
  185. i := LastDelimiter('?', s);
  186. if i > 0 then
  187. begin
  188. Result.Params := Copy(s, i + 1, MaxInt);
  189. if Decode then
  190. Result.Params:=Unescape(Result.Params);
  191. SetLength(s, i - 1);
  192. end;
  193. // extract authority
  194. if (Length(s) > 1) and (s[1] = '/') and (s[2] = '/') then
  195. begin
  196. i := 3;
  197. while (i <= Length(s)) and (s[i] <> '/') do
  198. Inc(i);
  199. Authority := Copy(s, 3, i-3);
  200. s := Copy(s, i, MaxInt);
  201. Result.HasAuthority := True; // even if Authority is empty
  202. end
  203. else
  204. begin
  205. Result.HasAuthority := False;
  206. Authority := '';
  207. end;
  208. // now s is 'hier-part' per RFC3986
  209. // Extract the document name (nasty...)
  210. for i := Length(s) downto 1 do
  211. if s[i] = '/' then
  212. begin
  213. Result.Document :=Copy(s, i + 1, Length(s));
  214. if Decode then
  215. Result.Document:=Unescape(Result.Document);
  216. if (Result.Document <> '.') and (Result.Document <> '..') then
  217. SetLength(s, i)
  218. else
  219. Result.Document := '';
  220. break;
  221. end else if s[i] = ':' then
  222. break
  223. else if i = 1 then
  224. begin
  225. Result.Document :=s;
  226. if Decode then
  227. Result.Document:=Unescape(Result.Document);
  228. if (Result.Document <> '.') and (Result.Document <> '..') then
  229. s := ''
  230. else
  231. Result.Document := '';
  232. // break - not needed, last iteration
  233. end;
  234. // Everything left is a path
  235. Result.Path := s;
  236. if Decode then
  237. Result.Path:=Unescape(Result.Path);
  238. // Extract the port number
  239. i := LastDelimiter(':@', Authority);
  240. if (i > 0) and (i < Length(Authority)) and (Authority[i] = ':') then
  241. begin
  242. PortValid := true;
  243. for j:=i+1 to Length(Authority) do
  244. if not (Authority[j] in ['0'..'9']) then
  245. begin
  246. PortValid := false;
  247. break;
  248. end;
  249. if PortValid then
  250. begin
  251. Result.Port := StrToInt(Copy(Authority, i + 1, MaxInt));
  252. SetLength(Authority, i - 1);
  253. end;
  254. end;
  255. // Extract the hostname
  256. i := Pos('@', Authority);
  257. if i > 0 then
  258. begin
  259. Result.Host := Copy(Authority, i + 1, MaxInt);
  260. SetLength(Authority, i - 1);
  261. // Extract username and password
  262. if Authority <> '' then
  263. begin
  264. i := Pos(':', Authority);
  265. if i = 0 then
  266. Result.Username := Authority
  267. else
  268. begin
  269. Result.Username := Copy(Authority, 1, i - 1);
  270. Result.Password := Copy(Authority, i + 1, MaxInt);
  271. end;
  272. end;
  273. end
  274. else
  275. Result.Host := Authority;
  276. end;
  277. procedure RemoveDotSegments(var s: string);
  278. var
  279. Cur, Prev: Integer;
  280. begin
  281. Prev := Pos('/', s);
  282. while (Prev > 0) and (Prev < Length(s)) do
  283. begin
  284. Cur := Prev+1;
  285. while (Cur <= Length(s)) and (s[Cur] <> '/') do
  286. Inc(Cur);
  287. if (Cur - Prev = 2) and (s[Prev+1] = '.') then
  288. Delete(s, Prev+1, 2)
  289. else if (Cur - Prev = 3) and (s[Prev+1] = '.') and (s[Prev+2] = '.') then
  290. begin
  291. while (Prev > 1) and (s[Prev-1] <> '/') do
  292. Dec(Prev);
  293. if Prev > 1 then
  294. Dec(Prev);
  295. Delete(s, Prev+1, Cur-Prev);
  296. end
  297. else
  298. Prev := Cur;
  299. end;
  300. end;
  301. function ResolveRelativeURI(const BaseUri, RelUri: AnsiString; out ResultUri: AnsiString): Boolean;
  302. var
  303. Base, Rel: TUri;
  304. begin
  305. Base := ParseUri(BaseUri);
  306. Rel := ParseUri(RelUri);
  307. Result := (Base.Protocol <> '') or (Rel.Protocol <> '');
  308. if not Result then
  309. Exit;
  310. with Rel do
  311. begin
  312. if (Path = '') and (Document = '') then
  313. begin
  314. if (Protocol = '') and (Host = '') then
  315. begin
  316. if Params <> '' then
  317. Base.Params := Params;
  318. Base.Bookmark := Bookmark;
  319. ResultUri := EncodeUri(Base);
  320. Exit;
  321. end;
  322. end;
  323. if (Protocol <> '') then // RelURI is absolute - return it...
  324. begin
  325. ResultUri := RelUri;
  326. Exit;
  327. end;
  328. // Inherit protocol
  329. Protocol := Base.Protocol;
  330. if (Host = '') then // TODO: or "not HasAuthority"?
  331. begin
  332. // Inherit Authority (host, port, username, password)
  333. Host := Base.Host;
  334. Port := Base.Port;
  335. Username := Base.Username;
  336. Password := Base.Password;
  337. HasAuthority := Base.HasAuthority;
  338. if (Path = '') or (Path[1] <> '/') then // path is empty or relative
  339. Path := Base.Path + Path;
  340. RemoveDotSegments(Path);
  341. end;
  342. end; // with
  343. // EncodeUri percent-encodes the result, and that's good
  344. ResultUri := EncodeUri(Rel);
  345. end;
  346. {$IFDEF WINDOWS}
  347. function ResolveRelativeURI(const BaseUri, RelUri: WideString; out ResultUri: WideString): Boolean;
  348. Var
  349. Res : AnsiString;
  350. begin
  351. Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), Res);
  352. if Result then
  353. ResultURI := UTF8Decode(res);
  354. end;
  355. {$ENDIF}
  356. function ResolveRelativeURI(const BaseUri, RelUri: UnicodeString;
  357. out ResultUri: UnicodeString): Boolean;
  358. var
  359. rslt: AnsiString;
  360. begin
  361. Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), rslt);
  362. if Result then
  363. ResultURI := UTF8Decode(rslt);
  364. end;
  365. function URIToFilename(const URI: string; out Filename: string): Boolean;
  366. var
  367. U: TURI;
  368. I: Integer;
  369. begin
  370. Result := False;
  371. U := ParseURI(URI);
  372. if SameText(U.Protocol, 'file') then
  373. begin
  374. if (Length(U.Path) > 2) and (U.Path[1] = '/') and (U.Path[3] = ':') then
  375. Filename := Copy(U.Path, 2, MaxInt)
  376. else
  377. Filename := U.Path;
  378. Filename := Filename + U.Document;
  379. Result := True;
  380. end
  381. else
  382. if U.Protocol = '' then // fire and pray?
  383. begin
  384. Filename := U.Path + U.Document;
  385. Result := True;
  386. end;
  387. if PathDelim <> '/' then
  388. begin
  389. I := Pos('/', Filename);
  390. while I > 0 do
  391. begin
  392. Filename[I] := PathDelim;
  393. I := Pos('/', Filename);
  394. end;
  395. end;
  396. end;
  397. function FilenameToURI(const Filename: string; Encode : Boolean = True): string;
  398. var
  399. I: Integer;
  400. IsAbsFilename: Boolean;
  401. FilenamePart: string;
  402. begin
  403. IsAbsFilename := ((Filename <> '') and (Filename[1] = PathDelim)) or
  404. ((Length(Filename) > 2) and (Filename[1] in ['A'..'Z', 'a'..'z']) and (Filename[2] = ':'));
  405. Result := 'file:';
  406. if IsAbsFilename then
  407. begin
  408. if Filename[1] <> PathDelim then
  409. Result := Result + '///'
  410. else
  411. Result := Result + '//';
  412. end;
  413. FilenamePart := Filename;
  414. { unreachable code warning is ok here }
  415. if PathDelim <> '/' then
  416. begin
  417. I := Pos(PathDelim, FilenamePart);
  418. while I <> 0 do
  419. begin
  420. FilenamePart[I] := '/';
  421. I := Pos(PathDelim, FilenamePart);
  422. end;
  423. end;
  424. if Encode then
  425. FilenamePart := Escape(FilenamePart, ValidPathChars);
  426. Result := Result + FilenamePart;
  427. end;
  428. function IsAbsoluteURI(const UriReference: string): Boolean;
  429. var
  430. I: Integer;
  431. begin
  432. Result := True;
  433. for I := 1 to Length(UriReference) do
  434. begin
  435. if UriReference[I] = ':' then
  436. Exit
  437. else
  438. if not (((I=1) and (UriReference[I] in ALPHA)) or
  439. (UriReference[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
  440. Break;
  441. end;
  442. Result := False;
  443. end;
  444. end.