IdUriUtils.pas 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. unit IdUriUtils;
  2. interface
  3. {$i IdCompilerDefines.inc}
  4. {$IFDEF DOTNET}
  5. {$DEFINE HAS_ConvertToUtf32}
  6. {$ENDIF}
  7. {$IFDEF HAS_TCharacter}
  8. {$DEFINE HAS_ConvertToUtf32}
  9. {$ENDIF}
  10. {$IFDEF HAS_Character_TCharHelper}
  11. {$DEFINE HAS_ConvertToUtf32}
  12. {$ENDIF}
  13. {$IFDEF DOTNET}
  14. {$DEFINE HAS_String_IndexOf}
  15. {$ENDIF}
  16. {$IFDEF HAS_SysUtils_TStringHelper}
  17. {$DEFINE HAS_String_IndexOf}
  18. {$ENDIF}
  19. uses
  20. IdGlobal
  21. {$IFNDEF DOTNET}
  22. {$IFDEF HAS_ConvertToUtf32}
  23. , Character
  24. {$ELSE}
  25. , IdException
  26. {$ENDIF}
  27. {$IFDEF HAS_String_IndexOf}
  28. , SysUtils
  29. {$ENDIF}
  30. {$ENDIF}
  31. ;
  32. {$IFNDEF HAS_ConvertToUtf32}
  33. type
  34. //for .NET, we use Char.ConvertToUtf32() as-is
  35. //for XE3.5+, we use TCharHelper.ConvertToUtf32() as-is
  36. //for D2009+, we use TCharacter.ConvertToUtf32() as-is
  37. EIdUTF16Exception = class(EIdException);
  38. EIdUTF16IndexOutOfRange = class(EIdUTF16Exception);
  39. EIdUTF16InvalidHighSurrogate = class(EIdUTF16Exception);
  40. EIdUTF16InvalidLowSurrogate = class(EIdUTF16Exception);
  41. EIdUTF16MissingLowSurrogate = class(EIdUTF16Exception);
  42. {$ENDIF}
  43. // calculates character length, including surrogates
  44. function CalcUTF16CharLength(const AStr: {$IFDEF STRING_IS_UNICODE}string{$ELSE}TIdWideChars{$ENDIF}; const AIndex: Integer): Integer;
  45. function WideCharIsInSet(const ASet: TIdUnicodeString; const AChar: WideChar): Boolean;
  46. function GetUTF16Codepoint(const AStr: {$IFDEF STRING_IS_UNICODE}string{$ELSE}TIdWideChars{$ENDIF}; const AIndex: Integer): Integer;
  47. implementation
  48. {$IFNDEF HAS_ConvertToUtf32}
  49. uses
  50. IdResourceStringsUriUtils;
  51. {$ENDIF}
  52. function CalcUTF16CharLength(const AStr: {$IFDEF STRING_IS_UNICODE}string{$ELSE}TIdWideChars{$ENDIF};
  53. const AIndex: Integer): Integer;
  54. {$IFDEF DOTNET}
  55. var
  56. C: Integer;
  57. {$ELSE}
  58. {$IFDEF HAS_ConvertToUtf32}
  59. {$IFDEF USE_INLINE}inline;{$ENDIF}
  60. {$ELSE}
  61. var
  62. C: WideChar;
  63. {$ENDIF}
  64. {$ENDIF}
  65. begin
  66. {$IFDEF DOTNET}
  67. C := System.Char.ConvertToUtf32(AStr, AIndex-1);
  68. if (C >= #$10000) and (C <= #$10FFFF) then begin
  69. Result := 2;
  70. end else begin
  71. Result := 1;
  72. end;
  73. {$ELSE}
  74. {$IFDEF HAS_Character_TCharHelper}
  75. Char.ConvertToUtf32(AStr, AIndex-1, Result);
  76. {$ELSE}
  77. {$IFDEF HAS_TCharacter}
  78. TCharacter.ConvertToUtf32(AStr, AIndex, Result);
  79. {$ELSE}
  80. // TODO: use GetUTF16Codepoint() here...
  81. {
  82. C := GetUTF16Codepoint(AStr, AIndex);
  83. if (C >= #$10000) and (C <= #$10FFFF) then begin
  84. Result := 2;
  85. end else begin
  86. Result := 1;
  87. end;
  88. }
  89. if (AIndex < {$IFDEF STRING_IS_UNICODE}1{$ELSE}0{$ENDIF}) or
  90. (AIndex > (Length(AStr){$IFNDEF STRING_IS_UNICODE}-1{$ENDIF})) then
  91. begin
  92. raise EIdUTF16IndexOutOfRange.CreateResFmt(@RSUTF16IndexOutOfRange, [AIndex, Length(AStr)]);
  93. end;
  94. C := AStr[AIndex];
  95. if (C >= #$D800) and (C <= #$DFFF) then
  96. begin
  97. if C > #$DBFF then begin
  98. raise EIdUTF16InvalidHighSurrogate.CreateResFmt(@RSUTF16InvalidHighSurrogate, [AIndex]);
  99. end;
  100. if AIndex = (Length(AStr){$IFNDEF STRING_IS_UNICODE}-1{$ENDIF}) then begin
  101. raise EIdUTF16MissingLowSurrogate.CreateRes(@RSUTF16MissingLowSurrogate);
  102. end;
  103. C := AStr[AIndex+1];
  104. if (C < #$DC00) or (C > #$DFFF) then begin
  105. raise EIdUTF16InvalidLowSurrogate.CreateResFmt(@RSUTF16InvalidLowSurrogate, [AIndex+1]);
  106. end;
  107. Result := 2;
  108. end else begin
  109. Result := 1;
  110. end;
  111. {$ENDIF}
  112. {$ENDIF}
  113. {$ENDIF}
  114. end;
  115. function WideCharIsInSet(const ASet: TIdUnicodeString; const AChar: WideChar): Boolean;
  116. {$IFDEF HAS_String_IndexOf}
  117. {$IFDEF USE_INLINE}inline;{$ENDIF}
  118. {$ELSE}
  119. var
  120. I: Integer;
  121. {$ENDIF}
  122. begin
  123. {$IFDEF HAS_String_IndexOf}
  124. Result := ASet.IndexOf(AChar) > -1;
  125. {$ELSE}
  126. // RLebeau 5/8/08: Calling Pos() with a Char as input creates a temporary
  127. // String. Normally this is fine, but profiling reveils this to be a big
  128. // bottleneck for code that makes a lot of calls to CharIsInSet(), so we
  129. // will scan through ASet looking for the character without a conversion...
  130. //
  131. // Result := IndyPos(AString[ACharPos], ASet);
  132. //
  133. Result := False;
  134. for I := 1 to Length(ASet) do begin
  135. if ASet[I] = AChar then begin
  136. Result := True;
  137. Exit;
  138. end;
  139. end;
  140. {$ENDIF}
  141. end;
  142. function GetUTF16Codepoint(const AStr: {$IFDEF STRING_IS_UNICODE}string{$ELSE}TIdWideChars{$ENDIF};
  143. const AIndex: Integer): Integer;
  144. {$IFDEF HAS_ConvertToUtf32}
  145. {$IFDEF USE_INLINE}inline;{$ENDIF}
  146. {$ELSE}
  147. var
  148. C: WideChar;
  149. LowSurrogate, HighSurrogate: Integer;
  150. {$ENDIF}
  151. begin
  152. {$IFDEF DOTNET}
  153. Result := System.Char.ConvertToUtf32(AStr, AIndex-1);
  154. {$ELSE}
  155. {$IFDEF HAS_Character_TCharHelper}
  156. Result := Char.ConvertToUtf32(AStr, AIndex-1);
  157. {$ELSE}
  158. {$IFDEF HAS_TCharacter}
  159. Result := TCharacter.ConvertToUtf32(AStr, AIndex);
  160. {$ELSE}
  161. if (AIndex < {$IFDEF STRING_IS_UNICODE}1{$ELSE}0{$ENDIF}) or
  162. (AIndex > (Length(AStr){$IFNDEF STRING_IS_UNICODE}-1{$ENDIF})) then
  163. begin
  164. raise EIdUTF16IndexOutOfRange.CreateResFmt(@RSUTF16IndexOutOfRange, [AIndex, Length(AStr)]);
  165. end;
  166. C := AStr[AIndex];
  167. if (C >= #$D800) and (C <= #$DFFF) then
  168. begin
  169. HighSurrogate := Integer(C);
  170. if HighSurrogate > $DBFF then begin
  171. raise EIdUTF16InvalidHighSurrogate.CreateResFmt(@RSUTF16InvalidHighSurrogate, [AIndex]);
  172. end;
  173. if AIndex = (Length(AStr){$IFNDEF STRING_IS_UNICODE}-1{$ENDIF}) then begin
  174. raise EIdUTF16MissingLowSurrogate.CreateRes(@RSUTF16MissingLowSurrogate);
  175. end;
  176. LowSurrogate := Integer(AStr[AIndex+1]);
  177. if (LowSurrogate < $DC00) or (LowSurrogate > $DFFF) then begin
  178. raise EIdUTF16InvalidLowSurrogate.CreateResFmt(@RSUTF16InvalidLowSurrogate, [AIndex+1]);
  179. end;
  180. Result := ((HighSurrogate - $D800) shl 10) or (LowSurrogate - $DC00) + $10000;
  181. end else begin
  182. Result := Integer(C);
  183. end;
  184. {$ENDIF}
  185. {$ENDIF}
  186. {$ENDIF}
  187. end;
  188. end.