dcstringhashlistutf8.pas 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. {
  2. Double Commander
  3. -------------------------------------------------------------------------
  4. Modified version of StringHashList unit with UTF-8 support
  5. Copyright (C) 2019 Alexander Koblov ([email protected])
  6. This file is based on stringhashlist.pas from the LazUtils package
  7. See the file COPYING.modifiedLGPL.txt, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. }
  13. unit DCStringHashListUtf8;
  14. {$mode objfpc}{$H+}
  15. interface
  16. uses
  17. Classes, SysUtils,
  18. // LazUtils
  19. LazUtilsStrConsts;
  20. type
  21. PStringHashItem = ^TStringHashItem;
  22. TStringHashItem = record
  23. HashValue: Cardinal;
  24. Key: String;
  25. Data: Pointer;
  26. end;
  27. PStringHashItemList = ^PStringHashItem;
  28. { TStringHashListUtf8 }
  29. TStringHashListUtf8 = class(TObject)
  30. private
  31. FList: PStringHashItemList;
  32. FCount: Integer;
  33. fCaseSensitive: Boolean;
  34. function BinarySearch(HashValue: Cardinal): Integer;
  35. function CompareString(const Low, Key: String): Boolean;
  36. function CompareValue(const Value1, Value2: Cardinal): Integer;
  37. procedure FindHashBoundaries(HashValue: Cardinal; StartFrom: Integer; out First, Last: Integer);
  38. function GetData(const S: String): Pointer;
  39. procedure SetCaseSensitive(const Value: Boolean);
  40. procedure Delete(Index: Integer);
  41. procedure SetData(const S: String; const AValue: Pointer);
  42. protected
  43. function HashOf(const Key: string): Cardinal;
  44. procedure Insert(Index: Integer; Item: PStringHashItem);
  45. public
  46. constructor Create(CaseSensitivity: boolean);
  47. destructor Destroy; override;
  48. function Add(const S: String): Integer;
  49. function Add(const S: String; ItemData: Pointer): Integer;
  50. procedure Clear;
  51. procedure Remove(Index: Integer);
  52. function Find(const S: String): Integer;
  53. function Find(const S: String; Data: Pointer): Integer;
  54. function Remove(const S: String): Integer;
  55. function Remove(const S: String; Data: Pointer): Integer;
  56. procedure FindBoundaries(StartFrom: Integer; out First, Last: Integer);
  57. property CaseSensitive: Boolean read fCaseSensitive write SetCaseSensitive;
  58. property Count: Integer read FCount;
  59. property Data[const S: String]: Pointer read GetData write SetData; default;
  60. property List: PStringHashItemList read FList;
  61. end;
  62. implementation
  63. uses
  64. LazUTF8;
  65. { TStringHashListUtf8 }
  66. function TStringHashListUtf8.Add(const S: String): Integer;
  67. begin
  68. Result:=Add(S,nil);
  69. end;
  70. function TStringHashListUtf8.Add(const S: String; ItemData: Pointer): Integer;
  71. var
  72. Text: String;
  73. Item: PStringHashItem;
  74. First, Last, I: Integer;
  75. Val: Cardinal;
  76. Larger: boolean;
  77. begin
  78. if fCaseSensitive then
  79. Text := S
  80. else begin
  81. Text:= UTF8LowerCase(S);
  82. end;
  83. New(Item);
  84. Val:= HashOf(Text);
  85. Item^.HashValue := Val;
  86. Item^.Key := S;
  87. Item^.Data := ItemData;
  88. if FCount > 0 then
  89. begin
  90. First:=0;
  91. Last:= FCount-1;
  92. Larger:=False;
  93. while First<=Last do
  94. begin
  95. I:=(First+Last)shr 1;
  96. Case CompareValue(Val, fList[I]^.HashValue)<=0 of
  97. True:
  98. begin
  99. Last:=I-1;
  100. Larger:=False;
  101. end;
  102. False:
  103. begin
  104. First:=I+1;
  105. Larger:=True;
  106. end;
  107. end;
  108. end;
  109. Case Larger of
  110. True: Result:=I+1;
  111. False: Result:=I;
  112. end;
  113. end else
  114. Result:=0;
  115. Insert(Result,Item);
  116. end;
  117. function TStringHashListUtf8.BinarySearch(HashValue: Cardinal): Integer;
  118. var
  119. First, Last, Temp: Integer;
  120. begin
  121. Result:= -1;
  122. First:= 0;
  123. Last:= Count -1;
  124. while First <= Last do
  125. begin
  126. Temp:= (First + Last) div 2;
  127. case CompareValue(HashValue, FList[Temp]^.HashValue) of
  128. 1: First:= Temp + 1;
  129. 0: exit(Temp);
  130. -1: Last:= Temp-1;
  131. end;
  132. end;
  133. end;
  134. procedure TStringHashListUtf8.Clear;
  135. var
  136. I: Integer;
  137. begin
  138. for I:= 0 to fCount -1 do
  139. Dispose(fList[I]);
  140. if FList<>nil then begin
  141. FreeMem(FList);
  142. FList:=nil;
  143. end;
  144. fCount:= 0;
  145. end;
  146. procedure TStringHashListUtf8.Remove(Index: Integer);
  147. begin
  148. if (Index >= 0) and (Index < FCount) then
  149. begin
  150. Dispose(fList[Index]);
  151. Delete(Index);
  152. end;
  153. end;
  154. function TStringHashListUtf8.CompareString(const Low, Key: String): Boolean;
  155. var
  156. P: Pointer;
  157. Len: Integer;
  158. LKey: String;
  159. begin
  160. P:= Pointer(Low);
  161. Len:= Length(Low);
  162. if fCaseSensitive then
  163. begin
  164. Result:= (Len = Length(Key));
  165. if Result then Result:= (CompareByte(P^, Pointer(Key)^, Len) = 0);
  166. end
  167. else begin
  168. LKey:= UTF8LowerCase(Key);
  169. Result:= (Len = Length(LKey));
  170. if Result then Result:= (CompareByte(P^, Pointer(LKey)^, Len) = 0);
  171. end;
  172. end;
  173. function TStringHashListUtf8.CompareValue(const Value1, Value2: Cardinal): Integer;
  174. begin
  175. Result:= 0;
  176. if Value1 > Value2 then
  177. Result:= 1
  178. else if Value1 < Value2 then
  179. Result:= -1;
  180. end;
  181. function TStringHashListUtf8.GetData(const S: String): Pointer;
  182. var i: integer;
  183. begin
  184. i:=Find(S);
  185. if i>=0 then
  186. Result:=FList[i]^.Data
  187. else
  188. Result:=nil;
  189. end;
  190. procedure TStringHashListUtf8.Delete(Index: Integer);
  191. begin
  192. if (Index >= 0) and (Index < FCount) then
  193. begin
  194. dec(FCount);
  195. if Index < FCount then
  196. System.Move(FList[Index + 1], FList[Index],
  197. (FCount - Index) * SizeOf(PStringHashItem));
  198. end;
  199. end;
  200. procedure TStringHashListUtf8.SetData(const S: String; const AValue: Pointer);
  201. var i: integer;
  202. begin
  203. i:=Find(S);
  204. if i>=0 then
  205. FList[i]^.Data:=AValue
  206. else
  207. Add(S,AValue);
  208. end;
  209. destructor TStringHashListUtf8.Destroy;
  210. begin
  211. Clear;
  212. inherited Destroy;
  213. end;
  214. function TStringHashListUtf8.Find(const S: String): Integer;
  215. var
  216. Text: String;
  217. Value: Cardinal;
  218. First, Last, I: Integer;
  219. begin
  220. if fCaseSensitive then
  221. Text := S
  222. else begin
  223. Text:= UTF8LowerCase(S);
  224. end;
  225. Value:= HashOf(Text);
  226. Result:= BinarySearch(Value);
  227. if (Result <> -1) and not CompareString(Text, FList[Result]^.Key) then
  228. begin
  229. FindHashBoundaries(Value, Result, First, Last);
  230. Result:= -1;
  231. for I := First to Last do
  232. if CompareString(Text, FList[I]^.Key) then
  233. begin
  234. Result:= I;
  235. Exit;
  236. end;
  237. end;
  238. end;
  239. function TStringHashListUtf8.Find(const S: String; Data: Pointer): Integer;
  240. var
  241. Text: String;
  242. Value: Cardinal;
  243. First, Last, I: Integer;
  244. begin
  245. if fCaseSensitive then
  246. Text := S
  247. else begin
  248. Text:= UTF8LowerCase(S);
  249. end;
  250. Value:= HashOf(Text);
  251. Result:= BinarySearch(Value);
  252. if (Result <> -1) and
  253. not (CompareString(Text, FList[Result]^.Key) and (FList[Result]^.Data = Data)) then
  254. begin
  255. FindHashBoundaries(Value, Result, First, Last);
  256. Result:= -1;
  257. for I := First to Last do
  258. if CompareString(Text, FList[I]^.Key) and (FList[I]^.Data = Data) then
  259. begin
  260. Result:= I;
  261. Exit;
  262. end;
  263. end;
  264. end;
  265. procedure TStringHashListUtf8.FindHashBoundaries(HashValue: Cardinal; StartFrom: Integer; out First, Last: Integer);
  266. begin
  267. First:= StartFrom -1;
  268. //Find first matching hash index
  269. while (First >= 0) and (CompareValue(HashValue, FList[First]^.HashValue) = 0) do
  270. dec(First);
  271. if (First < 0) or ((CompareValue(HashValue, FList[First]^.HashValue) <> 0)) then
  272. inc(First);
  273. //Find the last matching hash index
  274. Last:= StartFrom +1;
  275. while (Last <= (FCount - 1)) and (CompareValue(HashValue, FList[Last]^.HashValue) = 0) do
  276. inc(Last);
  277. if (Last > (FCount - 1)) or (CompareValue(HashValue, FList[Last]^.HashValue) <> 0) then
  278. dec(Last);
  279. end;
  280. function TStringHashListUtf8.HashOf(const Key: string): Cardinal;
  281. var
  282. P: PAnsiChar;
  283. I, Len: Integer;
  284. begin
  285. P:= PAnsiChar(Key);
  286. Len:= Length(Key);
  287. Result := Len;
  288. {$PUSH}{$R-}{$Q-} // no range, no overflow checks
  289. for I := Len - 1 downto 0 do
  290. Inc(Result, Cardinal(Ord(P[I])) shl I);
  291. {$POP}
  292. end;
  293. procedure TStringHashListUtf8.Insert(Index: Integer; Item: PStringHashItem);
  294. begin
  295. ReallocMem(FList, (fCount +1) * SizeOf(PStringHashItem));
  296. if Index > fCount then Index:= fCount;
  297. if Index < 0 then Index:= 0;
  298. if Index < FCount then
  299. System.Move(FList[Index], FList[Index + 1],
  300. (FCount - Index) * SizeOf(PStringHashItem));
  301. FList[Index] := Item;
  302. Inc(FCount);
  303. end;
  304. constructor TStringHashListUtf8.Create(CaseSensitivity: boolean);
  305. begin
  306. fCaseSensitive:=CaseSensitivity;
  307. inherited Create;
  308. end;
  309. function TStringHashListUtf8.Remove(const S: String): Integer;
  310. begin
  311. Result:= Find(S);
  312. if Result > -1 then
  313. begin
  314. Dispose(fList[Result]);
  315. Delete(Result);
  316. end;
  317. end;
  318. function TStringHashListUtf8.Remove(const S: String; Data: Pointer): Integer;
  319. begin
  320. Result:= Find(S, Data);
  321. if Result > -1 then
  322. begin
  323. Dispose(fList[Result]);
  324. Delete(Result);
  325. end;
  326. end;
  327. procedure TStringHashListUtf8.FindBoundaries(StartFrom: Integer; out First,
  328. Last: Integer);
  329. begin
  330. FindHashBoundaries(FList[StartFrom]^.HashValue, StartFrom, First, Last);
  331. end;
  332. procedure TStringHashListUtf8.SetCaseSensitive(const Value: Boolean);
  333. begin
  334. if fCaseSensitive <> Value then
  335. begin
  336. if Count > 0 then
  337. begin
  338. raise EListError.Create(lrsListMustBeEmpty);
  339. exit;
  340. end;
  341. fCaseSensitive := Value;
  342. end;
  343. end;
  344. end.