2
0

dcstringhashlistutf8.pas 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. {
  2. Double Commander
  3. -------------------------------------------------------------------------
  4. Modified version of StringHashList unit with UTF-8 support
  5. Copyright (C) 2019 Alexander Koblov ([email protected])
  6. This file is based on stringhashlist.pas from the LazUtils package
  7. See the file COPYING.modifiedLGPL.txt, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. }
  13. unit DCStringHashListUtf8;
  14. {$mode objfpc}{$H+}
  15. interface
  16. uses
  17. Classes, SysUtils,
  18. // LazUtils
  19. LazUtilsStrConsts;
  20. type
  21. PStringHashItem = ^TStringHashItem;
  22. TStringHashItem = record
  23. HashValue: Cardinal;
  24. Key: String;
  25. Data: Pointer;
  26. end;
  27. PStringHashItemList = ^PStringHashItem;
  28. { TStringHashListUtf8 }
  29. TStringHashListUtf8 = class(TObject)
  30. private
  31. FList: PStringHashItemList;
  32. FCount: Integer;
  33. fNormalize: Boolean;
  34. fCaseSensitive: Boolean;
  35. function BinarySearch(HashValue: Cardinal): Integer;
  36. function CompareString(const Low, Key: String): Boolean;
  37. function CompareValue(const Value1, Value2: Cardinal): Integer;
  38. procedure FindHashBoundaries(HashValue: Cardinal; StartFrom: Integer; out First, Last: Integer);
  39. function GetData(const S: String): Pointer;
  40. procedure SetNormalize(AValue: Boolean);
  41. procedure SetCaseSensitive(const Value: Boolean);
  42. procedure Delete(Index: Integer);
  43. procedure SetData(const S: String; const AValue: Pointer);
  44. protected
  45. function HashOf(const Key: string): Cardinal;
  46. procedure Insert(Index: Integer; Item: PStringHashItem);
  47. public
  48. constructor Create(CaseSensitivity: boolean);
  49. destructor Destroy; override;
  50. function Add(const S: String): Integer;
  51. function Add(const S: String; ItemData: Pointer): Integer;
  52. procedure Clear;
  53. procedure Remove(Index: Integer);
  54. function Find(const S: String): Integer;
  55. function Find(const S: String; Data: Pointer): Integer;
  56. function Remove(const S: String): Integer;
  57. function Remove(const S: String; Data: Pointer): Integer;
  58. procedure FindBoundaries(StartFrom: Integer; out First, Last: Integer);
  59. property Normalize: Boolean read fNormalize write SetNormalize;
  60. property CaseSensitive: Boolean read fCaseSensitive write SetCaseSensitive;
  61. property Count: Integer read FCount;
  62. property Data[const S: String]: Pointer read GetData write SetData; default;
  63. property List: PStringHashItemList read FList;
  64. end;
  65. implementation
  66. uses
  67. LazUTF8, DCOSUtils;
  68. { TStringHashListUtf8 }
  69. function TStringHashListUtf8.Add(const S: String): Integer;
  70. begin
  71. Result:=Add(S,nil);
  72. end;
  73. function TStringHashListUtf8.Add(const S: String; ItemData: Pointer): Integer;
  74. var
  75. Text: String;
  76. Item: PStringHashItem;
  77. First, Last, I: Integer;
  78. Val: Cardinal;
  79. Larger: boolean;
  80. begin
  81. if fCaseSensitive then
  82. Text := S
  83. else begin
  84. Text:= UTF8LowerCase(S);
  85. end;
  86. if fNormalize then
  87. begin
  88. Text:= NormalizeFileName(Text);
  89. end;
  90. New(Item);
  91. Val:= HashOf(Text);
  92. Item^.HashValue := Val;
  93. Item^.Key := S;
  94. Item^.Data := ItemData;
  95. if FCount > 0 then
  96. begin
  97. First:=0;
  98. Last:= FCount-1;
  99. Larger:=False;
  100. while First<=Last do
  101. begin
  102. I:=(First+Last)shr 1;
  103. Case CompareValue(Val, fList[I]^.HashValue)<=0 of
  104. True:
  105. begin
  106. Last:=I-1;
  107. Larger:=False;
  108. end;
  109. False:
  110. begin
  111. First:=I+1;
  112. Larger:=True;
  113. end;
  114. end;
  115. end;
  116. Case Larger of
  117. True: Result:=I+1;
  118. False: Result:=I;
  119. end;
  120. end else
  121. Result:=0;
  122. Insert(Result,Item);
  123. end;
  124. function TStringHashListUtf8.BinarySearch(HashValue: Cardinal): Integer;
  125. var
  126. First, Last, Temp: Integer;
  127. begin
  128. Result:= -1;
  129. First:= 0;
  130. Last:= Count -1;
  131. while First <= Last do
  132. begin
  133. Temp:= (First + Last) div 2;
  134. case CompareValue(HashValue, FList[Temp]^.HashValue) of
  135. 1: First:= Temp + 1;
  136. 0: exit(Temp);
  137. -1: Last:= Temp-1;
  138. end;
  139. end;
  140. end;
  141. procedure TStringHashListUtf8.Clear;
  142. var
  143. I: Integer;
  144. begin
  145. for I:= 0 to fCount -1 do
  146. Dispose(fList[I]);
  147. if FList<>nil then begin
  148. FreeMem(FList);
  149. FList:=nil;
  150. end;
  151. fCount:= 0;
  152. end;
  153. procedure TStringHashListUtf8.Remove(Index: Integer);
  154. begin
  155. if (Index >= 0) and (Index < FCount) then
  156. begin
  157. Dispose(fList[Index]);
  158. Delete(Index);
  159. end;
  160. end;
  161. function TStringHashListUtf8.CompareString(const Low, Key: String): Boolean;
  162. var
  163. P: Pointer;
  164. Len: Integer;
  165. LKey: String;
  166. begin
  167. P:= Pointer(Low);
  168. Len:= Length(Low);
  169. if not fNormalize then
  170. begin
  171. LKey:= Key;
  172. end
  173. else begin
  174. LKey:= NormalizeFileName(Key);
  175. end;
  176. if fCaseSensitive then
  177. begin
  178. Result:= (Len = Length(LKey));
  179. if Result then Result:= (CompareByte(P^, Pointer(LKey)^, Len) = 0);
  180. end
  181. else begin
  182. LKey:= UTF8LowerCase(LKey);
  183. Result:= (Len = Length(LKey));
  184. if Result then Result:= (CompareByte(P^, Pointer(LKey)^, Len) = 0);
  185. end;
  186. end;
  187. function TStringHashListUtf8.CompareValue(const Value1, Value2: Cardinal): Integer;
  188. begin
  189. Result:= 0;
  190. if Value1 > Value2 then
  191. Result:= 1
  192. else if Value1 < Value2 then
  193. Result:= -1;
  194. end;
  195. function TStringHashListUtf8.GetData(const S: String): Pointer;
  196. var i: integer;
  197. begin
  198. i:=Find(S);
  199. if i>=0 then
  200. Result:=FList[i]^.Data
  201. else
  202. Result:=nil;
  203. end;
  204. procedure TStringHashListUtf8.Delete(Index: Integer);
  205. begin
  206. if (Index >= 0) and (Index < FCount) then
  207. begin
  208. dec(FCount);
  209. if Index < FCount then
  210. System.Move(FList[Index + 1], FList[Index],
  211. (FCount - Index) * SizeOf(PStringHashItem));
  212. end;
  213. end;
  214. procedure TStringHashListUtf8.SetData(const S: String; const AValue: Pointer);
  215. var i: integer;
  216. begin
  217. i:=Find(S);
  218. if i>=0 then
  219. FList[i]^.Data:=AValue
  220. else
  221. Add(S,AValue);
  222. end;
  223. procedure TStringHashListUtf8.SetNormalize(AValue: Boolean);
  224. begin
  225. if fNormalize <> AValue then
  226. begin
  227. if Count > 0 then
  228. begin
  229. raise EListError.Create(lrsListMustBeEmpty);
  230. end;
  231. fNormalize := AValue;
  232. end;
  233. end;
  234. destructor TStringHashListUtf8.Destroy;
  235. begin
  236. Clear;
  237. inherited Destroy;
  238. end;
  239. function TStringHashListUtf8.Find(const S: String): Integer;
  240. var
  241. Text: String;
  242. Value: Cardinal;
  243. First, Last, I: Integer;
  244. begin
  245. if fCaseSensitive then
  246. Text := S
  247. else begin
  248. Text:= UTF8LowerCase(S);
  249. end;
  250. if fNormalize then
  251. begin
  252. Text:= NormalizeFileName(Text);
  253. end;
  254. Value:= HashOf(Text);
  255. Result:= BinarySearch(Value);
  256. if (Result <> -1) and not CompareString(Text, FList[Result]^.Key) then
  257. begin
  258. FindHashBoundaries(Value, Result, First, Last);
  259. Result:= -1;
  260. for I := First to Last do
  261. if CompareString(Text, FList[I]^.Key) then
  262. begin
  263. Result:= I;
  264. Exit;
  265. end;
  266. end;
  267. end;
  268. function TStringHashListUtf8.Find(const S: String; Data: Pointer): Integer;
  269. var
  270. Text: String;
  271. Value: Cardinal;
  272. First, Last, I: Integer;
  273. begin
  274. if fCaseSensitive then
  275. Text := S
  276. else begin
  277. Text:= UTF8LowerCase(S);
  278. end;
  279. if fNormalize then
  280. begin
  281. Text:= NormalizeFileName(Text);
  282. end;
  283. Value:= HashOf(Text);
  284. Result:= BinarySearch(Value);
  285. if (Result <> -1) and
  286. not (CompareString(Text, FList[Result]^.Key) and (FList[Result]^.Data = Data)) then
  287. begin
  288. FindHashBoundaries(Value, Result, First, Last);
  289. Result:= -1;
  290. for I := First to Last do
  291. if CompareString(Text, FList[I]^.Key) and (FList[I]^.Data = Data) then
  292. begin
  293. Result:= I;
  294. Exit;
  295. end;
  296. end;
  297. end;
  298. procedure TStringHashListUtf8.FindHashBoundaries(HashValue: Cardinal; StartFrom: Integer; out First, Last: Integer);
  299. begin
  300. First:= StartFrom -1;
  301. //Find first matching hash index
  302. while (First >= 0) and (CompareValue(HashValue, FList[First]^.HashValue) = 0) do
  303. dec(First);
  304. if (First < 0) or ((CompareValue(HashValue, FList[First]^.HashValue) <> 0)) then
  305. inc(First);
  306. //Find the last matching hash index
  307. Last:= StartFrom +1;
  308. while (Last <= (FCount - 1)) and (CompareValue(HashValue, FList[Last]^.HashValue) = 0) do
  309. inc(Last);
  310. if (Last > (FCount - 1)) or (CompareValue(HashValue, FList[Last]^.HashValue) <> 0) then
  311. dec(Last);
  312. end;
  313. function TStringHashListUtf8.HashOf(const Key: string): Cardinal;
  314. var
  315. P: PAnsiChar;
  316. I, Len: Integer;
  317. begin
  318. P:= PAnsiChar(Key);
  319. Len:= Length(Key);
  320. Result := Len;
  321. {$PUSH}{$R-}{$Q-} // no range, no overflow checks
  322. for I := Len - 1 downto 0 do
  323. Inc(Result, Cardinal(Ord(P[I])) shl I);
  324. {$POP}
  325. end;
  326. procedure TStringHashListUtf8.Insert(Index: Integer; Item: PStringHashItem);
  327. begin
  328. ReallocMem(FList, (fCount +1) * SizeOf(PStringHashItem));
  329. if Index > fCount then Index:= fCount;
  330. if Index < 0 then Index:= 0;
  331. if Index < FCount then
  332. System.Move(FList[Index], FList[Index + 1],
  333. (FCount - Index) * SizeOf(PStringHashItem));
  334. FList[Index] := Item;
  335. Inc(FCount);
  336. end;
  337. constructor TStringHashListUtf8.Create(CaseSensitivity: boolean);
  338. begin
  339. fNormalize:= FileNameNormalized;
  340. fCaseSensitive:= CaseSensitivity;
  341. inherited Create;
  342. end;
  343. function TStringHashListUtf8.Remove(const S: String): Integer;
  344. begin
  345. Result:= Find(S);
  346. if Result > -1 then
  347. begin
  348. Dispose(fList[Result]);
  349. Delete(Result);
  350. end;
  351. end;
  352. function TStringHashListUtf8.Remove(const S: String; Data: Pointer): Integer;
  353. begin
  354. Result:= Find(S, Data);
  355. if Result > -1 then
  356. begin
  357. Dispose(fList[Result]);
  358. Delete(Result);
  359. end;
  360. end;
  361. procedure TStringHashListUtf8.FindBoundaries(StartFrom: Integer; out First,
  362. Last: Integer);
  363. begin
  364. FindHashBoundaries(FList[StartFrom]^.HashValue, StartFrom, First, Last);
  365. end;
  366. procedure TStringHashListUtf8.SetCaseSensitive(const Value: Boolean);
  367. begin
  368. if fCaseSensitive <> Value then
  369. begin
  370. if Count > 0 then
  371. begin
  372. raise EListError.Create(lrsListMustBeEmpty);
  373. exit;
  374. end;
  375. fCaseSensitive := Value;
  376. end;
  377. end;
  378. end.