dbindexer.pp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 2012 by the Free Pascal development team
  4. Database indexer
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit DBIndexer;
  12. {$mode objfpc}{$H+}
  13. interface
  14. uses
  15. Classes, SysUtils, ireadertxt, db, sqldb, ibconnection, fpindexer;
  16. Type
  17. { TDBIndexer }
  18. TDBIndexer = Class(TComponent)
  19. private
  20. FDatabase: TSQLConnection;
  21. FFieldInURL: Boolean;
  22. FIndexDB: TCustomIndexDB;
  23. FIndexer: TFPIndexer;
  24. FMFL: integer;
  25. FSKipTables: TStrings;
  26. function GetRecordCount(const TableName: string): Int64;
  27. Function IndexRecord(const TableName: String; Dataset: TDataset;
  28. KeyField: TField; List: TStrings) : int64;
  29. procedure SetDatabase(AValue: TSQLConnection);
  30. procedure SetIndexDB(AValue: TCustomIndexDB);
  31. procedure SetSkipTables(AValue: TStrings);
  32. Protected
  33. Procedure CreateIndexer;
  34. Procedure FreeIndexer;
  35. Procedure Log(Msg : String);
  36. Procedure Log(Fmt : String; Args : Array of const);
  37. procedure Notification(AComponent: TComponent;Operation: TOperation); override;
  38. procedure GetFieldNames(const TableName : String; List: TStrings; out KeyField: String); virtual;
  39. Function IndexTable(const TableName: string) : int64; virtual;
  40. Property Indexer : TFPIndexer read FIndexer;
  41. Public
  42. Constructor Create(AOwner : TComponent); override;
  43. Destructor Destroy; override;
  44. Procedure IndexDatabase;
  45. Property Database : TSQLConnection Read FDatabase Write SetDatabase;
  46. Property IndexDB : TCustomIndexDB Read FIndexDB Write SetIndexDB;
  47. Property FieldInURL : Boolean Read FFieldInURL Write FFIeldInURl;
  48. Property MinFieldLength : integer Read FMFL Write FMFL;
  49. Property SkipTables : TStrings Read FSKipTables Write SetSkipTables;
  50. end;
  51. { TIBIndexer }
  52. TIBIndexer = Class(TDBIndexer)
  53. procedure GetFieldNames(const TableName : String; List: TStrings; out KeyField: String); override;
  54. end;
  55. implementation
  56. uses dateutils;
  57. { TIBIndexer }
  58. procedure TIBIndexer.GetFieldNames(const TableName: String; List: TStrings;
  59. out KeyField: String);
  60. Const
  61. SQLFieldNames =
  62. ' SELECT'
  63. +' rel_field.rdb$field_name AS FIELDNAME'
  64. {+' rdb$field_type AS FIELDTYPE,'
  65. +' rdb$field_sub_type AS FIELDSUBTYPE,'
  66. +' rel_field.rdb$null_flag AS FIELDISNULL,'
  67. +' rdb$field_length AS FIELDSIZE,'
  68. +' rdb$field_scale AS FIELDSCALE,'
  69. +' rdb$character_length AS FIELDCHARLENGTH,'
  70. +' rdb$field_precision AS FIELDPRECISION,'
  71. +' field.rdb$default_source AS FIELDDEFAULT,'
  72. +' field.rdb$validation_source AS FIELDCHECK'}
  73. +' FROM'
  74. +' rdb$relations rel'
  75. +' JOIN rdb$relation_fields rel_field'
  76. +' ON rel_field.rdb$relation_name = rel.rdb$relation_name'
  77. +' JOIN rdb$fields field'
  78. +' ON rel_field.rdb$field_source = field.rdb$field_name'
  79. +' WHERE'
  80. +' (rel.rdb$relation_name = :TableName)'
  81. +' AND ((rdb$field_type in (14,37)'
  82. +' and (rdb$character_length >= :MinLength))'
  83. +' or ((rdb$field_type=261) AND(rdb$field_sub_type in (0,1))))'
  84. +' ORDER BY'
  85. +' rel_field.rdb$field_position,'
  86. +' rel_field.rdb$field_name';
  87. SQLPrimaryKeyField =
  88. ' SELECT'
  89. +' ixf.rdb$field_name as FIELDNAME'
  90. {
  91. ' ix.rdb$relation_name AS TABLENAME,'
  92. ' ix.rdb$index_name AS INDEXNAME,'
  93. ' ix.rdb$unique_flag AS INDEXUNIQUE,'
  94. ' ix.rdb$index_type AS INDEX_TYPE'
  95. }
  96. +' FROM'
  97. +' rdb$indices ix'
  98. +' JOIN rdb$relations rel'
  99. +' ON ix.rdb$relation_name = rel.rdb$relation_name'
  100. +' JOIN rdb$relation_constraints rel_con'
  101. +' on ((ix.rdb$relation_name = rel_con.rdb$relation_name)'
  102. +' and (ix.rdb$index_name=rel_con.rdb$index_name))'
  103. +' JOIN rdb$index_segments ixf'
  104. +' on (ixf.rdb$index_name = ix.rdb$index_name)'
  105. +' WHERE'
  106. +' (rel.rdb$system_flag <> 1 OR rel.rdb$system_flag IS NULL)'
  107. +' AND'
  108. +' rel.rdb$relation_name = :TableName'
  109. +' AND rel_con.rdb$constraint_type=''PRIMARY KEY'''
  110. +' ORDER BY ix.rdb$relation_name, ix.rdb$index_name';
  111. Var
  112. Q : TSQLQuery;
  113. begin
  114. Q:=TSQLQuery.Create(Self);
  115. try
  116. Q.Database:=Self.Database;
  117. Q.SQL.TExt:=SQLFieldNames;
  118. Q.ParamByName('TableName').AsString:=TableName;
  119. If MinFieldLength=0 then
  120. MinFieldLength:=2;
  121. Q.ParamByName('MinLength').AsInteger:=MinFieldLength;
  122. Q.Open;
  123. try
  124. While not Q.EOF do
  125. begin
  126. List.Add(Trim(Q.Fields[0].AsString));
  127. Q.Next;
  128. end;
  129. finally
  130. Q.Close;
  131. end;
  132. Q.SQL.TExt:=SQLPrimaryKeyField;
  133. Q.ParamByName('TableName').AsString:=TableName;
  134. Q.Open;
  135. try
  136. If not Q.EOF then
  137. KeyField:=Trim(Q.Fields[0].AsString);
  138. Q.Next;
  139. If not Q.EOF then
  140. Raise Exception.CreateFmt('Primary key of table "%s" has multiple fields',[TableName]);
  141. finally
  142. Q.Close;
  143. end;
  144. finally
  145. Q.Free;
  146. end;
  147. end;
  148. { TDBIndexer }
  149. procedure TDBIndexer.SetDatabase(AValue: TSQLConnection);
  150. begin
  151. if FDatabase=AValue then exit;
  152. if Assigned(FDatabase) then
  153. FDatabase.RemoveFreeNotification(Self);
  154. FDatabase:=AValue;
  155. if Assigned(FDatabase) then
  156. FDatabase.FreeNotification(Self);
  157. end;
  158. procedure TDBIndexer.SetIndexDB(AValue: TCustomIndexDB);
  159. begin
  160. if FIndexDB=AValue then exit;
  161. if Assigned(FIndexDB) then
  162. FIndexDB.RemoveFreeNotification(Self);
  163. FIndexDB:=AValue;
  164. if Assigned(FIndexDB) then
  165. FIndexDB.FreeNotification(Self);
  166. end;
  167. procedure TDBIndexer.SetSkipTables(AValue: TStrings);
  168. begin
  169. if FSKipTables=AValue then exit;
  170. FSKipTables.Assign(AValue);
  171. end;
  172. procedure TDBIndexer.CreateIndexer;
  173. begin
  174. FIndexer:=TFPIndexer.Create(Self);
  175. Findexer.FreeNotification(Self);
  176. Findexer.Database:=IndexDB;
  177. FIndexer.DetectLanguage:=False;
  178. FIndexer.CommitFiles:=True;
  179. end;
  180. procedure TDBIndexer.FreeIndexer;
  181. begin
  182. Findexer.Free;
  183. end;
  184. procedure TDBIndexer.Log(Msg: String);
  185. begin
  186. Writeln(Msg);
  187. end;
  188. procedure TDBIndexer.Log(Fmt: String; Args: array of const);
  189. begin
  190. Log(Format(Fmt,Args));
  191. end;
  192. procedure TDBIndexer.IndexDatabase;
  193. Var
  194. TL : TStringList;
  195. I : Integer;
  196. Start : TDateTime;
  197. C,D : Int64;
  198. begin
  199. if FMFL=0 then
  200. FMFL:=2;
  201. TL:=TStringList.Create;
  202. try
  203. Database.GetTableNames(TL);
  204. For I:=TL.Count-1 downto 0 do
  205. if FSkipTables.IndexOf(TL[i])<>-1 then
  206. TL.Delete(I);
  207. Log('Found %d tables.',[TL.Count]);
  208. if TL.Count=0 then
  209. exit;
  210. CreateIndexer;
  211. try
  212. For I:=0 to TL.Count-1 do
  213. begin
  214. Log('Indexing table %d/%d : %s',[I+1,TL.Count,TL[i]]);
  215. Start:=Now;
  216. C:=IndexTable(TL[i]);
  217. D:=SecondsBetween(Now,Start);
  218. if (D<>0) then
  219. D:=Round(C/D);
  220. Log('%d records. %d records/sec',[C,D]);
  221. end;
  222. finally
  223. FreeIndexer;
  224. end;
  225. finally
  226. TL.Free;
  227. end;
  228. end;
  229. procedure TDBIndexer.Notification(AComponent: TComponent; Operation: TOperation
  230. );
  231. begin
  232. Inherited;
  233. if Operation=opRemove then
  234. if AComponent=FIndexer then
  235. FIndexer:=Nil
  236. else if AComponent=FDatabase then
  237. FDatabase:=Nil;
  238. end;
  239. procedure TDBIndexer.GetFieldNames(Const TableName : String; List : TStrings; Out KeyField : String);
  240. begin
  241. Database.GetFieldNames(TableName,List);
  242. KeyField:=List[0];
  243. end;
  244. Function TDBIndexer.GetRecordCount(Const TableName : string) : Int64;
  245. begin
  246. With TSQLQuery.Create(Self) do
  247. try
  248. Database:=Self.Database;
  249. SQL.Text:=Format('SELECT COUNT(*) AS THECOUNT FROM %s',[TableName]);
  250. Open;
  251. If not (EOF and BOF) then
  252. begin
  253. if Fields[0].DataType=ftLargeInt then
  254. Result:=(Fields[0] as TLargeIntField).AsLargeInt
  255. else
  256. Result:=Fields[0].AsInteger;
  257. end
  258. else
  259. Result:=0;
  260. finally
  261. Free;
  262. end;
  263. end;
  264. function TDBIndexer.IndexTable(Const TableName : string) : int64;
  265. Var
  266. FL : TStringList;
  267. KF : String;
  268. SQL : String;
  269. Q : TSQLQuery;
  270. I : Integer;
  271. KFF : TField;
  272. RCount,TCount : Int64;
  273. BS : Integer;
  274. begin
  275. Result:=0;
  276. FL:=TStringList.Create;
  277. try
  278. GetFieldNames(TableName,FL,KF);
  279. if FL.Count=0 then
  280. begin
  281. Log('Table "%s" has no indexable fields.',[TAbleName]);
  282. exit;
  283. end;
  284. if (KF='') then
  285. begin
  286. Log('Table "%s" has no key field.',[TableName]);
  287. exit;
  288. end;
  289. FL.Sorted:=True;
  290. SQL:=KF;
  291. For I:=0 to FL.Count-1 do
  292. begin
  293. if (FL[i]<>KF) then
  294. SQL:=SQL+', '+FL[i];
  295. end;
  296. SQL:='SELECT '+SQL+' FROM '+TableName;
  297. Log('SQL : %s',[SQL]);
  298. RCount:=0;
  299. Result:=0;
  300. TCount:=GetRecordCount(TableName);
  301. if TCount>10000 then
  302. BS:=1000
  303. else
  304. BS:=100;
  305. Q:=TSQLQuery.Create(Self);
  306. try
  307. Q.SQL.Text:=SQL;
  308. Q.UniDirectional:=True;
  309. Q.DataBase:=Self.Database;
  310. Q.Open;
  311. KFF:=Q.FieldByName(KF);
  312. For I:=0 to FL.Count-1 do
  313. FL.Objects[i]:=Q.FieldByName(FL[i]);
  314. While Not Q.EOF do
  315. begin
  316. Inc(RCount);
  317. If (RCount mod BS)=1 then
  318. Log('Indexing record %d of %d',[RCount,TCount]);
  319. IndexRecord(TableName,Q,KFF,FL);
  320. Inc(Result);
  321. Q.Next;
  322. end;
  323. finally
  324. Q.Free;
  325. end;
  326. finally
  327. FL.Free;
  328. end;
  329. end;
  330. constructor TDBIndexer.Create(AOwner: TComponent);
  331. begin
  332. inherited Create(AOwner);
  333. FSkipTables:=TStringList.Create;
  334. TStringList(FSkipTables).Sorted:=True;
  335. end;
  336. destructor TDBIndexer.Destroy;
  337. begin
  338. FreeAndNil(FSkipTables);
  339. inherited Destroy;
  340. end;
  341. Function TDBIndexer.IndexRecord(Const TableName : String; Dataset : TDataset; KeyField : TField; List : TStrings) : Int64;
  342. Var
  343. URL,FURL : String;
  344. F : TField;
  345. SS : TStringStream;
  346. I : Integer;
  347. R : TIReaderTxt;
  348. S,T : String;
  349. wc : Int64;
  350. begin
  351. Result:=0;
  352. T:='';
  353. URL:=TableName+'/'+KeyField.AsString;
  354. R:=TIReaderTXT.Create(URL,CP_UTF8);
  355. try
  356. For I:=0 to List.Count-1 do
  357. begin
  358. F:=TField(List.Objects[i]);
  359. if (F.DataType in [ftString,ftWideString,ftMemo]) and (F.Size>MinFieldLength) then
  360. begin
  361. If FieldInURL Then
  362. begin
  363. SS:=TStringStream.Create(F.AsString);
  364. try
  365. FURL:=URL+'/'+F.AsString;
  366. WC:=Indexer.IndexStream(FURL,Now,SS,R);
  367. Result:=Result+WC;
  368. // Writeln(URL,' : ',F.FieldName,' (',F.Size,')');
  369. finally
  370. SS.Free;
  371. end;
  372. end
  373. else
  374. begin
  375. S:=Trim(F.AsString);
  376. if (S<>'') then
  377. T:=T+' '+F.AsString;
  378. end;
  379. end;
  380. end;
  381. if (not FieldInURL) and (T<>'') then
  382. begin
  383. SS:=TStringStream.Create(T);
  384. try
  385. WC:=Indexer.IndexStream(URL,Now,SS,R);
  386. Result:=WC;
  387. // Writeln(URL,' : "',T,'" : ',wc);
  388. finally
  389. SS.Free;
  390. end;
  391. end
  392. finally
  393. R.Free;
  394. end;
  395. end;
  396. end.