docindexer.pp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. program docindexer;
  2. {$mode objfpc}{$H+}
  3. {$IFDEF UNIX}
  4. {$linklib pthread}
  5. {$ENDIF}
  6. uses
  7. cwstring, cthreads, SysUtils, Classes, DateUtils, sqldb, SQLDBindexDB, FBindexDB, sqliteindexdb, pgindexdb, memindexdb, fpIndexer, inifiles,
  8. // indexer readers
  9. IReaderTXT, IReaderPAS, IReaderHTML, CustApp;
  type

    { TDocIndexerApplication }

    { Console application: reads its settings from the command line, builds an
      index database object from an ini file and indexes the requested
      directories into it. }
    TDocIndexerApplication = class(TCustomApplication)
    private
      // Directories to index (option -d/--directory); defaults to '.'.
      FDirs : TStringArray;
      // True when the database must be created first (option -r/--createdb).
      FCreateDB : Boolean;
      // True when the tables must be cleared (option -e/--cleardb); never set
      // together with FCreateDB.
      FEmptyDB : Boolean;
      // Value handed to the indexer's StripPath property (option -s/--strip).
      FStripPath : String;
      // Language handed to the indexer (option -l/--language); defaults to 'english'.
      FLanguage : String;
      // File with words to ignore (option -i/--ignore); empty means none is loaded.
      FIgnoreList : String;
      // Name of the ini file with the database settings (option -c/--config).
      FConfig : String;
      // Value handed to the indexer's CommitFiles property (option -m/--commit-files).
      FCommitFiles : Boolean;
      // When set, DBHook is installed as the global database log hook (option -q/--querylog).
      FLogSQL : Boolean;
      // Code page handed to the indexer (option -p/--codepage); defaults to CP_UTF8.
      FCodePage : TSystemCodePage;
    protected
      // Logging helpers.
      procedure WriteLog(Const Msg : String); virtual;
      procedure WriteLog(Const Fmt : String; Const Args : Array of Const);
      // Event handlers: indexer progress and database log events.
      procedure IndexLog(Sender : TObject; Const ACurrent,ACount : Integer; Const AURL : UTF8String);
      procedure DBHook(Sender : TSQLConnection; EventType : TDBEventType; Const Msg : String);
      // Processing steps, overridable by descendants.
      function ParseOptions: Boolean; virtual;
      function SetupDB : TCustomIndexDB; virtual;
      procedure CreateDB(aDB : TCustomIndexDB); virtual;
      procedure ClearDB(aDB : TCustomIndexDB); virtual;
      procedure DoIndex(aDB: TCustomIndexDB); virtual;
      procedure Usage(const Msg: String); virtual;
      procedure DoRun; override;
    public
      constructor Create(aOwner : TComponent); override;
    end;
  { Announces the action on the log and then lets the index database
    create itself.
    aDB: the index database to create. }
  procedure TDocIndexerApplication.CreateDB(aDB : TCustomIndexDB);

  begin
    WriteLog('Creating database');
    aDB.CreateDB;
  end;
  { Announces the action on the log and then calls CreateIndexerTables on
    the index database.
    aDB: the index database whose tables are reset.
    NOTE(review): presumably CreateIndexerTables recreates the tables and so
    empties them - confirm against the TCustomIndexDB implementation in use. }
  procedure TDocIndexerApplication.ClearDB(aDB: TCustomIndexDB);

  begin
    WriteLog('Clearing database tables');
    aDB.CreateIndexerTables;
  end;
  { Creates and configures the index database described by the [Database]
    section of the ini file named by FConfig.

    The "Type" key selects the implementation (compared case-insensitively,
    default 'PostGres'): postgres, firebird, sqlite or file.
      - postgres/firebird read HostName, DatabaseName, User and Password;
      - sqlite/file read DatabaseName as the file name.
    A key that is absent keeps the value the freshly created object already has.

    When FLogSQL is set, DBHook is installed as GlobalDBLogHook first.

    Returns: the configured database object; the caller owns and frees it.
    Raises : Exception when the type is not one of the four known values.
             If anything fails after the object was created, the object is
             freed before the exception is passed on. }
  function TDocIndexerApplication.SetupDB : TCustomIndexDB;

  Const
    SDatabase = 'Database';
    KeyHostName = 'HostName';
    KeyDatabaseName = 'DatabaseName';
    KeyUser = 'User';
    KeyPassword = 'Password';
    KeyType = 'Type';
    DefaultType = 'PostGres';

    // Connection settings shared by the SQLDB based back-ends.
    Procedure ConfigSQLDB(DB : TSQLDBIndexDB; aIni : TInifile);

    begin
      DB.HostName:= aIni.ReadString(SDatabase,KeyHostName,DB.HostName);
      DB.DatabasePath := aIni.ReadString(SDatabase,KeyDatabaseName,DB.DatabasePath);
      DB.UserName := aIni.ReadString(SDatabase,KeyUser,DB.UserName);
      DB.Password := aIni.ReadString(SDatabase,KeyPassword,DB.Password);
    end;

    // SQLite only needs the database file name.
    Procedure ConfigSQLIte(SDB : TSQLiteIndexDB; aIni : TInifile);

    begin
      SDB.FileName := aIni.ReadString(SDatabase,KeyDatabaseName,SDB.FileName);
    end;

    // The file based index only needs the file name.
    Procedure ConfigFile(FDB : TFileIndexDB; aIni : TInifile);

    begin
      FDB.FileName := aIni.ReadString(SDatabase,KeyDatabaseName,FDB.FileName);
    end;

  Var
    Ini : TIniFile;
    DBType : String;
    DB : TSQLDBIndexDB;
    SDB : TSQLiteIndexDB;
    FDB : TFileIndexDB;

  begin
    if FLogSQL then
      GlobalDBLogHook:=@DBHook;
    Result:=nil;
    Ini:=TIniFile.Create(FConfig);
    try
      // Read the type once: it drives the dispatch and the error message.
      DBType:=Ini.ReadString(SDatabase,KeyType,DefaultType);
      try
        Case lowercase(DBType) of
          'postgres' :
            begin
            DB := TPGIndexDB.Create(nil);
            // Assign Result before configuring so the handler below can free it.
            Result:=DB;
            ConfigSQLDB(DB,Ini);
            end;
          'firebird' :
            begin
            DB := TFBIndexDB.Create(nil);
            Result:=DB;
            ConfigSQLDB(DB,Ini);
            end;
          'sqlite' :
            begin
            SDB := TSQLiteIndexDB.Create(nil);
            Result:=SDB;
            ConfigSQLite(SDB,Ini);
            end;
          'file' :
            begin
            FDB := TFileIndexDB.Create(nil);
            Result:=FDB;
            ConfigFile(FDB,Ini);
            end;
        else
          Raise Exception.CreateFmt('Unknown database type: "%s" ',[DBType]);
        end;
      except
        // Do not leak a half-configured database object.
        FreeAndNil(Result);
        Raise;
      end;
    finally
      Ini.Free;
    end;
  end;
  { Indexes every directory in FDirs into aDB and logs a summary.

    A TFPIndexer is created, configured from the parsed options (code page,
    language, ignore list, commit-files, strip path) and run once per
    directory, recursively, over the fixed file mask
    '*.pas;*.html;readme.txt'. The values returned by Execute are summed and
    reported as the number of indexed words together with the elapsed time.

    aDB: the index database that receives the data; it is not freed here. }
  procedure TDocIndexerApplication.DoIndex(aDB : TCustomIndexDB);

  var
    Indexer : TFPIndexer; // indexes files
    StartTime, EndTime : TDateTime;
    DirNo, WordCount : Int64;
    Dir : String;

  begin
    //SetHeapTraceOutput('heap.trc');
    StartTime := Now;
    Indexer := TFPIndexer.Create(Nil);
    try
      Indexer.CodePage:=FCodePage;
      Indexer.Database:=aDB;
      // Setup parameters for indexing
      Indexer.FileMask := '*.pas;*.html;readme.txt'; // semicolon separated list
      Indexer.SearchRecursive := True;
      Indexer.DetectLanguage := False;
      // Only load an ignore list when one was given on the command line.
      if (FIgnoreList<>'') then
        IgnoreListManager.LoadIgnoreWordsFromFile(FLanguage,FIgnoreList);
      Indexer.Language:=FLanguage;
      Indexer.UseIgnoreList:=true;
      Indexer.CommitFiles:=FCommitFiles;
      Indexer.StripPath:=FStripPath;
      Indexer.OnProgress:=@IndexLog;
      WordCount:=0;
      DirNo:=0;
      For Dir in FDirs do
        begin
        Inc(DirNo);
        // -1 as current item makes IndexLog print the text as a plain message.
        IndexLog(Self,-1,-1,Format('Treating directory %d of %d: %s',[DirNo,Length(FDirs),Dir]));
        Indexer.SearchPath:=Dir;
        // Execute the search.
        // NOTE(review): the meaning of the False argument is not visible here.
        WordCount := WordCount+Indexer.Execute(False);
        end;
      EndTime := Now;
      if WordCount <> 0 then
        WriteLog('Indexing successful')
      else
        WriteLog('Error indexing or no words found...');
      WriteLog('Done, indexed %d words in %d directories in %d sec.',
               [WordCount,Length(FDirs),SecondsBetween(EndTime,StartTime)]);
    finally
      FreeAndNil(Indexer);
    end;
  end;
  { Prints an optional error message followed by the command-line help and
    sets the process exit code.

    Msg: error text to show first; empty when help was requested explicitly.

    ExitCode becomes 1 when Msg is not empty and 0 otherwise. }
  procedure TDocIndexerApplication.Usage(Const Msg : String);

  begin
    If (Msg<>'') then
      Writeln(Msg);
    // Always show the help text: previously -h/--help printed nothing at all.
    Writeln('Usage: ',ExtractFileName(ExeName),' -c FILE [options]');
    Writeln('Where options is one or more of:');
    Writeln('  -h --help             Show this help text.');
    Writeln('  -c --config=FILE      Ini file with the database connection settings (required).');
    Writeln('  -d --directory=DIR    Directory to index, may be given more than once (default: ".").');
    Writeln('  -r --createdb         Create the database before indexing.');
    Writeln('  -e --cleardb          Clear the database tables before indexing (ignored with -r).');
    Writeln('  -q --querylog         Log database (SQL) events.');
    Writeln('  -m --commit-files     Enable the CommitFiles option of the indexer.');
    Writeln('  -l --language=LANG    Language of the indexed files (default: english).');
    Writeln('  -i --ignore=FILE      File with words to ignore.');
    Writeln('  -p --codepage=NAME    Code page of the indexed files (default: UTF-8).');
    Writeln('  -s --strip=PATH       Value for the StripPath option of the indexer.');
    ExitCode:=Ord(Msg<>'')
  end;
  { Copies the command-line options into the application fields.

    Returns False, after reporting through Usage, when no configuration
    file was given or when the code page name is not recognised; returns
    True otherwise.

    Defaults applied here: directory '.', language 'english', code page
    CP_UTF8. The cleardb flag is dropped when createdb is requested. }
  function TDocIndexerApplication.ParseOptions : Boolean;

  var
    CodePageName : String;

  begin
    Result:=False;
    // The configuration file is mandatory.
    FConfig:=GetOptionValue('c','config');
    if (FConfig='') then
      begin
      Usage('Need database connection configuration file');
      Exit;
      end;
    // Directories to index; fall back to the current directory.
    FDirs:=GetOptionValues('d','directory');
    if (Length(FDirs)=0) then
      begin
      SetLength(FDirs,1);
      FDirs[0]:='.';
      end;
    // Simple switches.
    FCreateDB:=HasOption('r','createdb');
    FEmptyDB:=HasOption('e','cleardb') and (not FCreateDB);
    FLogSQL:=HasOption('q','querylog');
    FCommitFiles:=HasOption('m','commit-files');
    // Options with a value.
    FLanguage:=GetOptionValue('l','language');
    if (FLanguage='') then
      FLanguage:='english';
    FIgnoreList:=GetOptionValue('i','ignore');
    CodePageName:=GetOptionValue('p','codepage');
    FStripPath:=GetOptionValue('s','strip');
    if (CodePageName<>'') then
      begin
      FCodePage:=CodePageNameToCodePage(CodePageName);
      // NOTE(review): $FFFF is presumably the "unknown code page" result - confirm.
      if (FCodePage=$FFFF) then
        begin
        Usage('Invalid or unsupported encoding: '+CodePageName);
        Exit;
        end;
      end
    else
      FCodePage:=CP_UTF8;
    Result:=True;
  end;
  { Main body of the application: validates the command line, builds the
    index database, optionally creates or clears it, and runs the indexer.

    The application is marked as terminated up front, so this runs once.
    On an invalid option, or when help is requested, Usage is called and
    nothing else happens. The database object is always freed. }
  procedure TDocIndexerApplication.DoRun;

  Var
    ErrMsg : String;
    DB : TCustomIndexDB;

  begin
    Terminate;
    // Long options that take a value need a trailing colon, just like the
    // short ones; without it CheckOptions rejects e.g. --config=FILE.
    // The ignore option is named 'ignore' to match what ParseOptions reads.
    ErrMsg:=CheckOptions('hd:reqmc:l:i:p:s:',
                         ['help','directory:','createdb','cleardb','querylog',
                          'commit-files','config:','language:','ignore:',
                          'codepage:','strip:']);
    if (ErrMsg<>'') or HasOption('h','help') then
      begin
      Usage(ErrMsg);
      Exit;
      end;
    If not ParseOptions then
      Exit;
    DB:=SetupDB;
    try
      If FCreateDB then
        // Go through the virtual method so descendants can hook in and the
        // action is logged, as is done for ClearDB.
        CreateDB(DB)
      else
        begin
        DB.Connect;
        if FEmptyDB then
          ClearDB(DB);
        end;
      DoIndex(DB);
    finally
      DB.Free;
    end;
  end;
  { Creates the application object.
    aOwner: owner component passed on to the inherited constructor.
    StopOnException is switched on and the code page starts out as CP_UTF8. }
  constructor TDocIndexerApplication.Create(aOwner: TComponent);

  begin
    inherited Create(aOwner);
    FCodePage:=CP_UTF8;
    StopOnException:=True;
  end;
  { Writes one log line to standard output. Virtual, so a descendant can
    send the log elsewhere.
    Msg: the text to write. }
  procedure TDocIndexerApplication.WriteLog(const Msg: String);

  begin
    Writeln(Msg);
  end;
  { Formatting variant of WriteLog: builds the message with Format and
    passes it to the single-string version.
    Fmt : format string as accepted by Format.
    Args: the values for the format string. }
  procedure TDocIndexerApplication.WriteLog(const Fmt: String; const Args: array of const);

  var
    Line : String;

  begin
    Line:=Format(Fmt,Args);
    WriteLog(Line);
  end;
  { Progress handler for the indexer; also used to log plain messages.

    Sender  : the object reporting progress (not used).
    ACurrent: number of the current item, or -1 when AURL is a plain message.
    ACount  : total number of items.
    AURL    : the item being processed, or the message text.

    Plain messages are logged as they are. Progress is logged as a
    percentage followed by current/total and the item. When ACount is 0 no
    percentage can be computed, so only the text is logged instead of
    dividing by zero. }
  procedure TDocIndexerApplication.IndexLog(Sender: TObject; const ACurrent, ACount: Integer; const AURL: UTF8String);

  begin
    if (ACurrent=-1) or (ACount=0) then
      WriteLog(AURL)
    else
      WriteLog('%5.2f%% [%d/%d] : %s',[(ACurrent/ACount*100),ACurrent,ACount,AURL]);
  end;
  { Database log hook: writes every reported event to the log.

    Sender   : the connection that reports the event (not used).
    EventType: kind of event; its textual form, as produced by Str, is logged.
    Msg      : the event text. }
  procedure TDocIndexerApplication.DBHook(Sender: TSQLConnection; EventType: TDBEventType; const Msg: String);

  var
    EventName : String;

  begin
    // Str turns the enumeration value into text.
    Str(EventType,EventName);
    WriteLog('SQL [%s] : %s',[EventName,Msg]);
  end;
  var
    App : TDocIndexerApplication;

  { Program entry point: create the application, initialize and run it,
    and always release it afterwards. }
  begin
    App:=TDocIndexerApplication.Create(Nil);
    try
      App.Initialize;
      App.Run;
    finally
      App.Free;
    end;
  end.