whtmlscn.pas 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 2000 by Berczi Gabor
  4. HTML scanner objects
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit WHTMLScn;
  12. {$ifdef cpullvm}
  13. {$modeswitch nestedprocvars}
  14. {$endif}
  15. {$H-}
  16. interface
  17. uses Objects,
  18. WHTML;
  19. const
  20. HTMLIndexMagicNo = ord('H')+ord('H') shl 8+ord('I') shl 16+ord('X') shl 24;
  21. HTMLIndexVersion = 2;
  22. type
  23. PHTMLLinkScanner = ^THTMLLinkScanner;
  24. PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
  25. TCustomHTMLLinkScanner = object(THTMLParser)
  26. function DocAddTextChar(C: AnsiChar): boolean; virtual;
  27. procedure DocAnchor(Entered: boolean); virtual;
  28. public
  29. {a}function CheckURL(const URL: string): boolean; virtual;
  30. {a}function CheckText(const Text: string): boolean; virtual;
  31. {a}procedure AddLink(const LinkText, LinkURL: string); virtual;
  32. {a}procedure AddRef(LinkURL: string); virtual;
  33. {a}procedure AddNameID(AName: string); virtual;
  34. {a}procedure AddID(AName: string); virtual;
  35. {a}function GetDocumentBaseURL: string; virtual;
  36. private
  37. CurLinkText: string;
  38. CurURL: string;
  39. CurName,
  40. CurID: string;
  41. CurDoc: string;
  42. InAnchor,InNameAnchor,
  43. HasHRef : boolean;
  44. LastSynonym: PHTMLLinkScanDocument;
  45. end;
  46. TNameIDState = (IsReferenced, IsFound,IsID);
  47. TNameIDStates = set of TNameIDState;
  48. PNameID = ^TNameID;
  49. TNameID = object(TObject)
  50. constructor Init(const AName : string; Astate : TNameIDState);
  51. destructor Done; virtual;
  52. procedure SetState(Astate : TNameIDState; enabled : boolean);
  53. procedure SetOrigin(const AOrigin : string);
  54. procedure SetLine(ALine : sw_integer);
  55. function GetLine : sw_integer;
  56. function GetState : TNameIDStates;
  57. function GetName : string;
  58. function GetOrigin : string;
  59. private
  60. Name : pstring;
  61. Origin : pstring;
  62. Line : sw_integer;
  63. State : TNameIDStates;
  64. end;
  65. PNameIDCollection = ^TNameIDCollection;
  66. TNameIDCollection = object(TSortedCollection)
  67. function At(Index: sw_Integer): PNameID;
  68. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  69. end;
  70. THTMLLinkScanDocument = object(TObject)
  71. constructor Init(const ADocName: string);
  72. function GetName: string;
  73. function GetUniqueName: string;
  74. function GetAliasCount: sw_integer;
  75. function GetAlias(Index: sw_integer): string;
  76. procedure AddAlias(const Alias: string);
  77. constructor Load(var S: TStream);
  78. procedure Store(var S: TStream);
  79. destructor Done; virtual;
  80. private
  81. DocName: PString;
  82. Synonym: PHTMLLinkScanDocument;
  83. Aliases: PStringCollection;
  84. end;
  85. PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
  86. THTMLLinkScanDocumentCollection = object(TSortedCollection)
  87. constructor Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
  88. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  89. function At(Index: sw_Integer): PHTMLLinkScanDocument;
  90. function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
  91. procedure MoveAliasesToSynonym;
  92. private
  93. Scanner: PHTMLLinkScanner;
  94. end;
  95. THTMLLinkScanner = object(TCustomHTMLLinkScanner)
  96. constructor Init(const ABaseDir: string);
  97. procedure SetBaseDir(const ABaseDir: string);
  98. {a}function FindID(const AName : string) : PNameID; virtual;
  99. function GetDocumentCount: sw_integer;
  100. function GetDocumentURL(DocIndex: sw_integer): string;
  101. function GetUniqueDocumentURL(DocIndex: sw_integer): string;
  102. function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
  103. function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
  104. constructor LoadDocuments(var S: TStream);
  105. procedure StoreDocuments(var S: TStream);
  106. destructor Done; virtual;
  107. public
  108. procedure AddLink(const LinkText, LinkURL: string); virtual;
  109. private
  110. Documents: PHTMLLinkScanDocumentCollection;
  111. BaseDir: PString;
  112. function ExpandChildURL(const S: string): string;
  113. function NormalizeChildURL(const S: string): string;
  114. end;
  115. THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
  116. PHTMLLinkScanFile = ^THTMLLinkScanFile;
  117. THTMLLinkScanFile = object(TObject)
  118. constructor Init(const ADocumentURL: string);
  119. function GetDocumentURL: string;
  120. destructor Done; virtual;
  121. function AddReferencedName (const AName : string) : PNameID;
  122. function AddFoundName (const AName : string) : PNameID;
  123. procedure CheckNameList;
  124. function FindID(const AName : string) : PNameID; virtual;
  125. private
  126. DocumentURL : PString;
  127. NameIDList : PNameIDCollection;
  128. Owner : PHTMLLinkScanner;
  129. public
  130. State : THTMLLinkScanState;
  131. end;
  132. PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
  133. THTMLLinkScanFileCollection = object(TSortedCollection)
  134. function At(Index: sw_Integer): PHTMLLinkScanFile;
  135. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  136. function SearchFile(const DocURL: string): PHTMLLinkScanFile;
  137. function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
  138. procedure CheckNameIDLists;
  139. end;
  140. THTMLLinkScanOption = (soSubDocsOnly);
  141. THTMLLinkScanOptions = set of THTMLLinkScanOption;
  142. THTMLFileLinkScanner = object(THTMLLinkScanner)
  143. constructor Init(const ABaseDir: string);
  144. procedure ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
  145. destructor Done; virtual;
  146. public
  147. function GetDocumentBaseURL: string; virtual;
  148. function FindID(const AName : string) : PNameID; virtual;
  149. procedure AddLink(const LinkText, LinkURL: string); virtual;
  150. procedure AddRef(LinkURL: string); virtual;
  151. procedure AddNameID(AName: string); virtual;
  152. procedure AddID(AName: string); virtual;
  153. function CheckURL(const URL: string): boolean; virtual;
  154. private
  155. Options: THTMLLinkScanOptions;
  156. BaseURL: string;
  157. CurBaseURL: string;
  158. IDList : PNameIDCollection;
  159. DocumentFiles: PHTMLLinkScanFileCollection;
  160. procedure ScheduleDoc(const DocumentURL: string);
  161. public
  162. procedure ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
  163. end;
  164. procedure RegisterWHTMLScan;
  165. implementation
  166. uses
  167. WUtils;
  168. const
  169. RHTMLLinkScanDocument: TStreamRec = (
  170. ObjType: 19500;
  171. VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
  172. Load: @THTMLLinkScanDocument.Load;
  173. Store: @THTMLLinkScanDocument.Store
  174. );
  175. const
  176. CurrentHTMLIndexVersion : sw_integer = HTMLIndexVersion;
  177. function TCustomHTMLLinkScanner.DocAddTextChar(C: AnsiChar): boolean;
  178. var Added: boolean;
  179. begin
  180. Added:=false;
  181. if InAnchor then
  182. begin
  183. CurLinkText:=CurLinkText+C;
  184. Added:=true;
  185. end;
  186. if ord(c)>32 then
  187. LastSynonym:=nil;
  188. DocAddTextChar:=Added;
  189. end;
  190. procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
  191. begin
  192. if Entered then
  193. begin
  194. CurLinkText:='';
  195. if DocGetTagParam('HREF',CurURL) then
  196. HasHRef:=true
  197. else
  198. CurURL:='';
  199. if not DocGetTagParam('NAME',CurName) then
  200. if not DocGetTagParam('ID',CurName) then
  201. CurName:='';
  202. if not DocGetTagParam('ID',CurID) then
  203. CurID:='';
  204. if CurName<>'' then
  205. begin
  206. InNameAnchor:=true;
  207. If Pos('#',CurName)=0 then
  208. CurName:=CurDoc+'#'+CurName;
  209. CurName:=Trim(CurName);
  210. CurName:=CompleteURL(GetDocumentBaseURL,CurName);
  211. if CurURL='' then
  212. CurURL:=CurName;
  213. end
  214. else
  215. CurName:='';
  216. CurURL:=Trim(CurURL);
  217. if pos('#',CurURL)=1 then
  218. CurURL:=CurDoc+CurURL;
  219. CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
  220. end
  221. else
  222. begin
  223. CurLinkText:=Trim(CurLinkText);
  224. if HasHRef then
  225. begin
  226. if CheckURL(CurURL) and CheckText(CurLinkText) and
  227. not DisableCrossIndexing then
  228. begin
  229. AddLink(CurLinkText,CurURL);
  230. {$ifdef DEBUG}
  231. DebugMessage(CurDoc,' Adding ScanLink "'+CurLinkText+'" to "'+
  232. CurURL+'"',Line,1);
  233. {$endif DEBUG}
  234. end;
  235. { Be sure to parse referenced file,
  236. even if that link is not valid }
  237. AddRef(CurURL);
  238. end;
  239. if not HasHRef and InNameAnchor and CheckURL(CurName) and CheckText(CurLinkText) then
  240. begin
  241. AddLink(CurLinkText,CurName);
  242. {$ifdef DEBUG}
  243. DebugMessage(CurDoc,' Adding ScanName "'+CurLinkText+'" to "'+CurName+'"',Line,1);
  244. {$endif DEBUG}
  245. end;
  246. if InNameAnchor then
  247. begin
  248. AddNameID(CurName);
  249. end;
  250. if not HasHRef and (CurID<>'') then
  251. AddID(CurID);
  252. InNameAnchor:=false;
  253. HasHRef:=false;
  254. end;
  255. InAnchor:=Entered;
  256. end;
  257. function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
  258. begin
  259. { Abstract }
  260. GetDocumentBaseURL:='';
  261. end;
  262. function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
  263. begin
  264. { Abstract }
  265. CheckURL:=true;
  266. end;
  267. function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
  268. begin
  269. { Abstract }
  270. CheckText:=true;
  271. end;
  272. procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
  273. begin
  274. { Abstract }
  275. end;
  276. procedure TCustomHTMLLinkScanner.AddRef(LinkURL: string);
  277. begin
  278. { Abstract }
  279. end;
  280. procedure TCustomHTMLLinkScanner.AddNameID(AName: string);
  281. begin
  282. { Abstract }
  283. end;
  284. procedure TCustomHTMLLinkScanner.AddID(AName: string);
  285. begin
  286. { Abstract }
  287. end;
  288. constructor TNameID.Init(const AName : string; Astate : TNameIDState);
  289. begin
  290. inherited Init;
  291. SetStr(Name,AName);
  292. Origin:=nil;
  293. State:=[AState];
  294. end;
  295. destructor TNameID.Done;
  296. begin
  297. if assigned(Name) then
  298. DisposeStr(Name);
  299. Name:=nil;
  300. if assigned(Origin) then
  301. DisposeStr(Origin);
  302. Origin:=nil;
  303. inherited Done;
  304. end;
  305. procedure TNameID.SetState(Astate : TNameIDState; enabled : boolean);
  306. begin
  307. if enabled then
  308. Include(State,AState)
  309. else
  310. Exclude(State,AState);
  311. end;
  312. function TNameID.GetState : TNameIDStates;
  313. begin
  314. GetState:=State;
  315. end;
  316. function TNameID.GetName : string;
  317. begin
  318. GetName:=GetStr(Name);
  319. end;
  320. function TNameID.GetOrigin : string;
  321. begin
  322. GetOrigin:=GetStr(Origin);
  323. end;
  324. procedure TNameID.SetOrigin(const AOrigin : string);
  325. begin
  326. SetStr(Origin,AOrigin);
  327. end;
  328. procedure TNameID.SetLine(ALine : sw_integer);
  329. begin
  330. Line:=ALine;
  331. end;
  332. function TNameID.GetLine : sw_integer;
  333. begin
  334. GetLine:=Line;
  335. end;
  336. function TNameIDCollection.At(Index: sw_Integer): PNameID;
  337. begin
  338. At:=Inherited At(Index);
  339. end;
  340. function TNameIDCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  341. var
  342. R: sw_integer;
  343. K1: PNameID absolute Key1;
  344. K2: PNameID absolute Key2;
  345. S1,S2: string;
  346. begin
  347. S1:=K1^.GetName;
  348. S2:=K2^.GetName;
  349. S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
  350. if S1<S2 then R:=-1 else
  351. if S1>S2 then R:= 1 else
  352. R:=0;
  353. Compare:=R;
  354. end;
  355. constructor THTMLLinkScanDocument.Init(const ADocName: string);
  356. begin
  357. inherited Init;
  358. SetStr(DocName,ADocName);
  359. New(Aliases, Init(10,10));
  360. {$ifdef DEBUG}
  361. DebugMessage('',' Adding New LinkScan document "'+ADocName+'"',1,1);
  362. {$endif DEBUG}
  363. Synonym:=nil;
  364. end;
  365. function THTMLLinkScanDocument.GetName: string;
  366. begin
  367. GetName:=GetStr(DocName);
  368. end;
  369. function THTMLLinkScanDocument.GetUniqueName: string;
  370. var
  371. PD: PHTMLLinkScanDocument;
  372. begin
  373. PD:=@Self;
  374. while assigned(PD^.synonym) do
  375. PD:=PD^.Synonym;
  376. GetUniqueName:=GetStr(PD^.DocName);
  377. end;
  378. function THTMLLinkScanDocument.GetAliasCount: sw_integer;
  379. begin
  380. GetAliasCount:=Aliases^.Count;
  381. end;
  382. function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
  383. begin
  384. GetAlias:=GetStr(Aliases^.At(Index));
  385. end;
  386. procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
  387. begin
  388. Aliases^.Insert(NewStr(Alias));
  389. {$ifdef DEBUG}
  390. DebugMessage('',' Adding alias "'+Alias+'" to LinkScan document "'+GetStr(DocName)+'"',1,1);
  391. {$endif DEBUG}
  392. end;
  393. constructor THTMLLinkScanDocument.Load(var S: TStream);
  394. var
  395. i: sw_integer;
  396. begin
  397. inherited Init;
  398. DocName:=S.ReadStr;
  399. if assigned(DocName) then
  400. for i:=1 to Length(DocName^) do
  401. if (DocName^[i]='\') or (DocName^[i]='/') then
  402. DocName^[i]:=DirSep;
  403. New(Aliases, Load(S));
  404. end;
  405. procedure THTMLLinkScanDocument.Store(var S: TStream);
  406. begin
  407. S.WriteStr(DocName);
  408. Aliases^.Store(S);
  409. end;
  410. destructor THTMLLinkScanDocument.Done;
  411. begin
  412. if Assigned(Aliases) then
  413. Dispose(Aliases, Done);
  414. Aliases:=nil;
  415. if Assigned(DocName) then
  416. DisposeStr(DocName);
  417. DocName:=nil;
  418. inherited Done;
  419. end;
  420. constructor THTMLLinkScanDocumentCollection.Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
  421. begin
  422. inherited Init(ALimit,ADelta);
  423. Scanner:=AScanner;
  424. end;
  425. function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  426. var R: sw_integer;
  427. K1: PHTMLLinkScanDocument absolute Key1;
  428. K2: PHTMLLinkScanDocument absolute Key2;
  429. S1,S2: string;
  430. begin
  431. S1:=K1^.GetName; S2:=K2^.GetName;
  432. if Assigned(Scanner) then
  433. begin S1:=Scanner^.ExpandChildURL(S1); S2:=Scanner^.ExpandChildURL(S2); end;
  434. S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
  435. if S1<S2 then R:=-1 else
  436. if S1>S2 then R:= 1 else
  437. R:=0;
  438. Compare:=R;
  439. end;
  440. function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
  441. begin
  442. At:=inherited At(Index);
  443. end;
  444. function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
  445. var D,P: PHTMLLinkScanDocument;
  446. Index: sw_integer;
  447. begin
  448. New(D, Init(DocName));
  449. if Search(D, Index)=false then P:=nil else
  450. P:=At(Index);
  451. Dispose(D, Done);
  452. SearchDocument:=P;
  453. end;
  454. procedure THTMLLinkScanDocumentCollection.MoveAliasesToSynonym;
  455. procedure MoveAliases(P: PHTMLLinkScanDocument);
  456. var
  457. PD: PHTMLLinkScanDocument;
  458. i: sw_integer;
  459. begin
  460. if not assigned(P^.synonym) then
  461. exit;
  462. PD:=P;
  463. while assigned(PD^.synonym) do
  464. PD:=PD^.Synonym;
  465. For i:=P^.GetAliasCount-1 downto 0 do
  466. begin
  467. PD^.AddAlias(P^.GetAlias(i));
  468. P^.Aliases^.AtFree(i);
  469. end;
  470. end;
  471. begin
  472. ForEach(TCallbackProcParam(@MoveAliases));
  473. end;
  474. constructor THTMLLinkScanner.Init(const ABaseDir: string);
  475. begin
  476. inherited Init;
  477. New(Documents, Init(@Self,50,100));
  478. SetBaseDir(ABaseDir);
  479. end;
  480. procedure THTMLLinkScanner.SetBaseDir(const ABaseDir: string);
  481. begin
  482. if Assigned(BaseDir) then DisposeStr(BaseDir);
  483. BaseDir:=NewStr(CompleteDir(ABaseDir));
  484. end;
  485. function THTMLLinkScanner.GetDocumentCount: sw_integer;
  486. begin
  487. GetDocumentCount:=Documents^.Count;
  488. end;
  489. function THTMLLinkScanner.ExpandChildURL(const S: string): string;
  490. begin
  491. ExpandChildURL:=CompleteURL(GetStr(BaseDir),S);
  492. end;
  493. function THTMLLinkScanner.NormalizeChildURL(const S: string): string;
  494. var URL: string;
  495. begin
  496. URL:=S;
  497. if GetStr(BaseDir)<>'' then
  498. if copy(UpcaseStr(S),1,length(GetStr(BaseDir)))=UpcaseStr(GetStr(BaseDir)) then
  499. URL:=copy(S,length(GetStr(BaseDir))+1,length(S));
  500. NormalizeChildURL:=URL;
  501. end;
  502. function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
  503. begin
  504. GetDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetName);
  505. end;
  506. function THTMLLinkScanner.GetUniqueDocumentURL(DocIndex: sw_integer): string;
  507. begin
  508. GetUniqueDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetUniqueName);
  509. end;
  510. function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
  511. begin
  512. GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
  513. end;
  514. function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
  515. begin
  516. GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
  517. end;
  518. procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
  519. var D: PHTMLLinkScanDocument;
  520. DoInsert: boolean;
  521. int: sw_integer;
  522. Text: string;
  523. error: word;
  524. begin
  525. D:=Documents^.SearchDocument(LinkURL);
  526. if D=nil then
  527. begin
  528. New(D, Init(NormalizeChildURL(LinkURL)));
  529. Documents^.Insert(D);
  530. end;
  531. If assigned(LastSynonym) then
  532. LastSynonym^.Synonym:=D;
  533. DoInsert:=true;
  534. If (length(LinkText)=0) or (Pos(',',LinkText)=1) then
  535. DoInsert:=false;
  536. Val(LinkText,int,error);
  537. If (Error>1) and (LinkText[Error]=' ') then
  538. Text:=Trim(Copy(LinkText,error+1,length(LinkText)))
  539. else
  540. Text:=LinkText;
  541. IF DoInsert then
  542. D^.AddAlias(Text);
  543. If InNameAnchor then
  544. LastSynonym:=D;
  545. end;
  546. constructor THTMLLinkScanner.LoadDocuments(var S: TStream);
  547. var P,L: longint;
  548. OK: boolean;
  549. PS: PString;
  550. begin
  551. OK:=false;
  552. P:=S.GetPos;
  553. S.Read(L,sizeof(L));
  554. if (S.Status=stOK) and (L=HTMLIndexMagicNo) then
  555. begin
  556. S.Read(L,sizeof(L));
  557. CurrentHTMLIndexVersion:=L;
  558. OK:=(S.Status=stOK);
  559. end;
  560. if not OK then
  561. begin
  562. S.Reset;
  563. S.Seek(P);
  564. end
  565. else
  566. BaseDir:=S.ReadStr;
  567. New(Documents, Load(S));
  568. if not Assigned(Documents) then
  569. Fail;
  570. Documents^.MoveAliasesToSynonym;
  571. CurrentHTMLIndexVersion:=HTMLIndexVersion;
  572. end;
  573. function THTMLLinkScanner.FindID(const AName : string) : PNameID;
  574. begin
  575. {abstract}FindID:=nil;
  576. end;
  577. procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
  578. var L: longint;
  579. begin
  580. L:=HTMLIndexMagicNo;
  581. S.Write(L,sizeof(L));
  582. L:=HTMLIndexVersion;
  583. CurrentHTMLIndexVersion:=L;
  584. S.Write(L,sizeof(L));
  585. S.WriteStr(BaseDir);
  586. Documents^.MoveAliasesToSynonym;
  587. Documents^.Store(S);
  588. end;
  589. destructor THTMLLinkScanner.Done;
  590. begin
  591. if Assigned(Documents) then
  592. Dispose(Documents, Done);
  593. Documents:=nil;
  594. if Assigned(BaseDir) then
  595. DisposeStr(BaseDir);
  596. BaseDir:=nil;
  597. inherited Done;
  598. end;
  599. constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
  600. begin
  601. inherited Init;
  602. SetStr(DocumentURL,ADocumentURL);
  603. New(NameIDList, Init(5,10));
  604. end;
  605. function THTMLLinkScanFile.GetDocumentURL: string;
  606. begin
  607. GetDocumentURL:=GetStr(DocumentURL);
  608. end;
  609. function THTMLLinkScanFile.AddReferencedName (const AName : string) : PNameID;
  610. var
  611. index : sw_integer;
  612. PN : PNameID;
  613. begin
  614. new(PN,init(AName,IsReferenced));
  615. if not NameIDList^.Search(PN,Index) then
  616. NameIDList^.Insert(PN)
  617. else
  618. begin
  619. dispose(PN,Done);
  620. PN:=NameIDList^.At(Index);
  621. PN^.SetState(IsReferenced,true);
  622. end;
  623. AddReferencedName:=PN;
  624. end;
  625. function THTMLLinkScanFile.AddFoundName (const AName : string) : PNameID;
  626. var
  627. index : sw_integer;
  628. PN : PNameID;
  629. begin
  630. new(PN,init(AName,IsFound));
  631. if not NameIDList^.Search(PN,Index) then
  632. NameIDList^.Insert(PN)
  633. else
  634. begin
  635. dispose(PN,Done);
  636. PN:=NameIDList^.At(Index);
  637. PN^.SetState(IsFound,true);
  638. end;
  639. AddFoundName:=PN;
  640. end;
  641. procedure THTMLLinkScanFile.CheckNameList;
  642. var
  643. i : sw_integer;
  644. PN,PN2 : PNameID;
  645. begin
  646. {$ifdef DEBUG}
  647. for i:=0 to NameIDList^.Count-1 do
  648. begin
  649. PN:=NameIDList^.At(i);
  650. if not (IsFound in PN^.GetState) then
  651. begin
  652. if (IsReferenced in PN^.GetState) then
  653. DebugMessage(GetDocumentURL,'Name "'+PN^.GetName+'" from "'+
  654. PN^.GetOrigin+'" not found',1,1);
  655. PN2:=Owner^.FindID(PN^.GetName);
  656. if assigned(PN2) then
  657. begin
  658. DebugMessage('','ID found in "'+PN2^.GetOrigin+'"',1,1);
  659. if not (IsFound in PN2^.GetState) then
  660. DebugMessage('','ID not found',1,1);
  661. end;
  662. end;
  663. end;
  664. {$endif DEBUG}
  665. end;
  666. function THTMLLinkScanFile.FindID(const AName : string) : PNameID;
  667. var
  668. PN : PNameID;
  669. Index : sw_integer;
  670. begin
  671. new(PN,init(AName,IsID));
  672. if NameIDList^.Search(PN,Index) then
  673. begin
  674. dispose(PN,done);
  675. PN:=NameIDList^.At(Index);
  676. if (IsID in PN^.GetState) then
  677. FindId:=PN
  678. else
  679. FindID:=nil;
  680. end
  681. else
  682. begin
  683. dispose(PN,done);
  684. PN:=nil;
  685. FindID:=nil;
  686. end;
  687. end;
  688. destructor THTMLLinkScanFile.Done;
  689. begin
  690. if Assigned(DocumentURL) then
  691. DisposeStr(DocumentURL);
  692. DocumentURL:=nil;
  693. dispose(NameIDList,done);
  694. NameIDList:=nil;
  695. inherited Done;
  696. end;
  697. function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
  698. begin
  699. At:=inherited At(Index);
  700. end;
  701. function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  702. var R: integer;
  703. K1: PHTMLLinkScanFile absolute Key1;
  704. K2: PHTMLLinkScanFile absolute Key2;
  705. S1,S2: string;
  706. begin
  707. S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
  708. if S1<S2 then R:=-1 else
  709. if S1>S2 then R:= 1 else
  710. R:=0;
  711. Compare:=R;
  712. end;
  713. function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
  714. var P,D: PHTMLLinkScanFile;
  715. Index: sw_integer;
  716. begin
  717. New(D, Init(DocURL));
  718. if Search(D,Index)=false then P:=nil else
  719. P:=At(Index);
  720. Dispose(D, Done);
  721. SearchFile:=P;
  722. end;
  723. function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
  724. var I: sw_integer;
  725. P,D: PHTMLLinkScanFile;
  726. begin
  727. P:=nil;
  728. for I:=0 to Count-1 do
  729. begin
  730. D:=At(I);
  731. if D^.State=AState then
  732. begin
  733. P:=D;
  734. Break;
  735. end;
  736. end;
  737. FindFileWithState:=P;
  738. end;
  739. procedure THTMLLinkScanFileCollection.CheckNameIDLists;
  740. procedure DoCheckNameList(P : PHTMLLinkScanFile);
  741. begin
  742. P^.CheckNameList;
  743. end;
  744. begin
  745. ForEach(TCallbackProcParam(@DoCheckNameList));
  746. end;
  747. constructor THTMLFileLinkScanner.Init(const ABaseDir: string);
  748. begin
  749. inherited Init(ABaseDir);
  750. New(DocumentFiles, Init(50,100));
  751. New(IDList, Init(50,100));
  752. {$ifdef DEBUG}
  753. DebugMessage('','THTMLFileLinkScanner Init "'+ABaseDir+'"',1,1);
  754. {$endif DEBUG}
  755. end;
  756. procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
  757. var P: PHTMLLinkScanFile;
  758. begin
  759. CurBaseURL:='';
  760. Options:=AOptions;
  761. ScheduleDoc(DocumentURL);
  762. repeat
  763. P:=DocumentFiles^.FindFileWithState(ssScheduled);
  764. if Assigned(P) then
  765. ProcessDoc(P);
  766. until P=nil;
  767. {$ifdef DEBUG}
  768. DebugMessage('','THTMLFileLinkScanner CheckNameList start ',1,1);
  769. DocumentFiles^.CheckNameIDLists;
  770. DebugMessage('','THTMLFileLinkScanner CheckNameList end ',1,1);
  771. {$endif DEBUG}
  772. end;
  773. function THTMLFileLinkScanner.GetDocumentBaseURL: string;
  774. begin
  775. GetDocumentBaseURL:=CurBaseURL;
  776. end;
  777. function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
  778. var OK: boolean;
  779. begin
  780. if soSubDocsOnly in Options then
  781. OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
  782. else
  783. OK:=true;
  784. CheckURL:=OK;
  785. end;
  786. procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
  787. var D: PHTMLLinkScanFile;
  788. P: sw_integer;
  789. DocURL: string;
  790. begin
  791. P:=Pos('#',LinkURL);
  792. if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
  793. D:=DocumentFiles^.SearchFile(DocURL);
  794. if not Assigned(D) then
  795. ScheduleDoc(DocURL);
  796. inherited AddLink(LinkText,LinkURL);
  797. end;
  798. procedure THTMLFileLinkScanner.AddRef(LinkURL: string);
  799. var D: PHTMLLinkScanFile;
  800. P: sw_integer;
  801. DocURL: string;
  802. PN : PNameID;
  803. begin
  804. {$ifdef DEBUG}
  805. DebugMessage(CurDoc,' Adding Ref to "'+
  806. LinkURL+'"',Line,1);
  807. {$endif DEBUG}
  808. P:=Pos('#',LinkURL);
  809. if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
  810. D:=DocumentFiles^.SearchFile(DocURL);
  811. if not Assigned(D) then
  812. ScheduleDoc(DocURL);
  813. D:=DocumentFiles^.SearchFile(DocURL);
  814. if P>0 then
  815. begin
  816. PN:=D^.AddReferencedName(copy(LinkURL,P+1,length(LinkURL)));
  817. PN^.SetOrigin(CurDoc);
  818. PN^.SetLine(Line);
  819. end;
  820. end;
  821. procedure THTMLFileLinkScanner.AddNameID(AName : string);
  822. var D: PHTMLLinkScanFile;
  823. P: sw_integer;
  824. PN : PNameID;
  825. DocURL: string;
  826. begin
  827. {$ifdef DEBUG}
  828. DebugMessage(CurDoc,' Adding NameID "'+
  829. CurName+'"',Line,1);
  830. {$endif DEBUG}
  831. P:=Pos('#',AName);
  832. if P=0 then DocURL:=AName else DocURL:=copy(AName,1,P-1);
  833. D:=DocumentFiles^.SearchFile(DocURL);
  834. if not Assigned(D) then
  835. ScheduleDoc(DocURL);
  836. D:=DocumentFiles^.SearchFile(DocURL);
  837. PN:=D^.AddFoundName(copy(AName,P+1,length(AName)));
  838. PN^.SetOrigin(CurDoc);
  839. PN^.SetLine(Line);
  840. end;
  841. procedure THTMLFileLinkScanner.AddID(AName : string);
  842. var
  843. D: PHTMLLinkScanFile;
  844. PN : PNameID;
  845. index : sw_integer;
  846. begin
  847. {$ifdef DEBUG}
  848. DebugMessage(CurDoc,' Adding Id "'+
  849. AName+'"',Line,1);
  850. {$endif DEBUG}
  851. D:=DocumentFiles^.SearchFile(CurDoc);
  852. if not Assigned(D) then
  853. ScheduleDoc(CurDoc);
  854. D:=DocumentFiles^.SearchFile(CurDoc);
  855. PN:=D^.AddFoundName(AName);
  856. PN^.SetState(IsId,true);
  857. PN^.SetOrigin(CurDoc);
  858. PN^.SetLine(Line);
  859. new(PN,init(AName,IsID));
  860. if IDList^ .Search(PN,index) then
  861. begin
  862. dispose(PN,done);
  863. {$ifdef DEBUG}
  864. PN:=IDList^.At(Index);
  865. DebugMessage(CurDoc,'ID "'+AName+'" already defined in "'+
  866. PN^.GetOrigin+'('+IntToStr(PN^.GetLine)+')"',Line,1);
  867. {$endif DEBUG}
  868. end
  869. else
  870. begin
  871. IDList^.Insert(PN);
  872. PN^.SetOrigin(CurDoc);
  873. PN^.SetLine(Line);
  874. end;
  875. end;
  876. function THTMLFileLinkScanner.FindID(const AName : string) : PNameID;
  877. Function ContainsNamedID(D : PHTMLLinkScanFile) : boolean;
  878. begin
  879. ContainsNamedID:=D^.FindID(AName)<>nil;
  880. end;
  881. var
  882. D : PHTMLLinkScanFile;
  883. begin
  884. D:=DocumentFiles^.FirstThat(TCallbackFunBoolParam(@ContainsNamedID));
  885. if assigned(D) then
  886. FindID:=D^.FindID(AName)
  887. else
  888. FindID:=nil;
  889. end;
  890. procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
  891. var F: PDOSTextFile;
  892. begin
  893. if Assigned(Doc)=false then Exit;
  894. Doc^.State:=ssProcessing;
  895. CurDoc:=Doc^.GetDocumentURL;
  896. New(F, Init(CurDoc));
  897. if Assigned(F) then
  898. begin
  899. CurBaseURL:=CompleteURL(CurDoc,'');
  900. {$ifdef DEBUG}
  901. DebugMessage(CurDoc,'Processing "'+CurDoc+'"',1,1);
  902. {$endif DEBUG}
  903. Process(F);
  904. {$ifdef DEBUG}
  905. DebugMessage(CurDoc,'Finished processing "'+CurDoc+'"',Line,1);
  906. {$endif DEBUG}
  907. Dispose(F, Done);
  908. end
  909. else
  910. begin
  911. {$ifdef DEBUG}
  912. DebugMessage(CurDoc,'file not found',1,1);
  913. {$endif DEBUG}
  914. end;
  915. Doc^.State:=ssScanned;
  916. CurDoc:='';
  917. end;
  918. procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
  919. var D: PHTMLLinkScanFile;
  920. begin
  921. New(D, Init(DocumentURL));
  922. D^.State:=ssScheduled;
  923. D^.Owner:=@Self;
  924. {$ifdef DEBUG}
  925. DebugMessage('','Scheduling file "'+DocumentURL+'"',1,1);
  926. {$endif DEBUG}
  927. DocumentFiles^.Insert(D);
  928. end;
  929. destructor THTMLFileLinkScanner.Done;
  930. begin
  931. if Assigned(DocumentFiles) then
  932. Dispose(DocumentFiles, Done);
  933. DocumentFiles:=nil;
  934. if Assigned(IDList) then
  935. Dispose(IDList, Done);
  936. IDList:=nil;
  937. inherited Done;
  938. end;
  939. procedure RegisterWHTMLScan;
  940. begin
  941. RegisterType(RHTMLLinkScanDocument);
  942. end;
  943. END.