whtmlscn.pas 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 2000 by Berczi Gabor
  4. HTML scanner objects
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit WHTMLScn;
  12. {$ifdef cpullvm}
  13. {$modeswitch nestedprocvars}
  14. {$endif}
  15. interface
  16. uses Objects,
  17. WHTML;
  18. const
  19. HTMLIndexMagicNo = ord('H')+ord('H') shl 8+ord('I') shl 16+ord('X') shl 24;
  20. HTMLIndexVersion = 2;
  21. type
  22. PHTMLLinkScanner = ^THTMLLinkScanner;
  23. PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
  24. TCustomHTMLLinkScanner = object(THTMLParser)
  25. function DocAddTextChar(C: char): boolean; virtual;
  26. procedure DocAnchor(Entered: boolean); virtual;
  27. public
  28. {a}function CheckURL(const URL: string): boolean; virtual;
  29. {a}function CheckText(const Text: string): boolean; virtual;
  30. {a}procedure AddLink(const LinkText, LinkURL: string); virtual;
  31. {a}procedure AddRef(LinkURL: string); virtual;
  32. {a}procedure AddNameID(AName: string); virtual;
  33. {a}procedure AddID(AName: string); virtual;
  34. {a}function GetDocumentBaseURL: string; virtual;
  35. private
  36. CurLinkText: string;
  37. CurURL: string;
  38. CurName,
  39. CurID: string;
  40. CurDoc: string;
  41. InAnchor,InNameAnchor,
  42. HasHRef : boolean;
  43. LastSynonym: PHTMLLinkScanDocument;
  44. end;
  45. TNameIDState = (IsReferenced, IsFound,IsID);
  46. TNameIDStates = set of TNameIDState;
  47. PNameID = ^TNameID;
  48. TNameID = object(TObject)
  49. constructor Init(const AName : string; Astate : TNameIDState);
  50. destructor Done; virtual;
  51. procedure SetState(Astate : TNameIDState; enabled : boolean);
  52. procedure SetOrigin(const AOrigin : string);
  53. procedure SetLine(ALine : sw_integer);
  54. function GetLine : sw_integer;
  55. function GetState : TNameIDStates;
  56. function GetName : string;
  57. function GetOrigin : string;
  58. private
  59. Name : pstring;
  60. Origin : pstring;
  61. Line : sw_integer;
  62. State : TNameIDStates;
  63. end;
  64. PNameIDCollection = ^TNameIDCollection;
  65. TNameIDCollection = object(TSortedCollection)
  66. function At(Index: sw_Integer): PNameID;
  67. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  68. end;
  69. THTMLLinkScanDocument = object(TObject)
  70. constructor Init(const ADocName: string);
  71. function GetName: string;
  72. function GetUniqueName: string;
  73. function GetAliasCount: sw_integer;
  74. function GetAlias(Index: sw_integer): string;
  75. procedure AddAlias(const Alias: string);
  76. constructor Load(var S: TStream);
  77. procedure Store(var S: TStream);
  78. destructor Done; virtual;
  79. private
  80. DocName: PString;
  81. Synonym: PHTMLLinkScanDocument;
  82. Aliases: PStringCollection;
  83. end;
  84. PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
  85. THTMLLinkScanDocumentCollection = object(TSortedCollection)
  86. constructor Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
  87. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  88. function At(Index: sw_Integer): PHTMLLinkScanDocument;
  89. function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
  90. procedure MoveAliasesToSynonym;
  91. private
  92. Scanner: PHTMLLinkScanner;
  93. end;
  94. THTMLLinkScanner = object(TCustomHTMLLinkScanner)
  95. constructor Init(const ABaseDir: string);
  96. procedure SetBaseDir(const ABaseDir: string);
  97. {a}function FindID(const AName : string) : PNameID; virtual;
  98. function GetDocumentCount: sw_integer;
  99. function GetDocumentURL(DocIndex: sw_integer): string;
  100. function GetUniqueDocumentURL(DocIndex: sw_integer): string;
  101. function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
  102. function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
  103. constructor LoadDocuments(var S: TStream);
  104. procedure StoreDocuments(var S: TStream);
  105. destructor Done; virtual;
  106. public
  107. procedure AddLink(const LinkText, LinkURL: string); virtual;
  108. private
  109. Documents: PHTMLLinkScanDocumentCollection;
  110. BaseDir: PString;
  111. function ExpandChildURL(const S: string): string;
  112. function NormalizeChildURL(const S: string): string;
  113. end;
  114. THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
  115. PHTMLLinkScanFile = ^THTMLLinkScanFile;
  116. THTMLLinkScanFile = object(TObject)
  117. constructor Init(const ADocumentURL: string);
  118. function GetDocumentURL: string;
  119. destructor Done; virtual;
  120. function AddReferencedName (const AName : string) : PNameID;
  121. function AddFoundName (const AName : string) : PNameID;
  122. procedure CheckNameList;
  123. function FindID(const AName : string) : PNameID; virtual;
  124. private
  125. DocumentURL : PString;
  126. NameIDList : PNameIDCollection;
  127. Owner : PHTMLLinkScanner;
  128. public
  129. State : THTMLLinkScanState;
  130. end;
  131. PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
  132. THTMLLinkScanFileCollection = object(TSortedCollection)
  133. function At(Index: sw_Integer): PHTMLLinkScanFile;
  134. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  135. function SearchFile(const DocURL: string): PHTMLLinkScanFile;
  136. function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
  137. procedure CheckNameIDLists;
  138. end;
  139. THTMLLinkScanOption = (soSubDocsOnly);
  140. THTMLLinkScanOptions = set of THTMLLinkScanOption;
  141. THTMLFileLinkScanner = object(THTMLLinkScanner)
  142. constructor Init(const ABaseDir: string);
  143. procedure ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
  144. destructor Done; virtual;
  145. public
  146. function GetDocumentBaseURL: string; virtual;
  147. function FindID(const AName : string) : PNameID; virtual;
  148. procedure AddLink(const LinkText, LinkURL: string); virtual;
  149. procedure AddRef(LinkURL: string); virtual;
  150. procedure AddNameID(AName: string); virtual;
  151. procedure AddID(AName: string); virtual;
  152. function CheckURL(const URL: string): boolean; virtual;
  153. private
  154. Options: THTMLLinkScanOptions;
  155. BaseURL: string;
  156. CurBaseURL: string;
  157. IDList : PNameIDCollection;
  158. DocumentFiles: PHTMLLinkScanFileCollection;
  159. procedure ScheduleDoc(const DocumentURL: string);
  160. public
  161. procedure ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
  162. end;
  163. procedure RegisterWHTMLScan;
  164. implementation
  165. uses
  166. WUtils;
  167. const
  168. RHTMLLinkScanDocument: TStreamRec = (
  169. ObjType: 19500;
  170. VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
  171. Load: @THTMLLinkScanDocument.Load;
  172. Store: @THTMLLinkScanDocument.Store
  173. );
  174. const
  175. CurrentHTMLIndexVersion : sw_integer = HTMLIndexVersion;
  176. function TCustomHTMLLinkScanner.DocAddTextChar(C: char): boolean;
  177. var Added: boolean;
  178. begin
  179. Added:=false;
  180. if InAnchor then
  181. begin
  182. CurLinkText:=CurLinkText+C;
  183. Added:=true;
  184. end;
  185. if ord(c)>32 then
  186. LastSynonym:=nil;
  187. DocAddTextChar:=Added;
  188. end;
  189. procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
  190. begin
  191. if Entered then
  192. begin
  193. CurLinkText:='';
  194. if DocGetTagParam('HREF',CurURL) then
  195. HasHRef:=true
  196. else
  197. CurURL:='';
  198. if not DocGetTagParam('NAME',CurName) then
  199. if not DocGetTagParam('ID',CurName) then
  200. CurName:='';
  201. if not DocGetTagParam('ID',CurID) then
  202. CurID:='';
  203. if CurName<>'' then
  204. begin
  205. InNameAnchor:=true;
  206. If Pos('#',CurName)=0 then
  207. CurName:=CurDoc+'#'+CurName;
  208. CurName:=Trim(CurName);
  209. CurName:=CompleteURL(GetDocumentBaseURL,CurName);
  210. if CurURL='' then
  211. CurURL:=CurName;
  212. end
  213. else
  214. CurName:='';
  215. CurURL:=Trim(CurURL);
  216. if pos('#',CurURL)=1 then
  217. CurURL:=CurDoc+CurURL;
  218. CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
  219. end
  220. else
  221. begin
  222. CurLinkText:=Trim(CurLinkText);
  223. if HasHRef then
  224. begin
  225. if CheckURL(CurURL) and CheckText(CurLinkText) and
  226. not DisableCrossIndexing then
  227. begin
  228. AddLink(CurLinkText,CurURL);
  229. {$ifdef DEBUG}
  230. DebugMessage(CurDoc,' Adding ScanLink "'+CurLinkText+'" to "'+
  231. CurURL+'"',Line,1);
  232. {$endif DEBUG}
  233. end;
  234. { Be sure to parse referenced file,
  235. even if that link is not valid }
  236. AddRef(CurURL);
  237. end;
  238. if not HasHRef and InNameAnchor and CheckURL(CurName) and CheckText(CurLinkText) then
  239. begin
  240. AddLink(CurLinkText,CurName);
  241. {$ifdef DEBUG}
  242. DebugMessage(CurDoc,' Adding ScanName "'+CurLinkText+'" to "'+CurName+'"',Line,1);
  243. {$endif DEBUG}
  244. end;
  245. if InNameAnchor then
  246. begin
  247. AddNameID(CurName);
  248. end;
  249. if not HasHRef and (CurID<>'') then
  250. AddID(CurID);
  251. InNameAnchor:=false;
  252. HasHRef:=false;
  253. end;
  254. InAnchor:=Entered;
  255. end;
  256. function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
  257. begin
  258. { Abstract }
  259. GetDocumentBaseURL:='';
  260. end;
  261. function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
  262. begin
  263. { Abstract }
  264. CheckURL:=true;
  265. end;
  266. function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
  267. begin
  268. { Abstract }
  269. CheckText:=true;
  270. end;
  271. procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
  272. begin
  273. { Abstract }
  274. end;
  275. procedure TCustomHTMLLinkScanner.AddRef(LinkURL: string);
  276. begin
  277. { Abstract }
  278. end;
  279. procedure TCustomHTMLLinkScanner.AddNameID(AName: string);
  280. begin
  281. { Abstract }
  282. end;
  283. procedure TCustomHTMLLinkScanner.AddID(AName: string);
  284. begin
  285. { Abstract }
  286. end;
  287. constructor TNameID.Init(const AName : string; Astate : TNameIDState);
  288. begin
  289. inherited Init;
  290. SetStr(Name,AName);
  291. Origin:=nil;
  292. State:=[AState];
  293. end;
  294. destructor TNameID.Done;
  295. begin
  296. if assigned(Name) then
  297. DisposeStr(Name);
  298. Name:=nil;
  299. if assigned(Origin) then
  300. DisposeStr(Origin);
  301. Origin:=nil;
  302. inherited Done;
  303. end;
  304. procedure TNameID.SetState(Astate : TNameIDState; enabled : boolean);
  305. begin
  306. if enabled then
  307. Include(State,AState)
  308. else
  309. Exclude(State,AState);
  310. end;
  311. function TNameID.GetState : TNameIDStates;
  312. begin
  313. GetState:=State;
  314. end;
  315. function TNameID.GetName : string;
  316. begin
  317. GetName:=GetStr(Name);
  318. end;
  319. function TNameID.GetOrigin : string;
  320. begin
  321. GetOrigin:=GetStr(Origin);
  322. end;
  323. procedure TNameID.SetOrigin(const AOrigin : string);
  324. begin
  325. SetStr(Origin,AOrigin);
  326. end;
  327. procedure TNameID.SetLine(ALine : sw_integer);
  328. begin
  329. Line:=ALine;
  330. end;
  331. function TNameID.GetLine : sw_integer;
  332. begin
  333. GetLine:=Line;
  334. end;
  335. function TNameIDCollection.At(Index: sw_Integer): PNameID;
  336. begin
  337. At:=Inherited At(Index);
  338. end;
  339. function TNameIDCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  340. var
  341. R: sw_integer;
  342. K1: PNameID absolute Key1;
  343. K2: PNameID absolute Key2;
  344. S1,S2: string;
  345. begin
  346. S1:=K1^.GetName;
  347. S2:=K2^.GetName;
  348. S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
  349. if S1<S2 then R:=-1 else
  350. if S1>S2 then R:= 1 else
  351. R:=0;
  352. Compare:=R;
  353. end;
  354. constructor THTMLLinkScanDocument.Init(const ADocName: string);
  355. begin
  356. inherited Init;
  357. SetStr(DocName,ADocName);
  358. New(Aliases, Init(10,10));
  359. {$ifdef DEBUG}
  360. DebugMessage('',' Adding New LinkScan document "'+ADocName+'"',1,1);
  361. {$endif DEBUG}
  362. Synonym:=nil;
  363. end;
  364. function THTMLLinkScanDocument.GetName: string;
  365. begin
  366. GetName:=GetStr(DocName);
  367. end;
  368. function THTMLLinkScanDocument.GetUniqueName: string;
  369. var
  370. PD: PHTMLLinkScanDocument;
  371. begin
  372. PD:=@Self;
  373. while assigned(PD^.synonym) do
  374. PD:=PD^.Synonym;
  375. GetUniqueName:=GetStr(PD^.DocName);
  376. end;
  377. function THTMLLinkScanDocument.GetAliasCount: sw_integer;
  378. begin
  379. GetAliasCount:=Aliases^.Count;
  380. end;
  381. function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
  382. begin
  383. GetAlias:=GetStr(Aliases^.At(Index));
  384. end;
  385. procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
  386. begin
  387. Aliases^.Insert(NewStr(Alias));
  388. {$ifdef DEBUG}
  389. DebugMessage('',' Adding alias "'+Alias+'" to LinkScan document "'+GetStr(DocName)+'"',1,1);
  390. {$endif DEBUG}
  391. end;
  392. constructor THTMLLinkScanDocument.Load(var S: TStream);
  393. var
  394. i: sw_integer;
  395. begin
  396. inherited Init;
  397. DocName:=S.ReadStr;
  398. if assigned(DocName) then
  399. for i:=1 to Length(DocName^) do
  400. if (DocName^[i]='\') or (DocName^[i]='/') then
  401. DocName^[i]:=DirSep;
  402. New(Aliases, Load(S));
  403. end;
  404. procedure THTMLLinkScanDocument.Store(var S: TStream);
  405. begin
  406. S.WriteStr(DocName);
  407. Aliases^.Store(S);
  408. end;
  409. destructor THTMLLinkScanDocument.Done;
  410. begin
  411. if Assigned(Aliases) then
  412. Dispose(Aliases, Done);
  413. Aliases:=nil;
  414. if Assigned(DocName) then
  415. DisposeStr(DocName);
  416. DocName:=nil;
  417. inherited Done;
  418. end;
  419. constructor THTMLLinkScanDocumentCollection.Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
  420. begin
  421. inherited Init(ALimit,ADelta);
  422. Scanner:=AScanner;
  423. end;
  424. function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  425. var R: sw_integer;
  426. K1: PHTMLLinkScanDocument absolute Key1;
  427. K2: PHTMLLinkScanDocument absolute Key2;
  428. S1,S2: string;
  429. begin
  430. S1:=K1^.GetName; S2:=K2^.GetName;
  431. if Assigned(Scanner) then
  432. begin S1:=Scanner^.ExpandChildURL(S1); S2:=Scanner^.ExpandChildURL(S2); end;
  433. S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
  434. if S1<S2 then R:=-1 else
  435. if S1>S2 then R:= 1 else
  436. R:=0;
  437. Compare:=R;
  438. end;
  439. function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
  440. begin
  441. At:=inherited At(Index);
  442. end;
  443. function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
  444. var D,P: PHTMLLinkScanDocument;
  445. Index: sw_integer;
  446. begin
  447. New(D, Init(DocName));
  448. if Search(D, Index)=false then P:=nil else
  449. P:=At(Index);
  450. Dispose(D, Done);
  451. SearchDocument:=P;
  452. end;
  453. procedure THTMLLinkScanDocumentCollection.MoveAliasesToSynonym;
  454. procedure MoveAliases(P: PHTMLLinkScanDocument);
  455. var
  456. PD: PHTMLLinkScanDocument;
  457. i: sw_integer;
  458. begin
  459. if not assigned(P^.synonym) then
  460. exit;
  461. PD:=P;
  462. while assigned(PD^.synonym) do
  463. PD:=PD^.Synonym;
  464. For i:=P^.GetAliasCount-1 downto 0 do
  465. begin
  466. PD^.AddAlias(P^.GetAlias(i));
  467. P^.Aliases^.AtFree(i);
  468. end;
  469. end;
  470. begin
  471. ForEach(TCallbackProcParam(@MoveAliases));
  472. end;
  473. constructor THTMLLinkScanner.Init(const ABaseDir: string);
  474. begin
  475. inherited Init;
  476. New(Documents, Init(@Self,50,100));
  477. SetBaseDir(ABaseDir);
  478. end;
  479. procedure THTMLLinkScanner.SetBaseDir(const ABaseDir: string);
  480. begin
  481. if Assigned(BaseDir) then DisposeStr(BaseDir);
  482. BaseDir:=NewStr(CompleteDir(ABaseDir));
  483. end;
  484. function THTMLLinkScanner.GetDocumentCount: sw_integer;
  485. begin
  486. GetDocumentCount:=Documents^.Count;
  487. end;
  488. function THTMLLinkScanner.ExpandChildURL(const S: string): string;
  489. begin
  490. ExpandChildURL:=CompleteURL(GetStr(BaseDir),S);
  491. end;
  492. function THTMLLinkScanner.NormalizeChildURL(const S: string): string;
  493. var URL: string;
  494. begin
  495. URL:=S;
  496. if GetStr(BaseDir)<>'' then
  497. if copy(UpcaseStr(S),1,length(GetStr(BaseDir)))=UpcaseStr(GetStr(BaseDir)) then
  498. URL:=copy(S,length(GetStr(BaseDir))+1,length(S));
  499. NormalizeChildURL:=URL;
  500. end;
  501. function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
  502. begin
  503. GetDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetName);
  504. end;
  505. function THTMLLinkScanner.GetUniqueDocumentURL(DocIndex: sw_integer): string;
  506. begin
  507. GetUniqueDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetUniqueName);
  508. end;
  509. function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
  510. begin
  511. GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
  512. end;
  513. function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
  514. begin
  515. GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
  516. end;
  517. procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
  518. var D: PHTMLLinkScanDocument;
  519. DoInsert: boolean;
  520. int: sw_integer;
  521. Text: string;
  522. error: word;
  523. begin
  524. D:=Documents^.SearchDocument(LinkURL);
  525. if D=nil then
  526. begin
  527. New(D, Init(NormalizeChildURL(LinkURL)));
  528. Documents^.Insert(D);
  529. end;
  530. If assigned(LastSynonym) then
  531. LastSynonym^.Synonym:=D;
  532. DoInsert:=true;
  533. If (length(LinkText)=0) or (Pos(',',LinkText)=1) then
  534. DoInsert:=false;
  535. Val(LinkText,int,error);
  536. If (Error>1) and (LinkText[Error]=' ') then
  537. Text:=Trim(Copy(LinkText,error+1,length(LinkText)))
  538. else
  539. Text:=LinkText;
  540. IF DoInsert then
  541. D^.AddAlias(Text);
  542. If InNameAnchor then
  543. LastSynonym:=D;
  544. end;
  545. constructor THTMLLinkScanner.LoadDocuments(var S: TStream);
  546. var P,L: longint;
  547. OK: boolean;
  548. PS: PString;
  549. begin
  550. OK:=false;
  551. P:=S.GetPos;
  552. S.Read(L,sizeof(L));
  553. if (S.Status=stOK) and (L=HTMLIndexMagicNo) then
  554. begin
  555. S.Read(L,sizeof(L));
  556. CurrentHTMLIndexVersion:=L;
  557. OK:=(S.Status=stOK);
  558. end;
  559. if not OK then
  560. begin
  561. S.Reset;
  562. S.Seek(P);
  563. end
  564. else
  565. BaseDir:=S.ReadStr;
  566. New(Documents, Load(S));
  567. if not Assigned(Documents) then
  568. Fail;
  569. Documents^.MoveAliasesToSynonym;
  570. CurrentHTMLIndexVersion:=HTMLIndexVersion;
  571. end;
  572. function THTMLLinkScanner.FindID(const AName : string) : PNameID;
  573. begin
  574. {abstract}FindID:=nil;
  575. end;
  576. procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
  577. var L: longint;
  578. begin
  579. L:=HTMLIndexMagicNo;
  580. S.Write(L,sizeof(L));
  581. L:=HTMLIndexVersion;
  582. CurrentHTMLIndexVersion:=L;
  583. S.Write(L,sizeof(L));
  584. S.WriteStr(BaseDir);
  585. Documents^.MoveAliasesToSynonym;
  586. Documents^.Store(S);
  587. end;
  588. destructor THTMLLinkScanner.Done;
  589. begin
  590. if Assigned(Documents) then
  591. Dispose(Documents, Done);
  592. Documents:=nil;
  593. if Assigned(BaseDir) then
  594. DisposeStr(BaseDir);
  595. BaseDir:=nil;
  596. inherited Done;
  597. end;
  598. constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
  599. begin
  600. inherited Init;
  601. SetStr(DocumentURL,ADocumentURL);
  602. New(NameIDList, Init(5,10));
  603. end;
  604. function THTMLLinkScanFile.GetDocumentURL: string;
  605. begin
  606. GetDocumentURL:=GetStr(DocumentURL);
  607. end;
  608. function THTMLLinkScanFile.AddReferencedName (const AName : string) : PNameID;
  609. var
  610. index : sw_integer;
  611. PN : PNameID;
  612. begin
  613. new(PN,init(AName,IsReferenced));
  614. if not NameIDList^.Search(PN,Index) then
  615. NameIDList^.Insert(PN)
  616. else
  617. begin
  618. dispose(PN,Done);
  619. PN:=NameIDList^.At(Index);
  620. PN^.SetState(IsReferenced,true);
  621. end;
  622. AddReferencedName:=PN;
  623. end;
  624. function THTMLLinkScanFile.AddFoundName (const AName : string) : PNameID;
  625. var
  626. index : sw_integer;
  627. PN : PNameID;
  628. begin
  629. new(PN,init(AName,IsFound));
  630. if not NameIDList^.Search(PN,Index) then
  631. NameIDList^.Insert(PN)
  632. else
  633. begin
  634. dispose(PN,Done);
  635. PN:=NameIDList^.At(Index);
  636. PN^.SetState(IsFound,true);
  637. end;
  638. AddFoundName:=PN;
  639. end;
  640. procedure THTMLLinkScanFile.CheckNameList;
  641. var
  642. i : sw_integer;
  643. PN,PN2 : PNameID;
  644. begin
  645. {$ifdef DEBUG}
  646. for i:=0 to NameIDList^.Count-1 do
  647. begin
  648. PN:=NameIDList^.At(i);
  649. if not (IsFound in PN^.GetState) then
  650. begin
  651. if (IsReferenced in PN^.GetState) then
  652. DebugMessage(GetDocumentURL,'Name "'+PN^.GetName+'" from "'+
  653. PN^.GetOrigin+'" not found',1,1);
  654. PN2:=Owner^.FindID(PN^.GetName);
  655. if assigned(PN2) then
  656. begin
  657. DebugMessage('','ID found in "'+PN2^.GetOrigin+'"',1,1);
  658. if not (IsFound in PN2^.GetState) then
  659. DebugMessage('','ID not found',1,1);
  660. end;
  661. end;
  662. end;
  663. {$endif DEBUG}
  664. end;
  665. function THTMLLinkScanFile.FindID(const AName : string) : PNameID;
  666. var
  667. PN : PNameID;
  668. Index : sw_integer;
  669. begin
  670. new(PN,init(AName,IsID));
  671. if NameIDList^.Search(PN,Index) then
  672. begin
  673. dispose(PN,done);
  674. PN:=NameIDList^.At(Index);
  675. if (IsID in PN^.GetState) then
  676. FindId:=PN
  677. else
  678. FindID:=nil;
  679. end
  680. else
  681. begin
  682. dispose(PN,done);
  683. PN:=nil;
  684. FindID:=nil;
  685. end;
  686. end;
  687. destructor THTMLLinkScanFile.Done;
  688. begin
  689. if Assigned(DocumentURL) then
  690. DisposeStr(DocumentURL);
  691. DocumentURL:=nil;
  692. dispose(NameIDList,done);
  693. NameIDList:=nil;
  694. inherited Done;
  695. end;
  696. function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
  697. begin
  698. At:=inherited At(Index);
  699. end;
  700. function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  701. var R: integer;
  702. K1: PHTMLLinkScanFile absolute Key1;
  703. K2: PHTMLLinkScanFile absolute Key2;
  704. S1,S2: string;
  705. begin
  706. S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
  707. if S1<S2 then R:=-1 else
  708. if S1>S2 then R:= 1 else
  709. R:=0;
  710. Compare:=R;
  711. end;
  712. function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
  713. var P,D: PHTMLLinkScanFile;
  714. Index: sw_integer;
  715. begin
  716. New(D, Init(DocURL));
  717. if Search(D,Index)=false then P:=nil else
  718. P:=At(Index);
  719. Dispose(D, Done);
  720. SearchFile:=P;
  721. end;
  722. function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
  723. var I: sw_integer;
  724. P,D: PHTMLLinkScanFile;
  725. begin
  726. P:=nil;
  727. for I:=0 to Count-1 do
  728. begin
  729. D:=At(I);
  730. if D^.State=AState then
  731. begin
  732. P:=D;
  733. Break;
  734. end;
  735. end;
  736. FindFileWithState:=P;
  737. end;
  738. procedure THTMLLinkScanFileCollection.CheckNameIDLists;
  739. procedure DoCheckNameList(P : PHTMLLinkScanFile);
  740. begin
  741. P^.CheckNameList;
  742. end;
  743. begin
  744. ForEach(TCallbackProcParam(@DoCheckNameList));
  745. end;
  746. constructor THTMLFileLinkScanner.Init(const ABaseDir: string);
  747. begin
  748. inherited Init(ABaseDir);
  749. New(DocumentFiles, Init(50,100));
  750. New(IDList, Init(50,100));
  751. {$ifdef DEBUG}
  752. DebugMessage('','THTMLFileLinkScanner Init "'+ABaseDir+'"',1,1);
  753. {$endif DEBUG}
  754. end;
  755. procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
  756. var P: PHTMLLinkScanFile;
  757. begin
  758. CurBaseURL:='';
  759. Options:=AOptions;
  760. ScheduleDoc(DocumentURL);
  761. repeat
  762. P:=DocumentFiles^.FindFileWithState(ssScheduled);
  763. if Assigned(P) then
  764. ProcessDoc(P);
  765. until P=nil;
  766. {$ifdef DEBUG}
  767. DebugMessage('','THTMLFileLinkScanner CheckNameList start ',1,1);
  768. DocumentFiles^.CheckNameIDLists;
  769. DebugMessage('','THTMLFileLinkScanner CheckNameList end ',1,1);
  770. {$endif DEBUG}
  771. end;
  772. function THTMLFileLinkScanner.GetDocumentBaseURL: string;
  773. begin
  774. GetDocumentBaseURL:=CurBaseURL;
  775. end;
  776. function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
  777. var OK: boolean;
  778. begin
  779. if soSubDocsOnly in Options then
  780. OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
  781. else
  782. OK:=true;
  783. CheckURL:=OK;
  784. end;
  785. procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
  786. var D: PHTMLLinkScanFile;
  787. P: sw_integer;
  788. DocURL: string;
  789. begin
  790. P:=Pos('#',LinkURL);
  791. if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
  792. D:=DocumentFiles^.SearchFile(DocURL);
  793. if not Assigned(D) then
  794. ScheduleDoc(DocURL);
  795. inherited AddLink(LinkText,LinkURL);
  796. end;
  797. procedure THTMLFileLinkScanner.AddRef(LinkURL: string);
  798. var D: PHTMLLinkScanFile;
  799. P: sw_integer;
  800. DocURL: string;
  801. PN : PNameID;
  802. begin
  803. {$ifdef DEBUG}
  804. DebugMessage(CurDoc,' Adding Ref to "'+
  805. LinkURL+'"',Line,1);
  806. {$endif DEBUG}
  807. P:=Pos('#',LinkURL);
  808. if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
  809. D:=DocumentFiles^.SearchFile(DocURL);
  810. if not Assigned(D) then
  811. ScheduleDoc(DocURL);
  812. D:=DocumentFiles^.SearchFile(DocURL);
  813. if P>0 then
  814. begin
  815. PN:=D^.AddReferencedName(copy(LinkURL,P+1,length(LinkURL)));
  816. PN^.SetOrigin(CurDoc);
  817. PN^.SetLine(Line);
  818. end;
  819. end;
  820. procedure THTMLFileLinkScanner.AddNameID(AName : string);
  821. var D: PHTMLLinkScanFile;
  822. P: sw_integer;
  823. PN : PNameID;
  824. DocURL: string;
  825. begin
  826. {$ifdef DEBUG}
  827. DebugMessage(CurDoc,' Adding NameID "'+
  828. CurName+'"',Line,1);
  829. {$endif DEBUG}
  830. P:=Pos('#',AName);
  831. if P=0 then DocURL:=AName else DocURL:=copy(AName,1,P-1);
  832. D:=DocumentFiles^.SearchFile(DocURL);
  833. if not Assigned(D) then
  834. ScheduleDoc(DocURL);
  835. D:=DocumentFiles^.SearchFile(DocURL);
  836. PN:=D^.AddFoundName(copy(AName,P+1,length(AName)));
  837. PN^.SetOrigin(CurDoc);
  838. PN^.SetLine(Line);
  839. end;
  840. procedure THTMLFileLinkScanner.AddID(AName : string);
  841. var
  842. D: PHTMLLinkScanFile;
  843. PN : PNameID;
  844. index : sw_integer;
  845. begin
  846. {$ifdef DEBUG}
  847. DebugMessage(CurDoc,' Adding Id "'+
  848. AName+'"',Line,1);
  849. {$endif DEBUG}
  850. D:=DocumentFiles^.SearchFile(CurDoc);
  851. if not Assigned(D) then
  852. ScheduleDoc(CurDoc);
  853. D:=DocumentFiles^.SearchFile(CurDoc);
  854. PN:=D^.AddFoundName(AName);
  855. PN^.SetState(IsId,true);
  856. PN^.SetOrigin(CurDoc);
  857. PN^.SetLine(Line);
  858. new(PN,init(AName,IsID));
  859. if IDList^ .Search(PN,index) then
  860. begin
  861. dispose(PN,done);
  862. {$ifdef DEBUG}
  863. PN:=IDList^.At(Index);
  864. DebugMessage(CurDoc,'ID "'+AName+'" already defined in "'+
  865. PN^.GetOrigin+'('+IntToStr(PN^.GetLine)+')"',Line,1);
  866. {$endif DEBUG}
  867. end
  868. else
  869. begin
  870. IDList^.Insert(PN);
  871. PN^.SetOrigin(CurDoc);
  872. PN^.SetLine(Line);
  873. end;
  874. end;
  875. function THTMLFileLinkScanner.FindID(const AName : string) : PNameID;
  876. Function ContainsNamedID(D : PHTMLLinkScanFile) : boolean;
  877. begin
  878. ContainsNamedID:=D^.FindID(AName)<>nil;
  879. end;
  880. var
  881. D : PHTMLLinkScanFile;
  882. begin
  883. D:=DocumentFiles^.FirstThat(TCallbackFunBoolParam(@ContainsNamedID));
  884. if assigned(D) then
  885. FindID:=D^.FindID(AName)
  886. else
  887. FindID:=nil;
  888. end;
  889. procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
  890. var F: PDOSTextFile;
  891. begin
  892. if Assigned(Doc)=false then Exit;
  893. Doc^.State:=ssProcessing;
  894. CurDoc:=Doc^.GetDocumentURL;
  895. New(F, Init(CurDoc));
  896. if Assigned(F) then
  897. begin
  898. CurBaseURL:=CompleteURL(CurDoc,'');
  899. {$ifdef DEBUG}
  900. DebugMessage(CurDoc,'Processing "'+CurDoc+'"',1,1);
  901. {$endif DEBUG}
  902. Process(F);
  903. {$ifdef DEBUG}
  904. DebugMessage(CurDoc,'Finished processing "'+CurDoc+'"',Line,1);
  905. {$endif DEBUG}
  906. Dispose(F, Done);
  907. end
  908. else
  909. begin
  910. {$ifdef DEBUG}
  911. DebugMessage(CurDoc,'file not found',1,1);
  912. {$endif DEBUG}
  913. end;
  914. Doc^.State:=ssScanned;
  915. CurDoc:='';
  916. end;
  917. procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
  918. var D: PHTMLLinkScanFile;
  919. begin
  920. New(D, Init(DocumentURL));
  921. D^.State:=ssScheduled;
  922. D^.Owner:=@Self;
  923. {$ifdef DEBUG}
  924. DebugMessage('','Scheduling file "'+DocumentURL+'"',1,1);
  925. {$endif DEBUG}
  926. DocumentFiles^.Insert(D);
  927. end;
  928. destructor THTMLFileLinkScanner.Done;
  929. begin
  930. if Assigned(DocumentFiles) then
  931. Dispose(DocumentFiles, Done);
  932. DocumentFiles:=nil;
  933. if Assigned(IDList) then
  934. Dispose(IDList, Done);
  935. IDList:=nil;
  936. inherited Done;
  937. end;
  938. procedure RegisterWHTMLScan;
  939. begin
  940. RegisterType(RHTMLLinkScanDocument);
  941. end;
  942. END.