2
0

whtmlscn.pas 26 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054
  1. {
  2. This file is part of the Free Pascal Integrated Development Environment
  3. Copyright (c) 2000 by Berczi Gabor
  4. HTML scanner objects
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit WHTMLScn;
  12. interface
  13. uses Objects,
  14. WHTML;
  15. const
  16. HTMLIndexMagicNo = ord('H')+ord('H') shl 8+ord('I') shl 16+ord('X') shl 24;
  17. HTMLIndexVersion = 2;
  18. type
  19. PHTMLLinkScanner = ^THTMLLinkScanner;
  20. PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
  21. TCustomHTMLLinkScanner = object(THTMLParser)
  22. function DocAddTextChar(C: char): boolean; virtual;
  23. procedure DocAnchor(Entered: boolean); virtual;
  24. public
  25. {a}function CheckURL(const URL: string): boolean; virtual;
  26. {a}function CheckText(const Text: string): boolean; virtual;
  27. {a}procedure AddLink(const LinkText, LinkURL: string); virtual;
  28. {a}procedure AddRef(LinkURL: string); virtual;
  29. {a}procedure AddNameID(AName: string); virtual;
  30. {a}procedure AddID(AName: string); virtual;
  31. {a}function GetDocumentBaseURL: string; virtual;
  32. private
  33. CurLinkText: string;
  34. CurURL: string;
  35. CurName,
  36. CurID: string;
  37. CurDoc: string;
  38. InAnchor,InNameAnchor,
  39. HasHRef : boolean;
  40. LastSynonym: PHTMLLinkScanDocument;
  41. end;
  42. TNameIDState = (IsReferenced, IsFound,IsID);
  43. TNameIDStates = set of TNameIDState;
  44. PNameID = ^TNameID;
  45. TNameID = object(TObject)
  46. constructor Init(const AName : string; Astate : TNameIDState);
  47. destructor Done; virtual;
  48. procedure SetState(Astate : TNameIDState; enabled : boolean);
  49. procedure SetOrigin(const AOrigin : string);
  50. procedure SetLine(ALine : sw_integer);
  51. function GetLine : sw_integer;
  52. function GetState : TNameIDStates;
  53. function GetName : string;
  54. function GetOrigin : string;
  55. private
  56. Name : pstring;
  57. Origin : pstring;
  58. Line : sw_integer;
  59. State : TNameIDStates;
  60. end;
  61. PNameIDCollection = ^TNameIDCollection;
  62. TNameIDCollection = object(TSortedCollection)
  63. function At(Index: sw_Integer): PNameID;
  64. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  65. end;
  66. THTMLLinkScanDocument = object(TObject)
  67. constructor Init(const ADocName: string);
  68. function GetName: string;
  69. function GetUniqueName: string;
  70. function GetAliasCount: sw_integer;
  71. function GetAlias(Index: sw_integer): string;
  72. procedure AddAlias(const Alias: string);
  73. constructor Load(var S: TStream);
  74. procedure Store(var S: TStream);
  75. destructor Done; virtual;
  76. private
  77. DocName: PString;
  78. Synonym: PHTMLLinkScanDocument;
  79. Aliases: PStringCollection;
  80. end;
  81. PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
  82. THTMLLinkScanDocumentCollection = object(TSortedCollection)
  83. constructor Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
  84. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  85. function At(Index: sw_Integer): PHTMLLinkScanDocument;
  86. function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
  87. procedure MoveAliasesToSynonym;
  88. private
  89. Scanner: PHTMLLinkScanner;
  90. end;
  91. THTMLLinkScanner = object(TCustomHTMLLinkScanner)
  92. constructor Init(const ABaseDir: string);
  93. procedure SetBaseDir(const ABaseDir: string);
  94. {a}function FindID(const AName : string) : PNameID; virtual;
  95. function GetDocumentCount: sw_integer;
  96. function GetDocumentURL(DocIndex: sw_integer): string;
  97. function GetUniqueDocumentURL(DocIndex: sw_integer): string;
  98. function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
  99. function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
  100. constructor LoadDocuments(var S: TStream);
  101. procedure StoreDocuments(var S: TStream);
  102. destructor Done; virtual;
  103. public
  104. procedure AddLink(const LinkText, LinkURL: string); virtual;
  105. private
  106. Documents: PHTMLLinkScanDocumentCollection;
  107. BaseDir: PString;
  108. function ExpandChildURL(const S: string): string;
  109. function NormalizeChildURL(const S: string): string;
  110. end;
  111. THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
  112. PHTMLLinkScanFile = ^THTMLLinkScanFile;
  113. THTMLLinkScanFile = object(TObject)
  114. constructor Init(const ADocumentURL: string);
  115. function GetDocumentURL: string;
  116. destructor Done; virtual;
  117. function AddReferencedName (const AName : string) : PNameID;
  118. function AddFoundName (const AName : string) : PNameID;
  119. procedure CheckNameList;
  120. function FindID(const AName : string) : PNameID; virtual;
  121. private
  122. DocumentURL : PString;
  123. NameIDList : PNameIDCollection;
  124. Owner : PHTMLLinkScanner;
  125. public
  126. State : THTMLLinkScanState;
  127. end;
  128. PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
  129. THTMLLinkScanFileCollection = object(TSortedCollection)
  130. function At(Index: sw_Integer): PHTMLLinkScanFile;
  131. function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
  132. function SearchFile(const DocURL: string): PHTMLLinkScanFile;
  133. function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
  134. procedure CheckNameIDLists;
  135. end;
  136. THTMLLinkScanOption = (soSubDocsOnly);
  137. THTMLLinkScanOptions = set of THTMLLinkScanOption;
  138. THTMLFileLinkScanner = object(THTMLLinkScanner)
  139. constructor Init(const ABaseDir: string);
  140. procedure ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
  141. destructor Done; virtual;
  142. public
  143. function GetDocumentBaseURL: string; virtual;
  144. function FindID(const AName : string) : PNameID; virtual;
  145. procedure AddLink(const LinkText, LinkURL: string); virtual;
  146. procedure AddRef(LinkURL: string); virtual;
  147. procedure AddNameID(AName: string); virtual;
  148. procedure AddID(AName: string); virtual;
  149. function CheckURL(const URL: string): boolean; virtual;
  150. private
  151. Options: THTMLLinkScanOptions;
  152. BaseURL: string;
  153. CurBaseURL: string;
  154. IDList : PNameIDCollection;
  155. DocumentFiles: PHTMLLinkScanFileCollection;
  156. procedure ScheduleDoc(const DocumentURL: string);
  157. public
  158. procedure ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
  159. end;
  160. procedure RegisterWHTMLScan;
  161. implementation
  162. uses
  163. WUtils;
  164. const
  165. RHTMLLinkScanDocument: TStreamRec = (
  166. ObjType: 19500;
  167. VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
  168. Load: @THTMLLinkScanDocument.Load;
  169. Store: @THTMLLinkScanDocument.Store
  170. );
  171. const
  172. CurrentHTMLIndexVersion : sw_integer = HTMLIndexVersion;
  173. function TCustomHTMLLinkScanner.DocAddTextChar(C: char): boolean;
  174. var Added: boolean;
  175. begin
  176. Added:=false;
  177. if InAnchor then
  178. begin
  179. CurLinkText:=CurLinkText+C;
  180. Added:=true;
  181. end;
  182. if ord(c)>32 then
  183. LastSynonym:=nil;
  184. DocAddTextChar:=Added;
  185. end;
  186. procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
  187. begin
  188. if Entered then
  189. begin
  190. CurLinkText:='';
  191. if DocGetTagParam('HREF',CurURL) then
  192. HasHRef:=true
  193. else
  194. CurURL:='';
  195. if not DocGetTagParam('NAME',CurName) then
  196. if not DocGetTagParam('ID',CurName) then
  197. CurName:='';
  198. if not DocGetTagParam('ID',CurID) then
  199. CurID:='';
  200. if CurName<>'' then
  201. begin
  202. InNameAnchor:=true;
  203. If Pos('#',CurName)=0 then
  204. CurName:=CurDoc+'#'+CurName;
  205. CurName:=Trim(CurName);
  206. CurName:=CompleteURL(GetDocumentBaseURL,CurName);
  207. if CurURL='' then
  208. CurURL:=CurName;
  209. end
  210. else
  211. CurName:='';
  212. CurURL:=Trim(CurURL);
  213. if pos('#',CurURL)=1 then
  214. CurURL:=CurDoc+CurURL;
  215. CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
  216. end
  217. else
  218. begin
  219. CurLinkText:=Trim(CurLinkText);
  220. if HasHRef then
  221. begin
  222. if CheckURL(CurURL) and CheckText(CurLinkText) and
  223. not DisableCrossIndexing then
  224. begin
  225. AddLink(CurLinkText,CurURL);
  226. {$ifdef DEBUG}
  227. DebugMessage(CurDoc,' Adding ScanLink "'+CurLinkText+'" to "'+
  228. CurURL+'"',Line,1);
  229. {$endif DEBUG}
  230. end;
  231. { Be sure to parse referenced file,
  232. even if that link is not valid }
  233. AddRef(CurURL);
  234. end;
  235. if not HasHRef and InNameAnchor and CheckURL(CurName) and CheckText(CurLinkText) then
  236. begin
  237. AddLink(CurLinkText,CurName);
  238. {$ifdef DEBUG}
  239. DebugMessage(CurDoc,' Adding ScanName "'+CurLinkText+'" to "'+CurName+'"',Line,1);
  240. {$endif DEBUG}
  241. end;
  242. if InNameAnchor then
  243. begin
  244. AddNameID(CurName);
  245. end;
  246. if not HasHRef and (CurID<>'') then
  247. AddID(CurID);
  248. InNameAnchor:=false;
  249. HasHRef:=false;
  250. end;
  251. InAnchor:=Entered;
  252. end;
  253. function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
  254. begin
  255. { Abstract }
  256. GetDocumentBaseURL:='';
  257. end;
  258. function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
  259. begin
  260. { Abstract }
  261. CheckURL:=true;
  262. end;
  263. function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
  264. begin
  265. { Abstract }
  266. CheckText:=true;
  267. end;
  268. procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
  269. begin
  270. { Abstract }
  271. end;
  272. procedure TCustomHTMLLinkScanner.AddRef(LinkURL: string);
  273. begin
  274. { Abstract }
  275. end;
  276. procedure TCustomHTMLLinkScanner.AddNameID(AName: string);
  277. begin
  278. { Abstract }
  279. end;
  280. procedure TCustomHTMLLinkScanner.AddID(AName: string);
  281. begin
  282. { Abstract }
  283. end;
  284. constructor TNameID.Init(const AName : string; Astate : TNameIDState);
  285. begin
  286. inherited Init;
  287. SetStr(Name,AName);
  288. Origin:=nil;
  289. State:=[AState];
  290. end;
  291. destructor TNameID.Done;
  292. begin
  293. if assigned(Name) then
  294. DisposeStr(Name);
  295. Name:=nil;
  296. if assigned(Origin) then
  297. DisposeStr(Origin);
  298. Origin:=nil;
  299. inherited Done;
  300. end;
  301. procedure TNameID.SetState(Astate : TNameIDState; enabled : boolean);
  302. begin
  303. if enabled then
  304. Include(State,AState)
  305. else
  306. Exclude(State,AState);
  307. end;
  308. function TNameID.GetState : TNameIDStates;
  309. begin
  310. GetState:=State;
  311. end;
  312. function TNameID.GetName : string;
  313. begin
  314. GetName:=GetStr(Name);
  315. end;
  316. function TNameID.GetOrigin : string;
  317. begin
  318. GetOrigin:=GetStr(Origin);
  319. end;
  320. procedure TNameID.SetOrigin(const AOrigin : string);
  321. begin
  322. SetStr(Origin,AOrigin);
  323. end;
  324. procedure TNameID.SetLine(ALine : sw_integer);
  325. begin
  326. Line:=ALine;
  327. end;
  328. function TNameID.GetLine : sw_integer;
  329. begin
  330. GetLine:=Line;
  331. end;
  332. function TNameIDCollection.At(Index: sw_Integer): PNameID;
  333. begin
  334. At:=Inherited At(Index);
  335. end;
  336. function TNameIDCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  337. var
  338. R: sw_integer;
  339. K1: PNameID absolute Key1;
  340. K2: PNameID absolute Key2;
  341. S1,S2: string;
  342. begin
  343. S1:=K1^.GetName;
  344. S2:=K2^.GetName;
  345. S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
  346. if S1<S2 then R:=-1 else
  347. if S1>S2 then R:= 1 else
  348. R:=0;
  349. Compare:=R;
  350. end;
  351. constructor THTMLLinkScanDocument.Init(const ADocName: string);
  352. begin
  353. inherited Init;
  354. SetStr(DocName,ADocName);
  355. New(Aliases, Init(10,10));
  356. {$ifdef DEBUG}
  357. DebugMessage('',' Adding New LinkScan document "'+ADocName+'"',1,1);
  358. {$endif DEBUG}
  359. Synonym:=nil;
  360. end;
  361. function THTMLLinkScanDocument.GetName: string;
  362. begin
  363. GetName:=GetStr(DocName);
  364. end;
  365. function THTMLLinkScanDocument.GetUniqueName: string;
  366. var
  367. PD: PHTMLLinkScanDocument;
  368. begin
  369. PD:=@Self;
  370. while assigned(PD^.synonym) do
  371. PD:=PD^.Synonym;
  372. GetUniqueName:=GetStr(PD^.DocName);
  373. end;
  374. function THTMLLinkScanDocument.GetAliasCount: sw_integer;
  375. begin
  376. GetAliasCount:=Aliases^.Count;
  377. end;
  378. function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
  379. begin
  380. GetAlias:=GetStr(Aliases^.At(Index));
  381. end;
  382. procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
  383. begin
  384. Aliases^.Insert(NewStr(Alias));
  385. {$ifdef DEBUG}
  386. DebugMessage('',' Adding alias "'+Alias+'" to LinkScan document "'+GetStr(DocName)+'"',1,1);
  387. {$endif DEBUG}
  388. end;
  389. constructor THTMLLinkScanDocument.Load(var S: TStream);
  390. var
  391. i: sw_integer;
  392. begin
  393. inherited Init;
  394. DocName:=S.ReadStr;
  395. if assigned(DocName) then
  396. for i:=1 to Length(DocName^) do
  397. if (DocName^[i]='\') or (DocName^[i]='/') then
  398. DocName^[i]:=DirSep;
  399. New(Aliases, Load(S));
  400. end;
  401. procedure THTMLLinkScanDocument.Store(var S: TStream);
  402. begin
  403. S.WriteStr(DocName);
  404. Aliases^.Store(S);
  405. end;
  406. destructor THTMLLinkScanDocument.Done;
  407. begin
  408. if Assigned(Aliases) then
  409. Dispose(Aliases, Done);
  410. Aliases:=nil;
  411. if Assigned(DocName) then
  412. DisposeStr(DocName);
  413. DocName:=nil;
  414. inherited Done;
  415. end;
  416. constructor THTMLLinkScanDocumentCollection.Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
  417. begin
  418. inherited Init(ALimit,ADelta);
  419. Scanner:=AScanner;
  420. end;
  421. function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  422. var R: sw_integer;
  423. K1: PHTMLLinkScanDocument absolute Key1;
  424. K2: PHTMLLinkScanDocument absolute Key2;
  425. S1,S2: string;
  426. begin
  427. S1:=K1^.GetName; S2:=K2^.GetName;
  428. if Assigned(Scanner) then
  429. begin S1:=Scanner^.ExpandChildURL(S1); S2:=Scanner^.ExpandChildURL(S2); end;
  430. S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
  431. if S1<S2 then R:=-1 else
  432. if S1>S2 then R:= 1 else
  433. R:=0;
  434. Compare:=R;
  435. end;
  436. function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
  437. begin
  438. At:=inherited At(Index);
  439. end;
  440. function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
  441. var D,P: PHTMLLinkScanDocument;
  442. Index: sw_integer;
  443. begin
  444. New(D, Init(DocName));
  445. if Search(D, Index)=false then P:=nil else
  446. P:=At(Index);
  447. Dispose(D, Done);
  448. SearchDocument:=P;
  449. end;
  450. procedure THTMLLinkScanDocumentCollection.MoveAliasesToSynonym;
  451. procedure MoveAliases(P: PHTMLLinkScanDocument);
  452. var
  453. PD: PHTMLLinkScanDocument;
  454. i: sw_integer;
  455. begin
  456. if not assigned(P^.synonym) then
  457. exit;
  458. PD:=P;
  459. while assigned(PD^.synonym) do
  460. PD:=PD^.Synonym;
  461. For i:=P^.GetAliasCount-1 downto 0 do
  462. begin
  463. PD^.AddAlias(P^.GetAlias(i));
  464. P^.Aliases^.AtFree(i);
  465. end;
  466. end;
  467. begin
  468. ForEach(@MoveAliases);
  469. end;
  470. constructor THTMLLinkScanner.Init(const ABaseDir: string);
  471. begin
  472. inherited Init;
  473. New(Documents, Init(@Self,50,100));
  474. SetBaseDir(ABaseDir);
  475. end;
  476. procedure THTMLLinkScanner.SetBaseDir(const ABaseDir: string);
  477. begin
  478. if Assigned(BaseDir) then DisposeStr(BaseDir);
  479. BaseDir:=NewStr(CompleteDir(ABaseDir));
  480. end;
  481. function THTMLLinkScanner.GetDocumentCount: sw_integer;
  482. begin
  483. GetDocumentCount:=Documents^.Count;
  484. end;
  485. function THTMLLinkScanner.ExpandChildURL(const S: string): string;
  486. begin
  487. ExpandChildURL:=CompleteURL(GetStr(BaseDir),S);
  488. end;
  489. function THTMLLinkScanner.NormalizeChildURL(const S: string): string;
  490. var URL: string;
  491. begin
  492. URL:=S;
  493. if GetStr(BaseDir)<>'' then
  494. if copy(UpcaseStr(S),1,length(GetStr(BaseDir)))=UpcaseStr(GetStr(BaseDir)) then
  495. URL:=copy(S,length(GetStr(BaseDir))+1,length(S));
  496. NormalizeChildURL:=URL;
  497. end;
  498. function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
  499. begin
  500. GetDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetName);
  501. end;
  502. function THTMLLinkScanner.GetUniqueDocumentURL(DocIndex: sw_integer): string;
  503. begin
  504. GetUniqueDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetUniqueName);
  505. end;
  506. function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
  507. begin
  508. GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
  509. end;
  510. function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
  511. begin
  512. GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
  513. end;
  514. procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
  515. var D: PHTMLLinkScanDocument;
  516. DoInsert: boolean;
  517. int: sw_integer;
  518. Text: string;
  519. error: word;
  520. begin
  521. D:=Documents^.SearchDocument(LinkURL);
  522. if D=nil then
  523. begin
  524. New(D, Init(NormalizeChildURL(LinkURL)));
  525. Documents^.Insert(D);
  526. end;
  527. If assigned(LastSynonym) then
  528. LastSynonym^.Synonym:=D;
  529. DoInsert:=true;
  530. If (length(LinkText)=0) or (Pos(',',LinkText)=1) then
  531. DoInsert:=false;
  532. Val(LinkText,int,error);
  533. If (Error>1) and (LinkText[Error]=' ') then
  534. Text:=Trim(Copy(LinkText,error+1,length(LinkText)))
  535. else
  536. Text:=LinkText;
  537. IF DoInsert then
  538. D^.AddAlias(Text);
  539. If InNameAnchor then
  540. LastSynonym:=D;
  541. end;
  542. constructor THTMLLinkScanner.LoadDocuments(var S: TStream);
  543. var P,L: longint;
  544. OK: boolean;
  545. PS: PString;
  546. begin
  547. OK:=false;
  548. P:=S.GetPos;
  549. S.Read(L,sizeof(L));
  550. if (S.Status=stOK) and (L=HTMLIndexMagicNo) then
  551. begin
  552. S.Read(L,sizeof(L));
  553. CurrentHTMLIndexVersion:=L;
  554. OK:=(S.Status=stOK);
  555. end;
  556. if not OK then
  557. begin
  558. S.Reset;
  559. S.Seek(P);
  560. end
  561. else
  562. BaseDir:=S.ReadStr;
  563. New(Documents, Load(S));
  564. if not Assigned(Documents) then
  565. Fail;
  566. Documents^.MoveAliasesToSynonym;
  567. CurrentHTMLIndexVersion:=HTMLIndexVersion;
  568. end;
  569. function THTMLLinkScanner.FindID(const AName : string) : PNameID;
  570. begin
  571. {abstract}FindID:=nil;
  572. end;
  573. procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
  574. var L: longint;
  575. begin
  576. L:=HTMLIndexMagicNo;
  577. S.Write(L,sizeof(L));
  578. L:=HTMLIndexVersion;
  579. CurrentHTMLIndexVersion:=L;
  580. S.Write(L,sizeof(L));
  581. S.WriteStr(BaseDir);
  582. Documents^.MoveAliasesToSynonym;
  583. Documents^.Store(S);
  584. end;
  585. destructor THTMLLinkScanner.Done;
  586. begin
  587. if Assigned(Documents) then
  588. Dispose(Documents, Done);
  589. Documents:=nil;
  590. if Assigned(BaseDir) then
  591. DisposeStr(BaseDir);
  592. BaseDir:=nil;
  593. inherited Done;
  594. end;
  595. constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
  596. begin
  597. inherited Init;
  598. SetStr(DocumentURL,ADocumentURL);
  599. New(NameIDList, Init(5,10));
  600. end;
  601. function THTMLLinkScanFile.GetDocumentURL: string;
  602. begin
  603. GetDocumentURL:=GetStr(DocumentURL);
  604. end;
  605. function THTMLLinkScanFile.AddReferencedName (const AName : string) : PNameID;
  606. var
  607. index : sw_integer;
  608. PN : PNameID;
  609. begin
  610. new(PN,init(AName,IsReferenced));
  611. if not NameIDList^.Search(PN,Index) then
  612. NameIDList^.Insert(PN)
  613. else
  614. begin
  615. dispose(PN,Done);
  616. PN:=NameIDList^.At(Index);
  617. PN^.SetState(IsReferenced,true);
  618. end;
  619. AddReferencedName:=PN;
  620. end;
  621. function THTMLLinkScanFile.AddFoundName (const AName : string) : PNameID;
  622. var
  623. index : sw_integer;
  624. PN : PNameID;
  625. begin
  626. new(PN,init(AName,IsFound));
  627. if not NameIDList^.Search(PN,Index) then
  628. NameIDList^.Insert(PN)
  629. else
  630. begin
  631. dispose(PN,Done);
  632. PN:=NameIDList^.At(Index);
  633. PN^.SetState(IsFound,true);
  634. end;
  635. AddFoundName:=PN;
  636. end;
  637. procedure THTMLLinkScanFile.CheckNameList;
  638. var
  639. i : sw_integer;
  640. PN,PN2 : PNameID;
  641. begin
  642. {$ifdef DEBUG}
  643. for i:=0 to NameIDList^.Count-1 do
  644. begin
  645. PN:=NameIDList^.At(i);
  646. if not (IsFound in PN^.GetState) then
  647. begin
  648. if (IsReferenced in PN^.GetState) then
  649. DebugMessage(GetDocumentURL,'Name "'+PN^.GetName+'" from "'+
  650. PN^.GetOrigin+'" not found',1,1);
  651. PN2:=Owner^.FindID(PN^.GetName);
  652. if assigned(PN2) then
  653. begin
  654. DebugMessage('','ID found in "'+PN2^.GetOrigin+'"',1,1);
  655. if not (IsFound in PN2^.GetState) then
  656. DebugMessage('','ID not found',1,1);
  657. end;
  658. end;
  659. end;
  660. {$endif DEBUG}
  661. end;
  662. function THTMLLinkScanFile.FindID(const AName : string) : PNameID;
  663. var
  664. PN : PNameID;
  665. Index : sw_integer;
  666. begin
  667. new(PN,init(AName,IsID));
  668. if NameIDList^.Search(PN,Index) then
  669. begin
  670. dispose(PN,done);
  671. PN:=NameIDList^.At(Index);
  672. if (IsID in PN^.GetState) then
  673. FindId:=PN
  674. else
  675. FindID:=nil;
  676. end
  677. else
  678. begin
  679. dispose(PN,done);
  680. PN:=nil;
  681. FindID:=nil;
  682. end;
  683. end;
  684. destructor THTMLLinkScanFile.Done;
  685. begin
  686. if Assigned(DocumentURL) then
  687. DisposeStr(DocumentURL);
  688. DocumentURL:=nil;
  689. dispose(NameIDList,done);
  690. NameIDList:=nil;
  691. inherited Done;
  692. end;
  693. function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
  694. begin
  695. At:=inherited At(Index);
  696. end;
  697. function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
  698. var R: integer;
  699. K1: PHTMLLinkScanFile absolute Key1;
  700. K2: PHTMLLinkScanFile absolute Key2;
  701. S1,S2: string;
  702. begin
  703. S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
  704. if S1<S2 then R:=-1 else
  705. if S1>S2 then R:= 1 else
  706. R:=0;
  707. Compare:=R;
  708. end;
  709. function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
  710. var P,D: PHTMLLinkScanFile;
  711. Index: sw_integer;
  712. begin
  713. New(D, Init(DocURL));
  714. if Search(D,Index)=false then P:=nil else
  715. P:=At(Index);
  716. Dispose(D, Done);
  717. SearchFile:=P;
  718. end;
  719. function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
  720. var I: sw_integer;
  721. P,D: PHTMLLinkScanFile;
  722. begin
  723. P:=nil;
  724. for I:=0 to Count-1 do
  725. begin
  726. D:=At(I);
  727. if D^.State=AState then
  728. begin
  729. P:=D;
  730. Break;
  731. end;
  732. end;
  733. FindFileWithState:=P;
  734. end;
  735. procedure THTMLLinkScanFileCollection.CheckNameIDLists;
  736. procedure DoCheckNameList(P : PHTMLLinkScanFile);
  737. begin
  738. P^.CheckNameList;
  739. end;
  740. begin
  741. ForEach(@DoCheckNameList);
  742. end;
  743. constructor THTMLFileLinkScanner.Init(const ABaseDir: string);
  744. begin
  745. inherited Init(ABaseDir);
  746. New(DocumentFiles, Init(50,100));
  747. New(IDList, Init(50,100));
  748. {$ifdef DEBUG}
  749. DebugMessage('','THTMLFileLinkScanner Init "'+ABaseDir+'"',1,1);
  750. {$endif DEBUG}
  751. end;
  752. procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
  753. var P: PHTMLLinkScanFile;
  754. begin
  755. CurBaseURL:='';
  756. Options:=AOptions;
  757. ScheduleDoc(DocumentURL);
  758. repeat
  759. P:=DocumentFiles^.FindFileWithState(ssScheduled);
  760. if Assigned(P) then
  761. ProcessDoc(P);
  762. until P=nil;
  763. {$ifdef DEBUG}
  764. DebugMessage('','THTMLFileLinkScanner CheckNameList start ',1,1);
  765. DocumentFiles^.CheckNameIDLists;
  766. DebugMessage('','THTMLFileLinkScanner CheckNameList end ',1,1);
  767. {$endif DEBUG}
  768. end;
  769. function THTMLFileLinkScanner.GetDocumentBaseURL: string;
  770. begin
  771. GetDocumentBaseURL:=CurBaseURL;
  772. end;
  773. function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
  774. var OK: boolean;
  775. begin
  776. if soSubDocsOnly in Options then
  777. OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
  778. else
  779. OK:=true;
  780. CheckURL:=OK;
  781. end;
  782. procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
  783. var D: PHTMLLinkScanFile;
  784. P: sw_integer;
  785. DocURL: string;
  786. begin
  787. P:=Pos('#',LinkURL);
  788. if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
  789. D:=DocumentFiles^.SearchFile(DocURL);
  790. if not Assigned(D) then
  791. ScheduleDoc(DocURL);
  792. inherited AddLink(LinkText,LinkURL);
  793. end;
  794. procedure THTMLFileLinkScanner.AddRef(LinkURL: string);
  795. var D: PHTMLLinkScanFile;
  796. P: sw_integer;
  797. DocURL: string;
  798. PN : PNameID;
  799. begin
  800. {$ifdef DEBUG}
  801. DebugMessage(CurDoc,' Adding Ref to "'+
  802. LinkURL+'"',Line,1);
  803. {$endif DEBUG}
  804. P:=Pos('#',LinkURL);
  805. if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
  806. D:=DocumentFiles^.SearchFile(DocURL);
  807. if not Assigned(D) then
  808. ScheduleDoc(DocURL);
  809. D:=DocumentFiles^.SearchFile(DocURL);
  810. if P>0 then
  811. begin
  812. PN:=D^.AddReferencedName(copy(LinkURL,P+1,length(LinkURL)));
  813. PN^.SetOrigin(CurDoc);
  814. PN^.SetLine(Line);
  815. end;
  816. end;
  817. procedure THTMLFileLinkScanner.AddNameID(AName : string);
  818. var D: PHTMLLinkScanFile;
  819. P: sw_integer;
  820. PN : PNameID;
  821. DocURL: string;
  822. begin
  823. {$ifdef DEBUG}
  824. DebugMessage(CurDoc,' Adding NameID "'+
  825. CurName+'"',Line,1);
  826. {$endif DEBUG}
  827. P:=Pos('#',AName);
  828. if P=0 then DocURL:=AName else DocURL:=copy(AName,1,P-1);
  829. D:=DocumentFiles^.SearchFile(DocURL);
  830. if not Assigned(D) then
  831. ScheduleDoc(DocURL);
  832. D:=DocumentFiles^.SearchFile(DocURL);
  833. PN:=D^.AddFoundName(copy(AName,P+1,length(AName)));
  834. PN^.SetOrigin(CurDoc);
  835. PN^.SetLine(Line);
  836. end;
  837. procedure THTMLFileLinkScanner.AddID(AName : string);
  838. var
  839. D: PHTMLLinkScanFile;
  840. PN : PNameID;
  841. index : sw_integer;
  842. begin
  843. {$ifdef DEBUG}
  844. DebugMessage(CurDoc,' Adding Id "'+
  845. AName+'"',Line,1);
  846. {$endif DEBUG}
  847. D:=DocumentFiles^.SearchFile(CurDoc);
  848. if not Assigned(D) then
  849. ScheduleDoc(CurDoc);
  850. D:=DocumentFiles^.SearchFile(CurDoc);
  851. PN:=D^.AddFoundName(AName);
  852. PN^.SetState(IsId,true);
  853. PN^.SetOrigin(CurDoc);
  854. PN^.SetLine(Line);
  855. new(PN,init(AName,IsID));
  856. if IDList^ .Search(PN,index) then
  857. begin
  858. dispose(PN,done);
  859. {$ifdef DEBUG}
  860. PN:=IDList^.At(Index);
  861. DebugMessage(CurDoc,'ID "'+AName+'" already defined in "'+
  862. PN^.GetOrigin+'('+IntToStr(PN^.GetLine)+')"',Line,1);
  863. {$endif DEBUG}
  864. end
  865. else
  866. begin
  867. IDList^.Insert(PN);
  868. PN^.SetOrigin(CurDoc);
  869. PN^.SetLine(Line);
  870. end;
  871. end;
  872. function THTMLFileLinkScanner.FindID(const AName : string) : PNameID;
  873. Function ContainsNamedID(D : PHTMLLinkScanFile) : boolean;
  874. begin
  875. ContainsNamedID:=D^.FindID(AName)<>nil;
  876. end;
  877. var
  878. D : PHTMLLinkScanFile;
  879. begin
  880. D:=DocumentFiles^.FirstThat(@ContainsNamedID);
  881. if assigned(D) then
  882. FindID:=D^.FindID(AName)
  883. else
  884. FindID:=nil;
  885. end;
  886. procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
  887. var F: PDOSTextFile;
  888. begin
  889. if Assigned(Doc)=false then Exit;
  890. Doc^.State:=ssProcessing;
  891. CurDoc:=Doc^.GetDocumentURL;
  892. New(F, Init(CurDoc));
  893. if Assigned(F) then
  894. begin
  895. CurBaseURL:=CompleteURL(CurDoc,'');
  896. {$ifdef DEBUG}
  897. DebugMessage(CurDoc,'Processing "'+CurDoc+'"',1,1);
  898. {$endif DEBUG}
  899. Process(F);
  900. {$ifdef DEBUG}
  901. DebugMessage(CurDoc,'Finished processing "'+CurDoc+'"',Line,1);
  902. {$endif DEBUG}
  903. Dispose(F, Done);
  904. end
  905. else
  906. begin
  907. {$ifdef DEBUG}
  908. DebugMessage(CurDoc,'file not found',1,1);
  909. {$endif DEBUG}
  910. end;
  911. Doc^.State:=ssScanned;
  912. CurDoc:='';
  913. end;
  914. procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
  915. var D: PHTMLLinkScanFile;
  916. begin
  917. New(D, Init(DocumentURL));
  918. D^.State:=ssScheduled;
  919. D^.Owner:=@Self;
  920. {$ifdef DEBUG}
  921. DebugMessage('','Scheduling file "'+DocumentURL+'"',1,1);
  922. {$endif DEBUG}
  923. DocumentFiles^.Insert(D);
  924. end;
  925. destructor THTMLFileLinkScanner.Done;
  926. begin
  927. if Assigned(DocumentFiles) then
  928. Dispose(DocumentFiles, Done);
  929. DocumentFiles:=nil;
  930. if Assigned(IDList) then
  931. Dispose(IDList, Done);
  932. IDList:=nil;
  933. inherited Done;
  934. end;
  935. procedure RegisterWHTMLScan;
  936. begin
  937. RegisterType(RHTMLLinkScanDocument);
  938. end;
  939. END.