123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059 |
- {
- This file is part of the Free Pascal Integrated Development Environment
- Copyright (c) 2000 by Berczi Gabor
- HTML scanner objects
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- unit WHTMLScn;
- {$ifdef cpullvm}
- {$modeswitch nestedprocvars}
- {$endif}
- {$H-}
- interface
- uses Objects,
- WHTML;
- const
- HTMLIndexMagicNo = ord('H')+ord('H') shl 8+ord('I') shl 16+ord('X') shl 24;
- HTMLIndexVersion = 2;
- type
- PHTMLLinkScanner = ^THTMLLinkScanner;
- PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
- TCustomHTMLLinkScanner = object(THTMLParser)
- function DocAddTextChar(C: AnsiChar): boolean; virtual;
- procedure DocAnchor(Entered: boolean); virtual;
- public
- {a}function CheckURL(const URL: string): boolean; virtual;
- {a}function CheckText(const Text: string): boolean; virtual;
- {a}procedure AddLink(const LinkText, LinkURL: string); virtual;
- {a}procedure AddRef(LinkURL: string); virtual;
- {a}procedure AddNameID(AName: string); virtual;
- {a}procedure AddID(AName: string); virtual;
- {a}function GetDocumentBaseURL: string; virtual;
- private
- CurLinkText: string;
- CurURL: string;
- CurName,
- CurID: string;
- CurDoc: string;
- InAnchor,InNameAnchor,
- HasHRef : boolean;
- LastSynonym: PHTMLLinkScanDocument;
- end;
- TNameIDState = (IsReferenced, IsFound,IsID);
- TNameIDStates = set of TNameIDState;
- PNameID = ^TNameID;
- TNameID = object(TObject)
- constructor Init(const AName : string; Astate : TNameIDState);
- destructor Done; virtual;
- procedure SetState(Astate : TNameIDState; enabled : boolean);
- procedure SetOrigin(const AOrigin : string);
- procedure SetLine(ALine : sw_integer);
- function GetLine : sw_integer;
- function GetState : TNameIDStates;
- function GetName : string;
- function GetOrigin : string;
- private
- Name : pstring;
- Origin : pstring;
- Line : sw_integer;
- State : TNameIDStates;
- end;
- PNameIDCollection = ^TNameIDCollection;
- TNameIDCollection = object(TSortedCollection)
- function At(Index: sw_Integer): PNameID;
- function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
- end;
- THTMLLinkScanDocument = object(TObject)
- constructor Init(const ADocName: string);
- function GetName: string;
- function GetUniqueName: string;
- function GetAliasCount: sw_integer;
- function GetAlias(Index: sw_integer): string;
- procedure AddAlias(const Alias: string);
- constructor Load(var S: TStream);
- procedure Store(var S: TStream);
- destructor Done; virtual;
- private
- DocName: PString;
- Synonym: PHTMLLinkScanDocument;
- Aliases: PStringCollection;
- end;
- PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
- THTMLLinkScanDocumentCollection = object(TSortedCollection)
- constructor Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
- function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
- function At(Index: sw_Integer): PHTMLLinkScanDocument;
- function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
- procedure MoveAliasesToSynonym;
- private
- Scanner: PHTMLLinkScanner;
- end;
- THTMLLinkScanner = object(TCustomHTMLLinkScanner)
- constructor Init(const ABaseDir: string);
- procedure SetBaseDir(const ABaseDir: string);
- {a}function FindID(const AName : string) : PNameID; virtual;
- function GetDocumentCount: sw_integer;
- function GetDocumentURL(DocIndex: sw_integer): string;
- function GetUniqueDocumentURL(DocIndex: sw_integer): string;
- function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
- function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
- constructor LoadDocuments(var S: TStream);
- procedure StoreDocuments(var S: TStream);
- destructor Done; virtual;
- public
- procedure AddLink(const LinkText, LinkURL: string); virtual;
- private
- Documents: PHTMLLinkScanDocumentCollection;
- BaseDir: PString;
- function ExpandChildURL(const S: string): string;
- function NormalizeChildURL(const S: string): string;
- end;
- THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
- PHTMLLinkScanFile = ^THTMLLinkScanFile;
- THTMLLinkScanFile = object(TObject)
- constructor Init(const ADocumentURL: string);
- function GetDocumentURL: string;
- destructor Done; virtual;
- function AddReferencedName (const AName : string) : PNameID;
- function AddFoundName (const AName : string) : PNameID;
- procedure CheckNameList;
- function FindID(const AName : string) : PNameID; virtual;
- private
- DocumentURL : PString;
- NameIDList : PNameIDCollection;
- Owner : PHTMLLinkScanner;
- public
- State : THTMLLinkScanState;
- end;
- PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
- THTMLLinkScanFileCollection = object(TSortedCollection)
- function At(Index: sw_Integer): PHTMLLinkScanFile;
- function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
- function SearchFile(const DocURL: string): PHTMLLinkScanFile;
- function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
- procedure CheckNameIDLists;
- end;
- THTMLLinkScanOption = (soSubDocsOnly);
- THTMLLinkScanOptions = set of THTMLLinkScanOption;
- THTMLFileLinkScanner = object(THTMLLinkScanner)
- constructor Init(const ABaseDir: string);
- procedure ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
- destructor Done; virtual;
- public
- function GetDocumentBaseURL: string; virtual;
- function FindID(const AName : string) : PNameID; virtual;
- procedure AddLink(const LinkText, LinkURL: string); virtual;
- procedure AddRef(LinkURL: string); virtual;
- procedure AddNameID(AName: string); virtual;
- procedure AddID(AName: string); virtual;
- function CheckURL(const URL: string): boolean; virtual;
- private
- Options: THTMLLinkScanOptions;
- BaseURL: string;
- CurBaseURL: string;
- IDList : PNameIDCollection;
- DocumentFiles: PHTMLLinkScanFileCollection;
- procedure ScheduleDoc(const DocumentURL: string);
- public
- procedure ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
- end;
- procedure RegisterWHTMLScan;
- implementation
- uses
- WUtils;
- const
- RHTMLLinkScanDocument: TStreamRec = (
- ObjType: 19500;
- VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
- Load: @THTMLLinkScanDocument.Load;
- Store: @THTMLLinkScanDocument.Store
- );
- const
- CurrentHTMLIndexVersion : sw_integer = HTMLIndexVersion;
- function TCustomHTMLLinkScanner.DocAddTextChar(C: AnsiChar): boolean;
- var Added: boolean;
- begin
- Added:=false;
- if InAnchor then
- begin
- CurLinkText:=CurLinkText+C;
- Added:=true;
- end;
- if ord(c)>32 then
- LastSynonym:=nil;
- DocAddTextChar:=Added;
- end;
- procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
- begin
- if Entered then
- begin
- CurLinkText:='';
- if DocGetTagParam('HREF',CurURL) then
- HasHRef:=true
- else
- CurURL:='';
- if not DocGetTagParam('NAME',CurName) then
- if not DocGetTagParam('ID',CurName) then
- CurName:='';
- if not DocGetTagParam('ID',CurID) then
- CurID:='';
- if CurName<>'' then
- begin
- InNameAnchor:=true;
- If Pos('#',CurName)=0 then
- CurName:=CurDoc+'#'+CurName;
- CurName:=Trim(CurName);
- CurName:=CompleteURL(GetDocumentBaseURL,CurName);
- if CurURL='' then
- CurURL:=CurName;
- end
- else
- CurName:='';
- CurURL:=Trim(CurURL);
- if pos('#',CurURL)=1 then
- CurURL:=CurDoc+CurURL;
- CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
- end
- else
- begin
- CurLinkText:=Trim(CurLinkText);
- if HasHRef then
- begin
- if CheckURL(CurURL) and CheckText(CurLinkText) and
- not DisableCrossIndexing then
- begin
- AddLink(CurLinkText,CurURL);
- {$ifdef DEBUG}
- DebugMessage(CurDoc,' Adding ScanLink "'+CurLinkText+'" to "'+
- CurURL+'"',Line,1);
- {$endif DEBUG}
- end;
- { Be sure to parse referenced file,
- even if that link is not valid }
- AddRef(CurURL);
- end;
- if not HasHRef and InNameAnchor and CheckURL(CurName) and CheckText(CurLinkText) then
- begin
- AddLink(CurLinkText,CurName);
- {$ifdef DEBUG}
- DebugMessage(CurDoc,' Adding ScanName "'+CurLinkText+'" to "'+CurName+'"',Line,1);
- {$endif DEBUG}
- end;
- if InNameAnchor then
- begin
- AddNameID(CurName);
- end;
- if not HasHRef and (CurID<>'') then
- AddID(CurID);
- InNameAnchor:=false;
- HasHRef:=false;
- end;
- InAnchor:=Entered;
- end;
- function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
- begin
- { Abstract }
- GetDocumentBaseURL:='';
- end;
- function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
- begin
- { Abstract }
- CheckURL:=true;
- end;
- function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
- begin
- { Abstract }
- CheckText:=true;
- end;
- procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
- begin
- { Abstract }
- end;
- procedure TCustomHTMLLinkScanner.AddRef(LinkURL: string);
- begin
- { Abstract }
- end;
- procedure TCustomHTMLLinkScanner.AddNameID(AName: string);
- begin
- { Abstract }
- end;
- procedure TCustomHTMLLinkScanner.AddID(AName: string);
- begin
- { Abstract }
- end;
- constructor TNameID.Init(const AName : string; Astate : TNameIDState);
- begin
- inherited Init;
- SetStr(Name,AName);
- Origin:=nil;
- State:=[AState];
- end;
- destructor TNameID.Done;
- begin
- if assigned(Name) then
- DisposeStr(Name);
- Name:=nil;
- if assigned(Origin) then
- DisposeStr(Origin);
- Origin:=nil;
- inherited Done;
- end;
- procedure TNameID.SetState(Astate : TNameIDState; enabled : boolean);
- begin
- if enabled then
- Include(State,AState)
- else
- Exclude(State,AState);
- end;
- function TNameID.GetState : TNameIDStates;
- begin
- GetState:=State;
- end;
- function TNameID.GetName : string;
- begin
- GetName:=GetStr(Name);
- end;
- function TNameID.GetOrigin : string;
- begin
- GetOrigin:=GetStr(Origin);
- end;
- procedure TNameID.SetOrigin(const AOrigin : string);
- begin
- SetStr(Origin,AOrigin);
- end;
- procedure TNameID.SetLine(ALine : sw_integer);
- begin
- Line:=ALine;
- end;
- function TNameID.GetLine : sw_integer;
- begin
- GetLine:=Line;
- end;
- function TNameIDCollection.At(Index: sw_Integer): PNameID;
- begin
- At:=Inherited At(Index);
- end;
- function TNameIDCollection.Compare(Key1, Key2: Pointer): sw_Integer;
- var
- R: sw_integer;
- K1: PNameID absolute Key1;
- K2: PNameID absolute Key2;
- S1,S2: string;
- begin
- S1:=K1^.GetName;
- S2:=K2^.GetName;
- S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
- if S1<S2 then R:=-1 else
- if S1>S2 then R:= 1 else
- R:=0;
- Compare:=R;
- end;
- constructor THTMLLinkScanDocument.Init(const ADocName: string);
- begin
- inherited Init;
- SetStr(DocName,ADocName);
- New(Aliases, Init(10,10));
- {$ifdef DEBUG}
- DebugMessage('',' Adding New LinkScan document "'+ADocName+'"',1,1);
- {$endif DEBUG}
- Synonym:=nil;
- end;
- function THTMLLinkScanDocument.GetName: string;
- begin
- GetName:=GetStr(DocName);
- end;
- function THTMLLinkScanDocument.GetUniqueName: string;
- var
- PD: PHTMLLinkScanDocument;
- begin
- PD:=@Self;
- while assigned(PD^.synonym) do
- PD:=PD^.Synonym;
- GetUniqueName:=GetStr(PD^.DocName);
- end;
- function THTMLLinkScanDocument.GetAliasCount: sw_integer;
- begin
- GetAliasCount:=Aliases^.Count;
- end;
- function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
- begin
- GetAlias:=GetStr(Aliases^.At(Index));
- end;
- procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
- begin
- Aliases^.Insert(NewStr(Alias));
- {$ifdef DEBUG}
- DebugMessage('',' Adding alias "'+Alias+'" to LinkScan document "'+GetStr(DocName)+'"',1,1);
- {$endif DEBUG}
- end;
- constructor THTMLLinkScanDocument.Load(var S: TStream);
- var
- i: sw_integer;
- begin
- inherited Init;
- DocName:=S.ReadStr;
- if assigned(DocName) then
- for i:=1 to Length(DocName^) do
- if (DocName^[i]='\') or (DocName^[i]='/') then
- DocName^[i]:=DirSep;
- New(Aliases, Load(S));
- end;
- procedure THTMLLinkScanDocument.Store(var S: TStream);
- begin
- S.WriteStr(DocName);
- Aliases^.Store(S);
- end;
- destructor THTMLLinkScanDocument.Done;
- begin
- if Assigned(Aliases) then
- Dispose(Aliases, Done);
- Aliases:=nil;
- if Assigned(DocName) then
- DisposeStr(DocName);
- DocName:=nil;
- inherited Done;
- end;
- constructor THTMLLinkScanDocumentCollection.Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
- begin
- inherited Init(ALimit,ADelta);
- Scanner:=AScanner;
- end;
- function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
- var R: sw_integer;
- K1: PHTMLLinkScanDocument absolute Key1;
- K2: PHTMLLinkScanDocument absolute Key2;
- S1,S2: string;
- begin
- S1:=K1^.GetName; S2:=K2^.GetName;
- if Assigned(Scanner) then
- begin S1:=Scanner^.ExpandChildURL(S1); S2:=Scanner^.ExpandChildURL(S2); end;
- S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
- if S1<S2 then R:=-1 else
- if S1>S2 then R:= 1 else
- R:=0;
- Compare:=R;
- end;
- function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
- begin
- At:=inherited At(Index);
- end;
- function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
- var D,P: PHTMLLinkScanDocument;
- Index: sw_integer;
- begin
- New(D, Init(DocName));
- if Search(D, Index)=false then P:=nil else
- P:=At(Index);
- Dispose(D, Done);
- SearchDocument:=P;
- end;
- procedure THTMLLinkScanDocumentCollection.MoveAliasesToSynonym;
- procedure MoveAliases(P: PHTMLLinkScanDocument);
- var
- PD: PHTMLLinkScanDocument;
- i: sw_integer;
- begin
- if not assigned(P^.synonym) then
- exit;
- PD:=P;
- while assigned(PD^.synonym) do
- PD:=PD^.Synonym;
- For i:=P^.GetAliasCount-1 downto 0 do
- begin
- PD^.AddAlias(P^.GetAlias(i));
- P^.Aliases^.AtFree(i);
- end;
- end;
- begin
- ForEach(TCallbackProcParam(@MoveAliases));
- end;
- constructor THTMLLinkScanner.Init(const ABaseDir: string);
- begin
- inherited Init;
- New(Documents, Init(@Self,50,100));
- SetBaseDir(ABaseDir);
- end;
- procedure THTMLLinkScanner.SetBaseDir(const ABaseDir: string);
- begin
- if Assigned(BaseDir) then DisposeStr(BaseDir);
- BaseDir:=NewStr(CompleteDir(ABaseDir));
- end;
- function THTMLLinkScanner.GetDocumentCount: sw_integer;
- begin
- GetDocumentCount:=Documents^.Count;
- end;
- function THTMLLinkScanner.ExpandChildURL(const S: string): string;
- begin
- ExpandChildURL:=CompleteURL(GetStr(BaseDir),S);
- end;
- function THTMLLinkScanner.NormalizeChildURL(const S: string): string;
- var URL: string;
- begin
- URL:=S;
- if GetStr(BaseDir)<>'' then
- if copy(UpcaseStr(S),1,length(GetStr(BaseDir)))=UpcaseStr(GetStr(BaseDir)) then
- URL:=copy(S,length(GetStr(BaseDir))+1,length(S));
- NormalizeChildURL:=URL;
- end;
- function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
- begin
- GetDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetName);
- end;
- function THTMLLinkScanner.GetUniqueDocumentURL(DocIndex: sw_integer): string;
- begin
- GetUniqueDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetUniqueName);
- end;
- function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
- begin
- GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
- end;
- function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
- begin
- GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
- end;
- procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
- var D: PHTMLLinkScanDocument;
- DoInsert: boolean;
- int: sw_integer;
- Text: string;
- error: word;
- begin
- D:=Documents^.SearchDocument(LinkURL);
- if D=nil then
- begin
- New(D, Init(NormalizeChildURL(LinkURL)));
- Documents^.Insert(D);
- end;
- If assigned(LastSynonym) then
- LastSynonym^.Synonym:=D;
- DoInsert:=true;
- If (length(LinkText)=0) or (Pos(',',LinkText)=1) then
- DoInsert:=false;
- Val(LinkText,int,error);
- If (Error>1) and (LinkText[Error]=' ') then
- Text:=Trim(Copy(LinkText,error+1,length(LinkText)))
- else
- Text:=LinkText;
- IF DoInsert then
- D^.AddAlias(Text);
- If InNameAnchor then
- LastSynonym:=D;
- end;
- constructor THTMLLinkScanner.LoadDocuments(var S: TStream);
- var P,L: longint;
- OK: boolean;
- PS: PString;
- begin
- OK:=false;
- P:=S.GetPos;
- S.Read(L,sizeof(L));
- if (S.Status=stOK) and (L=HTMLIndexMagicNo) then
- begin
- S.Read(L,sizeof(L));
- CurrentHTMLIndexVersion:=L;
- OK:=(S.Status=stOK);
- end;
- if not OK then
- begin
- S.Reset;
- S.Seek(P);
- end
- else
- BaseDir:=S.ReadStr;
- New(Documents, Load(S));
- if not Assigned(Documents) then
- Fail;
- Documents^.MoveAliasesToSynonym;
- CurrentHTMLIndexVersion:=HTMLIndexVersion;
- end;
- function THTMLLinkScanner.FindID(const AName : string) : PNameID;
- begin
- {abstract}FindID:=nil;
- end;
- procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
- var L: longint;
- begin
- L:=HTMLIndexMagicNo;
- S.Write(L,sizeof(L));
- L:=HTMLIndexVersion;
- CurrentHTMLIndexVersion:=L;
- S.Write(L,sizeof(L));
- S.WriteStr(BaseDir);
- Documents^.MoveAliasesToSynonym;
- Documents^.Store(S);
- end;
- destructor THTMLLinkScanner.Done;
- begin
- if Assigned(Documents) then
- Dispose(Documents, Done);
- Documents:=nil;
- if Assigned(BaseDir) then
- DisposeStr(BaseDir);
- BaseDir:=nil;
- inherited Done;
- end;
- constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
- begin
- inherited Init;
- SetStr(DocumentURL,ADocumentURL);
- New(NameIDList, Init(5,10));
- end;
- function THTMLLinkScanFile.GetDocumentURL: string;
- begin
- GetDocumentURL:=GetStr(DocumentURL);
- end;
- function THTMLLinkScanFile.AddReferencedName (const AName : string) : PNameID;
- var
- index : sw_integer;
- PN : PNameID;
- begin
- new(PN,init(AName,IsReferenced));
- if not NameIDList^.Search(PN,Index) then
- NameIDList^.Insert(PN)
- else
- begin
- dispose(PN,Done);
- PN:=NameIDList^.At(Index);
- PN^.SetState(IsReferenced,true);
- end;
- AddReferencedName:=PN;
- end;
- function THTMLLinkScanFile.AddFoundName (const AName : string) : PNameID;
- var
- index : sw_integer;
- PN : PNameID;
- begin
- new(PN,init(AName,IsFound));
- if not NameIDList^.Search(PN,Index) then
- NameIDList^.Insert(PN)
- else
- begin
- dispose(PN,Done);
- PN:=NameIDList^.At(Index);
- PN^.SetState(IsFound,true);
- end;
- AddFoundName:=PN;
- end;
- procedure THTMLLinkScanFile.CheckNameList;
- var
- i : sw_integer;
- PN,PN2 : PNameID;
- begin
- {$ifdef DEBUG}
- for i:=0 to NameIDList^.Count-1 do
- begin
- PN:=NameIDList^.At(i);
- if not (IsFound in PN^.GetState) then
- begin
- if (IsReferenced in PN^.GetState) then
- DebugMessage(GetDocumentURL,'Name "'+PN^.GetName+'" from "'+
- PN^.GetOrigin+'" not found',1,1);
- PN2:=Owner^.FindID(PN^.GetName);
- if assigned(PN2) then
- begin
- DebugMessage('','ID found in "'+PN2^.GetOrigin+'"',1,1);
- if not (IsFound in PN2^.GetState) then
- DebugMessage('','ID not found',1,1);
- end;
- end;
- end;
- {$endif DEBUG}
- end;
- function THTMLLinkScanFile.FindID(const AName : string) : PNameID;
- var
- PN : PNameID;
- Index : sw_integer;
- begin
- new(PN,init(AName,IsID));
- if NameIDList^.Search(PN,Index) then
- begin
- dispose(PN,done);
- PN:=NameIDList^.At(Index);
- if (IsID in PN^.GetState) then
- FindId:=PN
- else
- FindID:=nil;
- end
- else
- begin
- dispose(PN,done);
- PN:=nil;
- FindID:=nil;
- end;
- end;
- destructor THTMLLinkScanFile.Done;
- begin
- if Assigned(DocumentURL) then
- DisposeStr(DocumentURL);
- DocumentURL:=nil;
- dispose(NameIDList,done);
- NameIDList:=nil;
- inherited Done;
- end;
- function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
- begin
- At:=inherited At(Index);
- end;
- function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
- var R: integer;
- K1: PHTMLLinkScanFile absolute Key1;
- K2: PHTMLLinkScanFile absolute Key2;
- S1,S2: string;
- begin
- S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
- if S1<S2 then R:=-1 else
- if S1>S2 then R:= 1 else
- R:=0;
- Compare:=R;
- end;
- function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
- var P,D: PHTMLLinkScanFile;
- Index: sw_integer;
- begin
- New(D, Init(DocURL));
- if Search(D,Index)=false then P:=nil else
- P:=At(Index);
- Dispose(D, Done);
- SearchFile:=P;
- end;
- function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
- var I: sw_integer;
- P,D: PHTMLLinkScanFile;
- begin
- P:=nil;
- for I:=0 to Count-1 do
- begin
- D:=At(I);
- if D^.State=AState then
- begin
- P:=D;
- Break;
- end;
- end;
- FindFileWithState:=P;
- end;
- procedure THTMLLinkScanFileCollection.CheckNameIDLists;
- procedure DoCheckNameList(P : PHTMLLinkScanFile);
- begin
- P^.CheckNameList;
- end;
- begin
- ForEach(TCallbackProcParam(@DoCheckNameList));
- end;
- constructor THTMLFileLinkScanner.Init(const ABaseDir: string);
- begin
- inherited Init(ABaseDir);
- New(DocumentFiles, Init(50,100));
- New(IDList, Init(50,100));
- {$ifdef DEBUG}
- DebugMessage('','THTMLFileLinkScanner Init "'+ABaseDir+'"',1,1);
- {$endif DEBUG}
- end;
- procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
- var P: PHTMLLinkScanFile;
- begin
- CurBaseURL:='';
- Options:=AOptions;
- ScheduleDoc(DocumentURL);
- repeat
- P:=DocumentFiles^.FindFileWithState(ssScheduled);
- if Assigned(P) then
- ProcessDoc(P);
- until P=nil;
- {$ifdef DEBUG}
- DebugMessage('','THTMLFileLinkScanner CheckNameList start ',1,1);
- DocumentFiles^.CheckNameIDLists;
- DebugMessage('','THTMLFileLinkScanner CheckNameList end ',1,1);
- {$endif DEBUG}
- end;
- function THTMLFileLinkScanner.GetDocumentBaseURL: string;
- begin
- GetDocumentBaseURL:=CurBaseURL;
- end;
- function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
- var OK: boolean;
- begin
- if soSubDocsOnly in Options then
- OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
- else
- OK:=true;
- CheckURL:=OK;
- end;
- procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
- var D: PHTMLLinkScanFile;
- P: sw_integer;
- DocURL: string;
- begin
- P:=Pos('#',LinkURL);
- if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
- D:=DocumentFiles^.SearchFile(DocURL);
- if not Assigned(D) then
- ScheduleDoc(DocURL);
- inherited AddLink(LinkText,LinkURL);
- end;
- procedure THTMLFileLinkScanner.AddRef(LinkURL: string);
- var D: PHTMLLinkScanFile;
- P: sw_integer;
- DocURL: string;
- PN : PNameID;
- begin
- {$ifdef DEBUG}
- DebugMessage(CurDoc,' Adding Ref to "'+
- LinkURL+'"',Line,1);
- {$endif DEBUG}
- P:=Pos('#',LinkURL);
- if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
- D:=DocumentFiles^.SearchFile(DocURL);
- if not Assigned(D) then
- ScheduleDoc(DocURL);
- D:=DocumentFiles^.SearchFile(DocURL);
- if P>0 then
- begin
- PN:=D^.AddReferencedName(copy(LinkURL,P+1,length(LinkURL)));
- PN^.SetOrigin(CurDoc);
- PN^.SetLine(Line);
- end;
- end;
- procedure THTMLFileLinkScanner.AddNameID(AName : string);
- var D: PHTMLLinkScanFile;
- P: sw_integer;
- PN : PNameID;
- DocURL: string;
- begin
- {$ifdef DEBUG}
- DebugMessage(CurDoc,' Adding NameID "'+
- CurName+'"',Line,1);
- {$endif DEBUG}
- P:=Pos('#',AName);
- if P=0 then DocURL:=AName else DocURL:=copy(AName,1,P-1);
- D:=DocumentFiles^.SearchFile(DocURL);
- if not Assigned(D) then
- ScheduleDoc(DocURL);
- D:=DocumentFiles^.SearchFile(DocURL);
- PN:=D^.AddFoundName(copy(AName,P+1,length(AName)));
- PN^.SetOrigin(CurDoc);
- PN^.SetLine(Line);
- end;
- procedure THTMLFileLinkScanner.AddID(AName : string);
- var
- D: PHTMLLinkScanFile;
- PN : PNameID;
- index : sw_integer;
- begin
- {$ifdef DEBUG}
- DebugMessage(CurDoc,' Adding Id "'+
- AName+'"',Line,1);
- {$endif DEBUG}
- D:=DocumentFiles^.SearchFile(CurDoc);
- if not Assigned(D) then
- ScheduleDoc(CurDoc);
- D:=DocumentFiles^.SearchFile(CurDoc);
- PN:=D^.AddFoundName(AName);
- PN^.SetState(IsId,true);
- PN^.SetOrigin(CurDoc);
- PN^.SetLine(Line);
- new(PN,init(AName,IsID));
- if IDList^ .Search(PN,index) then
- begin
- dispose(PN,done);
- {$ifdef DEBUG}
- PN:=IDList^.At(Index);
- DebugMessage(CurDoc,'ID "'+AName+'" already defined in "'+
- PN^.GetOrigin+'('+IntToStr(PN^.GetLine)+')"',Line,1);
- {$endif DEBUG}
- end
- else
- begin
- IDList^.Insert(PN);
- PN^.SetOrigin(CurDoc);
- PN^.SetLine(Line);
- end;
- end;
- function THTMLFileLinkScanner.FindID(const AName : string) : PNameID;
- Function ContainsNamedID(D : PHTMLLinkScanFile) : boolean;
- begin
- ContainsNamedID:=D^.FindID(AName)<>nil;
- end;
- var
- D : PHTMLLinkScanFile;
- begin
- D:=DocumentFiles^.FirstThat(TCallbackFunBoolParam(@ContainsNamedID));
- if assigned(D) then
- FindID:=D^.FindID(AName)
- else
- FindID:=nil;
- end;
- procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
- var F: PDOSTextFile;
- begin
- if Assigned(Doc)=false then Exit;
- Doc^.State:=ssProcessing;
- CurDoc:=Doc^.GetDocumentURL;
- New(F, Init(CurDoc));
- if Assigned(F) then
- begin
- CurBaseURL:=CompleteURL(CurDoc,'');
- {$ifdef DEBUG}
- DebugMessage(CurDoc,'Processing "'+CurDoc+'"',1,1);
- {$endif DEBUG}
- Process(F);
- {$ifdef DEBUG}
- DebugMessage(CurDoc,'Finished processing "'+CurDoc+'"',Line,1);
- {$endif DEBUG}
- Dispose(F, Done);
- end
- else
- begin
- {$ifdef DEBUG}
- DebugMessage(CurDoc,'file not found',1,1);
- {$endif DEBUG}
- end;
- Doc^.State:=ssScanned;
- CurDoc:='';
- end;
- procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
- var D: PHTMLLinkScanFile;
- begin
- New(D, Init(DocumentURL));
- D^.State:=ssScheduled;
- D^.Owner:=@Self;
- {$ifdef DEBUG}
- DebugMessage('','Scheduling file "'+DocumentURL+'"',1,1);
- {$endif DEBUG}
- DocumentFiles^.Insert(D);
- end;
- destructor THTMLFileLinkScanner.Done;
- begin
- if Assigned(DocumentFiles) then
- Dispose(DocumentFiles, Done);
- DocumentFiles:=nil;
- if Assigned(IDList) then
- Dispose(IDList, Done);
- IDList:=nil;
- inherited Done;
- end;
- procedure RegisterWHTMLScan;
- begin
- RegisterType(RHTMLLinkScanDocument);
- end;
- END.
|