|
@@ -1,453 +0,0 @@
|
|
-{
|
|
|
|
- $Id$
|
|
|
|
- This file is part of the Free Pascal Integrated Development Environment
|
|
|
|
- Copyright (c) 2000 by Berczi Gabor
|
|
|
|
-
|
|
|
|
- HTML scanner objects
|
|
|
|
-
|
|
|
|
- See the file COPYING.FPC, included in this distribution,
|
|
|
|
- for details about the copyright.
|
|
|
|
-
|
|
|
|
- This program is distributed in the hope that it will be useful,
|
|
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
-
|
|
|
|
- **********************************************************************}
|
|
|
|
-unit WHTMLScn;
|
|
|
|
-
|
|
|
|
-interface
|
|
|
|
-
|
|
|
|
-uses Objects,
|
|
|
|
- WHTML;
|
|
|
|
-
|
|
|
|
-type
|
|
|
|
- TCustomHTMLLinkScanner = object(THTMLParser)
|
|
|
|
- function DocAddTextChar(C: char): boolean; virtual;
|
|
|
|
- procedure DocAnchor(Entered: boolean); virtual;
|
|
|
|
- public
|
|
|
|
- {a}function CheckURL(const URL: string): boolean; virtual;
|
|
|
|
- {a}function CheckText(const Text: string): boolean; virtual;
|
|
|
|
- {a}procedure AddLink(const LinkText, LinkURL: string); virtual;
|
|
|
|
- {a}function GetDocumentBaseURL: string; virtual;
|
|
|
|
- private
|
|
|
|
- CurLinkText: string;
|
|
|
|
- CurURL: string;
|
|
|
|
- InAnchor: boolean;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
- PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
|
|
|
|
- THTMLLinkScanDocument = object(TObject)
|
|
|
|
- constructor Init(const ADocName: string);
|
|
|
|
- function GetName: string;
|
|
|
|
- function GetAliasCount: sw_integer;
|
|
|
|
- function GetAlias(Index: sw_integer): string;
|
|
|
|
- procedure AddAlias(const Alias: string);
|
|
|
|
- constructor Load(var S: TStream);
|
|
|
|
- procedure Store(var S: TStream);
|
|
|
|
- destructor Done; virtual;
|
|
|
|
- private
|
|
|
|
- DocName: PString;
|
|
|
|
- Aliases: PStringCollection;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
- PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
|
|
|
|
- THTMLLinkScanDocumentCollection = object(TSortedCollection)
|
|
|
|
- function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
|
|
|
|
- function At(Index: sw_Integer): PHTMLLinkScanDocument;
|
|
|
|
- function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
- THTMLLinkScanner = object(TCustomHTMLLinkScanner)
|
|
|
|
- constructor Init;
|
|
|
|
- function GetDocumentCount: sw_integer;
|
|
|
|
- function GetDocumentURL(DocIndex: sw_integer): string;
|
|
|
|
- function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
|
|
|
|
- function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
|
|
|
|
- procedure StoreDocuments(var S: TStream);
|
|
|
|
- destructor Done; virtual;
|
|
|
|
- public
|
|
|
|
- procedure AddLink(const LinkText, LinkURL: string); virtual;
|
|
|
|
- private
|
|
|
|
- Documents: PHTMLLinkScanDocumentCollection;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
- THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
|
|
|
|
-
|
|
|
|
- PHTMLLinkScanFile = ^THTMLLinkScanFile;
|
|
|
|
- THTMLLinkScanFile = object(TObject)
|
|
|
|
- constructor Init(const ADocumentURL: string);
|
|
|
|
- function GetDocumentURL: string;
|
|
|
|
- destructor Done; virtual;
|
|
|
|
- private
|
|
|
|
- DocumentURL : PString;
|
|
|
|
- public
|
|
|
|
- State : THTMLLinkScanState;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
- PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
|
|
|
|
- THTMLLinkScanFileCollection = object(TSortedCollection)
|
|
|
|
- function At(Index: sw_Integer): PHTMLLinkScanFile;
|
|
|
|
- function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
|
|
|
|
- function SearchFile(const DocURL: string): PHTMLLinkScanFile;
|
|
|
|
- function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
- THTMLLinkScanOption = (soSubDocsOnly);
|
|
|
|
- THTMLLinkScanOptions = set of THTMLLinkScanOption;
|
|
|
|
-
|
|
|
|
- THTMLFileLinkScanner = object(THTMLLinkScanner)
|
|
|
|
- constructor Init;
|
|
|
|
- procedure ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
|
|
|
|
- destructor Done; virtual;
|
|
|
|
- public
|
|
|
|
- function GetDocumentBaseURL: string; virtual;
|
|
|
|
- procedure AddLink(const LinkText, LinkURL: string); virtual;
|
|
|
|
- function CheckURL(const URL: string): boolean; virtual;
|
|
|
|
- private
|
|
|
|
- Options: THTMLLinkScanOptions;
|
|
|
|
- BaseURL: string;
|
|
|
|
- CurBaseURL: string;
|
|
|
|
- DocumentFiles: PHTMLLinkScanFileCollection;
|
|
|
|
- procedure ScheduleDoc(const DocumentURL: string);
|
|
|
|
- public
|
|
|
|
- procedure ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
-procedure RegisterWHTMLScan;
|
|
|
|
-
|
|
|
|
-implementation
|
|
|
|
-
|
|
|
|
-uses WUtils;
|
|
|
|
-
|
|
|
|
-const
|
|
|
|
- RHTMLLinkScanDocument: TStreamRec = (
|
|
|
|
- ObjType: 19500;
|
|
|
|
- VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
|
|
|
|
- Load: @THTMLLinkScanDocument.Load;
|
|
|
|
- Store: @THTMLLinkScanDocument.Store
|
|
|
|
- );
|
|
|
|
-
|
|
|
|
-function TCustomHTMLLinkScanner.DocAddTextChar(C: char): boolean;
|
|
|
|
-begin
|
|
|
|
- if InAnchor then
|
|
|
|
- CurLinkText:=CurLinkText+C;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
|
|
|
|
-begin
|
|
|
|
- if Entered then
|
|
|
|
- begin
|
|
|
|
- CurLinkText:='';
|
|
|
|
- if DocGetTagParam('HREF',CurURL)=false then CurURL:='';
|
|
|
|
- CurURL:=Trim(CurURL);
|
|
|
|
- CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
|
|
|
|
- end
|
|
|
|
- else
|
|
|
|
- begin
|
|
|
|
- CurLinkText:=Trim(CurLinkText);
|
|
|
|
- if CheckURL(CurURL) and CheckText(CurLinkText) then
|
|
|
|
- AddLink(CurLinkText,CurURL);
|
|
|
|
- end;
|
|
|
|
- InAnchor:=Entered;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
|
|
|
|
-begin
|
|
|
|
- { Abstract }
|
|
|
|
- GetDocumentBaseURL:='';
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
|
|
|
|
-begin
|
|
|
|
- { Abstract }
|
|
|
|
- CheckURL:=true;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
|
|
|
|
-begin
|
|
|
|
- { Abstract }
|
|
|
|
- CheckText:=true;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
|
|
|
|
-begin
|
|
|
|
- { Abstract }
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-constructor THTMLLinkScanDocument.Init(const ADocName: string);
|
|
|
|
-begin
|
|
|
|
- inherited Init;
|
|
|
|
- SetStr(DocName,ADocName);
|
|
|
|
- New(Aliases, Init(10,10));
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanDocument.GetName: string;
|
|
|
|
-begin
|
|
|
|
- GetName:=GetStr(DocName);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanDocument.GetAliasCount: sw_integer;
|
|
|
|
-begin
|
|
|
|
- GetAliasCount:=Aliases^.Count;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
|
|
|
|
-begin
|
|
|
|
- GetAlias:=GetStr(Aliases^.At(Index));
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
|
|
|
|
-begin
|
|
|
|
- Aliases^.Insert(NewStr(Alias));
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-constructor THTMLLinkScanDocument.Load(var S: TStream);
|
|
|
|
-begin
|
|
|
|
- inherited Init;
|
|
|
|
- DocName:=S.ReadStr;
|
|
|
|
- New(Aliases, Load(S));
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure THTMLLinkScanDocument.Store(var S: TStream);
|
|
|
|
-var I: integer;
|
|
|
|
-begin
|
|
|
|
- S.WriteStr(DocName);
|
|
|
|
- Aliases^.Store(S);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-destructor THTMLLinkScanDocument.Done;
|
|
|
|
-begin
|
|
|
|
- inherited Done;
|
|
|
|
- if Assigned(Aliases) then Dispose(Aliases, Done); Aliases:=nil;
|
|
|
|
- if Assigned(DocName) then DisposeStr(DocName); DocName:=nil;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
|
|
|
|
-var R: sw_integer;
|
|
|
|
- K1: PHTMLLinkScanDocument absolute Key1;
|
|
|
|
- K2: PHTMLLinkScanDocument absolute Key2;
|
|
|
|
- S1,S2: string;
|
|
|
|
-begin
|
|
|
|
- S1:=UpcaseStr(K1^.GetName); S2:=UpcaseStr(K2^.GetName);
|
|
|
|
- if S1<S2 then R:=-1 else
|
|
|
|
- if S1>S2 then R:= 1 else
|
|
|
|
- R:=0;
|
|
|
|
- Compare:=R;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
|
|
|
|
-begin
|
|
|
|
- At:=inherited At(Index);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
|
|
|
|
-var D,P: PHTMLLinkScanDocument;
|
|
|
|
- Index: sw_integer;
|
|
|
|
-begin
|
|
|
|
- New(D, Init(DocName));
|
|
|
|
- if Search(D, Index)=false then P:=nil else
|
|
|
|
- P:=At(Index);
|
|
|
|
- Dispose(D, Done);
|
|
|
|
- SearchDocument:=P;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-constructor THTMLLinkScanner.Init;
|
|
|
|
-begin
|
|
|
|
- inherited Init;
|
|
|
|
- New(Documents, Init(50,100));
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanner.GetDocumentCount: sw_integer;
|
|
|
|
-begin
|
|
|
|
- GetDocumentCount:=Documents^.Count;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
|
|
|
|
-begin
|
|
|
|
- GetDocumentURL:=Documents^.At(DocIndex)^.GetName;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
|
|
|
|
-begin
|
|
|
|
- GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
|
|
|
|
-begin
|
|
|
|
- GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
|
|
|
|
-var D: PHTMLLinkScanDocument;
|
|
|
|
-begin
|
|
|
|
- D:=Documents^.SearchDocument(LinkURL);
|
|
|
|
- if D=nil then
|
|
|
|
- begin
|
|
|
|
- New(D, Init(LinkURL));
|
|
|
|
- Documents^.Insert(D);
|
|
|
|
- end;
|
|
|
|
- D^.AddAlias(LinkText);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
|
|
|
|
-begin
|
|
|
|
- Documents^.Store(S);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-destructor THTMLLinkScanner.Done;
|
|
|
|
-begin
|
|
|
|
- inherited Done;
|
|
|
|
- if Assigned(Documents) then Dispose(Documents, Done); Documents:=nil;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
|
|
|
|
-begin
|
|
|
|
- inherited Init;
|
|
|
|
- SetStr(DocumentURL,ADocumentURL);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanFile.GetDocumentURL: string;
|
|
|
|
-begin
|
|
|
|
- GetDocumentURL:=GetStr(DocumentURL);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-destructor THTMLLinkScanFile.Done;
|
|
|
|
-begin
|
|
|
|
- inherited Done;
|
|
|
|
- if Assigned(DocumentURL) then DisposeStr(DocumentURL); DocumentURL:=nil;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
|
|
|
|
-begin
|
|
|
|
- At:=inherited At(Index);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
|
|
|
|
-var R: integer;
|
|
|
|
- K1: PHTMLLinkScanFile absolute Key1;
|
|
|
|
- K2: PHTMLLinkScanFile absolute Key2;
|
|
|
|
- S1,S2: string;
|
|
|
|
-begin
|
|
|
|
- S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
|
|
|
|
- if S1<S2 then R:=-1 else
|
|
|
|
- if S1>S2 then R:= 1 else
|
|
|
|
- R:=0;
|
|
|
|
- Compare:=R;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
|
|
|
|
-var P,D: PHTMLLinkScanFile;
|
|
|
|
- Index: sw_integer;
|
|
|
|
-begin
|
|
|
|
- New(D, Init(DocURL));
|
|
|
|
- if Search(D,Index)=false then P:=nil else
|
|
|
|
- P:=At(Index);
|
|
|
|
- Dispose(D, Done);
|
|
|
|
- SearchFile:=P;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
|
|
|
|
-var I: sw_integer;
|
|
|
|
- P,D: PHTMLLinkScanFile;
|
|
|
|
-begin
|
|
|
|
- P:=nil;
|
|
|
|
- for I:=0 to Count-1 do
|
|
|
|
- begin
|
|
|
|
- D:=At(I);
|
|
|
|
- if D^.State=AState then
|
|
|
|
- begin
|
|
|
|
- P:=D;
|
|
|
|
- Break;
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
- FindFileWithState:=P;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-constructor THTMLFileLinkScanner.Init;
|
|
|
|
-begin
|
|
|
|
- inherited Init;
|
|
|
|
- New(DocumentFiles, Init(50,100));
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
|
|
|
|
-var P: PHTMLLinkScanFile;
|
|
|
|
-begin
|
|
|
|
- CurBaseURL:=''; Options:=AOptions;
|
|
|
|
- ScheduleDoc(DocumentURL);
|
|
|
|
- repeat
|
|
|
|
- P:=DocumentFiles^.FindFileWithState(ssScheduled);
|
|
|
|
- if Assigned(P) then
|
|
|
|
- ProcessDoc(P);
|
|
|
|
- until P=nil;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLFileLinkScanner.GetDocumentBaseURL: string;
|
|
|
|
-begin
|
|
|
|
- GetDocumentBaseURL:=CurBaseURL;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
|
|
|
|
-var OK: boolean;
|
|
|
|
-begin
|
|
|
|
- if soSubDocsOnly in Options then
|
|
|
|
- OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
|
|
|
|
- else
|
|
|
|
- OK:=true;
|
|
|
|
- CheckURL:=OK;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
|
|
|
|
-var D: PHTMLLinkScanFile;
|
|
|
|
-begin
|
|
|
|
- D:=DocumentFiles^.SearchFile(LinkURL);
|
|
|
|
- if Assigned(D)=false then
|
|
|
|
- ScheduleDoc(LinkURL);
|
|
|
|
- inherited AddLink(LinkText,LinkURL);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
|
|
|
|
-var F: PDOSTextFile;
|
|
|
|
-begin
|
|
|
|
- if Assigned(Doc)=false then Exit;
|
|
|
|
-
|
|
|
|
- Doc^.State:=ssProcessing;
|
|
|
|
- New(F, Init(Doc^.GetDocumentURL));
|
|
|
|
- if Assigned(F) then
|
|
|
|
- begin
|
|
|
|
- CurBaseURL:=CompleteURL(Doc^.GetDocumentURL,'');
|
|
|
|
- Process(F);
|
|
|
|
- Dispose(F, Done);
|
|
|
|
- end;
|
|
|
|
- Doc^.State:=ssScanned;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
|
|
|
|
-var D: PHTMLLinkScanFile;
|
|
|
|
-begin
|
|
|
|
- New(D, Init(DocumentURL));
|
|
|
|
- D^.State:=ssScheduled;
|
|
|
|
- DocumentFiles^.Insert(D);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-destructor THTMLFileLinkScanner.Done;
|
|
|
|
-begin
|
|
|
|
- inherited Done;
|
|
|
|
- if Assigned(DocumentFiles) then Dispose(DocumentFiles, Done); DocumentFiles:=nil;
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-procedure RegisterWHTMLScan;
|
|
|
|
-begin
|
|
|
|
- RegisterType(RHTMLLinkScanDocument);
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-END.
|
|
|
|
-{
|
|
|
|
- $Log$
|
|
|
|
- Revision 1.3 2000-04-25 10:59:02 pierre
|
|
|
|
- * renamed lowercased
|
|
|
|
-
|
|
|
|
- Revision 1.1 2000/04/25 08:42:32 pierre
|
|
|
|
- * New Gabor changes : see fixes.txt
|
|
|
|
-
|
|
|
|
-}
|
|
|