pierre 25 years ago
parent
commit
a875c16ae9
1 changed files with 453 additions and 0 deletions
  1. 453 0
      ide/text/whtmlscn.pas

+ 453 - 0
ide/text/whtmlscn.pas

@@ -0,0 +1,453 @@
+{
+    $Id$
+    This file is part of the Free Pascal Integrated Development Environment
+    Copyright (c) 2000 by Berczi Gabor
+
+    HTML scanner objects
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+unit WHTMLScn;
+
+interface
+
+uses Objects,
+     WHTML;
+
+type
+     TCustomHTMLLinkScanner = object(THTMLParser)
+       function    DocAddTextChar(C: char): boolean; virtual;
+       procedure   DocAnchor(Entered: boolean); virtual;
+     public
+    {a}function    CheckURL(const URL: string): boolean; virtual;
+    {a}function    CheckText(const Text: string): boolean; virtual;
+    {a}procedure   AddLink(const LinkText, LinkURL: string); virtual;
+    {a}function    GetDocumentBaseURL: string; virtual;
+     private
+       CurLinkText: string;
+       CurURL: string;
+       InAnchor: boolean;
+     end;
+
+     PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
+     THTMLLinkScanDocument = object(TObject)
+       constructor Init(const ADocName: string);
+       function    GetName: string;
+       function    GetAliasCount: sw_integer;
+       function    GetAlias(Index: sw_integer): string;
+       procedure   AddAlias(const Alias: string);
+       constructor Load(var S: TStream);
+       procedure   Store(var S: TStream);
+       destructor  Done; virtual;
+     private
+       DocName: PString;
+       Aliases: PStringCollection;
+     end;
+
+     PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
+     THTMLLinkScanDocumentCollection = object(TSortedCollection)
+       function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
+       function At(Index: sw_Integer): PHTMLLinkScanDocument;
+       function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
+     end;
+
+     THTMLLinkScanner = object(TCustomHTMLLinkScanner)
+       constructor Init;
+       function    GetDocumentCount: sw_integer;
+       function    GetDocumentURL(DocIndex: sw_integer): string;
+       function    GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
+       function    GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
+       procedure   StoreDocuments(var S: TStream);
+       destructor  Done; virtual;
+     public
+       procedure   AddLink(const LinkText, LinkURL: string); virtual;
+     private
+       Documents: PHTMLLinkScanDocumentCollection;
+     end;
+
+     THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
+
+     PHTMLLinkScanFile = ^THTMLLinkScanFile;
+     THTMLLinkScanFile = object(TObject)
+       constructor Init(const ADocumentURL: string);
+       function    GetDocumentURL: string;
+       destructor  Done; virtual;
+     private
+       DocumentURL  : PString;
+     public
+       State        : THTMLLinkScanState;
+     end;
+
+     PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
+     THTMLLinkScanFileCollection = object(TSortedCollection)
+       function At(Index: sw_Integer): PHTMLLinkScanFile;
+       function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
+       function SearchFile(const DocURL: string): PHTMLLinkScanFile;
+       function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
+     end;
+
+     THTMLLinkScanOption = (soSubDocsOnly);
+     THTMLLinkScanOptions = set of THTMLLinkScanOption;
+
+     THTMLFileLinkScanner = object(THTMLLinkScanner)
+       constructor Init;
+       procedure   ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
+       destructor  Done; virtual;
+     public
+       function    GetDocumentBaseURL: string; virtual;
+       procedure   AddLink(const LinkText, LinkURL: string); virtual;
+       function    CheckURL(const URL: string): boolean; virtual;
+     private
+       Options: THTMLLinkScanOptions;
+       BaseURL: string;
+       CurBaseURL: string;
+       DocumentFiles: PHTMLLinkScanFileCollection;
+       procedure   ScheduleDoc(const DocumentURL: string);
+     public
+       procedure   ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
+     end;
+
+procedure RegisterWHTMLScan;
+
+implementation
+
+uses WUtils;
+
+const
+  RHTMLLinkScanDocument: TStreamRec = (
+     ObjType: 19500;
+     VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
+     Load:    @THTMLLinkScanDocument.Load;
+     Store:   @THTMLLinkScanDocument.Store
+  );
+
+function TCustomHTMLLinkScanner.DocAddTextChar(C: char): boolean;
+begin
+  if InAnchor then
+    CurLinkText:=CurLinkText+C;
+end;
+
+procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
+begin
+  if Entered then
+    begin
+      CurLinkText:='';
+      if DocGetTagParam('HREF',CurURL)=false then CurURL:='';
+      CurURL:=Trim(CurURL);
+      CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
+    end
+  else
+    begin
+      CurLinkText:=Trim(CurLinkText);
+      if CheckURL(CurURL) and CheckText(CurLinkText) then
+        AddLink(CurLinkText,CurURL);
+    end;
+  InAnchor:=Entered;
+end;
+
+function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
+begin
+  { Abstract }
+  GetDocumentBaseURL:='';
+end;
+
+function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
+begin
+  { Abstract }
+  CheckURL:=true;
+end;
+
+function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
+begin
+  { Abstract }
+  CheckText:=true;
+end;
+
+procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
+begin
+  { Abstract }
+end;
+
+constructor THTMLLinkScanDocument.Init(const ADocName: string);
+begin
+  inherited Init;
+  SetStr(DocName,ADocName);
+  New(Aliases, Init(10,10));
+end;
+
+function THTMLLinkScanDocument.GetName: string;
+begin
+  GetName:=GetStr(DocName);
+end;
+
+function THTMLLinkScanDocument.GetAliasCount: sw_integer;
+begin
+  GetAliasCount:=Aliases^.Count;
+end;
+
+function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
+begin
+  GetAlias:=GetStr(Aliases^.At(Index));
+end;
+
+procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
+begin
+  Aliases^.Insert(NewStr(Alias));
+end;
+
+constructor THTMLLinkScanDocument.Load(var S: TStream);
+begin
+  inherited Init;
+  DocName:=S.ReadStr;
+  New(Aliases, Load(S));
+end;
+
+procedure THTMLLinkScanDocument.Store(var S: TStream);
+var I: integer;
+begin
+  S.WriteStr(DocName);
+  Aliases^.Store(S);
+end;
+
+destructor THTMLLinkScanDocument.Done;
+begin
+  inherited Done;
+  if Assigned(Aliases) then Dispose(Aliases, Done); Aliases:=nil;
+  if Assigned(DocName) then DisposeStr(DocName); DocName:=nil;
+end;
+
+function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
+var R: sw_integer;
+    K1: PHTMLLinkScanDocument absolute Key1;
+    K2: PHTMLLinkScanDocument absolute Key2;
+    S1,S2: string;
+begin
+  S1:=UpcaseStr(K1^.GetName); S2:=UpcaseStr(K2^.GetName);
+  if S1<S2 then R:=-1 else
+  if S1>S2 then R:= 1 else
+  R:=0;
+  Compare:=R;
+end;
+
+function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
+begin
+  At:=inherited At(Index);
+end;
+
+function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
+var D,P: PHTMLLinkScanDocument;
+    Index: sw_integer;
+begin
+  New(D, Init(DocName));
+  if Search(D, Index)=false then P:=nil else
+    P:=At(Index);
+  Dispose(D, Done);
+  SearchDocument:=P;
+end;
+
+constructor THTMLLinkScanner.Init;
+begin
+  inherited Init;
+  New(Documents, Init(50,100));
+end;
+
+function THTMLLinkScanner.GetDocumentCount: sw_integer;
+begin
+  GetDocumentCount:=Documents^.Count;
+end;
+
+function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
+begin
+  GetDocumentURL:=Documents^.At(DocIndex)^.GetName;
+end;
+
+function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
+begin
+  GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
+end;
+
+function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
+begin
+  GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
+end;
+
+procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
+var D: PHTMLLinkScanDocument;
+begin
+  D:=Documents^.SearchDocument(LinkURL);
+  if D=nil then
+  begin
+    New(D, Init(LinkURL));
+    Documents^.Insert(D);
+  end;
+  D^.AddAlias(LinkText);
+end;
+
+procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
+begin
+  Documents^.Store(S);
+end;
+
+destructor THTMLLinkScanner.Done;
+begin
+  inherited Done;
+  if Assigned(Documents) then Dispose(Documents, Done); Documents:=nil;
+end;
+
+constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
+begin
+  inherited Init;
+  SetStr(DocumentURL,ADocumentURL);
+end;
+
+function THTMLLinkScanFile.GetDocumentURL: string;
+begin
+  GetDocumentURL:=GetStr(DocumentURL);
+end;
+
+destructor THTMLLinkScanFile.Done;
+begin
+  inherited Done;
+  if Assigned(DocumentURL) then DisposeStr(DocumentURL); DocumentURL:=nil;
+end;
+
+function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
+begin
+  At:=inherited At(Index);
+end;
+
+function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
+var R: integer;
+    K1: PHTMLLinkScanFile absolute Key1;
+    K2: PHTMLLinkScanFile absolute Key2;
+    S1,S2: string;
+begin
+  S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
+  if S1<S2 then R:=-1 else
+  if S1>S2 then R:= 1 else
+  R:=0;
+  Compare:=R;
+end;
+
+function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
+var P,D: PHTMLLinkScanFile;
+    Index: sw_integer;
+begin
+  New(D, Init(DocURL));
+  if Search(D,Index)=false then P:=nil else
+    P:=At(Index);
+  Dispose(D, Done);
+  SearchFile:=P;
+end;
+
+function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
+var I: sw_integer;
+    P,D: PHTMLLinkScanFile;
+begin
+  P:=nil;
+  for I:=0 to Count-1 do
+  begin
+    D:=At(I);
+    if D^.State=AState then
+      begin
+        P:=D;
+        Break;
+      end;
+  end;
+  FindFileWithState:=P;
+end;
+
+constructor THTMLFileLinkScanner.Init;
+begin
+  inherited Init;
+  New(DocumentFiles, Init(50,100));
+end;
+
+procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
+var P: PHTMLLinkScanFile;
+begin
+  CurBaseURL:=''; Options:=AOptions;
+  ScheduleDoc(DocumentURL);
+  repeat
+    P:=DocumentFiles^.FindFileWithState(ssScheduled);
+    if Assigned(P) then
+      ProcessDoc(P);
+  until P=nil;
+end;
+
+function THTMLFileLinkScanner.GetDocumentBaseURL: string;
+begin
+  GetDocumentBaseURL:=CurBaseURL;
+end;
+
+function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
+var OK: boolean;
+begin
+  if soSubDocsOnly in Options then
+    OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
+  else
+    OK:=true;
+  CheckURL:=OK;
+end;
+
+procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
+var D: PHTMLLinkScanFile;
+begin
+  D:=DocumentFiles^.SearchFile(LinkURL);
+  if Assigned(D)=false then
+    ScheduleDoc(LinkURL);
+  inherited AddLink(LinkText,LinkURL);
+end;
+
+procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
+var F: PDOSTextFile;
+begin
+  if Assigned(Doc)=false then Exit;
+
+  Doc^.State:=ssProcessing;
+  New(F, Init(Doc^.GetDocumentURL));
+  if Assigned(F) then
+  begin
+    CurBaseURL:=CompleteURL(Doc^.GetDocumentURL,'');
+    Process(F);
+    Dispose(F, Done);
+  end;
+  Doc^.State:=ssScanned;
+end;
+
+procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
+var D: PHTMLLinkScanFile;
+begin
+  New(D, Init(DocumentURL));
+  D^.State:=ssScheduled;
+  DocumentFiles^.Insert(D);
+end;
+
+destructor THTMLFileLinkScanner.Done;
+begin
+  inherited Done;
+  if Assigned(DocumentFiles) then Dispose(DocumentFiles, Done); DocumentFiles:=nil;
+end;
+
+procedure RegisterWHTMLScan;
+begin
+  RegisterType(RHTMLLinkScanDocument);
+end;
+
+
+END.
+{
+  $Log$
+  Revision 1.5  2000-05-17 08:49:16  pierre
+   readded
+
+  Revision 1.1  2000/04/25 08:42:32  pierre
+   * New Gabor changes : see fixes.txt
+
+}