Jelajahi Sumber

Added ability for chm's to be searchable. A reader for the search index
is partially implemented

git-svn-id: trunk@12077 -

andrew 16 tahun lalu
induk
melakukan
7b0f17db5e

+ 3 - 0
.gitattributes

@@ -948,15 +948,18 @@ packages/chm/fpmake.pp svneol=native#text/plain
 packages/chm/src/chmbase.pas svneol=native#text/plain
 packages/chm/src/chmcmd.lpi svneol=native#text/plain
 packages/chm/src/chmcmd.lpr svneol=native#text/plain
+packages/chm/src/chmfiftimain.pas svneol=native#text/plain
 packages/chm/src/chmfilewriter.pas svneol=native#text/plain
 packages/chm/src/chmls.lpi svneol=native#text/plain
 packages/chm/src/chmls.lpr svneol=native#text/plain
+packages/chm/src/chmobjinstconst.inc svneol=native#text/plain
 packages/chm/src/chmreader.pas svneol=native#text/plain
 packages/chm/src/chmsitemap.pas svneol=native#text/plain
 packages/chm/src/chmspecialfiles.pas svneol=native#text/plain
 packages/chm/src/chmtypes.pas svneol=native#text/plain
 packages/chm/src/chmwriter.pas svneol=native#text/plain
 packages/chm/src/fasthtmlparser.pas svneol=native#text/plain
+packages/chm/src/htmlindexer.pas svneol=native#text/plain
 packages/chm/src/htmlutil.pas svneol=native#text/plain
 packages/chm/src/paslznonslide.pas svneol=native#text/plain
 packages/chm/src/paslzx.pas svneol=native#text/plain

+ 1 - 1
packages/chm/src/chmbase.pas

@@ -171,7 +171,7 @@ var
   Value: QWord = 0;
   TheEnd: DWord = 0;
 begin
-  bit := (sizeof(dWord)*8)div 7*7;
+  bit := 28; //((sizeof(dWord)*8)div 7)*7; // = 28
   buf := @Value;
   while True do begin
     mask := $7f shl bit;

+ 955 - 0
packages/chm/src/chmfiftimain.pas

@@ -0,0 +1,955 @@
+{ Copyright (C) <2008> <Andrew Haines> chmfiftimain.pas
+
+  This library is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Library General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or (at your
+  option) any later version.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
+  for more details.
+
+  You should have received a copy of the GNU Library General Public License
+  along with this library; if not, write to the Free Software Foundation,
+  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+}
+{
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+}
+unit chmFiftiMain;
+{$mode objfpc}{$H+}
+interface
+
+uses Classes, HTMLIndexer;
+
+type
+  TFiftiMainHeader = record
+    Sig: array [0..3] of byte;  //$00,$00,$28,$00
+    HTMLFilesCount: DWord;
+    RootNodeOffset: DWord;
+    Unknown1: DWord; // = 0
+    LeafNodeCount: DWord;
+    CopyOfRootNodeOffset: DWord;
+    TreeDepth: Word;
+    Unknown2: DWord; // = 7
+    DocIndexScale: Byte;
+    DocIndexRootSize: Byte;
+    CodeCountScale: Byte;
+    CodeCountRootSize: Byte;
+    LocationCodeScale: Byte;
+    LocationCodeRootSize: Byte;
+    Unknown3: array[0..9] of byte; // = 0
+    NodeSize: DWord; // 4096;
+    Unknown4: DWord; // 0 or 1;
+    LastDupWordIndex: DWord;
+    LastDupCharIndex: DWord;
+    LongestWordLength: DWord; // maximum 99
+    TotalWordsIndexed: DWord; // includes duplicates
+    TotalWords: DWord; // word count not including duplicates
+    TotalWordsLengthPart1: DWord; // length of all the words with duplicates plus the next dword!
+    TotalWordsLengthPart2: DWord;
+    TotalWordsLength: DWord; // length of all words not including duplicates
+    WordBlockUnusedBytes: DWord; // who knows, this makes no sense when there are more than one blocks
+    Unknown5: DWord; // 0
+    HTMLFilesCountMinusOne: DWord; // maybe
+    Unknown6: array[0..23] of Byte; // 0
+    WindowsCodePage: DWord; // usually 1252
+    LocalID: DWord;
+    //Unknown7: array [0..893] of Byte; // 0
+  end;
+
+  { TFIftiNode }
+
+  TFIftiNode = class(TObject)
+    FLastWord: String;
+    FWriteStream: TStream;
+    FBlockStream: TMemoryStream;
+    ParentNode: TFIftiNode;
+    function  AdjustedWord(AWord: String; out AOffset: Byte; AOldWord: String): String;
+    procedure ChildIsFull(AWord: String; ANodeOffset: DWord); virtual; abstract;
+    function  GuessIfCanHold(AWord: String): Boolean; virtual; abstract;
+    procedure Flush(NewBlockNeeded: Boolean); virtual; abstract;
+    procedure FillRemainingSpace;
+    function  RemainingSpace: DWord;
+    constructor Create(AStream: TStream);
+    destructor Destroy; override;
+  end;
+
+  { TChmSearchWriter }
+
+  TChmSearchWriter = class(TObject)
+  private
+    FHeaderRec: TFiftiMainHeader;
+    FStream: TStream;
+    FWordList: TIndexedWordList;
+    FActiveLeafNode: TFIftiNode;
+    procedure ProcessWords;
+    procedure WriteHeader(IsPlaceHolder: Boolean);
+    procedure WriteAWord(AWord: TIndexedWord);
+  public
+    procedure WriteToStream;
+    constructor Create(AStream: TStream; AWordList: TIndexedWordList);
+  end;
+
+  { TChmSearchReader }
+
+  TChmWLCTopic = record
+    TopicIndex: DWord;
+    LocationCodes: array of DWord;
+  end;
+
+  TChmWLCTopicArray = array of TChmWLCTopic;
+
+  TChmSearchReader = class;
+
+  TChmSearchReaderFoundDataEvent = procedure(Sender: TChmSearchReader; AWord: String; ATopic: DWord; AWordIndex: DWord) of object;
+
+  TChmSearchReader = class(TObject)
+  private
+    FStream: TStream;
+    FFileIsValid: Boolean;
+    FFreeStreamOnDestroy: Boolean;
+    FDocRootSize,
+    FCodeCountRootSize,
+    FLocCodeRootSize: Integer;
+    FTreeDepth: Integer;
+    FRootNodeOffset: DWord;
+    FActiveNodeStart: DWord;
+    FActiveNodeFreeSpace: Word;
+    FNextLeafNode: DWord;
+    procedure ReadCommonData;
+    procedure MoveToFirstLeafNode;
+    procedure MoveToRootNode;
+    procedure MoveToNode(ANodeOffset: DWord; ANodeDepth: Integer);
+    function  ReadWordOrPartialWord(ALastWord: String): String; // returns the whole word using the last word as a base
+    procedure ReadRootNodeEntry(ALastWord: String; out AWord: String; out ASubNodeStart: DWord);
+    function  ReadLeafNodeEntry(ALastWord: String; out AWord: String; out AInTitle: Boolean; out AWLCCount: DWord; out AWLCOffset: DWord; out AWLCSize: DWord): Boolean;
+    function  ReadWLCEntries(AWLCCount: DWord; AWLCOffset: DWord; AWLCSize: DWord): TChmWLCTopicArray;
+  public
+    constructor Create(AStream: TStream; AFreeStreamOnDestroy: Boolean);
+    destructor  Destroy; override;
+    procedure   DumpData(AFoundDataEvent: TChmSearchReaderFoundDataEvent);
+    function    LookupWord(AWord: String): TChmWLCTopicArray;
+    property    FileIsValid: Boolean read FFileIsValid;
+  end;
+
+const
+  FIFTI_NODE_SIZE = 4096;
+
+implementation
+uses SysUtils, Math, ChmBase;
+
+type
+
+{ TIndexNode }
+
+  TIndexNode = class(TFIftiNode)
+    function  GuessIfCanHold(AWord: String): Boolean; override;
+    procedure ChildIsFull ( AWord: String; ANodeOffset: DWord ); override;
+    procedure Flush(NewBlockNeeded: Boolean); override;
+  end;
+
+  { TLeafNode }
+
+  TLeafNode = class(TFIftiNode)
+    FLeafNodeCount: DWord;
+    FLastNodeStart: DWord;
+    FreeSpace: DWord;
+    FDocRootSize,
+    FCodeRootSize,
+    FLocRootSize: Byte;
+    procedure WriteInitialHeader;
+    Destructor Destroy; override;
+    function  GuessIfCanHold(AWord: String): Boolean; override;
+    procedure Flush(NewBlockNeeded: Boolean); override;
+    procedure AddWord(AWord: TIndexedWord);
+    function WriteWLCEntries(AWord: TIndexedWord; ADocRootSize, ACodeRootSize, ALocRootSize: Byte): DWord;
+    property LeafNodeCount: DWord read FLeafNodeCount;
+    property DocRootSize: Byte read FDocRootSize write FDocRootSize;
+    property CodeRootSize: Byte read FCodeRootSize write FCodeRootSize;
+    property LocRootSize: Byte read FLocRootSize write FLocRootSize;
+  end;
+
+function WriteScaleRootInt(ANumber: DWord; out Bits: DWord; Root: Integer): Byte;
+var
+  Tmp: DWord;
+  Mask: DWord;
+//  Scale: Integer;
+  NeededBits: Integer;
+  PrefixBits: Integer;
+  RootBits: Integer;
+begin
+//  Scale := 2;
+  Bits := 0;
+  Result := Root;
+
+  Tmp := ANumber;
+  NeededBits := 0;
+  while Tmp <> 0 do
+  begin
+    Inc(NeededBits);
+    Tmp := Tmp shr 1;
+  end;
+  PrefixBits := Max(0, NeededBits-Root);
+
+  RootBits := NeededBits -1; //
+  if RootBits < Root then
+    RootBits := Root;
+  if RootBits < 0 then
+    RootBits := 0;
+
+  Mask := 0;
+  if RootBits-1 >= 0 then
+    for Tmp := 0 to RootBits-1 do
+       Mask := Mask or (DWord(1) shl Tmp);
+  Bits := not Mask;
+  Bits := Bits shl 1; // make space for empty bit
+  Bits := Bits or (ANumber and Mask);
+
+  Result := PrefixBits + 1 + RootBits;
+  Bits := (Bits shl (32-Result)) shr (32 - Result);
+end;
+
+
+
+{ TChmSearchWriter }
+
+procedure TChmSearchWriter.ProcessWords;
+var
+  AWord: TIndexedWord;
+begin
+  AWord := FWordList.FirstWord;
+  while AWord <> nil do
+  begin
+    WriteAWord(AWord);
+    AWord := AWord.NextWord;
+  end;
+  if FActiveLeafNode <> nil then
+    FActiveLeafNode.Flush(False); // causes the unwritten parts of the tree to be written
+end;
+
+procedure TChmSearchWriter.WriteHeader ( IsPlaceHolder: Boolean ) ;
+var
+  TmpNode: TFIftiNode;
+  i: Integer;
+begin
+  if IsPlaceHolder then
+  begin
+    FStream.Size := $400; // the header size. we will fill this after the nodes have been determined
+    FStream.Position := $400;
+    FillChar(PChar(TMemoryStream(FStream).Memory)^, $400, 0);
+    FHeaderRec.DocIndexRootSize := 1;
+    FHeaderRec.CodeCountRootSize := 1;
+    FHeaderRec.LocationCodeRootSize := 4;
+    Exit;
+  end;
+  // write the glorious header
+  FHeaderRec.Sig[2] := $28;
+  FHeaderRec.HTMLFilesCount := FWordList.IndexedFileCount;
+  FHeaderRec.RootNodeOffset := FStream.Size-4096;
+  FHeaderRec.LeafNodeCount := TLeafNode(FActiveLeafNode).LeafNodeCount;
+  FHeaderRec.CopyOfRootNodeOffset := FHeaderRec.RootNodeOffset;
+  FHeaderRec.TreeDepth := 0;
+  TmpNode := FActiveLeafNode;
+  while TmpNode <> nil do
+  begin
+    Inc(FHeaderRec.TreeDepth);
+    TmpNode := TmpNode.ParentNode;
+  end;
+  FHeaderRec.DocIndexScale := 2;
+  FHeaderRec.CodeCountScale := 2;
+  FHeaderRec.LocationCodeScale := 2;
+
+  //FHeaderRec.DocIndexRootSize := 15;
+  //FHeaderRec.CodeCountRootSize := 15;
+  //FHeaderRec.LocationCodeRootSize := 15;
+
+  FHeaderRec.NodeSize := 4096;
+  FHeaderRec.LongestWordLength := FWordList.LongestWord;
+  FHeaderRec.TotalWordsIndexed := FWordList.TotalWordCount;
+  FHeaderRec.TotalWords := FWordList.TotalDIfferentWords;
+  FHeaderRec.TotalWordsLengthPart1 := FWordList.TotalWordLength;
+  FHeaderRec.TotalWordsLength := FWordList.TotalDifferentWordLength;
+  FHeaderRec.WindowsCodePage := 1252;
+
+  FStream.Position := 0;
+
+  FStream.Write(FHeaderRec.Sig[0], 4);
+  FStream.WriteDWord(NtoLE(FHeaderRec.HTMLFilesCount));
+  FStream.WriteDWord(NtoLE(FHeaderRec.RootNodeOffset));
+  FStream.WriteDWord(NtoLE(0)); // unknown 1
+  FStream.WriteDWord(NtoLE(FHeaderRec.LeafNodeCount));
+  FStream.WriteDWord(NtoLE(FHeaderRec.RootNodeOffset)); // yes twice
+  FStream.WriteWord(NtoLE(FHeaderRec.TreeDepth));
+  FStream.WriteDWord(NtoLE(DWord(7)));
+  FStream.WriteByte(2);
+  FStream.WriteByte(FHeaderRec.DocIndexRootSize);
+  FStream.WriteByte(2);
+  FStream.WriteByte(FHeaderRec.CodeCountRootSize);
+  FStream.WriteByte(2);
+  FStream.WriteByte(FHeaderRec.LocationCodeRootSize);
+  // eat 10 bytes
+  FStream.WriteWord(0);
+  FStream.WriteDWord(0);
+  FStream.WriteDWord(0);
+
+  FStream.WriteDWord(NtoLE(FHeaderRec.NodeSize));
+  FStream.WriteDWord(NtoLE(DWord(0)));
+  FStream.WriteDWord(1);
+  FStream.WriteDWord(5);
+  FStream.WriteDWord(NtoLE(FHeaderRec.LongestWordLength));
+  FStream.WriteDWord(NtoLE(FHeaderRec.TotalWordsIndexed));
+  FStream.WriteDWord(NtoLE(FHeaderRec.TotalWords));
+  FStream.WriteDWord(NtoLE(FHeaderRec.TotalWordsLengthPart1));
+  FStream.WriteDWord(NtoLE(FHeaderRec.TotalWordsLengthPart2));
+  FStream.WriteDWord(NtoLE(FHeaderRec.TotalWordsLength));
+  FStream.WriteDWord(NtoLE(TLeafNode(FActiveLeafNode).FreeSpace));
+  FStream.WriteDWord(NtoLE(0));
+  FStream.WriteDWord(NtoLE(FHeaderRec.HTMLFilesCount-1));
+  for i := 0 to 23 do FStream.WriteByte(0);
+  FStream.WriteDWord(NtoLE(FHeaderRec.WindowsCodePage));
+  FStream.WriteDWord(NtoLE(DWord(1033))); // LCID
+  for i := 0 to 893 do FStream.WriteByte(0);
+end;
+
+procedure TChmSearchWriter.WriteAWord ( AWord: TIndexedWord ) ;
+begin
+  if FActiveLeafNode = nil then
+  begin
+    FActiveLeafNode := TLeafNode.Create(FStream);
+    with TLeafNode(FActiveLeafNode) do
+    begin
+      DocRootSize := FHeaderRec.DocIndexRootSize;
+      CodeRootSize := FHeaderRec.CodeCountRootSize;
+      LocRootSize := FHeaderRec.LocationCodeRootSize;
+    end;
+  end;
+
+  if FActiveLeafNode.GuessIfCanHold(AWord.TheWord) = False then
+  begin
+    FActiveLeafNode.Flush(True);
+  end;
+  TLeafNode(FActiveLeafNode).AddWord(AWord);
+end;
+
+
+procedure TChmSearchWriter.WriteToStream;
+begin
+  WriteHeader(True);
+  ProcessWords;
+  WriteHeader(False);
+end;
+
+constructor TChmSearchWriter.Create ( AStream: TStream;
+  AWordList: TIndexedWordList ) ;
+begin
+  FStream := AStream;
+  FWordList := AWordList;
+
+end;
+
+{ TLeafNode }
+
+function TFIftiNode.RemainingSpace: DWord;
+begin
+  Result := FIFTI_NODE_SIZE - FBlockStream.Position;
+end;
+
+constructor TFIftiNode.Create ( AStream: TStream ) ;
+begin
+  inherited Create;
+  FWriteStream := AStream;
+  FBlockStream := TMemoryStream.Create;
+end;
+
+destructor TFIftiNode.Destroy;
+begin
+  FBlockStream.Free;
+  inherited Destroy;
+end;
+
+procedure TFIftiNode.FillRemainingSpace;
+begin
+  while RemainingSpace > 0 do
+    FBlockStream.WriteByte(0);
+end;
+
+function TFIftiNode.AdjustedWord ( AWord: String; out AOffset: Byte; AOldWord: String ) : String;
+var
+  Count1,
+  Count2: Integer;
+  Count: Integer;
+  i: Integer;
+begin
+  if AWord = AOldWord then
+  begin
+    AOffset := Length(AWord);
+    Exit('');
+  end;
+  // else
+  Count1 := Length(AOldWord);
+  Count2 := Length(AWord);
+
+  if Count1<Count2 then
+    Count := Count1
+  else
+    Count := Count2;
+
+  for i := 1 to Count do
+  begin
+    AOffset := i-1;
+    if AOldWord[i] <> AWord[i]
+      then Exit(Copy(AWord, i, Length(AWord)));
+  end;
+  Result := AWord;
+  AOffset := 0;
+end;
+
+procedure TLeafNode.WriteInitialHeader;
+begin
+  FBlockStream.WriteDWord(0);
+  FBlockStream.WriteWord(0);
+  FBlockStream.WriteWord(0);
+end;
+
+destructor TLeafNode.Destroy;
+begin
+  inherited Destroy;
+end;
+
+function TLeafNode.GuessIfCanHold ( AWord: String ) : Boolean;
+var
+  WordOffset: Byte;
+begin
+  Result := 17 + Length(AdjustedWord(AWord, WordOffset, FLastWord)) < RemainingSpace;
+end;
+
+procedure TLeafNode.Flush(NewBlockNeeded: Boolean);
+var
+  FTmpPos: DWord;
+begin
+  Inc(FLeafNodeCount);
+  FTmpPos := FWriteStream.Position;
+  // update the previous leaf node about our position.
+  if FLastNodeStart > 0 then
+  begin
+    FWriteStream.Position := FLastNodeStart;
+    FWriteStream.WriteDWord(NtoLE(FTmpPos));
+    FWriteStream.Position := FTmpPos;
+  end;
+  FLastNodeStart := FTmpPos;
+
+  FreeSpace := RemainingSpace;
+
+  FillRemainingSpace;
+
+  // update the leaf header to show the available space.
+  FBlockStream.Position := 6;
+  FBlockStream.WriteWord(NtoLE(Word(FreeSpace)));
+
+  // copy the leaf block to the fiftimain file
+  FBlockStream.Position := 0;
+  FWriteStream.CopyFrom(FBlockStream, FIFTI_NODE_SIZE);
+  FBlockStream.Position := 0;
+
+  if NewBlockNeeded or ((NewBlockNeeded = False) and (ParentNode <> nil)) then
+  begin
+    if ParentNode = nil then
+      ParentNode := TIndexNode.Create(FWriteStream);
+
+    ParentNode.ChildIsFull(FLastWord, FLastNodeStart);
+    if (NewBlockNeeded = False) then
+      ParentNode.Flush(False);
+  end;
+
+  FLastWord := '';
+end;
+
+procedure TLeafNode.AddWord ( AWord: TIndexedWord ) ;
+var
+  Offset: Byte;
+  NewWord: String;
+  WLCSize: DWord;
+begin
+  if Length(AWord.TheWord) > 99 then
+    Exit; // Maximum word length is 99
+  if FBlockStream.Position = 0 then
+    WriteInitialHeader;
+
+  NewWord := AdjustedWord(AWord.TheWord, Offset, FLastWord);
+
+  FLastWord := AWord.TheWord;
+
+  FBlockStream.WriteByte(Length(NewWord)+1);
+  FBlockStream.WriteByte(Offset);
+  FBlockStream.Write(NewWord[1], Length(Trim(NewWord)));
+  FBlockStream.WriteByte(Ord(AWord.IsTitle));
+  WriteCompressedInteger(FBlockStream, AWord.DocumentCount);
+  FBlockStream.WriteDWord(NtoLE(DWord(FWriteStream.Position)));
+  FBlockStream.WriteWord(0);
+
+  // write WLC to FWriteStream so we can write the size of the wlc entries
+  WLCSize := WriteWLCEntries(AWord, FDocRootSize, FCodeRootSize, FLocRootSize);
+
+  WriteCompressedInteger(FBlockStream, WLCSize);
+end;
+
+function Min(AValue, BValue: Byte): Byte;
+begin
+  if AValue < BValue then
+    Result := AValue
+  else Result := BValue;
+end;
+
+function Max(AValue, BValue: Byte): Byte;
+begin
+  if AValue > BValue then
+    Result := AValue
+  else Result := BValue;
+end;
+
+function Max(AValue, BValue: Integer): Integer;
+begin
+  if AValue > BValue then
+    Result := AValue
+  else Result := BValue;
+end;
+
+function Max(AValue, BValue: DWord): DWord;
+begin
+  if AValue > BValue then
+    Result := AValue
+  else Result := BValue;
+end;
+
+
+
+function TLeafNode.WriteWLCEntries ( AWord: TIndexedWord ; ADocRootSize, ACodeRootSize, ALocRootSize: Byte) : DWord;
+var
+  LastDocIndex: DWord;
+  LastLocCode: DWord;
+  WLCLastWord: String;
+  UsedBits: Byte;
+  Buf: Byte;
+  function NewDocDelta(ADocIndex: DWord): DWord;
+  begin
+    Result := ADocIndex - LastDocIndex;
+    LastDocIndex := ADocIndex;
+  end;
+  function NewLocCode(ALocCode: DWord): DWord;
+  begin
+    Result := ALocCode - LastLocCode;
+    LastLocCode := ALocCode;
+  end;
+  procedure AddValue(AValue: DWord; BitCount: Byte);
+  var
+    NeededBits: Byte;
+    Tmp: Byte;
+  begin
+    AValue := AValue shl (32 - BitCount);
+    while BitCount > 0 do
+    begin
+      NeededBits := 8 - UsedBits;
+      Tmp := Hi(Hi(DWord(AValue shr (UsedBits))));
+      Buf := Buf or Tmp;
+      Inc(UsedBits, Min(BitCount, NeededBits));
+      AValue := AValue shl Min(BitCount, NeededBits);
+      Dec(BitCount, Min(BitCount, NeededBits));
+
+      if (UsedBits = 8) then
+      begin
+        FWriteStream.WriteByte(Buf);
+        UsedBits := 0;
+        NeededBits := 0;
+        Buf := 0;
+      end;
+    end;
+  end;
+  procedure FlushBuffer;
+  begin
+    if UsedBits > 0 then
+      FWriteStream.WriteByte(Buf);
+    UsedBits := 0;
+    Buf := 0;
+  end;
+var
+  DocDelta: DWord;
+  LocDelta: DWord;
+  StartPos: DWord;
+  Bits: DWord;
+  BitCount: Byte;
+  i,
+  j: Integer;
+  Doc: TIndexDocument;
+//  proced
+begin
+  StartPos := FWriteStream.Position;
+  LastDocIndex := 0;
+  UsedBits := 0;
+  Buf := 0;
+  for i := 0 to AWord.DocumentCount-1 do
+  begin
+    LastLocCode := 0;
+    Doc := AWord.GetLogicalDocument(i);
+    DocDelta := NewDocDelta(Doc.DocumentIndex);
+    BitCount := WriteScaleRootInt(DocDelta, Bits, ADocRootSize);
+    AddValue(Bits, BitCount);
+    BitCount := WriteScaleRootInt(Length(Doc.WordIndex), Bits, ACodeRootSize);
+    AddValue(Bits, BitCount);
+
+    for j := 0 to High(Doc.WordIndex) do
+    begin
+      LocDelta := NewLocCode(Doc.WordIndex[j]);
+      BitCount := WriteScaleRootInt(LocDelta, Bits, ALocRootSize);
+      AddValue(Bits, BitCount);
+    end;
+    FlushBuffer;
+  end;
+
+
+  Result := FWriteStream.Position-StartPos;
+end;
+
+
+{ TIndexNode }
+
+function TIndexNode.GuessIfCanHold ( AWord: String ) : Boolean;
+var
+  Offset: Byte;
+begin
+  Result := FBlockStream.Position + 8 + Length(AdjustedWord(AWord, Offset, FLastWord)) < FIFTI_NODE_SIZE;
+end;
+
+procedure TIndexNode.ChildIsFull ( AWord: String; ANodeOffset: DWord ) ;
+var
+  Offset: Byte;
+begin
+  if FBlockStream.Position = 0 then
+    FBlockStream.WriteWord(0); // free space at end. updated when the block is flushed
+  if GuessIfCanHold(AWord) = False then
+    Flush(True);
+  AWord := AdjustedWord(AWord, Offset, FLastWord);
+
+  // Write the Index node Entry
+  FBlockStream.WriteByte(Length(AWord)+1);
+  FBlockStream.WriteByte(Offset);
+  FBlockStream.Write(AWord[1], Length(AWord));
+  FBlockStream.WriteDWord(NtoLE(ANodeOffset));
+  FBlockStream.WriteWord(0);
+end;
+
+procedure TIndexNode.Flush ( NewBlockNeeded: Boolean ) ;
+var
+  RemSize: DWord;
+begin
+  if NewBlockNeeded then
+  begin
+    if ParentNode = nil then
+      ParentNode := TIndexNode.Create(FWriteStream);
+  end;
+
+  if ParentNode <> nil then
+    ParentNode.ChildIsFull(FLastWord, FWriteStream.Position);
+
+  RemSize := RemainingSpace;
+  FillRemainingSpace;
+  FBlockStream.Position := 0;
+  FBlockStream.WriteWord(NtoLE(RemSize));
+
+  FBlockStream.Position := 0;
+
+  FWriteStream.CopyFrom(FBlockStream, FIFTI_NODE_SIZE);
+
+  FLastWord := '';
+
+  if NewBlockNeeded then
+    FBlockStream.WriteDWord(0) // placeholder to write free space in when block is full
+  else
+    if ParentNode <> nil then
+      ParentNode.Flush(NewBlockNeeded);
+end;
+
+{ TChmSearchReader }
+
+procedure TChmSearchReader.ReadCommonData;
+var
+  Sig: DWord;
+begin
+   FStream.Position := 0;
+   Sig := LEtoN(FStream.ReadDWord);
+   FFileIsValid :=  Sig = $00280000;
+
+   if not FileIsValid then
+     Exit;
+
+   // root node address
+   FStream.Position := $8;
+   FRootNodeOffset := LEtoN(FStream.ReadDWord);
+
+   // Tree Depth
+   FStream.Position := $18;
+   FTreeDepth := LEtoN(FStream.ReadWord);
+
+   // Root sizes for scale and root integers
+   FStream.Position := $1E;
+   if FStream.ReadByte <> 2 then // we only can read the files when scale is 2
+     FFileIsValid := False;
+   FDocRootSize := FStream.ReadByte;
+
+   if FStream.ReadByte <> 2 then
+     FFileIsValid := False;
+   FCodeCountRootSize := FStream.ReadByte;
+
+   if FStream.ReadByte <> 2 then
+     FFileIsValid := False;
+   FLocCodeRootSize := FStream.ReadByte;
+
+end;
+
+procedure TChmSearchReader.MoveToFirstLeafNode;
+var
+  NodeDepth: Integer;
+  NodeOffset: DWord;
+  LastWord: String;
+  NewWord: String;
+begin
+  NodeDepth := FTreeDepth;
+  MoveToRootNode;
+  while NodeDepth > 1 do
+  begin
+    LastWord := '';
+    ReadRootNodeEntry(LastWord, NewWord, NodeOffset);
+    Dec(NodeDepth);
+    MoveToNode(NodeOffset, NodeDepth);
+  end;
+end;
+
+procedure TChmSearchReader.MoveToRootNode;
+begin
+  MoveToNode(FRootNodeOffset, FTreeDepth);
+end;
+
+procedure TChmSearchReader.MoveToNode(ANodeOffset: DWord; ANodeDepth: Integer);
+begin
+  FStream.Position := ANodeOffset;
+  FActiveNodeStart := FStream.Position;
+  if ANodeDepth > 1 then
+  begin
+    FnextLeafNode := 0;
+    FActiveNodeFreeSpace := LEtoN(FStream.ReadWord); // empty space at end of node
+  end
+  else
+  begin
+    FnextLeafNode := LEtoN(FStream.ReadDWord);
+    FStream.ReadWord;
+    FActiveNodeFreeSpace := LEtoN(FStream.ReadWord);
+  end;
+end;
+
+function TChmSearchReader.ReadWordOrPartialWord ( ALastWord: String ) : String;
+var
+  WordLength: Integer;
+  CopyLastWordCharCount: Integer;
+begin
+  WordLength := FStream.ReadByte;
+  CopyLastWordCharCount := FStream.ReadByte;
+  if CopyLastWordCharCount > 0 then
+    Result := Copy(ALastWord, 1, CopyLastWordCharCount);
+  SetLength(Result, (WordLength-1) + CopyLastWordCharCount);
+  FStream.Read(Result[1+CopyLastWordCharCount], WordLength-1);
+end;
+
+procedure TChmSearchReader.ReadRootNodeEntry (ALastWord: String;  out AWord: String; out
+  ASubNodeStart: DWord ) ;
+begin
+  AWord := ReadWordOrPartialWord(ALastWord);
+  ASubNodeStart := LEtoN(FStream.ReadDWord);
+end;
+
+function TChmSearchReader.ReadLeafNodeEntry ( ALastWord: String; out
+  AWord: String; out AInTitle: Boolean; out AWLCCount: DWord; out
+  AWLCOffset: DWord; out AWLCSize: DWord ): Boolean;
+var
+  WordLength: Integer;
+begin
+  Result := FStream.Position - FActiveNodeStart < FIFTI_NODE_SIZE - FActiveNodeFreeSpace;
+  if not Result then
+    Exit;
+  AWord := ReadWordOrPartialWord(ALastWord);
+  AInTitle := FStream.ReadByte = 1;
+  AWLCCount := GetCompressedInteger(FStream);
+  AWLCOffset := LEtoN(FStream.ReadDWord);
+  FStream.ReadWord;
+  AWLCSize := GetCompressedInteger(FStream);
+
+end;
+
+function TChmSearchReader.ReadWLCEntries (AWLCCount: DWord; AWLCOffset: DWord; AWLCSize: DWord ) : TChmWLCTopicArray;
+
+  function AtEndOfWLCEntries: Boolean;
+  begin
+    Result := AWLCOffset + AWLCSize <= FStream.Position;
+  end;
+var
+  Buf: Byte;
+  BitsInBuffer: Integer;
+
+  procedure FillBuffer;
+  begin
+    while (BitsInBuffer = 0) and not AtEndOfWLCEntries do
+    begin
+      Buf := FStream.ReadByte;
+      Inc(BitsInBuffer, 8);
+    end;
+  end;
+
+  function ReadWLC(RootSize: DWord): DWord;
+  var
+    PrefixBits: Integer = 0;
+    BitCount: Integer = 0;
+    RemainingBits: Integer; // only the bits for this number not the bits in buffer
+  begin
+    FillBuffer;
+    Result := 0;
+    while (Buf and $80) > 0 do // find out how many prefix bits there are
+    begin
+      Inc(PrefixBits);
+      Buf := Buf shl 1;
+      Dec(BitsInBuffer);
+      FillBuffer;
+
+    end;
+
+    if PrefixBits > 0 then
+      Result := 1;
+    Inc(BitCount, PrefixBits+1);
+    Buf := Buf shl 1;
+    Dec(BitsInBuffer);
+
+    FillBuffer;
+    Remainingbits := RootSize + Max(Integer(PrefixBits-1), 0);
+    while RemainingBits > 0 do
+    begin
+      Result := Result shl 1;
+      Result := Result or (Buf shr 7);
+      Dec(RemainingBits);
+      Buf := Buf shl 1;
+      Dec(BitsInBuffer);
+      FillBuffer;
+      Inc(BitCount);
+    end;
+  end;
+  procedure ClearBuffer;
+  begin
+    BitsInBuffer := 0;
+    Buf := 0;
+  end;
+
+var
+  TopicHits: DWord;
+  i: Integer;
+  j: Integer;
+  CachedStreamPos: QWord;
+  LastDoc,
+  LastLocCode: DWord;
+begin
+  CachedStreamPos := FStream.Position;
+  FStream.Position := AWLCOffset;
+  for i := 0 to AWLCSize-1 do
+  begin
+    Buf := FStream.ReadByte;
+    Write(binStr(Buf, 8), ' ');
+  end;
+  FStream.Position := AWLCOffset;
+
+  SetLength(Result, AWLCCount);
+  Buf := 0;
+  BitsInBuffer := 0;
+  LastDoc := 0;
+
+  for i := 0 to AWLCCount-1 do
+  begin
+    Result[i].TopicIndex := ReadWLC(FDocRootSize) + LastDoc;
+
+    LastDoc := Result[i].TopicIndex;
+    TopicHits := ReadWLC(FCodeCountRootSize);
+    SetLength(Result[i].LocationCodes, TopicHits);
+    LastLocCode := 0;
+    for j := 0 to TopicHits-1 do
+    begin
+      Result[i].LocationCodes[j] := ReadWLC(FLocCodeRootSize) + LastLocCode;
+      LastLocCode := Result[i].LocationCodes[j];
+    end;
+    ClearBuffer;
+  end;
+  FStream.Position := CachedStreamPos;
+end;
+
+
+
+constructor TChmSearchReader.Create ( AStream: TStream;
+  AFreeStreamOnDestroy: Boolean ) ;
+begin
+  FStream := AStream;
+  FFreeStreamOnDestroy := AFreeStreamOnDestroy;
+  ReadCommonData;
+end;
+
+destructor TChmSearchReader.Destroy;
+begin
+  if FFreeStreamOnDestroy then
+    FreeAndNil(FStream);
+  inherited Destroy;
+end;
+
+procedure TChmSearchReader.DumpData (
+  AFoundDataEvent: TChmSearchReaderFoundDataEvent ) ;
+var
+  LastWord: String;
+  TheWord: String;
+  InTitle: Boolean;
+  WLCCount: DWord;
+  WLCOffset: DWord;
+  WLCSize: DWord;
+  FoundHits: TChmWLCTopicArray;
+  i: Integer;
+  j: Integer;
+begin
+  MoveToFirstLeafNode;
+  LastWord := '';
+  repeat
+    if  (ReadLeafNodeEntry(LastWord, TheWord, InTitle, WLCCount, WLCOffset, WLCSize) = False) then
+    begin
+      if FnextLeafNode <> 0 then
+      begin
+        MoveToNode(FnextLeafNode, 1);
+        LastWord := '';
+      end
+      else
+        Break;
+    end
+    else begin
+      LastWord := TheWord;
+      FoundHits := ReadWLCEntries(WLCCount, WLCOffset, WLCSize);
+      for i := 0 to High(FoundHits) do
+        for j := 0 to High(FoundHits[i].LocationCodes) do
+           AFoundDataEvent(Self, TheWord, FoundHits[i].TopicIndex ,FoundHits[i].LocationCodes[j]);
+    end;
+  until False; //FStream.Position - FActiveNodeStart >= FIFTI_NODE_SIZE - FActiveNodeFreeSpace
+end;
+
+function TChmSearchReader.LookupWord(AWord: String): TChmWLCTopicArray;
+begin
+{  if not AIsReadyToReadWLC then
+  begin
+
+  end
+  else begin
+     //ReadWLCEntries();
+  end;}
+end;
+
+
+end.

+ 282 - 0
packages/chm/src/chmobjinstconst.inc

@@ -0,0 +1,282 @@
+{ Copyright (C) <2005> <Andrew Haines> chmobjinstconst.inc
+
+  This library is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Library General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or (at your
+  option) any later version.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
+  for more details.
+
+  You should have received a copy of the GNU Library General Public License
+  along with this library; if not, write to the Free Software Foundation,
+  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+}
+{
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+}
+type
+  TObjInstEntry = array[0..9] of Byte;
+
+const
+  ObjInstEntries: array [0..255] of TObjInstEntry =(
+($00,$00,$00,$00,$00,$00,$00,$00,$00,$00),
+($07,$00,$01,$00,$01,$01,$01,$01,$00,$00),
+($00,$00,$02,$00,$02,$02,$02,$02,$00,$00),
+($00,$00,$03,$00,$03,$03,$03,$03,$00,$00),
+($00,$00,$04,$00,$04,$04,$04,$04,$00,$00),
+($00,$00,$05,$00,$05,$05,$05,$05,$00,$00),
+($00,$00,$06,$00,$06,$06,$06,$06,$00,$00),
+($00,$00,$07,$00,$07,$07,$07,$07,$00,$00),
+($00,$00,$08,$00,$08,$08,$08,$08,$00,$00),
+($00,$00,$09,$00,$09,$09,$09,$09,$00,$00),
+($00,$00,$0A,$00,$0A,$0A,$0A,$0A,$00,$00),
+($00,$00,$0B,$00,$0B,$0B,$0B,$0B,$00,$00),
+($00,$00,$0C,$00,$0C,$0C,$0C,$0C,$00,$00),
+($00,$00,$0D,$00,$0D,$0D,$0D,$0D,$00,$00),
+($00,$00,$0E,$00,$0E,$0E,$14,$14,$00,$00),
+($00,$00,$0F,$00,$0F,$0F,$0F,$0F,$00,$00),
+($00,$00,$10,$00,$10,$10,$10,$10,$00,$00),
+($00,$00,$11,$00,$11,$11,$11,$11,$00,$00),
+($00,$00,$12,$00,$12,$12,$12,$12,$00,$00),
+($00,$00,$13,$00,$13,$13,$13,$13,$00,$00),
+($00,$00,$20,$00,$14,$14,$14,$14,$00,$00),
+($00,$00,$15,$00,$15,$15,$15,$15,$00,$00),
+($00,$00,$16,$00,$16,$16,$16,$16,$00,$00),
+($00,$00,$17,$00,$17,$17,$17,$17,$00,$00),
+($00,$00,$18,$00,$18,$18,$18,$18,$00,$00),
+($00,$00,$19,$00,$19,$19,$19,$19,$00,$00),
+($00,$00,$1A,$00,$1A,$1A,$1A,$1A,$00,$00),
+($00,$00,$1B,$00,$1B,$1B,$1B,$1B,$00,$00),
+($00,$00,$1C,$00,$1C,$1C,$1C,$1C,$00,$00),
+($00,$00,$1D,$00,$1D,$1D,$1D,$1D,$00,$00),
+($00,$00,$1E,$00,$1E,$1E,$1E,$1E,$00,$00),
+($00,$00,$1F,$00,$1F,$1F,$1F,$1F,$00,$00),
+($00,$00,$20,$00,$20,$20,$20,$20,$00,$00),
+($00,$00,$23,$00,$21,$21,$21,$21,$00,$00),
+($00,$00,$28,$00,$22,$22,$22,$22,$00,$00),
+($00,$00,$2D,$00,$23,$23,$23,$23,$00,$00),
+($00,$00,$32,$00,$24,$24,$24,$24,$00,$00),
+($00,$00,$37,$00,$25,$25,$25,$25,$00,$00),
+($00,$00,$3C,$00,$26,$26,$26,$26,$00,$00),
+($06,$00,$41,$00,$27,$27,$27,$27,$00,$00),
+($00,$00,$46,$00,$28,$28,$28,$28,$00,$00),
+($00,$00,$4B,$00,$29,$29,$29,$29,$00,$00),
+($09,$00,$50,$00,$2A,$2A,$2A,$2A,$00,$00),
+($00,$00,$55,$00,$2B,$2B,$2B,$2B,$00,$00),
+($04,$00,$5A,$00,$2C,$2C,$2C,$2C,$00,$00),
+($00,$00,$5F,$00,$2D,$2D,$2D,$2D,$00,$00),
+($05,$00,$64,$00,$2E,$2E,$2E,$2E,$00,$00),
+($00,$00,$69,$00,$2F,$2F,$2F,$2F,$00,$00),
+($03,$00,$60,$04,$30,$30,$30,$30,$00,$00),
+($03,$00,$6A,$04,$31,$31,$31,$31,$00,$00),
+($03,$00,$74,$04,$32,$32,$32,$32,$00,$00),
+($03,$00,$7E,$04,$33,$33,$33,$33,$00,$00),
+($03,$00,$88,$04,$34,$34,$34,$34,$00,$00),
+($03,$00,$92,$04,$35,$35,$35,$35,$00,$00),
+($03,$00,$9C,$04,$36,$36,$36,$36,$00,$00),
+($03,$00,$A6,$04,$37,$37,$37,$37,$00,$00),
+($03,$00,$B0,$04,$38,$38,$38,$38,$00,$00),
+($03,$00,$BA,$04,$39,$39,$39,$39,$00,$00),
+($00,$00,$6E,$00,$3A,$3A,$3A,$3A,$00,$00),
+($00,$00,$73,$00,$3B,$3B,$3B,$3B,$00,$00),
+($00,$00,$78,$00,$3C,$3C,$3C,$3C,$00,$00),
+($00,$00,$7D,$00,$3D,$3D,$3D,$3D,$00,$00),
+($00,$00,$82,$00,$3E,$3E,$3E,$3E,$00,$00),
+($09,$00,$87,$00,$3F,$3F,$3F,$3F,$00,$00),
+($00,$00,$8C,$00,$40,$40,$40,$40,$00,$00),
+($02,$00,$CE,$04,$61,$41,$41,$41,$00,$00),
+($02,$00,$E2,$04,$62,$42,$42,$42,$00,$00),
+($02,$00,$F6,$04,$63,$43,$43,$43,$00,$00),
+($02,$00,$0A,$05,$64,$44,$44,$44,$00,$00),
+($02,$00,$1E,$05,$65,$45,$45,$45,$00,$00),
+($02,$00,$32,$05,$66,$46,$46,$46,$00,$00),
+($02,$00,$46,$05,$67,$47,$47,$47,$00,$00),
+($02,$00,$5A,$05,$68,$48,$48,$48,$00,$00),
+($02,$00,$6E,$05,$69,$49,$49,$49,$00,$00),
+($02,$00,$82,$05,$6A,$4A,$4A,$4A,$00,$00),
+($02,$00,$96,$05,$6B,$4B,$4B,$4B,$00,$00),
+($02,$00,$AA,$05,$6C,$4C,$4C,$4C,$00,$00),
+($02,$00,$BE,$05,$6D,$4D,$4D,$4D,$00,$00),
+($02,$00,$D2,$05,$6E,$4E,$4E,$4E,$00,$00),
+($02,$00,$E6,$05,$6F,$4F,$4F,$4F,$00,$00),
+($02,$00,$FA,$05,$70,$50,$50,$50,$00,$00),
+($02,$00,$0E,$06,$71,$51,$51,$51,$00,$00),
+($02,$00,$22,$06,$72,$52,$52,$52,$00,$00),
+($02,$00,$36,$06,$73,$53,$53,$53,$00,$00),
+($02,$00,$4A,$06,$74,$54,$54,$54,$00,$00),
+($02,$00,$5E,$06,$75,$55,$55,$55,$00,$00),
+($02,$00,$72,$06,$76,$56,$56,$56,$00,$00),
+($02,$00,$86,$06,$77,$57,$57,$57,$00,$00),
+($02,$00,$9A,$06,$78,$58,$58,$58,$00,$00),
+($02,$00,$AE,$06,$79,$59,$59,$59,$00,$00),
+($02,$00,$C2,$06,$7A,$5A,$5A,$5A,$00,$00),
+($00,$00,$91,$00,$5B,$5B,$5B,$5B,$00,$00),
+($00,$00,$96,$00,$5C,$5C,$5C,$5C,$00,$00),
+($00,$00,$9B,$00,$5D,$5D,$5D,$5D,$00,$00),
+($00,$00,$A0,$00,$5E,$5E,$5E,$5E,$00,$00),
+($01,$00,$A5,$00,$5F,$5F,$5F,$5F,$00,$00),
+($00,$00,$AA,$00,$60,$60,$60,$60,$00,$00),
+($01,$00,$CE,$04,$61,$61,$61,$61,$00,$00),
+($01,$00,$E2,$04,$62,$62,$62,$62,$00,$00),
+($01,$00,$F6,$04,$63,$63,$63,$63,$00,$00),
+($01,$00,$0A,$05,$64,$64,$64,$64,$00,$00),
+($01,$00,$1E,$05,$65,$65,$65,$65,$00,$00),
+($01,$00,$32,$05,$66,$66,$66,$66,$00,$00),
+($01,$00,$46,$05,$67,$67,$67,$67,$00,$00),
+($01,$00,$5A,$05,$68,$68,$68,$68,$00,$00),
+($01,$00,$6E,$05,$69,$69,$69,$69,$00,$00),
+($01,$00,$82,$05,$6A,$6A,$6A,$6A,$00,$00),
+($01,$00,$96,$05,$6B,$6B,$6B,$6B,$00,$00),
+($01,$00,$AA,$05,$6C,$6C,$6C,$6C,$00,$00),
+($01,$00,$BE,$05,$6D,$6D,$6D,$6D,$00,$00),
+($01,$00,$D2,$05,$6E,$6E,$6E,$6E,$00,$00),
+($01,$00,$E6,$05,$6F,$6F,$6F,$6F,$00,$00),
+($01,$00,$FA,$05,$70,$70,$70,$70,$00,$00),
+($01,$00,$0E,$06,$71,$71,$71,$71,$00,$00),
+($01,$00,$22,$06,$72,$72,$72,$72,$00,$00),
+($01,$00,$36,$06,$73,$73,$73,$73,$00,$00),
+($01,$00,$4A,$06,$74,$74,$74,$74,$00,$00),
+($01,$00,$5E,$06,$75,$75,$75,$75,$00,$00),
+($01,$00,$72,$06,$76,$76,$76,$76,$00,$00),
+($01,$00,$86,$06,$77,$77,$77,$77,$00,$00),
+($01,$00,$9A,$06,$78,$78,$78,$78,$00,$00),
+($01,$00,$AE,$06,$79,$79,$79,$79,$00,$00),
+($01,$00,$C2,$06,$7A,$7A,$7A,$7A,$00,$00),
+($00,$00,$AF,$00,$7B,$7B,$7B,$7B,$00,$00),
+($00,$00,$B4,$00,$7C,$7C,$7C,$7C,$00,$00),
+($00,$00,$B9,$00,$7D,$7D,$7D,$7D,$00,$00),
+($00,$00,$BE,$00,$7E,$7E,$7E,$7E,$00,$00),
+($00,$00,$BF,$00,$7F,$7F,$7F,$7F,$00,$00),
+($00,$00,$C0,$00,$80,$80,$20,$20,$00,$00),
+($00,$00,$C1,$00,$81,$81,$20,$20,$00,$00),
+($00,$00,$C3,$00,$82,$82,$E2,$E2,$00,$00),
+($00,$00,$C8,$00,$83,$83,$C4,$C4,$00,$00),
+($00,$00,$CD,$00,$84,$84,$E3,$E3,$00,$00),
+($00,$00,$D2,$00,$85,$85,$C9,$C9,$00,$00),
+($00,$00,$D7,$00,$86,$86,$A0,$A0,$00,$00),
+($00,$00,$DC,$00,$87,$87,$E0,$E0,$00,$00),
+($00,$00,$E1,$00,$88,$88,$5E,$5E,$00,$00),
+($00,$00,$E6,$00,$89,$89,$E4,$E4,$00,$00),
+($02,$00,$36,$06,$73,$8A,$20,$20,$00,$00),
+($00,$00,$F0,$00,$8B,$8B,$DC,$DC,$00,$00),
+($0C,$00,$E6,$05,$6F,$8C,$CE,$CE,$00,$00),
+($00,$00,$F6,$00,$8D,$8D,$20,$20,$00,$00),
+($00,$00,$F7,$00,$8E,$8E,$20,$20,$00,$00),
+($00,$00,$F8,$00,$8F,$8F,$20,$20,$00,$00),
+($00,$00,$F9,$00,$90,$90,$20,$20,$00,$00),
+($00,$04,$FA,$00,$91,$91,$D4,$D4,$00,$00),
+($00,$05,$FF,$00,$92,$92,$D5,$D5,$00,$00),
+($00,$06,$04,$01,$93,$93,$D2,$D2,$00,$00),
+($00,$07,$09,$01,$94,$94,$D3,$D3,$00,$00),
+($00,$01,$0E,$01,$95,$95,$A5,$A5,$00,$00),
+($00,$02,$13,$01,$96,$96,$D0,$D0,$00,$00),
+($00,$03,$18,$01,$97,$97,$D1,$D1,$00,$00),
+($00,$00,$1D,$01,$98,$98,$7E,$7E,$00,$00),
+($00,$00,$22,$01,$99,$99,$AA,$AA,$00,$00),
+($02,$00,$36,$06,$73,$9A,$20,$20,$00,$00),
+($00,$00,$31,$01,$9B,$9B,$DD,$DD,$00,$00),
+($0C,$00,$E6,$05,$6F,$9C,$CF,$CF,$00,$00),
+($00,$00,$37,$01,$9D,$9D,$20,$20,$00,$00),
+($00,$00,$38,$01,$9E,$9E,$20,$20,$00,$00),
+($02,$00,$AE,$06,$79,$9F,$D9,$D9,$00,$00),
+($00,$00,$3C,$01,$A0,$A0,$A0,$A0,$00,$00),
+($00,$00,$40,$01,$A1,$A1,$C1,$C1,$00,$00),
+($00,$00,$45,$01,$A2,$A2,$A2,$A2,$00,$00),
+($00,$00,$4A,$01,$A3,$A3,$A3,$A3,$00,$00),
+($00,$00,$4F,$01,$A4,$A4,$DB,$DB,$00,$00),
+($00,$00,$54,$01,$A5,$A5,$B4,$B4,$00,$00),
+($00,$00,$59,$01,$A6,$A6,$20,$20,$00,$00),
+($00,$00,$5E,$01,$A7,$A7,$A4,$A4,$00,$00),
+($00,$00,$63,$01,$A8,$A8,$AC,$AC,$00,$00),
+($00,$00,$68,$01,$A9,$A9,$A9,$A9,$00,$00),
+($00,$00,$6D,$01,$AA,$AA,$BB,$BB,$00,$00),
+($00,$00,$72,$01,$AB,$AB,$C7,$C7,$00,$00),
+($00,$00,$77,$01,$AC,$AC,$C2,$C2,$00,$00),
+($00,$00,$7C,$01,$AD,$AD,$2D,$2D,$00,$00),
+($00,$00,$81,$01,$AE,$AE,$A8,$A8,$00,$00),
+($00,$00,$86,$01,$AF,$AF,$F8,$F8,$00,$00),
+($00,$00,$8B,$01,$B0,$B0,$A1,$A1,$00,$00),
+($00,$00,$90,$01,$B1,$B1,$B1,$B1,$00,$00),
+($00,$00,$95,$01,$B2,$B2,$20,$20,$00,$00),
+($00,$00,$9A,$01,$B3,$B3,$20,$20,$00,$00),
+($00,$00,$9F,$01,$B4,$B4,$AB,$AB,$00,$00),
+($00,$00,$A4,$01,$B5,$B5,$B5,$B5,$00,$00),
+($00,$00,$A9,$01,$B6,$B6,$A6,$A6,$00,$00),
+($00,$00,$AE,$01,$B7,$B7,$E1,$E1,$00,$00),
+($00,$00,$B3,$01,$B8,$B8,$FC,$FC,$00,$00),
+($00,$00,$B8,$01,$B9,$B9,$20,$20,$00,$00),
+($00,$00,$BD,$01,$BA,$BA,$BC,$BC,$00,$00),
+($00,$00,$C2,$01,$BB,$BB,$C8,$C8,$00,$00),
+($00,$00,$C7,$01,$BC,$BC,$20,$20,$00,$00),
+($00,$00,$CC,$01,$BD,$BD,$20,$20,$00,$00),
+($00,$00,$D1,$01,$BE,$BE,$20,$20,$00,$00),
+($00,$00,$D6,$01,$BF,$BF,$C0,$C0,$00,$00),
+($02,$00,$CE,$04,$61,$C0,$CB,$CB,$00,$00),
+($02,$00,$CE,$04,$61,$C1,$E7,$E7,$00,$00),
+($02,$00,$CE,$04,$61,$C2,$E5,$E5,$00,$00),
+($02,$00,$CE,$04,$61,$C3,$CC,$CC,$00,$00),
+($02,$00,$CE,$04,$61,$C4,$80,$80,$00,$00),
+($02,$00,$CE,$04,$61,$C5,$81,$81,$00,$00),
+($0C,$00,$CE,$04,$61,$C6,$AE,$AE,$00,$00),
+($02,$00,$F6,$04,$63,$C7,$82,$82,$00,$00),
+($02,$00,$1E,$05,$65,$C8,$E9,$E9,$00,$00),
+($02,$00,$1E,$05,$65,$C9,$83,$83,$00,$00),
+($02,$00,$1E,$05,$65,$CA,$E6,$E6,$00,$00),
+($02,$00,$1E,$05,$65,$CB,$E8,$E8,$00,$00),
+($02,$00,$6E,$05,$69,$CC,$ED,$ED,$00,$00),
+($02,$00,$6E,$05,$69,$CD,$EA,$EA,$00,$00),
+($02,$00,$6E,$05,$69,$CE,$EB,$EB,$00,$00),
+($02,$00,$6E,$05,$69,$CF,$EC,$EC,$00,$00),
+($02,$00,$0A,$05,$64,$D0,$20,$20,$00,$00),
+($02,$00,$D2,$05,$6E,$D1,$84,$84,$00,$00),
+($02,$00,$E6,$05,$6F,$D2,$F1,$F1,$00,$00),
+($02,$00,$E6,$05,$6F,$D3,$EE,$EE,$00,$00),
+($02,$00,$E6,$05,$6F,$D4,$EF,$EF,$00,$00),
+($02,$00,$E6,$05,$6F,$D5,$CD,$CD,$00,$00),
+($02,$00,$E6,$05,$6F,$D6,$85,$85,$00,$00),
+($00,$00,$DB,$01,$D7,$D7,$20,$20,$00,$00),
+($02,$00,$E6,$05,$6F,$D8,$AF,$AF,$00,$00),
+($02,$00,$5E,$06,$75,$D9,$F4,$F4,$00,$00),
+($02,$00,$5E,$06,$75,$DA,$F2,$F2,$00,$00),
+($02,$00,$5E,$06,$75,$DB,$F3,$F3,$00,$00),
+($02,$00,$5E,$06,$75,$DC,$86,$86,$00,$00),
+($02,$00,$AE,$06,$79,$DD,$20,$20,$00,$00),
+($02,$00,$42,$04,$DE,$DE,$20,$20,$00,$00),
+($0C,$00,$36,$06,$73,$DF,$A7,$A7,$00,$00),
+($02,$00,$CE,$04,$61,$E0,$88,$88,$00,$00),
+($02,$00,$CE,$04,$61,$E1,$87,$87,$00,$00),
+($02,$00,$CE,$04,$61,$E2,$89,$89,$00,$00),
+($02,$00,$CE,$04,$61,$E3,$8B,$8B,$00,$00),
+($02,$00,$CE,$04,$61,$E4,$8A,$8A,$00,$00),
+($02,$00,$CE,$04,$61,$E5,$8C,$8C,$00,$00),
+($0C,$00,$CE,$04,$61,$E6,$BE,$BE,$00,$00),
+($02,$00,$F6,$04,$63,$E7,$8D,$8D,$00,$00),
+($02,$00,$1E,$05,$65,$E8,$8F,$8F,$00,$00),
+($02,$00,$1E,$05,$65,$E9,$8E,$8E,$00,$00),
+($02,$00,$1E,$05,$65,$EA,$90,$90,$00,$00),
+($02,$00,$1E,$05,$65,$EB,$91,$91,$00,$00),
+($02,$00,$6E,$05,$69,$EC,$93,$93,$00,$00),
+($02,$00,$6E,$05,$69,$ED,$92,$92,$00,$00),
+($02,$00,$6E,$05,$69,$EE,$94,$94,$00,$00),
+($02,$00,$6E,$05,$69,$EF,$95,$95,$00,$00),
+($02,$00,$0A,$05,$6F,$F0,$20,$20,$00,$00),
+($02,$00,$D2,$05,$6E,$F1,$96,$96,$00,$00),
+($02,$00,$E6,$05,$6F,$F2,$98,$98,$00,$00),
+($02,$00,$E6,$05,$6F,$F3,$97,$97,$00,$00),
+($02,$00,$E6,$05,$6F,$F4,$99,$99,$00,$00),
+($02,$00,$E6,$05,$6F,$F5,$9B,$9B,$00,$00),
+($02,$00,$E6,$05,$6F,$F6,$9A,$9A,$00,$00),
+($00,$00,$66,$00,$F7,$F7,$D6,$D6,$00,$00),
+($02,$00,$E6,$05,$6F,$F8,$BF,$BF,$00,$00),
+($02,$00,$5E,$06,$75,$F9,$9D,$9D,$00,$00),
+($02,$00,$5E,$06,$75,$FA,$9C,$9C,$00,$00),
+($02,$00,$5E,$06,$75,$FB,$9E,$9E,$00,$00),
+($02,$00,$5E,$06,$75,$FC,$9F,$9F,$00,$00),
+($02,$00,$AE,$06,$79,$FD,$20,$20,$00,$00),
+($02,$00,$4C,$04,$FE,$FE,$20,$20,$00,$00),
+($02,$00,$AE,$06,$79,$FF,$D8,$D8,$00,$00)
+);

+ 1 - 2
packages/chm/src/chmsitemap.pas

@@ -186,10 +186,9 @@ procedure TChmSiteMap.FoundTag(ACaseInsensitiveTag, AActualTag: string);
     end;
 var
   TagName,
-  TagAttribute,
+  //TagAttribute,
   TagAttributeName,
   TagAttributeValue: String;
-  I: Integer;
 begin
   //WriteLn('TAG:', AActualTag);
   TagName := GetTagName(ACaseInsensitiveTag);

+ 2 - 0
packages/chm/src/chmspecialfiles.pas

@@ -42,7 +42,9 @@ function WriteNameListToStream(const AStream: TStream; SectionNames: TSectionNam
 var
   MSCompressedName: WideString = 'MSCompressed'#0; // Length 13
   UnCompressedName: WideString = 'Uncompressed'#0;
+{$IFDEF ENDIAN_BIG}
   I: Integer;
+{$ENDIF}
   Size: Word = 2;
   NEntries: Word = 0;
 begin

+ 325 - 8
packages/chm/src/chmwriter.pas

@@ -22,7 +22,7 @@ unit chmwriter;
 {$MODE OBJFPC}{$H+}
 
 interface
-uses Classes, ChmBase, chmtypes, chmspecialfiles;
+uses Classes, ChmBase, chmtypes, chmspecialfiles, HtmlIndexer;
 
 type
 
@@ -50,7 +50,11 @@ type
     FCurrentStream: TStream; // used to buffer the files that are to be compressed
     FCurrentIndex: Integer;
     FOnGetFileData: TGetDataFunc;
-    FStringsStream: TMemoryStream;
+    FStringsStream: TMemoryStream; // the #STRINGS file
+    FTopicsStream: TMemoryStream;  // the #TOPICS file
+    FURLTBLStream: TMemoryStream;  // the #URLTBL file. has offsets of strings in URLSTR
+    FURLSTRStream: TMemoryStream;  // the #URLSTR file
+    FFiftiMainStream: TMemoryStream;
     FContextStream: TMemoryStream; // the #IVB file
     FSection0: TMemoryStream;
     FSection1: TStream; // Compressed Stream
@@ -67,6 +71,7 @@ type
     FHasIndex: Boolean;
     FWindowSize: LongWord;
     FReadCompressedSize: QWord; // Current Size of Uncompressed data that went in Section1 (compressed)
+    FIndexedFiles: TIndexedWordList;
     // Linear order of file
     ITSFHeader: TITSFHeader;
     HeaderSection0Table: TITSFHeaderEntry;  // points to HeaderSection0
@@ -88,12 +93,19 @@ type
     procedure WriteSYSTEM;
     procedure WriteITBITS;
     procedure WriteSTRINGS;
+    procedure WriteTOPICS;
     procedure WriteIVB; // context ids
+    procedure WriteURL_STR_TBL;
+    procedure WriteOBJINST;
+    procedure WriteFiftiMain;
     procedure WriteREADMEFile;
+    procedure WriteFinalCompressedFiles;
     procedure WriteSection0;
     procedure WriteSection1;
     procedure WriteDataSpaceFiles(const AStream: TStream);
     function AddString(AString: String): LongWord;
+    function AddURL(AURL: String; TopicsIndex: DWord): LongWord;
+    procedure CheckFileMakeSearchable(AStream: TStream; AFileEntry: TFileEntryRec);
     // callbacks for lzxcomp
     function  AtEndOfData: Longbool;
     function  GetData(Count: LongInt; Buffer: PByte): LongInt;
@@ -125,13 +137,15 @@ type
   end;
 
 implementation
-uses dateutils, sysutils, paslzxcomp;
+uses dateutils, sysutils, paslzxcomp, chmFiftiMain;
 
 const
 
   LZX_WINDOW_SIZE = 16; // 16 = 2 frames = 1 shl 16
   LZX_FRAME_SIZE = $8000;
 
+{$I chmobjinstconst.inc}
+
 { TChmWriter }
 
 procedure TChmWriter.InitITSFHeader;
@@ -390,14 +404,25 @@ var
   Entry: TFileEntryRec;
   TmpStr: String;
   TmpTitle: String;
+  TmpStream: TMemoryStream;
 const
   VersionStr = 'HHA Version 4.74.8702'; // does this matter?
 begin
+
+
   // this creates the /#SYSTEM file
   Entry.Name := '#SYSTEM';
   Entry.Path := '/';
   Entry.Compressed := False;
   Entry.DecompressedOffset := FSection0.Position;
+
+ { if FileExists('#SYSTEM') then
+  begin
+    TmpStream := TMemoryStream.Create;
+    TmpStream.LoadFromFile('#SYSTEM');
+    TmpStream.Position := 0;
+    FSection0.CopyFrom(TmpStream, TmpStream.Size);
+  end;                                    }
   // EntryCodeOrder: 10 9 4 2 3 16 6 0 1 5
   FSection0.WriteDWord(NToLE(Word(3))); // Version
   if Title <> '' then
@@ -418,11 +443,13 @@ begin
   // 4 A struct that is only needed to set if full text search is on.
   FSection0.WriteWord(NToLE(Word(4)));
   FSection0.WriteWord(NToLE(Word(36))); // size
+
   FSection0.WriteDWord(NToLE(DWord($0409)));
+  FSection0.WriteDWord(1);
   FSection0.WriteDWord(NToLE(DWord(Ord(FFullTextSearch))));
   FSection0.WriteDWord(0);
   FSection0.WriteDWord(0);
-  FSection0.WriteDWord(0);
+
   // two for a QWord
   FSection0.WriteDWord(0);
   FSection0.WriteDWord(0);
@@ -459,6 +486,10 @@ begin
   
   // 6
   // unneeded. if output file is :  /somepath/OutFile.chm the value here is outfile(lowercase)
+  {FSection0.WriteWord(6);
+  FSection0.WriteWord(Length('test1')+1);
+  Fsection0.Write('test1', 5);
+  FSection0.WriteByte(0);}
   
   // 0 Table of contents filename
   if FHasTOC then begin
@@ -479,7 +510,7 @@ begin
   end;
   // 5 Default Window.
   // Not likely needed
-  
+// }
   Entry.DecompressedSize := FSection0.Position - Entry.DecompressedOffset;
   FInternalFiles.AddEntry(Entry);
 end;
@@ -492,7 +523,7 @@ begin
   Entry.Name := '#ITBITS';
   Entry.Path := '/';
   Entry.Compressed := False;
-  Entry.DecompressedOffset := FSection0.Position;
+  Entry.DecompressedOffset :=0;// FSection0.Position;
   Entry.DecompressedSize := 0;
   
   FInternalFiles.AddEntry(Entry);
@@ -503,7 +534,36 @@ begin
   if FStringsStream.Size = 0 then;
     FStringsStream.WriteByte(0);
   FStringsStream.Position := 0;
-  AddStreamToArchive('#STRINGS', '/', FStringsStream);
+  PostAddStreamToArchive('#STRINGS', '/', FStringsStream);
+end;
+
+procedure TChmWriter.WriteTOPICS;
+var
+  AWord: TIndexedWord;
+  FHits: Integer;
+  i: Integer;
+begin
+  if FTopicsStream.Size = 0 then
+    Exit;
+  FTopicsStream.Position := 0;
+  PostAddStreamToArchive('#TOPICS', '/', FTopicsStream);
+
+  AWord := FIndexedFiles.FirstWord;
+  WriteLn('Writing TOPICS');
+  while AWord <> nil do
+  begin
+    FHits := 0;
+    for i := 0 to AWord.DocumentCount-1 do
+    begin
+      Inc(FHits, Length(AWord.GetLogicalDocument(i).WordIndex));
+    //if AWord.IsTitle then
+
+    end;
+    //WriteLn(AWord.TheWord,'             documents = ', AWord.DocumentCount, ' hits = ', FHits, ' is title = ', AWord.IsTitle);
+    AWord := AWord.NextWord;
+  end;
+
+
 end;
 
 procedure TChmWriter.WriteIVB;
@@ -518,6 +578,156 @@ begin
   AddStreamToArchive('#IVB', '/', FContextStream);
 end;
 
+procedure TChmWriter.WriteURL_STR_TBL;
+begin
+  if FURLSTRStream.Size <> 0 then begin
+    FURLSTRStream.Position := 0;
+    PostAddStreamToArchive('#URLSTR', '/', FURLSTRStream);
+  end;
+  if FURLTBLStream.Size <> 0 then begin
+    FURLTBLStream.Position := 0;
+    PostAddStreamToArchive('#URLTBL', '/', FURLTBLStream);
+  end;
+end;
+
+procedure TChmWriter.WriteOBJINST;
+var
+  Entry: TFileEntryRec;
+  i: Integer;
+  TmpPos: Integer;
+  ObjStream: TMemoryStream;
+  //Flags: Word;
+begin
+  ObjStream := TMemorystream.Create;
+  // this file is needed to enable searches for the ms reader
+  ObjStream.WriteDWord(NtoLE($04000000));
+  ObjStream.WriteDWord(NtoLE(Dword(2))); // two entries
+
+  ObjStream.WriteDWord(NtoLE(DWord(24))); // offset into file of entry
+  ObjStream.WriteDWord(NtoLE(DWord(2691))); // size
+
+  ObjStream.WriteDWord(NtoLE(DWord(2715))); // offset into file of entry
+  ObjStream.WriteDWord(NtoLE(DWord(36))); // size
+
+  // first entry
+  // write guid 4662DAAF-D393-11D0-9A56-00C04FB68BF7
+  ObjStream.WriteDWord(NtoLE($4662DAAF));
+  ObjStream.WriteWord(NtoLE($D393));
+  ObjStream.WriteWord(NtoLE($11D0));
+  ObjStream.WriteWord(NtoLE($569A));
+  ObjStream.WriteByte($00);
+  ObjStream.WriteByte($C0);
+  ObjStream.WriteByte($4F);
+  ObjStream.WriteByte($B6);
+  ObjStream.WriteByte($8B);
+  ObjStream.WriteByte($F7);
+
+  ObjStream.WriteDWord(NtoLE($04000000));
+  ObjStream.WriteDWord(NtoLE(11));  // bit flags
+  ObjStream.WriteDWord(NtoLE(DWord(1252)));
+  ObjStream.WriteDWord(NtoLE(DWord(1033)));
+  ObjStream.WriteDWord(NtoLE($00000000));
+  ObjStream.WriteDWord(NtoLE($00000000));
+  ObjStream.WriteDWord(NtoLE($00145555));
+  ObjStream.WriteDWord(NtoLE($00000A0F));
+  ObjStream.WriteWord(NtoLE($0100));
+  ObjStream.WriteDWord(NtoLE($00030005));
+  for i := 0 to 5 do
+    ObjStream.WriteDWord($00000000);
+  ObjStream.WriteWord($0000);
+  // okay now the fun stuff
+  for i := 0 to $FF do
+  ObjStream.Write(ObjInstEntries[i], SizeOF(TObjInstEntry));
+  {begin
+    if i = 1 then
+      Flags := 7
+    else
+      Flags := 0;
+    if (i >= $41) and (i <= $5A) then
+      Flags := Flags or 2;
+    if (i >= $61) and (i <= $7A) then
+      Flags := Flags or 1;
+    if i = $27 then
+      Flags := Flags or 6;
+    ObjStream.WriteWord(NtoLE(Flags));
+    ObjStream.WriteWord(NtoLE(Word(i)));
+    if (i >= $41) and (i <= $5A) then
+      ObjStream.WriteByte(NtoLE(i+$20))
+    else
+      ObjStream.WriteByte(NtoLE(i));
+    ObjStream.WriteByte(NtoLE(i));
+    ObjStream.WriteByte(NtoLE(i));
+    ObjStream.WriteByte(NtoLE(i));
+    ObjStream.WriteWord(NtoLE($0000));
+  end;}
+  ObjStream.WriteDWord(NtoLE($E66561C6));
+  ObjStream.WriteDWord(NtoLE($73DF6561));
+  ObjStream.WriteDWord(NtoLE($656F8C73));
+  ObjStream.WriteWord(NtoLE($6F9C));
+  ObjStream.WriteByte(NtoLE($65));
+  // third bit of second entry
+  // write guid 8FA0D5A8-DEDF-11D0-9A61-00C04FB68BF7
+  ObjStream.WriteDWord(NtoLE($8FA0D5A8));
+  ObjStream.WriteWord(NtoLE($DEDF));
+  ObjStream.WriteWord(NtoLE($11D0));
+  ObjStream.WriteWord(NtoLE($619A));
+  ObjStream.WriteByte($00);
+  ObjStream.WriteByte($C0);
+  ObjStream.WriteByte($4F);
+  ObjStream.WriteByte($B6);
+  ObjStream.WriteByte($8B);
+  ObjStream.WriteByte($F7);
+
+  ObjStream.WriteDWord(NtoLE($04000000));
+  ObjStream.WriteDWord(NtoLE(DWord(1)));
+  ObjStream.WriteDWord(NtoLE(DWord(1252)));
+  ObjStream.WriteDWord(NtoLE(DWord(1033)));
+  ObjStream.WriteDWord(NtoLE(DWord(0)));
+
+  // second entry
+  // write guid 4662DAB0-D393-11D0-9A56-00C04FB68B66
+  ObjStream.WriteDWord(NtoLE($4662DAB0));
+  ObjStream.WriteWord(NtoLE($D393));
+  ObjStream.WriteWord(NtoLE($11D0));
+  ObjStream.WriteWord(NtoLE($569A));
+  ObjStream.WriteByte($00);
+  ObjStream.WriteByte($C0);
+  ObjStream.WriteByte($4F);
+  ObjStream.WriteByte($B6);
+  ObjStream.WriteByte($8B);
+  ObjStream.WriteByte($66);
+
+  ObjStream.WriteDWord(NtoLE(DWord(666))); // not kidding
+  ObjStream.WriteDWord(NtoLE(DWord(1252)));
+  ObjStream.WriteDWord(NtoLE(DWord(1033)));
+  ObjStream.WriteDWord(NtoLE(DWord(10031)));
+  ObjStream.WriteDWord(NtoLE(DWord(0)));
+
+  ObjStream.Position := 0;
+  AddStreamToArchive('$OBJINST', '/', ObjStream, True);
+  ObjStream.Free;
+
+end;
+
+procedure TChmWriter.WriteFiftiMain;
+var
+  SearchWriter: TChmSearchWriter;
+begin
+  if FTopicsStream.Size = 0 then
+    Exit;
+  SearchWriter := TChmSearchWriter.Create(FFiftiMainStream, FIndexedFiles);
+  SearchWriter.WriteToStream;
+  SearchWriter.Free;
+
+  WriteLn('FIftiMain Size = ', FFiftiMainStream.Size);
+
+  if FFiftiMainStream.Size = 0 then
+    Exit;
+
+  FFiftiMainStream.Position := 0;
+  PostAddStreamToArchive('$FIftiMain', '/', FFiftiMainStream);
+end;
+
 procedure TChmWriter.WriteREADMEFile;
 const DISCLAIMER_STR = 'This archive was not made by the MS HTML Help Workshop(r)(tm) program.';
 var
@@ -533,6 +743,14 @@ begin
   FInternalFiles.AddEntry(Entry);
 end;
 
+procedure TChmWriter.WriteFinalCompressedFiles;
+begin
+  WriteTOPICS;
+  WriteURL_STR_TBL;
+  WriteSTRINGS;
+  WriteFiftiMain;
+end;
+
 
 procedure TChmWriter.WriteSection0;
 begin
@@ -609,6 +827,45 @@ begin
   FStringsStream.WriteByte(0);
 end;
 
+function TChmWriter.AddURL ( AURL: String; TopicsIndex: DWord ) : LongWord;
+
+  procedure CheckURLStrBlockCanHold(AString: String);
+  var
+    Rem: LongWord;
+    Len: LongWord;
+  begin
+    Rem := $4000 - (FURLSTRStream.Size mod $4000);
+    WriteLn(Rem);
+    Len := 9 + Length(AString);
+    if Rem < Len then
+      while Rem > 0 do
+      begin
+        FURLSTRStream.WriteByte(0);
+        Dec(Rem);
+      end;
+  end;
+
+  function AddURLString(AString: String): DWord;
+  begin
+    CheckURLStrBlockCanHold(AString);
+    if FURLSTRStream.Size mod $4000 = 0 then
+      FURLSTRStream.WriteByte(0);
+      Result := FURLSTRStream.Position;
+      FURLSTRStream.WriteDWord(NToLE(DWord(0))); // URL Offset for topic??
+      FURLSTRStream.WriteDWord(NToLE(DWord(0))); // Offset of FrameName??
+      FURLSTRStream.Write(AString[1], Length(AString));
+      FURLSTRStream.WriteByte(0); //NT
+  end;
+begin
+  if AURL[1] = '/' then Delete(AURL,1,1);
+  if $1000 - (FURLTBLStream.Size mod $1000) = 4 then
+    FURLTBLStream.WriteDWord(NtoLE(DWord(4096)));
+  Result := FURLTBLStream.Position;
+  FURLTBLStream.WriteDWord($231e9f5c); //unknown
+  FURLTBLStream.WriteDWord(NtoLE(TopicsIndex)); // Index of topic in #TOPICS
+  FURLTBLStream.WriteDWord(NtoLE(AddURLString(AURL)));
+end;
+
 function _AtEndOfData(arg: pointer): LongBool; cdecl;
 begin
   Result := TChmWriter(arg).AtEndOfData;
@@ -643,6 +900,9 @@ begin
       FileEntry.DecompressedOffset := FReadCompressedSize; //269047723;//to test writing really large numbers
       FileEntry.Compressed := True;
       
+      if FullTextSearch then
+        CheckFileMakeSearchable(FCurrentStream, FileEntry);
+
       FInternalFiles.AddEntry(FileEntry);
       // So the next file knows it's offset
       Inc(FReadCompressedSize,  FileEntry.DecompressedSize);
@@ -657,6 +917,7 @@ begin
       if Assigned(FOnLastFile) then
         FOnLastFile(Self);
       FCurrentStream.Free;
+      WriteFinalCompressedFiles;
       FCurrentStream := FPostStream;
       FCurrentStream.Position := 0;
       Inc(FReadCompressedSize, FCurrentStream.Size);
@@ -730,6 +991,45 @@ begin
   // We have to trim the last entry off when we are done because there is no next block in that case
 end;
 
+procedure TChmWriter.CheckFileMakeSearchable(AStream: TStream; AFileEntry: TFileEntryRec);
+type
+  TTopicEntry = record
+    TocOffset,
+    StringsOffset,
+    URLTableOffset: DWord;
+    InContents: Word;// 2 = in contents 6 = not in contents
+    Unknown: Word; // 0,2,4,8,10,12,16,32
+  end;
+
+  function GetNewTopicsIndex: Integer;
+  begin
+    Result := FTopicsStream.Size div 16;
+  end;
+  var
+    TopicEntry: TTopicEntry;
+    ATitle: String;
+begin
+  if Pos('.ht', AFileEntry.Name) > 0 then
+  begin
+    WriteLn('Should Search ', AFileEntry.Name);
+    ATitle := FIndexedFiles.IndexFile(AStream, GetNewTopicsIndex);
+    if ATitle <> '' then
+      TopicEntry.StringsOffset := AddString(ATitle)
+    else
+      TopicEntry.StringsOffset := $FFFFFFFF;
+    TopicEntry.URLTableOffset := AddURL(AFileEntry.Path+AFileEntry.Name, GetNewTopicsIndex);
+    TopicEntry.InContents := 2;
+    TopicEntry.Unknown := 0;
+    TopicEntry.TocOffset := 0;
+    FTopicsStream.WriteDWord(LEtoN(TopicEntry.TocOffset));
+    FTopicsStream.WriteDWord(LEtoN(TopicEntry.StringsOffset));
+    FTopicsStream.WriteDWord(LEtoN(TopicEntry.URLTableOffset));
+    FTopicsStream.WriteWord(LEtoN(TopicEntry.InContents));
+    FTopicsStream.WriteWord(LEtoN(TopicEntry.Unknown));
+  end
+  else  WriteLn('Don''t Search ', AFileEntry.Name);
+end;
+
 constructor TChmWriter.Create(OutStream: TStream; FreeStreamOnDestroy: Boolean);
 begin
   if OutStream = nil then Raise Exception.Create('TChmWriter.OutStream Cannot be nil!');
@@ -738,6 +1038,10 @@ begin
   FOutStream := OutStream;
   FInternalFiles := TFileEntryList.Create;
   FStringsStream := TmemoryStream.Create;
+  FTopicsStream := TMemoryStream.Create;
+  FURLSTRStream := TMemoryStream.Create;
+  FURLTBLStream := TMemoryStream.Create;
+  FFiftiMainStream := TMemoryStream.Create;
   FSection0 := TMemoryStream.Create;
   FSection1 := TMemoryStream.Create;
   FSection1ResetTable := TMemoryStream.Create;
@@ -745,6 +1049,7 @@ begin
   FPostStream := TMemoryStream.Create;;
   FDestroyStream := FreeStreamOnDestroy;
   FFileNames := TStringList.Create;
+  FIndexedFiles := TIndexedWordList.Create;
 end;
 
 destructor TChmWriter.Destroy;
@@ -754,11 +1059,16 @@ begin
   FInternalFiles.Free;
   FCurrentStream.Free;
   FStringsStream.Free;
+  FTopicsStream.Free;
+  FURLSTRStream.Free;
+  FURLTBLStream.Free;
+  FFiftiMainStream.Free;
   FSection0.Free;
   FSection1.Free;
   FSection1ResetTable.Free;
   FDirectoryListings.Free;
   FFileNames.Free;
+  FIndexedFiles.Free;
   inherited Destroy;
 end;
 
@@ -770,10 +1080,11 @@ begin
 
   // write any internal files to FCurrentStream that we want in the compressed section
   WriteIVB;
-  WriteSTRINGS;
   
   // written to Section0 (uncompressed)
   WriteREADMEFile;
+
+  WriteOBJINST;
   
   // move back to zero so that we can start reading from zero :)
   FReadCompressedSize := FCurrentStream.Size;
@@ -790,6 +1101,7 @@ begin
   // This creates and writes the #SYSTEM file to section0
   WriteSystem;
 
+
   //this creates all special files in the archive that start with ::DataSpace
   WriteDataSpaceFiles(FSection0);
   
@@ -843,6 +1155,8 @@ begin
   Entry.Compressed :=  Compress;
   Entry.DecompressedOffset := TargetStream.Position;
   Entry.DecompressedSize := AStream.Size;
+  if FullTextSearch then
+    CheckFileMakeSearchable(AStream, Entry); // Must check before we add it to the list so we know if the name needs to be added to #STRINGS
   FInternalFiles.AddEntry(Entry);
   AStream.Position := 0;
   TargetStream.CopyFrom(AStream, AStream.Size);
@@ -871,6 +1185,8 @@ begin
   FInternalFiles.AddEntry(Entry);
   AStream.Position := 0;
   TargetStream.CopyFrom(AStream, AStream.Size);
+  if FullTextSearch then
+    CheckFileMakeSearchable(AStream, Entry);
 end;
 
 procedure TChmWriter.AddContext(AContext: DWord; ATopic: String);
@@ -909,3 +1225,4 @@ begin
 end;
 
 end.
+

+ 6 - 1
packages/chm/src/fasthtmlparser.pas

@@ -159,7 +159,12 @@ type
   TOnFoundText = procedure(Text: string) of object;
 
   // Lars's modified html parser, case insensitive or case sensitive 
+
+  { THTMLParser }
+
   THTMLParser = class(TObject)
+    private
+      FDone: Boolean;
     public
       OnFoundTag: TOnFoundTag;
       OnFoundText: TOnFoundText;
@@ -169,6 +174,7 @@ type
       procedure Exec;
       procedure NilOnFoundTag(NoCaseTag, ActualTag: string);
       procedure NilOnFoundText(Text: string);
+      property Done: Boolean read FDone write FDone;
   end;
 
 
@@ -220,7 +226,6 @@ var
   L: Integer;
   TL: Integer;
   I: Integer;
-  Done: Boolean;
   TagStart,
   TextStart,
   P: PChar;   // Pointer to current char.

+ 477 - 0
packages/chm/src/htmlindexer.pas

@@ -0,0 +1,477 @@
+{ Copyright (C) <2008> <Andrew Haines> htmlindexer.pas
+
+  This library is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Library General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or (at your
+  option) any later version.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
+  for more details.
+
+  You should have received a copy of the GNU Library General Public License
+  along with this library; if not, write to the Free Software Foundation,
+  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+}
+{
+  See the file COPYING.FPC, included in this distribution,
+  for details about the copyright.
+}
+unit HTMLIndexer;
+{$MODE OBJFPC}{$H+}
+interface
+uses Classes, SysUtils, FastHTMLParser;
+
+Type
+
+  { TIndexedWord }
+
+  { TIndexDocument }
+
+  TIndexDocument = class(TObject)
+  private
+    FDocumentIndex: Integer;
+  public
+    WordIndex: array of Integer;
+    procedure AddWordIndex(AIndex: Integer);
+    constructor Create(ADocumentIndex: Integer);
+    property DocumentIndex: Integer read FDocumentIndex;
+  end;
+
+
+
+
+  TIndexedWord = class(TObject)
+  private
+    FIsTitle: Boolean;
+    FNextWord: TIndexedWord;
+    FPrevWord: TIndexedWord;
+    FTheWord: string;
+    FCachedTopic: TIndexDocument;
+    FDocuments: Array of TIndexDocument;
+    function GetDocument ( TopicIndexNum: Integer ) : TIndexDocument;
+    function GetDocumentCount: Integer;
+  public
+    constructor Create(AWord: String; AIsTitle: Boolean);
+    destructor Destroy; override;
+    function GetLogicalDocument(AIndex: Integer): TIndexDocument;
+    property TheWord: string read FTheWord; // Always lowercase
+    property PrevWord: TIndexedWord read FPrevWord write FPrevWord;
+    property NextWord: TIndexedWord read FNextWord write FNextWord;
+    property DocumentTopic[TopicIndexNum: Integer]: TIndexDocument read GetDocument;
+    property DocumentCount: Integer read GetDocumentCount;
+    property IsTitle: Boolean read FIsTitle;
+  end;
+
+  { TIndexedWordList }
+
+  TIndexedWordList = class(TObject)
+  private
+    FIndexedFileCount: DWord;
+    //vars while processing page
+    FInTitle,
+    FInBody: Boolean;
+    FWordCount: Integer; // only words in body
+    FDocTitle: String;
+    FTopicIndex: Integer;
+    //end vars
+    FTotalDifferentWordLength: DWord;
+    FTotalDIfferentWords: DWord;
+    FTotalWordCount: DWord;
+    FTotalWordLength: DWord;
+    FLongestWord: DWord;
+    FFirstWord: TIndexedWord;
+    FCachedWord: TIndexedWord;
+    function AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord;
+    function GetWordForward(AWord: String; StartWord: TIndexedWord; out WrongWord: TIndexedWord; AIsTitle: Boolean): TIndexedWord;
+    function GetWordBackward(AWord: String; StartWord: TIndexedWord; out WrongWord: TIndexedWord; AIsTitle: Boolean): TIndexedWord;
+    function CompareWord(AWord: String; AIndexWord: TIndexedWord; AIsTitle: Boolean): Integer;
+    // callbacks
+    procedure CBFoundTag(NoCaseTag, ActualTag: string);
+    procedure CBFountText(Text: string);
+
+    procedure EatWords(Words: String; IsTitle: Boolean);
+  public
+    constructor Create;
+    destructor  Destroy; override;
+    function  IndexFile(AStream: TStream; ATOPICIndex: Integer): String; // returns the documents <Title>
+    procedure Clear;
+    procedure AddWord(const AWord: TIndexedWord; StartingWord: TIndexedWord; AIsTitle: Boolean);
+    property FirstWord: TIndexedWord read FFirstWord;
+    property IndexedFileCount: DWord read FIndexedFileCount;
+    property LongestWord: DWord read FLongestWord;
+    property TotalWordCount: DWord read FTotalWordCount;
+    property TotalDIfferentWords: DWord read FTotalDIfferentWords;
+    property TotalWordLength: DWord read FTotalWordLength;
+    property TotalDifferentWordLength: DWord read FTotalDifferentWordLength;
+    property Words[AWord: String; IsTitle: Boolean] : TIndexedWord read AddGetWord;
+  end;
+
+implementation
+
+function Max(ANumber, BNumber: DWord): DWord;
+begin
+  if ANumber > BNumber then
+    Result := ANumber
+  else
+    Result := BNumber;
+end;
+
+{ TIndexedWordList }
+
+function TIndexedWordList.AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord;
+var
+  //StartWord,
+  WrongWord: TIndexedWord;
+begin
+  Result := nil;
+  AWord := LowerCase(AWord);
+
+  {if FCachedWord <> nil then
+    StartWord := FCachedWord
+  else
+    StartWord := FFirstWord;
+
+  if StartWord <> nil then
+  begin
+    case CompareWord(AWord, StartWord, IsTitle) of
+      0: Exit(WrongWord);
+      1: Result := GetWordBackward(AWord, StartWord, WrongWord, IsTitle);
+     -1: Result := GetWordForward(AWord, StartWord, WrongWord, IsTitle);
+    end;
+  end
+  else}
+    Result := GetWordForward(AWord, FFirstWord, WrongWord, IsTitle);
+
+  if Result = nil then
+  begin
+    Inc(FTotalDifferentWordLength, Length(AWord));
+    Inc(FTotalDIfferentWords);
+    Result := TIndexedWord.Create(AWord,IsTitle);
+    AddWord(Result, WrongWord,IsTitle);
+    if IsTitle then
+    ;//WriteLn('Creating word: ', AWord);
+    FLongestWord := Max(FLongestWord, Length(AWord));
+  end;
+  Inc(FTotalWordLength, Length(AWord));
+  Inc(FTotalWordCount);
+end;
+
+function TIndexedWordList.GetWordForward(AWord: String; StartWord: TIndexedWord; out WrongWord: TIndexedWord; AIsTitle: Boolean): TIndexedWord;
+var
+  FCurrentWord: TIndexedWord;
+begin
+  Result := nil;
+  WrongWord := nil;
+  FCurrentWord := StartWord;
+  while (FCurrentWord <> nil) and (CompareWord(AWord, FCurrentWord, AIsTitle) <> 0) do
+  begin
+    WrongWord := FCurrentWord;
+    case CompareWord(AWord, FCurrentWord, AIsTitle) of
+      -1: FCurrentWord := nil;
+       0: Exit(FCurrentWord);
+       1: FCurrentWord := FCurrentWord.NextWord;
+    end;
+  end;
+
+  if FCurrentWord <> nil then
+    Result := FCurrentWord;
+end;
+
+function TIndexedWordList.GetWordBackward(AWord: String; StartWord: TIndexedWord; out WrongWord: TIndexedWord; AIsTitle: Boolean): TIndexedWord;
+var
+  FCurrentWord: TIndexedWord;
+begin
+  Result := nil;
+  WrongWord := nil;
+  FCurrentWord := StartWord;
+  while (FCurrentWord <> nil) and (CompareWord(AWord, FCurrentWord, AIsTitle) <> 0) do
+  begin
+    WrongWord := FCurrentWord;
+    case CompareWord(AWord, FCurrentWord, AIsTitle) of
+      -1:
+          begin
+            WrongWord := FCurrentWord;
+            FCurrentWord := nil
+          end;
+       0: Exit(FCurrentWord);
+       1: FCurrentWord := FCurrentWord.PrevWord;
+    end;
+  end;
+  if FCurrentWord <> nil then
+    Result := FCurrentWord;
+end;
+
+function TIndexedWordList.CompareWord ( AWord: String;
+  AIndexWord: TIndexedWord; AIsTitle: Boolean ) : Integer;
+begin
+  Result := CompareText(AWord, AIndexWord.TheWord);
+  if Result = 0 then
+  begin
+    Result := Result + ord(AIndexWord.IsTitle);
+    Result := Result - ord(AIsTitle);
+  end;
+  if Result < 0 then Result := -1
+  else if Result > 0 then Result := 1;
+  //if AIsTitle then
+    //WriteLn('Looking for title word :', AWord);
+  //WriteLn(Result);
+end;
+
+procedure TIndexedWordList.CBFoundTag(NoCaseTag, ActualTag: string);
+begin
+  if FInBody then begin
+    if NoCaseTag = '</BODY>' then FInBody := False;
+  end
+  else begin
+    //WriteLn('"',NoCaseTag,'"');
+    if NoCaseTag      = '<TITLE>' then FInTitle := True
+    else if NoCaseTag = '</TITLE>' then FInTitle := False
+    else if NoCaseTag = '<BODY>' then FInBody := True
+    else
+  end;
+
+end;
+
+procedure TIndexedWordList.CBFountText(Text: string);
+begin
+  if Length(Text) < 1 then
+    Exit;
+  EatWords(Text, FInTitle and not FInBody);
+end;
+
+procedure TIndexedWordList.EatWords ( Words: String; IsTitle: Boolean ) ;
+var
+  WordPtr: PChar;
+  WordStart: PChar;
+  InWord: Boolean;
+  IsNumberWord: Boolean;
+  function IsEndOfWord: Boolean;
+  begin
+    Result := not (WordPtr^ in ['a'..'z', '0'..'9', #01, #$DE, #$FE]);
+    if  Result and IsNumberWord then
+      Result :=  Result and (WordPtr[0] <> '.');
+    if Result and InWord then
+      Result := Result and (WordPtr[0] <> '''');
+  ;
+  end;
+  var
+    WordIndex: TIndexedWord;
+    WordName: String;
+    FPos: Integer;
+begin
+  if IsTitle then
+    FDocTitle := Words;
+  Words := LowerCase(Words);
+  WordStart := PChar(Words);
+  WordPtr := WordStart;
+  IsNumberWord := False;
+  InWord := False;
+  repeat
+    if InWord and IsEndOfWord then
+    begin
+      WordName := Copy(WordStart, 0, (WordPtr-WordStart));
+      FPos := Pos('''', WordName);
+      while FPos > 0 do
+      begin
+        Delete(WordName, FPos, 1);
+        FPos := Pos('''', WordName);
+      end;
+      WordIndex := Self.Words[WordName, IsTitle];
+      InWord := False;
+      //if IsNumberWord then WriteLn('Following is NUMBER WORD: "', (WordStart[0]),'"'); ;
+      IsNumberWord := False;
+      WordIndex.DocumentTopic[FTopicIndex].AddWordIndex(FWordCount);
+      //WriteLn(FWordCount, ' "', WordName,'"');
+      //if not IsTitle then
+        Inc(FWordCount);
+
+    end
+    else if not InWord and not IsEndOfWord then
+    begin
+      InWord := True;
+      WordStart := WordPtr;
+      IsNumberWord := WordPtr^ in ['0'..'9'];
+      //if IsNumberWord then WriteLn('Following is NUMBER WORD: "', WordPtr[0],'"'); ;
+    end;
+    Inc(WordPtr);
+  until WordPtr^ = #0;
+
+  if InWord then
+  begin
+    WordName := Copy(WordStart, 0, (WordPtr-WordStart));
+    WordIndex := Self.Words[WordName, IsTitle];
+    WordIndex.DocumentTopic[FTopicIndex].AddWordIndex(FWordCount);
+    InWord := False;
+    //if IsNumberWord then WriteLn('Following is NUMBER WORD: "', (WordStart[0]),'"'); ;
+    IsNumberWord := False;
+    //WriteLn(FWordCount, ' "', WordName,'"');
+    if not IsTitle then
+      Inc(FWordCount);
+
+  end;
+
+end;
+
+constructor TIndexedWordList.Create;
+begin
+  inherited;
+end;
+
+destructor TIndexedWordList.Destroy;
+begin
+  Clear;
+  inherited Destroy;
+end;
+
+function TIndexedWordList.IndexFile(AStream: TStream; ATOPICIndex: Integer): String;
+var
+  TheFile: String;
+  Parser: THTMLParser;
+begin
+  FInBody := False;
+  FInTitle:= False;
+  FWordCount := 0;
+  FTopicIndex := ATOPICIndex;
+  FIndexedFileCount := FIndexedFileCount +1;
+
+  SetLength(TheFile, AStream.Size+1);
+  AStream.Position := 0;
+  AStream.Read(TheFile[1], AStream.Size);
+  TheFile[Length(TheFile)] := #0;
+
+  Parser := THTMLParser.Create(@TheFile[1]);
+  Parser.OnFoundTag := @CBFoundTag;
+  Parser.OnFoundText := @CBFountText;
+  Parser.Exec;
+  Parser.Free;
+
+  Result := FDocTitle;
+  FDocTitle := '';
+  FInBody := False;
+  FInTitle:= False;
+  FWordCount := 0;
+  FTopicIndex := -1;
+
+  AStream.Position := 0;
+end;
+
+procedure TIndexedWordList.Clear;
+var
+  FCurrentWord: TIndexedWord;
+begin
+  FCurrentWord := FFirstWord;
+  while FCurrentWord <> nil do
+  begin
+    FFirstWord := FCurrentWord.NextWord;
+    FCurrentWord.Free;
+    FCurrentWord := FFirstWord;
+  end;
+end;
+
+procedure TIndexedWordList.AddWord(const AWord: TIndexedWord; StartingWord: TIndexedWord; AIsTitle: Boolean);
+var
+  WrongWord: TIndexedWord;
+begin
+  if FFirstWord = nil then
+    FFirstWord := AWord
+  else begin
+    if StartingWord <> nil then
+      WrongWord := StartingWord;
+    case CompareWord(AWord.TheWord, StartingWord, AIsTitle) of
+       1: GetWordForward(AWord.TheWord, StartingWord, WrongWord, AIsTitle);
+       0: ; // uh oh
+      -1: GetWordBackward(AWord.TheWord, StartingWord, WrongWord, AIsTitle);
+    end;
+    if WrongWord = nil then
+       WrongWord := FirstWord;
+    case CompareWord(AWord.TheWord, WrongWord, AIsTitle) of
+       -1:
+          begin
+            AWord.PrevWord := WrongWord.PrevWord;
+            if AWord.PrevWord <> nil then
+              AWord.PrevWord.NextWord := AWord;
+            WrongWord.PrevWord := AWord;
+            AWord.NextWord := WrongWord;
+          end;
+        0: ;//WriteLn('Found word which shouldn''t happen'); // uh oh
+        1:
+          begin
+            AWord.PrevWord := WrongWord;
+            AWord.NextWord := WrongWord.NextWord;
+            WrongWord.NextWord := AWord;
+          end;
+    end;
+  end;
+  if AWord.PrevWord = nil then
+     FFirstWord := AWord;
+  FCachedWord := AWord;
+end;
+
+
+{ TIndexedWord }
+
+function TIndexedWord.GetDocument ( TopicIndexNum: Integer ) : TIndexDocument;
+var
+  i: Integer;
+begin
+  Result := nil;
+  if (FCachedTopic <> nil) and (FCachedTopic.FDocumentIndex = TopicIndexNum) then
+    Exit(FCachedTopic);
+
+  for i := 0 to High(FDocuments) do
+    if FDocuments[i].FDocumentIndex = TopicIndexNum then
+      Exit(FDocuments[i]);
+  if Result = nil then
+  begin
+    Result := TIndexDocument.Create(TopicIndexNum);
+    SetLength(FDocuments, Length(FDocuments)+1);
+    FDocuments[High(FDocuments)] := Result;
+  end;
+  FCachedTopic := Result;
+end;
+
+function TIndexedWord.GetDocumentCount: Integer;
+begin
+  Result := Length(FDocuments);
+end;
+
+constructor TIndexedWord.Create(AWord: String; AIsTitle: Boolean);
+begin
+  FTheWord := AWord;
+  FIsTitle := AIsTitle;
+end;
+
+destructor TIndexedWord.Destroy;
+var
+  i: Integer;
+begin
+  if FPrevWord <> nil then
+    FPrevWord.NextWord := FNextWord;
+  if FNextWord <> nil then
+    FNextWord.PrevWord := FPrevWord;
+  for i := 0 to High(FDocuments) do
+    FreeAndNil(FDocuments[i]);
+  inherited Destroy;
+end;
+
+function TIndexedWord.GetLogicalDocument ( AIndex: Integer ) : TIndexDocument;
+begin
+  Result := FDocuments[AIndex];;
+end;
+
+{ TIndexDocument }
+
+procedure TIndexDocument.AddWordIndex ( AIndex: Integer ) ;
+begin
+  SetLength(WordIndex, Length(WordIndex)+1);
+  WordIndex[High(WordIndex)] := AIndex;
+end;
+
+constructor TIndexDocument.Create ( ADocumentIndex: Integer ) ;
+begin
+  FDocumentIndex := ADocumentIndex;
+end;
+
+end.