|
@@ -7,52 +7,272 @@
|
|
|
|
|
|
program knucleotide;
|
|
|
|
|
|
-{$mode objfpc}{$I-}
|
|
|
+{$mode objfpc}{$I-}{$INLINE ON}
|
|
|
|
|
|
(* simple_hash available from CVS *)
|
|
|
-uses simple_hash, SysUtils, Strings, Math;
|
|
|
+
|
|
|
+const
|
|
|
+ ht_num_primes = 28;
|
|
|
+
|
|
|
+ ht_prime_list: array[0 .. ht_num_primes-1] of dword =
|
|
|
+ ( 53, 97, 193, 389, 769,
|
|
|
+ 1543, 3079, 6151, 12289, 24593,
|
|
|
+ 49157, 98317, 196613, 393241, 786433,
|
|
|
+ 1572869, 3145739, 6291469, 12582917, 25165843,
|
|
|
+ 50331653, 100663319, 201326611, 402653189, 805306457,
|
|
|
+ 1610612741, 3221225473, 4294967291 );
|
|
|
+
|
|
|
+type
|
|
|
+ { TNonFreePooledMemManager - a memory manager for records without freeing }
|
|
|
+
|
|
|
+ PMemChunk = ^TMemChunk;
|
|
|
+ TMemChunk = record
|
|
|
+ data: pointer;
|
|
|
+ next: PMemChunk;
|
|
|
+ end;
|
|
|
+
|
|
|
+ TNonFreePooledMemManager = class
|
|
|
+ private
|
|
|
+ FItemSize: integer;
|
|
|
+ FItems: PMemChunk;
|
|
|
+ FCurItem: Pointer;
|
|
|
+ FEndItem: Pointer;
|
|
|
+ FCurSize: integer;
|
|
|
+ procedure Grow;
|
|
|
+ public
|
|
|
+ property ItemSize: integer read FItemSize;
|
|
|
+ constructor Create(TheItemSize: integer);
|
|
|
+ destructor Destroy; override;
|
|
|
+ function NewItem: Pointer; inline;
|
|
|
+ end;
|
|
|
+
|
|
|
+ { THashTable }
|
|
|
+
|
|
|
+ ht_ppnode = ^ht_pnode;
|
|
|
+ ht_pnode = ^ht_node;
|
|
|
+ ht_node = record
|
|
|
+ val: integer;
|
|
|
+ next: ht_pnode;
|
|
|
+ keydata: array[0..0] of char;
|
|
|
+ end;
|
|
|
+
|
|
|
+ THashTable=class
|
|
|
+ private
|
|
|
+ FSize: dword;
|
|
|
+ FKeysize: dword;
|
|
|
+ FTbl: ht_ppnode;
|
|
|
+ FIter_index: dword;
|
|
|
+ FIter_next: ht_pnode;
|
|
|
+ FNodeMemManager: TNonFreePooledMemManager;
|
|
|
+ public
|
|
|
+ constructor Create(size: dword; keysize: dword);
|
|
|
+ destructor Destroy; override;
|
|
|
+ function Find(key: pchar): ht_pnode;
|
|
|
+ function FindNew(key: pchar): ht_pnode;
|
|
|
+ function First: ht_pnode;
|
|
|
+ function Next: ht_pnode;
|
|
|
+ end;
|
|
|
+
|
|
|
+{ TNonFreePooledMemManager }
|
|
|
+
|
|
|
+procedure TNonFreePooledMemManager.Grow;
|
|
|
+var
|
|
|
+ memchunk: PMemChunk;
|
|
|
+begin
|
|
|
+ if FCurSize<256*1024 then
|
|
|
+ // each item has double the size of its predecessor
|
|
|
+ inc(FCurSize, FCurSize);
|
|
|
+ GetMem(FCurItem,FCurSize);
|
|
|
+ FillChar(FCurItem^, FCurSize, 0);
|
|
|
+ new(MemChunk);
|
|
|
+ MemChunk^.next := FItems;
|
|
|
+ MemChunk^.Data := FCurItem;
|
|
|
+ FItems := MemChunk;
|
|
|
+ FEndItem := FCurItem;
|
|
|
+ Inc(FEndItem, FCurSize);
|
|
|
+end;
|
|
|
+
|
|
|
+constructor TNonFreePooledMemManager.Create(TheItemSize: integer);
|
|
|
+begin
|
|
|
+ FItemSize:=TheItemSize;
|
|
|
+ FCurSize:=FItemSize*4; // 4 items => the first item has 8 entries
|
|
|
+end;
|
|
|
+
|
|
|
+destructor TNonFreePooledMemManager.Destroy;
|
|
|
+var
|
|
|
+ p: PMemChunk;
|
|
|
+begin
|
|
|
+ while FItems<>nil do begin
|
|
|
+ p := FItems;
|
|
|
+ FItems := Fitems^.next;
|
|
|
+ FreeMem(p^.Data);
|
|
|
+ Dispose(p);
|
|
|
+ end;
|
|
|
+ inherited Destroy;
|
|
|
+end;
|
|
|
+
|
|
|
+function TNonFreePooledMemManager.NewItem: Pointer; inline;
|
|
|
+begin
|
|
|
+ if (FCurItem=FEndItem) then
|
|
|
+ Grow;
|
|
|
+ Result:=FCurItem;
|
|
|
+ Inc(FCurItem, FItemSize);
|
|
|
+end;
|
|
|
+
|
|
|
+{ THashTable }
|
|
|
+
|
|
|
+constructor THashTable.Create(size: dword; keysize: dword);
|
|
|
+var
|
|
|
+ i: integer;
|
|
|
+begin
|
|
|
+ i := 0;
|
|
|
+ while (i<high(ht_prime_list)) and (size>ht_prime_list[i]) do
|
|
|
+ inc(i);
|
|
|
+ FSize := ht_prime_list[i];
|
|
|
+ fkeysize := keysize;
|
|
|
+ ftbl := allocmem(sizeof(ht_pnode) * FSize);
|
|
|
+ fiter_index := 0;
|
|
|
+ fiter_next := nil;
|
|
|
+ FNodeMemManager := TNonFreePooledMemManager.Create(SizeOf(ht_node)+FKeySize);
|
|
|
+end;
|
|
|
+
|
|
|
+destructor THashTable.Destroy;
|
|
|
+begin
|
|
|
+ FNodeMemManager.Free;
|
|
|
+ freemem(Ftbl);
|
|
|
+ inherited;
|
|
|
+end;
|
|
|
+
|
|
|
+function ht_hashcode(key: pchar; keysize: dword): dword; //inline;
|
|
|
+var
|
|
|
+ val: dword;
|
|
|
+ i: integer;
|
|
|
+begin
|
|
|
+ val := 0;
|
|
|
+
|
|
|
+ for i := 0 to Keysize -1 do
|
|
|
+ begin
|
|
|
+ val := val * 4;
|
|
|
+ inc(val, dword(byte(key^) and 6) shr 1);
|
|
|
+ inc(key);
|
|
|
+ end;
|
|
|
+ result := val;
|
|
|
+end;
|
|
|
+
|
|
|
+function THashTable.Find(key: pchar): ht_pnode;
|
|
|
+var
|
|
|
+ hash_code: dword;
|
|
|
+ node: ht_pnode;
|
|
|
+begin
|
|
|
+ hash_code := ht_hashcode(key, FKeySize) mod FSize;
|
|
|
+ node := FTbl[hash_code];
|
|
|
+ while node <> nil do
|
|
|
+ begin
|
|
|
+ if comparebyte(key^, node^.keydata, FKeysize) = 0 then
|
|
|
+ begin
|
|
|
+ result := node;
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+ node := node^.next;
|
|
|
+ end;
|
|
|
+ result := nil;
|
|
|
+end;
|
|
|
+
|
|
|
+function THashTable.FindNew(key: pchar): ht_pnode;
|
|
|
+var
|
|
|
+ hash_code: integer;
|
|
|
+ prev, node: ht_pnode;
|
|
|
+begin
|
|
|
+ prev := nil;
|
|
|
+ hash_code := ht_hashcode(key, FKeysize) mod FSize;
|
|
|
+ node := FTbl[hash_code];
|
|
|
+ while node <> nil do
|
|
|
+ begin
|
|
|
+ if CompareByte(key^, node^.keydata, FKeysize) = 0 then
|
|
|
+ begin
|
|
|
+ result := node;
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+ prev := node;
|
|
|
+ node := node^.next;
|
|
|
+ end;
|
|
|
+ result := FNodeMemManager.NewItem;
|
|
|
+ move(key^,Result^.keydata,FKeysize);
|
|
|
+ if prev <> nil then
|
|
|
+ begin
|
|
|
+ prev^.next := result;
|
|
|
+ end else begin
|
|
|
+ FTbl[hash_code] := result;
|
|
|
+ end;
|
|
|
+end;
|
|
|
+
|
|
|
+{
|
|
|
+ Hash Table iterator data / functions
|
|
|
+}
|
|
|
+
|
|
|
+function THashTable.First: ht_pnode;
|
|
|
+begin
|
|
|
+ FIter_index := 0;
|
|
|
+ FIter_next := nil;
|
|
|
+ result := next;
|
|
|
+end;
|
|
|
+
|
|
|
+function THashTable.Next: ht_pnode;
|
|
|
+var
|
|
|
+ index: dword;
|
|
|
+ node: ht_pnode;
|
|
|
+begin
|
|
|
+ node := FIter_next;
|
|
|
+ if node <> nil then
|
|
|
+ begin
|
|
|
+ FIter_next := node^.next;
|
|
|
+ result := node;
|
|
|
+ exit;
|
|
|
+ end else begin
|
|
|
+ while FIter_index < FSize do
|
|
|
+ begin
|
|
|
+ index := FIter_index;
|
|
|
+ inc(FIter_index);
|
|
|
+ if FTbl[index] <> nil then
|
|
|
+ begin
|
|
|
+ FIter_next := FTbl[index]^.next;
|
|
|
+ result := FTbl[index];
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ result := nil;
|
|
|
+end;
|
|
|
+
|
|
|
+{==============================================================================}
|
|
|
|
|
|
type
|
|
|
sorter = record
|
|
|
- sequence : PChar;
|
|
|
- num : longint;
|
|
|
+ sequence : ansistring;
|
|
|
+ num : longint;
|
|
|
end;
|
|
|
sorterArray = array of sorter;
|
|
|
|
|
|
-function hash_table_size (fl : dword; buflen : dword): dword;
|
|
|
-var
|
|
|
- maxsize1, maxsize2, r : dword;
|
|
|
+function hash_table_size (fl : dword): dword;
|
|
|
begin
|
|
|
- maxsize1 := buflen - fl;
|
|
|
- maxsize2 := 4;
|
|
|
- while (fl > 1) and (maxsize2 < maxsize1) do
|
|
|
- begin
|
|
|
- fl := fl - 1;
|
|
|
- maxsize2 := maxsize2 * 4;
|
|
|
- end;
|
|
|
- if maxsize1 < maxsize2 then
|
|
|
- r := maxsize1
|
|
|
- else
|
|
|
- r := maxsize2;
|
|
|
- hash_table_size := r;
|
|
|
+ if fl<8 then
|
|
|
+ hash_table_size := 1 shl (2 * fl)
|
|
|
+ else
|
|
|
+ hash_table_size := $10000;
|
|
|
end; { hash_table_size }
|
|
|
|
|
|
-function generate_frequencies(fl: integer; buffer: PChar; buflen : longint): ht_pht;
|
|
|
+function generate_frequencies(fl: integer; buffer: PChar; buflen : longint): THashTable;
|
|
|
var
|
|
|
reader : PChar;
|
|
|
- i, bufend : longint;
|
|
|
- nulled : char;
|
|
|
+ i : longint;
|
|
|
begin
|
|
|
if fl <= buflen then
|
|
|
begin
|
|
|
- result := ht_create (hash_table_size (fl, buflen));
|
|
|
+ result := THashTable.Create(hash_table_size (fl), fl);
|
|
|
reader := buffer;
|
|
|
for i := 0 to buflen-fl do
|
|
|
begin
|
|
|
- nulled := reader[fl];
|
|
|
- reader[fl] := #0;
|
|
|
- inc(ht_find_new (result, reader)^.val);
|
|
|
- reader[fl] := nulled;
|
|
|
+ inc(Result.FindNew(reader)^.val);
|
|
|
inc(reader);
|
|
|
end;
|
|
|
end else
|
|
@@ -76,7 +296,7 @@ end; { sortArray }
|
|
|
|
|
|
procedure write_frequencies(fl : integer; buffer : PChar; buflen : longint);
|
|
|
var
|
|
|
- ht : ht_pht;
|
|
|
+ ht : THashTable;
|
|
|
i, size : longint;
|
|
|
total : real;
|
|
|
nd : ht_pnode;
|
|
@@ -85,24 +305,23 @@ begin
|
|
|
ht := generate_frequencies(fl, buffer, buflen);
|
|
|
total := 0;
|
|
|
size := 0;
|
|
|
- nd := ht_first(ht);
|
|
|
+ nd := ht.First;
|
|
|
while (nd <> nil) do
|
|
|
begin
|
|
|
total := total + nd^.val;
|
|
|
size := size + 1;
|
|
|
- nd := ht_next(ht);
|
|
|
+ nd := ht.Next;
|
|
|
end;
|
|
|
SetLength(s, size);
|
|
|
|
|
|
- nd := ht_first(ht);
|
|
|
+ nd := ht.First;
|
|
|
size := 0;
|
|
|
while (nd <> nil) do
|
|
|
begin
|
|
|
- s[size].sequence := nd^.key;
|
|
|
- strupper(s[size].sequence);
|
|
|
+ s[size].sequence := upcase(pchar(@nd^.keydata));
|
|
|
s[size].num := nd^.val;
|
|
|
size := size + 1;
|
|
|
- nd := ht_next(ht);
|
|
|
+ nd := ht.Next;
|
|
|
end;
|
|
|
|
|
|
sortArray(s, size);
|
|
@@ -110,24 +329,24 @@ begin
|
|
|
writeln(s[i].sequence,' ', (100 * s[i].num / total):3:3);
|
|
|
writeln;
|
|
|
|
|
|
- ht_destroy(ht);
|
|
|
+ ht.Free;
|
|
|
end; { write_frequencies }
|
|
|
|
|
|
-procedure write_count(searchFor : PChar; buffer : PChar; buflen : longint);
|
|
|
+procedure write_count(searchFor : ansistring; buffer : PChar; buflen : longint);
|
|
|
var
|
|
|
- ht : ht_pht;
|
|
|
+ ht : THashTable;
|
|
|
nd : ht_pnode;
|
|
|
begin
|
|
|
- ht := generate_frequencies (strlen (searchFor), buffer, buflen);
|
|
|
- nd := ht_find(ht, searchFor);
|
|
|
+ ht := generate_frequencies (length(searchFor), buffer, buflen);
|
|
|
+ nd := ht.Find(pchar(searchFor));
|
|
|
if (nd <> nil) then
|
|
|
write(nd^.val)
|
|
|
else
|
|
|
write(0);
|
|
|
- strupper(searchFor);
|
|
|
+ searchfor := UpCase(searchFor);
|
|
|
writeln(#9, searchFor);
|
|
|
|
|
|
- ht_destroy(ht);
|
|
|
+ ht.Free;
|
|
|
end; { write_count }
|
|
|
|
|
|
procedure main;
|
|
@@ -171,6 +390,6 @@ end; { main }
|
|
|
|
|
|
|
|
|
begin
|
|
|
- SetPrecisionMode(pmDouble);
|
|
|
+ //SetPrecisionMode(pmDouble);
|
|
|
main;
|
|
|
-end.
|
|
|
+end.
|