| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396 |
- (* The Computer Language Benchmarks Game
- http://shootout.alioth.debian.org/
- contributed by Josh Goldfoot
- modified by Vincent Snijders
- *)
- {$mode objfpc}
- program knucleotide;
- (* simple_hash available from CVS *)
- const
- ht_num_primes = 28;
- ht_prime_list: array[0 .. ht_num_primes-1] of dword =
- ( 53, 97, 193, 389, 769,
- 1543, 3079, 6151, 12289, 24593,
- 49157, 98317, 196613, 393241, 786433,
- 1572869, 3145739, 6291469, 12582917, 25165843,
- 50331653, 100663319, 201326611, 402653189, 805306457,
- 1610612741, 3221225473, 4294967291 );
- type
- { TNonFreePooledMemManager - a memory manager for records without freeing }
- PMemChunk = ^TMemChunk;
- TMemChunk = record
- data: pointer;
- next: PMemChunk;
- end;
- TNonFreePooledMemManager = class
- private
- FItemSize: integer;
- FItems: PMemChunk;
- FCurItem: Pointer;
- FEndItem: Pointer;
- FCurSize: integer;
- procedure Grow;
- public
- property ItemSize: integer read FItemSize;
- constructor Create(TheItemSize: integer);
- destructor Destroy; override;
- function NewItem: Pointer; inline;
- end;
- { THashTable }
- ht_ppnode = ^ht_pnode;
- ht_pnode = ^ht_node;
- ht_node = record
- val: integer;
- next: ht_pnode;
- keydata: array[0..0] of char;
- end;
- THashTable=class
- private
- FSize: dword;
- FKeysize: dword;
- FTbl: ht_ppnode;
- FIter_index: dword;
- FIter_next: ht_pnode;
- FNodeMemManager: TNonFreePooledMemManager;
- public
- constructor Create(size: dword; keysize: dword);
- destructor Destroy; override;
- function Find(key: pchar): ht_pnode;
- function FindNew(key: pchar): ht_pnode;
- function First: ht_pnode;
- function Next: ht_pnode;
- end;
- { TNonFreePooledMemManager }
- procedure TNonFreePooledMemManager.Grow;
- var
- memchunk: PMemChunk;
- begin
- if FCurSize<256*1024 then
- // each item has double the size of its predecessor
- inc(FCurSize, FCurSize);
- GetMem(FCurItem,FCurSize);
- FillChar(FCurItem^, FCurSize, 0);
- new(MemChunk);
- MemChunk^.next := FItems;
- MemChunk^.Data := FCurItem;
- FItems := MemChunk;
- FEndItem := FCurItem;
- Inc(FEndItem, FCurSize);
- end;
- constructor TNonFreePooledMemManager.Create(TheItemSize: integer);
- begin
- FItemSize:=TheItemSize;
- FCurSize:=FItemSize*4; // 4 items => the first item has 8 entries
- end;
- destructor TNonFreePooledMemManager.Destroy;
- var
- p: PMemChunk;
- begin
- while FItems<>nil do begin
- p := FItems;
- FItems := Fitems^.next;
- FreeMem(p^.Data);
- Dispose(p);
- end;
- inherited Destroy;
- end;
- function TNonFreePooledMemManager.NewItem: Pointer; inline;
- begin
- if (FCurItem=FEndItem) then
- Grow;
- Result:=FCurItem;
- Inc(FCurItem, FItemSize);
- end;
- { THashTable }
- constructor THashTable.Create(size: dword; keysize: dword);
- var
- i: integer;
- begin
- i := 0;
- while (i<high(ht_prime_list)) and (size>ht_prime_list[i]) do
- inc(i);
- FSize := ht_prime_list[i];
- fkeysize := keysize;
- ftbl := allocmem(sizeof(ht_pnode) * FSize);
- fiter_index := 0;
- fiter_next := nil;
- FNodeMemManager := TNonFreePooledMemManager.Create(SizeOf(ht_node)+FKeySize);
- end;
- destructor THashTable.Destroy;
- begin
- FNodeMemManager.Free;
- freemem(Ftbl);
- inherited;
- end;
- function ht_hashcode(key: pchar; keysize: dword): dword; //inline;
- var
- val: dword;
- i: integer;
- begin
- val := 0;
- for i := 0 to Keysize -1 do
- begin
- val := val * 4;
- inc(val, dword(byte(key^) and 6) shr 1);
- inc(key);
- end;
- result := val;
- end;
- function THashTable.Find(key: pchar): ht_pnode;
- var
- hash_code: dword;
- node: ht_pnode;
- begin
- hash_code := ht_hashcode(key, FKeySize) mod FSize;
- node := FTbl[hash_code];
- while node <> nil do
- begin
- if comparebyte(key^, node^.keydata, FKeysize) = 0 then
- begin
- result := node;
- exit;
- end;
- node := node^.next;
- end;
- result := nil;
- end;
- function THashTable.FindNew(key: pchar): ht_pnode;
- var
- hash_code: integer;
- prev, node: ht_pnode;
- begin
- prev := nil;
- hash_code := ht_hashcode(key, FKeysize) mod FSize;
- node := FTbl[hash_code];
- while node <> nil do
- begin
- if CompareByte(key^, node^.keydata, FKeysize) = 0 then
- begin
- result := node;
- exit;
- end;
- prev := node;
- node := node^.next;
- end;
- result := FNodeMemManager.NewItem;
- move(key^,Result^.keydata,FKeysize);
- if prev <> nil then
- begin
- prev^.next := result;
- end else begin
- FTbl[hash_code] := result;
- end;
- end;
- {
- Hash Table iterator data / functions
- }
- function THashTable.First: ht_pnode;
- begin
- FIter_index := 0;
- FIter_next := nil;
- result := next;
- end;
- function THashTable.Next: ht_pnode;
- var
- index: dword;
- node: ht_pnode;
- begin
- node := FIter_next;
- if node <> nil then
- begin
- FIter_next := node^.next;
- result := node;
- exit;
- end else begin
- while FIter_index < FSize do
- begin
- index := FIter_index;
- inc(FIter_index);
- if FTbl[index] <> nil then
- begin
- FIter_next := FTbl[index]^.next;
- result := FTbl[index];
- exit;
- end;
- end;
- end;
- result := nil;
- end;
- {==============================================================================}
- type
- sorter = record
- sequence : ansistring;
- num : longint;
- end;
- sorterArray = array of sorter;
- function hash_table_size (fl : dword): dword;
- begin
- if fl<8 then
- hash_table_size := 1 shl (2 * fl)
- else
- hash_table_size := $10000;
- end; { hash_table_size }
- function generate_frequencies(fl: integer; buffer: PChar; buflen : longint): THashTable;
- var
- reader : PChar;
- i : longint;
- begin
- if fl <= buflen then
- begin
- result := THashTable.Create(hash_table_size (fl), fl);
- reader := buffer;
- for i := 0 to buflen-fl do
- begin
- inc(Result.FindNew(reader)^.val);
- inc(reader);
- end;
- end else
- result := nil;
- end; { generate_frequencies }
- procedure sortArray(var s : sorterArray; size:longint);
- var
- i,j : longint;
- tmp : sorter;
- begin
- for i := 0 to size-2 do
- for j := i+1 to size-1 do
- if s[i].num < s[j].num then
- begin
- tmp := s[i];
- s[i] := s[j];
- s[j] := tmp;
- end;
- end; { sortArray }
- procedure write_frequencies(fl : integer; buffer : PChar; buflen : longint);
- var
- ht : THashTable;
- i, size : longint;
- total : real;
- nd : ht_pnode;
- s : sorterArray;
- begin
- ht := generate_frequencies(fl, buffer, buflen);
- total := 0;
- size := 0;
- nd := ht.First;
- while (nd <> nil) do
- begin
- total := total + nd^.val;
- size := size + 1;
- nd := ht.Next;
- end;
- SetLength(s, size);
- nd := ht.First;
- size := 0;
- while (nd <> nil) do
- begin
- s[size].sequence := upcase(pchar(@nd^.keydata));
- s[size].num := nd^.val;
- size := size + 1;
- nd := ht.Next;
- end;
- sortArray(s, size);
- for i := 0 to size - 1 do
- writeln(s[i].sequence,' ', (100 * (s[i].num / total)):3:3);
- writeln;
- ht.Free;
- end; { write_frequencies }
- procedure write_count(searchFor : ansistring; buffer : PChar; buflen : longint);
- var
- ht : THashTable;
- nd : ht_pnode;
- begin
- ht := generate_frequencies (length(searchFor), buffer, buflen);
- nd := ht.Find(pchar(searchFor));
- if (nd <> nil) then
- write(nd^.val)
- else
- write(0);
- searchfor := UpCase(searchFor);
- writeln(#9, searchFor);
- ht.Free;
- end; { write_count }
- procedure main;
- var
- buffer : PChar;
- len, seqlen : longint;
- buffersize, bufferptr: longint;
- s : String;
- begin
- seqlen := 0;
- repeat
- readln(s)
- until (s[1] = '>') and (s[2] = 'T') and (s[3] = 'H');
- buffersize:=1024;
- buffer:=getmem(buffersize);
- bufferptr :=0;
- while not eof do begin
- readln(s);
- if (s[1] <> '>') and (s[1] <> ';') then begin
- len:=length(s);
- if (bufferptr+len+1)>buffersize then begin
- inc(buffersize,buffersize);
- reallocmem(buffer,buffersize);
- end;
- move (s[1],buffer[bufferptr],len);
- inc(bufferptr,len);
- end;
- end;
- buffer[bufferptr] := #0;
- seqlen := strlen(buffer);
- write_frequencies(1, buffer, seqlen);
- write_frequencies(2, buffer, seqlen);
- write_count('ggt', buffer, seqlen);
- write_count('ggta', buffer, seqlen);
- write_count('ggtatt', buffer, seqlen);
- write_count('ggtattttaatt', buffer, seqlen);
- write_count('ggtattttaatttatagt', buffer, seqlen);
- freemem(buffer);
- end; { main }
- begin
- //SetPrecisionMode(pmDouble);
- main;
- end.
|