Browse Source

+ added k-nucleotide benchmark

git-svn-id: trunk@4871 -
Vincent Snijders 19 years ago
parent
commit
61d979808a

+ 3 - 0
.gitattributes

@@ -5104,6 +5104,7 @@ tests/bench/pi.c -text
 tests/bench/pi.pp svneol=native#text/plain
 tests/bench/pi.pp svneol=native#text/plain
 tests/bench/shootout/README.txt svneol=native#text/plain
 tests/bench/shootout/README.txt svneol=native#text/plain
 tests/bench/shootout/io/binarytrees-output.txt svneol=native#text/plain
 tests/bench/shootout/io/binarytrees-output.txt svneol=native#text/plain
+tests/bench/shootout/io/knucleotide-output.txt svneol=native#text/plain
 tests/bench/shootout/io/moments.in -text
 tests/bench/shootout/io/moments.in -text
 tests/bench/shootout/io/moments.out -text
 tests/bench/shootout/io/moments.out -text
 tests/bench/shootout/io/recursive-output.txt svneol=native#text/plain
 tests/bench/shootout/io/recursive-output.txt svneol=native#text/plain
@@ -5152,8 +5153,10 @@ tests/bench/shootout/obsolete/wc.pp svneol=native#text/plain
 tests/bench/shootout/src/bench.c -text
 tests/bench/shootout/src/bench.c -text
 tests/bench/shootout/src/binarytrees.pp svneol=native#text/plain
 tests/bench/shootout/src/binarytrees.pp svneol=native#text/plain
 tests/bench/shootout/src/hello.pp svneol=native#text/plain
 tests/bench/shootout/src/hello.pp svneol=native#text/plain
+tests/bench/shootout/src/knucleotide.pp svneol=native#text/plain
 tests/bench/shootout/src/nsieve.pp svneol=native#text/plain
 tests/bench/shootout/src/nsieve.pp svneol=native#text/plain
 tests/bench/shootout/src/recursive.pp svneol=native#text/plain
 tests/bench/shootout/src/recursive.pp svneol=native#text/plain
+tests/bench/shootout/src/simple_hash.pp svneol=native#text/plain
 tests/bench/shootout/src/spectralnorm.pp svneol=native#text/plain
 tests/bench/shootout/src/spectralnorm.pp svneol=native#text/plain
 tests/bench/shootout/src/sumcol.pp svneol=native#text/plain
 tests/bench/shootout/src/sumcol.pp svneol=native#text/plain
 tests/bench/shortbench.pp svneol=native#text/plain
 tests/bench/shortbench.pp svneol=native#text/plain

+ 27 - 0
tests/bench/shootout/io/knucleotide-output.txt

@@ -0,0 +1,27 @@
+A 30.284
+T 29.796
+C 20.312
+G 19.608
+
+AA 9.212
+AT 8.950
+TT 8.948
+TA 8.936
+CA 6.166
+CT 6.100
+AC 6.086
+TC 6.042
+AG 6.036
+GA 5.968
+TG 5.868
+GT 5.798
+CC 4.140
+GC 4.044
+CG 3.906
+GG 3.798
+
+562	GGT
+152	GGTA
+15	GGTATT
+0	GGTATTTTAATT
+0	GGTATTTTAATTTATAGT

+ 176 - 0
tests/bench/shootout/src/knucleotide.pp

@@ -0,0 +1,176 @@
+(* The Computer Language Shootout
+   http://shootout.alioth.debian.org/
+
+   contributed by Josh Goldfoot
+   modified by Vincent Snijders
+*)
+
+program knucleotide;
+
+{$mode objfpc}{$I-}
+
+(* simple_hash available from CVS *)
+uses simple_hash, SysUtils, Strings, Math;
+
+type
+   sorter      = record
+		    sequence : PChar;
+		    num	     : longint;
+		 end;
+   sorterArray = array of sorter;
+
+function hash_table_size (fl : dword; buflen : dword): dword;
+var
+   maxsize1, maxsize2, r : dword;
+begin
+   maxsize1 := buflen - fl;
+   maxsize2 := 4;
+   while (fl > 1) and (maxsize2 < maxsize1) do
+   begin
+      fl := fl - 1;
+      maxsize2 := maxsize2 * 4;
+   end;
+   if maxsize1 < maxsize2 then
+      r := maxsize1
+   else
+      r := maxsize2;
+   hash_table_size := r;
+end; { hash_table_size }
+
+function generate_frequencies(fl: integer; buffer: PChar; buflen : longint): ht_pht;
+var
+   reader : PChar;
+   i, bufend : longint;
+   nulled : char;
+begin
+   if fl <= buflen then
+   begin
+      result := ht_create (hash_table_size (fl, buflen));
+      reader := buffer;
+      for i := 0 to buflen-fl do
+      begin
+         nulled := reader[fl];
+         reader[fl] := #0;
+         inc(ht_find_new (result, reader)^.val);
+         reader[fl] := nulled;
+         inc(reader);
+      end;
+   end else
+      result := nil;
+end; { generate_frequencies }
+
+procedure sortArray(var s : sorterArray; size:longint);
+var
+   i,j : longint;
+   tmp : sorter;
+begin
+   for i := 0 to size-2 do
+      for j := i+1 to size-1 do
+         if s[i].num < s[j].num then
+	 begin
+	    tmp := s[i];
+	    s[i] := s[j];
+	    s[j] := tmp;
+	 end;
+end; { sortArray }
+
+procedure write_frequencies(fl : integer; buffer : PChar; buflen : longint);
+var
+  ht	   : ht_pht;
+  i, size : longint;
+  total   : real;
+  nd	   : ht_pnode;
+  s	   : sorterArray;
+begin
+  ht := generate_frequencies(fl, buffer, buflen);
+  total := 0;
+  size := 0;
+  nd := ht_first(ht);
+  while (nd <> nil) do
+  begin
+    total := total + nd^.val;
+    size := size + 1;
+    nd := ht_next(ht);
+  end;
+  SetLength(s, size);
+
+  nd := ht_first(ht);
+  size := 0;
+  while (nd <> nil) do
+  begin
+    s[size].sequence := nd^.key;
+    strupper(s[size].sequence);
+    s[size].num := nd^.val;
+    size := size + 1;
+    nd := ht_next(ht);
+  end;
+
+  sortArray(s, size);
+  for i := 0 to size - 1 do
+    writeln(s[i].sequence,' ', (100 * s[i].num / total):3:3);
+  writeln;
+
+  ht_destroy(ht);
+end; { write_frequencies }
+
+procedure write_count(searchFor : PChar; buffer : PChar; buflen : longint);
+var
+   ht : ht_pht;
+   nd : ht_pnode;
+begin
+   ht := generate_frequencies (strlen (searchFor), buffer, buflen);
+   nd := ht_find(ht, searchFor);
+   if (nd <> nil) then
+      write(nd^.val)
+   else
+      write(0);
+   strupper(searchFor);
+   writeln(#9, searchFor);
+
+   ht_destroy(ht);
+end; { write_count }
+
+procedure main;
+var
+   buffer : PChar;
+   len, seqlen : longint;
+   buffersize, bufferptr: longint;
+   s : String;
+begin
+   seqlen := 0;
+   repeat
+      readln(s)
+   until (s[1] = '>') and (s[2] = 'T') and (s[3] = 'H');
+   buffersize:=1024;
+   buffer:=getmem(buffersize);
+   bufferptr :=0;
+   while not eof do begin
+     readln(s);
+     if (s[1] <> '>') and (s[1] <> ';') then begin
+       len:=length(s);
+       if (bufferptr+len+1)>buffersize then  begin
+         inc(buffersize,buffersize);
+         reallocmem(buffer,buffersize);
+       end;
+       move (s[1],buffer[bufferptr],len);
+       inc(bufferptr,len);
+     end;
+   end;
+   buffer[bufferptr] := #0;
+   seqlen := strlen(buffer);
+
+   write_frequencies(1, buffer, seqlen);
+   write_frequencies(2, buffer, seqlen);
+   write_count('ggt', buffer, seqlen);
+   write_count('ggta', buffer, seqlen);
+   write_count('ggtatt', buffer, seqlen);
+   write_count('ggtattttaatt', buffer, seqlen);
+   write_count('ggtattttaatttatagt', buffer, seqlen);
+   freemem(buffer);
+end; { main }
+
+
+begin
+   SetPrecisionMode(pmDouble);
+   main;
+end.

+ 260 - 0
tests/bench/shootout/src/simple_hash.pp

@@ -0,0 +1,260 @@
+{
+  Copyright 2005, Micha Nelissen, converted from C, originally from
+  "simple_hash.h":
+}
+
+unit simple_hash;
+
+{$mode objfpc}
+{$inline on}
+
+interface
+
+const
+  ht_num_primes = 28;
+
+  ht_prime_list: array[0 .. ht_num_primes-1] of dword =
+  ( 53,         97,         193,       389,       769,
+    1543,       3079,       6151,      12289,     24593,
+    49157,      98317,      196613,    393241,    786433,
+    1572869,    3145739,    6291469,   12582917,  25165843,
+    50331653,   100663319,  201326611, 402653189, 805306457,
+    1610612741, 3221225473, 4294967291 );
+
+type
+  ht_ppnode = ^ht_pnode;
+  ht_pnode = ^ht_node;
+  ht_node = record
+    key: pchar;
+    val: integer;
+    next: ht_pnode;
+  end;
+
+  ht_pht = ^ht_ht;
+  ht_ht = record
+    size: dword;
+    tbl: ht_ppnode;
+    iter_index: dword;
+    iter_next: ht_pnode;
+    items: integer;
+{$ifdef HT_DEBUG}
+    collisions: integer;
+{$endif}
+  end;
+
+
+function  ht_val(node: ht_pnode): integer; {inline;}
+function  ht_key(node: ht_pnode): pchar; {inline;}
+function  ht_hashcode(ht: ht_pht; key: pchar): integer; {inline;}
+function  ht_node_create(key: pchar): ht_pnode;
+function  ht_create(size: dword): ht_pht;
+procedure ht_destroy(ht: ht_pht);
+function  ht_find(ht: ht_pht; key: pchar): ht_pnode; {inline;}
+function  ht_find_new(ht: ht_pht; key: pchar): ht_pnode; {inline;}
+function  ht_next(ht: ht_pht): ht_pnode; {inline;}
+function  ht_first(ht: ht_pht): ht_pnode; {inline;}
+function  ht_count(ht: ht_pht): integer; {inline;}
+
+implementation
+
+uses
+  strings;
+
+function  ht_val(node: ht_pnode): integer; {inline;}
+begin
+  result := node^.val;
+end;
+
+function  ht_key(node: ht_pnode): pchar; {inline;}
+begin
+  result := node^.key;
+end;
+
+function  ht_hashcode(ht: ht_pht; key: pchar): integer; {inline;}
+var
+  val: dword;
+begin
+  val := 0;
+  while key^ <> #0 do
+  begin
+    val := 5 * val + byte(key^);
+    inc(key);
+  end;
+  result := val mod ht^.size;
+end;
+
+function  ht_node_create(key: pchar): ht_pnode;
+var
+  newkey: pchar;
+  node: ht_pnode;
+begin
+  new(node);
+  newkey := strnew(key);
+  with node^ do
+  begin
+    key := newkey;
+    val := 0;
+    next := nil;
+  end;
+  result := node;
+end;
+
+function  ht_create(size: dword): ht_pht;
+var
+  i: integer;
+  ht: ht_pht;
+begin
+  i := 0;
+  new(ht);
+  while ht_prime_list[i] < size do inc(i);
+  ht^.size := ht_prime_list[i];
+  ht^.tbl := allocmem(sizeof(ht_pnode) * ht^.size);
+  ht^.iter_index := 0;
+  ht^.iter_next := nil;
+  ht^.items := 0;
+{$ifdef HT_DEBUG}
+  ht^.collisions := 0;
+{$endif}
+  result := ht;
+end;
+
+procedure ht_destroy(ht: ht_pht);
+var
+  cur, next: ht_pnode;
+  i: integer;
+{$ifdef HT_DEBUG}
+  chain_len, max_chain_len, density: integer;
+{$endif}
+begin
+{$ifdef HT_DEBUG}
+  max_chain_len := 0;
+  density := 0;
+  writeln(' HT: size          ', ht^.size);
+  writeln(' HT: items         ', ht^.items);
+  writeln(' HT: collisions    ', ht^.collisions);
+{$endif}
+  for i := 0 to ht^.size-1 do
+  begin
+    next := ht^.tbl[i];
+{$ifdef HT_DEBUG}
+    if next <> nil then
+      inc(density);
+    chain_len := 0;
+{$endif}
+    while next <> nil do
+    begin
+      cur := next;
+      next := next^.next;
+      strdispose(cur^.key);
+      dispose(cur);
+{$ifdef HT_DEBUG}
+      inc(chain_len);
+{$endif}
+    end;
+{$ifdef HT_DEBUG}
+    if chain_len > max_chain_len then
+      max_chain_len := chain_len;
+{$endif}
+  end;
+  freemem(ht^.tbl);
+  dispose(ht);
+{$ifdef HT_DEBUG}
+  writeln(' HT: density       ', density);
+  writeln(' HT: max chain len ', max_chain_len);
+{$endif}
+end;
+
+function  ht_find(ht: ht_pht; key: pchar): ht_pnode; {inline;}
+var
+  hash_code: integer;
+  node: ht_pnode;
+begin
+  hash_code := ht_hashcode(ht, key);
+  node := ht^.tbl[hash_code];
+  while node <> nil do
+  begin
+    if strcomp(key, node^.key) = 0 then
+    begin
+      result := node;
+      exit;
+    end;
+    node := node^.next;
+  end;
+  result := nil;
+end;
+
+function  ht_find_new(ht: ht_pht; key: pchar): ht_pnode; {inline;}
+var
+  hash_code: integer;
+  prev, node: ht_pnode;
+begin
+  hash_code := ht_hashcode(ht, key);
+  prev := nil;
+  node := ht^.tbl[hash_code];
+  while node <> nil do
+  begin
+    if strcomp(key, node^.key) = 0 then
+    begin
+      result := node;
+      exit;
+    end;
+    prev := node;
+    node := node^.next;
+{$ifdef HT_DEBUG}
+    inc(ht^.collisions);
+{$endif}
+  end;
+  inc(ht^.items);
+  result := ht_node_create(key);
+  if prev <> nil then
+  begin
+    prev^.next := result;
+  end else begin
+    ht^.tbl[hash_code] := result;
+  end;
+end;
+
+{
+  Hash Table iterator data / functions
+}
+
+function  ht_next(ht: ht_pht): ht_pnode; {inline;}
+var
+  index: dword;
+  node: ht_pnode;
+begin
+  node := ht^.iter_next;
+  if node <> nil then
+  begin
+    ht^.iter_next := node^.next;
+    result := node;
+    exit;
+  end else begin
+    while ht^.iter_index < ht^.size do
+    begin
+      index := ht^.iter_index;
+      inc(ht^.iter_index);
+      if ht^.tbl[index] <> nil then
+      begin
+        ht^.iter_next := ht^.tbl[index]^.next;
+        result := ht^.tbl[index];
+        exit;
+      end;
+    end;
+  end;
+  result := nil;
+end;
+
+function  ht_first(ht: ht_pht): ht_pnode; {inline;}
+begin
+  ht^.iter_index := 0;
+  ht^.iter_next := nil;
+  result := ht_next(ht);
+end;
+
+function  ht_count(ht: ht_pht): integer; {inline;}
+begin
+  result := ht^.items;
+end;
+
+end.