knucleotide.pp 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. (* The Computer Language Benchmarks Game
  2. http://shootout.alioth.debian.org/
  3. contributed by Josh Goldfoot
  4. modified by Vincent Snijders
  5. *)
  6. {$mode objfpc}
  7. program knucleotide;
  8. (* simple_hash available from CVS *)
  9. const
  10. ht_num_primes = 28;
  11. ht_prime_list: array[0 .. ht_num_primes-1] of dword =
  12. ( 53, 97, 193, 389, 769,
  13. 1543, 3079, 6151, 12289, 24593,
  14. 49157, 98317, 196613, 393241, 786433,
  15. 1572869, 3145739, 6291469, 12582917, 25165843,
  16. 50331653, 100663319, 201326611, 402653189, 805306457,
  17. 1610612741, 3221225473, 4294967291 );
  18. type
  19. { TNonFreePooledMemManager - a memory manager for records without freeing }
  20. PMemChunk = ^TMemChunk;
  21. TMemChunk = record
  22. data: pointer;
  23. next: PMemChunk;
  24. end;
  25. TNonFreePooledMemManager = class
  26. private
  27. FItemSize: integer;
  28. FItems: PMemChunk;
  29. FCurItem: Pointer;
  30. FEndItem: Pointer;
  31. FCurSize: integer;
  32. procedure Grow;
  33. public
  34. property ItemSize: integer read FItemSize;
  35. constructor Create(TheItemSize: integer);
  36. destructor Destroy; override;
  37. function NewItem: Pointer; inline;
  38. end;
  39. { THashTable }
  40. ht_ppnode = ^ht_pnode;
  41. ht_pnode = ^ht_node;
  42. ht_node = record
  43. val: integer;
  44. next: ht_pnode;
  45. keydata: array[0..0] of char;
  46. end;
  47. THashTable=class
  48. private
  49. FSize: dword;
  50. FKeysize: dword;
  51. FTbl: ht_ppnode;
  52. FIter_index: dword;
  53. FIter_next: ht_pnode;
  54. FNodeMemManager: TNonFreePooledMemManager;
  55. public
  56. constructor Create(size: dword; keysize: dword);
  57. destructor Destroy; override;
  58. function Find(key: pchar): ht_pnode;
  59. function FindNew(key: pchar): ht_pnode;
  60. function First: ht_pnode;
  61. function Next: ht_pnode;
  62. end;
  63. { TNonFreePooledMemManager }
  64. procedure TNonFreePooledMemManager.Grow;
  65. var
  66. memchunk: PMemChunk;
  67. begin
  68. if FCurSize<256*1024 then
  69. // each item has double the size of its predecessor
  70. inc(FCurSize, FCurSize);
  71. GetMem(FCurItem,FCurSize);
  72. FillChar(FCurItem^, FCurSize, 0);
  73. new(MemChunk);
  74. MemChunk^.next := FItems;
  75. MemChunk^.Data := FCurItem;
  76. FItems := MemChunk;
  77. FEndItem := FCurItem;
  78. Inc(FEndItem, FCurSize);
  79. end;
  80. constructor TNonFreePooledMemManager.Create(TheItemSize: integer);
  81. begin
  82. FItemSize:=TheItemSize;
  83. FCurSize:=FItemSize*4; // 4 items => the first item has 8 entries
  84. end;
  85. destructor TNonFreePooledMemManager.Destroy;
  86. var
  87. p: PMemChunk;
  88. begin
  89. while FItems<>nil do begin
  90. p := FItems;
  91. FItems := Fitems^.next;
  92. FreeMem(p^.Data);
  93. Dispose(p);
  94. end;
  95. inherited Destroy;
  96. end;
  97. function TNonFreePooledMemManager.NewItem: Pointer; inline;
  98. begin
  99. if (FCurItem=FEndItem) then
  100. Grow;
  101. Result:=FCurItem;
  102. Inc(FCurItem, FItemSize);
  103. end;
  104. { THashTable }
  105. constructor THashTable.Create(size: dword; keysize: dword);
  106. var
  107. i: integer;
  108. begin
  109. i := 0;
  110. while (i<high(ht_prime_list)) and (size>ht_prime_list[i]) do
  111. inc(i);
  112. FSize := ht_prime_list[i];
  113. fkeysize := keysize;
  114. ftbl := allocmem(sizeof(ht_pnode) * FSize);
  115. fiter_index := 0;
  116. fiter_next := nil;
  117. FNodeMemManager := TNonFreePooledMemManager.Create(SizeOf(ht_node)+FKeySize);
  118. end;
  119. destructor THashTable.Destroy;
  120. begin
  121. FNodeMemManager.Free;
  122. freemem(Ftbl);
  123. inherited;
  124. end;
  125. function ht_hashcode(key: pchar; keysize: dword): dword; //inline;
  126. var
  127. val: dword;
  128. i: integer;
  129. begin
  130. val := 0;
  131. for i := 0 to Keysize -1 do
  132. begin
  133. val := val * 4;
  134. inc(val, dword(byte(key^) and 6) shr 1);
  135. inc(key);
  136. end;
  137. result := val;
  138. end;
  139. function THashTable.Find(key: pchar): ht_pnode;
  140. var
  141. hash_code: dword;
  142. node: ht_pnode;
  143. begin
  144. hash_code := ht_hashcode(key, FKeySize) mod FSize;
  145. node := FTbl[hash_code];
  146. while node <> nil do
  147. begin
  148. if comparebyte(key^, node^.keydata, FKeysize) = 0 then
  149. begin
  150. result := node;
  151. exit;
  152. end;
  153. node := node^.next;
  154. end;
  155. result := nil;
  156. end;
  157. function THashTable.FindNew(key: pchar): ht_pnode;
  158. var
  159. hash_code: integer;
  160. prev, node: ht_pnode;
  161. begin
  162. prev := nil;
  163. hash_code := ht_hashcode(key, FKeysize) mod FSize;
  164. node := FTbl[hash_code];
  165. while node <> nil do
  166. begin
  167. if CompareByte(key^, node^.keydata, FKeysize) = 0 then
  168. begin
  169. result := node;
  170. exit;
  171. end;
  172. prev := node;
  173. node := node^.next;
  174. end;
  175. result := FNodeMemManager.NewItem;
  176. move(key^,Result^.keydata,FKeysize);
  177. if prev <> nil then
  178. begin
  179. prev^.next := result;
  180. end else begin
  181. FTbl[hash_code] := result;
  182. end;
  183. end;
  184. {
  185. Hash Table iterator data / functions
  186. }
  187. function THashTable.First: ht_pnode;
  188. begin
  189. FIter_index := 0;
  190. FIter_next := nil;
  191. result := next;
  192. end;
  193. function THashTable.Next: ht_pnode;
  194. var
  195. index: dword;
  196. node: ht_pnode;
  197. begin
  198. node := FIter_next;
  199. if node <> nil then
  200. begin
  201. FIter_next := node^.next;
  202. result := node;
  203. exit;
  204. end else begin
  205. while FIter_index < FSize do
  206. begin
  207. index := FIter_index;
  208. inc(FIter_index);
  209. if FTbl[index] <> nil then
  210. begin
  211. FIter_next := FTbl[index]^.next;
  212. result := FTbl[index];
  213. exit;
  214. end;
  215. end;
  216. end;
  217. result := nil;
  218. end;
  219. {==============================================================================}
  220. type
  221. sorter = record
  222. sequence : ansistring;
  223. num : longint;
  224. end;
  225. sorterArray = array of sorter;
  226. function hash_table_size (fl : dword): dword;
  227. begin
  228. if fl<8 then
  229. hash_table_size := 1 shl (2 * fl)
  230. else
  231. hash_table_size := $10000;
  232. end; { hash_table_size }
  233. function generate_frequencies(fl: integer; buffer: PChar; buflen : longint): THashTable;
  234. var
  235. reader : PChar;
  236. i : longint;
  237. begin
  238. if fl <= buflen then
  239. begin
  240. result := THashTable.Create(hash_table_size (fl), fl);
  241. reader := buffer;
  242. for i := 0 to buflen-fl do
  243. begin
  244. inc(Result.FindNew(reader)^.val);
  245. inc(reader);
  246. end;
  247. end else
  248. result := nil;
  249. end; { generate_frequencies }
  250. procedure sortArray(var s : sorterArray; size:longint);
  251. var
  252. i,j : longint;
  253. tmp : sorter;
  254. begin
  255. for i := 0 to size-2 do
  256. for j := i+1 to size-1 do
  257. if s[i].num < s[j].num then
  258. begin
  259. tmp := s[i];
  260. s[i] := s[j];
  261. s[j] := tmp;
  262. end;
  263. end; { sortArray }
  264. procedure write_frequencies(fl : integer; buffer : PChar; buflen : longint);
  265. var
  266. ht : THashTable;
  267. i, size : longint;
  268. total : real;
  269. nd : ht_pnode;
  270. s : sorterArray;
  271. begin
  272. ht := generate_frequencies(fl, buffer, buflen);
  273. total := 0;
  274. size := 0;
  275. nd := ht.First;
  276. while (nd <> nil) do
  277. begin
  278. total := total + nd^.val;
  279. size := size + 1;
  280. nd := ht.Next;
  281. end;
  282. SetLength(s, size);
  283. nd := ht.First;
  284. size := 0;
  285. while (nd <> nil) do
  286. begin
  287. s[size].sequence := upcase(pchar(@nd^.keydata));
  288. s[size].num := nd^.val;
  289. size := size + 1;
  290. nd := ht.Next;
  291. end;
  292. sortArray(s, size);
  293. for i := 0 to size - 1 do
  294. writeln(s[i].sequence,' ', (100 * (s[i].num / total)):3:3);
  295. writeln;
  296. ht.Free;
  297. end; { write_frequencies }
  298. procedure write_count(searchFor : ansistring; buffer : PChar; buflen : longint);
  299. var
  300. ht : THashTable;
  301. nd : ht_pnode;
  302. begin
  303. ht := generate_frequencies (length(searchFor), buffer, buflen);
  304. nd := ht.Find(pchar(searchFor));
  305. if (nd <> nil) then
  306. write(nd^.val)
  307. else
  308. write(0);
  309. searchfor := UpCase(searchFor);
  310. writeln(#9, searchFor);
  311. ht.Free;
  312. end; { write_count }
  313. procedure main;
  314. var
  315. buffer : PChar;
  316. len, seqlen : longint;
  317. buffersize, bufferptr: longint;
  318. s : String;
  319. begin
  320. seqlen := 0;
  321. repeat
  322. readln(s)
  323. until (s[1] = '>') and (s[2] = 'T') and (s[3] = 'H');
  324. buffersize:=1024;
  325. buffer:=getmem(buffersize);
  326. bufferptr :=0;
  327. while not eof do begin
  328. readln(s);
  329. if (s[1] <> '>') and (s[1] <> ';') then begin
  330. len:=length(s);
  331. if (bufferptr+len+1)>buffersize then begin
  332. inc(buffersize,buffersize);
  333. reallocmem(buffer,buffersize);
  334. end;
  335. move (s[1],buffer[bufferptr],len);
  336. inc(bufferptr,len);
  337. end;
  338. end;
  339. buffer[bufferptr] := #0;
  340. seqlen := strlen(buffer);
  341. write_frequencies(1, buffer, seqlen);
  342. write_frequencies(2, buffer, seqlen);
  343. write_count('ggt', buffer, seqlen);
  344. write_count('ggta', buffer, seqlen);
  345. write_count('ggtatt', buffer, seqlen);
  346. write_count('ggtattttaatt', buffer, seqlen);
  347. write_count('ggtattttaatttatagt', buffer, seqlen);
  348. freemem(buffer);
  349. end; { main }
  350. begin
  351. //SetPrecisionMode(pmDouble);
  352. main;
  353. end.