fasta.pp 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. { The Computer Language Shootout
  2. http://shootout.alioth.debian.org
  3. contributed by Ian Osgood
  4. modified by Vincent Snijders
  5. }
  6. {$mode objfpc}{$inline on}{$I-}
  7. program fasta;
  8. uses Math;
  9. const ALU : AnsiString =
  10. 'GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG' +
  11. 'GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA' +
  12. 'CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT' +
  13. 'ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA' +
  14. 'GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG' +
  15. 'AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC' +
  16. 'AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA';
  17. const codes = 'acgtBDHKMNRSVWY';
  18. const IUB : array[0..14] of double = ( 0.27, 0.12, 0.12, 0.27,
  19. 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02 );
  20. const HomoSap : array[0..3] of double = (
  21. 0.3029549426680, 0.1979883004921, 0.1975473066391, 0.3015094502008 );
  22. const LineLen = 60;
  23. type
  24. TGene=record
  25. prob: double;
  26. code: char;
  27. dummy: array[1..7] of char;
  28. end;
  29. PGene = ^TGene;
  30. var
  31. n : longint;
  32. Genes: array of TGene;
  33. TextBuf: array[0..$FFF] of byte;
  34. procedure fastaRepeat(n : integer);
  35. var
  36. sourceALU: ansistring;
  37. line, wrapALU : pchar;
  38. nulled : char;
  39. lenALU : integer;
  40. begin
  41. sourceALU := ALU + copy(ALU, 1, LineLen);
  42. line := PChar(sourceALU);
  43. lenALU := length(ALU);
  44. wrapALU := @sourceALU[lenALU];
  45. repeat
  46. nulled := line[LineLen];
  47. line[LineLen] := #0;
  48. writeln(line);
  49. inc(line, LineLen);
  50. line^ := nulled;
  51. if line>wrapALU then
  52. dec(line, lenALU);
  53. n := n - LineLen;
  54. until n <= LineLen;
  55. line[n] := #0;
  56. writeln(line);
  57. end;
  58. function genRandom(limit : integer): double;
  59. const
  60. seed : integer = 42;
  61. IM = 139968;
  62. IA = 3877;
  63. IC = 29573;
  64. begin
  65. seed := (seed * IA + IC) mod IM;
  66. genRandom := limit * seed * (1 / IM);
  67. end;
  68. procedure InitGenes(const probs: array of double);
  69. var
  70. i : integer;
  71. SumProb: double;
  72. begin
  73. SetLength(Genes, length(probs));
  74. SumProb := 0;
  75. for i := low(probs) to high(probs) do begin
  76. SumProb := SumProb + probs[i];
  77. Genes[i].prob := SumProb;
  78. Genes[i].code := codes[i-low(probs)+1];
  79. end;
  80. end;
  81. procedure fastaRandom(n : integer; const probs: array of double);
  82. var
  83. line : string;
  84. p : pchar;
  85. function chooseCode : char; inline;
  86. var r : double;
  87. Gene: PGene;
  88. begin
  89. r := genRandom(1);
  90. Gene := @Genes[low(Genes)];
  91. while (r>=Gene^.prob) do
  92. inc(Gene);
  93. result := Gene^.Code;
  94. end;
  95. begin
  96. { make gene array}
  97. InitGenes(probs);
  98. SetLength(line,lineLen);
  99. while n > lineLen do
  100. begin
  101. p := @line[1];
  102. while (p<=@line[lineLen]) do begin
  103. p^ := chooseCode;
  104. inc(p);
  105. end;
  106. writeln(line);
  107. n := n - lineLen;
  108. end;
  109. SetLength(line,n);
  110. p := @line[1];
  111. while (p<=@line[n]) do begin
  112. p^ := chooseCode;
  113. inc(p);
  114. end;
  115. writeln(line);
  116. end;
  117. begin
  118. SetTextBuf(output, TextBuf, sizeof(TextBuf));
  119. val(paramstr(1), n);
  120. writeln('>ONE Homo sapiens alu');
  121. fastaRepeat(n*2);
  122. writeln('>TWO IUB ambiguity codes');
  123. fastaRandom(n*3, IUB);
  124. writeln('>THREE Homo sapiens frequency');
  125. fastaRandom(n*5, HomoSap);
  126. end.