k-nucleotide.nut 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. // The Great Computer Language Shootout
  2. // http://shootout.alioth.debian.org/
  3. // contributed by Mike Pall
  4. local function kfrequency(seq, freq, k, frame){
  5. //local sub = string.sub
  6. local k1 = k - 1
  7. for(local i=frame, len= seq.len()-k1; i < len; i += k){
  8. local c = seq.slice(i, i+k1);
  9. freq[c]++;
  10. }
  11. }
  12. local freq_delegate = {
  13. _get = function(x) {return 0;}
  14. }
  15. local function count(seq, frag){
  16. local k = frag.len()
  17. local freq = {};
  18. freq.setdelegate(freq_delegate)
  19. for(local frame=0; frame < k; ++frame) kfrequency(seq, freq, k, frame);
  20. print(freq.get(frag, 0), "\t", frag, "\n")
  21. }
  22. local function frequency(seq, k){
  23. local freq = {};
  24. freq.setdelegate(freq_delegate)
  25. for(local frame=0; frame < k; ++frame) kfrequency(seq, freq, k, frame);
  26. local sfreq = [], sn = 0
  27. foreach( c,v in freq) sfreq[sn++] <- c;
  28. sfreq.sort(function(a, b){
  29. local fa = freq[a], fb = freq[b]
  30. return fa == fb && a > b || fa > fb
  31. })
  32. sum = seq.len()-k+1
  33. foreach(c in sfreq){
  34. print(format("%s %0.3f\n", c, (freq[c]*100)/sum))
  35. }
  36. print("\n")
  37. }
  38. local function readseq(){
  39. //local sub = string.sub
  40. for line in io.lines() do
  41. if sub(line, 1, 1) == ">" and sub(line, 2, 6) == "THREE" then break end
  42. end
  43. local lines, ln = {}, 0
  44. for line in io.lines() do
  45. local c = sub(line, 1, 1)
  46. if c == ">" then
  47. break
  48. elseif c ~= ";" then
  49. ln = ln + 1
  50. lines[ln] = line
  51. end
  52. end
  53. return string.upper(table.concat(lines, "", 1, ln))
  54. }
  55. local seq = readseq()
  56. frequency(seq, 1)
  57. frequency(seq, 2)
  58. count(seq, "GGT")
  59. count(seq, "GGTA")
  60. count(seq, "GGTATT")
  61. count(seq, "GGTATTTTAATT")
  62. count(seq, "GGTATTTTAATTTATAGT")