Browse Source

* regex implementation from S. Fisher

git-svn-id: trunk@9142 -
peter 18 years ago
parent
commit
b741d5f2a3
1 changed files with 121 additions and 34 deletions
  1. 121 34
      tests/bench/shootout/src/regexdna.pp

+ 121 - 34
tests/bench/shootout/src/regexdna.pp

@@ -1,45 +1,132 @@
-{$mode objfpc}
-{$H-}
-uses
-  regexpr;
-var
-  buffer : PChar;
-  buffer2 : ansistring;
-  seqlen : longint;
-  TextBuf: array[0..$FFF] of byte;
+{ The Computer Language Benchmarks Game
+  http://shootout.alioth.debian.org
+
+  contributed by Steve Fisher
+
+  compile with
+  fpc -O3 regex-dna.pp
+}
+
+uses regexpr, strutils;
 
-procedure Load;
+function replace_matches( const target: pchar;  const repl: pchar;
+                const str: ansistring;  var dest: ansistring ): longint;
 var
-  len : longint;
-  buffersize, bufferptr: longint;
-  s : Shortstring;
+  engine : tRegexprEngine;
+  substr : ansistring;
+  count, index, size : longint;
 begin
-  buffersize:=1024;
-  buffer:=getmem(buffersize);
-  bufferptr :=0;
-  while not eof do begin
-    readln(s);
-    len:=length(s);
-    if (bufferptr+len+1)>buffersize then begin
-      inc(buffersize,buffersize);
-      reallocmem(buffer,buffersize);
-    end;
-    move (s[1],buffer[bufferptr],len);
-    inc(bufferptr,len);
+  if not GenerateRegExprEngine( target, [], engine) then
+  begin
+    writeln( 'Failed to generate regex. engine for "',target,'".' );
+    halt(1)
+  end;
+  count := 0;
+  dest := '';
+  substr := str;
+  while length(substr) > 0 do
+  begin
+    if RegExprPos(engine, pchar(substr), index, size ) then
+    begin
+      count += 1;
+      dest += ansiLeftStr( substr, index) + repl;
+      substr := ansiRightStr(substr,length(substr)-index-size);
+    end
+    else
+      break
   end;
-  buffer[bufferptr] := #0;
-  seqlen:=bufferptr;
-  writeln(seqlen);
+  DestroyRegExprEngine( engine );
+  dest += substr;
+  exit(count)
 end;
 
-procedure ReplaceNewline;
+function count_matches( target: pchar; const str: ansistring ): longint;
+var
+  engine : tRegexprEngine;
+  substr : ansistring;
+  count, index, size : longint;
+begin
+  if not GenerateRegExprEngine( target, [ref_caseinsensitive], engine) then
+  begin
+    writeln( 'Failed to generate regex. engine for "',target,'".' );
+    halt(1)
+  end;
+  count := 0;
+  substr := str;
+  while length(substr) > 0 do
   begin
-    GenerateRegExprEngine('>.*\n|\n',[],RegExprEngine);
-    writeln(RegExprReplace(RegExprEngine,buffer,'',buffer2));
-    DestroyRegExprEngine(RegExprEngine);
+    if RegExprPos(engine, pchar(substr), index, size ) then
+    begin
+      count += 1;
+      substr := ansiRightStr(substr,length(substr)-index-size);
+    end
+    else
+      break
   end;
+  DestroyRegExprEngine( engine );
+  exit(count)
+end;
+
+const
+  patterns : array[1..9] of pchar =
+    (
+      '(agggtaaa)|(tttaccct)',
+      '([cgt]gggtaaa)|(tttaccc[acg])',
+      '(a[act]ggtaaa)|(tttacc[agt]t)',
+      '(ag[act]gtaaa)|(tttac[agt]ct)',
+      '(agg[act]taaa)|(ttta[agt]cct)',
+      '(aggg[acg]aaa)|(ttt[cgt]ccct)',
+      '(agggt[cgt]aa)|(tt[acg]accct)',
+      '(agggta[cgt]a)|(t[acg]taccct)',
+      '(agggtaa[cgt])|([acg]ttaccct)'
+    );
+  replacements : array[1..11,1..2] of pchar =
+  (
+    ('B', '(c|g|t)'), ('D', '(a|g|t)'), ('H', '(a|c|t)'), ('K', '(g|t)'),
+    ('M', '(a|c)'), ('N', '(a|c|g|t)'), ('R', '(a|g)'), ('S', '(c|t)'),
+    ('V', '(a|c|g)'), ('W', '(a|t)'), ('Y', '(c|t)')
+  );
+
+
+var
+  pattern : pchar;
+  sequence, new_seq : ansiString;
+  line, tmp: string[255];
+  letter, repl : pchar;
+  i, count, init_length, clean_length, reps : longint;
 
 begin
-  SetTextBuf(input, TextBuf, sizeof(TextBuf));
-  Load;
+  sequence := '';
+  init_length := 0;
+  while not eof do
+  begin
+    readln( line );
+    init_length += length( line ) + 1;
+    if line[1] <> '>' then
+      sequence := sequence + line;
+  end;
+  clean_length := length(sequence);
+
+  for i := low(patterns) to high(patterns) do
+  begin
+    pattern := patterns[i];
+    count := count_matches( pattern, sequence );
+    tmp := delChars( delChars(pattern,'('), ')' );
+    writeln( tmp, ' ', count);
+  end;
+
+
+  //  Replace.
+  for i := low(replacements) to high(replacements) do
+  begin
+    letter := replacements[i][1];  repl := replacements[i][2];
+    reps := replace_matches(letter,repl,sequence,new_seq);
+    sequence := new_seq;
+  end;
+
+
+  writeln;
+  writeln( init_length );
+  writeln( clean_length );
+  writeln( length(sequence) );
 end.