Browse Source

* Optimized version of SHA1 and MD5 hashes (speed doubles on x64) by Engkin

git-svn-id: trunk@28281 -
michael 11 years ago
parent
commit
9efce39e06

+ 4 - 0
.gitattributes

@@ -4103,14 +4103,18 @@ packages/hash/examples/crctest.pas svneol=native#text/plain
 packages/hash/examples/hmd5.pp svneol=native#text/pascal
 packages/hash/examples/hsha1.pp svneol=native#text/pascal
 packages/hash/examples/md5.ref svneol=native#text/plain
+packages/hash/examples/md5performancetest.pas svneol=native#text/plain
 packages/hash/examples/mdtest.pas svneol=native#text/plain
+packages/hash/examples/sha1performancetest.pas svneol=native#text/plain
 packages/hash/examples/sha1test.pp svneol=native#text/plain
 packages/hash/fpmake.pp svneol=native#text/plain
 packages/hash/src/crc.pas svneol=native#text/plain
 packages/hash/src/hmac.pp svneol=native#text/pascal
 packages/hash/src/md5.pp svneol=native#text/plain
+packages/hash/src/md5i386.inc svneol=native#text/plain
 packages/hash/src/ntlm.pas svneol=native#text/plain
 packages/hash/src/sha1.pp svneol=native#text/plain
+packages/hash/src/sha1i386.inc svneol=native#text/plain
 packages/hash/src/unixcrypt.pas svneol=native#text/plain
 packages/hash/src/uuid.pas svneol=native#text/plain
 packages/hash/tests/tests.pp svneol=native#text/pascal

File diff suppressed because it is too large
+ 519 - 75
packages/hash/examples/Makefile


+ 2 - 2
packages/hash/examples/Makefile.fpc

@@ -3,10 +3,10 @@
 #
 
 [target]
-programs=mdtest crctest sha1test
+programs=mdtest crctest sha1test md5performancetest  sha1performancetest
 
 [require]
-packages=hash
+packages=hash rtl-extra rtl-objpas
 
 [install]
 fpcpackage=y

+ 29 - 0
packages/hash/examples/md5performancetest.pas

@@ -0,0 +1,29 @@
+program md5performancetest;
+
+{$mode objfpc}{$H+}
+
+uses
+  {$IFDEF UNIX}{$IFDEF UseCThreads}
+  cthreads,
+  {$ENDIF}{$ENDIF}
+  SysUtils,Classes,md5,dateutils;
+
+// Times repeated MD5 hashing of a 1,000,000-character string of 'a'.
+var
+  Payload, HexDigest: string;
+  Pass: integer;
+  StartTime, EndTime: TDateTime;
+begin
+  writeln('MD5 of a million "a" symbols');
+  Writeln('x86 only: compile md5 unit with -dMD5SLOW to use unoptimized original version');
+  Payload := StringOfChar('a', 1000000);
+
+  StartTime:=now;
+  // 0..1000 inclusive: the digest is computed 1001 times.
+  for Pass := 0 to 1000 do
+    HexDigest := LowerCase(MDPrint(MDString(Payload, MD_VERSION_5)));
+  EndTime:=now;
+  writeln('Performance test finished. Elapsed time:');
+  writeln(TimeToStr(EndTime-StartTime));
+end.
+

+ 37 - 11
packages/hash/examples/mdtest.pas

@@ -1,8 +1,8 @@
 {
     This file is part of the Free Pascal packages.
-    Copyright (c) 1999-2000 by the Free Pascal development team
+    Copyright (c) 1999-2014 by the Free Pascal development team
 
-    Tests the MD5 program.
+    Tests MD2, MD4 and MD5 hashes.
 
     See the file COPYING.FPC, included in this distribution,
     for details about the copyright.
@@ -15,10 +15,10 @@
 
 program mdtest;
 
-{$h+}
+{$mode objfpc}{$h+}
 
 uses
-  md5;
+  SysUtils, md5;
 
 const
   Suite: array[1..7] of string = (
@@ -60,31 +60,57 @@ const
      '57edf4a22be3c955ac49da2e2107b67a')
   );
 
-procedure performTest(const Ver: TMDVersion);
+function performTest(const Ver: TMDVersion; const Verbose: boolean): boolean;
+// Returns true only if every Suite entry hashes to its Results value; Verbose prints each digest
 var
   I: Integer;
   S: String;
 begin
+  result := true;
   for I := Low(Suite) to High(Suite) do
   begin
     S := LowerCase(MDPrint(MDString(Suite[I], Ver)));
-    if S = Results[Ver, I] then
-      Write('passed  ') else
-      Write('failed  ');
-    WriteLn('  "', Suite[I], '" = ', S);
+    if S <> Results[Ver, I] then
+      result := false;  // one mismatch fails the whole suite
+    if Verbose then WriteLn('  "', Suite[I], '" = ', S);
   end;
 end;
 
+var
+  i: integer;  // exit code, built up as a bit mask of the suites that failed
 begin
+  i:=0;
   Writeln('Executing RFC 1319 test suite ...');
-  performTest(MD_VERSION_2);
+  if performTest(MD_VERSION_2,true) then
+    Write('RFC 1319 test suite passed  ')
+  else
+  begin
+    Write('RFC 1319 test suite failed  ');
+    i:=i or 1;  // bit 0: MD_VERSION_2 run failed
+  end;
+  Writeln;
   Writeln;
 
   Writeln('Executing RFC 1320 test suite ...');
-  performTest(MD_VERSION_4);
+  if performTest(MD_VERSION_4,true) then
+    Write('RFC 1320 test suite passed  ')
+  else
+  begin
+    Write('RFC 1320 test suite failed  ');
+    i:=i or 2;  // bit 1: MD_VERSION_4 run failed
+  end;
+  Writeln;
   Writeln;
 
   Writeln('Executing RFC 1321 test suite ...');
-  performTest(MD_VERSION_5);
+  if performTest(MD_VERSION_5,true) then
+    Write('RFC 1321 test suite passed  ')
+  else
+  begin
+    Write('RFC 1321 test suite failed  ');
+    i:=i or 4;  // bit 2: MD_VERSION_5 run failed
+  end;
+  Writeln;
   Writeln;
+  halt(i); // exit code 0 when all three suites passed, otherwise the failure bit mask
 end.

+ 29 - 0
packages/hash/examples/sha1performancetest.pas

@@ -0,0 +1,29 @@
+program sha1performancetest;
+
+{$mode objfpc}{$H+}
+
+uses
+  {$IFDEF UNIX}{$IFDEF UseCThreads}
+  cthreads,
+  {$ENDIF}{$ENDIF}
+  SysUtils,Classes,sha1,dateutils;
+// Times repeated SHA-1 hashing of a 1,000,000-character string of 'a'.
+var
+  StartTime, EndTime: TDateTime;
+  i: integer;
+  s,ss: string;
+begin
+  // The banner names SHA-1: this program times SHA1String, not the MD5 routines.
+  writeln('SHA-1 of a million "a" symbols');
+  Writeln('compile sha1 unit with -dSHA1SLOW to use unoptimized original version');
+  SetLength(s, 1000000);
+  for i := 1 to 1000000 do s[i] := 'a';
+
+  StartTime:=now;
+  // 0..1000 inclusive: the digest is computed 1001 times.
+  for i := 0 to 1000 do
+    ss := LowerCase(SHA1Print(SHA1string(s)));
+  EndTime:=now;
+  writeln('Performance test finished. Elapsed time:');
+  writeln(TimeToStr(EndTime-StartTime));
+end.
+

+ 20 - 9
packages/hash/examples/sha1test.pp

@@ -1,23 +1,24 @@
 program sha1test;
 {$mode objfpc}{$h+}
 
-uses sha1;
+uses SysUtils, sha1;
 
+function performTest: cardinal;
+// Runs test and returns result code (0=success)
 var
-  code: cardinal;
   s, sdig: string;
   i: integer;
   ctx: TSHA1Context;
   d: TSHA1Digest;
 begin
-  code := 0;
+  result := 0;
   sdig := SHA1Print(SHA1String('abc'));
   if sdig <> 'a9993e364706816aba3e25717850c26c9cd0d89d' then
-    code := code or 1;
-    
+    result := result or 1;
+
   sdig := SHA1Print(SHA1String('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq'));
   if sdig <> '84983e441c3bd26ebaae4aa1f95129e5e54670f1' then
-    code := code or 2;
+    result := result or 2;
 
   // SHA-1 of a million 'a' symbols
   SetLength(s, 1000);
@@ -28,11 +29,21 @@ begin
   SHA1Final(ctx, d);
   sdig := SHA1Print(d);
   if sdig <> '34aa973cd4c4daa4f61eeb2bdbad27316534016f' then
-    code := code or 4;
+    result := result or 4;
+end;
+
+var
+  // Bit mask from performTest: 1, 2 and 4 flag the individual checks; 0 means none failed.
+  code: cardinal;
+begin
+  writeln('Performing basic SHA-1 test...');
+  code:=performTest;
 
   if code = 0 then
     writeln('Basic SHA-1 tests passed')
   else
+  begin
     writeln('SHA-1 tests failed: ', code);
-  Halt(code);
-end.
+  end;
+  Halt(code);
+end.

+ 2 - 0
packages/hash/fpmake.pp

@@ -40,6 +40,8 @@ begin
     T:=P.Targets.AddExampleunit('examples/sha1test.pp');
     T:=P.Targets.AddExampleunit('examples/hmd5.pas');
     T:=P.Targets.AddExampleunit('examples/hsha1.pas');
+    T:=P.Targets.AddExampleunit('examples/md5performancetest.pas');
+    T:=P.Targets.AddExampleunit('examples/sha1performancetest.pas');
     // md5.ref
 {$ifndef ALLPACKAGES}
     Run;

+ 110 - 1
packages/hash/src/md5.pp

@@ -1,6 +1,6 @@
 {
     This file is part of the Free Pascal packages.
-    Copyright (c) 1999-2006 by the Free Pascal development team
+    Copyright (c) 1999-2014 by the Free Pascal development team
 
     Implements a MD2 digest algorithm (RFC 1319)
     Implements a MD4 digest algorithm (RFC 1320)
@@ -15,6 +15,10 @@
 
  **********************************************************************}
 
+// Define MD5SLOW to use the original (unoptimized) MD5 code on x86 processors.
+// Leave it undefined to use the optimized implementation.
+{not $DEFINE MD5SLOW}
+
 unit md5;
 
 {$mode objfpc}
@@ -298,6 +302,110 @@ begin
 end;
 
 
+{$IF (NOT(DEFINED(MD5SLOW))) and (DEFINED(CPUI386)) }
+{$i md5i386.inc}
+{$ENDIF}
+{$IF (NOT(DEFINED(MD5SLOW))) and (DEFINED(CPUX86_64)) }
+{$OPTIMIZATION USERBP} //PEEPHOLE
+procedure MD5Transform(var Context: TMDContext; Buffer: Pointer);  // mixes the 64-byte block at Buffer into Context.State
+type
+  TBlock = array[0..15] of Cardinal;
+  PBlock = ^TBlock;
+var
+  a, b, c, d: Cardinal;
+  // Block overlays the Buffer parameter ("absolute"), so Block^[n] reads 32-bit word n of the input in place.
+  Block: PBlock absolute Buffer;
+begin
+  // The input words are used as they are; no Invert/byte-swapped copy is made in this variant.
+  a := Context.State[0];
+  b := Context.State[1];
+  c := Context.State[2];
+  d := Context.State[3];
+
+{$push}
+{$r-,q-}  // range and overflow checks off, so the 32-bit additions below can wrap without raising errors
+
+  // Round 1: mixing function (x and y) or ((not x) and z)
+  a := b + roldword(dword(a + ((b and c) or ((not b) and d)) + Block^[0]  + $d76aa478),  7);
+  d := a + roldword(dword(d + ((a and b) or ((not a) and c)) + Block^[1]  + $e8c7b756), 12);
+  c := d + roldword(dword(c + ((d and a) or ((not d) and b)) + Block^[2]  + $242070db), 17);
+  b := c + roldword(dword(b + ((c and d) or ((not c) and a)) + Block^[3]  + $c1bdceee), 22);
+  a := b + roldword(dword(a + ((b and c) or ((not b) and d)) + Block^[4]  + $f57c0faf),  7);
+  d := a + roldword(dword(d + ((a and b) or ((not a) and c)) + Block^[5]  + $4787c62a), 12);
+  c := d + roldword(dword(c + ((d and a) or ((not d) and b)) + Block^[6]  + $a8304613), 17);
+  b := c + roldword(dword(b + ((c and d) or ((not c) and a)) + Block^[7]  + $fd469501), 22);
+  a := b + roldword(dword(a + ((b and c) or ((not b) and d)) + Block^[8]  + $698098d8),  7);
+  d := a + roldword(dword(d + ((a and b) or ((not a) and c)) + Block^[9]  + $8b44f7af), 12);
+  c := d + roldword(dword(c + ((d and a) or ((not d) and b)) + Block^[10] + $ffff5bb1), 17);
+  b := c + roldword(dword(b + ((c and d) or ((not c) and a)) + Block^[11] + $895cd7be), 22);
+  a := b + roldword(dword(a + ((b and c) or ((not b) and d)) + Block^[12] + $6b901122),  7);
+  d := a + roldword(dword(d + ((a and b) or ((not a) and c)) + Block^[13] + $fd987193), 12);
+  c := d + roldword(dword(c + ((d and a) or ((not d) and b)) + Block^[14] + $a679438e), 17);
+  b := c + roldword(dword(b + ((c and d) or ((not c) and a)) + Block^[15] + $49b40821), 22);
+  // Round 2: mixing function (x and z) or (y and (not z))
+  a := b + roldword(dword(a + ((b and d) or (c and (not d))) + Block^[1]  + $f61e2562),  5);
+  d := a + roldword(dword(d + ((a and c) or (b and (not c))) + Block^[6]  + $c040b340),  9);
+  c := d + roldword(dword(c + ((d and b) or (a and (not b))) + Block^[11] + $265e5a51), 14);
+  b := c + roldword(dword(b + ((c and a) or (d and (not a))) + Block^[0]  + $e9b6c7aa), 20);
+  a := b + roldword(dword(a + ((b and d) or (c and (not d))) + Block^[5]  + $d62f105d),  5);
+  d := a + roldword(dword(d + ((a and c) or (b and (not c))) + Block^[10] + $02441453),  9);
+  c := d + roldword(dword(c + ((d and b) or (a and (not b))) + Block^[15] + $d8a1e681), 14);
+  b := c + roldword(dword(b + ((c and a) or (d and (not a))) + Block^[4]  + $e7d3fbc8), 20);
+  a := b + roldword(dword(a + ((b and d) or (c and (not d))) + Block^[9]  + $21e1cde6),  5);
+  d := a + roldword(dword(d + ((a and c) or (b and (not c))) + Block^[14] + $c33707d6),  9);
+  c := d + roldword(dword(c + ((d and b) or (a and (not b))) + Block^[3]  + $f4d50d87), 14);
+  b := c + roldword(dword(b + ((c and a) or (d and (not a))) + Block^[8]  + $455a14ed), 20);
+  a := b + roldword(dword(a + ((b and d) or (c and (not d))) + Block^[13] + $a9e3e905),  5);
+  d := a + roldword(dword(d + ((a and c) or (b and (not c))) + Block^[2]  + $fcefa3f8),  9);
+  c := d + roldword(dword(c + ((d and b) or (a and (not b))) + Block^[7]  + $676f02d9), 14);
+  b := c + roldword(dword(b + ((c and a) or (d and (not a))) + Block^[12] + $8d2a4c8a), 20);
+  // Round 3: mixing function x xor y xor z
+  a := b + roldword(dword(a + (b xor c xor d) + Block^[5]  + $fffa3942),  4);
+  d := a + roldword(dword(d + (a xor b xor c) + Block^[8]  + $8771f681), 11);
+  c := d + roldword(dword(c + (d xor a xor b) + Block^[11] + $6d9d6122), 16);
+  b := c + roldword(dword(b + (c xor d xor a) + Block^[14] + $fde5380c), 23);
+  a := b + roldword(dword(a + (b xor c xor d) + Block^[1]  + $a4beea44),  4);
+  d := a + roldword(dword(d + (a xor b xor c) + Block^[4]  + $4bdecfa9), 11);
+  c := d + roldword(dword(c + (d xor a xor b) + Block^[7]  + $f6bb4b60), 16);
+  b := c + roldword(dword(b + (c xor d xor a) + Block^[10] + $bebfbc70), 23);
+  a := b + roldword(dword(a + (b xor c xor d) + Block^[13] + $289b7ec6),  4);
+  d := a + roldword(dword(d + (a xor b xor c) + Block^[0]  + $eaa127fa), 11);
+  c := d + roldword(dword(c + (d xor a xor b) + Block^[3]  + $d4ef3085), 16);
+  b := c + roldword(dword(b + (c xor d xor a) + Block^[6]  + $04881d05), 23);
+  a := b + roldword(dword(a + (b xor c xor d) + Block^[9]  + $d9d4d039),  4);
+  d := a + roldword(dword(d + (a xor b xor c) + Block^[12] + $e6db99e5), 11);
+  c := d + roldword(dword(c + (d xor a xor b) + Block^[15] + $1fa27cf8), 16);
+  b := c + roldword(dword(b + (c xor d xor a) + Block^[2]  + $c4ac5665), 23);
+  // Round 4: mixing function y xor (x or (not z))
+  a := b + roldword(dword(a + (c xor (b or (not d))) + Block^[0]  + $f4292244),  6);
+  d := a + roldword(dword(d + (b xor (a or (not c))) + Block^[7]  + $432aff97), 10);
+  c := d + roldword(dword(c + (a xor (d or (not b))) + Block^[14] + $ab9423a7), 15);
+  b := c + roldword(dword(b + (d xor (c or (not a))) + Block^[5]  + $fc93a039), 21);
+  a := b + roldword(dword(a + (c xor (b or (not d))) + Block^[12] + $655b59c3),  6);
+  d := a + roldword(dword(d + (b xor (a or (not c))) + Block^[3]  + $8f0ccc92), 10);
+  c := d + roldword(dword(c + (a xor (d or (not b))) + Block^[10] + $ffeff47d), 15);
+  b := c + roldword(dword(b + (d xor (c or (not a))) + Block^[1]  + $85845dd1), 21);
+  a := b + roldword(dword(a + (c xor (b or (not d))) + Block^[8]  + $6fa87e4f),  6);
+  d := a + roldword(dword(d + (b xor (a or (not c))) + Block^[15] + $fe2ce6e0), 10);
+  c := d + roldword(dword(c + (a xor (d or (not b))) + Block^[6]  + $a3014314), 15);
+  b := c + roldword(dword(b + (d xor (c or (not a))) + Block^[13] + $4e0811a1), 21);
+  a := b + roldword(dword(a + (c xor (b or (not d))) + Block^[4]  + $f7537e82),  6);
+  d := a + roldword(dword(d + (b xor (a or (not c))) + Block^[11] + $bd3af235), 10);
+  c := d + roldword(dword(c + (a xor (d or (not b))) + Block^[2]  + $2ad7d2bb), 15);
+  b := c + roldword(dword(b + (d xor (c or (not a))) + Block^[9]  + $eb86d391), 21);
+
+  // Add this block's a..d to the running state rather than overwriting it.
+  inc(Context.State[0],a);
+  inc(Context.State[1],b);
+  inc(Context.State[2],c);
+  inc(Context.State[3],d);
+{$pop}  // back to the $r/$q settings saved by the $push above
+  inc(Context.Length,64);  // one 64-byte block has been consumed
+end;
+{$OPTIMIZATION DEFAULT}
+{$ENDIF}
+{$IF DEFINED(MD5SLOW) or (NOT ((DEFINED(CPUX86_64)) or (DEFINED(CPUI386))))}
+// Original version
 procedure MD5Transform(var Context: TMDContext; Buffer: Pointer);
 
 {$push}
@@ -372,6 +480,7 @@ begin
 {$pop}
   inc(Context.Length,64);
 end;
+{$ENDIF}
 
 
 procedure MDInit(out Context: TMDContext; const Version: TMDVersion);

+ 747 - 0
packages/hash/src/md5i386.inc

@@ -0,0 +1,747 @@
+// i386 assembler optimized version
+procedure MD5Transform(var Context: TMDContext; Buffer: Pointer);assembler;
+var
+  pContext: ^TMDContext;  // stack copy of the Context address; read back near the end, after 'pop EBP'
+  pBuffer: Pointer;       // written once below but never read back
+  a, b, c, d: Cardinal;   // not referenced by the assembler code (the state lives in registers)
+  //Block: array[0..15] of Cardinal;
+{$asmmode intel}
+asm
+    push EAX
+    push EBX
+    push ECX
+    push EDX
+    push ESI
+    push EDI
+    push EBP
+    // EAX and EDX are taken as the incoming Context address and Buffer pointer.
+    mov pContext, eax
+    mov pBuffer, edx
+    // Register roles from here on: EAX/EBX/ECX/EDX = A/B/C/D, ESI = scratch, EBP = Buffer.
+    mov ESI, pContext
+    mov ebp, edx        // NOTE(review): locals are presumably EBP-relative, so they stay off-limits until 'pop EBP' - confirm
+
+// A := Context.State[0];
+    mov EAX, [ESI+12+4*0]   // NOTE(review): 12 is presumably the offset of TMDContext.State - confirm against the record layout
+// B := Context.State[1];
+    mov EBX, [ESI+12+4*1]
+// C := Context.State[2];
+    mov ECX, [ESI+12+4*2]
+// D := Context.State[3];
+    mov EDX, [ESI+12+4*3]
+// Round 1 - each step computes ((y xor z) and x) xor z, which equals the (x and y) or ((not x) and z) shown in its comment
+//EAX := EBX + roldword(dword(EAX + ((EBX and ECX) or ((not EBX) and EDX)) + Data[0] + $d76aa478),  7);
+    mov ESI, ECX
+    add EAX, $d76aa478
+    xor ESI, EDX
+    add EAX, [ebp + 4*0]
+    and ESI, EBX
+    xor ESI, EDX
+    add EAX, ESI
+    rol EAX,  7
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + ((EAX and EBX) or ((not EAX) and ECX)) + Data[1] + $e8c7b756), 12);
+    mov ESI, EBX
+    add EDX, $e8c7b756
+    xor ESI, ECX
+    add EDX, [ebp + 4*1]
+    and ESI, EAX
+    xor ESI, ECX
+    add EDX, ESI
+    rol EDX, 12
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + ((EDX and EAX) or ((not EDX) and EBX)) + Data[2] + $242070db), 17);
+    mov ESI, EAX
+    add ECX, $242070db
+    xor ESI, EBX
+    add ECX, [ebp + 4*2]
+    and ESI, EDX
+    xor ESI, EBX
+    add ECX, ESI
+    rol ECX, 17
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + ((ECX and EDX) or ((not ECX) and EAX)) + Data[3] + $c1bdceee), 22);
+    mov ESI, EDX
+    add EBX, $c1bdceee
+    xor ESI, EAX
+    add EBX, [ebp + 4*3]
+    and ESI, ECX
+    xor ESI, EAX
+    add EBX, ESI
+    rol EBX, 22
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + ((EBX and ECX) or ((not EBX) and EDX)) + Data[4] + $f57c0faf),  7);
+    mov ESI, ECX
+    add EAX, $f57c0faf
+    xor ESI, EDX
+    add EAX, [ebp + 4*4]
+    and ESI, EBX
+    xor ESI, EDX
+    add EAX, ESI
+    rol EAX,  7
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + ((EAX and EBX) or ((not EAX) and ECX)) + Data[5] + $4787c62a), 12);
+    mov ESI, EBX
+    add EDX, $4787c62a
+    xor ESI, ECX
+    add EDX, [ebp + 4*5]
+    and ESI, EAX
+    xor ESI, ECX
+    add EDX, ESI
+    rol EDX, 12
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + ((EDX and EAX) or ((not EDX) and EBX)) + Data[6] + $a8304613), 17);
+    mov ESI, EAX
+    add ECX, $a8304613
+    xor ESI, EBX
+    add ECX, [ebp + 4*6]
+    and ESI, EDX
+    xor ESI, EBX
+    add ECX, ESI
+    rol ECX, 17
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + ((ECX and EDX) or ((not ECX) and EAX)) + Data[7] + $fd469501), 22);
+    mov ESI, EDX
+    add EBX, $fd469501
+    xor ESI, EAX
+    add EBX, [ebp + 4*7]
+    and ESI, ECX
+    xor ESI, EAX
+    add EBX, ESI
+    rol EBX, 22
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + ((EBX and ECX) or ((not EBX) and EDX)) + Data[8] + $698098d8),  7);
+    mov ESI, ECX
+    add EAX, $698098d8
+    xor ESI, EDX
+    add EAX, [ebp + 4*8]
+    and ESI, EBX
+    xor ESI, EDX
+    add EAX, ESI
+    rol EAX,  7
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + ((EAX and EBX) or ((not EAX) and ECX)) + Data[9] + $8b44f7af), 12);
+    mov ESI, EBX
+    add EDX, $8b44f7af
+    xor ESI, ECX
+    add EDX, [ebp + 4*9]
+    and ESI, EAX
+    xor ESI, ECX
+    add EDX, ESI
+    rol EDX, 12
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + ((EDX and EAX) or ((not EDX) and EBX)) + Data[10] + $ffff5bb1), 17);
+    mov ESI, EAX
+    add ECX, $ffff5bb1
+    xor ESI, EBX
+    add ECX, [ebp + 4*10]
+    and ESI, EDX
+    xor ESI, EBX
+    add ECX, ESI
+    rol ECX, 17
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + ((ECX and EDX) or ((not ECX) and EAX)) + Data[11] + $895cd7be), 22);
+    mov ESI, EDX
+    add EBX, $895cd7be
+    xor ESI, EAX
+    add EBX, [ebp + 4*11]
+    and ESI, ECX
+    xor ESI, EAX
+    add EBX, ESI
+    rol EBX, 22
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + ((EBX and ECX) or ((not EBX) and EDX)) + Data[12] + $6b901122),  7);
+    mov ESI, ECX
+    add EAX, $6b901122
+    xor ESI, EDX
+    add EAX, [ebp + 4*12]
+    and ESI, EBX
+    xor ESI, EDX
+    add EAX, ESI
+    rol EAX,  7
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + ((EAX and EBX) or ((not EAX) and ECX)) + Data[13] + $fd987193), 12);
+    mov ESI, EBX
+    add EDX, $fd987193
+    xor ESI, ECX
+    add EDX, [ebp + 4*13]
+    and ESI, EAX
+    xor ESI, ECX
+    add EDX, ESI
+    rol EDX, 12
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + ((EDX and EAX) or ((not EDX) and EBX)) + Data[14] + $a679438e), 17);
+    mov ESI, EAX
+    add ECX, $a679438e
+    xor ESI, EBX
+    add ECX, [ebp + 4*14]
+    and ESI, EDX
+    xor ESI, EBX
+    add ECX, ESI
+    rol ECX, 17
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + ((ECX and EDX) or ((not ECX) and EAX)) + Data[15] + $49b40821), 22);
+    mov ESI, EDX
+    add EBX, $49b40821
+    xor ESI, EAX
+    add EBX, [ebp + 4*15]
+    and ESI, ECX
+    xor ESI, EAX
+    add EBX, ESI
+    rol EBX, 22
+    add EBX, ECX
+
+// Round 2 - each step computes ((x xor y) and z) xor y, which equals the (x and z) or (y and (not z)) shown in its comment
+//EAX := EBX + roldword(dword(EAX + ((EBX and EDX) or (ECX and (not EDX))) + Data[1] + $f61e2562),  5);
+    mov ESI, EBX
+    add EAX, $f61e2562
+    xor ESI, ECX
+    add EAX, [ebp + 4*1]
+    and ESI, EDX
+    xor ESI, ECX
+    add EAX, ESI
+    rol EAX,  5
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + ((EAX and ECX) or (EBX and (not ECX))) + Data[6] + $c040b340),  9);
+    mov ESI, EAX
+    add EDX, $c040b340
+    xor ESI, EBX
+    add EDX, [ebp + 4*6]
+    and ESI, ECX
+    xor ESI, EBX
+    add EDX, ESI
+    rol EDX,  9
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + ((EDX and EBX) or (EAX and (not EBX))) + Data[11] + $265e5a51), 14);
+    mov ESI, EDX
+    add ECX, $265e5a51
+    xor ESI, EAX
+    add ECX, [ebp + 4*11]
+    and ESI, EBX
+    xor ESI, EAX
+    add ECX, ESI
+    rol ECX, 14
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + ((ECX and EAX) or (EDX and (not EAX))) + Data[0] + $e9b6c7aa), 20);
+    mov ESI, ECX
+    add EBX, $e9b6c7aa
+    xor ESI, EDX
+    add EBX, [ebp + 4*0]
+    and ESI, EAX
+    xor ESI, EDX
+    add EBX, ESI
+    rol EBX, 20
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + ((EBX and EDX) or (ECX and (not EDX))) + Data[5] + $d62f105d),  5);
+    mov ESI, EBX
+    add EAX, $d62f105d
+    xor ESI, ECX
+    add EAX, [ebp + 4*5]
+    and ESI, EDX
+    xor ESI, ECX
+    add EAX, ESI
+    rol EAX,  5
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + ((EAX and ECX) or (EBX and (not ECX))) + Data[10] + $02441453),  9);
+    mov ESI, EAX
+    add EDX, $02441453
+    xor ESI, EBX
+    add EDX, [ebp + 4*10]
+    and ESI, ECX
+    xor ESI, EBX
+    add EDX, ESI
+    rol EDX,  9
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + ((EDX and EBX) or (EAX and (not EBX))) + Data[15] + $d8a1e681), 14);
+    mov ESI, EDX
+    add ECX, $d8a1e681
+    xor ESI, EAX
+    add ECX, [ebp + 4*15]
+    and ESI, EBX
+    xor ESI, EAX
+    add ECX, ESI
+    rol ECX, 14
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + ((ECX and EAX) or (EDX and (not EAX))) + Data[4] + $e7d3fbc8), 20);
+    mov ESI, ECX
+    add EBX, $e7d3fbc8
+    xor ESI, EDX
+    add EBX, [ebp + 4*4]
+    and ESI, EAX
+    xor ESI, EDX
+    add EBX, ESI
+    rol EBX, 20
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + ((EBX and EDX) or (ECX and (not EDX))) + Data[9] + $21e1cde6),  5);
+    mov ESI, EBX
+    add EAX, $21e1cde6
+    xor ESI, ECX
+    add EAX, [ebp + 4*9]
+    and ESI, EDX
+    xor ESI, ECX
+    add EAX, ESI
+    rol EAX,  5
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + ((EAX and ECX) or (EBX and (not ECX))) + Data[14] + $c33707d6),  9);
+    mov ESI, EAX
+    add EDX, $c33707d6
+    xor ESI, EBX
+    add EDX, [ebp + 4*14]
+    and ESI, ECX
+    xor ESI, EBX
+    add EDX, ESI
+    rol EDX,  9
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + ((EDX and EBX) or (EAX and (not EBX))) + Data[3] + $f4d50d87), 14);
+    mov ESI, EDX
+    add ECX, $f4d50d87
+    xor ESI, EAX
+    add ECX, [ebp + 4*3]
+    and ESI, EBX
+    xor ESI, EAX
+    add ECX, ESI
+    rol ECX, 14
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + ((ECX and EAX) or (EDX and (not EAX))) + Data[8] + $455a14ed), 20);
+    mov ESI, ECX
+    add EBX, $455a14ed
+    xor ESI, EDX
+    add EBX, [ebp + 4*8]
+    and ESI, EAX
+    xor ESI, EDX
+    add EBX, ESI
+    rol EBX, 20
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + ((EBX and EDX) or (ECX and (not EDX))) + Data[13] + $a9e3e905),  5);
+    mov ESI, EBX
+    add EAX, $a9e3e905
+    xor ESI, ECX
+    add EAX, [ebp + 4*13]
+    and ESI, EDX
+    xor ESI, ECX
+    add EAX, ESI
+    rol EAX,  5
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + ((EAX and ECX) or (EBX and (not ECX))) + Data[2] + $fcefa3f8),  9);
+    mov ESI, EAX
+    add EDX, $fcefa3f8
+    xor ESI, EBX
+    add EDX, [ebp + 4*2]
+    and ESI, ECX
+    xor ESI, EBX
+    add EDX, ESI
+    rol EDX,  9
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + ((EDX and EBX) or (EAX and (not EBX))) + Data[7] + $676f02d9), 14);
+    mov ESI, EDX
+    add ECX, $676f02d9
+    xor ESI, EAX
+    add ECX, [ebp + 4*7]
+    and ESI, EBX
+    xor ESI, EAX
+    add ECX, ESI
+    rol ECX, 14
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + ((ECX and EAX) or (EDX and (not EAX))) + Data[12] + $8d2a4c8a), 20);
+    mov ESI, ECX
+    add EBX, $8d2a4c8a
+    xor ESI, EDX
+    add EBX, [ebp + 4*12]
+    and ESI, EAX
+    xor ESI, EDX
+    add EBX, ESI
+    rol EBX, 20
+    add EBX, ECX
+
+// Round 3
+//EAX := EBX + roldword(dword(EAX + (EBX xor ECX xor EDX) + Data[5] + $fffa3942),  4);
+    mov ESI, EBX
+    add EAX, $fffa3942
+    xor ESI, ECX
+    add EAX, [ebp + 4*5]
+    xor ESI, EDX
+    add EAX, ESI
+    rol EAX,  4
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + (EAX xor EBX xor ECX) + Data[8] + $8771f681), 11);
+    mov ESI, EAX
+    add EDX, $8771f681
+    xor ESI, EBX
+    add EDX, [ebp + 4*8]
+    xor ESI, ECX
+    add EDX, ESI
+    rol EDX, 11
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + (EDX xor EAX xor EBX) + Data[11] + $6d9d6122), 16);
+    mov ESI, EDX
+    add ECX, $6d9d6122
+    xor ESI, EAX
+    add ECX, [ebp + 4*11]
+    xor ESI, EBX
+    add ECX, ESI
+    rol ECX, 16
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + (ECX xor EDX xor EAX) + Data[14] + $fde5380c), 23);
+    mov ESI, ECX
+    add EBX, $fde5380c
+    xor ESI, EDX
+    add EBX, [ebp + 4*14]
+    xor ESI, EAX
+    add EBX, ESI
+    rol EBX, 23
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + (EBX xor ECX xor EDX) + Data[1] + $a4beea44),  4);
+    mov ESI, EBX
+    add EAX, $a4beea44
+    xor ESI, ECX
+    add EAX, [ebp + 4*1]
+    xor ESI, EDX
+    add EAX, ESI
+    rol EAX,  4
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + (EAX xor EBX xor ECX) + Data[4] + $4bdecfa9), 11);
+    mov ESI, EAX
+    add EDX, $4bdecfa9
+    xor ESI, EBX
+    add EDX, [ebp + 4*4]
+    xor ESI, ECX
+    add EDX, ESI
+    rol EDX, 11
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + (EDX xor EAX xor EBX) + Data[7] + $f6bb4b60), 16);
+    mov ESI, EDX
+    add ECX, $f6bb4b60
+    xor ESI, EAX
+    add ECX, [ebp + 4*7]
+    xor ESI, EBX
+    add ECX, ESI
+    rol ECX, 16
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + (ECX xor EDX xor EAX) + Data[10] + $bebfbc70), 23);
+    mov ESI, ECX
+    add EBX, $bebfbc70
+    xor ESI, EDX
+    add EBX, [ebp + 4*10]
+    xor ESI, EAX
+    add EBX, ESI
+    rol EBX, 23
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + (EBX xor ECX xor EDX) + Data[13] + $289b7ec6),  4);
+    mov ESI, EBX
+    add EAX, $289b7ec6
+    xor ESI, ECX
+    add EAX, [ebp + 4*13]
+    xor ESI, EDX
+    add EAX, ESI
+    rol EAX,  4
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + (EAX xor EBX xor ECX) + Data[0] + $eaa127fa), 11);
+    mov ESI, EAX
+    add EDX, $eaa127fa
+    xor ESI, EBX
+    add EDX, [ebp + 4*0]
+    xor ESI, ECX
+    add EDX, ESI
+    rol EDX, 11
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + (EDX xor EAX xor EBX) + Data[3] + $d4ef3085), 16);
+    mov ESI, EDX
+    add ECX, $d4ef3085
+    xor ESI, EAX
+    add ECX, [ebp + 4*3]
+    xor ESI, EBX
+    add ECX, ESI
+    rol ECX, 16
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + (ECX xor EDX xor EAX) + Data[6] + $04881d05), 23);
+    mov ESI, ECX
+    add EBX, $04881d05
+    xor ESI, EDX
+    add EBX, [ebp + 4*6]
+    xor ESI, EAX
+    add EBX, ESI
+    rol EBX, 23
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + (EBX xor ECX xor EDX) + Data[9] + $d9d4d039),  4);
+    mov ESI, EBX
+    add EAX, $d9d4d039
+    xor ESI, ECX
+    add EAX, [ebp + 4*9]
+    xor ESI, EDX
+    add EAX, ESI
+    rol EAX,  4
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + (EAX xor EBX xor ECX) + Data[12] + $e6db99e5), 11);
+    mov ESI, EAX
+    add EDX, $e6db99e5
+    xor ESI, EBX
+    add EDX, [ebp + 4*12]
+    xor ESI, ECX
+    add EDX, ESI
+    rol EDX, 11
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + (EDX xor EAX xor EBX) + Data[15] + $1fa27cf8), 16);
+    mov ESI, EDX
+    add ECX, $1fa27cf8
+    xor ESI, EAX
+    add ECX, [ebp + 4*15]
+    xor ESI, EBX
+    add ECX, ESI
+    rol ECX, 16
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + (ECX xor EDX xor EAX) + Data[2] + $c4ac5665), 23);
+    mov ESI, ECX
+    add EBX, $c4ac5665
+    xor ESI, EDX
+    add EBX, [ebp + 4*2]
+    xor ESI, EAX
+    add EBX, ESI
+    rol EBX, 23
+    add EBX, ECX
+
+// Round 4
+//EAX := EBX + roldword(dword(EAX + (ECX xor (EBX or (not EDX))) + Data[0] + $f4292244),  6);
+    mov ESI, EDX
+    add EAX, $f4292244
+    not ESI
+    add EAX, [ebp + 4*0]
+    or ESI, EBX
+    xor ESI, ECX
+    add EAX, ESI
+    rol EAX,  6
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + (EBX xor (EAX or (not ECX))) + Data[7] + $432aff97), 10);
+    mov ESI, ECX
+    add EDX, $432aff97
+    not ESI
+    add EDX, [ebp + 4*7]
+    or ESI, EAX
+    xor ESI, EBX
+    add EDX, ESI
+    rol EDX, 10
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + (EAX xor (EDX or (not EBX))) + Data[14] + $ab9423a7), 15);
+    mov ESI, EBX
+    add ECX, $ab9423a7
+    not ESI
+    add ECX, [ebp + 4*14]
+    or ESI, EDX
+    xor ESI, EAX
+    add ECX, ESI
+    rol ECX, 15
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + (EDX xor (ECX or (not EAX))) + Data[5] + $fc93a039), 21);
+    mov ESI, EAX
+    add EBX, $fc93a039
+    not ESI
+    add EBX, [ebp + 4*5]
+    or ESI, ECX
+    xor ESI, EDX
+    add EBX, ESI
+    rol EBX, 21
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + (ECX xor (EBX or (not EDX))) + Data[12] + $655b59c3),  6);
+    mov ESI, EDX
+    add EAX, $655b59c3
+    not ESI
+    add EAX, [ebp + 4*12]
+    or ESI, EBX
+    xor ESI, ECX
+    add EAX, ESI
+    rol EAX,  6
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + (EBX xor (EAX or (not ECX))) + Data[3] + $8f0ccc92), 10);
+    mov ESI, ECX
+    add EDX, $8f0ccc92
+    not ESI
+    add EDX, [ebp + 4*3]
+    or ESI, EAX
+    xor ESI, EBX
+    add EDX, ESI
+    rol EDX, 10
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + (EAX xor (EDX or (not EBX))) + Data[10] + $ffeff47d), 15);
+    mov ESI, EBX
+    add ECX, $ffeff47d
+    not ESI
+    add ECX, [ebp + 4*10]
+    or ESI, EDX
+    xor ESI, EAX
+    add ECX, ESI
+    rol ECX, 15
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + (EDX xor (ECX or (not EAX))) + Data[1] + $85845dd1), 21);
+    mov ESI, EAX
+    add EBX, $85845dd1
+    not ESI
+    add EBX, [ebp + 4*1]
+    or ESI, ECX
+    xor ESI, EDX
+    add EBX, ESI
+    rol EBX, 21
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + (ECX xor (EBX or (not EDX))) + Data[8] + $6fa87e4f),  6);
+    mov ESI, EDX
+    add EAX, $6fa87e4f
+    not ESI
+    add EAX, [ebp + 4*8]
+    or ESI, EBX
+    xor ESI, ECX
+    add EAX, ESI
+    rol EAX,  6
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + (EBX xor (EAX or (not ECX))) + Data[15] + $fe2ce6e0), 10);
+    mov ESI, ECX
+    add EDX, $fe2ce6e0
+    not ESI
+    add EDX, [ebp + 4*15]
+    or ESI, EAX
+    xor ESI, EBX
+    add EDX, ESI
+    rol EDX, 10
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + (EAX xor (EDX or (not EBX))) + Data[6] + $a3014314), 15);
+    mov ESI, EBX
+    add ECX, $a3014314
+    not ESI
+    add ECX, [ebp + 4*6]
+    or ESI, EDX
+    xor ESI, EAX
+    add ECX, ESI
+    rol ECX, 15
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + (EDX xor (ECX or (not EAX))) + Data[13] + $4e0811a1), 21);
+    mov ESI, EAX
+    add EBX, $4e0811a1
+    not ESI
+    add EBX, [ebp + 4*13]
+    or ESI, ECX
+    xor ESI, EDX
+    add EBX, ESI
+    rol EBX, 21
+    add EBX, ECX
+
+//EAX := EBX + roldword(dword(EAX + (ECX xor (EBX or (not EDX))) + Data[4] + $f7537e82),  6);
+    mov ESI, EDX
+    add EAX, $f7537e82
+    not ESI
+    add EAX, [ebp + 4*4]
+    or ESI, EBX
+    xor ESI, ECX
+    add EAX, ESI
+    rol EAX,  6
+    add EAX, EBX
+
+//EDX := EAX + roldword(dword(EDX + (EBX xor (EAX or (not ECX))) + Data[11] + $bd3af235), 10);
+    mov ESI, ECX
+    add EDX, $bd3af235
+    not ESI
+    add EDX, [ebp + 4*11]
+    or ESI, EAX
+    xor ESI, EBX
+    add EDX, ESI
+    rol EDX, 10
+    add EDX, EAX
+
+//ECX := EDX + roldword(dword(ECX + (EAX xor (EDX or (not EBX))) + Data[2] + $2ad7d2bb), 15);
+    mov ESI, EBX
+    add ECX, $2ad7d2bb
+    not ESI
+    add ECX, [ebp + 4*2]
+    or ESI, EDX
+    xor ESI, EAX
+    add ECX, ESI
+    rol ECX, 15
+    add ECX, EDX
+
+//EBX := ECX + roldword(dword(EBX + (EDX xor (ECX or (not EAX))) + Data[9] + $eb86d391), 21);
+    mov ESI, EAX
+    add EBX, $eb86d391
+    not ESI
+    add EBX, [ebp + 4*9]
+    or ESI, ECX
+    xor ESI, EDX
+    add EBX, ESI
+    rol EBX, 21
+    add EBX, ECX
+
+
+    pop EBP             // undo the last push: EBP no longer holds Buffer, so pContext can be read below
+
+    mov EDI, [pContext{.State}]
+//  Inc(Context.State[0], A);
+    add [EDI+12+4*0], EAX
+//  Inc(Context.State[1], B);
+    add [EDI+12+4*1], EBX
+//  Inc(Context.State[2], C);
+    add [EDI+12+4*2], ECX
+//  Inc(Context.State[3], D);
+    add [EDI+12+4*3], EDX
+
+//Inc(Context.Length,64); done as a low-dword add plus carry into the high dword (NOTE(review): 104/108 presumably TMDContext.Length - confirm)
+    add	dword ptr [EDI+104],64
+    adc	dword ptr [EDI+108],0
+
+    pop EDI
+    pop ESI
+    pop EDX
+    pop ECX
+    pop EBX
+    pop EAX
+end;

+ 13 - 2
packages/hash/src/sha1.pp

@@ -1,6 +1,6 @@
 {
     This file is part of the Free Pascal packages.
-    Copyright (c) 2009 by the Free Pascal development team
+    Copyright (c) 2009-2014 by the Free Pascal development team
 
     Implements a SHA-1 digest algorithm (RFC 3174)
 
@@ -13,6 +13,10 @@
 
  **********************************************************************}
 
+// Normally, if an optimized version is available for the target OS/CPU, it will be used.
+// Define SHA1SLOW to use the existing unoptimized implementation instead.
+{not $DEFINE SHA1SLOW}
+
 unit sha1;
 {$mode objfpc}{$h+}
 
@@ -77,6 +81,12 @@ const
   K60 = $8F1BBCDC;
   K80 = $CA62C1D6;
 
+{$IF (NOT(DEFINED(SHA1SLOW))) and (DEFINED(CPU386)) }
+// Use the assembler version when SHA1SLOW is not defined and the target CPU is i386.
+// Define SHA1SLOW to force use of the original reference code.
+{$i sha1i386.inc}
+{$ELSE}
+// Use original version if asked for, or when we have no optimized assembler version
 procedure SHA1Transform(var ctx: TSHA1Context; Buf: Pointer);
 var
   A, B, C, D, E, T: Cardinal;
@@ -144,6 +154,7 @@ begin
 {$pop}
   Inc(ctx.Length,64);
 end;
+{$ENDIF}
 
 procedure SHA1Update(var ctx: TSHA1Context; const Buf; BufLen: PtrUInt);
 var
@@ -303,4 +314,4 @@ begin
 end;
 
 end.
-k
+k

+ 1951 - 0
packages/hash/src/sha1i386.inc

@@ -0,0 +1,1951 @@
+procedure SHA1Transform(var ctx: TSHA1Context; const Buf: Pointer);assembler;
+var
+  pctx:^TSHA1Context;
+  pbuf:pointer;
+  A, B, C, D, E, T: Cardinal;
+  Data: array[0..15] of Cardinal;
+
+{$asmmode intel}
+asm
+  push EAX
+  push EBX
+  push ECX
+  push EDX
+  push ESI
+  push EDI
+  push EBP
+
+  mov pctx, eax
+  mov pbuf, edx
+
+//  Invert(Buf, @Data, 64);
+//Beginning of Invert
+  mov eax, edx//[pbuf]
+  mov ebx, [eax+0]
+  bswap ebx
+  mov [data+0], ebx
+  mov ebx, [eax+4]
+  bswap ebx
+  mov [data+4], ebx
+  mov ebx, [eax+8]
+  bswap ebx
+  mov [data+8], ebx
+  mov ebx, [eax+12]
+  bswap ebx
+  mov [data+12], ebx
+  mov ebx, [eax+16]
+  bswap ebx
+  mov [data+16], ebx
+  mov ebx, [eax+20]
+  bswap ebx
+  mov [data+20], ebx
+  mov ebx, [eax+24]
+  bswap ebx
+  mov [data+24], ebx
+  mov ebx, [eax+28]
+  bswap ebx
+  mov [data+28], ebx
+  mov ebx, [eax+32]
+  bswap ebx
+  mov [data+32], ebx
+  mov ebx, [eax+36]
+  bswap ebx
+  mov [data+36], ebx
+  mov ebx, [eax+40]
+  bswap ebx
+  mov [data+40], ebx
+  mov ebx, [eax+44]
+  bswap ebx
+  mov [data+44], ebx
+  mov ebx, [eax+48]
+  bswap ebx
+  mov [data+48], ebx
+  mov ebx, [eax+52]
+  bswap ebx
+  mov [data+52], ebx
+  mov ebx, [eax+56]
+  bswap ebx
+  mov [data+56], ebx
+  mov ebx, [eax+60]
+  bswap ebx
+  mov [data+60], ebx
+//End of Invert
+
+// A := ctx.State[0];
+  mov EDI, pctx{.State}
+  mov EAX, [EDI]
+// B := ctx.State[1];
+  mov EBX, [EDI+4]
+// C := ctx.State[2];
+  mov ECX, [EDI+8]
+// D := ctx.State[3];
+  mov EDX, [EDI+12]
+// E := ctx.State[4];
+  mov ESI, [EDI+16]
+//i=0
+//*    T := (EBX and ECX) or (not EBX and EDX) + K20 + ESI;
+  mov EDI, ECX
+  xor EDI, EDX
+  and EDI, EBX
+  xor EDI, EDX
+  add EDI, K20
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[0];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*0]
+//*    Data[0] := roldword(Data[0] xor Data[2] xor Data[8] xor Data[13], 1);
+  mov EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*13]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*0], EDI
+
+//i=1
+//*    T := (EAX and EBX) or (not EAX and ECX) + K20 + EDX;
+  mov EDI, EBX
+  xor EDI, ECX
+  and EDI, EAX
+  xor EDI, ECX
+  add EDI, K20
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[1];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*1]
+//*    Data[1] := roldword(Data[1] xor Data[3] xor Data[9] xor Data[14], 1);
+  mov EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*14]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*1], EDI
+
+//i=2
+//*    T := (ESI and EAX) or (not ESI and EBX) + K20 + ECX;
+  mov EDI, EAX
+  xor EDI, EBX
+  and EDI, ESI
+  xor EDI, EBX
+  add EDI, K20
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[2];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*2]
+//*    Data[2] := roldword(Data[2] xor Data[4] xor Data[10] xor Data[15], 1);
+  mov EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*15]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*2], EDI
+
+//i=3
+//*    T := (EDX and ESI) or (not EDX and EAX) + K20 + EBX;
+  mov EDI, ESI
+  xor EDI, EAX
+  and EDI, EDX
+  xor EDI, EAX
+  add EDI, K20
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[3];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*3]
+//*    Data[3] := roldword(Data[3] xor Data[5] xor Data[11] xor Data[0], 1);
+  mov EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*0]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*3], EDI
+
+//i=4
+//*    T := (ECX and EDX) or (not ECX and ESI) + K20 + EAX;
+  mov EDI, EDX
+  xor EDI, ESI
+  and EDI, ECX
+  xor EDI, ESI
+  add EDI, K20
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[4];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*4]
+//*    Data[4] := roldword(Data[4] xor Data[6] xor Data[12] xor Data[1], 1);
+  mov EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*1]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*4], EDI
+
+//i=5
+//*    T := (EBX and ECX) or (not EBX and EDX) + K20 + ESI;
+  mov EDI, ECX
+  xor EDI, EDX
+  and EDI, EBX
+  xor EDI, EDX
+  add EDI, K20
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[5];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*5]
+//*    Data[5] := roldword(Data[5] xor Data[7] xor Data[13] xor Data[2], 1);
+  mov EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*2]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*5], EDI
+
+//i=6
+//*    T := (EAX and EBX) or (not EAX and ECX) + K20 + EDX;
+  mov EDI, EBX
+  xor EDI, ECX
+  and EDI, EAX
+  xor EDI, ECX
+  add EDI, K20
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[6];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*6]
+//*    Data[6] := roldword(Data[6] xor Data[8] xor Data[14] xor Data[3], 1);
+  mov EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*3]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*6], EDI
+
+//i=7
+//*    T := (ESI and EAX) or (not ESI and EBX) + K20 + ECX;
+  mov EDI, EAX
+  xor EDI, EBX
+  and EDI, ESI
+  xor EDI, EBX
+  add EDI, K20
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[7];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*7]
+//*    Data[7] := roldword(Data[7] xor Data[9] xor Data[15] xor Data[4], 1);
+  mov EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*4]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*7], EDI
+
+//i=8
+//*    T := (EDX and ESI) or (not EDX and EAX) + K20 + EBX;
+  mov EDI, ESI
+  xor EDI, EAX
+  and EDI, EDX
+  xor EDI, EAX
+  add EDI, K20
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[8];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*8]
+//*    Data[8] := roldword(Data[8] xor Data[10] xor Data[0] xor Data[5], 1);
+  mov EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*5]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*8], EDI
+
+//i=9
+//*    T := (ECX and EDX) or (not ECX and ESI) + K20 + EAX;
+  mov EDI, EDX
+  xor EDI, ESI
+  and EDI, ECX
+  xor EDI, ESI
+  add EDI, K20
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[9];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*9]
+//*    Data[9] := roldword(Data[9] xor Data[11] xor Data[1] xor Data[6], 1);
+  mov EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*6]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*9], EDI
+
+//i=10
+//*    T := (EBX and ECX) or (not EBX and EDX) + K20 + ESI;
+  mov EDI, ECX
+  xor EDI, EDX
+  and EDI, EBX
+  xor EDI, EDX
+  add EDI, K20
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[10];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*10]
+//*    Data[10] := roldword(Data[10] xor Data[12] xor Data[2] xor Data[7], 1);
+  mov EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*7]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*10], EDI
+
+//i=11
+//*    T := (EAX and EBX) or (not EAX and ECX) + K20 + EDX;
+  mov EDI, EBX
+  xor EDI, ECX
+  and EDI, EAX
+  xor EDI, ECX
+  add EDI, K20
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[11];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*11]
+//*    Data[11] := roldword(Data[11] xor Data[13] xor Data[3] xor Data[8], 1);
+  mov EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*8]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*11], EDI
+
+//i=12
+//*    T := (ESI and EAX) or (not ESI and EBX) + K20 + ECX;
+  mov EDI, EAX
+  xor EDI, EBX
+  and EDI, ESI
+  xor EDI, EBX
+  add EDI, K20
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[12];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*12]
+//*    Data[12] := roldword(Data[12] xor Data[14] xor Data[4] xor Data[9], 1);
+  mov EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*9]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*12], EDI
+
+//i=13
+//*    T := (EDX and ESI) or (not EDX and EAX) + K20 + EBX;
+  mov EDI, ESI
+  xor EDI, EAX
+  and EDI, EDX
+  xor EDI, EAX
+  add EDI, K20
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[13];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*13]
+//*    Data[13] := roldword(Data[13] xor Data[15] xor Data[5] xor Data[10], 1);
+  mov EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*10]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*13], EDI
+
+//i=14
+//*    T := (ECX and EDX) or (not ECX and ESI) + K20 + EAX;
+  mov EDI, EDX
+  xor EDI, ESI
+  and EDI, ECX
+  xor EDI, ESI
+  add EDI, K20
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[14];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*14]
+//*    Data[14] := roldword(Data[14] xor Data[0] xor Data[6] xor Data[11], 1);
+  mov EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*11]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*14], EDI
+
+//i=15
+//*    T := (EBX and ECX) or (not EBX and EDX) + K20 + ESI;
+  mov EDI, ECX
+  xor EDI, EDX
+  and EDI, EBX
+  xor EDI, EDX
+  add EDI, K20
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[15];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*15]
+//*    Data[15] := roldword(Data[15] xor Data[1] xor Data[7] xor Data[12], 1);
+  mov EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*12]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*15], EDI
+
+//i=16
+//*    T := (EAX and EBX) or (not EAX and ECX) + K20 + EDX;
+  mov EDI, EBX
+  xor EDI, ECX
+  and EDI, EAX
+  xor EDI, ECX
+  add EDI, K20
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[0];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*0]
+//*    Data[0] := roldword(Data[0] xor Data[2] xor Data[8] xor Data[13], 1);
+  mov EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*13]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*0], EDI
+
+//i=17
+//*    T := (ESI and EAX) or (not ESI and EBX) + K20 + ECX;
+  mov EDI, EAX
+  xor EDI, EBX
+  and EDI, ESI
+  xor EDI, EBX
+  add EDI, K20
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[1];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*1]
+//*    Data[1] := roldword(Data[1] xor Data[3] xor Data[9] xor Data[14], 1);
+  mov EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*14]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*1], EDI
+
+//i=18
+//*    T := (EDX and ESI) or (not EDX and EAX) + K20 + EBX;
+  mov EDI, ESI
+  xor EDI, EAX
+  and EDI, EDX
+  xor EDI, EAX
+  add EDI, K20
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[2];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*2]
+//*    Data[2] := roldword(Data[2] xor Data[4] xor Data[10] xor Data[15], 1);
+  mov EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*15]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*2], EDI
+
+//i=19
+//*    T := (ECX and EDX) or (not ECX and ESI) + K20 + EAX;
+  mov EDI, EDX
+  xor EDI, ESI
+  and EDI, ECX
+  xor EDI, ESI
+  add EDI, K20
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[3];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*3]
+//*    Data[3] := roldword(Data[3] xor Data[5] xor Data[11] xor Data[0], 1);
+  mov EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*0]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*3], EDI
+
+//i=20
+//*    T := (EBX xor ECX xor EDX) + K40 + ESI;
+  mov EDI, EBX
+  xor EDI, ECX
+  xor EDI, EDX
+  add EDI, K40
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[4];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*4]
+//*    Data[4] := roldword(Data[4] xor Data[6] xor Data[12] xor Data[1], 1);
+  mov EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*1]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*4], EDI
+
+//i=21
+//*    T := (EAX xor EBX xor ECX) + K40 + EDX;
+  mov EDI, EAX
+  xor EDI, EBX
+  xor EDI, ECX
+  add EDI, K40
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[5];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*5]
+//*    Data[5] := roldword(Data[5] xor Data[7] xor Data[13] xor Data[2], 1);
+  mov EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*2]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*5], EDI
+
+//i=22
+//*    T := (ESI xor EAX xor EBX) + K40 + ECX;
+  mov EDI, ESI
+  xor EDI, EAX
+  xor EDI, EBX
+  add EDI, K40
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[6];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*6]
+//*    Data[6] := roldword(Data[6] xor Data[8] xor Data[14] xor Data[3], 1);
+  mov EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*3]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*6], EDI
+
+//i=23
+//*    T := (EDX xor ESI xor EAX) + K40 + EBX;
+  mov EDI, EDX
+  xor EDI, ESI
+  xor EDI, EAX
+  add EDI, K40
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[7];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*7]
+//*    Data[7] := roldword(Data[7] xor Data[9] xor Data[15] xor Data[4], 1);
+  mov EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*4]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*7], EDI
+
+//i=24
+//*    T := (ECX xor EDX xor ESI) + K40 + EAX;
+  mov EDI, ECX
+  xor EDI, EDX
+  xor EDI, ESI
+  add EDI, K40
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[8];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*8]
+//*    Data[8] := roldword(Data[8] xor Data[10] xor Data[0] xor Data[5], 1);
+  mov EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*5]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*8], EDI
+
+//i=25
+//*    T := (EBX xor ECX xor EDX) + K40 + ESI;
+  mov EDI, EBX
+  xor EDI, ECX
+  xor EDI, EDX
+  add EDI, K40
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[9];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*9]
+//*    Data[9] := roldword(Data[9] xor Data[11] xor Data[1] xor Data[6], 1);
+  mov EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*6]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*9], EDI
+
+//i=26
+//*    T := (EAX xor EBX xor ECX) + K40 + EDX;
+  mov EDI, EAX
+  xor EDI, EBX
+  xor EDI, ECX
+  add EDI, K40
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[10];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*10]
+//*    Data[10] := roldword(Data[10] xor Data[12] xor Data[2] xor Data[7], 1);
+  mov EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*7]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*10], EDI
+
+//i=27
+//*    T := (ESI xor EAX xor EBX) + K40 + ECX;
+  mov EDI, ESI
+  xor EDI, EAX
+  xor EDI, EBX
+  add EDI, K40
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[11];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*11]
+//*    Data[11] := roldword(Data[11] xor Data[13] xor Data[3] xor Data[8], 1);
+  mov EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*8]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*11], EDI
+
+//i=28
+//*    T := (EDX xor ESI xor EAX) + K40 + EBX;
+  mov EDI, EDX
+  xor EDI, ESI
+  xor EDI, EAX
+  add EDI, K40
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[12];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*12]
+//*    Data[12] := roldword(Data[12] xor Data[14] xor Data[4] xor Data[9], 1);
+  mov EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*9]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*12], EDI
+
+//i=29
+//*    T := (ECX xor EDX xor ESI) + K40 + EAX;
+  mov EDI, ECX
+  xor EDI, EDX
+  xor EDI, ESI
+  add EDI, K40
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[13];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*13]
+//*    Data[13] := roldword(Data[13] xor Data[15] xor Data[5] xor Data[10], 1);
+  mov EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*10]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*13], EDI
+
+//i=30
+//*    T := (EBX xor ECX xor EDX) + K40 + ESI;
+  mov EDI, EBX
+  xor EDI, ECX
+  xor EDI, EDX
+  add EDI, K40
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[14];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*14]
+//*    Data[14] := roldword(Data[14] xor Data[0] xor Data[6] xor Data[11], 1);
+  mov EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*11]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*14], EDI
+
+//i=31
+//*    T := (EAX xor EBX xor ECX) + K40 + EDX;
+  mov EDI, EAX
+  xor EDI, EBX
+  xor EDI, ECX
+  add EDI, K40
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[15];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*15]
+//*    Data[15] := roldword(Data[15] xor Data[1] xor Data[7] xor Data[12], 1);
+  mov EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*12]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*15], EDI
+
+//i=32
+//*    T := (ESI xor EAX xor EBX) + K40 + ECX;
+  mov EDI, ESI
+  xor EDI, EAX
+  xor EDI, EBX
+  add EDI, K40
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[0];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*0]
+//*    Data[0] := roldword(Data[0] xor Data[2] xor Data[8] xor Data[13], 1);
+  mov EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*13]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*0], EDI
+
+//i=33
+//*    T := (EDX xor ESI xor EAX) + K40 + EBX;
+  mov EDI, EDX
+  xor EDI, ESI
+  xor EDI, EAX
+  add EDI, K40
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[1];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*1]
+//*    Data[1] := roldword(Data[1] xor Data[3] xor Data[9] xor Data[14], 1);
+  mov EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*14]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*1], EDI
+
+//i=34
+//*    T := (ECX xor EDX xor ESI) + K40 + EAX;
+  mov EDI, ECX
+  xor EDI, EDX
+  xor EDI, ESI
+  add EDI, K40
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[2];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*2]
+//*    Data[2] := roldword(Data[2] xor Data[4] xor Data[10] xor Data[15], 1);
+  mov EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*15]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*2], EDI
+
+//i=35
+//*    T := (EBX xor ECX xor EDX) + K40 + ESI;
+  mov EDI, EBX
+  xor EDI, ECX
+  xor EDI, EDX
+  add EDI, K40
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[3];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*3]
+//*    Data[3] := roldword(Data[3] xor Data[5] xor Data[11] xor Data[0], 1);
+  mov EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*0]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*3], EDI
+
+//i=36
+//*    T := (EAX xor EBX xor ECX) + K40 + EDX;
+  mov EDI, EAX
+  xor EDI, EBX
+  xor EDI, ECX
+  add EDI, K40
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[4];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*4]
+//*    Data[4] := roldword(Data[4] xor Data[6] xor Data[12] xor Data[1], 1);
+  mov EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*1]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*4], EDI
+
+//i=37
+//*    T := (ESI xor EAX xor EBX) + K40 + ECX;
+  mov EDI, ESI
+  xor EDI, EAX
+  xor EDI, EBX
+  add EDI, K40
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[5];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*5]
+//*    Data[5] := roldword(Data[5] xor Data[7] xor Data[13] xor Data[2], 1);
+  mov EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*2]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*5], EDI
+
+//i=38
+//*    T := (EDX xor ESI xor EAX) + K40 + EBX;
+  mov EDI, EDX
+  xor EDI, ESI
+  xor EDI, EAX
+  add EDI, K40
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[6];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*6]
+//*    Data[6] := roldword(Data[6] xor Data[8] xor Data[14] xor Data[3], 1);
+  mov EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*3]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*6], EDI
+
+//i=39
+//*    T := (ECX xor EDX xor ESI) + K40 + EAX;
+  mov EDI, ECX
+  xor EDI, EDX
+  xor EDI, ESI
+  add EDI, K40
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[7];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*7]
+//*    Data[7] := roldword(Data[7] xor Data[9] xor Data[15] xor Data[4], 1);
+  mov EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*4]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*7], EDI
+
+//i=40
+//*    T := (EBX and ECX) or (EBX and EDX) or (ECX and EDX) + K60 + ESI;
+  mov EDI, ECX
+  mov EBP, ECX
+  or EDI, EDX
+  and EBP, EDX
+  and EDI, EBX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[8];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*8]
+//*    Data[8] := roldword(Data[8] xor Data[10] xor Data[0] xor Data[5], 1);
+  mov EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*5]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*8], EDI
+
+//i=41
+//*    T := (EAX and EBX) or (EAX and ECX) or (EBX and ECX) + K60 + EDX;
+  mov EDI, EBX
+  mov EBP, EBX
+  or EDI, ECX
+  and EBP, ECX
+  and EDI, EAX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[9];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*9]
+//*    Data[9] := roldword(Data[9] xor Data[11] xor Data[1] xor Data[6], 1);
+  mov EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*6]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*9], EDI
+
+//i=42
+//*    T := (ESI and EAX) or (ESI and EBX) or (EAX and EBX) + K60 + ECX;
+  mov EDI, EAX
+  mov EBP, EAX
+  or EDI, EBX
+  and EBP, EBX
+  and EDI, ESI
+  or EDI, EBP
+  add EDI, K60
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[10];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*10]
+//*    Data[10] := roldword(Data[10] xor Data[12] xor Data[2] xor Data[7], 1);
+  mov EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*7]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*10], EDI
+
+//i=43
+//*    T := (EDX and ESI) or (EDX and EAX) or (ESI and EAX) + K60 + EBX;
+  mov EDI, ESI
+  mov EBP, ESI
+  or EDI, EAX
+  and EBP, EAX
+  and EDI, EDX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[11];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*11]
+//*    Data[11] := roldword(Data[11] xor Data[13] xor Data[3] xor Data[8], 1);
+  mov EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*8]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*11], EDI
+
+//i=44
+//*    T := (ECX and EDX) or (ECX and ESI) or (EDX and ESI) + K60 + EAX;
+  mov EDI, EDX
+  mov EBP, EDX
+  or EDI, ESI
+  and EBP, ESI
+  and EDI, ECX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[12];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*12]
+//*    Data[12] := roldword(Data[12] xor Data[14] xor Data[4] xor Data[9], 1);
+  mov EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*9]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*12], EDI
+
+//i=45
+//*    T := (EBX and ECX) or (EBX and EDX) or (ECX and EDX) + K60 + ESI;
+  mov EDI, ECX
+  mov EBP, ECX
+  or EDI, EDX
+  and EBP, EDX
+  and EDI, EBX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[13];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*13]
+//*    Data[13] := roldword(Data[13] xor Data[15] xor Data[5] xor Data[10], 1);
+  mov EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*10]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*13], EDI
+
+//i=46
+//*    T := (EAX and EBX) or (EAX and ECX) or (EBX and ECX) + K60 + EDX;
+  mov EDI, EBX
+  mov EBP, EBX
+  or EDI, ECX
+  and EBP, ECX
+  and EDI, EAX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[14];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*14]
+//*    Data[14] := roldword(Data[14] xor Data[0] xor Data[6] xor Data[11], 1);
+  mov EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*11]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*14], EDI
+
+//i=47
+//*    T := (ESI and EAX) or (ESI and EBX) or (EAX and EBX) + K60 + ECX;
+  mov EDI, EAX
+  mov EBP, EAX
+  or EDI, EBX
+  and EBP, EBX
+  and EDI, ESI
+  or EDI, EBP
+  add EDI, K60
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[15];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*15]
+//*    Data[15] := roldword(Data[15] xor Data[1] xor Data[7] xor Data[12], 1);
+  mov EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*12]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*15], EDI
+
+//i=48
+//*    T := (EDX and ESI) or (EDX and EAX) or (ESI and EAX) + K60 + EBX;
+  mov EDI, ESI
+  mov EBP, ESI
+  or EDI, EAX
+  and EBP, EAX
+  and EDI, EDX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[0];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*0]
+//*    Data[0] := roldword(Data[0] xor Data[2] xor Data[8] xor Data[13], 1);
+  mov EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*13]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*0], EDI
+
+//i=49
+//*    T := (ECX and EDX) or (ECX and ESI) or (EDX and ESI) + K60 + EAX;
+  mov EDI, EDX
+  mov EBP, EDX
+  or EDI, ESI
+  and EBP, ESI
+  and EDI, ECX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[1];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*1]
+//*    Data[1] := roldword(Data[1] xor Data[3] xor Data[9] xor Data[14], 1);
+  mov EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*14]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*1], EDI
+
+//i=50
+//*    T := (EBX and ECX) or (EBX and EDX) or (ECX and EDX) + K60 + ESI;
+  mov EDI, ECX
+  mov EBP, ECX
+  or EDI, EDX
+  and EBP, EDX
+  and EDI, EBX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[2];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*2]
+//*    Data[2] := roldword(Data[2] xor Data[4] xor Data[10] xor Data[15], 1);
+  mov EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*15]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*2], EDI
+
+//i=51
+//*    T := (EAX and EBX) or (EAX and ECX) or (EBX and ECX) + K60 + EDX;
+  mov EDI, EBX
+  mov EBP, EBX
+  or EDI, ECX
+  and EBP, ECX
+  and EDI, EAX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[3];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*3]
+//*    Data[3] := roldword(Data[3] xor Data[5] xor Data[11] xor Data[0], 1);
+  mov EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*0]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*3], EDI
+
+//i=52
+//*    T := (ESI and EAX) or (ESI and EBX) or (EAX and EBX) + K60 + ECX;
+  mov EDI, EAX
+  mov EBP, EAX
+  or EDI, EBX
+  and EBP, EBX
+  and EDI, ESI
+  or EDI, EBP
+  add EDI, K60
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[4];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*4]
+//*    Data[4] := roldword(Data[4] xor Data[6] xor Data[12] xor Data[1], 1);
+  mov EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*1]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*4], EDI
+
+//i=53
+//*    T := (EDX and ESI) or (EDX and EAX) or (ESI and EAX) + K60 + EBX;
+  mov EDI, ESI
+  mov EBP, ESI
+  or EDI, EAX
+  and EBP, EAX
+  and EDI, EDX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[5];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*5]
+//*    Data[5] := roldword(Data[5] xor Data[7] xor Data[13] xor Data[2], 1);
+  mov EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*2]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*5], EDI
+
+//i=54
+//*    T := (ECX and EDX) or (ECX and ESI) or (EDX and ESI) + K60 + EAX;
+  mov EDI, EDX
+  mov EBP, EDX
+  or EDI, ESI
+  and EBP, ESI
+  and EDI, ECX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[6];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*6]
+//*    Data[6] := roldword(Data[6] xor Data[8] xor Data[14] xor Data[3], 1);
+  mov EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*3]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*6], EDI
+
+//i=55
+//*    T := (EBX and ECX) or (EBX and EDX) or (ECX and EDX) + K60 + ESI;
+  mov EDI, ECX
+  mov EBP, ECX
+  or EDI, EDX
+  and EBP, EDX
+  and EDI, EBX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[7];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*7]
+//*    Data[7] := roldword(Data[7] xor Data[9] xor Data[15] xor Data[4], 1);
+  mov EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*4]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*7], EDI
+
+//i=56
+//*    T := (EAX and EBX) or (EAX and ECX) or (EBX and ECX) + K60 + EDX;
+  mov EDI, EBX
+  mov EBP, EBX
+  or EDI, ECX
+  and EBP, ECX
+  and EDI, EAX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[8];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*8]
+//*    Data[8] := roldword(Data[8] xor Data[10] xor Data[0] xor Data[5], 1);
+  mov EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*5]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*8], EDI
+
+//i=57
+//*    T := (ESI and EAX) or (ESI and EBX) or (EAX and EBX) + K60 + ECX;
+  mov EDI, EAX
+  mov EBP, EAX
+  or EDI, EBX
+  and EBP, EBX
+  and EDI, ESI
+  or EDI, EBP
+  add EDI, K60
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[9];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*9]
+//*    Data[9] := roldword(Data[9] xor Data[11] xor Data[1] xor Data[6], 1);
+  mov EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*6]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*9], EDI
+
+//i=58
+//*    T := (EDX and ESI) or (EDX and EAX) or (ESI and EAX) + K60 + EBX;
+  mov EDI, ESI
+  mov EBP, ESI
+  or EDI, EAX
+  and EBP, EAX
+  and EDI, EDX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[10];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*10]
+//*    Data[10] := roldword(Data[10] xor Data[12] xor Data[2] xor Data[7], 1);
+  mov EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*7]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*10], EDI
+
+//i=59
+//*    T := (ECX and EDX) or (ECX and ESI) or (EDX and ESI) + K60 + EAX;
+  mov EDI, EDX
+  mov EBP, EDX
+  or EDI, ESI
+  and EBP, ESI
+  and EDI, ECX
+  or EDI, EBP
+  add EDI, K60
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[11];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*11]
+//*    Data[11] := roldword(Data[11] xor Data[13] xor Data[3] xor Data[8], 1);
+  mov EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*8]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*11], EDI
+
+//i=60
+//*    T := (EBX xor ECX xor EDX) + K80 + ESI;
+  mov EDI, EBX
+  xor EDI, ECX
+  xor EDI, EDX
+  add EDI, K80
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[12];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*12]
+//*    Data[12] := roldword(Data[12] xor Data[14] xor Data[4] xor Data[9], 1);
+  mov EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*9]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*12], EDI
+
+//i=61
+//*    T := (EAX xor EBX xor ECX) + K80 + EDX;
+  mov EDI, EAX
+  xor EDI, EBX
+  xor EDI, ECX
+  add EDI, K80
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[13];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*13]
+//*    Data[13] := roldword(Data[13] xor Data[15] xor Data[5] xor Data[10], 1);
+  mov EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*10]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*13], EDI
+
+//i=62
+//*    T := (ESI xor EAX xor EBX) + K80 + ECX;
+  mov EDI, ESI
+  xor EDI, EAX
+  xor EDI, EBX
+  add EDI, K80
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[14];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*14]
+//*    Data[14] := roldword(Data[14] xor Data[0] xor Data[6] xor Data[11], 1);
+  mov EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*11]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*14], EDI
+
+//i=63
+//*    T := (EDX xor ESI xor EAX) + K80 + EBX;
+  mov EDI, EDX
+  xor EDI, ESI
+  xor EDI, EAX
+  add EDI, K80
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[15];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*15]
+//*    Data[15] := roldword(Data[15] xor Data[1] xor Data[7] xor Data[12], 1);
+  mov EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*12]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*15], EDI
+
+//i=64
+//*    T := (ECX xor EDX xor ESI) + K80 + EAX;
+  mov EDI, ECX
+  xor EDI, EDX
+  xor EDI, ESI
+  add EDI, K80
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[0];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*0]
+//*    Data[0] := roldword(Data[0] xor Data[2] xor Data[8] xor Data[13], 1);
+  mov EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*13]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*0], EDI
+
+//i=65
+//*    T := (EBX xor ECX xor EDX) + K80 + ESI;
+  mov EDI, EBX
+  xor EDI, ECX
+  xor EDI, EDX
+  add EDI, K80
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[1];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*1]
+//*    Data[1] := roldword(Data[1] xor Data[3] xor Data[9] xor Data[14], 1);
+  mov EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*14]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*1], EDI
+
+//i=66
+//*    T := (EAX xor EBX xor ECX) + K80 + EDX;
+  mov EDI, EAX
+  xor EDI, EBX
+  xor EDI, ECX
+  add EDI, K80
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[2];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*2]
+//*    Data[2] := roldword(Data[2] xor Data[4] xor Data[10] xor Data[15], 1);
+  mov EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*15]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*2], EDI
+
+//i=67
+//*    T := (ESI xor EAX xor EBX) + K80 + ECX;
+  mov EDI, ESI
+  xor EDI, EAX
+  xor EDI, EBX
+  add EDI, K80
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[3];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*3]
+//*    Data[3] := roldword(Data[3] xor Data[5] xor Data[11] xor Data[0], 1);
+  mov EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*0]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*3], EDI
+
+//i=68
+//*    T := (EDX xor ESI xor EAX) + K80 + EBX;
+  mov EDI, EDX
+  xor EDI, ESI
+  xor EDI, EAX
+  add EDI, K80
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[4];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*4]
+//*    Data[4] := roldword(Data[4] xor Data[6] xor Data[12] xor Data[1], 1);
+  mov EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*1]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*4], EDI
+
+//i=69
+//*    T := (ECX xor EDX xor ESI) + K80 + EAX;
+  mov EDI, ECX
+  xor EDI, EDX
+  xor EDI, ESI
+  add EDI, K80
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[5];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*5]
+//*    Data[5] := roldword(Data[5] xor Data[7] xor Data[13] xor Data[2], 1);
+  mov EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*2]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*5], EDI
+
+//i=70
+//*    T := (EBX xor ECX xor EDX) + K80 + ESI;
+  mov EDI, EBX
+  xor EDI, ECX
+  xor EDI, EDX
+  add EDI, K80
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[6];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*6]
+//*    Data[6] := roldword(Data[6] xor Data[8] xor Data[14] xor Data[3], 1);
+  mov EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*3]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*6], EDI
+
+//i=71
+//*    T := (EAX xor EBX xor ECX) + K80 + EDX;
+  mov EDI, EAX
+  xor EDI, EBX
+  xor EDI, ECX
+  add EDI, K80
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[7];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*7]
+//*    Data[7] := roldword(Data[7] xor Data[9] xor Data[15] xor Data[4], 1);
+  mov EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*4]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*7], EDI
+
+//i=72
+//*    T := (ESI xor EAX xor EBX) + K80 + ECX;
+  mov EDI, ESI
+  xor EDI, EAX
+  xor EDI, EBX
+  add EDI, K80
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[8];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*8]
+//*    Data[8] := roldword(Data[8] xor Data[10] xor Data[0] xor Data[5], 1);
+  mov EDI, [esp+96-96+4*7+4*8]
+  xor EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*5]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*8], EDI
+
+//i=73
+//*    T := (EDX xor ESI xor EAX) + K80 + EBX;
+  mov EDI, EDX
+  xor EDI, ESI
+  xor EDI, EAX
+  add EDI, K80
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[9];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*9]
+//*    Data[9] := roldword(Data[9] xor Data[11] xor Data[1] xor Data[6], 1);
+  mov EDI, [esp+96-96+4*7+4*9]
+  xor EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*6]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*9], EDI
+
+//i=74
+//*    T := (ECX xor EDX xor ESI) + K80 + EAX;
+  mov EDI, ECX
+  xor EDI, EDX
+  xor EDI, ESI
+  add EDI, K80
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[10];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*10]
+//*    Data[10] := roldword(Data[10] xor Data[12] xor Data[2] xor Data[7], 1);
+  mov EDI, [esp+96-96+4*7+4*10]
+  xor EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*2]
+  xor EDI, [esp+96-96+4*7+4*7]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*10], EDI
+
+//i=75
+//*    T := (EBX xor ECX xor EDX) + K80 + ESI;
+  mov EDI, EBX
+  xor EDI, ECX
+  xor EDI, EDX
+  add EDI, K80
+  add EDI, ESI
+//*    EBX := rordword(EBX, 2);
+  ror EBX, 2
+//*    ESI := T + roldword(EAX, 5) + Data[11];
+  mov ESI, EAX
+  rol ESI, 5
+  add ESI, EDI
+  add ESI, [esp+96-96+4*7+4*11]
+//*    Data[11] := roldword(Data[11] xor Data[13] xor Data[3] xor Data[8], 1);
+  mov EDI, [esp+96-96+4*7+4*11]
+  xor EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*3]
+  xor EDI, [esp+96-96+4*7+4*8]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*11], EDI
+
+//i=76
+//*    T := (EAX xor EBX xor ECX) + K80 + EDX;
+  mov EDI, EAX
+  xor EDI, EBX
+  xor EDI, ECX
+  add EDI, K80
+  add EDI, EDX
+//*    EAX := rordword(EAX, 2);
+  ror EAX, 2
+//*    EDX := T + roldword(ESI, 5) + Data[12];
+  mov EDX, ESI
+  rol EDX, 5
+  add EDX, EDI
+  add EDX, [esp+96-96+4*7+4*12]
+//*    Data[12] := roldword(Data[12] xor Data[14] xor Data[4] xor Data[9], 1);
+  mov EDI, [esp+96-96+4*7+4*12]
+  xor EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*4]
+  xor EDI, [esp+96-96+4*7+4*9]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*12], EDI
+
+//i=77
+//*    T := (ESI xor EAX xor EBX) + K80 + ECX;
+  mov EDI, ESI
+  xor EDI, EAX
+  xor EDI, EBX
+  add EDI, K80
+  add EDI, ECX
+//*    ESI := rordword(ESI, 2);
+  ror ESI, 2
+//*    ECX := T + roldword(EDX, 5) + Data[13];
+  mov ECX, EDX
+  rol ECX, 5
+  add ECX, EDI
+  add ECX, [esp+96-96+4*7+4*13]
+//*    Data[13] := roldword(Data[13] xor Data[15] xor Data[5] xor Data[10], 1);
+  mov EDI, [esp+96-96+4*7+4*13]
+  xor EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*5]
+  xor EDI, [esp+96-96+4*7+4*10]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*13], EDI
+
+//i=78
+//*    T := (EDX xor ESI xor EAX) + K80 + EBX;
+  mov EDI, EDX
+  xor EDI, ESI
+  xor EDI, EAX
+  add EDI, K80
+  add EDI, EBX
+//*    EDX := rordword(EDX, 2);
+  ror EDX, 2
+//*    EBX := T + roldword(ECX, 5) + Data[14];
+  mov EBX, ECX
+  rol EBX, 5
+  add EBX, EDI
+  add EBX, [esp+96-96+4*7+4*14]
+//*    Data[14] := roldword(Data[14] xor Data[0] xor Data[6] xor Data[11], 1);
+  mov EDI, [esp+96-96+4*7+4*14]
+  xor EDI, [esp+96-96+4*7+4*0]
+  xor EDI, [esp+96-96+4*7+4*6]
+  xor EDI, [esp+96-96+4*7+4*11]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*14], EDI
+
+//i=79
+//*    T := (ECX xor EDX xor ESI) + K80 + EAX;
+  mov EDI, ECX
+  xor EDI, EDX
+  xor EDI, ESI
+  add EDI, K80
+  add EDI, EAX
+//*    ECX := rordword(ECX, 2);
+  ror ECX, 2
+//*    EAX := T + roldword(EBX, 5) + Data[15];
+  mov EAX, EBX
+  rol EAX, 5
+  add EAX, EDI
+  add EAX, [esp+96-96+4*7+4*15]
+//*    Data[15] := roldword(Data[15] xor Data[1] xor Data[7] xor Data[12], 1);
+  mov EDI, [esp+96-96+4*7+4*15]
+  xor EDI, [esp+96-96+4*7+4*1]
+  xor EDI, [esp+96-96+4*7+4*7]
+  xor EDI, [esp+96-96+4*7+4*12]
+  rol EDI, 1
+  mov [esp+96-96+4*7+4*15], EDI
+
+  pop EBP
+
+  mov EDI, [pctx{.State}]
+//  Inc(ctx.State[0], A);
+  add [EDI+4*0], EAX
+//  Inc(ctx.State[1], B);
+  add [EDI+4*1], EBX
+//  Inc(ctx.State[2], C);
+  add [EDI+4*2], ECX
+//  Inc(ctx.State[3], D);
+  add [EDI+4*3], EDX
+//  Inc(ctx.State[4], E);
+  add [EDI+4*4], ESI
+
+//Inc(ctx.Length,64);
+  mov eax, [ebp-4]
+  add	dword ptr [eax+88],64
+  adc	dword ptr [eax+92],0
+
+  pop EDI
+  pop ESI
+  pop EDX
+  pop ECX
+  pop EBX
+  pop EAX
+end;
+

Some files were not shown because too many files changed in this diff