Browse Source

* Packages: x86_64 assembly language version of MD5Transform added to hash package

J. Gareth "Curious Kit" Moreton 1 year ago
parent
commit
fce4a953a7
3 changed files with 1442 additions and 105 deletions
  1. 22 105
      packages/hash/src/md5.pp
  2. 710 0
      packages/hash/src/md5x64_sysv.inc
  3. 710 0
      packages/hash/src/md5x64_win.inc

+ 22 - 105
packages/hash/src/md5.pp

@@ -341,109 +341,27 @@ begin
 end;
 
 
-{$IF (NOT(DEFINED(MD5PASCAL))) and (DEFINED(CPUI386)) }
-{$i md5i386.inc}
-{$ENDIF}
-{$IF (NOT(DEFINED(MD5PASCAL))) and (DEFINED(CPUX86_64)) }
-{$OPTIMIZATION USERBP} //PEEPHOLE
-procedure MD5Transform(var Context: TMDContext; Buffer: Pointer);
-type
-  TBlock = array[0..15] of Cardinal;
-  PBlock = ^TBlock;
-var
-  a, b, c, d: Cardinal;
-  //Block: array[0..15] of Cardinal absolute Buffer;
-  Block: PBlock absolute Buffer;
-begin
-  //Invert(Buffer, @Block, 64);
-  a := Context.State[0];
-  b := Context.State[1];
-  c := Context.State[2];
-  d := Context.State[3];
-
-{$push}
-{$r-,q-}
-
-  // Round 1
-  a := b + roldword(dword(a + ((b and c) or ((not b) and d)) + Block^[0]  + $d76aa478),  7);
-  d := a + roldword(dword(d + ((a and b) or ((not a) and c)) + Block^[1]  + $e8c7b756), 12);
-  c := d + roldword(dword(c + ((d and a) or ((not d) and b)) + Block^[2]  + $242070db), 17);
-  b := c + roldword(dword(b + ((c and d) or ((not c) and a)) + Block^[3]  + $c1bdceee), 22);
-  a := b + roldword(dword(a + ((b and c) or ((not b) and d)) + Block^[4]  + $f57c0faf),  7);
-  d := a + roldword(dword(d + ((a and b) or ((not a) and c)) + Block^[5]  + $4787c62a), 12);
-  c := d + roldword(dword(c + ((d and a) or ((not d) and b)) + Block^[6]  + $a8304613), 17);
-  b := c + roldword(dword(b + ((c and d) or ((not c) and a)) + Block^[7]  + $fd469501), 22);
-  a := b + roldword(dword(a + ((b and c) or ((not b) and d)) + Block^[8]  + $698098d8),  7);
-  d := a + roldword(dword(d + ((a and b) or ((not a) and c)) + Block^[9]  + $8b44f7af), 12);
-  c := d + roldword(dword(c + ((d and a) or ((not d) and b)) + Block^[10] + $ffff5bb1), 17);
-  b := c + roldword(dword(b + ((c and d) or ((not c) and a)) + Block^[11] + $895cd7be), 22);
-  a := b + roldword(dword(a + ((b and c) or ((not b) and d)) + Block^[12] + $6b901122),  7);
-  d := a + roldword(dword(d + ((a and b) or ((not a) and c)) + Block^[13] + $fd987193), 12);
-  c := d + roldword(dword(c + ((d and a) or ((not d) and b)) + Block^[14] + $a679438e), 17);
-  b := c + roldword(dword(b + ((c and d) or ((not c) and a)) + Block^[15] + $49b40821), 22);
-  // Round 2
-  a := b + roldword(dword(a + ((b and d) or (c and (not d))) + Block^[1]  + $f61e2562),  5);
-  d := a + roldword(dword(d + ((a and c) or (b and (not c))) + Block^[6]  + $c040b340),  9);
-  c := d + roldword(dword(c + ((d and b) or (a and (not b))) + Block^[11] + $265e5a51), 14);
-  b := c + roldword(dword(b + ((c and a) or (d and (not a))) + Block^[0]  + $e9b6c7aa), 20);
-  a := b + roldword(dword(a + ((b and d) or (c and (not d))) + Block^[5]  + $d62f105d),  5);
-  d := a + roldword(dword(d + ((a and c) or (b and (not c))) + Block^[10] + $02441453),  9);
-  c := d + roldword(dword(c + ((d and b) or (a and (not b))) + Block^[15] + $d8a1e681), 14);
-  b := c + roldword(dword(b + ((c and a) or (d and (not a))) + Block^[4]  + $e7d3fbc8), 20);
-  a := b + roldword(dword(a + ((b and d) or (c and (not d))) + Block^[9]  + $21e1cde6),  5);
-  d := a + roldword(dword(d + ((a and c) or (b and (not c))) + Block^[14] + $c33707d6),  9);
-  c := d + roldword(dword(c + ((d and b) or (a and (not b))) + Block^[3]  + $f4d50d87), 14);
-  b := c + roldword(dword(b + ((c and a) or (d and (not a))) + Block^[8]  + $455a14ed), 20);
-  a := b + roldword(dword(a + ((b and d) or (c and (not d))) + Block^[13] + $a9e3e905),  5);
-  d := a + roldword(dword(d + ((a and c) or (b and (not c))) + Block^[2]  + $fcefa3f8),  9);
-  c := d + roldword(dword(c + ((d and b) or (a and (not b))) + Block^[7]  + $676f02d9), 14);
-  b := c + roldword(dword(b + ((c and a) or (d and (not a))) + Block^[12] + $8d2a4c8a), 20);
-  // Round 3
-  a := b + roldword(dword(a + (b xor c xor d) + Block^[5]  + $fffa3942),  4);
-  d := a + roldword(dword(d + (a xor b xor c) + Block^[8]  + $8771f681), 11);
-  c := d + roldword(dword(c + (d xor a xor b) + Block^[11] + $6d9d6122), 16);
-  b := c + roldword(dword(b + (c xor d xor a) + Block^[14] + $fde5380c), 23);
-  a := b + roldword(dword(a + (b xor c xor d) + Block^[1]  + $a4beea44),  4);
-  d := a + roldword(dword(d + (a xor b xor c) + Block^[4]  + $4bdecfa9), 11);
-  c := d + roldword(dword(c + (d xor a xor b) + Block^[7]  + $f6bb4b60), 16);
-  b := c + roldword(dword(b + (c xor d xor a) + Block^[10] + $bebfbc70), 23);
-  a := b + roldword(dword(a + (b xor c xor d) + Block^[13] + $289b7ec6),  4);
-  d := a + roldword(dword(d + (a xor b xor c) + Block^[0]  + $eaa127fa), 11);
-  c := d + roldword(dword(c + (d xor a xor b) + Block^[3]  + $d4ef3085), 16);
-  b := c + roldword(dword(b + (c xor d xor a) + Block^[6]  + $04881d05), 23);
-  a := b + roldword(dword(a + (b xor c xor d) + Block^[9]  + $d9d4d039),  4);
-  d := a + roldword(dword(d + (a xor b xor c) + Block^[12] + $e6db99e5), 11);
-  c := d + roldword(dword(c + (d xor a xor b) + Block^[15] + $1fa27cf8), 16);
-  b := c + roldword(dword(b + (c xor d xor a) + Block^[2]  + $c4ac5665), 23);
-  // Round 4
-  a := b + roldword(dword(a + (c xor (b or (not d))) + Block^[0]  + $f4292244),  6);
-  d := a + roldword(dword(d + (b xor (a or (not c))) + Block^[7]  + $432aff97), 10);
-  c := d + roldword(dword(c + (a xor (d or (not b))) + Block^[14] + $ab9423a7), 15);
-  b := c + roldword(dword(b + (d xor (c or (not a))) + Block^[5]  + $fc93a039), 21);
-  a := b + roldword(dword(a + (c xor (b or (not d))) + Block^[12] + $655b59c3),  6);
-  d := a + roldword(dword(d + (b xor (a or (not c))) + Block^[3]  + $8f0ccc92), 10);
-  c := d + roldword(dword(c + (a xor (d or (not b))) + Block^[10] + $ffeff47d), 15);
-  b := c + roldword(dword(b + (d xor (c or (not a))) + Block^[1]  + $85845dd1), 21);
-  a := b + roldword(dword(a + (c xor (b or (not d))) + Block^[8]  + $6fa87e4f),  6);
-  d := a + roldword(dword(d + (b xor (a or (not c))) + Block^[15] + $fe2ce6e0), 10);
-  c := d + roldword(dword(c + (a xor (d or (not b))) + Block^[6]  + $a3014314), 15);
-  b := c + roldword(dword(b + (d xor (c or (not a))) + Block^[13] + $4e0811a1), 21);
-  a := b + roldword(dword(a + (c xor (b or (not d))) + Block^[4]  + $f7537e82),  6);
-  d := a + roldword(dword(d + (b xor (a or (not c))) + Block^[11] + $bd3af235), 10);
-  c := d + roldword(dword(c + (a xor (d or (not b))) + Block^[2]  + $2ad7d2bb), 15);
-  b := c + roldword(dword(b + (d xor (c or (not a))) + Block^[9]  + $eb86d391), 21);
-
-
-  inc(Context.State[0],a);
-  inc(Context.State[1],b);
-  inc(Context.State[2],c);
-  inc(Context.State[3],d);
-{$pop}
-  inc(Context.Length,64);
-end;
-{$OPTIMIZATION DEFAULT}
-{$ENDIF}
-{$IF DEFINED(MD5PASCAL) or (NOT ((DEFINED(CPUX86_64)) or (DEFINED(CPUI386))))}
+// Select the MD5Transform implementation: an assembler include when the target CPU/ABI has one (sets MD5ASM), else the Pascal reference code
+// Define MD5PASCAL to force use of original reference code
+{$ifndef MD5PASCAL}
+  {$if defined(CPU386)}
+    {$if defined(CPUX86_HAS_BSWAP)}
+      {$i md5i386.inc}
+      {$define MD5ASM}
+    {$endif CPUX86_HAS_BSWAP}
+  {$elseif defined(CPUX64)} // must be $elseif: text after a plain $else is ignored, so "$else if" would include the x86_64 code on every non-i386 CPU
+    {$ifdef MSWINDOWS}
+      // Microsoft Windows uses a different calling convention to the System V ABI
+      {$i md5x64_win.inc}
+      {$define MD5ASM}
+    {$else}
+      {$i md5x64_sysv.inc}
+      {$define MD5ASM}
+    {$endif MSWINDOWS}
+  {$endif}
+{$endif not MD5PASCAL}
+
+{$if not defined(MD5ASM)}
 // Original version
 procedure MD5Transform(var Context: TMDContext; Buffer: Pointer);
 
@@ -519,8 +437,7 @@ begin
 {$pop}
   inc(Context.Length,64);
 end;
-{$ENDIF}
-
+{$endif not MD5ASM}
 
 procedure MDInit(out Context: TMDContext; const Version: TMDVersion);
 begin

+ 710 - 0
packages/hash/src/md5x64_sysv.inc

@@ -0,0 +1,710 @@
+// x86_64 (System V ABI, i.e. non-Windows targets) assembly optimized version of the MD5 block transform
+procedure MD5Transform(var Context: TMDContext; Buffer: Pointer); assembler; nostackframe;
+// RDI = Context, RSI = Buffer; registers written: EAX, ECX, EDX, R8D, R9D; memory is only accessed through RDI and RSI (no stack access)
+{$asmmode intel}
+asm
+  // R8D = A, R9D = B, ECX = C, EDX = D; EAX = temporary holding each step's boolean function value
+  MOV R8D, TMDContext.State[RDI + 4*0] // A, B, C, D := Context.State[0 .. 3];
+  MOV R9D, TMDContext.State[RDI + 4*1]
+  MOV ECX, TMDContext.State[RDI + 4*2]
+  MOV EDX, TMDContext.State[RDI + 4*3]
+// Round 1: F(x,y,z) = (x and y) or ((not x) and z), computed below as ((y xor z) and x) xor z; Data[n] is the dword at [RSI + 4*n]
+//R8D := R9D + roldword(dword(R8D + ((R9D and ECX) or ((not R9D) and EDX)) + Data[0] + $d76aa478), 7);
+  MOV EAX, ECX
+  ADD R8D, $d76aa478
+  XOR EAX, EDX
+  ADD R8D, [RSI + 4*0]
+  AND EAX, R9D
+  XOR EAX, EDX
+  ADD R8D, EAX
+  ROL R8D, 7
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + ((R8D and R9D) or ((not R8D) and ECX)) + Data[1] + $e8c7b756), 12);
+  MOV EAX, R9D
+  ADD EDX, $e8c7b756
+  XOR EAX, ECX
+  ADD EDX, [RSI + 4*1]
+  AND EAX, R8D
+  XOR EAX, ECX
+  ADD EDX, EAX
+  ROL EDX, 12
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + ((EDX and R8D) or ((not EDX) and R9D)) + Data[2] + $242070db), 17);
+  MOV EAX, R8D
+  ADD ECX, $242070db
+  XOR EAX, R9D
+  ADD ECX, [RSI + 4*2]
+  AND EAX, EDX
+  XOR EAX, R9D
+  ADD ECX, EAX
+  ROL ECX, 17
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + ((ECX and EDX) or ((not ECX) and R8D)) + Data[3] + $c1bdceee), 22);
+  MOV EAX, EDX
+  ADD R9D, $c1bdceee
+  XOR EAX, R8D
+  ADD R9D, [RSI + 4*3]
+  AND EAX, ECX
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 22
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + ((R9D and ECX) or ((not R9D) and EDX)) + Data[4] + $f57c0faf), 7);
+  MOV EAX, ECX
+  ADD R8D, $f57c0faf
+  XOR EAX, EDX
+  ADD R8D, [RSI + 4*4]
+  AND EAX, R9D
+  XOR EAX, EDX
+  ADD R8D, EAX
+  ROL R8D, 7
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + ((R8D and R9D) or ((not R8D) and ECX)) + Data[5] + $4787c62a), 12);
+  MOV EAX, R9D
+  ADD EDX, $4787c62a
+  XOR EAX, ECX
+  ADD EDX, [RSI + 4*5]
+  AND EAX, R8D
+  XOR EAX, ECX
+  ADD EDX, EAX
+  ROL EDX, 12
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + ((EDX and R8D) or ((not EDX) and R9D)) + Data[6] + $a8304613), 17);
+  MOV EAX, R8D
+  ADD ECX, $a8304613
+  XOR EAX, R9D
+  ADD ECX, [RSI + 4*6]
+  AND EAX, EDX
+  XOR EAX, R9D
+  ADD ECX, EAX
+  ROL ECX, 17
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + ((ECX and EDX) or ((not ECX) and R8D)) + Data[7] + $fd469501), 22);
+  MOV EAX, EDX
+  ADD R9D, $fd469501
+  XOR EAX, R8D
+  ADD R9D, [RSI + 4*7]
+  AND EAX, ECX
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 22
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + ((R9D and ECX) or ((not R9D) and EDX)) + Data[8] + $698098d8), 7);
+  MOV EAX, ECX
+  ADD R8D, $698098d8
+  XOR EAX, EDX
+  ADD R8D, [RSI + 4*8]
+  AND EAX, R9D
+  XOR EAX, EDX
+  ADD R8D, EAX
+  ROL R8D, 7
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + ((R8D and R9D) or ((not R8D) and ECX)) + Data[9] + $8b44f7af), 12);
+  MOV EAX, R9D
+  ADD EDX, $8b44f7af
+  XOR EAX, ECX
+  ADD EDX, [RSI + 4*9]
+  AND EAX, R8D
+  XOR EAX, ECX
+  ADD EDX, EAX
+  ROL EDX, 12
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + ((EDX and R8D) or ((not EDX) and R9D)) + Data[10] + $ffff5bb1), 17);
+  MOV EAX, R8D
+  ADD ECX, $ffff5bb1
+  XOR EAX, R9D
+  ADD ECX, [RSI + 4*10]
+  AND EAX, EDX
+  XOR EAX, R9D
+  ADD ECX, EAX
+  ROL ECX, 17
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + ((ECX and EDX) or ((not ECX) and R8D)) + Data[11] + $895cd7be), 22);
+  MOV EAX, EDX
+  ADD R9D, $895cd7be
+  XOR EAX, R8D
+  ADD R9D, [RSI + 4*11]
+  AND EAX, ECX
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 22
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + ((R9D and ECX) or ((not R9D) and EDX)) + Data[12] + $6b901122), 7);
+  MOV EAX, ECX
+  ADD R8D, $6b901122
+  XOR EAX, EDX
+  ADD R8D, [RSI + 4*12]
+  AND EAX, R9D
+  XOR EAX, EDX
+  ADD R8D, EAX
+  ROL R8D, 7
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + ((R8D and R9D) or ((not R8D) and ECX)) + Data[13] + $fd987193), 12);
+  MOV EAX, R9D
+  ADD EDX, $fd987193
+  XOR EAX, ECX
+  ADD EDX, [RSI + 4*13]
+  AND EAX, R8D
+  XOR EAX, ECX
+  ADD EDX, EAX
+  ROL EDX, 12
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + ((EDX and R8D) or ((not EDX) and R9D)) + Data[14] + $a679438e), 17);
+  MOV EAX, R8D
+  ADD ECX, $a679438e
+  XOR EAX, R9D
+  ADD ECX, [RSI + 4*14]
+  AND EAX, EDX
+  XOR EAX, R9D
+  ADD ECX, EAX
+  ROL ECX, 17
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + ((ECX and EDX) or ((not ECX) and R8D)) + Data[15] + $49b40821), 22);
+  MOV EAX, EDX
+  ADD R9D, $49b40821
+  XOR EAX, R8D
+  ADD R9D, [RSI + 4*15]
+  AND EAX, ECX
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 22
+  ADD R9D, ECX

+// Round 2: G(x,y,z) = (x and z) or (y and (not z)), computed below as ((x xor y) and z) xor y
+//R8D := R9D + roldword(dword(R8D + ((R9D and EDX) or (ECX and (not EDX))) + Data[1] + $f61e2562), 5);
+  MOV EAX, R9D
+  ADD R8D, $f61e2562
+  XOR EAX, ECX
+  ADD R8D, [RSI + 4*1]
+  AND EAX, EDX
+  XOR EAX, ECX
+  ADD R8D, EAX
+  ROL R8D, 5
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + ((R8D and ECX) or (R9D and (not ECX))) + Data[6] + $c040b340), 9);
+  MOV EAX, R8D
+  ADD EDX, $c040b340
+  XOR EAX, R9D
+  ADD EDX, [RSI + 4*6]
+  AND EAX, ECX
+  XOR EAX, R9D
+  ADD EDX, EAX
+  ROL EDX, 9
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + ((EDX and R9D) or (R8D and (not R9D))) + Data[11] + $265e5a51), 14);
+  MOV EAX, EDX
+  ADD ECX, $265e5a51
+  XOR EAX, R8D
+  ADD ECX, [RSI + 4*11]
+  AND EAX, R9D
+  XOR EAX, R8D
+  ADD ECX, EAX
+  ROL ECX, 14
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + ((ECX and R8D) or (EDX and (not R8D))) + Data[0] + $e9b6c7aa), 20);
+  MOV EAX, ECX
+  ADD R9D, $e9b6c7aa
+  XOR EAX, EDX
+  ADD R9D, [RSI + 4*0]
+  AND EAX, R8D
+  XOR EAX, EDX
+  ADD R9D, EAX
+  ROL R9D, 20
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + ((R9D and EDX) or (ECX and (not EDX))) + Data[5] + $d62f105d), 5);
+  MOV EAX, R9D
+  ADD R8D, $d62f105d
+  XOR EAX, ECX
+  ADD R8D, [RSI + 4*5]
+  AND EAX, EDX
+  XOR EAX, ECX
+  ADD R8D, EAX
+  ROL R8D, 5
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + ((R8D and ECX) or (R9D and (not ECX))) + Data[10] + $02441453), 9);
+  MOV EAX, R8D
+  ADD EDX, $02441453
+  XOR EAX, R9D
+  ADD EDX, [RSI + 4*10]
+  AND EAX, ECX
+  XOR EAX, R9D
+  ADD EDX, EAX
+  ROL EDX, 9
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + ((EDX and R9D) or (R8D and (not R9D))) + Data[15] + $d8a1e681), 14);
+  MOV EAX, EDX
+  ADD ECX, $d8a1e681
+  XOR EAX, R8D
+  ADD ECX, [RSI + 4*15]
+  AND EAX, R9D
+  XOR EAX, R8D
+  ADD ECX, EAX
+  ROL ECX, 14
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + ((ECX and R8D) or (EDX and (not R8D))) + Data[4] + $e7d3fbc8), 20);
+  MOV EAX, ECX
+  ADD R9D, $e7d3fbc8
+  XOR EAX, EDX
+  ADD R9D, [RSI + 4*4]
+  AND EAX, R8D
+  XOR EAX, EDX
+  ADD R9D, EAX
+  ROL R9D, 20
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + ((R9D and EDX) or (ECX and (not EDX))) + Data[9] + $21e1cde6), 5);
+  MOV EAX, R9D
+  ADD R8D, $21e1cde6
+  XOR EAX, ECX
+  ADD R8D, [RSI + 4*9]
+  AND EAX, EDX
+  XOR EAX, ECX
+  ADD R8D, EAX
+  ROL R8D, 5
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + ((R8D and ECX) or (R9D and (not ECX))) + Data[14] + $c33707d6), 9);
+  MOV EAX, R8D
+  ADD EDX, $c33707d6
+  XOR EAX, R9D
+  ADD EDX, [RSI + 4*14]
+  AND EAX, ECX
+  XOR EAX, R9D
+  ADD EDX, EAX
+  ROL EDX, 9
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + ((EDX and R9D) or (R8D and (not R9D))) + Data[3] + $f4d50d87), 14);
+  MOV EAX, EDX
+  ADD ECX, $f4d50d87
+  XOR EAX, R8D
+  ADD ECX, [RSI + 4*3]
+  AND EAX, R9D
+  XOR EAX, R8D
+  ADD ECX, EAX
+  ROL ECX, 14
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + ((ECX and R8D) or (EDX and (not R8D))) + Data[8] + $455a14ed), 20);
+  MOV EAX, ECX
+  ADD R9D, $455a14ed
+  XOR EAX, EDX
+  ADD R9D, [RSI + 4*8]
+  AND EAX, R8D
+  XOR EAX, EDX
+  ADD R9D, EAX
+  ROL R9D, 20
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + ((R9D and EDX) or (ECX and (not EDX))) + Data[13] + $a9e3e905), 5);
+  MOV EAX, R9D
+  ADD R8D, $a9e3e905
+  XOR EAX, ECX
+  ADD R8D, [RSI + 4*13]
+  AND EAX, EDX
+  XOR EAX, ECX
+  ADD R8D, EAX
+  ROL R8D, 5
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + ((R8D and ECX) or (R9D and (not ECX))) + Data[2] + $fcefa3f8), 9);
+  MOV EAX, R8D
+  ADD EDX, $fcefa3f8
+  XOR EAX, R9D
+  ADD EDX, [RSI + 4*2]
+  AND EAX, ECX
+  XOR EAX, R9D
+  ADD EDX, EAX
+  ROL EDX, 9
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + ((EDX and R9D) or (R8D and (not R9D))) + Data[7] + $676f02d9), 14);
+  MOV EAX, EDX
+  ADD ECX, $676f02d9
+  XOR EAX, R8D
+  ADD ECX, [RSI + 4*7]
+  AND EAX, R9D
+  XOR EAX, R8D
+  ADD ECX, EAX
+  ROL ECX, 14
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + ((ECX and R8D) or (EDX and (not R8D))) + Data[12] + $8d2a4c8a), 20);
+  MOV EAX, ECX
+  ADD R9D, $8d2a4c8a
+  XOR EAX, EDX
+  ADD R9D, [RSI + 4*12]
+  AND EAX, R8D
+  XOR EAX, EDX
+  ADD R9D, EAX
+  ROL R9D, 20
+  ADD R9D, ECX

+// Round 3: H(x,y,z) = x xor y xor z
+//R8D := R9D + roldword(dword(R8D + (R9D xor ECX xor EDX) + Data[5] + $fffa3942), 4);
+  MOV EAX, R9D
+  ADD R8D, $fffa3942
+  XOR EAX, ECX
+  ADD R8D, [RSI + 4*5]
+  XOR EAX, EDX
+  ADD R8D, EAX
+  ROL R8D, 4
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + (R8D xor R9D xor ECX) + Data[8] + $8771f681), 11);
+  MOV EAX, R8D
+  ADD EDX, $8771f681
+  XOR EAX, R9D
+  ADD EDX, [RSI + 4*8]
+  XOR EAX, ECX
+  ADD EDX, EAX
+  ROL EDX, 11
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + (EDX xor R8D xor R9D) + Data[11] + $6d9d6122), 16);
+  MOV EAX, EDX
+  ADD ECX, $6d9d6122
+  XOR EAX, R8D
+  ADD ECX, [RSI + 4*11]
+  XOR EAX, R9D
+  ADD ECX, EAX
+  ROL ECX, 16
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + (ECX xor EDX xor R8D) + Data[14] + $fde5380c), 23);
+  MOV EAX, ECX
+  ADD R9D, $fde5380c
+  XOR EAX, EDX
+  ADD R9D, [RSI + 4*14]
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 23
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + (R9D xor ECX xor EDX) + Data[1] + $a4beea44), 4);
+  MOV EAX, R9D
+  ADD R8D, $a4beea44
+  XOR EAX, ECX
+  ADD R8D, [RSI + 4*1]
+  XOR EAX, EDX
+  ADD R8D, EAX
+  ROL R8D, 4
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + (R8D xor R9D xor ECX) + Data[4] + $4bdecfa9), 11);
+  MOV EAX, R8D
+  ADD EDX, $4bdecfa9
+  XOR EAX, R9D
+  ADD EDX, [RSI + 4*4]
+  XOR EAX, ECX
+  ADD EDX, EAX
+  ROL EDX, 11
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + (EDX xor R8D xor R9D) + Data[7] + $f6bb4b60), 16);
+  MOV EAX, EDX
+  ADD ECX, $f6bb4b60
+  XOR EAX, R8D
+  ADD ECX, [RSI + 4*7]
+  XOR EAX, R9D
+  ADD ECX, EAX
+  ROL ECX, 16
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + (ECX xor EDX xor R8D) + Data[10] + $bebfbc70), 23);
+  MOV EAX, ECX
+  ADD R9D, $bebfbc70
+  XOR EAX, EDX
+  ADD R9D, [RSI + 4*10]
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 23
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + (R9D xor ECX xor EDX) + Data[13] + $289b7ec6), 4);
+  MOV EAX, R9D
+  ADD R8D, $289b7ec6
+  XOR EAX, ECX
+  ADD R8D, [RSI + 4*13]
+  XOR EAX, EDX
+  ADD R8D, EAX
+  ROL R8D, 4
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + (R8D xor R9D xor ECX) + Data[0] + $eaa127fa), 11);
+  MOV EAX, R8D
+  ADD EDX, $eaa127fa
+  XOR EAX, R9D
+  ADD EDX, [RSI + 4*0]
+  XOR EAX, ECX
+  ADD EDX, EAX
+  ROL EDX, 11
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + (EDX xor R8D xor R9D) + Data[3] + $d4ef3085), 16);
+  MOV EAX, EDX
+  ADD ECX, $d4ef3085
+  XOR EAX, R8D
+  ADD ECX, [RSI + 4*3]
+  XOR EAX, R9D
+  ADD ECX, EAX
+  ROL ECX, 16
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + (ECX xor EDX xor R8D) + Data[6] + $04881d05), 23);
+  MOV EAX, ECX
+  ADD R9D, $04881d05
+  XOR EAX, EDX
+  ADD R9D, [RSI + 4*6]
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 23
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + (R9D xor ECX xor EDX) + Data[9] + $d9d4d039), 4);
+  MOV EAX, R9D
+  ADD R8D, $d9d4d039
+  XOR EAX, ECX
+  ADD R8D, [RSI + 4*9]
+  XOR EAX, EDX
+  ADD R8D, EAX
+  ROL R8D, 4
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + (R8D xor R9D xor ECX) + Data[12] + $e6db99e5), 11);
+  MOV EAX, R8D
+  ADD EDX, $e6db99e5
+  XOR EAX, R9D
+  ADD EDX, [RSI + 4*12]
+  XOR EAX, ECX
+  ADD EDX, EAX
+  ROL EDX, 11
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + (EDX xor R8D xor R9D) + Data[15] + $1fa27cf8), 16);
+  MOV EAX, EDX
+  ADD ECX, $1fa27cf8
+  XOR EAX, R8D
+  ADD ECX, [RSI + 4*15]
+  XOR EAX, R9D
+  ADD ECX, EAX
+  ROL ECX, 16
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + (ECX xor EDX xor R8D) + Data[2] + $c4ac5665), 23);
+  MOV EAX, ECX
+  ADD R9D, $c4ac5665
+  XOR EAX, EDX
+  ADD R9D, [RSI + 4*2]
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 23
+  ADD R9D, ECX

+// Round 4: I(x,y,z) = y xor (x or (not z)), computed below as NOT z, then OR x, then XOR y
+//R8D := R9D + roldword(dword(R8D + (ECX xor (R9D or (not EDX))) + Data[0] + $f4292244), 6);
+  MOV EAX, EDX
+  ADD R8D, $f4292244
+  NOT EAX
+  ADD R8D, [RSI + 4*0]
+  OR  EAX, R9D
+  XOR EAX, ECX
+  ADD R8D, EAX
+  ROL R8D, 6
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + (R9D xor (R8D or (not ECX))) + Data[7] + $432aff97), 10);
+  MOV EAX, ECX
+  ADD EDX, $432aff97
+  NOT EAX
+  ADD EDX, [RSI + 4*7]
+  OR  EAX, R8D
+  XOR EAX, R9D
+  ADD EDX, EAX
+  ROL EDX, 10
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + (R8D xor (EDX or (not R9D))) + Data[14] + $ab9423a7), 15);
+  MOV EAX, R9D
+  ADD ECX, $ab9423a7
+  NOT EAX
+  ADD ECX, [RSI + 4*14]
+  OR  EAX, EDX
+  XOR EAX, R8D
+  ADD ECX, EAX
+  ROL ECX, 15
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + (EDX xor (ECX or (not R8D))) + Data[5] + $fc93a039), 21);
+  MOV EAX, R8D
+  ADD R9D, $fc93a039
+  NOT EAX
+  ADD R9D, [RSI + 4*5]
+  OR  EAX, ECX
+  XOR EAX, EDX
+  ADD R9D, EAX
+  ROL R9D, 21
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + (ECX xor (R9D or (not EDX))) + Data[12] + $655b59c3), 6);
+  MOV EAX, EDX
+  ADD R8D, $655b59c3
+  NOT EAX
+  ADD R8D, [RSI + 4*12]
+  OR  EAX, R9D
+  XOR EAX, ECX
+  ADD R8D, EAX
+  ROL R8D, 6
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + (R9D xor (R8D or (not ECX))) + Data[3] + $8f0ccc92), 10);
+  MOV EAX, ECX
+  ADD EDX, $8f0ccc92
+  NOT EAX
+  ADD EDX, [RSI + 4*3]
+  OR  EAX, R8D
+  XOR EAX, R9D
+  ADD EDX, EAX
+  ROL EDX, 10
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + (R8D xor (EDX or (not R9D))) + Data[10] + $ffeff47d), 15);
+  MOV EAX, R9D
+  ADD ECX, $ffeff47d
+  NOT EAX
+  ADD ECX, [RSI + 4*10]
+  OR  EAX, EDX
+  XOR EAX, R8D
+  ADD ECX, EAX
+  ROL ECX, 15
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + (EDX xor (ECX or (not R8D))) + Data[1] + $85845dd1), 21);
+  MOV EAX, R8D
+  ADD R9D, $85845dd1
+  NOT EAX
+  ADD R9D, [RSI + 4*1]
+  OR  EAX, ECX
+  XOR EAX, EDX
+  ADD R9D, EAX
+  ROL R9D, 21
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + (ECX xor (R9D or (not EDX))) + Data[8] + $6fa87e4f), 6);
+  MOV EAX, EDX
+  ADD R8D, $6fa87e4f
+  NOT EAX
+  ADD R8D, [RSI + 4*8]
+  OR  EAX, R9D
+  XOR EAX, ECX
+  ADD R8D, EAX
+  ROL R8D, 6
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + (R9D xor (R8D or (not ECX))) + Data[15] + $fe2ce6e0), 10);
+  MOV EAX, ECX
+  ADD EDX, $fe2ce6e0
+  NOT EAX
+  ADD EDX, [RSI + 4*15]
+  OR  EAX, R8D
+  XOR EAX, R9D
+  ADD EDX, EAX
+  ROL EDX, 10
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + (R8D xor (EDX or (not R9D))) + Data[6] + $a3014314), 15);
+  MOV EAX, R9D
+  ADD ECX, $a3014314
+  NOT EAX
+  ADD ECX, [RSI + 4*6]
+  OR  EAX, EDX
+  XOR EAX, R8D
+  ADD ECX, EAX
+  ROL ECX, 15
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + (EDX xor (ECX or (not R8D))) + Data[13] + $4e0811a1), 21);
+  MOV EAX, R8D
+  ADD R9D, $4e0811a1
+  NOT EAX
+  ADD R9D, [RSI + 4*13]
+  OR  EAX, ECX
+  XOR EAX, EDX
+  ADD R9D, EAX
+  ROL R9D, 21
+  ADD R9D, ECX

+//R8D := R9D + roldword(dword(R8D + (ECX xor (R9D or (not EDX))) + Data[4] + $f7537e82), 6);
+  MOV EAX, EDX
+  ADD R8D, $f7537e82
+  NOT EAX
+  ADD R8D, [RSI + 4*4]
+  OR  EAX, R9D
+  XOR EAX, ECX
+  ADD R8D, EAX
+  ROL R8D, 6
+  ADD R8D, R9D

+//EDX := R8D + roldword(dword(EDX + (R9D xor (R8D or (not ECX))) + Data[11] + $bd3af235), 10);
+  MOV EAX, ECX
+  ADD EDX, $bd3af235
+  NOT EAX
+  ADD EDX, [RSI + 4*11]
+  OR  EAX, R8D
+  XOR EAX, R9D
+  ADD EDX, EAX
+  ROL EDX, 10
+  ADD EDX, R8D

+//ECX := EDX + roldword(dword(ECX + (R8D xor (EDX or (not R9D))) + Data[2] + $2ad7d2bb), 15);
+  MOV EAX, R9D
+  ADD ECX, $2ad7d2bb
+  NOT EAX
+  ADD ECX, [RSI + 4*2]
+  OR  EAX, EDX
+  XOR EAX, R8D
+  ADD ECX, EAX
+  ROL ECX, 15
+  ADD ECX, EDX

+//R9D := ECX + roldword(dword(R9D + (EDX xor (ECX or (not R8D))) + Data[9] + $eb86d391), 21);
+  MOV EAX, R8D
+  ADD R9D, $eb86d391
+  NOT EAX
+  ADD R9D, [RSI + 4*9]
+  OR  EAX, ECX
+  XOR EAX, EDX
+  ADD R9D, EAX
+  ROL R9D, 21
+  ADD R9D, ECX

+  ADD TMDContext.State[RDI + 4*0], R8D //  Context.State[0 .. 3] += A, B, C, D
+  ADD TMDContext.State[RDI + 4*1], R9D
+  ADD TMDContext.State[RDI + 4*2], ECX
+  ADD TMDContext.State[RDI + 4*3], EDX

+//Inc(Context.Length,64); the 16 dwords (64 bytes) of Buffer read above are now accounted for
+  ADD QWORD PTR TMDContext.Length[RDI],64
+end;

+ 710 - 0
packages/hash/src/md5x64_win.inc

@@ -0,0 +1,710 @@
+// x86_64 (Windows) assembly optimized version
+procedure MD5Transform(var Context: TMDContext; Buffer: Pointer); assembler; nostackframe;
+// RCX = Context, RDX = Buffer
+{$asmmode intel}
+asm
+  // R8D = A, R9D = B, R10D = C, R11D = D
+  MOV R8D, TMDContext.State[RCX + 4*0] // A, B, C, D := Context.State[0 .. 3];
+  MOV R9D, TMDContext.State[RCX + 4*1]
+  MOV R10D,TMDContext.State[RCX + 4*2]
+  MOV R11D,TMDContext.State[RCX + 4*3]
+// Round 1
+//R8D := R9D + roldword(dword(R8D + ((R9D and R10D) or ((not R9D) and R11D)) + Data[0] + $d76aa478), 7);
+  MOV EAX, R10D
+  ADD R8D, $d76aa478
+  XOR EAX, R11D
+  ADD R8D, [RDX + 4*0]
+  AND EAX, R9D
+  XOR EAX, R11D
+  ADD R8D, EAX
+  ROL R8D, 7
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + ((R8D and R9D) or ((not R8D) and R10D)) + Data[1] + $e8c7b756), 12);
+  MOV EAX, R9D
+  ADD R11D,$e8c7b756
+  XOR EAX, R10D
+  ADD R11D,[RDX + 4*1]
+  AND EAX, R8D
+  XOR EAX, R10D
+  ADD R11D,EAX
+  ROL R11D,12
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + ((R11D and R8D) or ((not R11D) and R9D)) + Data[2] + $242070db), 17);
+  MOV EAX, R8D
+  ADD R10D,$242070db
+  XOR EAX, R9D
+  ADD R10D,[RDX + 4*2]
+  AND EAX, R11D
+  XOR EAX, R9D
+  ADD R10D,EAX
+  ROL R10D,17
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + ((R10D and R11D) or ((not R10D) and R8D)) + Data[3] + $c1bdceee), 22);
+  MOV EAX, R11D
+  ADD R9D, $c1bdceee
+  XOR EAX, R8D
+  ADD R9D, [RDX + 4*3]
+  AND EAX, R10D
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 22
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + ((R9D and R10D) or ((not R9D) and R11D)) + Data[4] + $f57c0faf), 7);
+  MOV EAX, R10D
+  ADD R8D, $f57c0faf
+  XOR EAX, R11D
+  ADD R8D, [RDX + 4*4]
+  AND EAX, R9D
+  XOR EAX, R11D
+  ADD R8D, EAX
+  ROL R8D, 7
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + ((R8D and R9D) or ((not R8D) and R10D)) + Data[5] + $4787c62a), 12);
+  MOV EAX, R9D
+  ADD R11D,$4787c62a
+  XOR EAX, R10D
+  ADD R11D,[RDX + 4*5]
+  AND EAX, R8D
+  XOR EAX, R10D
+  ADD R11D,EAX
+  ROL R11D,12
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + ((R11D and R8D) or ((not R11D) and R9D)) + Data[6] + $a8304613), 17);
+  MOV EAX, R8D
+  ADD R10D,$a8304613
+  XOR EAX, R9D
+  ADD R10D,[RDX + 4*6]
+  AND EAX, R11D
+  XOR EAX, R9D
+  ADD R10D,EAX
+  ROL R10D,17
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + ((R10D and R11D) or ((not R10D) and R8D)) + Data[7] + $fd469501), 22);
+  MOV EAX, R11D
+  ADD R9D, $fd469501
+  XOR EAX, R8D
+  ADD R9D, [RDX + 4*7]
+  AND EAX, R10D
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 22
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + ((R9D and R10D) or ((not R9D) and R11D)) + Data[8] + $698098d8), 7);
+  MOV EAX, R10D
+  ADD R8D, $698098d8
+  XOR EAX, R11D
+  ADD R8D, [RDX + 4*8]
+  AND EAX, R9D
+  XOR EAX, R11D
+  ADD R8D, EAX
+  ROL R8D, 7
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + ((R8D and R9D) or ((not R8D) and R10D)) + Data[9] + $8b44f7af), 12);
+  MOV EAX, R9D
+  ADD R11D,$8b44f7af
+  XOR EAX, R10D
+  ADD R11D,[RDX + 4*9]
+  AND EAX, R8D
+  XOR EAX, R10D
+  ADD R11D,EAX
+  ROL R11D,12
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + ((R11D and R8D) or ((not R11D) and R9D)) + Data[10] + $ffff5bb1), 17);
+  MOV EAX, R8D
+  ADD R10D,$ffff5bb1
+  XOR EAX, R9D
+  ADD R10D,[RDX + 4*10]
+  AND EAX, R11D
+  XOR EAX, R9D
+  ADD R10D,EAX
+  ROL R10D,17
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + ((R10D and R11D) or ((not R10D) and R8D)) + Data[11] + $895cd7be), 22);
+  MOV EAX, R11D
+  ADD R9D, $895cd7be
+  XOR EAX, R8D
+  ADD R9D, [RDX + 4*11]
+  AND EAX, R10D
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 22
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + ((R9D and R10D) or ((not R9D) and R11D)) + Data[12] + $6b901122), 7);
+  MOV EAX, R10D
+  ADD R8D, $6b901122
+  XOR EAX, R11D
+  ADD R8D, [RDX + 4*12]
+  AND EAX, R9D
+  XOR EAX, R11D
+  ADD R8D, EAX
+  ROL R8D, 7
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + ((R8D and R9D) or ((not R8D) and R10D)) + Data[13] + $fd987193), 12);
+  MOV EAX, R9D
+  ADD R11D,$fd987193
+  XOR EAX, R10D
+  ADD R11D,[RDX + 4*13]
+  AND EAX, R8D
+  XOR EAX, R10D
+  ADD R11D,EAX
+  ROL R11D,12
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + ((R11D and R8D) or ((not R11D) and R9D)) + Data[14] + $a679438e), 17);
+  MOV EAX, R8D
+  ADD R10D,$a679438e
+  XOR EAX, R9D
+  ADD R10D,[RDX + 4*14]
+  AND EAX, R11D
+  XOR EAX, R9D
+  ADD R10D,EAX
+  ROL R10D,17
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + ((R10D and R11D) or ((not R10D) and R8D)) + Data[15] + $49b40821), 22);
+  MOV EAX, R11D
+  ADD R9D, $49b40821
+  XOR EAX, R8D
+  ADD R9D, [RDX + 4*15]
+  AND EAX, R10D
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 22
+  ADD R9D, R10D
+
+// Round 2
+//R8D := R9D + roldword(dword(R8D + ((R9D and R11D) or (R10D and (not R11D))) + Data[1] + $f61e2562), 5);
+  MOV EAX, R9D
+  ADD R8D, $f61e2562
+  XOR EAX, R10D
+  ADD R8D, [RDX + 4*1]
+  AND EAX, R11D
+  XOR EAX, R10D
+  ADD R8D, EAX
+  ROL R8D, 5
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + ((R8D and R10D) or (R9D and (not R10D))) + Data[6] + $c040b340), 9);
+  MOV EAX, R8D
+  ADD R11D,$c040b340
+  XOR EAX, R9D
+  ADD R11D,[RDX + 4*6]
+  AND EAX, R10D
+  XOR EAX, R9D
+  ADD R11D,EAX
+  ROL R11D,9
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + ((R11D and R9D) or (R8D and (not R9D))) + Data[11] + $265e5a51), 14);
+  MOV EAX, R11D
+  ADD R10D,$265e5a51
+  XOR EAX, R8D
+  ADD R10D,[RDX + 4*11]
+  AND EAX, R9D
+  XOR EAX, R8D
+  ADD R10D,EAX
+  ROL R10D,14
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + ((R10D and R8D) or (R11D and (not R8D))) + Data[0] + $e9b6c7aa), 20);
+  MOV EAX, R10D
+  ADD R9D, $e9b6c7aa
+  XOR EAX, R11D
+  ADD R9D, [RDX + 4*0]
+  AND EAX, R8D
+  XOR EAX, R11D
+  ADD R9D, EAX
+  ROL R9D, 20
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + ((R9D and R11D) or (R10D and (not R11D))) + Data[5] + $d62f105d), 5);
+  MOV EAX, R9D
+  ADD R8D, $d62f105d
+  XOR EAX, R10D
+  ADD R8D, [RDX + 4*5]
+  AND EAX, R11D
+  XOR EAX, R10D
+  ADD R8D, EAX
+  ROL R8D, 5
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + ((R8D and R10D) or (R9D and (not R10D))) + Data[10] + $02441453), 9);
+  MOV EAX, R8D
+  ADD R11D,$02441453
+  XOR EAX, R9D
+  ADD R11D,[RDX + 4*10]
+  AND EAX, R10D
+  XOR EAX, R9D
+  ADD R11D,EAX
+  ROL R11D,9
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + ((R11D and R9D) or (R8D and (not R9D))) + Data[15] + $d8a1e681), 14);
+  MOV EAX, R11D
+  ADD R10D,$d8a1e681
+  XOR EAX, R8D
+  ADD R10D,[RDX + 4*15]
+  AND EAX, R9D
+  XOR EAX, R8D
+  ADD R10D,EAX
+  ROL R10D,14
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + ((R10D and R8D) or (R11D and (not R8D))) + Data[4] + $e7d3fbc8), 20);
+  MOV EAX, R10D
+  ADD R9D, $e7d3fbc8
+  XOR EAX, R11D
+  ADD R9D, [RDX + 4*4]
+  AND EAX, R8D
+  XOR EAX, R11D
+  ADD R9D, EAX
+  ROL R9D, 20
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + ((R9D and R11D) or (R10D and (not R11D))) + Data[9] + $21e1cde6), 5);
+  MOV EAX, R9D
+  ADD R8D, $21e1cde6
+  XOR EAX, R10D
+  ADD R8D, [RDX + 4*9]
+  AND EAX, R11D
+  XOR EAX, R10D
+  ADD R8D, EAX
+  ROL R8D, 5
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + ((R8D and R10D) or (R9D and (not R10D))) + Data[14] + $c33707d6), 9);
+  MOV EAX, R8D
+  ADD R11D,$c33707d6
+  XOR EAX, R9D
+  ADD R11D,[RDX + 4*14]
+  AND EAX, R10D
+  XOR EAX, R9D
+  ADD R11D,EAX
+  ROL R11D,9
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + ((R11D and R9D) or (R8D and (not R9D))) + Data[3] + $f4d50d87), 14);
+  MOV EAX, R11D
+  ADD R10D,$f4d50d87
+  XOR EAX, R8D
+  ADD R10D,[RDX + 4*3]
+  AND EAX, R9D
+  XOR EAX, R8D
+  ADD R10D,EAX
+  ROL R10D,14
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + ((R10D and R8D) or (R11D and (not R8D))) + Data[8] + $455a14ed), 20);
+  MOV EAX, R10D
+  ADD R9D, $455a14ed
+  XOR EAX, R11D
+  ADD R9D, [RDX + 4*8]
+  AND EAX, R8D
+  XOR EAX, R11D
+  ADD R9D, EAX
+  ROL R9D, 20
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + ((R9D and R11D) or (R10D and (not R11D))) + Data[13] + $a9e3e905), 5);
+  MOV EAX, R9D
+  ADD R8D, $a9e3e905
+  XOR EAX, R10D
+  ADD R8D, [RDX + 4*13]
+  AND EAX, R11D
+  XOR EAX, R10D
+  ADD R8D, EAX
+  ROL R8D, 5
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + ((R8D and R10D) or (R9D and (not R10D))) + Data[2] + $fcefa3f8), 9);
+  MOV EAX, R8D
+  ADD R11D,$fcefa3f8
+  XOR EAX, R9D
+  ADD R11D,[RDX + 4*2]
+  AND EAX, R10D
+  XOR EAX, R9D
+  ADD R11D,EAX
+  ROL R11D,9
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + ((R11D and R9D) or (R8D and (not R9D))) + Data[7] + $676f02d9), 14);
+  MOV EAX, R11D
+  ADD R10D,$676f02d9
+  XOR EAX, R8D
+  ADD R10D,[RDX + 4*7]
+  AND EAX, R9D
+  XOR EAX, R8D
+  ADD R10D,EAX
+  ROL R10D,14
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + ((R10D and R8D) or (R11D and (not R8D))) + Data[12] + $8d2a4c8a), 20);
+  MOV EAX, R10D
+  ADD R9D, $8d2a4c8a
+  XOR EAX, R11D
+  ADD R9D, [RDX + 4*12]
+  AND EAX, R8D
+  XOR EAX, R11D
+  ADD R9D, EAX
+  ROL R9D, 20
+  ADD R9D, R10D
+
+// Round 3
+//R8D := R9D + roldword(dword(R8D + (R9D xor R10D xor R11D) + Data[5] + $fffa3942), 4);
+  MOV EAX, R9D
+  ADD R8D, $fffa3942
+  XOR EAX, R10D
+  ADD R8D, [RDX + 4*5]
+  XOR EAX, R11D
+  ADD R8D, EAX
+  ROL R8D, 4
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + (R8D xor R9D xor R10D) + Data[8] + $8771f681), 11);
+  MOV EAX, R8D
+  ADD R11D,$8771f681
+  XOR EAX, R9D
+  ADD R11D,[RDX + 4*8]
+  XOR EAX, R10D
+  ADD R11D,EAX
+  ROL R11D,11
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + (R11D xor R8D xor R9D) + Data[11] + $6d9d6122), 16);
+  MOV EAX, R11D
+  ADD R10D,$6d9d6122
+  XOR EAX, R8D
+  ADD R10D,[RDX + 4*11]
+  XOR EAX, R9D
+  ADD R10D,EAX
+  ROL R10D,16
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + (R10D xor R11D xor R8D) + Data[14] + $fde5380c), 23);
+  MOV EAX, R10D
+  ADD R9D, $fde5380c
+  XOR EAX, R11D
+  ADD R9D, [RDX + 4*14]
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 23
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + (R9D xor R10D xor R11D) + Data[1] + $a4beea44), 4);
+  MOV EAX, R9D
+  ADD R8D, $a4beea44
+  XOR EAX, R10D
+  ADD R8D, [RDX + 4*1]
+  XOR EAX, R11D
+  ADD R8D, EAX
+  ROL R8D, 4
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + (R8D xor R9D xor R10D) + Data[4] + $4bdecfa9), 11);
+  MOV EAX, R8D
+  ADD R11D,$4bdecfa9
+  XOR EAX, R9D
+  ADD R11D,[RDX + 4*4]
+  XOR EAX, R10D
+  ADD R11D,EAX
+  ROL R11D,11
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + (R11D xor R8D xor R9D) + Data[7] + $f6bb4b60), 16);
+  MOV EAX, R11D
+  ADD R10D,$f6bb4b60
+  XOR EAX, R8D
+  ADD R10D,[RDX + 4*7]
+  XOR EAX, R9D
+  ADD R10D,EAX
+  ROL R10D,16
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + (R10D xor R11D xor R8D) + Data[10] + $bebfbc70), 23);
+  MOV EAX, R10D
+  ADD R9D, $bebfbc70
+  XOR EAX, R11D
+  ADD R9D, [RDX + 4*10]
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 23
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + (R9D xor R10D xor R11D) + Data[13] + $289b7ec6), 4);
+  MOV EAX, R9D
+  ADD R8D, $289b7ec6
+  XOR EAX, R10D
+  ADD R8D, [RDX + 4*13]
+  XOR EAX, R11D
+  ADD R8D, EAX
+  ROL R8D, 4
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + (R8D xor R9D xor R10D) + Data[0] + $eaa127fa), 11);
+  MOV EAX, R8D
+  ADD R11D,$eaa127fa
+  XOR EAX, R9D
+  ADD R11D,[RDX + 4*0]
+  XOR EAX, R10D
+  ADD R11D,EAX
+  ROL R11D,11
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + (R11D xor R8D xor R9D) + Data[3] + $d4ef3085), 16);
+  MOV EAX, R11D
+  ADD R10D,$d4ef3085
+  XOR EAX, R8D
+  ADD R10D,[RDX + 4*3]
+  XOR EAX, R9D
+  ADD R10D,EAX
+  ROL R10D,16
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + (R10D xor R11D xor R8D) + Data[6] + $04881d05), 23);
+  MOV EAX, R10D
+  ADD R9D, $04881d05
+  XOR EAX, R11D
+  ADD R9D, [RDX + 4*6]
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 23
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + (R9D xor R10D xor R11D) + Data[9] + $d9d4d039), 4);
+  MOV EAX, R9D
+  ADD R8D, $d9d4d039
+  XOR EAX, R10D
+  ADD R8D, [RDX + 4*9]
+  XOR EAX, R11D
+  ADD R8D, EAX
+  ROL R8D, 4
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + (R8D xor R9D xor R10D) + Data[12] + $e6db99e5), 11);
+  MOV EAX, R8D
+  ADD R11D,$e6db99e5
+  XOR EAX, R9D
+  ADD R11D,[RDX + 4*12]
+  XOR EAX, R10D
+  ADD R11D,EAX
+  ROL R11D,11
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + (R11D xor R8D xor R9D) + Data[15] + $1fa27cf8), 16);
+  MOV EAX, R11D
+  ADD R10D,$1fa27cf8
+  XOR EAX, R8D
+  ADD R10D,[RDX + 4*15]
+  XOR EAX, R9D
+  ADD R10D,EAX
+  ROL R10D,16
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + (R10D xor R11D xor R8D) + Data[2] + $c4ac5665), 23);
+  MOV EAX, R10D
+  ADD R9D, $c4ac5665
+  XOR EAX, R11D
+  ADD R9D, [RDX + 4*2]
+  XOR EAX, R8D
+  ADD R9D, EAX
+  ROL R9D, 23
+  ADD R9D, R10D
+
+// Round 4
+//R8D := R9D + roldword(dword(R8D + (R10D xor (R9D or (not R11D))) + Data[0] + $f4292244), 6);
+  MOV EAX, R11D
+  ADD R8D, $f4292244
+  NOT EAX
+  ADD R8D, [RDX + 4*0]
+  OR  EAX, R9D
+  XOR EAX, R10D
+  ADD R8D, EAX
+  ROL R8D, 6
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + (R9D xor (R8D or (not R10D))) + Data[7] + $432aff97), 10);
+  MOV EAX, R10D
+  ADD R11D,$432aff97
+  NOT EAX
+  ADD R11D,[RDX + 4*7]
+  OR  EAX, R8D
+  XOR EAX, R9D
+  ADD R11D,EAX
+  ROL R11D,10
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + (R8D xor (R11D or (not R9D))) + Data[14] + $ab9423a7), 15);
+  MOV EAX, R9D
+  ADD R10D,$ab9423a7
+  NOT EAX
+  ADD R10D,[RDX + 4*14]
+  OR  EAX, R11D
+  XOR EAX, R8D
+  ADD R10D,EAX
+  ROL R10D,15
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + (R11D xor (R10D or (not R8D))) + Data[5] + $fc93a039), 21);
+  MOV EAX, R8D
+  ADD R9D, $fc93a039
+  NOT EAX
+  ADD R9D, [RDX + 4*5]
+  OR  EAX, R10D
+  XOR EAX, R11D
+  ADD R9D, EAX
+  ROL R9D, 21
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + (R10D xor (R9D or (not R11D))) + Data[12] + $655b59c3), 6);
+  MOV EAX, R11D
+  ADD R8D, $655b59c3
+  NOT EAX
+  ADD R8D, [RDX + 4*12]
+  OR  EAX, R9D
+  XOR EAX, R10D
+  ADD R8D, EAX
+  ROL R8D, 6
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + (R9D xor (R8D or (not R10D))) + Data[3] + $8f0ccc92), 10);
+  MOV EAX, R10D
+  ADD R11D,$8f0ccc92
+  NOT EAX
+  ADD R11D,[RDX + 4*3]
+  OR  EAX, R8D
+  XOR EAX, R9D
+  ADD R11D,EAX
+  ROL R11D,10
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + (R8D xor (R11D or (not R9D))) + Data[10] + $ffeff47d), 15);
+  MOV EAX, R9D
+  ADD R10D,$ffeff47d
+  NOT EAX
+  ADD R10D,[RDX + 4*10]
+  OR  EAX, R11D
+  XOR EAX, R8D
+  ADD R10D,EAX
+  ROL R10D,15
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + (R11D xor (R10D or (not R8D))) + Data[1] + $85845dd1), 21);
+  MOV EAX, R8D
+  ADD R9D, $85845dd1
+  NOT EAX
+  ADD R9D, [RDX + 4*1]
+  OR  EAX, R10D
+  XOR EAX, R11D
+  ADD R9D, EAX
+  ROL R9D, 21
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + (R10D xor (R9D or (not R11D))) + Data[8] + $6fa87e4f), 6);
+  MOV EAX, R11D
+  ADD R8D, $6fa87e4f
+  NOT EAX
+  ADD R8D, [RDX + 4*8]
+  OR  EAX, R9D
+  XOR EAX, R10D
+  ADD R8D, EAX
+  ROL R8D, 6
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + (R9D xor (R8D or (not R10D))) + Data[15] + $fe2ce6e0), 10);
+  MOV EAX, R10D
+  ADD R11D,$fe2ce6e0
+  NOT EAX
+  ADD R11D,[RDX + 4*15]
+  OR  EAX, R8D
+  XOR EAX, R9D
+  ADD R11D,EAX
+  ROL R11D,10
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + (R8D xor (R11D or (not R9D))) + Data[6] + $a3014314), 15);
+  MOV EAX, R9D
+  ADD R10D,$a3014314
+  NOT EAX
+  ADD R10D,[RDX + 4*6]
+  OR  EAX, R11D
+  XOR EAX, R8D
+  ADD R10D,EAX
+  ROL R10D,15
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + (R11D xor (R10D or (not R8D))) + Data[13] + $4e0811a1), 21);
+  MOV EAX, R8D
+  ADD R9D, $4e0811a1
+  NOT EAX
+  ADD R9D, [RDX + 4*13]
+  OR  EAX, R10D
+  XOR EAX, R11D
+  ADD R9D, EAX
+  ROL R9D, 21
+  ADD R9D, R10D
+
+//R8D := R9D + roldword(dword(R8D + (R10D xor (R9D or (not R11D))) + Data[4] + $f7537e82), 6);
+  MOV EAX, R11D
+  ADD R8D, $f7537e82
+  NOT EAX
+  ADD R8D, [RDX + 4*4]
+  OR  EAX, R9D
+  XOR EAX, R10D
+  ADD R8D, EAX
+  ROL R8D, 6
+  ADD R8D, R9D
+
+//R11D := R8D + roldword(dword(R11D + (R9D xor (R8D or (not R10D))) + Data[11] + $bd3af235), 10);
+  MOV EAX, R10D
+  ADD R11D,$bd3af235
+  NOT EAX
+  ADD R11D,[RDX + 4*11]
+  OR  EAX, R8D
+  XOR EAX, R9D
+  ADD R11D,EAX
+  ROL R11D,10
+  ADD R11D,R8D
+
+//R10D := R11D + roldword(dword(R10D + (R8D xor (R11D or (not R9D))) + Data[2] + $2ad7d2bb), 15);
+  MOV EAX, R9D
+  ADD R10D,$2ad7d2bb
+  NOT EAX
+  ADD R10D,[RDX + 4*2]
+  OR  EAX, R11D
+  XOR EAX, R8D
+  ADD R10D,EAX
+  ROL R10D,15
+  ADD R10D,R11D
+
+//R9D := R10D + roldword(dword(R9D + (R11D xor (R10D or (not R8D))) + Data[9] + $eb86d391), 21);
+  MOV EAX, R8D
+  ADD R9D, $eb86d391
+  NOT EAX
+  ADD R9D, [RDX + 4*9]
+  OR  EAX, R10D
+  XOR EAX, R11D
+  ADD R9D, EAX
+  ROL R9D, 21
+  ADD R9D, R10D
+
+  ADD TMDContext.State[RCX + 4*0], R8D //  Context.State[0 .. 3] += A, B, C, D
+  ADD TMDContext.State[RCX + 4*1], R9D
+  ADD TMDContext.State[RCX + 4*2], R10D
+  ADD TMDContext.State[RCX + 4*3], R11D
+
+//Inc(Context.Length,64);
+  ADD QWORD PTR TMDContext.Length[RCX],64
+end;