Browse Source

Calculate sha256 using dedicated x86 SHA instruction set.

Margers 3 months ago
parent
commit
14dc707042
2 changed files with 311 additions and 7 deletions
  1. 38 7
      packages/fcl-hash/src/fpsha256.pp
  2. 273 0
      packages/fcl-hash/src/sha256x86.inc

+ 38 - 7
packages/fcl-hash/src/fpsha256.pp

@@ -11,6 +11,11 @@
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 }
+
+// By default an optimized implementation is used when one is available for the target OS/CPU.
+// Define SHA256PASCAL to force the use of the pure Pascal implementation.
+{ $DEFINE SHA256PASCAL}
+
 {$IFNDEF FPC_DOTTEDUNITS}
 unit fpsha256;
 {$ENDIF FPC_DOTTEDUNITS}
@@ -39,7 +44,7 @@ Type
     HashBuffer : THashBuffer;
     Index: UInt32;
     TotalLength: Int64;
-    procedure Compress;
+    procedure Compress; inline;
     procedure Final;
     procedure Init(Use224 : Boolean = False);
     procedure Update(PBuf: PByte; Size: UInt32); overload;
@@ -133,11 +138,24 @@ Const
 implementation
 
 {$IFDEF FPC_DOTTEDUNITS}
-uses System.Hash.Utils;
+uses System.Hash.Utils
+{$if defined(x86_64) or defined(CPU386)},System.Cpu{$endif};
 {$ELSE FPC_DOTTEDUNITS}
-uses fphashutils;
+uses fphashutils
+{$if defined(x86_64) or defined(CPU386)},cpu{$endif};
 {$ENDIF FPC_DOTTEDUNITS}
 
+procedure sha256PascalCompress(var Context:TContextBuffer; var HashBuffer:THashBuffer; aK: pointer; Mask: pointer); forward;
+
+// Use assembler version if we have a suitable CPU as well
+// Define SHA256PASCAL to force use of original reference code
+{$ifndef SHA256PASCAL}
+  {$if defined(x86_64) or defined(CPU386)}
+     {$define SHA256ASM}
+     {$i sha256x86.inc} //-- assembler implementation for x86 using SHA instruction set
+  {$endif}
+{$endif}
+
 //------------------------------------------------------------------------------
 // SHA256Base
 //------------------------------------------------------------------------------
@@ -182,9 +200,6 @@ begin
     Context[i]:=P^[i];
 end;
 
-
-procedure TSHA256Base.Compress;
-// Actual hashing function
 const
   K: array[0..63] of UInt32 = (
    $428a2f98, $71374491, $b5c0fbcf, $e9b5dba5, $3956c25b, $59f111f1,
@@ -198,6 +213,13 @@ const
    $19a4c116, $1e376c08, $2748774c, $34b0bcb5, $391c0cb3, $4ed8aa4a,
    $5b9cca4f, $682e6ff3, $748f82ee, $78a5636f, $84c87814, $8cc70208,
    $90befffa, $a4506ceb, $bef9a3f7, $c67178f2);
+
+{$ifdef SHA256ASM}
+  Mask: record a, b: qword end = (a:$0405060700010203;b:$0c0d0e0f08090a0b);
+{$endif}
+
+procedure sha256PascalCompress(var Context:TContextBuffer;var HashBuffer:THashBuffer; aK:pointer; Mask:pointer);
+// Actual hashing function
 Type
   TBuf64 =  array[0..63] of UInt32;
 var
@@ -243,6 +265,15 @@ begin
   Inc(Context[7], H);
 end;
 
+procedure TSHA256Base.Compress;
+// Runs one compression step on HashBuffer and folds the result into Context.
+// With SHA256ASM defined the call is routed through sha256AsmCompress and is
+// given the Mask constant; otherwise the Pascal routine is called directly
+// and receives nil in place of the mask.
+begin
+{$ifndef SHA256ASM}
+  sha256PascalCompress(Context,HashBuffer,@K,nil);
+{$else}
+  sha256AsmCompress(Context,HashBuffer,@K,@Mask);
+{$endif}
+end;
+
 type
   TInt64Rec = packed record
     case Integer of
@@ -477,7 +508,7 @@ begin
   repeat
      aLen:=aStream.Read(Buffer, Length(Buffer));
      if aLen>0 then
-       SHA256.Update(PByte(Buffer),aLen); 
+       SHA256.Update(PByte(Buffer),aLen);
   until aLen=0;
   SHA256.Final;
   aDigest:=SHA256.Digest;

+ 273 - 0
packages/fcl-hash/src/sha256x86.inc

@@ -0,0 +1,273 @@
+{$push}
+{$asmmode intel}
+
+procedure sha256x86AsmCompress (var Ctx:TContextBuffer; var Data:THashBuffer; aK:pointer; aMask:pointer); assembler;
+{$if defined(x86_64) and not defined(windows)}nostackframe;{$endif}
+{$ifndef x86_64}
+var ABEF_SAVE, CDGH_SAVE : record a,b:qword end; { 16-byte slots for the input state; used instead of xmm11/xmm12 on non-x86_64 builds }
+{$endif}
+{$if defined(x86_64) and defined(windows)}
+var sav6,sav7,sav11,sav12 : record a,b:qword end; { Win64 ABI treats xmm6-xmm15 as callee-saved; these slots hold the four that this routine overwrites }
+{$endif}
+{
+  Compresses one 64-byte block (Data) into the eight 32-bit state words (Ctx)
+  using the sha256rnds2 / sha256msg1 / sha256msg2 instructions.
+    aK    - start of the round-constant table; 256 bytes are read from it
+    aMask - 16-byte pshufb control applied to each 16-byte group of Data
+
+  Register usage
+    xmm0 MSG (message words plus round constants, implicit sha256rnds2 operand)
+    xmm1 STATE1
+    xmm2 STATE0
+    xmm3 MSG3
+    xmm4 MSG0
+    xmm5 MSG1
+    xmm6 MSG2
+    xmm7 TMP & MASK (mask only until the four message registers are shuffled)
+    xmm11 CDGH_SAVE (only on x64)
+    xmm12 ABEF_SAVE (only on x64)
+
+  NOTE(review): going by the sha256rnds2 operand definition (destination holds
+  C,D,G,H and source holds A,B,E,F), xmm2 carries CDGH and xmm1 carries ABEF at
+  the start of every four-round group, so the ABEF/CDGH labels above and on
+  the save slots look swapped. Save and restore pair up consistently, so only
+  the naming is affected - confirm before renaming.
+}
+asm
+{$if defined(x86_64) and defined(windows)}
+   movdqu sav6,xmm6
+   movdqu sav7,xmm7
+   movdqu sav11,xmm11
+   movdqu sav12,xmm12
+{$endif}
+   { initialize: load the 32 bytes of state from Ctx and regroup them into the two halves sha256rnds2 works on }
+   movdqu xmm2,oword ptr[Ctx]
+   pshufd xmm0,xmm2,$B1
+   movdqu xmm2,oword ptr[Ctx+$10]
+   movdqa xmm1,xmm0
+   pshufd xmm2,xmm2,$1B
+   palignr xmm1,xmm2,$08
+   pblendw xmm2,xmm0,$F0
+
+   { load the shuffle mask, the first four round constants and the whole 64-byte block up front }
+   {$ifdef x86_64}
+   movdqu xmm7,oword ptr[aMask]
+   {$else}
+   push ebx                          { ebx is borrowed as an address register and restored right after the load }
+   mov ebx,aMask
+   movdqu xmm7,oword ptr[ebx]
+   pop ebx
+   {$endif}
+   movdqu xmm0,oword ptr[aK+$00]
+   movdqu xmm4,oword ptr[Data+00] { MSG0 }
+   movdqu xmm5,oword ptr[Data+16] { MSG1 }
+   movdqu xmm6,oword ptr[Data+32] { MSG2 }
+   movdqu xmm3,oword ptr[Data+48] { MSG3 }
+
+   {$ifdef x86_64}
+   movdqa xmm12,xmm2                 { keep the input state; it is added back after round 63 }
+   movdqa xmm11,xmm1
+   {$else}
+   movdqu oword ptr [ABEF_SAVE],xmm2 { keep the input state; it is added back after round 63 }
+   movdqu oword ptr [CDGH_SAVE],xmm1
+   {$endif}
+
+   { rounds 0-3; all four message registers are also run through the pshufb mask here }
+   pshufb xmm4,xmm7
+   pshufb xmm5,xmm7
+   paddd xmm0,xmm4
+   pshufb xmm6,xmm7
+   pshufb xmm3,xmm7
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E              { move the upper two dwords of xmm0 down for the second pair of rounds }
+   movdqa xmm7,xmm3                  { last use of the mask is above; xmm7 serves as TMP from here on }
+   sha256rnds2 xmm1,xmm2
+   { rounds 4-7 }
+   movdqu xmm0,oword ptr[aK+$10]
+   sha256msg1 xmm4,xmm5
+   paddd xmm0,xmm5
+   palignr xmm7,xmm6,$04
+   sha256msg1 xmm5,xmm6
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   paddd xmm4,xmm7
+   sha256rnds2 xmm1,xmm2
+   sha256msg2 xmm4,xmm3
+   { rounds 8-11 }
+   movdqu xmm0,oword ptr[aK+$20]
+   movdqa xmm7,xmm4
+   paddd xmm0,xmm6
+   sha256msg1 xmm6,xmm3
+   palignr xmm7,xmm3,$04
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   paddd xmm5,xmm7
+   sha256rnds2 xmm1,xmm2
+   { rounds 12-15 }
+   movdqu xmm0,oword ptr[aK+$30]
+   sha256msg2 xmm5,xmm4
+   paddd xmm0,xmm3
+   movdqa xmm7,xmm5
+   sha256msg1 xmm3,xmm4
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   add aK,$7f     { bias the table pointer so the loads for rounds 16-31 use small negative displacements; the original note says this shortens the displacement constants }
+   sha256rnds2 xmm1,xmm2
+   { rounds 16-19; aK-$3f is byte offset $40 of the table }
+   movdqu xmm0,oword ptr[aK-$3f]
+   palignr xmm7,xmm4,$04
+   paddd xmm6,xmm7
+   paddd xmm0,xmm4
+   sha256msg2 xmm6,xmm5
+   sha256msg1 xmm4,xmm5
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   movdqa xmm7,xmm6
+   sha256rnds2 xmm1,xmm2
+   { rounds 20-23 }
+   movdqu xmm0,oword ptr[aK-$2f]
+   palignr xmm7,xmm5,$04
+   paddd xmm3,xmm7
+   paddd xmm0,xmm5
+   sha256msg2 xmm3,xmm6
+   sha256msg1 xmm5,xmm6
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   movdqa xmm7,xmm3
+   sha256rnds2 xmm1,xmm2
+   { rounds 24-27 }
+   movdqu xmm0,oword ptr[aK-$1f]
+   palignr xmm7,xmm6,$04
+   paddd xmm4,xmm7
+   paddd xmm0,xmm6
+   sha256msg2 xmm4,xmm3
+   sha256msg1 xmm6,xmm3
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   movdqa xmm7,xmm4
+   sha256rnds2 xmm1,xmm2
+   { rounds 28-31 }
+   movdqu xmm0,oword ptr[aK-$0f]
+   palignr xmm7,xmm3,$04
+   paddd xmm5,xmm7
+   paddd xmm0,xmm3
+   sha256msg2 xmm5,xmm4
+   inc aK                            { aK is now table start + $80, so the remaining loads use offsets $00..$70 }
+   sha256msg1 xmm3,xmm4
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   movdqa xmm7,xmm5
+   sha256rnds2 xmm1,xmm2
+   { rounds 32-35 }
+   movdqu xmm0,oword ptr[aK+$00]
+   palignr xmm7,xmm4,$04
+   paddd xmm6,xmm7
+   paddd xmm0,xmm4
+   sha256msg2 xmm6,xmm5
+   sha256msg1 xmm4,xmm5
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   movdqa xmm7,xmm6
+   sha256rnds2 xmm1,xmm2
+   { rounds 36-39 }
+   movdqu xmm0,oword ptr[aK+$10]
+   palignr xmm7,xmm5,$04
+   paddd xmm3,xmm7
+   paddd xmm0,xmm5
+   sha256msg2 xmm3,xmm6
+   sha256msg1 xmm5,xmm6
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   movdqa xmm7,xmm3
+   sha256rnds2 xmm1,xmm2
+   { rounds 40-43 }
+   movdqu xmm0,oword ptr[aK+$20]
+   palignr xmm7,xmm6,$04
+   paddd xmm4,xmm7
+   paddd xmm0,xmm6
+   sha256msg2 xmm4,xmm3
+   sha256msg1 xmm6,xmm3
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   movdqa xmm7,xmm4
+   sha256rnds2 xmm1,xmm2
+   { rounds 44-47 }
+   movdqu xmm0,oword ptr[aK+$30]
+   palignr xmm7,xmm3,$04
+   paddd xmm0,xmm3
+   paddd xmm5,xmm7
+   sha256msg1 xmm3,xmm4
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   sha256msg2 xmm5,xmm4
+   sha256rnds2 xmm1,xmm2
+   { rounds 48-51 }
+   movdqu xmm0,oword ptr[aK+$40]
+   paddd xmm0,xmm4
+   movdqa xmm7,xmm5
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   sha256rnds2 xmm1,xmm2
+   palignr xmm7,xmm4,$04
+   { rounds 52-55 }
+   movdqu xmm0,oword ptr[aK+$50]
+   paddd xmm6,xmm7
+   sha256msg2 xmm6,xmm5
+   paddd xmm0,xmm5
+   sha256rnds2 xmm2,xmm1
+   movdqa xmm7,xmm6
+   pshufd xmm0,xmm0,$0E
+   sha256rnds2 xmm1,xmm2
+   palignr xmm7,xmm5,$04
+   { rounds 56-59; the constants for rounds 60-63 are fetched into xmm7 and added to xmm3 here }
+   movdqu xmm0,oword ptr[aK+$60]
+   paddd xmm3,xmm7
+   paddd xmm0,xmm6
+   sha256msg2 xmm3,xmm6
+   movdqu xmm7,oword ptr[aK+$70]
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm0,$0E
+   paddd xmm3,xmm7
+   sha256rnds2 xmm1,xmm2
+   { rounds 60-63; xmm3 already holds message words plus constants }
+   movdqa xmm0,xmm3
+   sha256rnds2 xmm2,xmm1
+   pshufd xmm0,xmm3,$0E
+   sha256rnds2 xmm1,xmm2
+
+   { add saved state to current }
+   {$ifdef x86_64}
+   paddd xmm2,xmm12
+   paddd xmm1,xmm11
+   {$else}
+   movdqu xmm7,oword ptr[ABEF_SAVE]
+   movdqu xmm6,oword ptr[CDGH_SAVE]
+   paddd xmm2,xmm7
+   paddd xmm1,xmm6
+   {$endif}
+
+   { done: regroup the two halves with the same shuffle constants used on entry and write the 32 bytes back to Ctx }
+   pshufd xmm1,xmm1,$1B
+   pshufd xmm2,xmm2,$B1
+   movdqa xmm0,xmm1
+   pblendw xmm0,xmm2,$F0
+   palignr xmm2,xmm1,$08
+   movdqu oword ptr[Ctx],xmm0
+   movdqu oword ptr[Ctx+$10],xmm2
+{$if defined(x86_64) and defined(windows)}
+   movdqu xmm6,sav6                  { restore the registers saved on entry }
+   movdqu xmm7,sav7
+   movdqu xmm11,sav11
+   movdqu xmm12,sav12
+{$endif}
+end;
+
+procedure sha256Compress_Dispatch(var Context:TContextBuffer; var HashBuffer:THashBuffer; K:pointer; Mask:pointer); forward;
+
+{ Routine that every compression call goes through. It initially points at
+  the dispatcher below, which overwrites it with the selected implementation
+  the first time it runs. }
+var
+  sha256Compress_Impl: procedure (var Context:TContextBuffer;
+                                  var HashBuffer:THashBuffer;
+                                  K:pointer;
+                                  Mask:pointer) = @sha256Compress_Dispatch;
+
+{ Selects the implementation: the Pascal routine when SHASupport reports
+  false, the SHA-instruction routine otherwise. The choice is stored in
+  sha256Compress_Impl and the current request is then forwarded to it. }
+procedure sha256Compress_Dispatch(var Context:TContextBuffer; var HashBuffer:THashBuffer; K:pointer; Mask:pointer);
+begin
+  if not SHASupport then
+    sha256Compress_Impl:=@sha256PascalCompress
+  else
+    sha256Compress_Impl:=@sha256x86AsmCompress;
+  sha256Compress_Impl(Context,HashBuffer,K,Mask);
+end;
+
+{ Inlined forwarder: passes its arguments unchanged to whatever routine
+  sha256Compress_Impl currently points at. }
+procedure sha256AsmCompress(var Context:TContextBuffer;
+                            var HashBuffer:THashBuffer;
+                            K:pointer;
+                            Mask:pointer); inline;
+begin
+  sha256Compress_Impl(Context, HashBuffer, K, Mask);
+end;
+
+{$pop}