Browse Source

* LLVM-specific support for bsr*/bsf*

git-svn-id: trunk@42309 -
Jonas Maebe 6 năm trước
mục cha
commit
0a91fcb44f
5 tập tin đã thay đổi với 106 bổ sung và 4 xóa
  1. 68 0
      compiler/llvm/nllvminl.pas
  2. 16 0
      compiler/nbas.pas
  3. 10 2
      compiler/ninl.pas
  4. 2 2
      compiler/options.pas
  5. 10 0
      rtl/inc/llvmintr.inc

+ 68 - 0
compiler/llvm/nllvminl.pas

@@ -36,6 +36,7 @@ interface
 
         function first_get_frame: tnode; override;
         function first_abs_real: tnode; override;
+        function first_bitscan: tnode; override;
         function first_fma: tnode; override;
         function first_sqr_real: tnode; override;
         function first_sqrt_real: tnode; override;
@@ -148,6 +149,73 @@ implementation
         left:=nil;
       end;
 
+
+    function tllvminlinenode.first_bitscan: tnode;
+      var
+        leftdef: tdef;
+        resulttemp,
+        lefttemp: ttempcreatenode;
+        stat: tstatementnode;
+        block: tblocknode;
+        cntresult: tnode;
+        procname: string[15];
+      begin
+        {
+          LLVM override of the bsf/bsr first-pass handler: instead of keeping
+          the inline node, return a statement block that computes the result
+          through the llvm_ctlz/llvm_cttz helpers. The generated tree
+          corresponds to:
+
+          if left<>0 then
+            result:=llvm_ctlz/cttz(unsigned(left),true)
+          else
+            result:=255;
+
+          (for bsr the ctlz count is additionally converted into a bit index,
+          see below). The zero test is done here because the helper is called
+          with is_zero_undef=true.
+        }
+        if inlinenumber=in_bsr_x then
+          procname:='LLVM_CTLZ'
+        else
+          procname:='LLVM_CTTZ';
+        leftdef:=left.resultdef;
+        block:=internalstatements(stat);
+        resulttemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false); { holds the value of the whole block }
+        addstatement(stat,resulttemp);
+        lefttemp:=maybereplacewithtemp(left,block,stat,left.resultdef.size,true); { nil unless left was complex enough to be replaced by a temp }
+        cntresult:=
+          ccallnode.createintern(
+            procname,
+            ccallparanode.create(cordconstnode.create(1,llvmbool1type,false), { the "true" (is_zero_undef) argument of the pseudo-code above }
+              ccallparanode.create(
+                ctypeconvnode.create_explicit(left,get_unsigned_inttype(leftdef)),nil
+              )
+            )
+          );
+        { ctlz returns the number of leading zero bits, while bsr returns the bit
+          number of the first non-zero bit (with the least significant bit as 0)
+          -> invert result; for a count in 0..bits-1 and a power-of-two bit
+          width, "count xor (bits-1)" equals "(bits-1)-count" (leftdef.size*8
+          is presumably the operand width in bits -- confirm) }
+        if inlinenumber=in_bsr_x then
+          begin
+            cntresult:=
+              caddnode.create(xorn,
+                cntresult,
+                genintconstnode(leftdef.size*8-1)
+              );
+          end;
+        addstatement(stat,
+          cifnode.create(caddnode.create(unequaln,left.getcopy,genintconstnode(0)), { copy: left itself is already part of cntresult }
+            cassignmentnode.create(
+              ctemprefnode.create(resulttemp),
+              cntresult
+            ),
+            cassignmentnode.create(
+              ctemprefnode.create(resulttemp),
+              genintconstnode(255) { value produced for a zero operand }
+            )
+          )
+        );
+        if assigned(lefttemp) then
+          addstatement(stat,ctempdeletenode.create(lefttemp));
+        addstatement(stat,ctempdeletenode.create_normal_temp(resulttemp)); { NOTE(review): presumably keeps resulttemp readable for the final tempref below -- confirm against ctempdeletenode }
+        addstatement(stat,ctemprefnode.create(resulttemp)); { last statement: reference to the temp holding the result }
+        left:=nil; { left was moved into the new tree above, so this node must no longer refer to it }
+        result:=block;
+      end;
+
+
     function tllvminlinenode.first_fma: tnode;
       var
         procname: string[15];

+ 16 - 0
compiler/nbas.pas

@@ -331,6 +331,8 @@ interface
        { if the complexity of n is "high", creates a reference temp to n's
          location and replace n with a ttemprefnode referring to that location }
        function maybereplacewithtempref(var n: tnode; var block: tblocknode; var stat: tstatementnode; size: ASizeInt; readonly: boolean): ttempcreatenode;
+       { same as above, but create a regular temp rather than reference temp }
+       function maybereplacewithtemp(var n: tnode; var block: tblocknode; var stat: tstatementnode; size: ASizeInt; allowreg: boolean): ttempcreatenode;
 
 implementation
 
@@ -395,6 +397,20 @@ implementation
           end;
       end;
 
+    function maybereplacewithtemp(var n: tnode; var block: tblocknode; var stat: tstatementnode; size: ASizeInt; allowreg: boolean): ttempcreatenode;
+      begin
+        { nodes whose complexity does not exceed 4 are left untouched; the
+          nil result tells the caller that no temp was created }
+        result:=nil;
+        if node_complexity(n) <= 4 then
+          exit;
+        { create a persistent temp of the node's type, initialised with the
+          original node n }
+        result:=ctempcreatenode.create_value(n.resultdef,size,tt_persistent,allowreg,n);
+        typecheckpass(tnode(result));
+        { the caller's node now becomes a reference to that temp }
+        n:=ctemprefnode.create(result);
+        typecheckpass(n);
+        { start a statement block if the caller has not done so yet, then
+          append the temp creation to it }
+        if not assigned(stat) then
+          block:=internalstatements(stat);
+        addstatement(stat,result);
+      end;
 
 {*****************************************************************************
                              TFIRSTNOTHING

+ 10 - 2
compiler/ninl.pas

@@ -88,6 +88,7 @@ interface
           function first_assigned: tnode; virtual;
           function first_assert: tnode; virtual;
           function first_popcnt: tnode; virtual;
+          function first_bitscan: tnode; virtual;
           { override these for Seg() support }
           function typecheck_seg: tnode; virtual;
           function first_seg: tnode; virtual;
@@ -4090,10 +4091,11 @@ implementation
          in_rol_x,
          in_rol_x_y,
          in_ror_x,
-         in_ror_x_y,
+         in_ror_x_y:
+           expectloc:=LOC_REGISTER;
          in_bsf_x,
          in_bsr_x:
-           expectloc:=LOC_REGISTER;
+           result:=first_bitscan;
          in_sar_x,
          in_sar_x_y:
            result:=first_sar;
@@ -4730,6 +4732,12 @@ implementation
          left:=nil;
        end;
 
+     function tinlinenode.first_bitscan: tnode;
+       begin
+         { default handler for in_bsf_x/in_bsr_x: same effect as the former
+           inline case branch (result expected in a register), with no
+           replacement node returned; virtual so descendants can override it }
+         expectloc:=LOC_REGISTER;
+         result:=nil;
+       end;
+
 
      function tinlinenode.typecheck_seg: tnode;
        begin

+ 2 - 2
compiler/options.pas

@@ -4445,7 +4445,7 @@ begin
 {$endif ARM}
 
 { inline bsf/bsr implementation }
-{$if not defined(llvm) and (defined(i386) or defined(x86_64) or defined(aarch64) or defined(powerpc) or defined(powerpc64))}
+{$if defined(i386) or defined(x86_64) or defined(aarch64) or defined(powerpc) or defined(powerpc64)}
   def_system_macro('FPC_HAS_INTERNAL_BSF');
   def_system_macro('FPC_HAS_INTERNAL_BSR');
 {$endif}
@@ -4459,7 +4459,7 @@ begin
     end;
 {$endif defined(i386) or defined(x86_64)}
 
-{$if defined(arm) and not defined(llvm)}
+{$if defined(arm)}
   { it is determined during system unit compilation if clz is used for bsf or not,
     this is not perfect but the current implementation bsf/bsr does not allow another
     solution }

+ 10 - 0
rtl/inc/llvmintr.inc

@@ -25,6 +25,16 @@ function llvm_eh_typeid_for(sym: pointer): longint; compilerproc; external name
 procedure llvm_lifetime_start(size: int64; ptr: pointer); compilerproc; external name 'llvm.lifetime.start';
 procedure llvm_lifetime_end(size: int64; ptr: pointer); compilerproc; external name 'llvm.lifetime.end';
 
+function llvm_ctlz(src: UInt8; is_zero_undef: LLVMBool1): UInt8; external name 'llvm.ctlz.i8'; { llvm.ctlz.iN bindings (count of leading zero bits), one overload per operand width; the compiler's bsr lowering calls these by name with is_zero_undef=true and tests for a zero operand itself }
+function llvm_ctlz(src: UInt16; is_zero_undef: LLVMBool1): UInt16; external name 'llvm.ctlz.i16';
+function llvm_ctlz(src: UInt32; is_zero_undef: LLVMBool1): UInt32; external name 'llvm.ctlz.i32';
+function llvm_ctlz(src: UInt64; is_zero_undef: LLVMBool1): UInt64; external name 'llvm.ctlz.i64';
+
+function llvm_cttz(src: UInt8; is_zero_undef: LLVMBool1): UInt8; external name 'llvm.cttz.i8'; { llvm.cttz.iN bindings, same overload layout as llvm_ctlz; called by name from the compiler's bsf lowering }
+function llvm_cttz(src: UInt16; is_zero_undef: LLVMBool1): UInt16; external name 'llvm.cttz.i16';
+function llvm_cttz(src: UInt32; is_zero_undef: LLVMBool1): UInt32; external name 'llvm.cttz.i32';
+function llvm_cttz(src: UInt64; is_zero_undef: LLVMBool1): UInt64; external name 'llvm.cttz.i64';
+
 function llvm_sqrt_f32(val: single): single; compilerproc; external name 'llvm.sqrt.f32';
 function llvm_sqrt_f64(val: double): double; compilerproc; external name 'llvm.sqrt.f64';
 {$ifdef SUPPORT_EXTENDED}