Przeglądaj źródła

+ enable support for a processor specific frac function
* implemented inlined frac support for CPUs with SSE4.1+

git-svn-id: trunk@36281 -

florian 8 lat temu
rodzic
commit
8eec29b139
2 zmienionych plików z 76 dodań i 0 usunięć
  1. 8 0
      compiler/ncginl.pas
  2. 68 0
      compiler/x86/nx86inl.pas

+ 8 - 0
compiler/ncginl.pas

@@ -64,6 +64,7 @@ interface
           procedure second_popcnt; virtual;
           procedure second_seg; virtual; abstract;
           procedure second_fma; virtual;
+          procedure second_frac_real; virtual;
        end;
 
 implementation
@@ -134,6 +135,8 @@ implementation
               second_ln_real;
             in_cos_real:
                second_cos_real;
+            in_frac_real:
+              second_frac_real;
             in_prefetch_var:
               second_prefetch;
             in_assigned_x:
@@ -659,6 +662,11 @@ implementation
       begin
       end;
 
+    procedure tcginlinenode.second_frac_real;
+      begin
+        internalerror(2017052104);
+      end;
+
     procedure tcginlinenode.second_abs_long;
       var
         tempreg1, tempreg2: tregister;

+ 68 - 0
compiler/x86/nx86inl.pas

@@ -49,6 +49,7 @@ interface
           function first_trunc_real: tnode; override;
           function first_popcnt: tnode; override;
           function first_fma: tnode; override;
+          function first_frac_real : tnode; override;
           { second pass override to generate these nodes }
           procedure second_IncludeExclude;override;
           procedure second_pi; override;
@@ -69,6 +70,7 @@ interface
 {$endif not i8086}
           procedure second_popcnt;override;
           procedure second_fma;override;
+          procedure second_frac_real;override;
        private
           procedure load_fpu_location(lnode: tnode);
        end;
@@ -274,6 +276,19 @@ implementation
        end;
 
 
+     function tx86inlinenode.first_frac_real : tnode;
+       begin
+         if (current_settings.fputype>=fpu_sse41) and
+           ((is_double(resultdef)) or (is_single(resultdef))) then
+           begin
+             expectloc:=LOC_MMREGISTER;
+             Result:=nil;
+           end
+         else
+           Result:=inherited first_frac_real;
+       end;
+
+
      procedure tx86inlinenode.second_pi;
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
@@ -282,6 +297,7 @@ implementation
          location.register:=NR_FPU_RESULT_REG;
        end;
 
+
      { load the FPU into the an fpu register }
      procedure tx86inlinenode.load_fpu_location(lnode: tnode);
        begin
@@ -935,4 +951,56 @@ implementation
            internalerror(2014032301);
       end;
 
+
+    procedure tx86inlinenode.second_frac_real;
+      var
+        extrareg : TRegister;
+      begin
+        if use_vectorfpu(resultdef) then
+          begin
+            secondpass(left);
+            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+            location_reset(location,LOC_MMREGISTER,left.location.size);
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            if UseAVX then
+              case tfloatdef(resultdef).floattype of
+                s32real:
+                  begin
+                    { using left.location.register here as 3rd parameter is crucial to break dependency chains }
+                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register));
+                  end;
+                s64real:
+                  begin
+                    { using left.location.register here as 3rd parameter is crucial to break dependency chains }
+                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register));
+                  end;
+                else
+                  internalerror(2017052102);
+              end
+            else
+              begin
+                extrareg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+               cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
+                case tfloatdef(resultdef).floattype of
+                  s32real:
+                    begin
+                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,extrareg));
+                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,extrareg,location.register));
+                    end;
+                  s64real:
+                    begin
+                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,extrareg));
+                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,extrareg,location.register));
+                    end;
+                  else
+                    internalerror(2017052103);
+                end;
+              end;
+          end
+        else
+          internalerror(2017052101);
+      end;
+
 end.