Browse Source

+ min/max support for x86

git-svn-id: trunk@47701 -
florian 4 years ago
parent
commit
1a9678f4c4
2 changed files with 151 additions and 4 deletions
  1. 15 4
      compiler/nflw.pas
  2. 136 0
      compiler/x86/nx86inl.pas

+ 15 - 4
compiler/nflw.pas

@@ -302,9 +302,9 @@ implementation
     {$ifdef i8086}
       cpuinfo,
     {$endif i8086}
-    {$ifdef xtensa}
+    {$if defined(xtensa) or defined(i386)}
       cpuinfo,
-    {$endif xtensa}
+    {$endif defined(xtensa) or defined(i386)}
       cgbase,procinfo
       ;
 
@@ -1588,13 +1588,24 @@ implementation
                     CGMessagePos(right.fileinfo,cg_w_unreachable_code);
                end;
           end;
-{$if defined(xtensa)}
+{$if defined(i386) or defined(x86_64) or defined(xtensa)}
         { use min/max intrinsic? }
         if (left.nodetype in [gtn,gten,ltn,lten]) and IsSingleStatement(right,thenstmnt) and IsSingleStatement(t1,elsestmnt) and
           (thenstmnt.nodetype=assignn) and (elsestmnt.nodetype=assignn) and
           not(might_have_sideeffects(left)) and
           tassignmentnode(thenstmnt).left.isequal(tassignmentnode(elsestmnt).left) and
+{$if defined(i386) or defined(x86_64)}
+{$ifdef i386}
+          (((current_settings.fputype>=fpu_sse) and is_single(tassignmentnode(thenstmnt).left.resultdef)) or
+           ((current_settings.fputype>=fpu_sse2) and is_double(tassignmentnode(thenstmnt).left.resultdef))
+          ) and
+{$else i386}
+          (is_single(tassignmentnode(thenstmnt).left.resultdef) or is_double(tassignmentnode(thenstmnt).left.resultdef)) and
+{$endif i386}
+{$endif defined(i386) or defined(x86_64)}
+{$if defined(xtensa)}
           (CPUXTENSA_HAS_MINMAX in cpu_capabilities[current_settings.cputype]) and is_32bitint(tassignmentnode(thenstmnt).right.resultdef) and
+{$endif defined(xtensa)}
           ((tassignmentnode(thenstmnt).right.isequal(taddnode(left).left) and (tassignmentnode(elsestmnt).right.isequal(taddnode(left).right))) or
            (tassignmentnode(thenstmnt).right.isequal(taddnode(left).right) and (tassignmentnode(elsestmnt).right.isequal(taddnode(left).left)))) then
           begin
@@ -1627,7 +1638,7 @@ implementation
                     ccallparanode.create(taddnode(left).left.getcopy,nil)))
               );
           end;
-{$endif defined(xtensa)}
+{$endif defined(i386) or defined(x86_64) or defined(xtensa)}
       end;
 
 

+ 136 - 0
compiler/x86/nx86inl.pas

@@ -54,6 +54,7 @@ interface
           function first_fma: tnode; override;
           function first_frac_real : tnode; override;
           function first_int_real : tnode; override;
+          function first_minmax: tnode; override;
 
           function simplify(forinline : boolean) : tnode; override;
 
@@ -79,6 +80,7 @@ interface
           procedure second_frac_real;override;
           procedure second_int_real;override;
           procedure second_high;override;
+          procedure second_minmax;override;
        private
           procedure load_fpu_location(lnode: tnode);
        end;
@@ -389,6 +391,27 @@ implementation
        end;
 
 
+     function tx86inlinenode.first_minmax: tnode; { first pass of the min/max inline node: decides whether this x86 node handles it itself }
+       begin
+{$ifndef i8086}
+         if
+{$ifdef i386}
+           ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or { i386: single results need an fputype of at least fpu_sse }
+           ((current_settings.fputype>=fpu_sse2) and is_double(resultdef)) { i386: double results need an fputype of at least fpu_sse2 }
+{$else i386}
+           ((is_double(resultdef)) or (is_single(resultdef))) { neither i386 nor i8086: any single/double result qualifies, no fputype check }
+{$endif i386}
+           then
+           begin
+             expectloc:=LOC_MMREGISTER; { matches second_minmax, which places the result in an MM register }
+             Result:=nil; { no replacement node is returned; presumably this keeps the node as it is - confirm against the first-pass contract }
+           end
+         else
+{$endif i8086}
+           Result:=inherited first_minmax; { every other result type, and i8086 unconditionally, is left to the inherited implementation }
+       end;
+
+
      function tx86inlinenode.simplify(forinline : boolean) : tnode;
        var
          temp : tnode;
@@ -1401,4 +1424,117 @@ implementation
         location.register:=hregister;
       end;
 
+
+    procedure tx86inlinenode.second_minmax;
+      { Generates code for the min/max inline nodes with a single or double result (on i386 only
+        when the selected fpu type provides SSE resp. SSE2): both parameters are evaluated and
+        combined by (V)MINSx/(V)MAXSx into a newly allocated MM register. All other cases, and
+        i8086 unconditionally, end in internalerror 2020120503.
+
+        MINSx/MAXSx are not commutative: if an operand is a NaN, or both operands are zeroes, the
+        second source operand is returned. The result is therefore always computed as
+        op(paraarray[1],paraarray[2]), no matter where the operands are located and whether the
+        SSE or the AVX encoding is used; so only paraarray[2] may serve as memory operand. }
+      const
+        { indexed by [is it max?,UseAVX,float type of the result] }
+        oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
+          (
+           (
+            (A_MINSS,A_MINSD),
+            (A_VMINSS,A_VMINSD)
+           ),
+           (
+            (A_MAXSS,A_MAXSD),
+            (A_VMAXSS,A_VMAXSD)
+           )
+          );
+
+      var
+        paraarray : array[1..2] of tnode;
+        i : integer;
+        gotmem : boolean;
+        op: TAsmOp;
+      begin
+{$ifndef i8086}
+         if
+{$ifdef i386}
+           ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
+           ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
+{$else i386}
+           is_single(resultdef) or is_double(resultdef)
+{$endif i386}
+           then
+           begin
+             { paraarray[2] is the head of the parameter list, paraarray[1] the entry following it }
+             paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+             paraarray[2]:=tcallparanode(parameters).paravalue;
+
+             for i:=low(paraarray) to high(paraarray) do
+               secondpass(paraarray[i]);
+
+             { the first source operand has to be in a register: swapping the operands to use it as
+               memory operand would change the result for NaNs and signed zeroes }
+             if not(paraarray[1].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
+
+             { the second source operand can be used directly from memory }
+             gotmem:=paraarray[2].location.loc in [LOC_REFERENCE,LOC_CREFERENCE];
+             if not(gotmem) and not(paraarray[2].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[2].location,paraarray[2].resultdef,true);
+
+             op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
+
+             location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
+             location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+             if UseAVX then
+               begin
+                 { three operand form; the emit_* helpers take the operands in AT&T order, i.e.
+                   second source, first source, destination }
+                 if gotmem then
+                   emit_ref_reg_reg(op,S_NO,
+                     paraarray[2].location.reference,paraarray[1].location.register,location.register)
+                 else
+                   emit_reg_reg_reg(op,S_NO,
+                     paraarray[2].location.register,paraarray[1].location.register,location.register);
+               end
+             else
+               begin
+                 { two operand form: the destination is also the first source operand, so it is
+                   preloaded with paraarray[1] }
+                 hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
+                   paraarray[1].location.register,location.register,mms_movescalar);
+                 if gotmem then
+                   emit_ref_reg(op,S_NO,
+                     paraarray[2].location.reference,location.register)
+                 else
+                   emit_reg_reg(op,S_NO,
+                     paraarray[2].location.register,location.register);
+               end;
+           end
+         else
+{$endif i8086}
+           internalerror(2020120503);
+      end;
+
+
 end.