Browse Source

* fix handling of -0.0 in sse/avx code, resolves #39357

florian 3 years ago
parent
commit
9bd785c06b
3 changed files with 88 additions and 8 deletions
  1. 2 2
      compiler/x86/nx86con.pas
  2. 51 6
      compiler/x86/nx86mat.pas
  3. 35 0
      tests/webtbs/tw39357.pp

+ 2 - 2
compiler/x86/nx86con.pas

@@ -51,7 +51,7 @@ implementation
     function tx86realconstnode.pass_1 : tnode;
       begin
          result:=nil;
-         if is_number_float(value_real) and not(use_vectorfpu(resultdef)) and ((value_real=1.0) or (value_real=0.0)) then
+         if is_number_float(value_real) and not(use_vectorfpu(resultdef)) and ((value_real=1.0) or ((value_real=0.0) and (get_real_sign(value_real)=1))) then
            expectloc:=LOC_FPUREGISTER
          else
            expectloc:=LOC_CREFERENCE;
@@ -69,7 +69,7 @@ implementation
                   location.register:=NR_ST;
                   tcgx86(cg).inc_fpu_stack;
                end
-             else if value_real=0.0 then
+             else if (value_real=0.0) and (get_real_sign(value_real)=1) then
                begin
                  if use_vectorfpu(resultdef) then
                    begin

+ 51 - 6
compiler/x86/nx86mat.pas

@@ -164,18 +164,63 @@ interface
 
 
     procedure tx86unaryminusnode.second_float;
+      var
+        l1: TAsmLabel;
+        href: treference;
+        reg: tregister;
       begin
         secondpass(left);
 
         if expectloc=LOC_MMREGISTER then
           begin
-            if not(left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
-              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
-            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            if cs_opt_fastmath in current_settings.optimizerswitches then
+              begin
+                if not(left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
+                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+                location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+
+                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+                cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,location.size,location.register,location.register,nil);
+                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,OP_SUB,location.size,left.location,location.register,mms_movescalar);
+              end
+            else
+              begin
+                location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+
+                current_asmdata.getglobaldatalabel(l1);
+                new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
+                current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
+                case def_cgsize(resultdef) of
+                  OS_F32:
+                    current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(longint(1 shl 31)));
+                  OS_F64:
+                    begin
+                      current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(0));
+                      current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(-(1 shl 31)));
+                    end
+                  else
+                    internalerror(2004110215);
+                end;
 
-            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
-            cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,location.size,location.register,location.register,nil);
-            cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,OP_SUB,location.size,left.location,location.register,mms_movescalar);
+                reference_reset_symbol(href,l1,0,resultdef.alignment,[]);
+
+                if UseAVX then
+                  begin
+                    if not(left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+                      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+                    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+                    cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,left.location.register,location.register,nil)
+                  end
+                else
+                  begin
+                    reg:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+                    cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),href,reg,mms_movescalar);
+                    if not(left.location.loc=LOC_MMREGISTER) then
+                      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+                    location.register:=left.location.register;
+                    cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,location.register,mms_movescalar);
+                  end;
+              end;
           end
         else
           begin

+ 35 - 0
tests/webtbs/tw39357.pp

@@ -0,0 +1,35 @@
+{ %OPT=-O- -O2 }
+
+function get_sign(d: double): Integer; { returns -1 if the sign bit of d is set (this includes -0.0), otherwise 1 }
+  var
+    p: pbyte;
+  begin
+    get_sign:=1; { default: sign bit clear }
+    p:=pbyte(@d); { on big endian targets the first byte already is the most significant one }
+{$ifdef FPC_LITTLE_ENDIAN}
+    inc(p,sizeof(d)-1); { little endian: the most significant byte, which holds the sign bit, is the last one }
+{$endif}
+    if (p^ and $80)<>0 then { bit 7 of the most significant byte is the sign bit }
+      get_sign:=-1;
+  end;
+
+const
+	NegInfinity: single = -1.0 / 0.0; { typed constant of type single, initialised from the expression -1.0/0.0 }
+var
+    zero : Double;
+begin
+    zero:=0.0; { zero is a variable, so -zero below negates a variable rather than a literal }
+	writeln(-zero);
+    if get_sign(-zero)<>-1 then { unary minus applied to the variable must give a result with negative sign }
+      halt(1);
+
+	writeln(1.0 / (-1.0 / 0.0)); { same check for an expression written with literals only }
+    if get_sign(1.0 / (-1.0 / 0.0))<>-1 then
+      halt(1);
+
+	writeln(1.0 / NegInfinity); { same check with the divisor read from the typed constant }
+    if get_sign(1.0 / NegInfinity)<>-1 then
+      halt(1);
+
+    writeln('ok'); { only reached when none of the three checks called halt }
+end.