Browse Source

* fixed assembling of fisttp of sse3 instruction set
+ sse3 optimized trunc() using fisttp
+ -Cfsse3 for x86-64

git-svn-id: trunk@8962 -

florian 18 năm trước cách đây
mục cha
commit
f32831c44a

+ 3 - 3
compiler/i386/i386tab.inc

@@ -1509,21 +1509,21 @@
     opcode  : A_FISTTP;
     ops     : 1;
     optypes : (ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #192#211#1#221#129;
+    code    : #192#211#1#219#129;
     flags   : if_prescott or if_fpu
   ),
   (
     opcode  : A_FISTTP;
     ops     : 1;
     optypes : (ot_memory or ot_bits16,ot_none,ot_none);
-    code    : #192#211#1#219#129;
+    code    : #192#211#1#223#129;
     flags   : if_prescott or if_fpu
   ),
   (
     opcode  : A_FISTTP;
     ops     : 1;
     optypes : (ot_memory or ot_bits64,ot_none,ot_none);
-    code    : #192#211#1#223#129;
+    code    : #192#211#1#221#129;
     flags   : if_prescott or if_fpu
   ),
   (

+ 3 - 3
compiler/x86/cgx86.pas

@@ -573,7 +573,7 @@ unit cgx86;
         if (target_info.system<>system_i386_darwin) then
           list.concat(taicpu.op_sym(A_JMP,S_NO,current_asmdata.RefAsmSymbol(s)))
         else
-          begin 
+          begin
             reference_reset_symbol(r,get_darwin_call_stub(s),0);
             r.refaddr:=addr_full;
             list.concat(taicpu.op_ref(A_JMP,S_NO,r));
@@ -1014,7 +1014,7 @@ unit cgx86;
          tmpref:=ref;
          make_simple_ref(list,tmpref);
          if shuffle=nil then
-           list.concat(taicpu.op_ref_reg(A_MOVQ,S_NO,tmpref,reg))
+           list.concat(taicpu.op_ref_reg(A_MOVDQU,S_NO,tmpref,reg))
          else if shufflescalar(shuffle) then
            list.concat(taicpu.op_ref_reg(get_scalar_mm_op(fromsize,tosize),S_NO,tmpref,reg))
          else
@@ -1030,7 +1030,7 @@ unit cgx86;
          tmpref:=ref;
          make_simple_ref(list,tmpref);
          if shuffle=nil then
-           list.concat(taicpu.op_reg_ref(A_MOVQ,S_NO,reg,tmpref))
+           list.concat(taicpu.op_reg_ref(A_MOVDQU,S_NO,reg,tmpref))
          else if shufflescalar(shuffle) then
            begin
              if tosize<>fromsize then

+ 26 - 15
compiler/x86/nx86inl.pas

@@ -282,7 +282,8 @@ implementation
          tempreg : tregister;
        begin
 {$ifdef x86_64}
-         if use_sse(left.resultdef) then
+         if use_sse(left.resultdef) and { stay on the x87 path when the value sits in an FPU register and fisttp is usable }
+           not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
            begin
              secondpass(left);
              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
@@ -300,20 +301,30 @@ implementation
          else
 {$endif x86_64}
           begin
-            tg.GetTemp(current_asmdata.CurrAsmList,2,tt_normal,oldcw);
-            tg.GetTemp(current_asmdata.CurrAsmList,2,tt_normal,newcw);
-            emit_ref(A_FNSTCW,S_NO,newcw);
-            emit_ref(A_FNSTCW,S_NO,oldcw);
-            emit_const_ref(A_OR,S_W,$0f00,newcw);
-            load_fpu_location;
-            emit_ref(A_FLDCW,S_NO,newcw);
-            location_reset(location,LOC_REFERENCE,OS_S64);
-            tg.GetTempTyped(current_asmdata.CurrAsmList,resultdef,tt_normal,location.reference);
-            emit_ref(A_FISTP,S_IQ,location.reference);
-            emit_ref(A_FLDCW,S_NO,oldcw);
-            emit_none(A_FWAIT,S_NO);
-            tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
-            tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
+            if (current_settings.fputype>=fpu_sse3) then
+              begin
+                load_fpu_location;
+                location_reset(location,LOC_REFERENCE,OS_S64);
+                tg.GetTempTyped(current_asmdata.CurrAsmList,resultdef,tt_normal,location.reference);
+                emit_ref(A_FISTTP,S_IQ,location.reference);
+              end
+            else
+              begin
+                tg.GetTemp(current_asmdata.CurrAsmList,2,tt_normal,oldcw);
+                tg.GetTemp(current_asmdata.CurrAsmList,2,tt_normal,newcw);
+                emit_ref(A_FNSTCW,S_NO,newcw);
+                emit_ref(A_FNSTCW,S_NO,oldcw);
+                emit_const_ref(A_OR,S_W,$0f00,newcw);
+                load_fpu_location;
+                emit_ref(A_FLDCW,S_NO,newcw);
+                location_reset(location,LOC_REFERENCE,OS_S64);
+                tg.GetTempTyped(current_asmdata.CurrAsmList,resultdef,tt_normal,location.reference);
+                emit_ref(A_FISTP,S_IQ,location.reference);
+                emit_ref(A_FLDCW,S_NO,oldcw);
+                emit_none(A_FWAIT,S_NO);
+                tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
+                tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
+              end;
            end;
        end;
 

+ 3 - 3
compiler/x86/x86ins.dat

@@ -477,9 +477,9 @@ mem64                 \300\323\1\xDF\207              8086,FPU
 
 [FISTTP]
 (Ch_Wop1, Ch_None, Ch_None)
-mem32                 \300\323\1\xDD\201              PRESCOTT,FPU
-mem16                 \300\323\1\xDB\201              PRESCOTT,FPU
-mem64                 \300\323\1\xDF\201              PRESCOTT,FPU
+mem32                 \300\323\1\xDB\201              PRESCOTT,FPU
+mem16                 \300\323\1\xDF\201              PRESCOTT,FPU
+mem64                 \300\323\1\xDD\201              PRESCOTT,FPU
 
 [FISUB,fisubR]
 (Ch_FPU, Ch_ROp1, Ch_None)

+ 4 - 2
compiler/x86_64/cpuinfo.pas

@@ -46,7 +46,8 @@ Type
    tfputype =
      (fpu_none,
       fpu_soft,  { generic }
-      fpu_sse64
+      fpu_sse64,
+      fpu_sse3
      );
 
 Const
@@ -75,7 +76,8 @@ Const
 
    fputypestr : array[tfputype] of string[6] = ('',
      'SOFT',
-     'SSE64'
+     'SSE64',
+     'SSE3'
    );
 
    sse_singlescalar : set of tfputype = [fpu_sse64];

+ 3 - 3
compiler/x86_64/x8664tab.inc

@@ -1502,21 +1502,21 @@
     opcode  : A_FISTTP;
     ops     : 1;
     optypes : (ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #192#211#1#221#129;
+    code    : #192#211#1#219#129;
     flags   : if_prescott or if_fpu
   ),
   (
     opcode  : A_FISTTP;
     ops     : 1;
     optypes : (ot_memory or ot_bits16,ot_none,ot_none);
-    code    : #192#211#1#219#129;
+    code    : #192#211#1#223#129;
     flags   : if_prescott or if_fpu
   ),
   (
     opcode  : A_FISTTP;
     ops     : 1;
     optypes : (ot_memory or ot_bits64,ot_none,ot_none);
-    code    : #192#211#1#223#129;
+    code    : #192#211#1#221#129;
     flags   : if_prescott or if_fpu
   ),
   (