Forráskód Böngészése

+ proof of concept of how FMA4 could be supported in inline assembler

git-svn-id: trunk@27214 -
florian 11 éve
szülő
commit
842e027a9f

+ 1 - 0
compiler/i386/i386att.inc

@@ -988,6 +988,7 @@
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',
+'vfmaddpd',
 'vfmadd132ps',
 'vfmadd213ps',
 'vfmadd231ps',

+ 1 - 0
compiler/i386/i386atts.inc

@@ -1044,5 +1044,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 1 - 0
compiler/i386/i386int.inc

@@ -988,6 +988,7 @@
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',
+'vfmaddpd',
 'vfmadd132ps',
 'vfmadd213ps',
 'vfmadd231ps',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1924;
+1926;

+ 1 - 0
compiler/i386/i386op.inc

@@ -988,6 +988,7 @@ A_TEST1,
 A_VFMADD132PD,
 A_VFMADD213PD,
 A_VFMADD231PD,
+A_VFMADDPD,
 A_VFMADD132PS,
 A_VFMADD213PS,
 A_VFMADD231PS,

+ 1 - 0
compiler/i386/i386prop.inc

@@ -1044,5 +1044,6 @@
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1))
 );

+ 14 - 0
compiler/i386/i386tab.inc

@@ -12838,6 +12838,20 @@
     code    : #241#242#244#249#243#1#184#61#80;
     flags   : if_fma
   ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_xmmreg);
+    code    : #241#242#250#1#105#61#80#247;
+    flags   : if_fma4
+  ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmreg,ot_xmmrm);
+    code    : #241#242#250#243#1#105#61#88#246;
+    flags   : if_fma4
+  ),
   (
     opcode  : A_VFMADD132PS;
     ops     : 3;

+ 1 - 0
compiler/i8086/i8086att.inc

@@ -988,6 +988,7 @@
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',
+'vfmaddpd',
 'vfmadd132ps',
 'vfmadd213ps',
 'vfmadd231ps',

+ 1 - 0
compiler/i8086/i8086atts.inc

@@ -1044,5 +1044,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 1 - 0
compiler/i8086/i8086int.inc

@@ -988,6 +988,7 @@
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',
+'vfmaddpd',
 'vfmadd132ps',
 'vfmadd213ps',
 'vfmadd231ps',

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1952;
+1954;

+ 1 - 0
compiler/i8086/i8086op.inc

@@ -988,6 +988,7 @@ A_TEST1,
 A_VFMADD132PD,
 A_VFMADD213PD,
 A_VFMADD231PD,
+A_VFMADDPD,
 A_VFMADD132PS,
 A_VFMADD213PS,
 A_VFMADD231PS,

+ 1 - 0
compiler/i8086/i8086prop.inc

@@ -1044,5 +1044,6 @@
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1))
 );

+ 14 - 0
compiler/i8086/i8086tab.inc

@@ -13034,6 +13034,20 @@
     code    : #241#242#244#249#243#1#184#61#80;
     flags   : if_fma
   ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_xmmreg);
+    code    : #241#242#250#1#105#61#80#247;
+    flags   : if_fma4
+  ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmreg,ot_xmmrm);
+    code    : #241#242#250#243#1#105#61#88#246;
+    flags   : if_fma4
+  ),
   (
     opcode  : A_VFMADD132PS;
     ops     : 3;

+ 45 - 21
compiler/x86/aasmcpu.pas

@@ -432,6 +432,7 @@ implementation
        IF_BMI2   = $00200000;
        IF_16BITONLY = $00200000;
        IF_FMA    = $00200000;
+       IF_FMA4   = $00200000;
 
        IF_PLEVEL = $0F000000;  { mask for processor level }
        IF_8086   = $00000000;  { 8086 instruction  }
@@ -2122,6 +2123,7 @@ implementation
                 end;
               end;
             244: ; // VEX length bit
+            246, // operand 2 (ymmreg) encoded immediate byte (bit 4-7)
             247: inc(len); // operand 3 (ymmreg) encoded immediate byte (bit 4-7)
             248: // VEX-Extention prefix $0F
                  // ignore for calculating length
@@ -2238,6 +2240,7 @@ implementation
        * \362          - VEX prefix for AVX instructions
        * \363          - VEX W1
        * \364          - VEX Vector length 256
+       * \366          - operand 2 (ymmreg) encoded in bit 4-7 of the immediate byte
        * \367          - operand 3 (ymmreg) encoded in bit 4-7 of the immediate byte
 
        * \370          - VEX 0F-FLAG
@@ -2792,27 +2795,48 @@ implementation
                   are not needed }
               end;
             242..244: ; // VEX flags =>> nothing todo
-                 247: begin
-                        if needed_VEX then
-                        begin
-                          if ops = 4 then
-                          begin
-                            if (oper[3]^.typ=top_reg) then
-                            begin
-                              if (oper[3]^.ot and otf_reg_xmm <> 0) or
-                                 (oper[3]^.ot and otf_reg_ymm <> 0) then
-                              begin
-                                bytes[0] := ((getsupreg(oper[3]^.reg) and 15) shl 4);
-                                objdata.writebytes(bytes,1);
-                              end
-                              else Internalerror(777102);
-                            end
-                            else Internalerror(777103);
-                          end
-                          else Internalerror(777104);
-                        end
-                        else Internalerror(777105);
-                      end;
+            246: begin
+                   if needed_VEX then
+                   begin
+                     if ops = 4 then
+                     begin
+                       if (oper[2]^.typ=top_reg) then
+                       begin
+                         if (oper[2]^.ot and otf_reg_xmm <> 0) or
+                            (oper[2]^.ot and otf_reg_ymm <> 0) then
+                         begin
+                           bytes[0] := ((getsupreg(oper[2]^.reg) and 15) shl 4);
+                           objdata.writebytes(bytes,1);
+                         end
+                         else Internalerror(2014032001);
+                       end
+                       else Internalerror(2014032002);
+                     end
+                     else Internalerror(2014032003);
+                   end
+                   else Internalerror(2014032004);
+                 end;
+            247: begin
+                   if needed_VEX then
+                   begin
+                     if ops = 4 then
+                     begin
+                       if (oper[3]^.typ=top_reg) then
+                       begin
+                         if (oper[3]^.ot and otf_reg_xmm <> 0) or
+                            (oper[3]^.ot and otf_reg_ymm <> 0) then
+                         begin
+                           bytes[0] := ((getsupreg(oper[3]^.reg) and 15) shl 4);
+                           objdata.writebytes(bytes,1);
+                         end
+                         else Internalerror(2014032005);
+                       end
+                       else Internalerror(2014032006);
+                     end
+                     else Internalerror(2014032007);
+                   end
+                   else Internalerror(2014032008);
+                 end;
             248..250: ; // VEX flags =>> nothing todo
             31,
             48,49,50 :

+ 5 - 0
compiler/x86/x86ins.dat

@@ -5050,6 +5050,11 @@ ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xA8\75\120    FMA
 xmmreg,xmmreg,xmmrm                  \361\362\371\363\1\xB8\75\120        FMA
 ymmreg,ymmreg,ymmrm                  \361\362\364\371\363\1\xB8\75\120    FMA
 
+[VFMADDPD]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm,xmmreg           \361\362\372\1\x69\75\120\367        FMA4
+xmmreg,xmmreg,xmmreg,xmmrm           \361\362\372\363\1\x69\75\130\366    FMA4
+
 [VFMADD132PS]
 (Ch_Mop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm                  \361\362\371\1\x98\75\120            FMA

+ 1 - 0
compiler/x86_64/x8664ats.inc

@@ -1044,5 +1044,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 1 - 0
compiler/x86_64/x8664att.inc

@@ -988,6 +988,7 @@
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',
+'vfmaddpd',
 'vfmadd132ps',
 'vfmadd213ps',
 'vfmadd231ps',

+ 1 - 0
compiler/x86_64/x8664int.inc

@@ -988,6 +988,7 @@
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',
+'vfmaddpd',
 'vfmadd132ps',
 'vfmadd213ps',
 'vfmadd231ps',

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1945;
+1947;

+ 1 - 0
compiler/x86_64/x8664op.inc

@@ -988,6 +988,7 @@ A_TEST1,
 A_VFMADD132PD,
 A_VFMADD213PD,
 A_VFMADD231PD,
+A_VFMADDPD,
 A_VFMADD132PS,
 A_VFMADD213PS,
 A_VFMADD231PS,

+ 1 - 0
compiler/x86_64/x8664pro.inc

@@ -1044,5 +1044,6 @@
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1))
 );

+ 14 - 0
compiler/x86_64/x8664tab.inc

@@ -12985,6 +12985,20 @@
     code    : #241#242#244#249#243#1#184#61#80;
     flags   : if_fma
   ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_xmmreg);
+    code    : #241#242#250#1#105#61#80#247;
+    flags   : if_fma4
+  ),
+  (
+    opcode  : A_VFMADDPD;
+    ops     : 4;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmreg,ot_xmmrm);
+    code    : #241#242#250#243#1#105#61#88#246;
+    flags   : if_fma4
+  ),
   (
     opcode  : A_VFMADD132PS;
     ops     : 3;