Browse Source

Introduced 16-bit broadcast functionality.

Margers 2 months ago
parent
commit
ff9aa15143
4 changed files with 58 additions and 23 deletions
  1. 34 14
      compiler/x86/aasmcpu.pas
  2. 2 1
      compiler/x86/agx86att.pas
  3. 9 1
      compiler/x86/rax86.pas
  4. 13 7
      compiler/x86/rax86int.pas

+ 34 - 14
compiler/x86/aasmcpu.pas

@@ -65,6 +65,7 @@ interface
 
 
       OT_VECTOR_EXT = OT_VECTORMASK or OT_VECTORZERO or OT_VECTORBCST or OT_VECTORSAE or OT_VECTORER;
       OT_VECTOR_EXT = OT_VECTORMASK or OT_VECTORZERO or OT_VECTORBCST or OT_VECTORSAE or OT_VECTORER;
 
 
+      OT_BITSB16    = OT_BITS16 or OT_VECTORBCST;
       OT_BITSB32    = OT_BITS32 or OT_VECTORBCST;
       OT_BITSB32    = OT_BITS32 or OT_VECTORBCST;
       OT_BITSB64    = OT_BITS64 or OT_VECTORBCST;
       OT_BITSB64    = OT_BITS64 or OT_VECTORBCST;
 
 
@@ -235,6 +236,7 @@ interface
       OT_MEM8      = OT_MEMORY or OT_BITS8;
       OT_MEM8      = OT_MEMORY or OT_BITS8;
       OT_MEM16     = OT_MEMORY or OT_BITS16;
       OT_MEM16     = OT_MEMORY or OT_BITS16;
       OT_MEM16_M   = OT_MEM16  or OT_VECTORMASK;
       OT_MEM16_M   = OT_MEM16  or OT_VECTORMASK;
+      OT_BMEM16    = OT_MEMORY or OT_BITS16 or OT_VECTORBCST;
       OT_MEM32     = OT_MEMORY or OT_BITS32;
       OT_MEM32     = OT_MEMORY or OT_BITS32;
       OT_MEM32_M   = OT_MEMORY or OT_BITS32 or OT_VECTORMASK;
       OT_MEM32_M   = OT_MEMORY or OT_BITS32 or OT_VECTORMASK;
       OT_BMEM32    = OT_MEMORY or OT_BITS32 or OT_VECTORBCST;
       OT_BMEM32    = OT_MEMORY or OT_BITS32 or OT_VECTORBCST;
@@ -278,13 +280,14 @@ interface
       OTVE_VECTOR_BCST4        = 1 shl 4;
       OTVE_VECTOR_BCST4        = 1 shl 4;
       OTVE_VECTOR_BCST8        = 1 shl 5;
       OTVE_VECTOR_BCST8        = 1 shl 5;
       OTVE_VECTOR_BCST16       = 3 shl 4;
       OTVE_VECTOR_BCST16       = 3 shl 4;
+      OTVE_VECTOR_BCST32       = 1 shl 13;
       OTVE_VECTOR_RNSAE        = OTVE_VECTOR_ER or 0;
       OTVE_VECTOR_RNSAE        = OTVE_VECTOR_ER or 0;
       OTVE_VECTOR_RDSAE        = OTVE_VECTOR_ER or 1 shl 6;
       OTVE_VECTOR_RDSAE        = OTVE_VECTOR_ER or 1 shl 6;
       OTVE_VECTOR_RUSAE        = OTVE_VECTOR_ER or 1 shl 7;
       OTVE_VECTOR_RUSAE        = OTVE_VECTOR_ER or 1 shl 7;
       OTVE_VECTOR_RZSAE        = OTVE_VECTOR_ER or 3 shl 6;
       OTVE_VECTOR_RZSAE        = OTVE_VECTOR_ER or 3 shl 6;
 
 
 
 
-      OTVE_VECTOR_BCST_MASK    = OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16;
+      OTVE_VECTOR_BCST_MASK    = OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16 or OTVE_VECTOR_BCST32;
       OTVE_VECTOR_ER_MASK      = OTVE_VECTOR_RNSAE or OTVE_VECTOR_RDSAE or OTVE_VECTOR_RUSAE or OTVE_VECTOR_RZSAE;
       OTVE_VECTOR_ER_MASK      = OTVE_VECTOR_RNSAE or OTVE_VECTOR_RDSAE or OTVE_VECTOR_RUSAE or OTVE_VECTOR_RZSAE;
 
 
       OTVE_VECTOR_MASK = OTVE_VECTOR_SAE or OTVE_VECTOR_ER or OTVE_VECTOR_ZERO or OTVE_VECTOR_WRITEMASK or OTVE_VECTOR_BCST;
       OTVE_VECTOR_MASK = OTVE_VECTOR_SAE or OTVE_VECTOR_ER or OTVE_VECTOR_ZERO or OTVE_VECTOR_WRITEMASK or OTVE_VECTOR_BCST;
@@ -364,13 +367,13 @@ interface
                          msiMultipleMinSize64, msiMultipleMinSize128, msiMultipleminSize256, msiMultipleMinSize512,
                          msiMultipleMinSize64, msiMultipleMinSize128, msiMultipleminSize256, msiMultipleMinSize512,
                          msiMemRegSize, msiMemRegx16y32, msiMemRegx16y32z64, msiMemRegx32y64, msiMemRegx32y64z128, msiMemRegx64y128, msiMemRegx64y128z256,
                          msiMemRegSize, msiMemRegx16y32, msiMemRegx16y32z64, msiMemRegx32y64, msiMemRegx32y64z128, msiMemRegx64y128, msiMemRegx64y128z256,
                          msiMemRegx64y256, msiMemRegx64y256z512,
                          msiMemRegx64y256, msiMemRegx64y256z512,
-                         msiMem8, msiMem16, msiMem32, msiBMem32, msiMem64, msiBMem64, msiMem128, msiMem256, msiMem512,
+                         msiMem8, msiMem16, msiBMem16, msiMem32, msiBMem32, msiMem64, msiBMem64, msiMem128, msiMem256, msiMem512,
                          msiXMem32, msiXMem64, msiYMem32, msiYMem64, msiZMem32, msiZMem64,
                          msiXMem32, msiXMem64, msiYMem32, msiYMem64, msiZMem32, msiZMem64,
                          msiVMemMultiple, msiVMemRegSize,
                          msiVMemMultiple, msiVMemRegSize,
                          msiMemRegConst128,msiMemRegConst256,msiMemRegConst512);
                          msiMemRegConst128,msiMemRegConst256,msiMemRegConst512);
 
 
-      TMemRefSizeInfoBCST = (msbUnknown, msbBCST32, msbBCST64, msbMultiple);
-      TMemRefSizeInfoBCSTType = (btUnknown, bt1to2, bt1to4, bt1to8, bt1to16);
+      TMemRefSizeInfoBCST = (msbUnknown, msbBCST16, msbBCST32, msbBCST64, msbMultiple);
+      TMemRefSizeInfoBCSTType = (btUnknown, bt1to2, bt1to4, bt1to8, bt1to16, bt1to32);
 
 
       TEVEXTupleState = (etsUnknown, etsIsTuple, etsNotTuple);
       TEVEXTupleState = (etsUnknown, etsIsTuple, etsNotTuple);
       TConstSizeInfo  = (csiUnknown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64);
       TConstSizeInfo  = (csiUnknown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64);
@@ -515,6 +518,7 @@ interface
         IF_BCST4,
         IF_BCST4,
         IF_BCST8,
         IF_BCST8,
         IF_BCST16,
         IF_BCST16,
+        IF_BCST32,
         IF_T2,                  { disp8 - tuple - 2 }
         IF_T2,                  { disp8 - tuple - 2 }
         IF_T4,                  { disp8 - tuple - 4 }
         IF_T4,                  { disp8 - tuple - 4 }
         IF_T8,                  { disp8 - tuple - 8 }
         IF_T8,                  { disp8 - tuple - 8 }
@@ -1389,6 +1393,7 @@ implementation
                       OTVE_VECTOR_BCST4: s := s + ' {1to4}';
                       OTVE_VECTOR_BCST4: s := s + ' {1to4}';
                       OTVE_VECTOR_BCST8: s := s + ' {1to8}';
                       OTVE_VECTOR_BCST8: s := s + ' {1to8}';
                      OTVE_VECTOR_BCST16: s := s + ' {1to16}';
                      OTVE_VECTOR_BCST16: s := s + ' {1to16}';
+                     OTVE_VECTOR_BCST32: s := s + ' {1to32}';
                    end;
                    end;
 
 
                   if vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then
                   if vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then
@@ -1906,12 +1911,12 @@ implementation
                  // e.g. vfpclasspd  k1, [RAX] {1to8}, 0
                  // e.g. vfpclasspd  k1, [RAX] {1to8}, 0
                  // =>> check flags
                  // =>> check flags
 
 
-
-                 case oper[i]^.vopext and (OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16) of
+                 case oper[i]^.vopext and (OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16 or OTVE_VECTOR_BCST32) of
                     OTVE_VECTOR_BCST2: if not(IF_BCST2 in p^.flags) then exit;
                     OTVE_VECTOR_BCST2: if not(IF_BCST2 in p^.flags) then exit;
                     OTVE_VECTOR_BCST4: if not(IF_BCST4 in p^.flags) then exit;
                     OTVE_VECTOR_BCST4: if not(IF_BCST4 in p^.flags) then exit;
                     OTVE_VECTOR_BCST8: if not(IF_BCST8 in p^.flags) then exit;
                     OTVE_VECTOR_BCST8: if not(IF_BCST8 in p^.flags) then exit;
                    OTVE_VECTOR_BCST16: if not(IF_BCST16 in p^.flags) then exit;
                    OTVE_VECTOR_BCST16: if not(IF_BCST16 in p^.flags) then exit;
+                   OTVE_VECTOR_BCST32: if not(IF_BCST32 in p^.flags) then exit;
                                else exit;
                                else exit;
                  end;
                  end;
                end;
                end;
@@ -2048,7 +2053,12 @@ implementation
           if IF_TFV in aInsEntry^.Flags then
           if IF_TFV in aInsEntry^.Flags then
           begin
           begin
             for i :=  0 to aInsEntry^.ops - 1 do
             for i :=  0 to aInsEntry^.ops - 1 do
-             if (aInsEntry^.optypes[i] and OT_BMEM32 = OT_BMEM32) then
+             if (aInsEntry^.optypes[i] and OT_BMEM16 = OT_BMEM16) then
+             begin
+               tuplesize := 2;
+               break;
+             end
+             else if (aInsEntry^.optypes[i] and OT_BMEM32 = OT_BMEM32) then
              begin
              begin
                tuplesize := 4;
                tuplesize := 4;
                break;
                break;
@@ -2108,7 +2118,11 @@ implementation
             begin
             begin
               if aInsEntry^.optypes[i] and (OT_REGNORM or OT_MEMORY) = OT_REGMEM then
               if aInsEntry^.optypes[i] and (OT_REGNORM or OT_MEMORY) = OT_REGMEM then
               begin
               begin
-                case aInsEntry^.optypes[i] and (OT_BITS32 or OT_BITS64) of
+                case aInsEntry^.optypes[i] and (OT_BITS16 or OT_BITS32 or OT_BITS64) of
+                  OT_BITS16: begin
+                               memsize := 16;
+                               break;
+                             end;
                   OT_BITS32: begin
                   OT_BITS32: begin
                                memsize := 32;
                                memsize := 32;
                                break;
                                break;
@@ -2208,14 +2222,14 @@ implementation
             if aInput.typ = top_ref then
             if aInput.typ = top_ref then
             begin
             begin
               if aInput.ref^.base <> NR_NO then
               if aInput.ref^.base <> NR_NO then
-              begin              
+              begin
                 if (aInput.ref^.offset <> 0) and
                 if (aInput.ref^.offset <> 0) and
                    ((aInput.ref^.offset mod tuplesize) = 0) and
                    ((aInput.ref^.offset mod tuplesize) = 0) and
                    (abs(aInput.ref^.offset) div tuplesize <= 127) then
                    (abs(aInput.ref^.offset) div tuplesize <= 127) then
                 begin
                 begin
                   aInput.ref^.offset := aInput.ref^.offset div tuplesize;
                   aInput.ref^.offset := aInput.ref^.offset div tuplesize;
                   EVEXTupleState := etsIsTuple;
                   EVEXTupleState := etsIsTuple;
-                    end;  
+                end;
               end;
               end;
             end;
             end;
           end;
           end;
@@ -5301,6 +5315,7 @@ implementation
                                   0: MRefInfo := msiNoSize;
                                   0: MRefInfo := msiNoSize;
                            OT_BITS8: MRefInfo := msiMem8;
                            OT_BITS8: MRefInfo := msiMem8;
                           OT_BITS16: MRefInfo := msiMem16;
                           OT_BITS16: MRefInfo := msiMem16;
+                         OT_BITSB16: MRefInfo := msiBMem16;
                           OT_BITS32: MRefInfo := msiMem32;
                           OT_BITS32: MRefInfo := msiMem32;
                          OT_BITSB32: MRefInfo := msiBMem32;
                          OT_BITSB32: MRefInfo := msiBMem32;
                           OT_BITS64: MRefInfo := msiMem64;
                           OT_BITS64: MRefInfo := msiMem64;
@@ -5328,7 +5343,7 @@ implementation
                        else
                        else
                        begin
                        begin
                          // ignore broadcast-memory
                          // ignore broadcast-memory
-                         if not(MRefInfo in [msiBMem32, msiBMem64]) then
+                         if not(MRefInfo in [msiBMem16, msiBMem32, msiBMem64]) then
                          begin
                          begin
                            if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize <> MRefInfo then
                            if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize <> MRefInfo then
                            begin
                            begin
@@ -5350,12 +5365,13 @@ implementation
                        //if not(MRefInfo in [msiBMem32, msiBMem64]) and (actRegCount > 0) then
                        //if not(MRefInfo in [msiBMem32, msiBMem64]) and (actRegCount > 0) then
                        if actRegCount > 0 then
                        if actRegCount > 0 then
                        begin
                        begin
-                         if MRefInfo in [msiBMem32, msiBMem64] then
+                         if MRefInfo in [msiBMem16, msiBMem32, msiBMem64] then
                          begin
                          begin
                            if IF_BCST2  in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to2];
                            if IF_BCST2  in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to2];
                            if IF_BCST4  in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to4];
                            if IF_BCST4  in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to4];
                            if IF_BCST8  in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to8];
                            if IF_BCST8  in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to8];
                            if IF_BCST16 in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to16];
                            if IF_BCST16 in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to16];
+                           if IF_BCST32 in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to32];
 
 
                            //InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes
                            //InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes
 
 
@@ -5412,6 +5428,10 @@ implementation
           begin
           begin
             case RegBCSTSizeMask of
             case RegBCSTSizeMask of
                       0: ; // ignore;
                       0: ; // ignore;
+              OT_BITSB16: begin
+                            InsTabMemRefSizeInfoCache^[AsmOp].MemRefSizeBCST       := msbBCST16;
+                            InsTabMemRefSizeInfoCache^[AsmOp].BCSTXMMMultiplicator := 8;
+                          end;
               OT_BITSB32: begin
               OT_BITSB32: begin
                             InsTabMemRefSizeInfoCache^[AsmOp].MemRefSizeBCST       := msbBCST32;
                             InsTabMemRefSizeInfoCache^[AsmOp].MemRefSizeBCST       := msbBCST32;
                             InsTabMemRefSizeInfoCache^[AsmOp].BCSTXMMMultiplicator := 4;
                             InsTabMemRefSizeInfoCache^[AsmOp].BCSTXMMMultiplicator := 4;
@@ -5572,13 +5592,13 @@ implementation
 
 
             if (AsmOp <> A_CVTSI2SD) and
             if (AsmOp <> A_CVTSI2SD) and
                (AsmOp <> A_CVTSI2SS) then
                (AsmOp <> A_CVTSI2SS) then
-            begin            
+            begin
               inc(iCntOpcodeValError);
               inc(iCntOpcodeValError);
               Str(gas_needsuffix[AsmOp],hs1);
               Str(gas_needsuffix[AsmOp],hs1);
               Str(InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize,hs2);
               Str(InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize,hs2);
               Message3(asmr_e_not_supported_combination_attsuffix_memrefsize_type,
               Message3(asmr_e_not_supported_combination_attsuffix_memrefsize_type,
                        std_op2str[AsmOp],hs1,hs2);
                        std_op2str[AsmOp],hs1,hs2);
-            end;               
+            end;
           end;
           end;
         end;
         end;
       end;
       end;

+ 2 - 1
compiler/x86/agx86att.pas

@@ -289,11 +289,12 @@ interface
 
 
            if o.vopext and OTVE_VECTOR_BCST = OTVE_VECTOR_BCST then
            if o.vopext and OTVE_VECTOR_BCST = OTVE_VECTOR_BCST then
             begin
             begin
-              case o.vopext and (OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16) of
+              case o.vopext and OTVE_VECTOR_BCST_MASK of
                  OTVE_VECTOR_BCST2: owner.writer.AsmWrite('{1to2}');
                  OTVE_VECTOR_BCST2: owner.writer.AsmWrite('{1to2}');
                  OTVE_VECTOR_BCST4: owner.writer.AsmWrite('{1to4}');
                  OTVE_VECTOR_BCST4: owner.writer.AsmWrite('{1to4}');
                  OTVE_VECTOR_BCST8: owner.writer.AsmWrite('{1to8}');
                  OTVE_VECTOR_BCST8: owner.writer.AsmWrite('{1to8}');
                 OTVE_VECTOR_BCST16: owner.writer.AsmWrite('{1to16}');
                 OTVE_VECTOR_BCST16: owner.writer.AsmWrite('{1to16}');
+                OTVE_VECTOR_BCST32: owner.writer.AsmWrite('{1to32}');
                                else ; //TG TODO errormsg
                                else ; //TG TODO errormsg
               end;
               end;
             end;
             end;

+ 9 - 1
compiler/x86/rax86.pas

@@ -534,6 +534,7 @@ begin
           if ExistsBCST then
           if ExistsBCST then
           begin
           begin
             case MemRefInfo(opcode).MemRefSizeBCST of
             case MemRefInfo(opcode).MemRefSizeBCST of
+              msbBCST16: memrefsize := 16;
               msbBCST32: memrefsize := 32;
               msbBCST32: memrefsize := 32;
               msbBCST64: memrefsize := 64;
               msbBCST64: memrefsize := 64;
               else
               else
@@ -907,6 +908,10 @@ begin
                   if ExistsBCST then
                   if ExistsBCST then
                   begin
                   begin
                     case MemRefInfo(opcode).MemRefSizeBCST of
                     case MemRefInfo(opcode).MemRefSizeBCST of
+                      msbBCST16: begin
+                                   tx86operand(operands[i]).opsize := S_W;
+                                   tx86operand(operands[i]).size   := OS_16;
+                                 end;
                       msbBCST32: begin
                       msbBCST32: begin
                                    tx86operand(operands[i]).opsize := S_L;
                                    tx86operand(operands[i]).opsize := S_L;
                                    tx86operand(operands[i]).size   := OS_32;
                                    tx86operand(operands[i]).size   := OS_32;
@@ -1484,6 +1489,8 @@ begin
                Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2);
                Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2);
           16: if not(bt1to16 in MemRefInfo(opcode).BCSTTypes) then
           16: if not(bt1to16 in MemRefInfo(opcode).BCSTTypes) then
                Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2);
                Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2);
+          32: if not(bt1to32 in MemRefInfo(opcode).BCSTTypes) then
+               Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2);
         end;
         end;
       end
       end
       else if MemRefInfo(opcode).BCSTXMMMultiplicator * multiplicator <> vbcst then
       else if MemRefInfo(opcode).BCSTXMMMultiplicator * multiplicator <> vbcst then
@@ -1549,7 +1556,7 @@ procedure Tx86Instruction.SetInstructionOpsize;
               end;
               end;
             end;
             end;
 
 
-            result := true;  
+            result := true;
 	  end
 	  end
           else if MemRefSize in MemRefMultiples - [msiVMemMultiple] then
           else if MemRefSize in MemRefMultiples - [msiVMemMultiple] then
           begin
           begin
@@ -2265,6 +2272,7 @@ begin
                 OTVE_VECTOR_BCST4: s := s + ' {1to4}';
                 OTVE_VECTOR_BCST4: s := s + ' {1to4}';
                 OTVE_VECTOR_BCST8: s := s + ' {1to8}';
                 OTVE_VECTOR_BCST8: s := s + ' {1to8}';
                OTVE_VECTOR_BCST16: s := s + ' {1to16}';
                OTVE_VECTOR_BCST16: s := s + ' {1to16}';
+               OTVE_VECTOR_BCST32: s := s + ' {1to32}';
              end;
              end;
 
 
             if vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then
             if vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then

+ 13 - 7
compiler/x86/rax86int.pas

@@ -40,7 +40,7 @@ Unit Rax86int;
       AS_COMMA,AS_LBRACKET,AS_RBRACKET,AS_LPAREN,
       AS_COMMA,AS_LBRACKET,AS_RBRACKET,AS_LPAREN,
       AS_RPAREN,AS_COLON,AS_DOT,AS_PLUS,AS_MINUS,AS_STAR,
       AS_RPAREN,AS_COLON,AS_DOT,AS_PLUS,AS_MINUS,AS_STAR,
       AS_SEPARATOR,AS_ID,AS_REGISTER,AS_OPCODE,AS_SLASH,
       AS_SEPARATOR,AS_ID,AS_REGISTER,AS_OPCODE,AS_SLASH,
-      AS_LOPMASK,AS_VOPMASK,AS_LOPZEROMASK,AS_VOPZEROMASK,AS_LOPBCST,AS_OPBCST1TO2,AS_OPBCST1TO4,AS_OPBCST1TO8,AS_OPBCST1TO16,AS_LOPSAE,AS_OPSAE,
+      AS_LOPMASK,AS_VOPMASK,AS_LOPZEROMASK,AS_VOPZEROMASK,AS_LOPBCST,AS_OPBCST1TO2,AS_OPBCST1TO4,AS_OPBCST1TO8,AS_OPBCST1TO16,AS_OPBCST1TO32,AS_LOPSAE,AS_OPSAE,
       AS_LOPER,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE,
       AS_LOPER,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE,
        {------------------ Assembler directives --------------------}
        {------------------ Assembler directives --------------------}
       AS_ALIGN,AS_DB,AS_DW,AS_DD,AS_DQ,AS_PUBLIC,AS_END,
       AS_ALIGN,AS_DB,AS_DW,AS_DD,AS_DQ,AS_PUBLIC,AS_END,
@@ -167,7 +167,7 @@ Unit Rax86int;
         ',','[',']','(',
         ',','[',']','(',
         ')',':','.','+','-','*',
         ')',':','.','+','-','*',
         ';','identifier','register','opcode','/',
         ';','identifier','register','opcode','/',
-        '','','','','','','','','','','',
+        '','','','','','','','','','','','',
         '','','','','',
         '','','','','',
         '','','','','','','END',
         '','','','','','','END',
         '','','','','','','','','','','','','','',
         '','','','','','','','','','','','','','',
@@ -506,6 +506,7 @@ Unit Rax86int;
                                          else if (actasmpattern = '1TO4') then actasmtoken := AS_OPBCST1TO4
                                          else if (actasmpattern = '1TO4') then actasmtoken := AS_OPBCST1TO4
                                          else if (actasmpattern = '1TO8') then actasmtoken := AS_OPBCST1TO8
                                          else if (actasmpattern = '1TO8') then actasmtoken := AS_OPBCST1TO8
                                          else if (actasmpattern = '1TO16') then actasmtoken := AS_OPBCST1TO16
                                          else if (actasmpattern = '1TO16') then actasmtoken := AS_OPBCST1TO16
+                                         else if (actasmpattern = '1TO32') then actasmtoken := AS_OPBCST1TO32
                                          else actasmpattern := actasmpattern_origcase;
                                          else actasmpattern := actasmpattern_origcase;
                                       end;
                                       end;
                              AS_LOPSAE:
                              AS_LOPSAE:
@@ -816,7 +817,7 @@ Unit Rax86int;
                    actasmpattern:=c;
                    actasmpattern:=c;
                    c:=current_scanner.asmgetchar;
                    c:=current_scanner.asmgetchar;
                    { Get the possible characters }
                    { Get the possible characters }
-                   while c in ['1','2','4','6','8','t','T','o','O'] do
+                   while c in ['1','2','3','4','6','8','t','T','o','O'] do
                     begin
                     begin
                       actasmpattern:=actasmpattern + c;
                       actasmpattern:=actasmpattern + c;
                       c:=current_scanner.asmgetchar;
                       c:=current_scanner.asmgetchar;
@@ -834,6 +835,7 @@ Unit Rax86int;
                        else if (actasmpattern = '1TO4') then actasmtoken := AS_OPBCST1TO4
                        else if (actasmpattern = '1TO4') then actasmtoken := AS_OPBCST1TO4
                        else if (actasmpattern = '1TO8') then actasmtoken := AS_OPBCST1TO8
                        else if (actasmpattern = '1TO8') then actasmtoken := AS_OPBCST1TO8
                        else if (actasmpattern = '1TO16') then actasmtoken := AS_OPBCST1TO16
                        else if (actasmpattern = '1TO16') then actasmtoken := AS_OPBCST1TO16
+                       else if (actasmpattern = '1TO32') then actasmtoken := AS_OPBCST1TO32
                        else actasmpattern := actasmpattern_origcase;
                        else actasmpattern := actasmpattern_origcase;
                        c:=current_scanner.asmgetchar;
                        c:=current_scanner.asmgetchar;
                     end
                     end
@@ -990,7 +992,7 @@ Unit Rax86int;
     kreg: tregister;
     kreg: tregister;
   begin
   begin
     Consume(actasmtoken, true);
     Consume(actasmtoken, true);
-    if actasmtoken in [AS_VOPMASK, AS_VOPZEROMASK, AS_OPBCST1TO2, AS_OPBCST1TO4, AS_OPBCST1TO8, AS_OPBCST1TO16,
+    if actasmtoken in [AS_VOPMASK, AS_VOPZEROMASK, AS_OPBCST1TO2, AS_OPBCST1TO4, AS_OPBCST1TO8, AS_OPBCST1TO16, AS_OPBCST1TO32,
                        AS_OPSAE,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE] then
                        AS_OPSAE,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE] then
     begin
     begin
       case actasmtoken of
       case actasmtoken of
@@ -1020,6 +1022,10 @@ Unit Rax86int;
                           aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST16;
                           aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST16;
                           aop.vbcst  := 16;
                           aop.vbcst  := 16;
                         end;
                         end;
+        AS_OPBCST1TO32: begin
+                          aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST32;
+                          aop.vbcst  := 32;
+                        end;
               AS_OPSAE: aop.vopext := aop.vopext or OTVE_VECTOR_SAE;
               AS_OPSAE: aop.vopext := aop.vopext or OTVE_VECTOR_SAE;
             AS_OPRNSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RNSAE;
             AS_OPRNSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RNSAE;
             AS_OPRDSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RDSAE;
             AS_OPRDSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RDSAE;
@@ -2674,9 +2680,9 @@ Unit Rax86int;
                   AS_QWORD : oper.typesize:=8;
                   AS_QWORD : oper.typesize:=8;
                   AS_DQWORD : oper.typesize:=16;
                   AS_DQWORD : oper.typesize:=16;
                   AS_TBYTE : oper.typesize:=10;
                   AS_TBYTE : oper.typesize:=10;
-                  AS_OWORD,                     
-                  AS_XMMWORD: oper.typesize:=16; 
-                  AS_YWORD,                     
+                  AS_OWORD,
+                  AS_XMMWORD: oper.typesize:=16;
+                  AS_YWORD,
                   AS_YMMWORD: oper.typesize:=32;
                   AS_YMMWORD: oper.typesize:=32;
                   AS_ZWORD,
                   AS_ZWORD,
                   AS_ZMMWORD: oper.typesize:=64;
                   AS_ZMMWORD: oper.typesize:=64;