@@ -79,10 +79,10 @@ unit nx86add;
       aasmbase,aasmdata,aasmcpu,
       symconst,symdef,
       cgobj,hlcgobj,cgx86,cga,cgutils,
-      tgobj,ncgutil,
+      tgobj,ncgutil,nutils,
       ncon,nset,ninl,ncnv,ncal,nmat,
       defutil,defcmp,constexp,
-      pass_2,htypechk;
+      pass_1,pass_2,htypechk;
 
 { Range check must be disabled explicitly as the code serves
   on three different architecture sizes }
@@ -1892,6 +1892,7 @@ unit nx86add;
         checkoverflow : Boolean;
         ovloc : tlocation;
         tmpreg : TRegister;
+        indexnode : TNode;
       begin
         { determine if the comparison will be unsigned }
         unsigned:=not(is_signed(left.resultdef)) or
@@ -1944,73 +1945,156 @@ unit nx86add;
         opsize:=def_cgsize(left.resultdef);
 
 {$ifndef i8086}
-        { BMI1 optimisations }
-        if (cs_opt_level2 in current_settings.optimizerswitches) and
-          (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) then
+        if (cs_opt_level2 in current_settings.optimizerswitches) then
           begin
-            { Can we turn "x and (not y)" into an ANDN instruction instead? }
-            if (nodetype = andn) and
-              (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
-              ((left.nodetype = notn) or (right.nodetype = notn)) and
-              (
-                { With "const and (not variable)", ANDN will produce larger
-                  code once everything is moved into registers (as a side-note,
-                  "const and (not const)" and "variable and (not const)" will
-                  have been simplified earlier to remove the NOT operation). }
-                not (cs_opt_size in current_settings.optimizerswitches) or
-                (
-                  (left.location.loc <> LOC_CONSTANT) and
-                  (right.location.loc <> LOC_CONSTANT)
-                )
-              ) then
+            { BMI1 optimisations }
+            if (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) then
               begin
-                { ANDN only supports the second operand being inverted; however,
-                  since we're dealing with ordinals, there won't be any Boolean
-                  shortcutting, so we can safely swap the parameters }
-
-                if (right.nodetype <> notn) then
-                  swapleftright;
-
-                secondpass(left);
-                { Skip the not node completely }
-                secondpass(tnotnode(right).left);
-
-                { allocate registers }
-                hlcg.location_force_reg(
-                  current_asmdata.CurrAsmList,
-                  tnotnode(right).left.location,
-                  tnotnode(right).left.resultdef,
-                  tnotnode(right).left.resultdef,
-                  false
-                );
-
-                if left.location.loc = LOC_CONSTANT then
-                  { With "const and (not variable)", we can probably still make a
-                    saving when it comes to pipeline stalls (left.location.loc
-                    will become LOC_CREGISTER). }
-                  hlcg.location_force_reg(
-                    current_asmdata.CurrAsmList,
-                    left.location,
-                    left.resultdef,
-                    left.resultdef,
-                    true
-                  );
-
-                set_result_location_reg;
-
-                case left.location.loc of
-                  LOC_REFERENCE,
-                  LOC_CREFERENCE:
-                    emit_ref_reg_reg(A_ANDN, TCGSize2OpSize[opsize], left.location.reference, tnotnode(right).left.location.register, location.register);
-                  LOC_REGISTER,
-                  LOC_CREGISTER:
-                    emit_reg_reg_reg(A_ANDN, TCGSize2OpSize[opsize], left.location.register, tnotnode(right).left.location.register, location.register)
-                  else
-                    InternalError(2022102101);
-                end;
+                { Can we turn "x and (not y)" into an ANDN instruction instead? }
+                if (nodetype = andn) and
+                  (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
+                  ((left.nodetype = notn) or (right.nodetype = notn)) and
+                  (
+                    { With "const and (not variable)", ANDN will produce larger
+                      code once everything is moved into registers (as a side-note,
+                      "const and (not const)" and "variable and (not const)" will
+                      have been simplified earlier to remove the NOT operation). }
+                    not (cs_opt_size in current_settings.optimizerswitches) or
+                    (
+                      (left.location.loc <> LOC_CONSTANT) and
+                      (right.location.loc <> LOC_CONSTANT)
+                    )
+                  ) then
+                  begin
+                    { ANDN only supports the second operand being inverted; however,
+                      since we're dealing with ordinals, there won't be any Boolean
+                      shortcutting, so we can safely swap the parameters }
+
+                    if (right.nodetype <> notn) then
+                      swapleftright;
+
+                    secondpass(left);
+                    { Skip the not node completely }
+                    secondpass(tnotnode(right).left);
+
+                    { allocate registers }
+                    hlcg.location_force_reg(
+                      current_asmdata.CurrAsmList,
+                      tnotnode(right).left.location,
+                      tnotnode(right).left.resultdef,
+                      tnotnode(right).left.resultdef,
+                      false
+                    );
+
+                    if left.location.loc = LOC_CONSTANT then
+                      { With "const and (not variable)", we can probably still make a
+                        saving when it comes to pipeline stalls (left.location.loc
+                        will become LOC_CREGISTER). }
+                      hlcg.location_force_reg(
+                        current_asmdata.CurrAsmList,
+                        left.location,
+                        left.resultdef,
+                        left.resultdef,
+                        true
+                      );
+
+                    set_result_location_reg;
+
+                    case left.location.loc of
+                      LOC_REFERENCE,
+                      LOC_CREFERENCE:
+                        emit_ref_reg_reg(A_ANDN, TCGSize2OpSize[opsize], left.location.reference, tnotnode(right).left.location.register, location.register);
+                      LOC_REGISTER,
+                      LOC_CREGISTER:
+                        emit_reg_reg_reg(A_ANDN, TCGSize2OpSize[opsize], left.location.register, tnotnode(right).left.location.register, location.register);
+                      else
+                        InternalError(2022102110);
+                    end;
+
+                    { Overflow can't happen with and/andn }
+                    Exit;
+                  end;
+              end;
+
+            { BMI2 optimisations }
+            if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) then
+              begin
+                { Can we turn "x and ((1 shl y) - 1)" into a BZHI instruction instead? }
+                if (nodetype = andn) and
+                  (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
+                  (
+                    (
+                      (right.nodetype = subn) and
+                      (taddnode(right).right.nodetype = ordconstn) and
+                      (tordconstnode(taddnode(right).right).value = 1) and
+                      (taddnode(right).left.nodetype = shln) and
+                      (tshlshrnode(taddnode(right).left).left.nodetype = ordconstn) and
+                      (tordconstnode(tshlshrnode(taddnode(right).left).left).value = 1)
+                    ) or
+                    (
+                      (left.nodetype = subn) and
+                      (taddnode(left).right.nodetype = ordconstn) and
+                      (tordconstnode(taddnode(left).right).value = 1) and
+                      (taddnode(left).left.nodetype = shln) and
+                      (tshlshrnode(taddnode(left).left).left.nodetype = ordconstn) and
+                      (tordconstnode(tshlshrnode(taddnode(left).left).left).value = 1)
+                    )
+                  ) then
+                  begin
+
+                    { Put the subtract node on the right }
+                    if (right.nodetype <> subn) then
+                      swapleftright;
 
-                { Overflow can't happen with and/andn }
-                Exit;
+                    secondpass(left);
+
+                    { Skip the subtract and shift nodes completely }
+
+                    { Helps avoid all the awkward typecasts }
+                    indexnode := tshlshrnode(taddnode(right).left).right;
+{$ifdef x86_64}
+                    { The code generator sometimes extends the shift result to 64-bit unnecessarily }
+                    if (indexnode.nodetype = typeconvn) and (opsize in [OS_32, OS_S32]) and
+                      (def_cgsize(TTypeConvNode(indexnode).resultdef) in [OS_64, OS_S64]) then
+                      begin
+                        { Convert to the 32-bit type }
+                        indexnode.resultdef := resultdef;
+                        node_reset_flags(indexnode,[nf_pass1_done]);
+
+                        { We shouldn't be getting any new errors }
+                        if do_firstpass(indexnode) then
+                          InternalError(2022110201);
+
+                        { Keep things internally consistent in case indexnode changed }
+                        tshlshrnode(taddnode(right).left).right := indexnode;
+                      end;
+{$endif x86_64}
+                    secondpass(indexnode);
+
+                    { allocate registers }
+                    hlcg.location_force_reg(
+                      current_asmdata.CurrAsmList,
+                      indexnode.location,
+                      indexnode.resultdef,
+                      resultdef,
+                      false
+                    );
+
+                    set_result_location_reg;
+
+                    case left.location.loc of
+                      LOC_REFERENCE,
+                      LOC_CREFERENCE:
+                        emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, left.location.reference, location.register);
+                      LOC_REGISTER,
+                      LOC_CREGISTER:
+                        emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, left.location.register, location.register);
+                      else
+                        InternalError(2022102111);
+                    end;
+
+                    Exit;
+                  end;
+              end;
           end;
 {$endif not i8086}
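For reference, a minimal Free Pascal sketch of the two source patterns this change targets (illustrative only, not part of the patch; the program and function names are invented here). Compiled with -O2 for a cputype whose cpu_capabilities include CPUX86_HAS_BMI1 and CPUX86_HAS_BMI2, the first function is expected to be handled by the ANDN branch above and the second by the new BZHI branch:

program bmi_patterns;

{ "x and (not y)" - the pattern matched by the ANDN (BMI1) branch }
function AndNot(x, y: Cardinal): Cardinal;
begin
  Result := x and (not y);
end;

{ "x and ((1 shl n) - 1)" - the pattern matched by the new BZHI (BMI2) branch }
function LowBits(x, n: Cardinal): Cardinal;
begin
  Result := x and ((1 shl n) - 1);
end;

begin
  WriteLn(AndNot($F0F0F0F0, $FF00FF00));  { $00F000F0 }
  WriteLn(LowBits($DEADBEEF, 8));         { $EF }
end.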