|  | @@ -114,7 +114,10 @@ unit cgcpu;
 | 
	
		
			
				|  |  |          procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
 | 
	
		
			
				|  |  |          { Transform unsupported methods into Internal errors }
 | 
	
		
			
				|  |  |          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
 | 
	
		
			
				|  |  | -      private
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        { try to generate an optimized 32 bit multiplication by a constant, returns true if the code was successfully generated }
 | 
	
		
			
				|  |  | +        function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |          { clear out potential overflow bits from 8 or 16 bit operations  }
 | 
	
		
			
				|  |  |          { the upper 24/16 bits of a register after an operation          }
 | 
	
		
			
				|  |  |          procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
 | 
	
	
		
			
				|  | @@ -653,6 +656,100 @@ unit cgcpu;
 | 
	
		
			
				|  |  |          end
 | 
	
		
			
				|  |  |        end;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    { Tries to replace the 32 bit multiplication dst:=src*a by a short sequence
      of mov/add/sub/rsb instructions with shifted operands.

        list      assembler list the generated instructions are appended to
        a         constant factor
        src, dst  source and destination register

      Returns true if the code has been generated. Returns false, without
      emitting any instruction, if the sequence would exceed the instruction
      budget or if src and dst are the same register and more than one
      instruction would be needed; the caller then has to carry out the
      multiplication by other means. }
    function tcgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
      const
        { maximum number of instructions generated to replace the mul }
        maxeffort = 3;
      var
        multiplier : dword;
        rest : dword;
        power : longint;
        shifterop : tshifterop;
        addcost : byte;
        subcost : byte;
        negative : boolean;
        cycles : byte;
      begin
        result:=true;
        cycles:=0;
        negative:=a<0;
        shifterop.rs:=NR_NO;
        shifterop.shiftmode:=SM_LSL;
        { a negative factor needs one extra rsb to negate the result }
        if negative then
          inc(cycles);
        multiplier:=dword(abs(a));

        { add up: one instruction is needed for each bit being set in the
          constant factor, however, the least significant bit is for free,
          it is hidden in the initial instruction }
        addcost:=popcnt(multiplier and $fffffffe);

        { subtract from the next power of two: one initial instruction creates
          the power of two (and hides the least significant bit of the
          difference), every other bit set in the difference needs one sub }
        rest:=dword(nextpowerof2(multiplier,power)-multiplier);
        subcost:=popcnt(rest and $fffffffe)+1;

        { most simple cases }
        if a=1 then
          a_load_reg_reg(list,OS_32,OS_32,src,dst)
        else if a=0 then
          a_load_const_reg(list,OS_32,0,dst)
        else if a=-1 then
          a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
        { add up?

          only if this is not longer than the subtract sequence and fits into
          the budget; if src=dst, the first instruction overwrites the source,
          so only a single instruction sequence is possible then }
        else if (addcost<=subcost) and
          (addcost+cycles<=maxeffort) and
          ((addcost<=1) or (src<>dst)) then
          begin
            { initial instruction: most significant bit plus, if set, bit 0 }
            shifterop.shiftimm:=BsrDWord(multiplier);
            if odd(multiplier) then
              begin
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop));
                dec(multiplier);
              end
            else
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop));
            dec(multiplier,dword(1) shl shifterop.shiftimm);

            { one add for each remaining bit }
            while multiplier<>0 do
              begin
                shifterop.shiftimm:=BsrDWord(multiplier);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop));
                dec(multiplier,dword(1) shl shifterop.shiftimm);
              end;

            if negative then
              list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
          end
        { subtract from the next greater power of two? }
        else if (subcost+cycles<=maxeffort) and
          ((subcost<=1) or (src<>dst)) then
          begin
            { initial instruction: src shl power, minus src if bit 0 of the
              difference is set }
            shifterop.shiftimm:=power;
            if odd(rest) then
              begin
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop));
                dec(rest);
              end
            else
              list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop));

            { one sub for each remaining bit of the difference }
            while rest<>0 do
              begin
                shifterop.shiftimm:=BsrDWord(rest);
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop));
                dec(rest,dword(1) shl shifterop.shiftimm);
              end;

            if negative then
              list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
          end
        else
          result:=false;
      end;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
 | 
	
		
			
				|  |  |        var
 | 
	
		
			
				|  |  |          shift : byte;
 | 
	
	
		
			
				|  | @@ -765,6 +862,10 @@ unit cgcpu;
 | 
	
		
			
				|  |  |                  so.shiftimm:=l1;
 | 
	
		
			
				|  |  |                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
 | 
	
		
			
				|  |  |                end
 | 
	
		
			
				|  |  | +            else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
 | 
	
		
			
				|  |  | +              begin
 | 
	
		
			
				|  |  | +                { nothing to do on success }
 | 
	
		
			
				|  |  | +              end
 | 
	
		
			
				|  |  |              { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
 | 
	
		
			
				|  |  |                Just using mov x, #0 might allow some easier optimizations down the line. }
 | 
	
		
			
				|  |  |              else if (op = OP_AND) and (dword(a)=0) then
 |