{
    Copyright (c) 1998-2002 by Florian Klaempfl

    Generate x86 code for math nodes

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

****************************************************************************}
unit nx86mat;

{$i fpcdefs.inc}

interface

    uses
      node,ncgmat;

    type
      { Unary minus for x86: overrides the float code path and the first pass
        of tcgunaryminusnode, plus the MMX path when SUPPORT_MMX is defined. }
      tx86unaryminusnode = class(tcgunaryminusnode)
{$ifdef SUPPORT_MMX}
         procedure second_mmx;override;
{$endif SUPPORT_MMX}
         procedure second_float;override;
         function pass_1:tnode;override;
      end;

      { Not node for x86: overrides the boolean code path of tcgnotnode,
        plus the MMX path when SUPPORT_MMX is defined. }
      tx86notnode = class(tcgnotnode)
         procedure second_boolean;override;
{$ifdef SUPPORT_MMX}
         procedure second_mmx;override;
{$endif SUPPORT_MMX}
      end;

      { Div/mod node for x86: overrides the code generation pass. }
      tx86moddivnode = class(tcgmoddivnode)
         procedure pass_generate_code;override;
      end;

      { Shift node for x86: only adds an MMX path when SUPPORT_MMX is defined. }
      tx86shlshrnode = class(tcgshlshrnode)
{$ifdef SUPPORT_MMX}
         procedure second_mmx;override;
{$endif SUPPORT_MMX}
      end;

  implementation

    uses
      globtype,
      constexp,
      cutils,verbose,globals,
      symconst,symdef,
      aasmbase,aasmtai,aasmcpu,aasmdata,defutil,
      cgbase,pass_1,pass_2,
      ncon,
      cpubase,cpuinfo,
      cga,cgobj,hlcgobj,cgx86,cgutils,
      tgobj;

{*****************************************************************************
                          TX86UNARYMINUSNODE
*****************************************************************************}

    { First pass of the unary minus node: runs the first pass on the operand
      and selects the expected result location.

      - float operand: LOC_MMREGISTER when use_vectorfpu accepts the type,
        LOC_FPUREGISTER otherwise;
      - with SUPPORT_MMX and the cs_mmx local switch: LOC_MMXREGISTER for
        MMX-able arrays;
      - everything else is delegated to the inherited pass_1.

      Returns nil for the cases decided here (and when codegenerror is set
      after processing the operand); otherwise returns the result of the
      inherited pass_1. }
    function tx86unaryminusnode.pass_1 : tnode;
      begin
         result:=nil;
         firstpass(left);
         if codegenerror then
           exit;

         if (left.resultdef.typ=floatdef) then
           begin
             if use_vectorfpu(left.resultdef) then
               expectloc:=LOC_MMREGISTER
             else
               expectloc:=LOC_FPUREGISTER;
           end
{$ifdef SUPPORT_MMX}
         else
           if (cs_mmx in current_settings.localswitches) and
              is_mmx_able_array(left.resultdef) then
             begin
               expectloc:=LOC_MMXREGISTER;
             end
{$endif SUPPORT_MMX}
         else
           { hand the inherited result back to the caller instead of
             dropping it, so a node returned by the base class is not lost }
           result:=inherited pass_1;
      end;


{$ifdef SUPPORT_MMX}
    { Negates an MMX operand as "0 - operand": a scratch MMX register is
      zeroed, the operand is brought into location.register, the packed
      subtract selected from the element type (saturating variants when
      cs_mmx_saturation is set) is emitted with the scratch register as the
      second operand, and the scratch register is finally copied to
      location.register.

      Raises internalerror 200203225 for an unsupported operand location and
      201408202 when no subtract opcode matches the element type. }
    procedure tx86unaryminusnode.second_mmx;
      var
        op : tasmop;
        hreg : tregister;
      begin
        op:=A_NONE;
        secondpass(left);
        location_reset(location,LOC_MMXREGISTER,OS_NO);
        { hreg := 0 }
        hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
        emit_reg_reg(A_PXOR,S_NO,hreg,hreg);
        { get the operand into location.register }
        case left.location.loc of
          LOC_MMXREGISTER:
            begin
               { reuse the operand's own register }
               location.register:=left.location.register;
            end;
          LOC_CMMXREGISTER:
            begin
               { copy into a freshly allocated register }
               location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
               emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
            end;
          LOC_REFERENCE,
          LOC_CREFERENCE:
            begin
               { load from memory into a freshly allocated register }
               location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
               emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
            end;
          else
            internalerror(200203225);
        end;
        { select the packed subtract for the element type; op stays A_NONE
          when the type has no matching instruction }
        if cs_mmx_saturation in current_settings.localswitches then
          case mmx_type(resultdef) of
             mmxs8bit:
               op:=A_PSUBSB;
             mmxu8bit:
               op:=A_PSUBUSB;
             mmxs16bit,mmxfixed16:
               op:=A_PSUBSW;
             mmxu16bit:
               op:=A_PSUBUSW;
             else
               ;
          end
        else
          case mmx_type(resultdef) of
             mmxs8bit,mmxu8bit:
               op:=A_PSUBB;
             mmxs16bit,mmxu16bit,mmxfixed16:
               op:=A_PSUBW;
             mmxs32bit,mmxu32bit:
               op:=A_PSUBD;
             else
               ;
          end;
        if op = A_NONE then
          internalerror(201408202);

        { subtract the operand from the zeroed register, then move the
          difference into the result register }
        emit_reg_reg(op,S_NO,location.register,hreg);
        emit_reg_reg(A_MOVQ,S_NO,hreg,location.register);
      end;
{$endif SUPPORT_MMX}


    { Negates a floating point operand.

      When pass_1 selected LOC_MMREGISTER:
      - with cs_opt_fastmath the result register is cleared with OP_XOR and
        the operand is subtracted from it with OP_SUB;
      - otherwise a constant holding only the sign bit is written to a
        read-only data section and the operand is combined with it through
        OP_XOR (into a new register when UseAVX is true, in the operand's
        own register otherwise).

      For every other expected location the operand is loaded onto the x87
      stack top (NR_ST) and FCHS is emitted.

      Raises internalerror 2004110215 for a result size other than
      OS_F32/OS_F64 on the sign mask path, and 200312241 for an unsupported
      operand location on the x87 path. }
    procedure tx86unaryminusnode.second_float;
      var
        l1: TAsmLabel;
        href: treference;
      begin
        secondpass(left);

        if expectloc=LOC_MMREGISTER then
          begin
            if cs_opt_fastmath in current_settings.optimizerswitches then
              begin
                { registers and memory operands can be used as they are,
                  anything else is first forced into an MM register }
                if not(left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
                location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
                { result := 0, then result := result - operand }
                cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,location.size,location.register,location.register,nil);
                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,OP_SUB,location.size,left.location,location.register,mms_movescalar);
              end
            else
              begin
                location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
                { emit the sign mask as a labelled constant in a read-only
                  section }
                current_asmdata.getdatalabel(l1);
                new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(16));
                current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
                case def_cgsize(resultdef) of
                  OS_F32:
                    { one 32 bit word with only bit 31 set }
                    current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(longint(1 shl 31)));
                  OS_F64:
                    begin
                      { first word zero, second word with only bit 31 set }
                      current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(0));
                      current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(-(1 shl 31)));
                    end
                  else
                    internalerror(2004110215);
                end;

                reference_reset_symbol(href,l1,0,resultdef.alignment,[]);

                if UseAVX then
                  begin
                    if not(left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);

                    { three operand form: the result goes to a new register }
                    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
                    cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,left.location.register,location.register,nil)
                  end
                else
                  begin
                    if not(left.location.loc=LOC_MMREGISTER) then
                      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);

                    { two operand form: the operand register is modified in
                      place and becomes the result }
                    location.register:=left.location.register;
                    cg.a_opmm_ref_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,location.register,mms_movescalar);
                  end;
              end;
          end
        else
          begin
            location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
            case left.location.loc of
              LOC_REFERENCE,
              LOC_CREFERENCE:
                begin
                  location.register:=NR_ST;
                  cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
                     left.location.size,location.size,
                     left.location.reference,location.register);
                  emit_none(A_FCHS,S_NO);
                end;
              LOC_FPUREGISTER,
              LOC_CFPUREGISTER:
                begin
                   { "load st,st" is ignored by the code generator }
                   cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,NR_ST);
                   location.register:=NR_ST;
                   emit_none(A_FCHS,S_NO);
                end;
              else
                internalerror(200312241);
            end;
          end;
      end;


{*****************************************************************************
TX86NOTNODE
*****************************************************************************}

    { Generates code for a boolean "not"; the result is delivered in the
      CPU flags.

      Nothing further is emitted here when handle_locjump reports that it
      dealt with the operand.  Otherwise, depending on the operand location:
      - LOC_FLAGS: the operand's flags are taken over and inverted;
      - memory reference: the value is compared with 0; on 32 bit and 16 bit
        ALUs a value wider than the native word is instead OR-ed together
        word by word into a scratch register;
      - constant, register or subset location: the operand is forced into a
        register and tested against itself; on 32 bit and 16 bit ALUs the
        partial registers of a wide value are OR-ed together instead.
      In the last two cases the result flag is F_E.

      Raises internalerror 200203224 for any other operand location. }
    procedure tx86notnode.second_boolean;
      var
         opsize : tcgsize;
         {$if defined(cpu32bitalu) or defined(cpu16bitalu)}
         hreg: tregister;
         {$endif}
      begin
        opsize:=def_cgsize(resultdef);

        secondpass(left);
        if not handle_locjump then
         begin
           case left.location.loc of
             LOC_FLAGS :
               begin
                 location_reset(location,LOC_FLAGS,OS_NO);
                 location.resflags:=left.location.resflags;
                 inverse_flags(location.resflags);
               end;
             LOC_CREFERENCE,
             LOC_REFERENCE:
               begin
{$if defined(cpu32bitalu)}
                 if is_64bit(resultdef) then
                   begin
                     { load the first 32 bit word and OR the word at
                       offset +4 into it }
                     hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
                     tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                     cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
                     inc(left.location.reference.offset,4);
                     cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
                   end
                 else
{$elseif defined(cpu16bitalu)}
                 if is_64bit(resultdef) then
                   begin
                     { load the first 16 bit word and OR the following three
                       words into it }
                     hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
                     tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                     cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
                     inc(left.location.reference.offset,2);
                     cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                     inc(left.location.reference.offset,2);
                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                     inc(left.location.reference.offset,2);
                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                   end
                 else if is_32bit(resultdef) then
                   begin
                     { load the first 16 bit word and OR the word at
                       offset +2 into it }
                     hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
                     tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                     cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
                     inc(left.location.reference.offset,2);
                     cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                     cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                   end
                 else
{$endif}
                   begin
                     { value fits the native word: compare it with 0 in memory }
                     cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                     emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
                   end;
                 location_reset(location,LOC_FLAGS,OS_NO);
                 location.resflags:=F_E;
               end;
             LOC_CONSTANT,
             LOC_REGISTER,
             LOC_CREGISTER,
             LOC_SUBSETREG,
             LOC_CSUBSETREG,
             LOC_SUBSETREF,
             LOC_CSUBSETREF :
               begin
{$if defined(cpu32bitalu)}
                 if is_64bit(resultdef) then
                   begin
                     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                     cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                     { OR both 32 bit halves together }
                     emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
                   end
                 else
{$elseif defined(cpu16bitalu)}
                 if is_64bit(resultdef) then
                   begin
                     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                     cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                     { OR the four 16 bit registers together }
                     emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
                     emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
                     emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
                   end
                 else if is_32bit(resultdef) then
                   begin
                     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                     cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                     { the value lives in a pair of 16 bit registers here, so
                       the OR uses the word operand size, like the 64 bit
                       branch above and the OS_16 memory path }
                     emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register),left.location.register);
                   end
                 else
{$endif}
                   begin
                     { value fits the native word: test the register
                       against itself }
                     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
                     cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                     emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                   end;
                 location_reset(location,LOC_FLAGS,OS_NO);
                 location.resflags:=F_E;
               end;
            else
               internalerror(200203224);
           end;
         end;
      end;


{$ifdef SUPPORT_MMX}
    { Generates code for "not" on an MMX operand: the operand is brought
      into location.register and XOR-ed with an all-ones mask.  The mask is
      built from a 32 bit integer constant, so it is applied twice: once as
      loaded and once after shifting it left by 32 bits.

      Raises internalerror 2019050906 for an unsupported operand location. }
    procedure tx86notnode.second_mmx;

    var hreg,r:Tregister;

    begin
      secondpass(left);
      location_reset(location,LOC_MMXREGISTER,OS_NO);
      { integer register holding the 32 bit all-ones pattern }
      r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
      emit_const_reg(A_MOV,S_L,longint($ffffffff),r);
      { load operand }
      case left.location.loc of
        LOC_MMXREGISTER:
          location_copy(location,left.location);
        LOC_CMMXREGISTER:
          begin
            location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
            emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE:
          begin
            location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
            emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
          end;
        else
          internalerror(2019050906);
      end;
      { load mask }
      hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
      emit_reg_reg(A_MOVD,S_NO,r,hreg);
      { lower 32 bit }
      emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
      { shift mask }
      emit_const_reg(A_PSLLQ,S_B,32,hreg);
      { higher 32 bit }
      emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
    end;
{$endif SUPPORT_MMX}


{*****************************************************************************
                             TX86MODDIVNODE
*****************************************************************************}

    procedure tx86moddivnode.pass_generate_code;
      var
        hreg1,hreg2,hreg3,hreg4,rega,regd,tempreg:Tregister;
        power:longint;
        instr:TAiCpu;
        op:Tasmop;
        cgsize:TCgSize;
        opsize:topsize;
        e, sm: aint;
        d,m: aword;
        m_add, invertsign: boolean;
        s: byte;
      label
        DefaultDiv;

{$ifndef i8086}
        procedure DoBMI2ReciprocalDivision;
          var
            exp_regd: Tregister;
            exp_opsize: topsize;
            DoMod: Boolean;
            SubSize: TSubRegister;
            divsize: Byte;
          begin
            DoMod := (nodetype = modn);
            { Extend 32-bit divides to 64-bit registers
and 16-bit              divides to 32-bit registers.  Because the domain of              the left input is only up to 2^(X/2 - 1) - 1, (i.e.              2^31 - 1 for 64-bit and 2^15 - 1 for 32-bit), a much              larger error in the reciprocal is permitted. }            if (resultdef.size <= {$ifdef x86_64}4{$else x86_64}2{$endif x86_64}) then              begin{$ifdef x86_64}                if resultdef.size = 4 then                  divsize := 64                else{$endif x86_64}                  divsize := 32;                calc_divconst_magic_unsigned(divsize, d, m, m_add, s);                { Should never have a zero shift and a magic add together }                if (s = 0) and m_add then                  InternalError(2021090203);                { Extend the input and out registers (the peephole optimizer should                  help clean up unnecessary MOVZX instructions }                hreg3 := hreg1;                case resultdef.size of{$ifdef x86_64}                  4:                    begin                      SubSize := R_SUBQ;                      setsubreg(hreg3, R_SUBQ);                      { Make sure the upper 32 bits are zero; the peephole                        optimizer will remove this instruction via MovAnd2Mov                        if it's not needed }                      emit_const_reg(A_AND, S_L, $FFFFFFFF, hreg1);                      exp_regd := NR_RDX;                      exp_opsize := S_Q;                      if m_add then                        { Append 1 to the tail end of the result }                        m := (m shr s) or ($8000000000000000 shr (s - 1))                      else                        m := m shr s;                    end;{$endif x86_64}                  1, 2:                    begin                      { MULX doesn't have a 16-bit version }                      SubSize := R_SUBD;                      setsubreg(hreg3, R_SUBD);                      if resultdef.size = 1 then                   
     exp_opsize := S_BL                      else                        exp_opsize := S_WL;                      emit_reg_reg(A_MOVZX, exp_opsize, hreg1, hreg3);                      exp_regd := NR_EDX;                      exp_opsize := S_L;                      if m_add then                        { Append 1 to the tail end of the result }                        m := (m shr s) or ($80000000 shr (s - 1))                      else                        m := m shr s;                    end;                  else                    InternalError(2021090211);                end;                Inc(m);                cg.getcpuregister(current_asmdata.CurrAsmList, exp_regd);                emit_const_reg(A_MOV, exp_opsize, aint(m), exp_regd);                hreg2 := cg.getintregister(current_asmdata.CurrAsmList, cgsize);                hreg4 := hreg2;                setsubreg(hreg4, SubSize);                cg.ungetcpuregister(current_asmdata.CurrAsmList, exp_regd);                emit_reg_reg_reg(A_MULX, exp_opsize, hreg3, hreg4, hreg4);              end            else              begin                calc_divconst_magic_unsigned(resultdef.size * 8, d, m, m_add, s);                { Should never have a zero shift and a magic add together }                if (s = 0) and m_add then                  InternalError(2021090204);                cg.getcpuregister(current_asmdata.CurrAsmList, regd);                emit_const_reg(A_MOV, opsize, aint(m), regd);                hreg2 := cg.getintregister(current_asmdata.CurrAsmList, cgsize);                cg.ungetcpuregister(current_asmdata.CurrAsmList, regd);                emit_reg_reg_reg(A_MULX, opsize, hreg1, hreg2, hreg2);                if m_add then                  begin                    { addition can overflow, shift first bit considering carry,                      then shift remaining bits in regular way. 
}                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);                    emit_reg_reg(A_ADD, opsize, hreg1, hreg2);                    emit_const_reg(A_RCR, opsize, 1, hreg2);                    cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);                    dec(s);                  end;                if s<>0 then                  emit_const_reg(A_SHR, opsize, aint(s), hreg2);              end;            if DoMod then              begin                { Now multiply the quotient by the original denominator and                  subtract the product from the original numerator to get                  the remainder. }{$ifdef x86_64}                if (cgsize in [OS_64,OS_S64]) and (d > $7FFFFFFF) then { Cannot use 64-bit constants in IMUL }                  begin                    hreg4 := cg.getintregister(current_asmdata.CurrAsmList,cgsize);                    emit_const_reg(A_MOV, opsize, aint(d), hreg4);                    emit_reg_reg(A_IMUL, opsize, hreg4, hreg2);                  end                else{$endif x86_64}                  emit_const_reg(A_IMUL, opsize, aint(d), hreg2);                emit_reg_reg(A_SUB, opsize, hreg2, hreg1);                location.register := hreg1;              end            else              location.register := hreg2;          end;{$endif not i8086}        procedure DoUnsignedReciprocalDivision;          var            exp_rega,exp_regd:Tregister;            exp_opsize:topsize;            DoMod: Boolean;          begin{$ifndef i8086}            IF (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) then              begin                { If BMI2 is available, use more efficient instructions }                DoBMI2ReciprocalDivision;                Exit;              end;{$endif not i8086}            DoMod := (nodetype = modn);            { Extend 32-bit divides to 64-bit registers and 16-bit              divides to 32-bit registers.  
Because the domain of              the left input is only up to 2^(X/2 - 1) - 1, (i.e.              2^31 - 1 for 64-bit and 2^15 - 1 for 32-bit), a much              larger error in the reciprocal is permitted. }            if (resultdef.size <= {$ifdef x86_64}4{$else x86_64}2{$endif x86_64}) then              begin                calc_divconst_magic_unsigned(resultdef.size * 2 * 8,d,m,m_add,s);                { Should never have a zero shift and a magic add together }                if (s = 0) and m_add then                  InternalError(2021090201);                { Extend the input register (the peephole optimizer should                  help clean up unnecessary MOVZX instructions }                hreg3 := hreg1;                case resultdef.size of{$ifdef x86_64}                  4:                    begin                      setsubreg(hreg3, R_SUBQ);                      { Make sure the upper 32 bits are zero; the peephole                        optimizer will remove this instruction via MovAnd2Mov                        if it's not needed }                      emit_const_reg(A_AND, S_L, $FFFFFFFF, hreg1);                      exp_rega := NR_RAX;                      exp_regd := NR_RDX;                      exp_opsize := S_Q;                      if m_add then                        { Append 1 to the tail end of the result }                        m := (m shr s) or ($8000000000000000 shr (s - 1))                      else                        m := m shr s;                    end;{$endif x86_64}                  2:                    begin                      setsubreg(hreg3, R_SUBD);                      emit_reg_reg(A_MOVZX, S_WL, hreg1, hreg3);                      exp_rega := NR_EAX;                      exp_regd := NR_EDX;                      exp_opsize := S_L;                      if m_add then                        { Append 1 to the tail end of the result }                        m := (m shr s) or ($80000000 shr (s - 1))                      
else                        m := m shr s;                    end;                  1:                    begin                      setsubreg(hreg3, R_SUBW);                      emit_reg_reg(A_MOVZX, S_BW, hreg1, hreg3);                      exp_rega := NR_AX;                      exp_regd := NR_DX;                      regd := NR_DL; { We need to change this from AH }                      exp_opsize := S_W;                      if m_add then                        { Append 1 to the tail end of the result }                        m := (m shr s) or ($8000 shr (s - 1))                      else                        m := m shr s;                    end;                  else                    InternalError(2021090210);                end;                Inc(m);                cg.getcpuregister(current_asmdata.CurrAsmList,exp_rega);                emit_const_reg(A_MOV,exp_opsize,aint(m),exp_rega);                cg.getcpuregister(current_asmdata.CurrAsmList,exp_regd);                emit_reg(A_MUL,exp_opsize,hreg3);                cg.ungetcpuregister(current_asmdata.CurrAsmList,exp_rega);                if DoMod then                  begin                    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                    emit_reg_reg(A_MOV,opsize,hreg1,hreg2);                  end;              end            else              begin                calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);                { Should never have a zero shift and a magic add together }                if (s = 0) and m_add then                  InternalError(2021090202);                cg.getcpuregister(current_asmdata.CurrAsmList,rega);                emit_const_reg(A_MOV,opsize,aint(m),rega);                cg.getcpuregister(current_asmdata.CurrAsmList,regd);                emit_reg(A_MUL,opsize,hreg1);                cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);                if DoMod then                  begin                    
hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                    emit_reg_reg(A_MOV,opsize,hreg1,hreg2);                  end;                if m_add then                  begin                    { addition can overflow, shift first bit considering carry,                      then shift remaining bits in regular way. }                    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);                    emit_reg_reg(A_ADD,opsize,hreg1,regd);                    emit_const_reg(A_RCR,opsize,1,regd);                    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);                    dec(s);                  end;                if s<>0 then                  emit_const_reg(A_SHR,opsize,aint(s),regd);              end;            if DoMod then              begin                { Now multiply the quotient by the original denominator and                  subtract the product from the original numerator to get                  the remainder. }{$ifdef x86_64}                if (cgsize in [OS_64,OS_S64]) and (d > $7FFFFFFF) then { Cannot use 64-bit constants in IMUL }                  begin                    hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                    emit_const_reg(A_MOV,opsize,aint(d),hreg3);                    emit_reg_reg(A_IMUL,opsize,hreg3,regd);                  end                else{$endif x86_64}                  emit_const_reg(A_IMUL,opsize,aint(d),regd);                emit_reg_reg(A_SUB,opsize,regd,hreg2);              end;            cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);            if not DoMod then              begin                hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,hreg2);              end;            location.register:=hreg2;          end;      begin        secondpass(left);        if codegenerror then          exit;        secondpass(right);        if 
codegenerror then          exit;        { put numerator in register }        cgsize:=def_cgsize(resultdef);        opsize:=TCGSize2OpSize[cgsize];        rega:=newreg(R_INTREGISTER,RS_EAX,cgsize2subreg(R_INTREGISTER,cgsize));        if cgsize in [OS_8,OS_S8] then          regd:=NR_AH        else          regd:=newreg(R_INTREGISTER,RS_EDX,cgsize2subreg(R_INTREGISTER,cgsize));        location_reset(location,LOC_REGISTER,cgsize);        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);        hreg1:=left.location.register;        if (nodetype=divn) and (right.nodetype=ordconstn) then          begin            if isabspowerof2(tordconstnode(right).value,power) then              begin                { for signed numbers, the numerator must be adjusted before the                  shift instruction, but not with unsigned numbers! Otherwise,                  "Cardinal($ffffffff) div 16" overflows! (JM) }                if is_signed(left.resultdef) Then                  begin                    invertsign:=tordconstnode(right).value<0;                    { use a sequence without jumps, saw this in                      comp.compilers (JM) }                    { no jumps, but more operations }                    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                    emit_reg_reg(A_MOV,opsize,hreg1,hreg2);                    if power=1 then                      begin                        {If the left value is negative, hreg2=(1 shl power)-1=1, otherwise 0.}                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-1,hreg2);                      end                    else                      begin                        {If the left value is negative, hreg2=$ffffffff, otherwise 0.}                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,resultdef.size*8-1,hreg2);                        {If negative, hreg2=(1 shl power)-1, otherwise 
0.}                        { (don't use emit_const_reg, because if value>high(longint)                           then it must first be loaded into a register) }                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,(aint(1) shl power)-1,hreg2);                      end;                    { add to the left value }                    emit_reg_reg(A_ADD,opsize,hreg2,hreg1);                    { do the shift }                    cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,power,hreg1);                    if invertsign then                      emit_reg(A_NEG,opsize,hreg1);                  end                else                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,power,hreg1);                location.register:=hreg1;              end            else              begin                if is_signed(left.resultdef) then                  begin                    e:=tordconstnode(right).value.svalue;                    calc_divconst_magic_signed(resultdef.size*8,e,sm,s);                    cg.getcpuregister(current_asmdata.CurrAsmList,rega);                    emit_const_reg(A_MOV,opsize,sm,rega);                    cg.getcpuregister(current_asmdata.CurrAsmList,regd);                    emit_reg(A_IMUL,opsize,hreg1);                    { only the high half of result is used }                    cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);                    { add or subtract dividend }                    if (e>0) and (sm<0) then                      emit_reg_reg(A_ADD,opsize,hreg1,regd)                    else if (e<0) and (sm>0) then                      emit_reg_reg(A_SUB,opsize,hreg1,regd);                    { shift if necessary }                    if (s<>0) then                      emit_const_reg(A_SAR,opsize,s,regd);                    { extract and add the sign bit }                    if (e<0) then                      emit_reg_reg(A_MOV,opsize,regd,hreg1);                    { if 
e>=0, hreg1 still contains dividend }                    emit_const_reg(A_SHR,opsize,left.resultdef.size*8-1,hreg1);                    emit_reg_reg(A_ADD,opsize,hreg1,regd);                    cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);                    location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                    cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)                  end                else                  begin                    d:=tordconstnode(right).value.uvalue;                    if d>=aword(1) shl (left.resultdef.size*8-1) then                      begin                        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                        { Ensure that the whole register is 0, since SETcc only sets the lowest byte }                        { If the operands are 64 bits, this XOR routine will be shrunk by the                          peephole optimizer. [Kit] }                        emit_reg_reg(A_XOR,opsize,location.register,location.register);                        if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }                          begin                            hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                            emit_const_reg(A_MOV,opsize,aint(d),hreg2);                            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);                            emit_reg_reg(A_CMP,opsize,hreg2,hreg1);                          end                        else                          begin                            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);                            emit_const_reg(A_CMP,opsize,aint(d),hreg1);                          end;                        { NOTE: SBB and SETAE are both 3 bytes long without the REX prefix,                          both use an ALU for their execution and take a single cycle to        
                  run. The only difference is that SETAE does not modify the flags,                          allowing for some possible reuse. [Kit] }                        { Emit a SETcc instruction that depends on the carry bit being zero,                          that is, the numerator is greater than or equal to the denominator. }                        tempreg:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,OS_8);                        instr:=TAiCpu.op_reg(A_SETcc,S_B,tempreg);                        instr.condition:=C_AE;                        current_asmdata.CurrAsmList.concat(instr);                        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);                      end                    else                      DoUnsignedReciprocalDivision;                  end;              end;          end        else if (nodetype=modn) and (right.nodetype=ordconstn) and not(is_signed(left.resultdef)) then          begin            { unsigned modulus by a (+/-)power-of-2 constant? 
}            if isabspowerof2(tordconstnode(right).value,power) then              begin                emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1);                location.register:=hreg1;              end            else              begin                d:=tordconstnode(right).value.uvalue;                if d>=aword(1) shl (left.resultdef.size*8-1) then                  begin                    if not (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then                      goto DefaultDiv;                    location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                    hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                    m := aword(-aint(d)); { Two's complement of d }                    if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }                      begin                        hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);                        emit_const_reg(A_MOV,opsize,aint(d),hreg2);                        emit_const_reg(A_MOV,opsize,aint(m),hreg3);                        emit_reg_reg(A_XOR,opsize,location.register,location.register);                        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);                        emit_reg_reg(A_CMP,opsize,hreg2,hreg1);                      end                    else                      begin                        emit_const_reg(A_MOV,opsize,aint(m),hreg3);                        emit_reg_reg(A_XOR,opsize,location.register,location.register);                        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);                        emit_const_reg(A_CMP,opsize,aint(d),hreg1);                      end;                    { Emit conditional move that depends on the carry flag being zero,                      that is, the comparison result is above or equal }                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register);        
            instr.condition := C_AE;                    current_asmdata.CurrAsmList.concat(instr);                    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);                    emit_reg_reg(A_ADD,opsize,hreg1,location.register);                  end                else                  { Convert the division to a multiplication }                  DoUnsignedReciprocalDivision;              end;          end        else if (nodetype=modn) and (right.nodetype=ordconstn) and (is_signed(left.resultdef)) and isabspowerof2(tordconstnode(right).value,power) then          begin            hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);            if power=1 then              cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-power,hreg1,hreg2)            else              begin                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,resultdef.size*8-1,hreg1,hreg2);                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-power,hreg2,hreg2);              end;            emit_reg_reg(A_ADD,opsize,hreg1,hreg2);            cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,not((aint(1) shl power)-1),hreg2);            emit_reg_reg(A_SUB,opsize,hreg2,hreg1);            location.register:=hreg1;          end        else          beginDefaultDiv:            {Bring denominator to a register.}            cg.getcpuregister(current_asmdata.CurrAsmList,rega);            emit_reg_reg(A_MOV,opsize,hreg1,rega);            cg.getcpuregister(current_asmdata.CurrAsmList,regd);            {Sign extension depends on the left type.}            if is_signed(left.resultdef) then              case left.resultdef.size of{$ifdef x86_64}                8:                  emit_none(A_CQO,S_NO);{$endif x86_64}                4:                  emit_none(A_CDQ,S_NO);                else                  internalerror(2013102704);              end            else     
         emit_reg_reg(A_XOR,opsize,regd,regd);

            { Division depends on the result type }
            if is_signed(resultdef) then
              op:=A_IDIV
            else
              op:=A_DIV;

            if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
              emit_ref(op,opsize,right.location.reference)
            else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
              emit_reg(op,opsize,right.location.register)
            else
              begin
                hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
                hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
                emit_reg(op,opsize,hreg1);
              end;

            { Copy the result into a new register. Release R/EAX & R/EDX.}
            cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
            location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
            if nodetype=divn then
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
            else
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
          end;
      end;


{$ifdef SUPPORT_MMX}
    { Generates code for an MMX shift node (shln / shrn).

      Both operands are evaluated with secondpass; the procedure exits early
      if either evaluation reports a code generation error. The packed shift
      opcode is selected from the node type and the MMX element type of the
      left operand:

        shrn: 16 bit -> PSRLW, 32 bit -> PSRLD, 64 bit -> PSRLQ
        shln: 16 bit -> PSLLW, 32 bit -> PSLLD, 64 bit -> PSLLQ

      The left operand is brought into an MMX register (if it is not in one
      already), the shift is applied to that register in place, and the
      register becomes the location of the result.

      Raises an internal error for an unsupported node type, MMX element
      type, or operand location. }
    procedure tx86shlshrnode.second_mmx;
      var
        op         : TAsmOp;
        mmxbase    : tmmxtype;
        hregister  : tregister;
      begin
        secondpass(left);
        if codegenerror then
          exit;
        secondpass(right);
        if codegenerror then
          exit;

        op:=A_NOP;

        mmxbase:=mmx_type(left.resultdef);
        location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));

        { pick the packed shift instruction matching the element width }
        case nodetype of
          shrn :
            case mmxbase of
               mmxs16bit,mmxu16bit,mmxfixed16:
                 op:=A_PSRLW;
               mmxs32bit,mmxu32bit:
                 op:=A_PSRLD;
               mmxs64bit,mmxu64bit:
                 op:=A_PSRLQ;
               else
                 Internalerror(2018022504);
            end;
          shln :
            case mmxbase of
               mmxs16bit,mmxu16bit,mmxfixed16:
                 op:=A_PSLLW;
               mmxs32bit,mmxu32bit:
                 op:=A_PSLLD;
               mmxs64bit,mmxu64bit:
                 { quadword shift, mirroring PSRLQ in the shrn branch;
                   PSLLD would shift the two doublewords independently }
                 op:=A_PSLLQ;
               else
                 Internalerror(2018022503);
            end;
          else
            internalerror(2018022502);
        end;

        { the shift works in place on the left operand, so if left is not }
        { already in an MMX register, load it into a freshly obtained one }
        if (left.location.loc<>LOC_MMXREGISTER) then
         begin
           { register variable ? }
           if (left.location.loc=LOC_CMMXREGISTER) then
            begin
              { copy it, so the shift below is applied to the copy }
              hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
            end
           else
            begin
              { anything that is not a register must be a memory reference }
              if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
               internalerror(2018022505);

              hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
            end;

           location_reset(left.location,LOC_MMXREGISTER,OS_NO);
           left.location.register:=hregister;
         end;

        { at this point, left.location.loc should be LOC_MMXREGISTER }
        { emit the shift; the form depends on where the shift count lives }
        case right.location.loc of
          LOC_MMXREGISTER,LOC_CMMXREGISTER:
            emit_reg_reg(op,S_NO,right.location.register,left.location.register);
          LOC_CONSTANT:
            emit_const_reg(op,S_NO,right.location.value,left.location.register);
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
            end;
          else
            internalerror(2018022506);
        end;

        { the shifted left register holds the result for every operand form }
        location.register:=left.location.register;

        location_freetemp(current_asmdata.CurrAsmList,right.location);
      end;
{$endif SUPPORT_MMX}

end.
 |