11 年之前 · a78fc3fbee
--- a/.gitattributes
+++ b/.gitattributes
@@ -203,7 +203,9 @@ compiler/i386/i386prop.inc svneol=native#text/plain
 
				 compiler/i386/i386tab.inc svneol=native#text/plain
			
 
				 compiler/i386/n386add.pas svneol=native#text/plain
			
 
				 compiler/i386/n386cal.pas svneol=native#text/plain
			
 
				+compiler/i386/n386flw.pas svneol=native#text/plain
			
 
				 compiler/i386/n386inl.pas svneol=native#text/plain
			
 
				+compiler/i386/n386ld.pas svneol=native#text/plain
			
 
				 compiler/i386/n386mat.pas svneol=native#text/plain
			
 
				 compiler/i386/n386mem.pas svneol=native#text/plain
			
 
				 compiler/i386/n386set.pas svneol=native#text/plain
			
@@ -218,7 +220,6 @@ compiler/i386/r386nasm.inc svneol=native#text/plain
 
				 compiler/i386/r386nor.inc svneol=native#text/plain
			
 
				 compiler/i386/r386nri.inc svneol=native#text/plain
			
 
				 compiler/i386/r386num.inc svneol=native#text/plain
			
 
				-compiler/i386/r386op.inc svneol=native#text/plain
			
 
				 compiler/i386/r386ot.inc svneol=native#text/plain
			
 
				 compiler/i386/r386rni.inc svneol=native#text/plain
			
 
				 compiler/i386/r386sri.inc svneol=native#text/plain
			
@@ -228,6 +229,7 @@ compiler/i386/ra386att.pas svneol=native#text/plain
 
				 compiler/i386/ra386int.pas svneol=native#text/plain
			
 
				 compiler/i386/rgcpu.pas svneol=native#text/plain
			
 
				 compiler/i386/rropt386.pas svneol=native#text/plain
			
 
				+compiler/i386/symcpu.pas svneol=native#text/plain
			
 
				 compiler/i8086/aoptcpu.pas svneol=native#text/plain
			
 
				 compiler/i8086/aoptcpub.pas svneol=native#text/plain
			
 
				 compiler/i8086/aoptcpud.pas svneol=native#text/plain
			
--- a/compiler/i386/aopt386.pas
+++ b/compiler/i386/aopt386.pas
@@ -64,13 +64,16 @@ Begin
 
				    { or nil                                                                }
			
 
				      While Assigned(BlockStart) Do
			
 
				        Begin
			
 
				-         if pass = 0 then
			
 
				-           PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
			
 
				-        { Peephole optimizations }
			
 
				-         PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
			
 
				-        { Only perform them twice in the first pass }
			
 
				-         if pass = 0 then
			
 
				-           PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
			
 
				+         if (cs_opt_peephole in current_settings.optimizerswitches) then
			
 
				+           begin
			
 
				+            if (pass = 0) then
			
 
				+              PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
			
 
				+              { Peephole optimizations }
			
 
				+               PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
			
 
				+              { Only perform them twice in the first pass }
			
 
				+               if pass = 0 then
			
 
				+                 PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
			
 
				+           end;
			
 
				         { Data flow analyzer }
			
 
				          If (cs_opt_asmcse in current_settings.optimizerswitches) Then
			
 
				            begin
			
@@ -79,9 +82,12 @@ Begin
 
				                changed := CSE(asmL, blockStart, blockEnd, pass) or changed;
			
 
				            end;
			
 
				         { More peephole optimizations }
			
 
				-         PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
			
 
				-         if lastLoop then
			
 
				-           PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
			
 
				+         if (cs_opt_peephole in current_settings.optimizerswitches) then
			
 
				+           begin
			
 
				+             PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
			
 
				+             if lastLoop then
			
 
				+               PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
			
 
				+           end;
			
 
				 
			
 
				         { Free memory }
			
 
				         dfa.clear;
			
--- a/compiler/i386/cgcpu.pas
+++ b/compiler/i386/cgcpu.pas
@@ -30,7 +30,7 @@ unit cgcpu;
 
				        cgbase,cgobj,cg64f32,cgx86,
			
 
				        aasmbase,aasmtai,aasmdata,aasmcpu,
			
 
				        cpubase,parabase,cgutils,
			
 
				-       symconst,symdef
			
 
				+       symconst,symdef,symsym
			
 
				        ;
			
 
				 
			
 
				     type
			
@@ -200,7 +200,7 @@ unit cgcpu;
 
				         if use_push(cgpara) then
			
 
				           begin
			
 
				             { Record copy? }
			
 
				-            if (cgpara.size in [OS_NO,OS_F64]) or (size=OS_NO) then
			
 
				+            if (cgpara.size=OS_NO) or (size=OS_NO) then
			
 
				               begin
			
 
				                 cgpara.check_simple_location;
			
 
				                 len:=align(cgpara.intsize,cgpara.alignment);
			
@@ -212,9 +212,19 @@ unit cgcpu;
 
				               begin
			
 
				                 if tcgsize2size[cgpara.size]<>tcgsize2size[size] then
			
 
				                   internalerror(200501161);
			
 
				-                { We need to push the data in reverse order,
			
 
				-                  therefor we use a recursive algorithm }
			
 
				-                pushdata(cgpara.location,0);
			
 
				+                if (cgpara.size=OS_F64) then
			
 
				+                  begin
			
 
				+                    href:=r;
			
 
				+                    make_simple_ref(list,href);
			
 
				+                    inc(href.offset,4);
			
 
				+                    list.concat(taicpu.op_ref(A_PUSH,S_L,href));
			
 
				+                    dec(href.offset,4);
			
 
				+                    list.concat(taicpu.op_ref(A_PUSH,S_L,href));
			
 
				+                  end
			
 
				+                else
			
 
				+                  { We need to push the data in reverse order,
			
 
				+                    therefor we use a recursive algorithm }
			
 
				+                  pushdata(cgpara.location,0);
			
 
				               end
			
 
				           end
			
 
				         else
			
@@ -294,17 +304,15 @@ unit cgcpu;
 
				 
			
 
				     procedure tcg386.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
			
 
				 
			
 
				-      procedure increase_fp(a : tcgint);
			
 
				+      procedure increase_sp(a : tcgint);
			
 
				         var
			
 
				           href : treference;
			
 
				         begin
			
 
				-          reference_reset_base(href,current_procinfo.framepointer,a,0);
			
 
				+          reference_reset_base(href,NR_STACK_POINTER_REG,a,0);
			
 
				           { normally, lea is a better choice than an add }
			
 
				-          list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],href,current_procinfo.framepointer));
			
 
				+          list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],href,NR_STACK_POINTER_REG));
			
 
				         end;
			
 
				 
			
 
				-      var
			
 
				-        stacksize : longint;
			
 
				       begin
			
 
				         { MMX needs to call EMMS }
			
 
				         if assigned(rg[R_MMXREGISTER]) and
			
@@ -314,20 +322,15 @@ unit cgcpu;
 
				         { remove stackframe }
			
 
				         if not nostackframe then
			
 
				           begin
			
 
				-            if current_procinfo.framepointer=NR_STACK_POINTER_REG then
			
 
				+            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
			
 
				+               (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
			
 
				               begin
			
 
				-                stacksize:=current_procinfo.calc_stackframe_size;
			
 
				-                if (target_info.stackalign>4) and
			
 
				-                   ((stacksize <> 0) or
			
 
				-                    (pi_do_call in current_procinfo.flags) or
			
 
				-                    { can't detect if a call in this case -> use nostackframe }
			
 
				-                    { if you (think you) know what you are doing              }
			
 
				-                    (po_assembler in current_procinfo.procdef.procoptions)) then
			
 
				-                  stacksize := align(stacksize+sizeof(aint),target_info.stackalign) - sizeof(aint);
			
 
				-                if stacksize<>0 then
			
 
				-                  increase_fp(stacksize);
			
 
				+                if current_procinfo.final_localsize<>0 then
			
 
				+                  increase_sp(current_procinfo.final_localsize);
			
 
				                 if (not paramanager.use_fixed_stack) then
			
 
				                   internal_restore_regs(list,true);
			
 
				+                if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
			
 
				+                  list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
			
 
				               end
			
 
				             else
			
 
				               begin
			
@@ -387,7 +390,8 @@ unit cgcpu;
 
				            { but not on win32 }
			
 
				            { and not for safecall with hidden exceptions, because the result }
			
 
				            { wich contains the exception is passed in EAX }
			
 
				-           if (target_info.system <> system_i386_win32) and
			
 
				+           if ((target_info.system <> system_i386_win32) or
			
 
				+               (target_info.abi=abi_old_win32_gnu)) and
			
 
				               not ((current_procinfo.procdef.proccalloption = pocall_safecall) and
			
 
				                (tf_safecall_exceptions in target_info.flags)) and
			
 
				               paramanager.ret_in_param(current_procinfo.procdef.returndef,
			
@@ -411,7 +415,7 @@ unit cgcpu;
 
				 
			
 
				     procedure tcg386.g_copyvaluepara_openarray(list : TAsmList;const ref:treference;const lenloc:tlocation;elesize:tcgint;destreg:tregister);
			
 
				       var
			
 
				-        power,len  : longint;
			
 
				+        power  : longint;
			
 
				         opsize : topsize;
			
 
				 {$ifndef __NOWINPECOFF__}
			
 
				         again,ok : tasmlabel;
			
@@ -421,9 +425,21 @@ unit cgcpu;
 
				         getcpuregister(list,NR_EDI);
			
 
				         a_load_loc_reg(list,OS_INT,lenloc,NR_EDI);
			
 
				         list.concat(Taicpu.op_reg(A_INC,S_L,NR_EDI));
			
 
				-        { Now EDI contains (high+1). Copy it to ECX for later use. }
			
 
				-        getcpuregister(list,NR_ECX);
			
 
				-        list.concat(Taicpu.op_reg_reg(A_MOV,S_L,NR_EDI,NR_ECX));
			
 
				+        { Now EDI contains (high+1). }
			
 
				+
			
 
				+        { special case handling for elesize=8, 4 and 2:
			
 
				+          set ECX = (high+1) instead of ECX = (high+1)*elesize.
			
 
				+
			
 
				+          In the case of elesize=4 and 2, this allows us to avoid the SHR later.
			
 
				+          In the case of elesize=8, we can later use a SHL ECX, 1 instead of
			
 
				+          SHR ECX, 2 which is one byte shorter. }
			
 
				+        if (elesize=8) or (elesize=4) or (elesize=2) then
			
 
				+          begin
			
 
				+            { Now EDI contains (high+1). Copy it to ECX for later use. }
			
 
				+            getcpuregister(list,NR_ECX);
			
 
				+            list.concat(Taicpu.op_reg_reg(A_MOV,S_L,NR_EDI,NR_ECX));
			
 
				+          end;
			
 
				+        { EDI := EDI * elesize }
			
 
				         if (elesize<>1) then
			
 
				          begin
			
 
				            if ispowerof2(elesize, power) then
			
@@ -431,6 +447,12 @@ unit cgcpu;
 
				            else
			
 
				              list.concat(Taicpu.op_const_reg(A_IMUL,S_L,elesize,NR_EDI));
			
 
				          end;
			
 
				+        if (elesize<>8) and (elesize<>4) and (elesize<>2) then
			
 
				+          begin
			
 
				+            { Now EDI contains (high+1)*elesize. Copy it to ECX for later use. }
			
 
				+            getcpuregister(list,NR_ECX);
			
 
				+            list.concat(Taicpu.op_reg_reg(A_MOV,S_L,NR_EDI,NR_ECX));
			
 
				+          end;
			
 
				 {$ifndef __NOWINPECOFF__}
			
 
				         { windows guards only a few pages for stack growing, }
			
 
				         { so we have to access every page first              }
			
@@ -464,27 +486,38 @@ unit cgcpu;
 
				         a_loadaddr_ref_reg(list,ref,NR_ESI);
			
 
				 
			
 
				         { calculate size }
			
 
				-        len:=elesize;
			
 
				         opsize:=S_B;
			
 
				-        if (len and 3)=0 then
			
 
				+        if elesize=8 then
			
 
				+          begin
			
 
				+            opsize:=S_L;
			
 
				+            { ECX is number of qwords, convert to dwords }
			
 
				+            list.concat(Taicpu.op_const_reg(A_SHL,S_L,1,NR_ECX))
			
 
				+          end
			
 
				+        else if elesize=4 then
			
 
				+          begin
			
 
				+            opsize:=S_L;
			
 
				+            { ECX is already number of dwords, so no need to SHL/SHR }
			
 
				+          end
			
 
				+        else if elesize=2 then
			
 
				+          begin
			
 
				+            opsize:=S_W;
			
 
				+            { ECX is already number of words, so no need to SHL/SHR }
			
 
				+          end
			
 
				+        else
			
 
				+         if (elesize and 3)=0 then
			
 
				          begin
			
 
				            opsize:=S_L;
			
 
				-           len:=len shr 2;
			
 
				+           { ECX is number of bytes, convert to dwords }
			
 
				+           list.concat(Taicpu.op_const_reg(A_SHR,S_L,2,NR_ECX))
			
 
				          end
			
 
				         else
			
 
				-         if (len and 1)=0 then
			
 
				+         if (elesize and 1)=0 then
			
 
				           begin
			
 
				             opsize:=S_W;
			
 
				-            len:=len shr 1;
			
 
				+            { ECX is number of bytes, convert to words }
			
 
				+            list.concat(Taicpu.op_const_reg(A_SHR,S_L,1,NR_ECX))
			
 
				           end;
			
 
				 
			
 
				-        if len>1 then
			
 
				-          begin
			
 
				-            if ispowerof2(len, power) then
			
 
				-              list.concat(Taicpu.op_const_reg(A_SHL,S_L,power,NR_ECX))
			
 
				-            else
			
 
				-              list.concat(Taicpu.op_const_reg(A_IMUL,S_L,len,NR_ECX));
			
 
				-          end;
			
 
				         if ts_cld in current_settings.targetswitches then
			
 
				           list.concat(Taicpu.op_none(A_CLD,S_NO));
			
 
				         list.concat(Taicpu.op_none(A_REP,S_NO));
			
@@ -532,7 +565,7 @@ unit cgcpu;
 
				       begin
			
 
				         if not paramanager.use_fixed_stack then
			
 
				           begin
			
 
				-            cg.a_reg_alloc(list,NR_FUNCTION_RESULT_REG);
			
 
				+            a_reg_alloc(list,NR_FUNCTION_RESULT_REG);
			
 
				             list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_INT],NR_FUNCTION_RESULT_REG))
			
 
				           end
			
 
				         else
			
@@ -561,7 +594,7 @@ unit cgcpu;
 
				                (current_settings.optimizecputype in [cpu_Pentium2,cpu_Pentium3,cpu_Pentium4]) } then
			
 
				               begin
			
 
				                 current_module.requires_ebx_pic_helper:=true;
			
 
				-                cg.a_call_name_static(list,'fpc_geteipasebx');
			
 
				+                a_call_name_static(list,'fpc_geteipasebx');
			
 
				               end
			
 
				             else
			
 
				               begin
			
@@ -587,17 +620,17 @@ unit cgcpu;
 
				       possible calling conventions:
			
 
				                     default stdcall cdecl pascal register
			
 
				       default(0):      OK     OK    OK     OK       OK
			
 
				-      virtual(1):      OK     OK    OK     OK       OK(2)
			
 
				+      virtual(1):      OK     OK    OK     OK       OK(2 or 1)
			
 
				 
			
 
				       (0):
			
 
				           set self parameter to correct value
			
 
				           jmp mangledname
			
 
				 
			
 
				-      (1): The wrapper code use %eax to reach the virtual method address
			
 
				+      (1): The wrapper code use %ecx to reach the virtual method address
			
 
				            set self to correct value
			
 
				            move self,%eax
			
 
				-           mov  0(%eax),%eax ; load vmt
			
 
				-           jmp  vmtoffs(%eax) ; method offs
			
 
				+           mov  0(%eax),%ecx ; load vmt
			
 
				+           jmp  vmtoffs(%ecx) ; method offs
			
 
				 
			
 
				       (2): Virtual use values pushed on stack to reach the method address
			
 
				            so the following code be generated:
			
@@ -613,6 +646,30 @@ unit cgcpu;
 
				 
			
 
				       }
			
 
				 
			
 
				+      { returns whether ECX is used (either as a parameter or is nonvolatile and shouldn't be changed) }
			
 
				+      function is_ecx_used: boolean;
			
 
				+        var
			
 
				+          i: Integer;
			
 
				+          hp: tparavarsym;
			
 
				+          paraloc: PCGParaLocation;
			
 
				+        begin
			
 
				+          if not (RS_ECX in paramanager.get_volatile_registers_int(procdef.proccalloption)) then
			
 
				+            exit(true);
			
 
				+          for i:=0 to procdef.paras.count-1 do
			
 
				+           begin
			
 
				+             hp:=tparavarsym(procdef.paras[i]);
			
 
				+             procdef.init_paraloc_info(calleeside);
			
 
				+             paraloc:=hp.paraloc[calleeside].Location;
			
 
				+             while paraloc<>nil do
			
 
				+               begin
			
 
				+                 if (paraloc^.Loc=LOC_REGISTER) and (getsupreg(paraloc^.register)=RS_ECX) then
			
 
				+                   exit(true);
			
 
				+                 paraloc:=paraloc^.Next;
			
 
				+               end;
			
 
				+           end;
			
 
				+          Result:=false;
			
 
				+        end;
			
 
				+
			
 
				       procedure getselftoeax(offs: longint);
			
 
				         var
			
 
				           href : treference;
			
@@ -627,27 +684,27 @@ unit cgcpu;
 
				               else
			
 
				                 selfoffsetfromsp:=sizeof(aint);
			
 
				               reference_reset_base(href,NR_ESP,selfoffsetfromsp+offs,4);
			
 
				-              cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
			
 
				+              a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
			
 
				             end;
			
 
				         end;
			
 
				 
			
 
				-      procedure loadvmttoeax;
			
 
				+      procedure loadvmtto(reg: tregister);
			
 
				         var
			
 
				           href : treference;
			
 
				         begin
			
 
				-          { mov  0(%eax),%eax ; load vmt}
			
 
				+          { mov  0(%eax),%reg ; load vmt}
			
 
				           reference_reset_base(href,NR_EAX,0,4);
			
 
				-          cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
			
 
				+          a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,reg);
			
 
				         end;
			
 
				 
			
 
				-      procedure op_oneaxmethodaddr(op: TAsmOp);
			
 
				+      procedure op_onregmethodaddr(op: TAsmOp; reg: tregister);
			
 
				         var
			
 
				           href : treference;
			
 
				         begin
			
 
				           if (procdef.extnumber=$ffff) then
			
 
				             Internalerror(200006139);
			
 
				-          { call/jmp  vmtoffs(%eax) ; method offs }
			
 
				-          reference_reset_base(href,NR_EAX,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),4);
			
 
				+          { call/jmp  vmtoffs(%reg) ; method offs }
			
 
				+          reference_reset_base(href,reg,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),4);
			
 
				           list.concat(taicpu.op_ref(op,S_L,href));
			
 
				         end;
			
 
				 
			
@@ -660,7 +717,7 @@ unit cgcpu;
 
				             Internalerror(200006139);
			
 
				           { mov vmtoffs(%eax),%eax ; method offs }
			
 
				           reference_reset_base(href,NR_EAX,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),4);
			
 
				-          cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
			
 
				+          a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
			
 
				         end;
			
 
				 
			
 
				 
			
@@ -695,13 +752,13 @@ unit cgcpu;
 
				         if (po_virtualmethod in procdef.procoptions) and
			
 
				             not is_objectpascal_helper(procdef.struct) then
			
 
				           begin
			
 
				-            if (procdef.proccalloption=pocall_register) then
			
 
				+            if (procdef.proccalloption=pocall_register) and is_ecx_used then
			
 
				               begin
			
 
				                 { case 2 }
			
 
				                 list.concat(taicpu.op_reg(A_PUSH,S_L,NR_EBX)); { allocate space for address}
			
 
				                 list.concat(taicpu.op_reg(A_PUSH,S_L,NR_EAX));
			
 
				                 getselftoeax(8);
			
 
				-                loadvmttoeax;
			
 
				+                loadvmtto(NR_EAX);
			
 
				                 loadmethodoffstoeax;
			
 
				                 { mov %eax,4(%esp) }
			
 
				                 reference_reset_base(href,NR_ESP,4,4);
			
@@ -715,8 +772,8 @@ unit cgcpu;
 
				               begin
			
 
				                 { case 1 }
			
 
				                 getselftoeax(0);
			
 
				-                loadvmttoeax;
			
 
				-                op_oneaxmethodaddr(A_JMP);
			
 
				+                loadvmtto(NR_ECX);
			
 
				+                op_onregmethodaddr(A_JMP,NR_ECX);
			
 
				               end;
			
 
				           end
			
 
				         { case 0 }
			
--- a/compiler/i386/cpubase.inc
+++ b/compiler/i386/cpubase.inc
@@ -38,6 +38,7 @@
 
				         S_YMM
			
 
				       );
			
 
				 
			
 
				+      TOpSizes = set of topsize;
			
 
				 
			
 
				 {*****************************************************************************
			
 
				                                 Registers
			
@@ -138,6 +139,7 @@
 
				       }
			
 
				       saved_standard_registers : array[0..3] of tsuperregister = (RS_EBX,RS_ESI,RS_EDI,RS_EBP);
			
 
				 
			
 
				+      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
			
 
				       saved_mm_registers : array[0..0] of tsuperregister = (RS_INVALID);
			
 
				       {# Required parameter alignment when calling a routine declared as
			
 
				          stdcall and cdecl. The alignment value should be the one defined
			
--- a/compiler/i386/cpuelf.pas
+++ b/compiler/i386/cpuelf.pas
@@ -110,6 +110,8 @@ implementation
 
				           result:=R_386_GOTPC;
			
 
				         RELOC_PLT32 :
			
 
				           result:=R_386_PLT32;
			
 
				+        RELOC_GOTOFF:
			
 
				+          result:=R_386_GOTOFF;
			
 
				       else
			
 
				         result:=0;
			
 
				         InternalError(2012082301);
			
--- a/compiler/i386/cpuinfo.pas
+++ b/compiler/i386/cpuinfo.pas
@@ -46,7 +46,10 @@ Type
 
				        cpu_Pentium2,
			
 
				        cpu_Pentium3,
			
 
				        cpu_Pentium4,
			
 
				-       cpu_PentiumM
			
 
				+       cpu_PentiumM,
			
 
				+       cpu_core_i,
			
 
				+       cpu_core_avx,
			
 
				+       cpu_core_avx2
			
 
				       );
			
 
				 
			
 
				    tfputype =
			
@@ -85,7 +88,10 @@ Const
 
				      'PENTIUM2',
			
 
				      'PENTIUM3',
			
 
				      'PENTIUM4',
			
 
				-     'PENTIUMM'
			
 
				+     'PENTIUMM',
			
 
				+     'COREI',
			
 
				+     'COREAVX',
			
 
				+     'COREAVX2'
			
 
				    );
			
 
				 
			
 
				    fputypestr : array[tfputype] of string[6] = ('',
			
@@ -117,12 +123,40 @@ Const
 
				                                   cs_opt_tailrecursion,cs_opt_nodecse,cs_useebp,
			
 
				 				  cs_opt_reorder_fields,cs_opt_fastmath];
			
 
				 
			
 
				-   level1optimizerswitches = genericlevel1optimizerswitches + [cs_opt_peephole];
			
 
				+   level1optimizerswitches = genericlevel1optimizerswitches;
			
 
				    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
			
 
				      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
			
 
				    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
			
 
				    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_useebp];
			
 
				 
			
 
				+type
			
 
				+   tcpuflags =
			
 
				+      (CPUX86_HAS_SSEUNIT,
			
 
				+       CPUX86_HAS_BMI1,
			
 
				+       CPUX86_HAS_BMI2,
			
 
				+       CPUX86_HAS_POPCNT,
			
 
				+       CPUX86_HAS_AVXUNIT,
			
 
				+       CPUX86_HAS_LZCNT,
			
 
				+       CPUX86_HAS_MOVBE,
			
 
				+       CPUX86_HAS_FMA,
			
 
				+       CPUX86_HAS_FMA4
			
 
				+      );
			
 
				+
			
 
				+ const
			
 
				+   cpu_capabilities : array[tcputype] of set of tcpuflags = (
			
 
				+     { cpu_none      } [],
			
 
				+     { cpu_386       } [],
			
 
				+     { cpu_Pentium   } [],
			
 
				+     { cpu_Pentium2  } [],
			
 
				+     { cpu_Pentium3  } [CPUX86_HAS_SSEUNIT],
			
 
				+     { cpu_Pentium4  } [CPUX86_HAS_SSEUNIT],
			
 
				+     { cpu_PentiumM  } [CPUX86_HAS_SSEUNIT],
			
 
				+     { cpu_core_i    } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
			
 
				+     { cpu_core_avx  } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
			
 
				+     { cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
			
 
				+   );
			
 
				+
			
 
				+
			
 
				 Implementation
			
 
				 
			
 
				 end.
			
--- a/compiler/i386/cpunode.pas
+++ b/compiler/i386/cpunode.pas
@@ -51,10 +51,16 @@ unit cpunode;
 
				 
			
 
				        n386add,
			
 
				        n386cal,
			
 
				+       n386ld,
			
 
				        n386mem,
			
 
				        n386set,
			
 
				        n386inl,
			
 
				-       n386mat
			
 
				+{$ifdef TEST_WIN32_SEH}
			
 
				+       n386flw,
			
 
				+{$endif TEST_WIN32_SEH}
			
 
				+       n386mat,
			
 
				+       { symtable }
			
 
				+       symcpu
			
 
				        ;
			
 
				 
			
 
				 end.
			
--- a/compiler/i386/cpupara.pas
+++ b/compiler/i386/cpupara.pas
@@ -40,12 +40,6 @@ unit cpupara;
 
				           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
			
 
				           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
			
 
				           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
			
 
				-          { Returns the location for the nr-st 32 Bit int parameter
			
 
				-            if every parameter before is an 32 Bit int parameter as well
			
 
				-            and if the calling conventions for the helper routines of the
			
 
				-            rtl are used.
			
 
				-          }
			
 
				-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
			
 
				           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
			
 
				           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
			
 
				           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;can_use_final_stack_loc : boolean;var cgpara:TCGPara);override;
			
@@ -274,51 +268,6 @@ unit cpupara;
 
				       end;
			
 
				 
			
 
				 
			
 
				-    procedure ti386paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
			
 
				-      var
			
 
				-        paraloc : pcgparalocation;
			
 
				-        psym: tparavarsym;
			
 
				-        pdef: tdef;
			
 
				-      begin
			
 
				-        psym:=tparavarsym(pd.paras[nr-1]);
			
 
				-        pdef:=psym.vardef;
			
 
				-        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
			
 
				-          pdef:=getpointerdef(pdef);
			
 
				-        cgpara.reset;
			
 
				-        cgpara.size:=def_cgsize(pdef);
			
 
				-        cgpara.intsize:=tcgsize2size[cgpara.size];
			
 
				-        cgpara.alignment:=get_para_align(pd.proccalloption);
			
 
				-        cgpara.def:=pdef;
			
 
				-        paraloc:=cgpara.add_location;
			
 
				-        with paraloc^ do
			
 
				-         begin
			
 
				-           size:=def_cgsize(pdef);
			
 
				-           def:=pdef;
			
 
				-           if pd.proccalloption=pocall_register then
			
 
				-             begin
			
 
				-               if (nr<=length(parasupregs)) then
			
 
				-                 begin
			
 
				-                   if nr=0 then
			
 
				-                     internalerror(200309271);
			
 
				-                   loc:=LOC_REGISTER;
			
 
				-                   register:=newreg(R_INTREGISTER,parasupregs[nr-1],R_SUBWHOLE);
			
 
				-                 end
			
 
				-               else
			
 
				-                 begin
			
 
				-                   loc:=LOC_REFERENCE;
			
 
				-                   reference.index:=NR_STACK_POINTER_REG;
			
 
				-                   { the previous parameters didn't take up room in memory }
			
 
				-                   reference.offset:=sizeof(aint)*(nr-length(parasupregs)-1)
			
 
				-                 end;
			
 
				-             end
			
 
				-           else
			
 
				-             begin
			
 
				-               loc:=LOC_REFERENCE;
			
 
				-               reference.index:=NR_STACK_POINTER_REG;
			
 
				-               reference.offset:=sizeof(aint)*nr;
			
 
				-             end;
			
 
				-          end;
			
 
				-      end;
			
 
				 
			
 
				 
			
 
				     function  ti386paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): TCGPara;
			
--- a/compiler/i386/csopt386.pas
+++ b/compiler/i386/csopt386.pas
@@ -353,6 +353,7 @@ var
 
				 
			
 
				   function getNextRegToTest(var prev: tai; currentReg: tsuperregister): tsuperregister;
			
 
				   begin
			
 
				+    getNextRegToTest := RS_INVALID;
			
 
				     if not checkingPrevSequences then
			
 
				       begin
			
 
				         if (currentreg = RS_INVALID) then
			
@@ -423,9 +424,13 @@ begin {CheckSequence}
 
				   TmpResult := False;
			
 
				   FillChar(OrgRegInfo, Sizeof(OrgRegInfo), 0);
			
 
				   FillChar(startRegInfo, sizeof(startRegInfo), 0);
			
 
				+  FillChar(HighRegInfo, sizeof(HighRegInfo), 0);
			
 
				+  FillChar(prevreginfo, sizeof(prevreginfo), 0);
			
 
				   OrgRegFound := 0;
			
 
				   HighFound := 0;
			
 
				   OrgRegResult := False;
			
 
				+  highPrev := nil;
			
 
				+  orgPrev := nil;
			
 
				   with startRegInfo do
			
 
				     begin
			
 
				       newRegsEncountered := [RS_EBP, RS_ESP];
			
@@ -1074,6 +1079,7 @@ var
 
				   prev: tai;
			
 
				   newOrgRegRState, newOrgRegWState: byte;
			
 
				 begin
			
 
				+  newOrgRegwState := 0;
			
 
				   if getLastInstruction(hp,prev) then
			
 
				     with ptaiprop(prev.optinfo)^ do
			
 
				       begin
			
@@ -1429,6 +1435,7 @@ var
 
				 
			
 
				 begin
			
 
				   replacereg := false;
			
 
				+  readStateChanged := false;
			
 
				   if canreplacereg(orgsupreg,newsupreg,p,orgregcanbemodified,newregmodified, orgregread, removelast,endp) then
			
 
				     begin
			
 
				 {$ifdef replaceregdebug}
			
@@ -1595,12 +1602,12 @@ var
 
				   regcounter: tsuperregister;
			
 
				   optimizable: boolean;
			
 
				 begin
			
 
				+  memtoreg := NR_NO;
			
 
				+
			
 
				   if not getlastinstruction(t,hp) or
			
 
				      not issimplememloc(ref) then
			
 
				-    begin
			
 
				-      memtoreg := NR_NO;
			
 
				-      exit;
			
 
				-    end;
			
 
				+    exit;
			
 
				+
			
 
				   p := ptaiprop(hp.optinfo);
			
 
				   optimizable := false;
			
 
				   for regcounter := RS_EAX to RS_EDI do
			
@@ -1832,15 +1839,18 @@ procedure doCSE(asml: TAsmList; First, Last: tai; findPrevSeqs, doSubOpts: boole
 
				  removed immediately because sometimes an instruction needs to be checked in
			
 
				  two different sequences}
			
 
				 var cnt, cnt2, {cnt3,} orgNrofMods: longint;
			
 
				-    p, hp1, hp2, prevSeq: tai;
			
 
				-    hp4: tai;
			
 
				-    hp5 : tai;
			
 
				+    p, hp1, hp2, hp4, hp5, prevSeq: tai;
			
 
				     reginfo: toptreginfo;
			
 
				     memreg: tregister;
			
 
				     regcounter: tsuperregister;
			
 
				 begin
			
 
				   p := First;
			
 
				   SkipHead(p);
			
 
				+  hp1 := nil;
			
 
				+  hp2 := nil;
			
 
				+  hp4 := nil;
			
 
				+  hp5 := nil;
			
 
				+  cnt := 0;
			
 
				   while (p <> Last) do
			
 
				     begin
			
 
				       case p.typ of
			
--- a/compiler/i386/daopt386.pas
+++ b/compiler/i386/daopt386.pas
@@ -901,6 +901,7 @@ var
 
				   Cnt: Word;
			
 
				 begin
			
 
				   TmpResult := False;
			
 
				+  Result := False;
			
 
				   if supreg = RS_INVALID then
			
 
				     exit;
			
 
				   if (p1.typ = ait_instruction) then
			
--- a/compiler/i386/hlcgcpu.pas
+++ b/compiler/i386/hlcgcpu.pas
@@ -78,10 +78,10 @@ implementation
 
				                      (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
			
 
				                     begin
			
 
				                       cg.g_stackpointer_alloc(list,stacksize);
			
 
				-                      reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
			
 
				+                      reference_reset_base(href,voidstackpointertype,NR_STACK_POINTER_REG,0,voidstackpointertype.size);
			
 
				                     end
			
 
				                   else
			
 
				-                    reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
			
 
				+                    reference_reset_base(href,voidstackpointertype,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
			
 
				                   cg.a_loadfpu_reg_ref(list,locsize,locsize,l.register,href);
			
 
				                 end;
			
 
				               LOC_FPUREGISTER:
			
@@ -123,10 +123,10 @@ implementation
 
				                      (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
			
 
				                     begin
			
 
				                       cg.g_stackpointer_alloc(list,stacksize);
			
 
				-                      reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
			
 
				+                      reference_reset_base(href,voidstackpointertype,NR_STACK_POINTER_REG,0,voidstackpointertype.size);
			
 
				                     end
			
 
				                   else
			
 
				-                    reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
			
 
				+                    reference_reset_base(href,voidstackpointertype,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
			
 
				                   cg.a_loadmm_reg_ref(list,locsize,locsize,l.register,href,mms_movescalar);
			
 
				                 end;
			
 
				               LOC_FPUREGISTER:
			
@@ -152,7 +152,7 @@ implementation
 
				                     cg.a_load_ref_cgpara(list,locsize,l.reference,cgpara)
			
 
				                   else
			
 
				                     begin
			
 
				-                      reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
			
 
				+                      reference_reset_base(href,voidstackpointertype,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
			
 
				                       cg.g_concatcopy(list,l.reference,href,stacksize);
			
 
				                     end;
			
 
				                 end;
			
--- a/compiler/i386/n386add.pas
+++ b/compiler/i386/n386add.pas
@@ -31,10 +31,14 @@ interface
 
				     type
			
 
				        ti386addnode = class(tx86addnode)
			
 
				          function use_generic_mul32to64: boolean; override;
			
 
				+         function use_generic_mul64bit: boolean; override;
			
 
				          procedure second_addordinal; override;
			
 
				          procedure second_add64bit;override;
			
 
				          procedure second_cmp64bit;override;
			
 
				          procedure second_mul(unsigned: boolean);
			
 
				+         procedure second_mul64bit;
			
 
				+       protected
			
 
				+         procedure set_mul_result_location;
			
 
				        end;
			
 
				 
			
 
				   implementation
			
@@ -58,6 +62,12 @@ interface
 
				       result := False;
			
 
				     end;
			
 
				 
			
 
				+    function ti386addnode.use_generic_mul64bit: boolean;
			
 
				+    begin
			
 
				+      result:=(cs_check_overflow in current_settings.localswitches) or
			
 
				+        (cs_opt_size in current_settings.optimizerswitches);
			
 
				+    end;
			
 
				+
			
 
				     { handles all unsigned multiplications, and 32->64 bit signed ones.
			
 
				       32bit-only signed mul is handled by generic codegen }
			
 
				     procedure ti386addnode.second_addordinal;
			
@@ -66,6 +76,11 @@ interface
 
				     begin
			
 
				       unsigned:=not(is_signed(left.resultdef)) or
			
 
				                 not(is_signed(right.resultdef));
			
 
				+      { use IMUL instead of MUL in case overflow checking is off and we're
			
 
				+        doing a 32->32-bit multiplication }
			
 
				+      if not (cs_check_overflow in current_settings.localswitches) and
			
 
				+         not is_64bit(resultdef) then
			
 
				+        unsigned:=false;
			
 
				       if (nodetype=muln) and (unsigned or is_64bit(resultdef)) then
			
 
				         second_mul(unsigned)
			
 
				       else
			
@@ -117,6 +132,11 @@ interface
 
				             op:=OP_OR;
			
 
				           andn:
			
 
				             op:=OP_AND;
			
 
				+          muln:
			
 
				+            begin
			
 
				+              second_mul64bit;
			
 
				+              exit;
			
 
				+            end
			
 
				           else
			
 
				             begin
			
 
				               { everything should be handled in pass_1 (JM) }
			
@@ -209,8 +229,7 @@ interface
 
				 
			
 
				     procedure ti386addnode.second_cmp64bit;
			
 
				       var
			
 
				-        hregister,
			
 
				-        hregister2 : tregister;
			
 
				+        hlab       : tasmlabel;
			
 
				         href       : treference;
			
 
				         unsigned   : boolean;
			
 
				 
			
@@ -227,10 +246,12 @@ interface
 
				            case nodetype of
			
 
				               ltn,gtn:
			
 
				                 begin
			
 
				-                   cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrTrueLabel);
			
 
				+                   if (hlab<>current_procinfo.CurrTrueLabel) then
			
 
				+                     cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrTrueLabel);
			
 
				                    { cheat a little bit for the negative test }
			
 
				                    toggleflag(nf_swapped);
			
 
				-                   cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
			
 
				+                   if (hlab<>current_procinfo.CurrFalseLabel) then
			
 
				+                     cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
			
 
				                    toggleflag(nf_swapped);
			
 
				                 end;
			
 
				               lten,gten:
			
@@ -240,13 +261,15 @@ interface
 
				                      nodetype:=ltn
			
 
				                    else
			
 
				                      nodetype:=gtn;
			
 
				-                   cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrTrueLabel);
			
 
				+                   if (hlab<>current_procinfo.CurrTrueLabel) then
			
 
				+                     cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrTrueLabel);
			
 
				                    { cheat for the negative test }
			
 
				                    if nodetype=ltn then
			
 
				                      nodetype:=gtn
			
 
				                    else
			
 
				                      nodetype:=ltn;
			
 
				-                   cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
			
 
				+                   if (hlab<>current_procinfo.CurrFalseLabel) then
			
 
				+                     cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
			
 
				                    nodetype:=oldnodetype;
			
 
				                 end;
			
 
				               equaln:
			
@@ -289,24 +312,46 @@ interface
 
				                   ((right.resultdef.typ=orddef) and
			
 
				                    (torddef(right.resultdef).ordtype=u64bit));
			
 
				 
			
 
				+        { we have LOC_JUMP as result }
			
 
				+        location_reset(location,LOC_JUMP,OS_NO);
			
 
				+
			
 
				+        { Relational compares against constants having low dword=0 can omit the
			
 
				+          second compare based on the fact that any unsigned value is >=0 }
			
 
				+        hlab:=nil;
			
 
				+        if (right.location.loc=LOC_CONSTANT) and
			
 
				+           (lo(right.location.value64)=0) then
			
 
				+          begin
			
 
				+            case getresflags(true) of
			
 
				+              F_AE: hlab:=current_procinfo.CurrTrueLabel;
			
 
				+              F_B:  hlab:=current_procinfo.CurrFalseLabel;
			
 
				+            end;
			
 
				+          end;
			
 
				+
			
 
				+        if (right.location.loc=LOC_CONSTANT) and
			
 
				+           (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
			
 
				+          begin
			
 
				+            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
			
 
				+            href:=left.location.reference;
			
 
				+            inc(href.offset,4);
			
 
				+            emit_const_ref(A_CMP,S_L,aint(hi(right.location.value64)),href);
			
 
				+            firstjmp64bitcmp;
			
 
				+            if assigned(hlab) then
			
 
				+              cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
			
 
				+            else
			
 
				+              begin
			
 
				+                emit_const_ref(A_CMP,S_L,aint(lo(right.location.value64)),left.location.reference);
			
 
				+                secondjmp64bitcmp;
			
 
				+              end;
			
 
				+            location_freetemp(current_asmdata.CurrAsmList,left.location);
			
 
				+            exit;
			
 
				+          end;
			
 
				+
			
 
				         { left and right no register?  }
			
 
				         { then one must be demanded    }
			
 
				-        if (left.location.loc<>LOC_REGISTER) then
			
 
				+        if not (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
			
 
				          begin
			
 
				-           if (right.location.loc<>LOC_REGISTER) then
			
 
				-            begin
			
 
				-              { we can reuse a CREGISTER for comparison }
			
 
				-              if (left.location.loc<>LOC_CREGISTER) then
			
 
				-               begin
			
 
				-                 hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-                 hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-                 cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
			
 
				-                 location_freetemp(current_asmdata.CurrAsmList,left.location);
			
 
				-                 location_reset(left.location,LOC_REGISTER,left.location.size);
			
 
				-                 left.location.register64.reglo:=hregister;
			
 
				-                 left.location.register64.reghi:=hregister2;
			
 
				-               end;
			
 
				-            end
			
 
				+           if not (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
			
 
				+             hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true)
			
 
				            else
			
 
				             begin
			
 
				               location_swap(left.location,right.location);
			
@@ -314,51 +359,44 @@ interface
 
				             end;
			
 
				          end;
			
 
				 
			
 
				-        { at this point, left.location.loc should be LOC_REGISTER }
			
 
				-        if right.location.loc=LOC_REGISTER then
			
 
				-         begin
			
 
				-           emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
			
 
				-           firstjmp64bitcmp;
			
 
				-           emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
			
 
				-           secondjmp64bitcmp;
			
 
				-         end
			
 
				+        { at this point, left.location.loc should be LOC_[C]REGISTER }
			
 
				+        case right.location.loc of
			
 
				+          LOC_REGISTER,
			
 
				+          LOC_CREGISTER :
			
 
				+            begin
			
 
				+              emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
			
 
				+              firstjmp64bitcmp;
			
 
				+              emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
			
 
				+              secondjmp64bitcmp;
			
 
				+            end;
			
 
				+          LOC_CREFERENCE,
			
 
				+          LOC_REFERENCE :
			
 
				+            begin
			
 
				+              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
			
 
				+              href:=right.location.reference;
			
 
				+              inc(href.offset,4);
			
 
				+              emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
			
 
				+              firstjmp64bitcmp;
			
 
				+              emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
			
 
				+              secondjmp64bitcmp;
			
 
				+              location_freetemp(current_asmdata.CurrAsmList,right.location);
			
 
				+            end;
			
 
				+          LOC_CONSTANT :
			
 
				+            begin
			
 
				+              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
			
 
				+              firstjmp64bitcmp;
			
 
				+              if assigned(hlab) then
			
 
				+                cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
			
 
				+              else
			
 
				+                begin
			
 
				+                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
			
 
				+                  secondjmp64bitcmp;
			
 
				+                end;
			
 
				+            end;
			
 
				         else
			
 
				-         begin
			
 
				-           case right.location.loc of
			
 
				-             LOC_CREGISTER :
			
 
				-               begin
			
 
				-                 emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
			
 
				-                 firstjmp64bitcmp;
			
 
				-                 emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
			
 
				-                 secondjmp64bitcmp;
			
 
				-               end;
			
 
				-             LOC_CREFERENCE,
			
 
				-             LOC_REFERENCE :
			
 
				-               begin
			
 
				-                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
			
 
				-                 href:=right.location.reference;
			
 
				-                 inc(href.offset,4);
			
 
				-                 emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
			
 
				-                 firstjmp64bitcmp;
			
 
				-                 emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
			
 
				-                 secondjmp64bitcmp;
			
 
				-                 cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
			
 
				-                 location_freetemp(current_asmdata.CurrAsmList,right.location);
			
 
				-               end;
			
 
				-             LOC_CONSTANT :
			
 
				-               begin
			
 
				-                 current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
			
 
				-                 firstjmp64bitcmp;
			
 
				-                 current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
			
 
				-                 secondjmp64bitcmp;
			
 
				-               end;
			
 
				-             else
			
 
				-               internalerror(200203282);
			
 
				-           end;
			
 
				-         end;
			
 
				+          internalerror(200203282);
			
 
				+        end;
			
 
				 
			
 
				-        { we have LOC_JUMP as result }
			
 
				-        location_reset(location,LOC_JUMP,OS_NO)
			
 
				       end;
			
 
				 
			
 
				 
			
@@ -366,6 +404,32 @@ interface
 
				                                 x86 MUL
			
 
				 *****************************************************************************}
			
 
				 
			
 
				+    procedure ti386addnode.set_mul_result_location;
			
 
				+    begin
			
 
				+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
			
 
				+      {Free EAX,EDX}
			
 
				+      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
			
 
				+      if is_64bit(resultdef) then
			
 
				+      begin
			
 
				+        {Allocate a couple of registers and store EDX:EAX into it}
			
 
				+        location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				+        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
			
 
				+        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				+        location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				+        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
			
 
				+      end
			
 
				+      else
			
 
				+      begin
			
 
				+        {Allocate a new register and store the result in EAX in it.}
			
 
				+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				+        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
			
 
				+      end;
			
 
				+      location_freetemp(current_asmdata.CurrAsmList,left.location);
			
 
				+      location_freetemp(current_asmdata.CurrAsmList,right.location);
			
 
				+    end;
			
 
				+
			
 
				+
			
 
				     procedure ti386addnode.second_mul(unsigned: boolean);
			
 
				 
			
 
				     var reg:Tregister;
			
@@ -379,8 +443,6 @@ interface
 
				     begin
			
 
				       pass_left_right;
			
 
				 
			
 
				-      {The location.register will be filled in later (JM)}
			
 
				-      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
			
 
				       { Mul supports registers and references, so if not register/reference,
			
 
				         load the location into a register.
			
 
				         The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
			
@@ -418,26 +480,107 @@ interface
 
				           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
			
 
				           cg.a_label(current_asmdata.CurrAsmList,hl4);
			
 
				         end;
			
 
				-      {Free EAX,EDX}
			
 
				-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
			
 
				-      if is_64bit(resultdef) then
			
 
				-      begin
			
 
				-        {Allocate a couple of registers and store EDX:EAX into it}
			
 
				-        location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
			
 
				-        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				-        location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
			
 
				-      end
			
 
				+      set_mul_result_location;
			
 
				+    end;
			
 
				+
			
 
				+
			
 
				+    procedure ti386addnode.second_mul64bit;
			
 
				+    var
			
 
				+      list: TAsmList;
			
 
				+      hreg1,hreg2: tregister;
			
 
				+    begin
			
 
				+      { 64x64 multiplication yields 128-bit result, but we're only
			
 
				+        interested in its lower 64 bits. This lower part is independent
			
 
				+        of operand signs, and so is the generated code. }
			
 
				+      { pass_left_right already called from second_add64bit }
			
 
				+      list:=current_asmdata.CurrAsmList;
			
 
				+      if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
			
 
				+        tcgx86(cg).make_simple_ref(list,left.location.reference);
			
 
				+      if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
			
 
				+        tcgx86(cg).make_simple_ref(list,right.location.reference);
			
 
				+
			
 
				+      { calculate 32-bit terms lo(right)*hi(left) and hi(left)*lo(right) }
			
 
				+      if (right.location.loc=LOC_CONSTANT) then
			
 
				+        begin
			
 
				+          { Omit zero terms, if any }
			
 
				+          hreg1:=NR_NO;
			
 
				+          hreg2:=NR_NO;
			
 
				+          if lo(right.location.value64)<>0 then
			
 
				+            hreg1:=cg.getintregister(list,OS_INT);
			
 
				+          if hi(right.location.value64)<>0 then
			
 
				+            hreg2:=cg.getintregister(list,OS_INT);
			
 
				+
			
 
				+          { Take advantage of 3-operand form of IMUL }
			
 
				+          case left.location.loc of
			
 
				+            LOC_REGISTER,LOC_CREGISTER:
			
 
				+              begin
			
 
				+                if hreg1<>NR_NO then
			
 
				+                  emit_const_reg_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.register64.reghi,hreg1);
			
 
				+                if hreg2<>NR_NO then
			
 
				+                  emit_const_reg_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.register64.reglo,hreg2);
			
 
				+              end;
			
 
				+            LOC_REFERENCE,LOC_CREFERENCE:
			
 
				+              begin
			
 
				+                if hreg2<>NR_NO then
			
 
				+                  list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.reference,hreg2));
			
 
				+                inc(left.location.reference.offset,4);
			
 
				+                if hreg1<>NR_NO then
			
 
				+                  list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.reference,hreg1));
			
 
				+                dec(left.location.reference.offset,4);
			
 
				+              end;
			
 
				+          else
			
 
				+            InternalError(2014011602);
			
 
				+          end;
			
 
				+        end
			
 
				       else
			
 
				-      begin
			
 
				-        {Allocate a new register and store the result in EAX in it.}
			
 
				-        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				-        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
			
 
				-      end;
			
 
				-      location_freetemp(current_asmdata.CurrAsmList,left.location);
			
 
				-      location_freetemp(current_asmdata.CurrAsmList,right.location);
			
 
				+        begin
			
 
				+          hreg1:=cg.getintregister(list,OS_INT);
			
 
				+          hreg2:=cg.getintregister(list,OS_INT);
			
 
				+          cg64.a_load64low_loc_reg(list,left.location,hreg1);
			
 
				+          cg64.a_load64high_loc_reg(list,left.location,hreg2);
			
 
				+          case right.location.loc of
			
 
				+            LOC_REGISTER,LOC_CREGISTER:
			
 
				+              begin
			
 
				+                emit_reg_reg(A_IMUL,S_L,right.location.register64.reghi,hreg1);
			
 
				+                emit_reg_reg(A_IMUL,S_L,right.location.register64.reglo,hreg2);
			
 
				+              end;
			
 
				+            LOC_REFERENCE,LOC_CREFERENCE:
			
 
				+              begin
			
 
				+                emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg2);
			
 
				+                inc(right.location.reference.offset,4);
			
 
				+                emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg1);
			
 
				+                dec(right.location.reference.offset,4);
			
 
				+              end;
			
 
				+          else
			
 
				+            InternalError(2014011603);
			
 
				+          end;
			
 
				+        end;
			
 
				+      { add hi*lo and lo*hi terms together }
			
 
				+      if (hreg1<>NR_NO) and (hreg2<>NR_NO) then
			
 
				+        emit_reg_reg(A_ADD,S_L,hreg2,hreg1);
			
 
				+
			
 
				+      { load lo(right) into EAX }
			
 
				+      cg.getcpuregister(list,NR_EAX);
			
 
				+      cg64.a_load64low_loc_reg(list,right.location,NR_EAX);
			
 
				+
			
 
				+      { multiply EAX by lo(left), producing 64-bit value in EDX:EAX }
			
 
				+      cg.getcpuregister(list,NR_EDX);
			
 
				+      if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
			
 
				+        emit_reg(A_MUL,S_L,left.location.register64.reglo)
			
 
				+      else if (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
			
 
				+        emit_ref(A_MUL,S_L,left.location.reference)
			
 
				+      else
			
 
				+        InternalError(2014011604);
			
 
				+      { add previously calculated terms to the high half }
			
 
				+      if (hreg1<>NR_NO) then
			
 
				+        emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX)
			
 
				+      else if (hreg2<>NR_NO) then
			
 
				+        emit_reg_reg(A_ADD,S_L,hreg2,NR_EDX)
			
 
				+      else
			
 
				+        InternalError(2014011604);
			
 
				+
			
 
				+      { Result is now in EDX:EAX. Copy it to virtual registers. }
			
 
				+      set_mul_result_location;
			
 
				     end;
			
 
				 
			
 
				 
			
--- a/compiler/i386/n386cal.pas
+++ b/compiler/i386/n386cal.pas
@@ -93,7 +93,8 @@ implementation
 
				           it is always the first parameter (apart from hidden parentfp,
			
 
				           but this one is never put into a register (vs_nonregable set)
			
 
				           so funcret is always in EAX for register calling }
			
 
				-        if (target_info.system = system_i386_win32) and
			
 
				+        if ((target_info.system = system_i386_win32) and
			
 
				+            not (target_info.abi=abi_old_win32_gnu)) and
			
 
				             paramanager.ret_in_param(procdefinition.returndef,procdefinition) and
			
 
				             not ((procdefinition.proccalloption=pocall_register) or
			
 
				                  ((procdefinition.proccalloption=pocall_internproc) and
			
--- a/compiler/i386/n386flw.pas
+++ b/compiler/i386/n386flw.pas
@@ -0,0 +1,685 @@
 
				+{
			
 
				+    Copyright (c) 2011 by Free Pascal development team
			
 
				+
			
 
				+    Generate Win32-specific exception handling code
			
 
				+
			
 
				+    This program is free software; you can redistribute it and/or modify
			
 
				+    it under the terms of the GNU General Public License as published by
			
 
				+    the Free Software Foundation; either version 2 of the License, or
			
 
				+    (at your option) any later version.
			
 
				+
			
 
				+    This program is distributed in the hope that it will be useful,
			
 
				+    but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+    GNU General Public License for more details.
			
 
				+
			
 
				+    You should have received a copy of the GNU General Public License
			
 
				+    along with this program; if not, write to the Free Software
			
 
				+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
			
 
				+
			
 
				+ ****************************************************************************
			
 
				+}
			
 
				+unit n386flw;
			
 
				+
			
 
				+{$i fpcdefs.inc}
			
 
				+
			
 
				+interface
			
 
				+
			
 
				+  uses
			
 
				+    node,nflw,ncgflw,psub;
			
 
				+
			
 
				+  type
			
 
				+    ti386raisenode=class(tcgraisenode)
			
 
				+      function pass_1 : tnode;override;
			
 
				+    end;
			
 
				+
			
 
				+    ti386onnode=class(tcgonnode)
			
 
				+      procedure pass_generate_code;override;
			
 
				+    end;
			
 
				+
			
 
				+    ti386tryexceptnode=class(tcgtryexceptnode)
			
 
				+      procedure pass_generate_code;override;
			
 
				+    end;
			
 
				+
			
 
				+    ti386tryfinallynode=class(tcgtryfinallynode)
			
 
				+      finalizepi: tcgprocinfo;
			
 
				+      constructor create(l,r:TNode);override;
			
 
				+      constructor create_implicit(l,r,_t1:TNode);override;
			
 
				+      function pass_1: tnode;override;
			
 
				+      function simplify(forinline: boolean): tnode;override;
			
 
				+      procedure pass_generate_code;override;
			
 
				+    end;
			
 
				+
			
 
				+implementation
			
 
				+
			
 
				+  uses
			
 
				+    cutils,globtype,globals,verbose,systems,
			
 
				+    nbas,ncal,nmem,nutils,
			
 
				+    symconst,symbase,symtable,symsym,symdef,
			
 
				+    cgbase,cgobj,cgcpu,cgutils,tgobj,
			
 
				+    cpubase,htypechk,
			
 
				+    parabase,paramgr,pdecsub,pass_1,pass_2,ncgutil,cga,
			
 
				+    aasmbase,aasmtai,aasmdata,aasmcpu,procinfo,cpupi;
			
 
				+
			
 
				+  var
			
 
				+    endexceptlabel: tasmlabel;
			
 
				+
			
 
				+
			
 
				+{ ti386raisenode }
			
 
				+
			
 
				+function ti386raisenode.pass_1 : tnode;
			
 
				+  var
			
 
				+    statements : tstatementnode;
			
 
				+    raisenode : tcallnode;
			
 
				+  begin
			
 
				+    { difference from generic code is that address stack is not popped on reraise }
			
 
				+    if (target_info.system<>system_i386_win32) or assigned(left) then
			
 
				+      result:=inherited pass_1
			
 
				+    else
			
 
				+      begin
			
 
				+        result:=internalstatements(statements);
			
 
				+        raisenode:=ccallnode.createintern('fpc_reraise',nil);
			
 
				+        include(raisenode.callnodeflags,cnf_call_never_returns);
			
 
				+        addstatement(statements,raisenode);
			
 
				+      end;
			
 
				+end;
			
 
				+
			
 
				+{ ti386onnode }
			
 
				+
			
 
				+procedure ti386onnode.pass_generate_code;
			
 
				+  var
			
 
				+    oldflowcontrol : tflowcontrol;
			
 
				+    exceptvarsym : tlocalvarsym;
			
 
				+  begin
			
 
				+    if (target_info.system<>system_i386_win32) then
			
 
				+      begin
			
 
				+        inherited pass_generate_code;
			
 
				+        exit;
			
 
				+      end;
			
 
				+
			
 
				+    location_reset(location,LOC_VOID,OS_NO);
			
 
				+
			
 
				+    oldflowcontrol:=flowcontrol;
			
 
				+    flowcontrol:=flowcontrol*[fc_unwind]+[fc_inflowcontrol];
			
 
				+
			
 
				+    { RTL will put exceptobject into EAX when jumping here }
			
 
				+    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
			
 
				+
			
 
				+    { Retrieve exception variable }
			
 
				+    if assigned(excepTSymtable) then
			
 
				+      exceptvarsym:=tlocalvarsym(excepTSymtable.SymList[0])
			
 
				+    else
			
 
				+      exceptvarsym:=nil;
			
 
				+
			
 
				+    if assigned(exceptvarsym) then
			
 
				+      begin
			
 
				+        exceptvarsym.localloc.loc:=LOC_REFERENCE;
			
 
				+        exceptvarsym.localloc.size:=OS_ADDR;
			
 
				+        tg.GetLocal(current_asmdata.CurrAsmList,sizeof(pint),voidpointertype,exceptvarsym.localloc.reference);
			
 
				+        cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_FUNCTION_RESULT_REG,exceptvarsym.localloc.reference);
			
 
				+      end;
			
 
				+    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
			
 
				+
			
 
				+    if assigned(right) then
			
 
				+      secondpass(right);
			
 
				+
			
 
				+    { deallocate exception symbol }
			
 
				+    if assigned(exceptvarsym) then
			
 
				+      begin
			
 
				+        tg.UngetLocal(current_asmdata.CurrAsmList,exceptvarsym.localloc.reference);
			
 
				+        exceptvarsym.localloc.loc:=LOC_INVALID;
			
 
				+      end;
			
 
				+    cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
			
 
				+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
			
 
				+
			
 
				+    flowcontrol:=oldflowcontrol+(flowcontrol-[fc_inflowcontrol]);
			
 
				+  end;
			
 
				+
			
 
				+{ ti386tryfinallynode }
			
 
				+
			
 
				+function reset_regvars(var n: tnode; arg: pointer): foreachnoderesult;
			
 
				+  begin
			
 
				+    case n.nodetype of
			
 
				+      temprefn:
			
 
				+        make_not_regable(n,[]);
			
 
				+      calln:
			
 
				+        include(tprocinfo(arg).flags,pi_do_call);
			
 
				+    end;
			
 
				+    result:=fen_true;
			
 
				+  end;
			
 
				+
			
 
				+function copy_parasize(var n: tnode; arg: pointer): foreachnoderesult;
			
 
				+  begin
			
 
				+    case n.nodetype of
			
 
				+      calln:
			
 
				+        tcgprocinfo(arg).allocate_push_parasize(tcallnode(n).pushed_parasize);
			
 
				+    end;
			
 
				+    result:=fen_true;
			
 
				+  end;
			
 
				+
			
 
				+constructor ti386tryfinallynode.create(l, r: TNode);
			
 
				+  begin
			
 
				+    inherited create(l,r);
			
 
				+    if (target_info.system<>system_i386_win32) or (
			
 
				+      { Don't create child procedures for generic methods, their nested-like
			
 
				+        behavior causes compilation errors because real nested procedures
			
 
				+        aren't allowed for generics. Not creating them doesn't harm because
			
 
				+        generic node tree is discarded without generating code. }
			
 
				+        assigned(current_procinfo.procdef.struct) and
			
 
				+        (df_generic in current_procinfo.procdef.struct.defoptions)
			
 
				+      ) then
			
 
				+      exit;
			
 
				+    finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
			
 
				+    finalizepi.force_nested;
			
 
				+    finalizepi.procdef:=create_finalizer_procdef;
			
 
				+    finalizepi.entrypos:=r.fileinfo;
			
 
				+    finalizepi.entryswitches:=r.localswitches;
			
 
				+    finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
			
 
				+    finalizepi.exitswitches:=current_settings.localswitches;
			
 
				+    { Regvar optimization for symbols is suppressed when using exceptions, but
			
 
				+      temps may be still placed into registers. This must be fixed. }
			
 
				+    foreachnodestatic(r,@reset_regvars,finalizepi);
			
 
				+    include(finalizepi.flags,pi_has_assembler_block);
			
 
				+    include(finalizepi.flags,pi_do_call);
			
 
				+    include(finalizepi.flags,pi_uses_exceptions);
			
 
				+  end;
			
 
				+
			
 
				+constructor ti386tryfinallynode.create_implicit(l, r, _t1: TNode);
			
 
				+  begin
			
 
				+    inherited create_implicit(l, r, _t1);
			
 
				+    if (target_info.system<>system_i386_win32) then
			
 
				+      exit;
			
 
				+
			
 
				+    { safecall procedures can handle implicit finalization as part of "except" flow }
			
 
				+    if implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall) then
			
 
				+      exit;
			
 
				+
			
 
				+    if assigned(current_procinfo.procdef.struct) and
			
 
				+      (df_generic in current_procinfo.procdef.struct.defoptions) then
			
 
				+      InternalError(2013012501);
			
 
				+
			
 
				+    finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
			
 
				+    finalizepi.force_nested;
			
 
				+    finalizepi.procdef:=create_finalizer_procdef;
			
 
				+    finalizepi.entrypos:=current_filepos;
			
 
				+    finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
			
 
				+    finalizepi.entryswitches:=r.localswitches;
			
 
				+    finalizepi.exitswitches:=current_settings.localswitches;
			
 
				+    include(finalizepi.flags,pi_has_assembler_block);
			
 
				+    include(finalizepi.flags,pi_do_call);
			
 
				+    include(finalizepi.flags,pi_uses_exceptions);
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+function ti386tryfinallynode.pass_1: tnode;
			
 
				+  var
			
 
				+    selfsym: tparavarsym;
			
 
				+  begin
			
 
				+    result:=inherited pass_1;
			
 
				+    if (target_info.system=system_i386_win32) then
			
 
				+      begin
			
 
				+        { safecall method will access 'self' from except block -> make it non-regable }
			
 
				+        if implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall) and
			
 
				+          is_class(current_procinfo.procdef.struct) then
			
 
				+          begin
			
 
				+            selfsym:=tparavarsym(current_procinfo.procdef.parast.Find('self'));
			
 
				+            if (selfsym=nil) or (selfsym.typ<>paravarsym) then
			
 
				+              InternalError(2011123101);
			
 
				+            selfsym.varregable:=vr_none;
			
 
				+          end;
			
 
				+      end;
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+function ti386tryfinallynode.simplify(forinline: boolean): tnode;
			
 
				+  begin
			
 
				+    result:=inherited simplify(forinline);
			
 
				+    if (target_info.system<>system_i386_win32) then
			
 
				+      exit;
			
 
				+
			
 
				+    if (result=nil) and assigned(finalizepi) then
			
 
				+      begin
			
 
				+        finalizepi.code:=right;
			
 
				+        foreachnodestatic(right,@copy_parasize,finalizepi);
			
 
				+        right:=ccallnode.create(nil,tprocsym(finalizepi.procdef.procsym),nil,nil,[]);
			
 
				+        firstpass(right);
			
 
				+        { For implicit frames, no actual code is available at this time,
			
 
				+          it is added later in assembler form. So store the nested procinfo
			
 
				+          for later use. }
			
 
				+        if implicitframe then
			
 
				+          begin
			
 
				+            current_procinfo.finalize_procinfo:=finalizepi;
			
 
				+            { don't leave dangling pointer }
			
 
				+            tcgprocinfo(current_procinfo).final_asmnode:=nil;
			
 
				+          end;
			
 
				+      end;
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+procedure emit_scope_start(handler,data: TAsmSymbol);
			
 
				+  var
			
 
				+    href: treference;
			
 
				+    hreg: tregister;
			
 
				+  begin
			
 
				+    hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
			
 
				+    reference_reset_base(href,hreg,0,sizeof(pint));
			
 
				+    href.segment:=NR_FS;
			
 
				+    emit_reg_reg(A_XOR,S_L,hreg,hreg);
			
 
				+    emit_sym(A_PUSH,S_L,data);
			
 
				+    emit_reg(A_PUSH,S_L,NR_FRAME_POINTER_REG);
			
 
				+    emit_sym(A_PUSH,S_L,handler);
			
 
				+    emit_ref(A_PUSH,S_L,href);
			
 
				+    emit_reg_ref(A_MOV,S_L,NR_ESP,href);
			
 
				+  end;
			
 
				+
			
 
				+procedure emit_scope_end;
			
 
				+  var
			
 
				+    href: treference;
			
 
				+    hreg,hreg2: tregister;
			
 
				+  begin
			
 
				+    hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
			
 
				+    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
			
 
				+    reference_reset_base(href,hreg,0,sizeof(pint));
			
 
				+    href.segment:=NR_FS;
			
 
				+    emit_reg_reg(A_XOR,S_L,hreg,hreg);
			
 
				+    emit_reg(A_POP,S_L,hreg2);
			
 
				+    emit_const_reg(A_ADD,S_L,3*sizeof(pint),NR_ESP);
			
 
				+    emit_reg_ref(A_MOV,S_L,hreg2,href);
			
 
				+  end;
			
 
				+
			
 
				+procedure ti386tryfinallynode.pass_generate_code;
			
 
				+  var
			
 
				+    finallylabel,
			
 
				+    exceptlabel,
			
 
				+    safecalllabel,
			
 
				+    endfinallylabel,
			
 
				+    exitfinallylabel,
			
 
				+    continuefinallylabel,
			
 
				+    breakfinallylabel,
			
 
				+    oldCurrExitLabel,
			
 
				+    oldContinueLabel,
			
 
				+    oldBreakLabel : tasmlabel;
			
 
				+    oldflowcontrol,tryflowcontrol : tflowcontrol;
			
 
				+    is_safecall: boolean;
			
 
				+    hreg: tregister;
			
 
				+  begin
			
 
				+    if (target_info.system<>system_i386_win32) then
			
 
				+      begin
			
 
				+        inherited pass_generate_code;
			
 
				+        exit;
			
 
				+      end;
			
 
				+    location_reset(location,LOC_VOID,OS_NO);
			
 
				+    tryflowcontrol:=[];
			
 
				+    oldBreakLabel:=nil;
			
 
				+    oldContinueLabel:=nil;
			
 
				+    continuefinallylabel:=nil;
			
 
				+    breakfinallylabel:=nil;
			
 
				+    exceptlabel:=nil;
			
 
				+    safecalllabel:=nil;
			
 
				+    is_safecall:=implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall);
			
 
				+
			
 
				+    { check if child nodes do a break/continue/exit }
			
 
				+    oldflowcontrol:=flowcontrol;
			
 
				+    flowcontrol:=[fc_inflowcontrol];
			
 
				+    current_asmdata.getjumplabel(finallylabel);
			
 
				+    current_asmdata.getjumplabel(endfinallylabel);
			
 
				+
			
 
				+    { the finally block must catch break, continue and exit }
			
 
				+    { statements                                            }
			
 
				+    oldCurrExitLabel:=current_procinfo.CurrExitLabel;
			
 
				+    if implicitframe then
			
 
				+      exitfinallylabel:=finallylabel
			
 
				+    else
			
 
				+      current_asmdata.getjumplabel(exitfinallylabel);
			
 
				+    current_procinfo.CurrExitLabel:=exitfinallylabel;
			
 
				+    if assigned(current_procinfo.CurrBreakLabel) then
			
 
				+      begin
			
 
				+        oldContinueLabel:=current_procinfo.CurrContinueLabel;
			
 
				+        oldBreakLabel:=current_procinfo.CurrBreakLabel;
			
 
				+        if implicitframe then
			
 
				+          begin
			
 
				+            breakfinallylabel:=finallylabel;
			
 
				+            continuefinallylabel:=finallylabel;
			
 
				+          end
			
 
				+        else
			
 
				+          begin
			
 
				+            current_asmdata.getjumplabel(breakfinallylabel);
			
 
				+            current_asmdata.getjumplabel(continuefinallylabel);
			
 
				+          end;
			
 
				+        current_procinfo.CurrContinueLabel:=continuefinallylabel;
			
 
				+        current_procinfo.CurrBreakLabel:=breakfinallylabel;
			
 
				+      end;
			
 
				+
			
 
				+    { Start of scope }
			
 
				+    if is_safecall then
			
 
				+      begin
			
 
				+        with cg.rg[R_INTREGISTER] do
			
 
				+          used_in_proc:=used_in_proc+[RS_EBX,RS_ESI,RS_EDI];
			
 
				+
			
 
				+        current_asmdata.getjumplabel(exceptlabel);
			
 
				+        emit_scope_start(
			
 
				+          current_asmdata.RefAsmSymbol('__FPC_except_safecall'),
			
 
				+          exceptlabel
			
 
				+        );
			
 
				+      end
			
 
				+    else
			
 
				+      emit_scope_start(
			
 
				+        current_asmdata.RefAsmSymbol('__FPC_finally_handler'),
			
 
				+        current_asmdata.RefAsmSymbol(finalizepi.procdef.mangledname)
			
 
				+      );
			
 
				+
			
 
				+    { try code }
			
 
				+    if assigned(left) then
			
 
				+      begin
			
 
				+        secondpass(left);
			
 
				+        tryflowcontrol:=flowcontrol;
			
 
				+        if codegenerror then
			
 
				+          exit;
			
 
				+      end;
			
 
				+
			
 
				+    { don't generate line info for internal cleanup }
			
 
				+    current_asmdata.CurrAsmList.concat(tai_marker.create(mark_NoLineInfoStart));
			
 
				+
			
 
				+    cg.a_label(current_asmdata.CurrAsmList,finallylabel);
			
 
				+    emit_scope_end;
			
 
				+    if is_safecall then
			
 
				+      begin
			
 
				+        current_asmdata.getjumplabel(safecalllabel);
			
 
				+        hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
			
 
				+        cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_INT,0,hreg);
			
 
				+        cg.a_jmp_always(current_asmdata.CurrAsmList,safecalllabel);
			
 
				+        { RTL handler will jump here on exception }
			
 
				+        cg.a_label(current_asmdata.CurrAsmList,exceptlabel);
			
 
				+        handle_safecall_exception;
			
 
				+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_FUNCTION_RESULT_REG,hreg);
			
 
				+        cg.a_label(current_asmdata.CurrAsmList,safecalllabel);
			
 
				+      end;
			
 
				+
			
 
				+    { end cleanup }
			
 
				+    current_asmdata.CurrAsmList.concat(tai_marker.create(mark_NoLineInfoEnd));
			
 
				+
			
 
				+    { generate finally code as a separate procedure }
			
 
				+    { !!! this resets flowcontrol, how to check flow away? }
			
 
				+    if not implicitframe then
			
 
				+      tcgprocinfo(current_procinfo).generate_exceptfilter(finalizepi);
			
 
				+
			
 
				+    flowcontrol:=[fc_inflowcontrol];
			
 
				+    { right is a call to finalizer procedure }
			
 
				+    secondpass(right);
			
 
				+
			
 
				+    { goto is allowed if it stays inside the finally block,
			
 
				+      this is checked using the exception block number }
			
 
				+    if (flowcontrol-[fc_gotolabel])<>[fc_inflowcontrol] then
			
 
				+      CGMessage(cg_e_control_flow_outside_finally);
			
 
				+    if codegenerror then
			
 
				+      exit;
			
 
				+
			
 
				+    { don't generate line info for internal cleanup }
			
 
				+    current_asmdata.CurrAsmList.concat(tai_marker.create(mark_NoLineInfoStart));
			
 
				+
			
 
				+    if not implicitframe then
			
 
				+      begin
			
 
				+        if tryflowcontrol*[fc_exit,fc_break,fc_continue]<>[] then
			
 
				+          cg.a_jmp_always(current_asmdata.CurrAsmList,endfinallylabel);
			
 
				+        { do some magic for exit,break,continue in the try block }
			
 
				+        if fc_exit in tryflowcontrol then
			
 
				+          begin
			
 
				+            cg.a_label(current_asmdata.CurrAsmList,exitfinallylabel);
			
 
				+            cg.g_call(current_asmdata.CurrAsmList,'_FPC_leave');
			
 
				+            cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
			
 
				+          end;
			
 
				+        if fc_break in tryflowcontrol then
			
 
				+          begin
			
 
				+            cg.a_label(current_asmdata.CurrAsmList,breakfinallylabel);
			
 
				+            cg.g_call(current_asmdata.CurrAsmList,'_FPC_leave');
			
 
				+            cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
			
 
				+          end;
			
 
				+        if fc_continue in tryflowcontrol then
			
 
				+          begin
			
 
				+            cg.a_label(current_asmdata.CurrAsmList,continuefinallylabel);
			
 
				+            cg.g_call(current_asmdata.CurrAsmList,'_FPC_leave');
			
 
				+            cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
			
 
				+          end;
			
 
				+      end;
			
 
				+    if is_safecall then
			
 
				+      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,hreg,NR_FUNCTION_RETURN_REG);
			
 
				+    cg.a_label(current_asmdata.CurrAsmList,endfinallylabel);
			
 
				+
			
 
				+    { end cleanup }
			
 
				+    current_asmdata.CurrAsmList.concat(tai_marker.create(mark_NoLineInfoEnd));
			
 
				+
			
 
				+    current_procinfo.CurrExitLabel:=oldCurrExitLabel;
			
 
				+    if assigned(current_procinfo.CurrBreakLabel) then
			
 
				+      begin
			
 
				+        current_procinfo.CurrContinueLabel:=oldContinueLabel;
			
 
				+        current_procinfo.CurrBreakLabel:=oldBreakLabel;
			
 
				+      end;
			
 
				+    flowcontrol:=oldflowcontrol+(tryflowcontrol-[fc_inflowcontrol]);
			
 
				+  end;
			
 
				+
			
 
				+{ ti386tryexceptnode }
			
 
				+
			
 
				+procedure ti386tryexceptnode.pass_generate_code;
			
 
				+  var
			
 
				+    exceptlabel,oldendexceptlabel,
			
 
				+    lastonlabel,
			
 
				+    exitexceptlabel,
			
 
				+    continueexceptlabel,
			
 
				+    breakexceptlabel,
			
 
				+    exittrylabel,
			
 
				+    continuetrylabel,
			
 
				+    breaktrylabel,
			
 
				+    oldCurrExitLabel,
			
 
				+    oldContinueLabel,
			
 
				+    oldBreakLabel : tasmlabel;
			
 
				+    onlabel,
			
 
				+    filterlabel: tasmlabel;
			
 
				+    oldflowcontrol,tryflowcontrol,
			
 
				+    exceptflowcontrol : tflowcontrol;
			
 
				+    hnode : tnode;
			
 
				+    hlist : tasmlist;
			
 
				+    onnodecount : tai_const;
			
 
				+  label
			
 
				+    errorexit;
			
 
				+  begin
			
 
				+    if (target_info.system<>system_i386_win32) then
			
 
				+      begin
			
 
				+        inherited pass_generate_code;
			
 
				+        exit;
			
 
				+      end;
			
 
				+    location_reset(location,LOC_VOID,OS_NO);
			
 
				+
			
 
				+    oldflowcontrol:=flowcontrol;
			
 
				+    flowcontrol:=[fc_inflowcontrol];
			
 
				+    { this can be called recursivly }
			
 
				+    oldBreakLabel:=nil;
			
 
				+    oldContinueLabel:=nil;
			
 
				+    oldendexceptlabel:=endexceptlabel;
			
 
				+
			
 
				+    { Win32 SEH unwinding does not preserve registers. Indicate that they are
			
 
				+      going to be destroyed. }
			
 
				+    cg.alloccpuregisters(current_asmdata.CurrAsmList,R_INTREGISTER,[RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]);
			
 
				+    cg.dealloccpuregisters(current_asmdata.CurrAsmList,R_INTREGISTER,[RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]);
			
 
				+
			
 
				+    { save the old labels for control flow statements }
			
 
				+    oldCurrExitLabel:=current_procinfo.CurrExitLabel;
			
 
				+    if assigned(current_procinfo.CurrBreakLabel) then
			
 
				+      begin
			
 
				+        oldContinueLabel:=current_procinfo.CurrContinueLabel;
			
 
				+        oldBreakLabel:=current_procinfo.CurrBreakLabel;
			
 
				+      end;
			
 
				+
			
 
				+    { get new labels for the control flow statements }
			
 
				+    current_asmdata.getjumplabel(exittrylabel);
			
 
				+    current_asmdata.getjumplabel(exitexceptlabel);
			
 
				+    if assigned(current_procinfo.CurrBreakLabel) then
			
 
				+      begin
			
 
				+        current_asmdata.getjumplabel(breaktrylabel);
			
 
				+        current_asmdata.getjumplabel(continuetrylabel);
			
 
				+        current_asmdata.getjumplabel(breakexceptlabel);
			
 
				+        current_asmdata.getjumplabel(continueexceptlabel);
			
 
				+      end;
			
 
				+
			
 
				+    current_asmdata.getjumplabel(exceptlabel);
			
 
				+    current_asmdata.getjumplabel(endexceptlabel);
			
 
				+    current_asmdata.getjumplabel(lastonlabel);
			
 
				+    filterlabel:=nil;
			
 
				+
			
 
				+    { start of scope }
			
 
				+    if assigned(right) then
			
 
				+      begin
			
 
				+        current_asmdata.getdatalabel(filterlabel);
			
 
				+        emit_scope_start(
			
 
				+          current_asmdata.RefAsmSymbol('__FPC_on_handler'),
			
 
				+          filterlabel);
			
 
				+      end
			
 
				+    else
			
 
				+      emit_scope_start(
			
 
				+        current_asmdata.RefAsmSymbol('__FPC_except_handler'),
			
 
				+        exceptlabel);
			
 
				+
			
 
				+    { set control flow labels for the try block }
			
 
				+    current_procinfo.CurrExitLabel:=exittrylabel;
			
 
				+    if assigned(oldBreakLabel) then
			
 
				+      begin
			
 
				+        current_procinfo.CurrContinueLabel:=continuetrylabel;
			
 
				+        current_procinfo.CurrBreakLabel:=breaktrylabel;
			
 
				+      end;
			
 
				+
			
 
				+    secondpass(left);
			
 
				+    tryflowcontrol:=flowcontrol;
			
 
				+    if codegenerror then
			
 
				+      goto errorexit;
			
 
				+
			
 
				+    emit_scope_end;
			
 
				+    { jump over except handlers }
			
 
				+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
			
 
				+
			
 
				+    if fc_exit in tryflowcontrol then
			
 
				+      begin
			
 
				+        cg.a_label(current_asmdata.CurrAsmList,exittrylabel);
			
 
				+        emit_scope_end;
			
 
				+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
			
 
				+      end;
			
 
				+    if fc_break in tryflowcontrol then
			
 
				+      begin
			
 
				+        cg.a_label(current_asmdata.CurrAsmList,breaktrylabel);
			
 
				+        emit_scope_end;
			
 
				+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
			
 
				+      end;
			
 
				+    if fc_continue in tryflowcontrol then
			
 
				+      begin
			
 
				+        cg.a_label(current_asmdata.CurrAsmList,continuetrylabel);
			
 
				+        emit_scope_end;
			
 
				+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
			
 
				+      end;
			
 
				+
			
 
				+    { target for catch-all handler }
			
 
				+    cg.a_label(current_asmdata.CurrAsmList,exceptlabel);
			
 
				+
			
 
				+    { set control flow labels for the except block }
			
 
				+    { and the on statements                        }
			
 
				+    current_procinfo.CurrExitLabel:=exitexceptlabel;
			
 
				+    if assigned(oldBreakLabel) then
			
 
				+      begin
			
 
				+        current_procinfo.CurrContinueLabel:=continueexceptlabel;
			
 
				+        current_procinfo.CurrBreakLabel:=breakexceptlabel;
			
 
				+      end;
			
 
				+
			
 
				+    flowcontrol:=[fc_inflowcontrol];
			
 
				+    { on statements }
			
 
				+    if assigned(right) then
			
 
				+      begin
			
 
				+        { emit filter table to a temporary asmlist }
			
 
				+        hlist:=TAsmList.Create;
			
 
				+        new_section(hlist,sec_rodata,filterlabel.name,4);
			
 
				+        cg.a_label(hlist,filterlabel);
			
 
				+        onnodecount:=tai_const.create_32bit(0);
			
 
				+        hlist.concat(onnodecount);
			
 
				+
			
 
				+        hnode:=right;
			
 
				+        while assigned(hnode) do
			
 
				+          begin
			
 
				+            if hnode.nodetype<>onn then
			
 
				+              InternalError(2011103101);
			
 
				+            { TODO: make it done without using global label }
			
 
				+            current_asmdata.getglobaljumplabel(onlabel);
			
 
				+            hlist.concat(tai_const.create_sym(current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname,AT_DATA)));
			
 
				+            hlist.concat(tai_const.create_sym(onlabel));
			
 
				+            cg.a_label(current_asmdata.CurrAsmList,onlabel);
			
 
				+            secondpass(hnode);
			
 
				+            inc(onnodecount.value);
			
 
				+            hnode:=tonnode(hnode).left;
			
 
				+          end;
			
 
				+        { add 'else' node to the filter list, too }
			
 
				+        if assigned(t1) then
			
 
				+          begin
			
 
				+            hlist.concat(tai_const.create_32bit(-1));
			
 
				+            hlist.concat(tai_const.create_sym(lastonlabel));
			
 
				+            inc(onnodecount.value);
			
 
				+          end;
			
 
				+        { now move filter table to permanent list all at once }
			
 
				+        maybe_new_object_file(current_asmdata.asmlists[al_typedconsts]);
			
 
				+        current_asmdata.asmlists[al_typedconsts].concatlist(hlist);
			
 
				+        hlist.free;
			
 
				+      end;
			
 
				+
			
 
				+    cg.a_label(current_asmdata.CurrAsmList,lastonlabel);
			
 
				+    if assigned(t1) then
			
 
				+      begin
			
 
				+        { here we don't have to reset flowcontrol           }
			
 
				+        { the default and on flowcontrols are handled equal }
			
 
				+        secondpass(t1);
			
 
				+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
			
 
				+        if (flowcontrol*[fc_exit,fc_break,fc_continue]<>[]) then
			
 
				+          cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
			
 
				+      end;
			
 
				+    exceptflowcontrol:=flowcontrol;
			
 
				+
			
 
				+    if fc_exit in exceptflowcontrol then
			
 
				+      begin
			
 
				+        { do some magic for exit in the try block }
			
 
				+        cg.a_label(current_asmdata.CurrAsmList,exitexceptlabel);
			
 
				+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
			
 
				+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
			
 
				+      end;
			
 
				+
			
 
				+    if fc_break in exceptflowcontrol then
			
 
				+      begin
			
 
				+        cg.a_label(current_asmdata.CurrAsmList,breakexceptlabel);
			
 
				+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
			
 
				+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
			
 
				+      end;
			
 
				+
			
 
				+    if fc_continue in exceptflowcontrol then
			
 
				+      begin
			
 
				+        cg.a_label(current_asmdata.CurrAsmList,continueexceptlabel);
			
 
				+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
			
 
				+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
			
 
				+      end;
			
 
				+
			
 
				+    cg.a_label(current_asmdata.CurrAsmList,endexceptlabel);
			
 
				+
			
 
				+errorexit:
			
 
				+    { restore all saved labels }
			
 
				+    endexceptlabel:=oldendexceptlabel;
			
 
				+
			
 
				+    { restore the control flow labels }
			
 
				+    current_procinfo.CurrExitLabel:=oldCurrExitLabel;
			
 
				+    if assigned(oldBreakLabel) then
			
 
				+      begin
			
 
				+        current_procinfo.CurrContinueLabel:=oldContinueLabel;
			
 
				+        current_procinfo.CurrBreakLabel:=oldBreakLabel;
			
 
				+      end;
			
 
				+
			
 
				+    { return all used control flow statements }
			
 
				+    flowcontrol:=oldflowcontrol+(exceptflowcontrol +
			
 
				+      tryflowcontrol - [fc_inflowcontrol]);
			
 
				+  end;
			
 
				+
			
 
				+initialization
			
 
				+  craisenode:=ti386raisenode;
			
 
				+  connode:=ti386onnode;
			
 
				+  ctryexceptnode:=ti386tryexceptnode;
			
 
				+  ctryfinallynode:=ti386tryfinallynode;
			
 
				+end.
			
--- a/compiler/i386/n386inl.pas
+++ b/compiler/i386/n386inl.pas
@@ -26,16 +26,100 @@ unit n386inl;
 
				 interface
			
 
				 
			
 
				     uses
			
 
				-       nx86inl;
			
 
				+       node,nx86inl;
			
 
				 
			
 
				     type
			
 
				        ti386inlinenode = class(tx86inlinenode)
			
 
				+       public
			
 
				+         function first_sar: tnode; override;
			
 
				+         procedure second_rox_sar; override;
			
 
				        end;
			
 
				 
			
 
				 implementation
			
 
				 
			
 
				   uses
			
 
				-    ninl;
			
 
				+    globtype,globals,
			
 
				+    defutil,
			
 
				+    aasmbase,aasmdata,
			
 
				+    cgbase,pass_2,
			
 
				+    cpuinfo,cpubase,
			
 
				+    cga,cgutils,cgx86,cgobj,hlcgobj,
			
 
				+    ninl,ncon,ncal;
			
 
				+
			
 
				+
			
 
				+  function ti386inlinenode.first_sar: tnode;
			
 
				+    begin
			
 
				+      if is_64bitint(resultdef) and (
			
 
				+        (inlinenumber=in_sar_x) or (
			
 
				+          (inlinenumber=in_sar_x_y) and
			
 
				+          (tcallparanode(left).left.nodetype=ordconstn)
			
 
				+      )) then
			
 
				+        begin
			
 
				+          result:=nil;
			
 
				+          expectloc:=LOC_REGISTER;
			
 
				+        end
			
 
				+      else
			
 
				+        result:=inherited first_sar;
			
 
				+    end;
			
 
				+
			
 
				+
			
 
				+  procedure ti386inlinenode.second_rox_sar;
			
 
				+    var
			
 
				+      op1: tnode;
			
 
				+      hreg64hi,hreg64lo: tregister;
			
 
				+      v: aint;
			
 
				+    begin
			
 
				+      if is_64bitint(resultdef) and (
			
 
				+        (inlinenumber=in_sar_x) or (
			
 
				+          (inlinenumber=in_sar_x_y) and
			
 
				+          (tcallparanode(left).left.nodetype=ordconstn)
			
 
				+      )) then
			
 
				+        begin
			
 
				+          { x sar constant }
			
 
				+          if (left.nodetype=callparan) and
			
 
				+             assigned(tcallparanode(left).right) then
			
 
				+            begin
			
 
				+              op1:=tcallparanode(tcallparanode(left).right).left;
			
 
				+              secondpass(tcallparanode(left).left);
			
 
				+              v:=Tordconstnode(tcallparanode(left).left).value.svalue and 63;
			
 
				+            end
			
 
				+          else
			
 
				+            begin
			
 
				+              op1:=left;
			
 
				+              v:=1;
			
 
				+            end;
			
 
				+          secondpass(op1);
			
 
				+
			
 
				+          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
			
 
				+
			
 
				+          { load left operator in a register }
			
 
				+          hlcg.location_force_reg(current_asmdata.CurrAsmList,op1.location,op1.resultdef,resultdef,false);
			
 
				+          hreg64hi:=op1.location.register64.reghi;
			
 
				+          hreg64lo:=op1.location.register64.reglo;
			
 
				+
			
 
				+          if (v=63) then
			
 
				+            begin
			
 
				+              emit_const_reg(A_SAR,S_L,31,hreg64hi);
			
 
				+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,hreg64hi,hreg64lo);
			
 
				+            end
			
 
				+          else if (v>31) then
			
 
				+            begin
			
 
				+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,hreg64hi,hreg64lo);
			
 
				+              emit_const_reg(A_SAR,S_L,31,hreg64hi);
			
 
				+              emit_const_reg(A_SAR,S_L,v and 31,hreg64lo);
			
 
				+            end
			
 
				+          else
			
 
				+            begin
			
 
				+              emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
			
 
				+              emit_const_reg(A_SAR,S_L,v and 31,hreg64hi);
			
 
				+            end;
			
 
				+          location.register64.reghi:=hreg64hi;
			
 
				+          location.register64.reglo:=hreg64lo;
			
 
				+        end
			
 
				+      else
			
 
				+        inherited second_rox_sar;
			
 
				+    end;
			
 
				+
			
 
				 
			
 
				 begin
			
 
				    cinlinenode:=ti386inlinenode;
			
--- a/compiler/i386/n386ld.pas
+++ b/compiler/i386/n386ld.pas
@@ -0,0 +1,61 @@
 
				+{
			
 
				+    Copyright (c) 1998-2014 by Florian Klaempfl
			
 
				+
			
 
				+    Generate i386 assembler for load nodes
			
 
				+
			
 
				+    This program is free software; you can redistribute it and/or modify
			
 
				+    it under the terms of the GNU General Public License as published by
			
 
				+    the Free Software Foundation; either version 2 of the License, or
			
 
				+    (at your option) any later version.
			
 
				+
			
 
				+    This program is distributed in the hope that it will be useful,
			
 
				+    but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+    GNU General Public License for more details.
			
 
				+
			
 
				+    You should have received a copy of the GNU General Public License
			
 
				+    along with this program; if not, write to the Free Software
			
 
				+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
			
 
				+
			
 
				+ ****************************************************************************
			
 
				+}
			
 
				+unit n386ld;
			
 
				+
			
 
				+{$i fpcdefs.inc}
			
 
				+
			
 
				+interface
			
 
				+
			
 
				+    uses
			
 
				+      globtype,
			
 
				+      symsym,
			
 
				+      node,ncgld,pass_1;
			
 
				+
			
 
				+    type
			
 
				+      ti386loadnode = class(tcgloadnode)
			
 
				+         procedure generate_absaddr_access(vs: tabsolutevarsym); override;
			
 
				+      end;
			
 
				+
			
 
				+
			
 
				+implementation
			
 
				+
			
 
				+    uses
			
 
				+      globals,
			
 
				+      symcpu,
			
 
				+      nld,
			
 
				+      cpubase;
			
 
				+
			
 
				+{*****************************************************************************
			
 
				+                            TI386LOADNODE
			
 
				+*****************************************************************************}
			
 
				+
			
 
				+    procedure ti386loadnode.generate_absaddr_access(vs: tabsolutevarsym);
			
 
				+      begin
			
 
				+        if tcpuabsolutevarsym(symtableentry).absseg then
			
 
				+          location.reference.segment:=NR_FS;
			
 
				+        inherited;
			
 
				+      end;
			
 
				+
			
 
				+
			
 
				+begin
			
 
				+   cloadnode:=ti386loadnode;
			
 
				+end.
			
--- a/compiler/i386/n386mat.pas
+++ b/compiler/i386/n386mat.pas
@@ -29,7 +29,7 @@ interface
 
				       node,nmat,ncgmat,nx86mat;
			
 
				 
			
 
				     type
			
 
				-      ti386moddivnode = class(tmoddivnode)
			
 
				+      ti386moddivnode = class(tx86moddivnode)
			
 
				          procedure pass_generate_code;override;
			
 
				       end;
			
 
				 
			
@@ -61,295 +61,47 @@ implementation
 
				                              TI386MODDIVNODE
			
 
				 *****************************************************************************}
			
 
				 
			
 
				-    function log2(i : dword) : dword;
			
 
				-      begin
			
 
				-        result:=0;
			
 
				-        i:=i shr 1;
			
 
				-        while i<>0 do
			
 
				-          begin
			
 
				-            i:=i shr 1;
			
 
				-            inc(result);
			
 
				-          end;
			
 
				-      end;
			
 
				 
			
 
				 
			
 
				    procedure ti386moddivnode.pass_generate_code;
			
 
				       var
			
 
				-        hreg1,hreg2:Tregister;
			
 
				+        hreg1:Tregister;
			
 
				         power:longint;
			
 
				         hl:Tasmlabel;
			
 
				-        op:Tasmop;
			
 
				-        e : longint;
			
 
				-        d,l,r,s,m,a,n,t : dword;
			
 
				-        m_low,m_high,j,k : qword;
			
 
				       begin
			
 
				-        secondpass(left);
			
 
				-        if codegenerror then
			
 
				-          exit;
			
 
				-        secondpass(right);
			
 
				-        if codegenerror then
			
 
				-          exit;
			
 
				-
			
 
				         if is_64bitint(resultdef) then
			
 
				           { should be handled in pass_1 (JM) }
			
 
				           internalerror(200109052);
			
 
				-        { put numerator in register }
			
 
				-        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
			
 
				-        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
			
 
				-        hreg1:=left.location.register;
			
 
				 
			
 
				-        if (nodetype=divn) and (right.nodetype=ordconstn) then
			
 
				+        if (nodetype=divn) and (right.nodetype=ordconstn) and
			
 
				+          is_signed(left.resultdef) and
			
 
				+          ispowerof2(tordconstnode(right).value.svalue,power) and
			
 
				+          ((current_settings.optimizecputype = cpu_386) or
			
 
				+           (cs_opt_size in current_settings.optimizerswitches)) then
			
 
				           begin
			
 
				-            if ispowerof2(tordconstnode(right).value.svalue,power) then
			
 
				-              begin
			
 
				-                { for signed numbers, the numerator must be adjusted before the
			
 
				-                  shift instruction, but not wih unsigned numbers! Otherwise,
			
 
				-                  "Cardinal($ffffffff) div 16" overflows! (JM) }
			
 
				-                if is_signed(left.resultdef) Then
			
 
				-                  begin
			
 
				-                    if (current_settings.optimizecputype <> cpu_386) and
			
 
				-                       not(cs_opt_size in current_settings.optimizerswitches) then
			
 
				-                      { use a sequence without jumps, saw this in
			
 
				-                        comp.compilers (JM) }
			
 
				-                      begin
			
 
				-                        { no jumps, but more operations }
			
 
				-                        hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-                        emit_reg_reg(A_MOV,S_L,hreg1,hreg2);
			
 
				-                        {If the left value is signed, hreg2=$ffffffff, otherwise 0.}
			
 
				-                        emit_const_reg(A_SAR,S_L,31,hreg2);
			
 
				-                        {If signed, hreg2=right value-1, otherwise 0.}
			
 
				-                        emit_const_reg(A_AND,S_L,tordconstnode(right).value.svalue-1,hreg2);
			
 
				-                        { add to the left value }
			
 
				-                        emit_reg_reg(A_ADD,S_L,hreg2,hreg1);
			
 
				-                        { do the shift }
			
 
				-                        emit_const_reg(A_SAR,S_L,power,hreg1);
			
 
				-                      end
			
 
				-                    else
			
 
				-                      begin
			
 
				-                        { a jump, but less operations }
			
 
				-                        emit_reg_reg(A_TEST,S_L,hreg1,hreg1);
			
 
				-                        current_asmdata.getjumplabel(hl);
			
 
				-                        cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl);
			
 
				-                        if power=1 then
			
 
				-                          emit_reg(A_INC,S_L,hreg1)
			
 
				-                        else
			
 
				-                          emit_const_reg(A_ADD,S_L,tordconstnode(right).value.svalue-1,hreg1);
			
 
				-                        cg.a_label(current_asmdata.CurrAsmList,hl);
			
 
				-                        emit_const_reg(A_SAR,S_L,power,hreg1);
			
 
				-                      end
			
 
				-                  end
			
 
				-                else
			
 
				-                  emit_const_reg(A_SHR,S_L,power,hreg1);
			
 
				-                location.register:=hreg1;
			
 
				-              end
			
 
				+            { signed divide-by-power-of-two optimized for size }
			
 
				+            secondpass(left);
			
 
				+            if codegenerror then
			
 
				+              exit;
			
 
				+            secondpass(right);
			
 
				+            if codegenerror then
			
 
				+              exit;
			
 
				+            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
			
 
				+            hreg1:=left.location.register;
			
 
				+            emit_reg_reg(A_TEST,S_L,hreg1,hreg1);
			
 
				+            current_asmdata.getjumplabel(hl);
			
 
				+            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl);
			
 
				+            if power=1 then
			
 
				+              emit_reg(A_INC,S_L,hreg1)
			
 
				             else
			
 
				-              begin
			
 
				-                if is_signed(left.resultdef) then
			
 
				-                  begin
			
 
				-                    e:=tordconstnode(right).value.svalue;
			
 
				-                    d:=abs(e);
			
 
				-                    { Determine algorithm (a), multiplier (m), and shift factor (s) for 32-bit
			
 
				-                      signed integer division. Based on: Granlund, T.; Montgomery, P.L.:
			
 
				-                      "Division by Invariant Integers using Multiplication". SIGPLAN Notices,
			
 
				-                      Vol. 29, June 1994, page 61.
			
 
				-                    }
			
 
				-
			
 
				-                    l:=log2(d);
			
 
				-                    j:=qword($80000000) mod qword(d);
			
 
				-                    k:=(qword(1) shl (32+l)) div (qword($80000000-j));
			
 
				-                    m_low:=((qword(1)) shl (32+l)) div d;
			
 
				-                    m_high:=(((qword(1)) shl (32+l)) + k) div d;
			
 
				-                    while ((m_low shr 1) < (m_high shr 1)) and (l > 0) do
			
 
				-                      begin
			
 
				-                        m_low:=m_low shr 1;
			
 
				-                        m_high:=m_high shr 1;
			
 
				-                        dec(l);
			
 
				-                      end;
			
 
				-                    m:=dword(m_high);
			
 
				-                    s:=l;
			
 
				-                    if (m_high shr 31)<>0 then
			
 
				-                      a:=1
			
 
				-                    else
			
 
				-                      a:=0;
			
 
				-                    cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				-                    emit_const_reg(A_MOV,S_L,aint(m),NR_EAX);
			
 
				-                    cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
			
 
				-                    emit_reg(A_IMUL,S_L,hreg1);
			
 
				-                    emit_reg_reg(A_MOV,S_L,hreg1,NR_EAX);
			
 
				-                    if a<>0 then
			
 
				-                      begin
			
 
				-                        emit_reg_reg(A_ADD,S_L,NR_EAX,NR_EDX);
			
 
				-                        {
			
 
				-                          printf ("; dividend: memory location or register other than EAX or EDX\n");
			
 
				-                          printf ("\n");
			
 
				-                          printf ("MOV EAX, 0%08LXh\n", m);
			
 
				-                          printf ("IMUL dividend\n");
			
 
				-                          printf ("MOV EAX, dividend\n");
			
 
				-                          printf ("ADD EDX, EAX\n");
			
 
				-                          if (s) printf ("SAR EDX, %d\n", s);
			
 
				-                          printf ("SHR EAX, 31\n");
			
 
				-                          printf ("ADD EDX, EAX\n");
			
 
				-                          if (e < 0) printf ("NEG EDX\n");
			
 
				-                          printf ("\n");
			
 
				-                          printf ("; quotient now in EDX\n");
			
 
				-                        }
			
 
				-                      end;
			
 
				-                      {
			
 
				-                        printf ("; dividend: memory location of register other than EAX or EDX\n");
			
 
				-                        printf ("\n");
			
 
				-                        printf ("MOV EAX, 0%08LXh\n", m);
			
 
				-                        printf ("IMUL dividend\n");
			
 
				-                        printf ("MOV EAX, dividend\n");
			
 
				-                        if (s) printf ("SAR EDX, %d\n", s);
			
 
				-                        printf ("SHR EAX, 31\n");
			
 
				-                        printf ("ADD EDX, EAX\n");
			
 
				-                        if (e < 0) printf ("NEG EDX\n");
			
 
				-                        printf ("\n");
			
 
				-                        printf ("; quotient now in EDX\n");
			
 
				-                      }
			
 
				-                    if s<>0 then
			
 
				-                      emit_const_reg(A_SAR,S_L,s,NR_EDX);
			
 
				-                    emit_const_reg(A_SHR,S_L,31,NR_EAX);
			
 
				-                    emit_reg_reg(A_ADD,S_L,NR_EAX,NR_EDX);
			
 
				-                    if e<0 then
			
 
				-                      emit_reg(A_NEG,S_L,NR_EDX);
			
 
				-                    cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
			
 
				-                    cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				-                    location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-                    cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register)
			
 
				-                  end
			
 
				-                else
			
 
				-                  begin
			
 
				-                    d:=tordconstnode(right).value.svalue;
			
 
				-                    if d>=$80000000 then
			
 
				-                      begin
			
 
				-                        emit_const_reg(A_CMP,S_L,aint(d),hreg1);
			
 
				-                        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-                        emit_const_reg(A_MOV,S_L,0,location.register);
			
 
				-                        emit_const_reg(A_SBB,S_L,-1,location.register);
			
 
				-                      end
			
 
				-                    else
			
 
				-                      begin
			
 
				-                        { Reduce divisor until it becomes odd }
			
 
				-                        n:=0;
			
 
				-                        t:=d;
			
 
				-                        while (t and 1)=0 do
			
 
				-                          begin
			
 
				-                            t:=t shr 1;
			
 
				-                            inc(n);
			
 
				-                          end;
			
 
				-                        { Generate m, s for algorithm 0. Based on: Granlund, T.; Montgomery,
			
 
				-                        P.L.: "Division by Invariant Integers using Multiplication".
			
 
				-                        SIGPLAN Notices, Vol. 29, June 1994, page 61.
			
 
				-                        }
			
 
				-                        l:=log2(t)+1;
			
 
				-                        j:=qword($ffffffff) mod qword(t);
			
 
				-                        k:=(qword(1) shl (32+l)) div (qword($ffffffff-j));
			
 
				-                        m_low:=((qword(1)) shl (32+l)) div t;
			
 
				-                        m_high:=(((qword(1)) shl (32+l)) + k) div t;
			
 
				-                        while ((m_low shr 1) < (m_high shr 1)) and (l>0) do
			
 
				-                          begin
			
 
				-                            m_low:=m_low shr 1;
			
 
				-                            m_high:=m_high shr 1;
			
 
				-                            l:=l-1;
			
 
				-                          end;
			
 
				-                        if (m_high shr 32)=0 then
			
 
				-                          begin
			
 
				-                            m:=dword(m_high);
			
 
				-                            s:=l;
			
 
				-                            a:=0;
			
 
				-                          end
			
 
				-
			
 
				-                        { Generate m, s for algorithm 1. Based on: Magenheimer, D.J.; et al:
			
 
				-                        "Integer Multiplication and Division on the HP Precision Architecture".
			
 
				-                        IEEE Transactions on Computers, Vol 37, No. 8, August 1988, page 980.
			
 
				-                        }
			
 
				-                        else
			
 
				-                          begin
			
 
				-                            s:=log2(t);
			
 
				-                            m_low:=(qword(1) shl (32+s)) div qword(t);
			
 
				-                            r:=dword(((qword(1)) shl (32+s)) mod qword(t));
			
 
				-                            if (r < ((t>>1)+1)) then
			
 
				-                              m:=dword(m_low)
			
 
				-                            else
			
 
				-                              m:=dword(m_low)+1;
			
 
				-                            a:=1;
			
 
				-                          end;
			
 
				-                        { Reduce multiplier for either algorithm to smallest possible }
			
 
				-                        while (m and 1)=0 do
			
 
				-                          begin
			
 
				-                            m:=m shr 1;
			
 
				-                            dec(s);
			
 
				-                          end;
			
 
				-                        { Adjust multiplier for reduction of even divisors }
			
 
				-                        inc(s,n);
			
 
				-                        cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				-                        emit_const_reg(A_MOV,S_L,aint(m),NR_EAX);
			
 
				-                        cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
			
 
				-                        emit_reg(A_MUL,S_L,hreg1);
			
 
				-                        if a<>0 then
			
 
				-                          begin
			
 
				-                            {
			
 
				-                            printf ("; dividend: register other than EAX or memory location\n");
			
 
				-                            printf ("\n");
			
 
				-                            printf ("MOV EAX, 0%08lXh\n", m);
			
 
				-                            printf ("MUL dividend\n");
			
 
				-                            printf ("ADD EAX, 0%08lXh\n", m);
			
 
				-                            printf ("ADC EDX, 0\n");
			
 
				-                            if (s) printf ("SHR EDX, %d\n", s);
			
 
				-                            printf ("\n");
			
 
				-                            printf ("; quotient now in EDX\n");
			
 
				-                            }
			
 
				-                            emit_const_reg(A_ADD,S_L,aint(m),NR_EAX);
			
 
				-                            emit_const_reg(A_ADC,S_L,0,NR_EDX);
			
 
				-                          end;
			
 
				-                        if s<>0 then
			
 
				-                          emit_const_reg(A_SHR,S_L,aint(s),NR_EDX);
			
 
				-                        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
			
 
				-                        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				-                        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-                        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register)
			
 
				-                      end;
			
 
				-                  end
			
 
				-              end
			
 
				+              emit_const_reg(A_ADD,S_L,tordconstnode(right).value.svalue-1,hreg1);
			
 
				+            cg.a_label(current_asmdata.CurrAsmList,hl);
			
 
				+            emit_const_reg(A_SAR,S_L,power,hreg1);
			
 
				+            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
			
 
				+            location.register:=hreg1;
			
 
				           end
			
 
				         else
			
 
				-          begin
			
 
				-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				-            emit_reg_reg(A_MOV,S_L,hreg1,NR_EAX);
			
 
				-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
			
 
				-            {Sign extension depends on the left type.}
			
 
				-            if torddef(left.resultdef).ordtype=u32bit then
			
 
				-              emit_reg_reg(A_XOR,S_L,NR_EDX,NR_EDX)
			
 
				-            else
			
 
				-              emit_none(A_CDQ,S_NO);
			
 
				-
			
 
				-            {Division depends on the right type.}
			
 
				-            if Torddef(right.resultdef).ordtype=u32bit then
			
 
				-              op:=A_DIV
			
 
				-            else
			
 
				-              op:=A_IDIV;
			
 
				-
			
 
				-            if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
			
 
				-              emit_ref(op,S_L,right.location.reference)
			
 
				-            else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
			
 
				-              emit_reg(op,S_L,right.location.register)
			
 
				-            else
			
 
				-              begin
			
 
				-                hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
			
 
				-                hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u32inttype,right.location,hreg1);
			
 
				-                emit_reg(op,S_L,hreg1);
			
 
				-              end;
			
 
				-
			
 
				-            {Copy the result into a new register. Release EAX & EDX.}
			
 
				-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
			
 
				-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
			
 
				-            location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
			
 
				-            if nodetype=divn then
			
 
				-              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register)
			
 
				-            else
			
 
				-              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register);
			
 
				-          end;
			
 
				+          inherited pass_generate_code;
			
 
				       end;
			
 
				 
			
 
				 
			
--- a/compiler/i386/n386mem.pas
+++ b/compiler/i386/n386mem.pas
@@ -28,10 +28,13 @@ interface
 
				     uses
			
 
				       globtype,
			
 
				       cgbase,cpuinfo,cpubase,
			
 
				-      node,nmem,ncgmem,nx86mem;
			
 
				+      node,nmem,ncgmem,nx86mem,ni86mem;
			
 
				 
			
 
				     type
			
 
				-       ti386addrnode = class(tcgaddrnode)
			
 
				+       ti386addrnode = class(ti86addrnode)
			
 
				+         protected
			
 
				+          procedure set_absvarsym_resultdef; override;
			
 
				+         public
			
 
				           procedure pass_generate_code;override;
			
 
				        end;
			
 
				 
			
@@ -44,7 +47,7 @@ implementation
 
				     uses
			
 
				       systems,
			
 
				       cutils,verbose,
			
 
				-      symdef,paramgr,
			
 
				+      symconst,symdef,symcpu,paramgr,
			
 
				       aasmtai,aasmdata,
			
 
				       nld,ncon,nadd,
			
 
				       cgutils,cgobj;
			
@@ -53,8 +56,16 @@ implementation
 
				                              TI386ADDRNODE
			
 
				 *****************************************************************************}
			
 
				 
			
 
				-    procedure ti386addrnode.pass_generate_code;
			
 
				+    procedure ti386addrnode.set_absvarsym_resultdef;
			
 
				+      begin
			
 
				+        if not(nf_typedaddr in flags) then
			
 
				+          resultdef:=voidnearfspointertype
			
 
				+        else
			
 
				+          resultdef:=tcpupointerdefclass(cpointerdef).createx86(left.resultdef,x86pt_near_fs);
			
 
				+      end;
			
 
				 
			
 
				+
			
 
				+    procedure ti386addrnode.pass_generate_code;
			
 
				       begin
			
 
				         inherited pass_generate_code;
			
 
				         { for use of other segments, not used }
			
--- a/compiler/i386/popt386.pas
+++ b/compiler/i386/popt386.pas
@@ -23,6 +23,8 @@ unit popt386;
 
				 
			
 
				 {$i fpcdefs.inc}
			
 
				 
			
 
				+{ $define DEBUG_AOPTCPU}
			
 
				+
			
 
				 interface
			
 
				 
			
 
				 uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
			
@@ -35,7 +37,7 @@ procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
 
				 implementation
			
 
				 
			
 
				 uses
			
 
				-  globtype,systems,
			
 
				+  cutils,globtype,systems,
			
 
				   globals,cgbase,procinfo,
			
 
				   symsym,
			
 
				 {$ifdef finaldestdebug}
			
@@ -318,7 +320,7 @@ begin
 
				                          12: begin
			
 
				                             {imul 12, reg1, reg2 to
			
 
				                                lea (,reg1,4), reg2
			
 
				-                               lea (,reg1,8) reg2
			
 
				+                               lea (reg2,reg1,8), reg2
			
 
				                              imul 12, reg1 to
			
 
				                                lea (reg1,reg1,2), reg1
			
 
				                                lea (,reg1,4), reg1}
			
@@ -440,9 +442,101 @@ begin
 
				 end;
			
 
				 
			
 
				 
			
 
				+function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
			
 
				+  begin
			
 
				+    result :=
			
 
				+      (instr.typ = ait_instruction) and
			
 
				+      (taicpu(instr).opcode = op) and
			
 
				+      ((opsize = []) or (taicpu(instr).opsize in opsize));
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
			
 
				+  begin
			
 
				+    result :=
			
 
				+      (instr.typ = ait_instruction) and
			
 
				+      ((taicpu(instr).opcode = op1) or
			
 
				+       (taicpu(instr).opcode = op2)
			
 
				+      ) and
			
 
				+      ((opsize = []) or (taicpu(instr).opsize in opsize));
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
			
 
				+  begin
			
 
				+    result :=
			
 
				+      (instr.typ = ait_instruction) and
			
 
				+      ((taicpu(instr).opcode = op1) or
			
 
				+       (taicpu(instr).opcode = op2) or
			
 
				+       (taicpu(instr).opcode = op3)
			
 
				+      ) and
			
 
				+      ((opsize = []) or (taicpu(instr).opsize in opsize));
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
			
 
				+  begin
			
 
				+    result := (oper.typ = top_reg) and (oper.reg = reg);
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
			
 
				+  begin
			
 
				+    result := (oper.typ = top_const) and (oper.val = a);
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
			
 
				+  begin
			
 
				+    result := oper1.typ = oper2.typ;
			
 
				+
			
 
				+    if result then
			
 
				+      case oper1.typ of
			
 
				+        top_const:
			
 
				+          Result:=oper1.val = oper2.val;
			
 
				+        top_reg:
			
 
				+          Result:=oper1.reg = oper2.reg;
			
 
				+        top_ref:
			
 
				+          Result:=RefsEqual(oper1.ref^, oper2.ref^);
			
 
				+        else
			
 
				+          internalerror(2013102801);
			
 
				+      end
			
 
				+  end;
			
 
				+
			
 
				+
			
 
				+function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
			
 
				+  begin
			
 
				+   Result:=(ref.offset=0) and
			
 
				+     (ref.scalefactor in [0,1]) and
			
 
				+     (ref.segment=NR_NO) and
			
 
				+     (ref.symbol=nil) and
			
 
				+     (ref.relsymbol=nil) and
			
 
				+     ((base=NR_INVALID) or
			
 
				+      (ref.base=base)) and
			
 
				+     ((index=NR_INVALID) or
			
 
				+      (ref.index=index));
			
 
				+  end;
			
 
				+
			
 
				 
			
 
				+{ First pass of peephole optimizations }
			
 
				 procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
			
 
				-{First pass of peepholeoptimizations}
			
 
				+
			
 
				+{$ifdef DEBUG_AOPTCPU}
			
 
				+  procedure DebugMsg(const s: string;p : tai);
			
 
				+    begin
			
 
				+      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
			
 
				+    end;
			
 
				+{$else DEBUG_AOPTCPU}
			
 
				+  procedure DebugMsg(const s: string;p : tai);inline;
			
 
				+    begin
			
 
				+    end;
			
 
				+{$endif DEBUG_AOPTCPU}
			
 
				+
			
 
				+function WriteOk : Boolean;
			
 
				+  begin
			
 
				+    writeln('Ok');
			
 
				+    Result:=True;
			
 
				+  end;
			
 
				 
			
 
				 var
			
 
				   l : longint;
			
@@ -626,6 +720,7 @@ begin
 
				       case p.Typ Of
			
 
				         ait_instruction:
			
 
				           begin
			
 
				+            current_filepos:=taicpu(p).fileinfo;
			
 
				             if InsContainsSegRef(taicpu(p)) then
			
 
				               begin
			
 
				                 p := tai(p.next);
			
@@ -758,6 +853,8 @@ begin
 
				                         S_B: v:=$80;
			
 
				                         S_W: v:=$8000;
			
 
				                         S_L: v:=aint($80000000);
			
 
				+                        else
			
 
				+                          internalerror(2013112905);
			
 
				                       end;
			
 
				                       if (taicpu(p).oper[0]^.typ=Top_const) and
			
 
				                          (taicpu(p).oper[0]^.val=v) and
			
@@ -938,47 +1035,89 @@ begin
 
				                          (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
			
 
				                          (taicpu(p).oper[0]^.ref^.index = NR_NO) and
			
 
				                          (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
			
 
				-                        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
			
 
				-                           (taicpu(p).oper[0]^.ref^.offset = 0) then
			
 
				-                          begin
			
 
				-                            hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
			
 
				-                              taicpu(p).oper[1]^.reg);
			
 
				-                            InsertLLItem(asml,p.previous,p.next, hp1);
			
 
				-                            p.free;
			
 
				-                            p := hp1;
			
 
				-                            continue;
			
 
				-                          end
			
 
				-                        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
			
 
				-                          begin
			
 
				-                            hp1 := tai(p.Next);
			
 
				-                            asml.remove(p);
			
 
				-                            p.free;
			
 
				-                            p := hp1;
			
 
				-                            continue;
			
 
				-                          end
			
 
				-                        else
			
 
				-                          with taicpu(p).oper[0]^.ref^ do
			
 
				-                            if (base = taicpu(p).oper[1]^.reg) then
			
 
				-                              begin
			
 
				-                                l := offset;
			
 
				-                                if (l=1) and UseIncDec then
			
 
				-                                  begin
			
 
				-                                    taicpu(p).opcode := A_INC;
			
 
				-                                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
			
 
				-                                    taicpu(p).ops := 1
			
 
				-                                  end
			
 
				-                                else if (l=-1) and UseIncDec then
			
 
				-                                  begin
			
 
				-                                    taicpu(p).opcode := A_DEC;
			
 
				-                                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
			
 
				-                                    taicpu(p).ops := 1;
			
 
				-                                  end
			
 
				-                                else
			
 
				-                                  begin
			
 
				-                                    taicpu(p).opcode := A_ADD;
			
 
				-                                    taicpu(p).loadConst(0,l);
			
 
				-                                  end;
			
 
				-                              end;
			
 
				+                        begin
			
 
				+                          if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
			
 
				+                             (taicpu(p).oper[0]^.ref^.offset = 0) then
			
 
				+                            begin
			
 
				+                              hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
			
 
				+                                taicpu(p).oper[1]^.reg);
			
 
				+                              InsertLLItem(asml,p.previous,p.next, hp1);
			
 
				+                              p.free;
			
 
				+                              p := hp1;
			
 
				+                              continue;
			
 
				+                            end
			
 
				+                          else if (taicpu(p).oper[0]^.ref^.offset = 0) then
			
 
				+                            begin
			
 
				+                              hp1 := tai(p.Next);
			
 
				+                              asml.remove(p);
			
 
				+                              p.free;
			
 
				+                              p := hp1;
			
 
				+                              continue;
			
 
				+                            end
			
 
				+                          { continue to use lea to adjust the stack pointer,
			
 
				+                            it is the recommended way, but only if not optimizing for size }
			
 
				+                          else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
			
 
				+                            (cs_opt_size in current_settings.optimizerswitches) then
			
 
				+                            with taicpu(p).oper[0]^.ref^ do
			
 
				+                              if (base = taicpu(p).oper[1]^.reg) then
			
 
				+                                begin
			
 
				+                                  l := offset;
			
 
				+                                  if (l=1) and UseIncDec then
			
 
				+                                    begin
			
 
				+                                      taicpu(p).opcode := A_INC;
			
 
				+                                      taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
			
 
				+                                      taicpu(p).ops := 1
			
 
				+                                    end
			
 
				+                                  else if (l=-1) and UseIncDec then
			
 
				+                                    begin
			
 
				+                                      taicpu(p).opcode := A_DEC;
			
 
				+                                      taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
			
 
				+                                      taicpu(p).ops := 1;
			
 
				+                                    end
			
 
				+                                  else
			
 
				+                                    begin
			
 
				+                                      if (l<0) and (l<>-2147483648) then
			
 
				+                                        begin
			
 
				+                                          taicpu(p).opcode := A_SUB;
			
 
				+                                          taicpu(p).loadConst(0,-l);
			
 
				+                                        end
			
 
				+                                      else
			
 
				+                                        begin
			
 
				+                                          taicpu(p).opcode := A_ADD;
			
 
				+                                          taicpu(p).loadConst(0,l);
			
 
				+                                        end;
			
 
				+                                    end;
			
 
				+                                end;
			
 
				+                        end
			
 
				+(*
			
 
				+                      This is unsafe, lea doesn't modify the flags but "add"
			
 
				+                      does. This breaks webtbs/tw15694.pp. The above
			
 
				+                      transformations are also unsafe, but they don't seem to
			
 
				+                      be triggered by code that FPC generators (or that at
			
 
				+                      least does not occur in the tests...). This needs to be
			
 
				+                      fixed by checking for the liveness of the flags register.
			
 
				+
			
 
				+                      else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
			
 
				+                        begin
			
 
				+                          hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
			
 
				+                            taicpu(p).oper[0]^.ref^.base);
			
 
				+                          InsertLLItem(asml,p.previous,p.next, hp1);
			
 
				+                          DebugMsg('Peephole Lea2AddBase done',hp1);
			
 
				+                          p.free;
			
 
				+                          p:=hp1;
			
 
				+                          continue;
			
 
				+                        end
			
 
				+                      else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
			
 
				+                        begin
			
 
				+                          hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
			
 
				+                            taicpu(p).oper[0]^.ref^.index);
			
 
				+                          InsertLLItem(asml,p.previous,p.next,hp1);
			
 
				+                          DebugMsg('Peephole Lea2AddIndex done',hp1);
			
 
				+                          p.free;
			
 
				+                          p:=hp1;
			
 
				+                          continue;
			
 
				+                        end
			
 
				+*)
			
 
				                     end;
			
 
				                   A_MOV:
			
 
				                     begin
			
@@ -1103,9 +1242,7 @@ begin
 
				                                 end;
			
 
				                     { Next instruction is also a MOV ? }
			
 
				                       if GetNextInstruction(p, hp1) and
			
 
				-                         (tai(hp1).typ = ait_instruction) and
			
 
				-                         (taicpu(hp1).opcode = A_MOV) and
			
 
				-                         (taicpu(hp1).opsize = taicpu(p).opsize) then
			
 
				+                        MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
			
 
				                         begin
			
 
				                           if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
			
 
				                              (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
			
@@ -1264,23 +1401,17 @@ begin
 
				                                   taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
			
 
				                                   taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
			
 
				                                   taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
			
 
				+                                  taicpu(hp1).fileinfo := taicpu(p).fileinfo;
			
 
				                                 end
			
 
				                         end;
			
 
				                       if GetNextInstruction(p, hp1) and
			
 
				-                         (Tai(hp1).typ = ait_instruction) and
			
 
				-                         ((Taicpu(hp1).opcode = A_BTS) or (Taicpu(hp1).opcode = A_BTR)) and
			
 
				-                         (Taicpu(hp1).opsize = Taicpu(p).opsize) and
			
 
				+                         MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
			
 
				                          GetNextInstruction(hp1, hp2) and
			
 
				-                         (Tai(hp2).typ = ait_instruction) and
			
 
				-                         (Taicpu(hp2).opcode = A_OR) and
			
 
				-                         (Taicpu(hp1).opsize = Taicpu(p).opsize) and
			
 
				-                         (Taicpu(hp2).opsize = Taicpu(p).opsize) and
			
 
				-                         (Taicpu(p).oper[0]^.typ = top_const) and (Taicpu(p).oper[0]^.val=0) and
			
 
				+                         MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
			
 
				+                         MatchOperand(Taicpu(p).oper[0]^,0) and
			
 
				                          (Taicpu(p).oper[1]^.typ = top_reg) and
			
 
				-                         (Taicpu(hp1).oper[1]^.typ = top_reg) and
			
 
				-                         (Taicpu(p).oper[1]^.reg=Taicpu(hp1).oper[1]^.reg) and
			
 
				-                         (Taicpu(hp2).oper[1]^.typ = top_reg) and
			
 
				-                         (Taicpu(p).oper[1]^.reg=Taicpu(hp2).oper[1]^.reg) then
			
 
				+                         MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
			
 
				+                         MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
			
 
				                          {mov reg1,0
			
 
				                           bts reg1,operand1             -->      mov reg1,operand2
			
 
				                           or  reg1,operand2                      bts reg1,operand1}
			
@@ -1290,6 +1421,33 @@ begin
 
				                           insertllitem(asml,hp2,hp2.next,hp1);
			
 
				                           asml.remove(p);
			
 
				                           p.free;
			
 
				+                          p:=hp1;
			
 
				+                        end;
			
 
				+                      if GetNextInstruction(p, hp1) and
			
 
				+                         MatchInstruction(hp1,A_LEA,[S_L]) and
			
 
				+                         (Taicpu(p).oper[0]^.typ = top_ref) and
			
 
				+                         (Taicpu(p).oper[1]^.typ = top_reg) and
			
 
				+                         ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
			
 
				+                           (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
			
 
				+                          ) or
			
 
				+                          (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
			
 
				+                           (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
			
 
				+                          )
			
 
				+                         ) then
			
 
				+                         {mov reg1,ref
			
 
				+                          lea reg2,[reg1,reg2]          -->      add reg2,ref}
			
 
				+                        begin
			
 
				+                          TmpUsedRegs := UsedRegs;
			
 
				+                          { reg1 may not be used afterwards }
			
 
				+                          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
			
 
				+                            begin
			
 
				+                              Taicpu(hp1).opcode:=A_ADD;
			
 
				+                              Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
			
 
				+                              DebugMsg('Peephole MovLea2Add done',hp1);
			
 
				+                              asml.remove(p);
			
 
				+                              p.free;
			
 
				+                              p:=hp1;
			
 
				+                            end;
			
 
				                         end;
			
 
				                     end;
			
 
				 
			
@@ -1302,8 +1460,7 @@ begin
 
				                          IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
			
 
				                          (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
			
 
				                          GetNextInstruction(hp1,hp2) and
			
 
				-                         (hp2.typ = ait_instruction) and
			
 
				-                         (taicpu(hp2).opcode = A_MOV) and
			
 
				+                         MatchInstruction(hp2,A_MOV,[]) and
			
 
				                          (taicpu(hp2).oper[0]^.typ = top_reg) and
			
 
				                          OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
			
 
				                          (((taicpu(hp1).ops=2) and
			
@@ -1791,6 +1948,17 @@ end;
 
				 
			
 
				 procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
			
 
				 
			
 
				+{$ifdef DEBUG_AOPTCPU}
			
 
				+  procedure DebugMsg(const s: string;p : tai);
			
 
				+    begin
			
 
				+      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
			
 
				+    end;
			
 
				+{$else DEBUG_AOPTCPU}
			
 
				+  procedure DebugMsg(const s: string;p : tai);inline;
			
 
				+    begin
			
 
				+    end;
			
 
				+{$endif DEBUG_AOPTCPU}
			
 
				+
			
 
				   function CanBeCMOV(p : tai) : boolean;
			
 
				     begin
			
 
				        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
			
@@ -1808,10 +1976,9 @@ procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
 
				     end;
			
 
				 
			
 
				 var
			
 
				-  p,hp1,hp2: tai;
			
 
				+  p,hp1,hp2,hp3: tai;
			
 
				   l : longint;
			
 
				   condition : tasmcond;
			
 
				-  hp3: tai;
			
 
				   UsedRegs, TmpUsedRegs: TRegSet;
			
 
				   carryadd_opcode: Tasmop;
			
 
				 
			
@@ -2064,12 +2231,19 @@ begin
 
				                   else if (taicpu(p).oper[0]^.typ = top_ref) and
			
 
				                      GetNextInstruction(p,hp1) and
			
 
				                      (hp1.typ = ait_instruction) and
			
 
				-                     IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
			
 
				+                     (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
			
 
				+                      ((taicpu(hp1).opcode=A_LEA) and
			
 
				+                       (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
			
 
				+                       ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
			
 
				+                        (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
			
 
				+                        (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
			
 
				+                        (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
			
 
				+                       )
			
 
				+                      )
			
 
				+                     ) and
			
 
				                      GetNextInstruction(hp1,hp2) and
			
 
				-                     (hp2.typ = ait_instruction) and
			
 
				-                     (taicpu(hp2).opcode = A_MOV) and
			
 
				-                     (taicpu(hp2).oper[0]^.typ = top_reg) and
			
 
				-                     (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
			
 
				+                     MatchInstruction(hp2,A_MOV,[]) and
			
 
				+                     MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
			
 
				                      (taicpu(hp2).oper[1]^.typ = top_ref) then
			
 
				                     begin
			
 
				                       TmpUsedRegs := UsedRegs;
			
@@ -2085,7 +2259,17 @@ begin
 
				                         begin
			
 
				                           case taicpu(hp1).opcode of
			
 
				                             A_INC,A_DEC:
			
 
				-                              taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^)
			
 
				+                              taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
			
 
				+                            A_LEA:
			
 
				+                              begin
			
 
				+                                taicpu(hp1).opcode:=A_ADD;
			
 
				+                                if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
			
 
				+                                  taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
			
 
				+                                else
			
 
				+                                  taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
			
 
				+                                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
			
 
				+                                DebugMsg('Peephole FoldLea done',hp1);
			
 
				+                              end
			
 
				                             else
			
 
				                               taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
			
 
				                           end;
			
@@ -2108,6 +2292,7 @@ end;
 
				 procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
			
 
				 var
			
 
				   p,hp1,hp2: tai;
			
 
				+  IsTestConstX: boolean;
			
 
				 begin
			
 
				   p := BlockStart;
			
 
				   while (p <> BlockEnd) Do
			
@@ -2213,22 +2398,22 @@ See test/tgadint64 in the test suite.
 
				               A_TEST, A_OR:
			
 
				                 {removes the line marked with (x) from the sequence
			
 
				                  and/or/xor/add/sub/... $x, %y
			
 
				-                 test/or %y, %y   (x)
			
 
				+                 test/or %y, %y  | test $-1, %y    (x)
			
 
				                  j(n)z _Label
			
 
				-                    as the first instruction already adjusts the ZF}
			
 
				+                    as the first instruction already adjusts the ZF
			
 
				+                    %y operand may also be a reference }
			
 
				                  begin
			
 
				-                   if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
			
 
				-                    if GetLastInstruction(p, hp1) and
			
 
				+                   IsTestConstX:=(taicpu(p).opcode=A_TEST) and
			
 
				+                     MatchOperand(taicpu(p).oper[0]^,-1);
			
 
				+                   if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
			
 
				+                      GetLastInstruction(p, hp1) and
			
 
				                       (tai(hp1).typ = ait_instruction) and
			
 
				                       GetNextInstruction(p,hp2) and
			
 
				-                      (hp2.typ = ait_instruction) and
			
 
				-                      ((taicpu(hp2).opcode = A_SETcc) or
			
 
				-                       (taicpu(hp2).opcode = A_Jcc) or
			
 
				-                       (taicpu(hp2).opcode = A_CMOVcc)) then
			
 
				+                      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
			
 
				                      case taicpu(hp1).opcode Of
			
 
				-                       A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
			
 
				+                       A_ADD, A_SUB, A_OR, A_XOR, A_AND:
			
 
				                          begin
			
 
				-                           if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
			
 
				+                           if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
			
 
				                              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
			
 
				                              { and in case of carry for A(E)/B(E)/C/NC                  }
			
 
				                               ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
			
@@ -2242,9 +2427,28 @@ See test/tgadint64 in the test suite.
 
				                                continue
			
 
				                              end;
			
 
				                          end;
			
 
				+                       A_SHL, A_SAL, A_SHR, A_SAR:
			
 
				+                         begin
			
 
				+                           if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
			
 
				+                             { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
			
 
				+                             { therefore, it's only safe to do this optimization for     }
			
 
				+                             { shifts by a (nonzero) constant                            }
			
 
				+                              (taicpu(hp1).oper[0]^.typ = top_const) and
			
 
				+                              (taicpu(hp1).oper[0]^.val <> 0) and
			
 
				+                             { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
			
 
				+                             { and in case of carry for A(E)/B(E)/C/NC                  }
			
 
				+                              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
			
 
				+                             begin
			
 
				+                               hp1 := tai(p.next);
			
 
				+                               asml.remove(p);
			
 
				+                               p.free;
			
 
				+                               p := tai(hp1);
			
 
				+                               continue
			
 
				+                             end;
			
 
				+                         end;
			
 
				                        A_DEC, A_INC, A_NEG:
			
 
				                          begin
			
 
				-                           if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
			
 
				+                           if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
			
 
				                              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
			
 
				                              { and in case of carry for A(E)/B(E)/C/NC                  }
			
 
				                              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
			
@@ -2269,7 +2473,15 @@ See test/tgadint64 in the test suite.
 
				                                continue
			
 
				                              end;
			
 
				                          end
			
 
				-                     end
			
 
				+                     else
			
 
				+                       { change "test  $-1,%reg" into "test %reg,%reg" }
			
 
				+                       if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
			
 
				+                         taicpu(p).loadoper(0,taicpu(p).oper[1]^);
			
 
				+                     end { case }
			
 
				+                   else
			
 
				+                     { change "test  $-1,%reg" into "test %reg,%reg" }
			
 
				+                     if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
			
 
				+                       taicpu(p).loadoper(0,taicpu(p).oper[1]^);
			
 
				                  end;
			
 
				             end;
			
 
				           end;
			
--- a/compiler/i386/r386ari.inc
+++ b/compiler/i386/r386ari.inc
@@ -8,22 +8,22 @@
 
				 15,
			
 
				 6,
			
 
				 5,
			
 
				-38,
			
 
				 39,
			
 
				 40,
			
 
				 41,
			
 
				+42,
			
 
				 26,
			
 
				 7,
			
 
				 10,
			
 
				 19,
			
 
				 9,
			
 
				-32,
			
 
				 33,
			
 
				 34,
			
 
				 35,
			
 
				 36,
			
 
				 37,
			
 
				-27,
			
 
				+38,
			
 
				+28,
			
 
				 11,
			
 
				 4,
			
 
				 22,
			
@@ -31,13 +31,13 @@
 
				 8,
			
 
				 20,
			
 
				 12,
			
 
				+32,
			
 
				 25,
			
 
				-28,
			
 
				 18,
			
 
				 24,
			
 
				-47,
			
 
				-30,
			
 
				 31,
			
 
				+29,
			
 
				+30,
			
 
				 57,
			
 
				 58,
			
 
				 59,
			
@@ -48,7 +48,7 @@
 
				 64,
			
 
				 17,
			
 
				 23,
			
 
				-29,
			
 
				+27,
			
 
				 56,
			
 
				 48,
			
 
				 49,
			
@@ -58,11 +58,11 @@
 
				 53,
			
 
				 54,
			
 
				 55,
			
 
				-42,
			
 
				 43,
			
 
				 44,
			
 
				 45,
			
 
				 46,
			
 
				+47,
			
 
				 65,
			
 
				 66,
			
 
				 67,
			
--- a/compiler/i386/r386att.inc
+++ b/compiler/i386/r386att.inc
@@ -24,13 +24,14 @@
 
				 '%ebp',
			
 
				 '%sp',
			
 
				 '%esp',
			
 
				-'%eip',
			
 
				-'%cs',
			
 
				-'%ds',
			
 
				 '%es',
			
 
				+'%cs',
			
 
				 '%ss',
			
 
				+'%ds',
			
 
				 '%fs',
			
 
				 '%gs',
			
 
				+'%flags',
			
 
				+'%eip',
			
 
				 '%dr0',
			
 
				 '%dr1',
			
 
				 '%dr2',
			
@@ -46,7 +47,6 @@
 
				 '%tr5',
			
 
				 '%tr6',
			
 
				 '%tr7',
			
 
				-'%flags',
			
 
				 '%st(0)',
			
 
				 '%st(1)',
			
 
				 '%st(2)',
			
--- a/compiler/i386/r386con.inc
+++ b/compiler/i386/r386con.inc
@@ -24,29 +24,29 @@ NR_BP = tregister($01030006);
 
				 NR_EBP = tregister($01040006);
			
 
				 NR_SP = tregister($01030007);
			
 
				 NR_ESP = tregister($01040007);
			
 
				-NR_EIP = tregister($05040000);
			
 
				+NR_ES = tregister($05000000);
			
 
				 NR_CS = tregister($05000001);
			
 
				-NR_DS = tregister($05000002);
			
 
				-NR_ES = tregister($05000003);
			
 
				-NR_SS = tregister($05000004);
			
 
				-NR_FS = tregister($05000005);
			
 
				-NR_GS = tregister($05000006);
			
 
				-NR_DR0 = tregister($05000007);
			
 
				-NR_DR1 = tregister($05000008);
			
 
				-NR_DR2 = tregister($05000009);
			
 
				-NR_DR3 = tregister($0500000a);
			
 
				-NR_DR6 = tregister($0500000b);
			
 
				-NR_DR7 = tregister($0500000c);
			
 
				-NR_CR0 = tregister($0500000d);
			
 
				-NR_CR2 = tregister($0500000e);
			
 
				-NR_CR3 = tregister($0500000f);
			
 
				-NR_CR4 = tregister($05000010);
			
 
				-NR_TR3 = tregister($05000011);
			
 
				-NR_TR4 = tregister($05000012);
			
 
				-NR_TR5 = tregister($05000013);
			
 
				-NR_TR6 = tregister($05000014);
			
 
				-NR_TR7 = tregister($05000015);
			
 
				-NR_FLAGS = tregister($05000016);
			
 
				+NR_SS = tregister($05000002);
			
 
				+NR_DS = tregister($05000003);
			
 
				+NR_FS = tregister($05000004);
			
 
				+NR_GS = tregister($05000005);
			
 
				+NR_FLAGS = tregister($05000006);
			
 
				+NR_EIP = tregister($05040007);
			
 
				+NR_DR0 = tregister($05000008);
			
 
				+NR_DR1 = tregister($05000009);
			
 
				+NR_DR2 = tregister($0500000a);
			
 
				+NR_DR3 = tregister($0500000b);
			
 
				+NR_DR6 = tregister($0500000d);
			
 
				+NR_DR7 = tregister($0500000e);
			
 
				+NR_CR0 = tregister($05000010);
			
 
				+NR_CR2 = tregister($05000012);
			
 
				+NR_CR3 = tregister($05000013);
			
 
				+NR_CR4 = tregister($05000014);
			
 
				+NR_TR3 = tregister($0500001b);
			
 
				+NR_TR4 = tregister($0500001c);
			
 
				+NR_TR5 = tregister($0500001d);
			
 
				+NR_TR6 = tregister($0500001e);
			
 
				+NR_TR7 = tregister($0500001f);
			
 
				 NR_ST0 = tregister($02000000);
			
 
				 NR_ST1 = tregister($02000001);
			
 
				 NR_ST2 = tregister($02000002);
			
--- a/compiler/i386/r386dwrf.inc
+++ b/compiler/i386/r386dwrf.inc
@@ -24,7 +24,6 @@
 
				 5,
			
 
				 4,
			
 
				 4,
			
 
				-8,
			
 
				 -1,
			
 
				 -1,
			
 
				 -1,
			
@@ -32,6 +31,7 @@
 
				 -1,
			
 
				 -1,
			
 
				 -1,
			
 
				+8,
			
 
				 -1,
			
 
				 -1,
			
 
				 -1,
			
--- a/compiler/i386/r386int.inc
+++ b/compiler/i386/r386int.inc
@@ -24,13 +24,14 @@
 
				 'ebp',
			
 
				 'sp',
			
 
				 'esp',
			
 
				-'eip',
			
 
				-'cs',
			
 
				-'ds',
			
 
				 'es',
			
 
				+'cs',
			
 
				 'ss',
			
 
				+'ds',
			
 
				 'fs',
			
 
				 'gs',
			
 
				+'flags',
			
 
				+'eip',
			
 
				 'dr0',
			
 
				 'dr1',
			
 
				 'dr2',
			
@@ -46,7 +47,6 @@
 
				 'tr5',
			
 
				 'tr6',
			
 
				 'tr7',
			
 
				-'flags',
			
 
				 'st(0)',
			
 
				 'st(1)',
			
 
				 'st(2)',
			
--- a/compiler/i386/r386iri.inc
+++ b/compiler/i386/r386iri.inc
@@ -9,22 +9,22 @@
 
				 15,
			
 
				 6,
			
 
				 5,
			
 
				-38,
			
 
				 39,
			
 
				 40,
			
 
				 41,
			
 
				+42,
			
 
				 26,
			
 
				 7,
			
 
				 10,
			
 
				 19,
			
 
				 9,
			
 
				-32,
			
 
				 33,
			
 
				 34,
			
 
				 35,
			
 
				 36,
			
 
				 37,
			
 
				-27,
			
 
				+38,
			
 
				+28,
			
 
				 11,
			
 
				 4,
			
 
				 22,
			
@@ -32,13 +32,13 @@
 
				 8,
			
 
				 20,
			
 
				 12,
			
 
				+32,
			
 
				 25,
			
 
				-28,
			
 
				 18,
			
 
				 24,
			
 
				-47,
			
 
				-30,
			
 
				 31,
			
 
				+29,
			
 
				+30,
			
 
				 57,
			
 
				 58,
			
 
				 59,
			
@@ -49,7 +49,7 @@
 
				 64,
			
 
				 17,
			
 
				 23,
			
 
				-29,
			
 
				+27,
			
 
				 56,
			
 
				 48,
			
 
				 49,
			
@@ -59,11 +59,11 @@
 
				 53,
			
 
				 54,
			
 
				 55,
			
 
				-42,
			
 
				 43,
			
 
				 44,
			
 
				 45,
			
 
				 46,
			
 
				+47,
			
 
				 65,
			
 
				 66,
			
 
				 67,
			
--- a/compiler/i386/r386nasm.inc
+++ b/compiler/i386/r386nasm.inc
@@ -24,13 +24,14 @@
 
				 'ebp',
			
 
				 'sp',
			
 
				 'esp',
			
 
				-'eip',
			
 
				-'cs',
			
 
				-'ds',
			
 
				 'es',
			
 
				+'cs',
			
 
				 'ss',
			
 
				+'ds',
			
 
				 'fs',
			
 
				 'gs',
			
 
				+'flags',
			
 
				+'eip',
			
 
				 'dr0',
			
 
				 'dr1',
			
 
				 'dr2',
			
@@ -46,7 +47,6 @@
 
				 'tr5',
			
 
				 'tr6',
			
 
				 'tr7',
			
 
				-'flags',
			
 
				 'st0',
			
 
				 'st1',
			
 
				 'st2',
			
--- a/compiler/i386/r386nri.inc
+++ b/compiler/i386/r386nri.inc
@@ -9,22 +9,22 @@
 
				 15,
			
 
				 6,
			
 
				 5,
			
 
				-38,
			
 
				 39,
			
 
				 40,
			
 
				 41,
			
 
				+42,
			
 
				 26,
			
 
				 7,
			
 
				 10,
			
 
				 19,
			
 
				 9,
			
 
				-32,
			
 
				 33,
			
 
				 34,
			
 
				 35,
			
 
				 36,
			
 
				 37,
			
 
				-27,
			
 
				+38,
			
 
				+28,
			
 
				 11,
			
 
				 4,
			
 
				 22,
			
@@ -32,13 +32,13 @@
 
				 8,
			
 
				 20,
			
 
				 12,
			
 
				+32,
			
 
				 25,
			
 
				-28,
			
 
				 18,
			
 
				 24,
			
 
				-47,
			
 
				-30,
			
 
				 31,
			
 
				+29,
			
 
				+30,
			
 
				 57,
			
 
				 58,
			
 
				 59,
			
@@ -49,7 +49,7 @@
 
				 64,
			
 
				 17,
			
 
				 23,
			
 
				-29,
			
 
				+27,
			
 
				 56,
			
 
				 48,
			
 
				 49,
			
@@ -59,11 +59,11 @@
 
				 53,
			
 
				 54,
			
 
				 55,
			
 
				-42,
			
 
				 43,
			
 
				 44,
			
 
				 45,
			
 
				 46,
			
 
				+47,
			
 
				 65,
			
 
				 66,
			
 
				 67,
			
--- a/compiler/i386/r386num.inc
+++ b/compiler/i386/r386num.inc
@@ -24,29 +24,29 @@ tregister($01030006),
 
				 tregister($01040006),
			
 
				 tregister($01030007),
			
 
				 tregister($01040007),
			
 
				-tregister($05040000),
			
 
				+tregister($05000000),
			
 
				 tregister($05000001),
			
 
				 tregister($05000002),
			
 
				 tregister($05000003),
			
 
				 tregister($05000004),
			
 
				 tregister($05000005),
			
 
				 tregister($05000006),
			
 
				-tregister($05000007),
			
 
				+tregister($05040007),
			
 
				 tregister($05000008),
			
 
				 tregister($05000009),
			
 
				 tregister($0500000a),
			
 
				 tregister($0500000b),
			
 
				-tregister($0500000c),
			
 
				 tregister($0500000d),
			
 
				 tregister($0500000e),
			
 
				-tregister($0500000f),
			
 
				 tregister($05000010),
			
 
				-tregister($05000011),
			
 
				 tregister($05000012),
			
 
				 tregister($05000013),
			
 
				 tregister($05000014),
			
 
				-tregister($05000015),
			
 
				-tregister($05000016),
			
 
				+tregister($0500001b),
			
 
				+tregister($0500001c),
			
 
				+tregister($0500001d),
			
 
				+tregister($0500001e),
			
 
				+tregister($0500001f),
			
 
				 tregister($02000000),
			
 
				 tregister($02000001),
			
 
				 tregister($02000002),
			
--- a/compiler/i386/r386op.inc
+++ b/compiler/i386/r386op.inc
@@ -1,82 +0,0 @@
 
				-{ don't edit, this file is generated from x86reg.dat }
			
 
				-0,
			
 
				-0,
			
 
				-4,
			
 
				-0,
			
 
				-0,
			
 
				-1,
			
 
				-5,
			
 
				-1,
			
 
				-1,
			
 
				-2,
			
 
				-6,
			
 
				-2,
			
 
				-2,
			
 
				-3,
			
 
				-7,
			
 
				-3,
			
 
				-3,
			
 
				-6,
			
 
				-6,
			
 
				-7,
			
 
				-7,
			
 
				-5,
			
 
				-5,
			
 
				-4,
			
 
				-4,
			
 
				-0,
			
 
				-1,
			
 
				-3,
			
 
				-0,
			
 
				-2,
			
 
				-4,
			
 
				-5,
			
 
				-0,
			
 
				-1,
			
 
				-2,
			
 
				-3,
			
 
				-6,
			
 
				-7,
			
 
				-0,
			
 
				-2,
			
 
				-3,
			
 
				-4,
			
 
				-3,
			
 
				-4,
			
 
				-5,
			
 
				-6,
			
 
				-7,
			
 
				-0,
			
 
				-0,
			
 
				-1,
			
 
				-2,
			
 
				-3,
			
 
				-4,
			
 
				-5,
			
 
				-6,
			
 
				-7,
			
 
				-0,
			
 
				-0,
			
 
				-1,
			
 
				-2,
			
 
				-3,
			
 
				-4,
			
 
				-5,
			
 
				-6,
			
 
				-7,
			
 
				-0,
			
 
				-1,
			
 
				-2,
			
 
				-3,
			
 
				-4,
			
 
				-5,
			
 
				-6,
			
 
				-7,
			
 
				-0,
			
 
				-1,
			
 
				-2,
			
 
				-3,
			
 
				-4,
			
 
				-5,
			
 
				-6,
			
 
				-7
			
--- a/compiler/i386/r386ot.inc
+++ b/compiler/i386/r386ot.inc
@@ -24,13 +24,14 @@ OT_REG16,
 
				 OT_REG32,
			
 
				 OT_REG16,
			
 
				 OT_REG32,
			
 
				-OT_NONE,
			
 
				-OT_REG_CS,
			
 
				 OT_REG_DESS,
			
 
				+OT_REG_CS,
			
 
				 OT_REG_DESS,
			
 
				 OT_REG_DESS,
			
 
				 OT_REG_FSGS,
			
 
				 OT_REG_FSGS,
			
 
				+OT_NONE,
			
 
				+OT_NONE,
			
 
				 OT_REG_DREG,
			
 
				 OT_REG_DREG,
			
 
				 OT_REG_DREG,
			
@@ -46,7 +47,6 @@ OT_REG_TREG,
 
				 OT_REG_TREG,
			
 
				 OT_REG_TREG,
			
 
				 OT_REG_TREG,
			
 
				-OT_NONE,
			
 
				 OT_FPU0,
			
 
				 OT_FPUREG,
			
 
				 OT_FPUREG,
			
--- a/compiler/i386/r386rni.inc
+++ b/compiler/i386/r386rni.inc
@@ -57,13 +57,13 @@
 
				 78,
			
 
				 79,
			
 
				 80,
			
 
				+25,
			
 
				 26,
			
 
				 27,
			
 
				 28,
			
 
				 29,
			
 
				 30,
			
 
				 31,
			
 
				-32,
			
 
				 33,
			
 
				 34,
			
 
				 35,
			
@@ -79,4 +79,4 @@
 
				 45,
			
 
				 46,
			
 
				 47,
			
 
				-25
			
 
				+32
			
--- a/compiler/i386/r386sri.inc
+++ b/compiler/i386/r386sri.inc
@@ -9,22 +9,22 @@
 
				 15,
			
 
				 6,
			
 
				 5,
			
 
				-38,
			
 
				 39,
			
 
				 40,
			
 
				 41,
			
 
				+42,
			
 
				 26,
			
 
				 7,
			
 
				 10,
			
 
				 19,
			
 
				 9,
			
 
				-32,
			
 
				 33,
			
 
				 34,
			
 
				 35,
			
 
				 36,
			
 
				 37,
			
 
				-27,
			
 
				+38,
			
 
				+28,
			
 
				 11,
			
 
				 4,
			
 
				 22,
			
@@ -32,13 +32,13 @@
 
				 8,
			
 
				 20,
			
 
				 12,
			
 
				+32,
			
 
				 25,
			
 
				-28,
			
 
				 18,
			
 
				 24,
			
 
				-47,
			
 
				-30,
			
 
				 31,
			
 
				+29,
			
 
				+30,
			
 
				 57,
			
 
				 58,
			
 
				 59,
			
@@ -49,7 +49,7 @@
 
				 64,
			
 
				 17,
			
 
				 23,
			
 
				-29,
			
 
				+27,
			
 
				 56,
			
 
				 48,
			
 
				 49,
			
@@ -59,11 +59,11 @@
 
				 53,
			
 
				 54,
			
 
				 55,
			
 
				-42,
			
 
				 43,
			
 
				 44,
			
 
				 45,
			
 
				 46,
			
 
				+47,
			
 
				 65,
			
 
				 66,
			
 
				 67,
			
--- a/compiler/i386/r386std.inc
+++ b/compiler/i386/r386std.inc
@@ -24,13 +24,14 @@
 
				 'ebp',
			
 
				 'sp',
			
 
				 'esp',
			
 
				-'eip',
			
 
				-'cs',
			
 
				-'ds',
			
 
				 'es',
			
 
				+'cs',
			
 
				 'ss',
			
 
				+'ds',
			
 
				 'fs',
			
 
				 'gs',
			
 
				+'flags',
			
 
				+'eip',
			
 
				 'dr0',
			
 
				 'dr1',
			
 
				 'dr2',
			
@@ -46,7 +47,6 @@
 
				 'tr5',
			
 
				 'tr6',
			
 
				 'tr7',
			
 
				-'flags',
			
 
				 'st(0)',
			
 
				 'st(1)',
			
 
				 'st(2)',
			
--- a/compiler/i386/rropt386.pas
+++ b/compiler/i386/rropt386.pas
@@ -206,6 +206,7 @@ begin
 
				   sequenceEnd := false;
			
 
				   reg1Modified := false;
			
 
				   reg2Modified := false;
			
 
				+  switchLast := false;
			
 
				   endP := start;
			
 
				   while tmpResult and not sequenceEnd do
			
 
				     begin
			
--- a/compiler/i386/symcpu.pas
+++ b/compiler/i386/symcpu.pas
@@ -0,0 +1,211 @@
 
				+{
			
 
				+    Copyright (c) 2014 by Florian Klaempfl
			
 
				+
			
 
				+    Symbol table overrides for i386
			
 
				+
			
 
				+    This program is free software; you can redistribute it and/or modify
			
 
				+    it under the terms of the GNU General Public License as published by
			
 
				+    the Free Software Foundation; either version 2 of the License, or
			
 
				+    (at your option) any later version.
			
 
				+
			
 
				+    This program is distributed in the hope that it will be useful,
			
 
				+    but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+    GNU General Public License for more details.
			
 
				+
			
 
				+    You should have received a copy of the GNU General Public License
			
 
				+    along with this program; if not, write to the Free Software
			
 
				+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
			
 
				+
			
 
				+ ****************************************************************************
			
 
				+}
			
 
				+unit symcpu;
			
 
				+
			
 
				+{$i fpcdefs.inc}
			
 
				+
			
 
				+interface
			
 
				+
			
 
				+uses
			
 
				+  symtype,symdef,symsym,symx86,symi86;
			
 
				+
			
 
				+type
			
 
				+  { defs }
			
 
				+  tcpufiledef = class(tfiledef)
			
 
				+  end;
			
 
				+  tcpufiledefclass = class of tcpufiledef;
			
 
				+
			
 
				+  tcpuvariantdef = class(tvariantdef)
			
 
				+  end;
			
 
				+  tcpuvariantdefclass = class of tcpuvariantdef;
			
 
				+
			
 
				+  tcpuformaldef = class(tformaldef)
			
 
				+  end;
			
 
				+  tcpuformaldefclass = class of tcpuformaldef;
			
 
				+
			
 
				+  tcpuforwarddef = class(tforwarddef)
			
 
				+  end;
			
 
				+  tcpuforwarddefclass = class of tcpuforwarddef;
			
 
				+
			
 
				+  tcpuundefineddef = class(tundefineddef)
			
 
				+  end;
			
 
				+  tcpuundefineddefclass = class of tcpuundefineddef;
			
 
				+
			
 
				+  tcpuerrordef = class(terrordef)
			
 
				+  end;
			
 
				+  tcpuerrordefclass = class of tcpuerrordef;
			
 
				+
			
 
				+  tcpupointerdef = class(tx86pointerdef)
			
 
				+  end;
			
 
				+  tcpupointerdefclass = class of tcpupointerdef;
			
 
				+
			
 
				+  tcpurecorddef = class(trecorddef)
			
 
				+  end;
			
 
				+  tcpurecorddefclass = class of tcpurecorddef;
			
 
				+
			
 
				+  tcpuimplementedinterface = class(timplementedinterface)
			
 
				+  end;
			
 
				+  tcpuimplementedinterfaceclass = class of tcpuimplementedinterface;
			
 
				+
			
 
				+  tcpuobjectdef = class(tobjectdef)
			
 
				+  end;
			
 
				+  tcpuobjectdefclass = class of tcpuobjectdef;
			
 
				+
			
 
				+  tcpuclassrefdef = class(tclassrefdef)
			
 
				+  end;
			
 
				+  tcpuclassrefdefclass = class of tcpuclassrefdef;
			
 
				+
			
 
				+  tcpuarraydef = class(tarraydef)
			
 
				+  end;
			
 
				+  tcpuarraydefclass = class of tcpuarraydef;
			
 
				+
			
 
				+  tcpuorddef = class(torddef)
			
 
				+  end;
			
 
				+  tcpuorddefclass = class of tcpuorddef;
			
 
				+
			
 
				+  tcpufloatdef = class(tfloatdef)
			
 
				+  end;
			
 
				+  tcpufloatdefclass = class of tcpufloatdef;
			
 
				+
			
 
				+  tcpuprocvardef = class(ti86procvardef)
			
 
				+  end;
			
 
				+  tcpuprocvardefclass = class of tcpuprocvardef;
			
 
				+
			
 
				+  tcpuprocdef = class(ti86procdef)
			
 
				+  end;
			
 
				+  tcpuprocdefclass = class of tcpuprocdef;
			
 
				+
			
 
				+  tcpustringdef = class(tstringdef)
			
 
				+  end;
			
 
				+  tcpustringdefclass = class of tcpustringdef;
			
 
				+
			
 
				+  tcpuenumdef = class(tenumdef)
			
 
				+  end;
			
 
				+  tcpuenumdefclass = class of tcpuenumdef;
			
 
				+
			
 
				+  tcpusetdef = class(tsetdef)
			
 
				+  end;
			
 
				+  tcpusetdefclass = class of tcpusetdef;
			
 
				+
			
 
				+  { syms }
			
 
				+  tcpulabelsym = class(tlabelsym)
			
 
				+  end;
			
 
				+  tcpulabelsymclass = class of tcpulabelsym;
			
 
				+
			
 
				+  tcpuunitsym = class(tunitsym)
			
 
				+  end;
			
 
				+  tcpuunitsymclass = class of tcpuunitsym;
			
 
				+
			
 
				+  tcpunamespacesym = class(tnamespacesym)
			
 
				+  end;
			
 
				+  tcpunamespacesymclass = class of tcpunamespacesym;
			
 
				+
			
 
				+  tcpuprocsym = class(tprocsym)
			
 
				+  end;
			
 
				+  tcpuprocsymclass = class of tcpuprocsym;
			
 
				+
			
 
				+  tcputypesym = class(ttypesym)
			
 
				+  end;
			
 
				+  tcpuypesymclass = class of tcputypesym;
			
 
				+
			
 
				+  tcpufieldvarsym = class(tfieldvarsym)
			
 
				+  end;
			
 
				+  tcpufieldvarsymclass = class of tcpufieldvarsym;
			
 
				+
			
 
				+  tcpulocalvarsym = class(tlocalvarsym)
			
 
				+  end;
			
 
				+  tcpulocalvarsymclass = class of tcpulocalvarsym;
			
 
				+
			
 
				+  tcpuparavarsym = class(tparavarsym)
			
 
				+  end;
			
 
				+  tcpuparavarsymclass = class of tcpuparavarsym;
			
 
				+
			
 
				+  tcpustaticvarsym = class(tstaticvarsym)
			
 
				+  end;
			
 
				+  tcpustaticvarsymclass = class of tcpustaticvarsym;
			
 
				+
			
 
				+  tcpuabsolutevarsym = class(ti86absolutevarsym)
			
 
				+  end;
			
 
				+  tcpuabsolutevarsymclass = class of tcpuabsolutevarsym;
			
 
				+
			
 
				+  tcpupropertysym = class(tpropertysym)
			
 
				+  end;
			
 
				+  tcpupropertysymclass = class of tcpupropertysym;
			
 
				+
			
 
				+  tcpuconstsym = class(tconstsym)
			
 
				+  end;
			
 
				+  tcpuconstsymclass = class of tcpuconstsym;
			
 
				+
			
 
				+  tcpuenumsym = class(tenumsym)
			
 
				+  end;
			
 
				+  tcpuenumsymclass = class of tcpuenumsym;
			
 
				+
			
 
				+  tcpusyssym = class(tsyssym)
			
 
				+  end;
			
 
				+  tcpusyssymclass = class of tcpusyssym;
			
 
				+
			
 
				+
			
 
				+const
			
 
				+  pbestrealtype : ^tdef = @s80floattype;
			
 
				+
			
 
				+
			
 
				+implementation
			
 
				+
			
 
				+begin
			
 
				+  { used tdef classes }
			
 
				+  cfiledef:=tcpufiledef;
			
 
				+  cvariantdef:=tcpuvariantdef;
			
 
				+  cformaldef:=tcpuformaldef;
			
 
				+  cforwarddef:=tcpuforwarddef;
			
 
				+  cundefineddef:=tcpuundefineddef;
			
 
				+  cerrordef:=tcpuerrordef;
			
 
				+  cpointerdef:=tcpupointerdef;
			
 
				+  crecorddef:=tcpurecorddef;
			
 
				+  cimplementedinterface:=tcpuimplementedinterface;
			
 
				+  cobjectdef:=tcpuobjectdef;
			
 
				+  cclassrefdef:=tcpuclassrefdef;
			
 
				+  carraydef:=tcpuarraydef;
			
 
				+  corddef:=tcpuorddef;
			
 
				+  cfloatdef:=tcpufloatdef;
			
 
				+  cprocvardef:=tcpuprocvardef;
			
 
				+  cprocdef:=tcpuprocdef;
			
 
				+  cstringdef:=tcpustringdef;
			
 
				+  cenumdef:=tcpuenumdef;
			
 
				+  csetdef:=tcpusetdef;
			
 
				+
			
 
				+  { used tsym classes }
			
 
				+  clabelsym:=tcpulabelsym;
			
 
				+  cunitsym:=tcpuunitsym;
			
 
				+  cnamespacesym:=tcpunamespacesym;
			
 
				+  cprocsym:=tcpuprocsym;
			
 
				+  ctypesym:=tcputypesym;
			
 
				+  cfieldvarsym:=tcpufieldvarsym;
			
 
				+  clocalvarsym:=tcpulocalvarsym;
			
 
				+  cparavarsym:=tcpuparavarsym;
			
 
				+  cstaticvarsym:=tcpustaticvarsym;
			
 
				+  cabsolutevarsym:=tcpuabsolutevarsym;
			
 
				+  cpropertysym:=tcpupropertysym;
			
 
				+  cconstsym:=tcpuconstsym;
			
 
				+  cenumsym:=tcpuenumsym;
			
 
				+  csyssym:=tcpusyssym;
			
 
				+end.
			
 
				+