Browse Source

+ implemented OP_SHR, OP_SHL and OP_SAR in a_op64_const_reg for i8086. The shlshr
node now uses these (OP_SHL and OP_SHR) for 64-bit shifts by a constant.

git-svn-id: trunk@36017 -

nickysn 8 years ago
parent
commit
a82c89d894
2 changed files with 209 additions and 80 deletions
  1. 198 0
      compiler/i8086/cgcpu.pas
  2. 11 80
      compiler/i8086/n8086mat.pas

+ 198 - 0
compiler/i8086/cgcpu.pas

@@ -2913,6 +2913,8 @@ unit cgcpu;
     procedure tcg64f8086.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
       var
         op1,op2 : TAsmOp;
+        loop_start: TAsmLabel;
+        ai: taicpu;
       begin
         case op of
           OP_AND,OP_OR,OP_XOR:
@@ -2954,6 +2956,202 @@ unit cgcpu;
                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
                 end;
             end;
+          OP_SHR,OP_SHL,OP_SAR:
+            begin
+              value:=value and 63;
+              case value of
+                0:
+                  { ultra hyper fast shift by 0 };
+                1:
+                  case op of
+                    OP_SHL:
+                      begin
+                        list.concat(taicpu.op_const_reg(A_SHL,S_W,1,reg.reglo));
+                        list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg.reglo)));
+                        list.concat(taicpu.op_const_reg(A_RCL,S_W,1,reg.reghi));
+                        list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg.reghi)));
+                      end;
+                    OP_SHR,OP_SAR:
+                      begin
+                        cg.a_op_const_reg(list,op,OS_16,1,GetNextReg(reg.reghi));
+                        list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reghi));
+                        list.concat(taicpu.op_const_reg(A_RCR,S_W,1,GetNextReg(reg.reglo)));
+                        list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reglo));
+                      end;
+                  end;
+                2..15:
+                  begin
+                    cg.getcpuregister(list,NR_CX);
+                    cg.a_load_const_reg(list,OS_16,value,NR_CX);
+                    current_asmdata.getjumplabel(loop_start);
+                    cg.a_label(list,loop_start);
+                    case op of
+                      OP_SHL:
+                        begin
+                          list.concat(taicpu.op_const_reg(A_SHL,S_W,1,reg.reglo));
+                          list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg.reglo)));
+                          list.concat(taicpu.op_const_reg(A_RCL,S_W,1,reg.reghi));
+                          list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg.reghi)));
+                        end;
+                      OP_SHR,OP_SAR:
+                        begin
+                          cg.a_op_const_reg(list,op,OS_16,1,GetNextReg(reg.reghi));
+                          list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reghi));
+                          list.concat(taicpu.op_const_reg(A_RCR,S_W,1,GetNextReg(reg.reglo)));
+                          list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reglo));
+                        end;
+                    end;
+                    ai:=Taicpu.Op_Sym(A_LOOP,S_W,loop_start);
+                    ai.is_jmp := True;
+                    list.Concat(ai);
+                    cg.ungetcpuregister(list,NR_CX);
+                  end;
+                16,17:
+                  begin
+                    case op of
+                      OP_SHL:
+                        begin
+                          cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,GetNextReg(reg.reghi));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reglo),reg.reghi);
+                          cg.a_load_reg_reg(list,OS_16,OS_16,reg.reglo,GetNextReg(reg.reglo));
+                          cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reglo,reg.reglo);
+                        end;
+                      OP_SHR:
+                        begin
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reglo),reg.reglo);
+                          cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,GetNextReg(reg.reglo));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reghi),reg.reghi);
+                          cg.a_op_reg_reg(list,OP_XOR,OS_16,GetNextReg(reg.reghi),GetNextReg(reg.reghi));
+                        end;
+                      OP_SAR:
+                        begin
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reglo),reg.reglo);
+                          cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,GetNextReg(reg.reglo));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reghi),reg.reghi);
+                          cg.a_op_const_reg(list,OP_SAR,OS_16,15,GetNextReg(reg.reghi));
+                        end;
+                    end;
+                    if value=17 then
+                      case op of
+                        OP_SHL:
+                          begin
+                            list.concat(taicpu.op_const_reg(A_SHL,S_W,1,GetNextReg(reg.reglo)));
+                            list.concat(taicpu.op_const_reg(A_RCL,S_W,1,reg.reghi));
+                            list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg.reghi)));
+                          end;
+                        OP_SHR,OP_SAR:
+                          begin
+                            cg.a_op_const_reg(list,op,OS_16,1,reg.reghi);
+                            list.concat(taicpu.op_const_reg(A_RCR,S_W,1,GetNextReg(reg.reglo)));
+                            list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reglo));
+                          end;
+                      end;
+                  end;
+                18..31:
+                  begin
+                    case op of
+                      OP_SHL:
+                        begin
+                          cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,GetNextReg(reg.reghi));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reglo),reg.reghi);
+                          cg.a_load_reg_reg(list,OS_16,OS_16,reg.reglo,GetNextReg(reg.reglo));
+                          cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reglo,reg.reglo);
+                        end;
+                      OP_SHR:
+                        begin
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reglo),reg.reglo);
+                          cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,GetNextReg(reg.reglo));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reghi),reg.reghi);
+                          cg.a_op_reg_reg(list,OP_XOR,OS_16,GetNextReg(reg.reghi),GetNextReg(reg.reghi));
+                        end;
+                      OP_SAR:
+                        begin
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reglo),reg.reglo);
+                          cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,GetNextReg(reg.reglo));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reghi),reg.reghi);
+                          cg.a_op_const_reg(list,OP_SAR,OS_16,15,GetNextReg(reg.reghi));
+                        end;
+                    end;
+                    cg.getcpuregister(list,NR_CX);
+                    cg.a_load_const_reg(list,OS_16,value-16,NR_CX);
+                    current_asmdata.getjumplabel(loop_start);
+                    cg.a_label(list,loop_start);
+                    case op of
+                      OP_SHL:
+                        begin
+                          list.concat(taicpu.op_const_reg(A_SHL,S_W,1,GetNextReg(reg.reglo)));
+                          list.concat(taicpu.op_const_reg(A_RCL,S_W,1,reg.reghi));
+                          list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg.reghi)));
+                        end;
+                      OP_SHR,OP_SAR:
+                        begin
+                          cg.a_op_const_reg(list,op,OS_16,1,reg.reghi);
+                          list.concat(taicpu.op_const_reg(A_RCR,S_W,1,GetNextReg(reg.reglo)));
+                          list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg.reglo));
+                        end;
+                    end;
+                    ai:=Taicpu.Op_Sym(A_LOOP,S_W,loop_start);
+                    ai.is_jmp := True;
+                    list.Concat(ai);
+                    cg.ungetcpuregister(list,NR_CX);
+                  end;
+                32..47:
+                  case op of
+                    OP_SHL:
+                      begin
+                        cg.a_op_const_reg_reg(list,OP_SHL,OS_32,value-32,reg.reglo,reg.reghi);
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reglo,reg.reglo);
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,GetNextReg(reg.reglo),GetNextReg(reg.reglo));
+                      end;
+                    OP_SHR:
+                      begin
+                        cg.a_op_const_reg_reg(list,OP_SHR,OS_32,value-32,reg.reghi,reg.reglo);
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reghi,reg.reghi);
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,GetNextReg(reg.reghi),GetNextReg(reg.reghi));
+                      end;
+                    OP_SAR:
+                      begin
+                        cg.a_op_const_reg_reg(list,OP_SAR,OS_32,value-32,reg.reghi,reg.reglo);
+                        cg.a_op_const_reg_reg(list,OP_SAR,OS_16,15-(value-32),GetNextReg(reg.reglo),reg.reghi);
+                        cg.a_load_reg_reg(list,OS_16,OS_16,reg.reghi,GetNextReg(reg.reghi));
+                      end;
+                  end;
+                48..63:
+                  case op of
+                    OP_SHL:
+                      begin
+                        cg.a_load_reg_reg(list,OS_16,OS_16,reg.reglo,GetNextReg(reg.reghi));
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reglo,reg.reglo);
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,GetNextReg(reg.reglo),GetNextReg(reg.reglo));
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reghi,reg.reghi);
+                        cg.a_op_const_reg(list,OP_SHL,OS_16,value-48,GetNextReg(reg.reghi));
+                      end;
+                    OP_SHR:
+                      begin
+                        cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reghi),reg.reglo);
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,GetNextReg(reg.reghi),GetNextReg(reg.reghi));
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,reg.reghi,reg.reghi);
+                        cg.a_op_reg_reg(list,OP_XOR,OS_16,GetNextReg(reg.reglo),GetNextReg(reg.reglo));
+                        cg.a_op_const_reg(list,OP_SHR,OS_16,value-48,reg.reglo);
+                      end;
+                    OP_SAR:
+                      if value=63 then
+                        begin
+                          cg.a_op_const_reg(list,OP_SAR,OS_16,15,GetNextReg(reg.reghi));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reghi),reg.reghi);
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reghi),GetNextReg(reg.reglo));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reghi),reg.reglo);
+                        end
+                      else
+                        begin
+                          cg.a_op_const_reg_reg(list,OP_SAR,OS_16,value-48,GetNextReg(reg.reghi),reg.reglo);
+                          cg.a_op_const_reg_reg(list,OP_SAR,OS_16,15-(value-48),reg.reglo,GetNextReg(reg.reglo));
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reglo),reg.reghi);
+                          cg.a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg.reglo),GetNextReg(reg.reghi));
+                        end;
+                  end;
+              end;
+            end;
           else
             internalerror(200204021);
         end;

+ 11 - 80
compiler/i8086/n8086mat.pas

@@ -400,100 +400,31 @@ implementation
         location.register64.reglo:=hreg64lo;
         location.register64.reghi:=hreg64hi;
 
-        v:=0;
         if right.nodetype=ordconstn then
-          v:=Tordconstnode(right).value and 63;
-
-        { shifting by 0 directly coded: }
-        if (right.nodetype=ordconstn) and (v=0) then
-          begin
-            { ultra hyper fast shift by 0 }
-          end
-        { shifting by 1 directly coded: }
-        else if (right.nodetype=ordconstn) and (v=1) then
-          begin
-            if nodetype=shln then
-              begin
-                emit_const_reg(A_SHL,S_W,1,hreg64lo);
-                emit_const_reg(A_RCL,S_W,1,GetNextReg(hreg64lo));
-                emit_const_reg(A_RCL,S_W,1,hreg64hi);
-                emit_const_reg(A_RCL,S_W,1,GetNextReg(hreg64hi));
-              end
-            else
-              begin
-                emit_const_reg(A_SHR,S_W,1,GetNextReg(hreg64hi));
-                emit_const_reg(A_RCR,S_W,1,hreg64hi);
-                emit_const_reg(A_RCR,S_W,1,GetNextReg(hreg64lo));
-                emit_const_reg(A_RCR,S_W,1,hreg64lo);
-              end;
-          end
-        { shifting by >=48 }
-        else if (right.nodetype=ordconstn) and (v>=48) then
           begin
+            v:=Tordconstnode(right).value and 63;
+            location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+            location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
             if nodetype=shln then
-              begin
-                cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_16,OS_16,hreg64lo,GetNextReg(hreg64hi));
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64lo,hreg64lo);
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64lo),GetNextReg(hreg64lo));
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64hi,hreg64hi);
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHL,OS_16,v-48,GetNextReg(hreg64hi));
-              end
+              cg64.a_op64_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_64,v,left.location.register64,location.register64)
             else
-              begin
-                cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_16,OS_16,GetNextReg(hreg64hi),hreg64lo);
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64hi),GetNextReg(hreg64hi));
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64hi,hreg64hi);
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64lo),GetNextReg(hreg64lo));
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,v-48,hreg64lo);
-              end;
-          end
-        { shifting by 32..47 }
-        else if (right.nodetype=ordconstn) and (v>=32) and (v<=47) then
-          begin
-            if nodetype=shln then
-              begin
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64hi,hreg64hi);
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64hi),GetNextReg(hreg64hi));
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHL,OS_32,v-32,hreg64lo);
-              end
-            else
-              begin
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64lo,hreg64lo);
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64lo),GetNextReg(hreg64lo));
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_32,v-32,hreg64hi);
-              end;
-            location.register64.reghi:=hreg64lo;
-            location.register64.reglo:=hreg64hi;
+              cg64.a_op64_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_64,v,left.location.register64,location.register64);
           end
         else
           begin
             { load right operators in a register }
             cg.getcpuregister(current_asmdata.CurrAsmList,NR_CX);
 
-            { shifting by a constant? }
-            if right.nodetype=ordconstn then
-              begin
-                v:=Tordconstnode(right).value and 63;
-                hlcg.a_load_const_reg(current_asmdata.CurrAsmList,u16inttype,v,NR_CX);
-              end
-            else
-              begin
-                hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u16inttype,right.location,NR_CX);
+            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u16inttype,right.location,NR_CX);
 
-                { left operator is already in a register }
-                { hence are both in a register }
-                { is it in the case CX ? }
-              end;
+            { left operator is already in a register }
+            { hence are both in a register }
+            { is it in the case CX ? }
 
             current_asmdata.getjumplabel(l2);
             current_asmdata.getjumplabel(l3);
-            { for consts, we don't need the extra checks for 0 or >= 64, since
-              we've already handled them earlier as a special case }
-            if right.nodetype<>ordconstn then
-              begin
-                emit_const_reg(A_AND,S_W,63,NR_CX);
-                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_E,l3);
-              end;
+            emit_const_reg(A_AND,S_W,63,NR_CX);
+            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_E,l3);
             cg.a_label(current_asmdata.CurrAsmList,l2);
             if nodetype=shln then
               begin