Browse Source

+ ror/rol functions
+ internal compiler support for ror/rol on i386

git-svn-id: trunk@11466 -

florian 17 years ago
parent
commit
1afb1aa9cc

+ 1 - 0
.gitattributes

@@ -7697,6 +7697,7 @@ tests/test/trecreg2.pp svneol=native#text/plain
 tests/test/trecreg3.pp -text
 tests/test/trecreg3.pp -text
 tests/test/trecreg4.pp svneol=native#text/plain
 tests/test/trecreg4.pp svneol=native#text/plain
 tests/test/tresstr.pp svneol=native#text/plain
 tests/test/tresstr.pp svneol=native#text/plain
+tests/test/trox1.pp svneol=native#text/plain
 tests/test/trstr1.pp svneol=native#text/plain
 tests/test/trstr1.pp svneol=native#text/plain
 tests/test/trstr2.pp svneol=native#text/plain
 tests/test/trstr2.pp svneol=native#text/plain
 tests/test/trstr3.pp svneol=native#text/plain
 tests/test/trstr3.pp svneol=native#text/plain

+ 1 - 1
compiler/arm/cgcpu.pas

@@ -347,7 +347,7 @@ unit cgcpu;
     const
     const
       op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
       op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
         (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
         (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
-         A_NONE,A_NONE,A_NONE,A_SUB,A_EOR);
+         A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
 
 
 
 
     procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
     procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;

+ 4 - 2
compiler/cgbase.pas

@@ -102,7 +102,9 @@ interface
           OP_SHL,       { logical shift left       }
           OP_SHL,       { logical shift left       }
           OP_SHR,       { logical shift right      }
           OP_SHR,       { logical shift right      }
           OP_SUB,       { simple subtraction       }
           OP_SUB,       { simple subtraction       }
-          OP_XOR        { simple exclusive or      }
+          OP_XOR,       { simple exclusive or      }
+          OP_ROL,       { rotate left              }
+          OP_ROR        { rotate right             }
         );
         );
 
 
        {# Generic flag values - used for jump locations }
        {# Generic flag values - used for jump locations }
@@ -630,7 +632,7 @@ implementation
       const
       const
         list: array[topcg] of boolean =
         list: array[topcg] of boolean =
           (true,false,true,true,false,false,true,true,false,false,
           (true,false,true,true,false,false,true,true,false,false,
-           true,false,false,false,false,true);
+           true,false,false,false,false,true,false,false);
       begin
       begin
         commutativeop := list[op];
         commutativeop := list[op];
       end;
       end;

+ 4 - 4
compiler/cgobj.pas

@@ -1579,7 +1579,7 @@ implementation
                   SL_SETZERO,
                   SL_SETZERO,
                   SL_SETMAX:
                   SL_SETMAX:
                     a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,tosreg,slopt);
                     a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,tosreg,slopt);
-                  else                 
+                  else
                     a_load_subsetreg_subsetreg(list,subsetsize,subsetsize,fromsreg,tosreg);
                     a_load_subsetreg_subsetreg(list,subsetsize,subsetsize,fromsreg,tosreg);
                 end;
                 end;
                 valuereg := makeregsize(list,valuereg,loadsize);
                 valuereg := makeregsize(list,valuereg,loadsize);
@@ -1611,7 +1611,7 @@ implementation
                   SL_SETZERO,
                   SL_SETZERO,
                   SL_SETMAX:
                   SL_SETMAX:
                     a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,tosreg,slopt);
                     a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,tosreg,slopt);
-                  else                 
+                  else
                     a_load_subsetreg_subsetreg(list,subsetsize,subsetsize,fromsreg,tosreg);
                     a_load_subsetreg_subsetreg(list,subsetsize,subsetsize,fromsreg,tosreg);
                 end;
                 end;
                 extra_value_reg := makeregsize(list,extra_value_reg,loadsize);
                 extra_value_reg := makeregsize(list,extra_value_reg,loadsize);
@@ -2465,7 +2465,7 @@ implementation
                if a = 0 then
                if a = 0 then
                  op:=OP_NONE;
                  op:=OP_NONE;
             end;
             end;
-        OP_SAR,OP_SHL,OP_SHR:
+        OP_SAR,OP_SHL,OP_SHR,OP_ROL,OP_ROR:
            begin
            begin
               if a = 0 then
               if a = 0 then
                 op:=OP_NONE;
                 op:=OP_NONE;
@@ -3870,7 +3870,7 @@ implementation
       begin
       begin
         internalerror(200807234);
         internalerror(200807234);
       end;
       end;
-      
+
 
 
     function tcg.getflagregister(list: TAsmList; size: Tcgsize): Tregister;
     function tcg.getflagregister(list: TAsmList; size: Tcgsize): Tregister;
       begin
       begin

+ 5 - 0
compiler/compinnr.inc

@@ -72,6 +72,11 @@ const
    in_writestr_x        = 62;
    in_writestr_x        = 62;
    in_readstr_x         = 63;
    in_readstr_x         = 63;
    in_abs_long          = 64;
    in_abs_long          = 64;
+   in_ror_x             = 65;
+   in_ror_x_x           = 66;
+   in_rol_x             = 67;
+   in_rol_x_x           = 68;
+
 
 
 { Internal constant functions }
 { Internal constant functions }
    in_const_sqr        = 100;
    in_const_sqr        = 100;

+ 6 - 4
compiler/m68k/cgcpu.pas

@@ -158,7 +158,9 @@ unit cgcpu;
        A_LSL,
        A_LSL,
        A_LSR,
        A_LSR,
        A_SUB,
        A_SUB,
-       A_EOR
+       A_EOR,
+       A_NONE,
+       A_NONE
       );
       );
 
 
 
 
@@ -1306,13 +1308,13 @@ unit cgcpu;
 	        { size can't be negative }
 	        { size can't be negative }
 		if (localsize < 0) then
 		if (localsize < 0) then
 		  internalerror(2006122601);
 		  internalerror(2006122601);
-	      
+	
                 { Not to complicate the code generator too much, and since some }
                 { Not to complicate the code generator too much, and since some }
                 { of the systems only support this format, the localsize cannot }
                 { of the systems only support this format, the localsize cannot }
                 { exceed 32K in size.                                           }
                 { exceed 32K in size.                                           }
                 if (localsize > high(smallint)) then
                 if (localsize > high(smallint)) then
                   CGMessage(cg_e_localsize_too_big);
                   CGMessage(cg_e_localsize_too_big);
-                
+
                 list.concat(taicpu.op_reg_const(A_LINK,S_W,NR_FRAME_POINTER_REG,-localsize));
                 list.concat(taicpu.op_reg_const(A_LINK,S_W,NR_FRAME_POINTER_REG,-localsize));
 	      end
 	      end
 	    else
 	    else
@@ -1324,7 +1326,7 @@ unit cgcpu;
 		  two moves. So, use a link in #0 case too, for now. I'm not
 		  two moves. So, use a link in #0 case too, for now. I'm not
 		  really sure tho', that LINK supports #0 disposition, but i
 		  really sure tho', that LINK supports #0 disposition, but i
 		  see no reason why it shouldn't support it. (KB) }
 		  see no reason why it shouldn't support it. (KB) }
-		  
+		
 	        { when localsize = 0, use two moves, instead of link }
 	        { when localsize = 0, use two moves, instead of link }
 		r:=NR_FRAME_POINTER_REG;
 		r:=NR_FRAME_POINTER_REG;
 		rsp:=NR_STACK_POINTER_REG;
 		rsp:=NR_STACK_POINTER_REG;

+ 84 - 20
compiler/ncginl.pas

@@ -54,6 +54,7 @@ interface
           procedure second_round_real; virtual;
           procedure second_round_real; virtual;
           procedure second_trunc_real; virtual;
           procedure second_trunc_real; virtual;
           procedure second_abs_long; virtual;
           procedure second_abs_long; virtual;
+          procedure second_rox; virtual;
        end;
        end;
 
 
 implementation
 implementation
@@ -161,6 +162,11 @@ implementation
                    end;
                    end;
               end;
               end;
 {$endif SUPPORT_MMX}
 {$endif SUPPORT_MMX}
+            in_rol_x,
+            in_rol_x_x,
+            in_ror_x,
+            in_ror_x_x:
+              second_rox;
             else internalerror(9);
             else internalerror(9);
          end;
          end;
       end;
       end;
@@ -689,29 +695,87 @@ implementation
     end;
     end;
 
 
     procedure Tcginlinenode.second_get_caller_addr;
     procedure Tcginlinenode.second_get_caller_addr;
+      var
+        frame_ref:Treference;
+      begin
+        if current_procinfo.framepointer=NR_STACK_POINTER_REG then
+          begin
+            location_reset(location,LOC_REGISTER,OS_ADDR);
+            location.register:=cg.getaddressregister(current_asmdata.currasmlist);
+            reference_reset_base(frame_ref,NR_STACK_POINTER_REG,{current_procinfo.calc_stackframe_size}tg.lasttemp);
+            cg.a_load_ref_reg(current_asmdata.currasmlist,OS_ADDR,OS_ADDR,frame_ref,location.register);
+          end
+        else
+          begin
+            location_reset(location,LOC_REGISTER,OS_ADDR);
+            location.register:=cg.getaddressregister(current_asmdata.currasmlist);
+          {$ifdef cpu64bitaddr}
+            reference_reset_base(frame_ref,current_procinfo.framepointer,8);
+          {$else}
+            reference_reset_base(frame_ref,current_procinfo.framepointer,4);
+          {$endif}
+            cg.a_load_ref_reg(current_asmdata.currasmlist,OS_ADDR,OS_ADDR,frame_ref,location.register);
+          end;
+      end;
 
 
-    var frame_ref:Treference;
 
 
-    begin
-      if current_procinfo.framepointer=NR_STACK_POINTER_REG then
-        begin
-          location_reset(location,LOC_REGISTER,OS_ADDR);
-          location.register:=cg.getaddressregister(current_asmdata.currasmlist);
-          reference_reset_base(frame_ref,NR_STACK_POINTER_REG,{current_procinfo.calc_stackframe_size}tg.lasttemp);
-          cg.a_load_ref_reg(current_asmdata.currasmlist,OS_ADDR,OS_ADDR,frame_ref,location.register);
-        end
-      else
-        begin
-          location_reset(location,LOC_REGISTER,OS_ADDR);
-          location.register:=cg.getaddressregister(current_asmdata.currasmlist);
-        {$ifdef cpu64bitaddr}
-          reference_reset_base(frame_ref,current_procinfo.framepointer,8);
-        {$else}
-          reference_reset_base(frame_ref,current_procinfo.framepointer,4);
-        {$endif}
-          cg.a_load_ref_reg(current_asmdata.currasmlist,OS_ADDR,OS_ADDR,frame_ref,location.register);
+    procedure tcginlinenode.second_rox;
+      var
+        op : topcg;
+        hcountreg : tregister;
+        op1,op2 : tnode;
+      begin
+        { one or two parameters? }
+        if assigned(tcallparanode(left).right) then
+          begin
+            op1:=tcallparanode(tcallparanode(left).right).left;
+            op2:=tcallparanode(left).left;
+          end
+        else
+          op1:=left;
+
+        secondpass(op1);
+        { load left operator in a register }
+        location_copy(location,op1.location);
+        case inlinenumber of
+          in_ror_x,
+          in_ror_x_x:
+            op:=OP_ROR;
+          in_rol_x,
+          in_rol_x_x:
+            op:=OP_ROL;
         end;
         end;
-    end;
+        location_force_reg(current_asmdata.CurrAsmList,location,location.size,false);
+
+        if assigned(tcallparanode(left).right) then
+          begin
+             secondpass(op2);
+             { rotating by a constant directly coded: }
+             if op2.nodetype=ordconstn then
+               cg.a_op_const_reg(current_asmdata.CurrAsmList,op,location.size,
+                 tordconstnode(op2).value.uvalue and (resultdef.size*8-1),location.register)
+             else
+               begin
+                 location_force_reg(current_asmdata.CurrAsmList,op2.location,location.size,false);
+                 {
+                 if op2.location.loc<>LOC_REGISTER then
+                   begin
+                     hcountreg:=cg.getintregister(current_asmdata.CurrAsmList,
+                       op2.location.size);
+                     cg.a_load_loc_reg(current_asmdata.CurrAsmList,location.size,
+                       op2.location,hcountreg);
+                   end
+                 else
+                   hcountreg:=op2.location.register;
+                 }
+                 { do modulo 2 operation }
+                 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,op2.location.size,resultdef.size*8-1,op2.location.register);
+                 cg.a_op_reg_reg(current_asmdata.CurrAsmList,op,location.size,op2.location.register,location.register);
+               end;
+          end
+        else
+          cg.a_op_const_reg(current_asmdata.CurrAsmList,op,location.size,1,location.register);
+      end;
 
 
 begin
 begin
    cinlinenode:=tcginlinenode;
    cinlinenode:=tcginlinenode;

+ 21 - 3
compiler/ninl.pas

@@ -2434,7 +2434,20 @@ implementation
                 begin
                 begin
                   resultdef:=voidpointertype;
                   resultdef:=voidpointertype;
                 end;
                 end;
-               else
+              in_rol_x,
+              in_ror_x:
+                begin
+                  set_varstate(left,vs_read,[vsf_must_be_valid]);
+                  resultdef:=left.resultdef;
+                end;
+              in_rol_x_x,
+              in_ror_x_x:
+                begin
+                  set_varstate(tcallparanode(left).left,vs_read,[vsf_must_be_valid]);
+                  set_varstate(tcallparanode(tcallparanode(left).right).left,vs_read,[vsf_must_be_valid]);
+                  resultdef:=tcallparanode(tcallparanode(left).right).left.resultdef;
+                end;
+              else
                 internalerror(8);
                 internalerror(8);
             end;
             end;
           end;
           end;
@@ -2781,8 +2794,13 @@ implementation
              expectloc:=tcallparanode(left).left.expectloc;
              expectloc:=tcallparanode(left).left.expectloc;
            end;
            end;
 {$endif SUPPORT_UNALIGNED}
 {$endif SUPPORT_UNALIGNED}
-          else
-            internalerror(89);
+         in_rol_x,
+         in_rol_x_x,
+         in_ror_x,
+         in_ror_x_x:
+           expectloc:=LOC_REGISTER;
+         else
+           internalerror(89);
           end;
           end;
        end;
        end;
 {$maxfpuregisters default}
 {$maxfpuregisters default}

+ 5 - 0
compiler/options.pas

@@ -2197,6 +2197,11 @@ begin
   def_system_macro('FPC_HAS_INTERNAL_ABS_LONG');
   def_system_macro('FPC_HAS_INTERNAL_ABS_LONG');
 {$endif}
 {$endif}
 
 
+{ these cpus have an inline rol/ror implementaion }
+{$if defined(x86)}
+  def_system_macro('FPC_HAS_INTERNAL_ROX');
+{$endif}
+
 {$ifdef SUPPORT_UNALIGNED}
 {$ifdef SUPPORT_UNALIGNED}
   def_system_macro('FPC_SUPPORTS_UNALIGNED');
   def_system_macro('FPC_SUPPORTS_UNALIGNED');
   def_system_macro('FPC_UNALIGNED_FIXED');
   def_system_macro('FPC_UNALIGNED_FIXED');

+ 4 - 4
compiler/powerpc/cgcpu.pas

@@ -119,10 +119,10 @@ unit cgcpu;
 const
 const
   TOpCG2AsmOpConstLo: Array[topcg] of TAsmOp = (A_NONE,A_MR,A_ADDI,A_ANDI_,A_DIVWU,
   TOpCG2AsmOpConstLo: Array[topcg] of TAsmOp = (A_NONE,A_MR,A_ADDI,A_ANDI_,A_DIVWU,
                         A_DIVW,A_MULLW, A_MULLW, A_NONE,A_NONE,A_ORI,
                         A_DIVW,A_MULLW, A_MULLW, A_NONE,A_NONE,A_ORI,
-                        A_SRAWI,A_SLWI,A_SRWI,A_SUBI,A_XORI);
+                        A_SRAWI,A_SLWI,A_SRWI,A_SUBI,A_XORI,A_NONE,A_NONE);
   TOpCG2AsmOpConstHi: Array[topcg] of TAsmOp = (A_NONE,A_MR,A_ADDIS,A_ANDIS_,
   TOpCG2AsmOpConstHi: Array[topcg] of TAsmOp = (A_NONE,A_MR,A_ADDIS,A_ANDIS_,
                         A_DIVWU,A_DIVW, A_MULLW,A_MULLW,A_NONE,A_NONE,
                         A_DIVWU,A_DIVW, A_MULLW,A_MULLW,A_NONE,A_NONE,
-                        A_ORIS,A_NONE, A_NONE,A_NONE,A_SUBIS,A_XORIS);
+                        A_ORIS,A_NONE, A_NONE,A_NONE,A_SUBIS,A_XORIS,A_NONE,A_NONE);
 
 
   implementation
   implementation
 
 
@@ -353,7 +353,7 @@ const
           op := loadinstr[fromsize,ref2.index<>NR_NO,false];
           op := loadinstr[fromsize,ref2.index<>NR_NO,false];
           a_load_store(list,op,reg,ref2);
           a_load_store(list,op,reg,ref2);
           { sign extend shortint if necessary (because there is
           { sign extend shortint if necessary (because there is
-	   no load instruction to sign extend an 8 bit value automatically) 
+	   no load instruction to sign extend an 8 bit value automatically)
 	   and mask out extra sign bits when loading from a smaller signed
 	   and mask out extra sign bits when loading from a smaller signed
 	   to a larger unsigned type }
 	   to a larger unsigned type }
           if fromsize = OS_S8 then
           if fromsize = OS_S8 then
@@ -669,7 +669,7 @@ const
       const
       const
         op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
         op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
           (A_NONE,A_MR,A_ADD,A_AND,A_DIVWU,A_DIVW,A_MULLW,A_MULLW,A_NEG,A_NOT,A_OR,
           (A_NONE,A_MR,A_ADD,A_AND,A_DIVWU,A_DIVW,A_MULLW,A_MULLW,A_NEG,A_NOT,A_OR,
-           A_SRAW,A_SLW,A_SRW,A_SUB,A_XOR);
+           A_SRAW,A_SLW,A_SRW,A_SUB,A_XOR,A_NONE,A_NONE);
 
 
        begin
        begin
          if (op = OP_MOVE) then
          if (op = OP_MOVE) then

+ 8 - 8
compiler/powerpc64/cgcpu.pas

@@ -678,7 +678,7 @@ begin
   if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
   if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
     internalerror(2002090902);
     internalerror(2002090902);
   { if PIC or basic optimizations are enabled, and the number of instructions which would be
   { if PIC or basic optimizations are enabled, and the number of instructions which would be
-   required to load the value is greater than 2, store (and later load) the value from there } 
+   required to load the value is greater than 2, store (and later load) the value from there }
 //  if (((cs_opt_peephole in current_settings.optimizerswitches) or (cs_create_pic in current_settings.moduleswitches)) and
 //  if (((cs_opt_peephole in current_settings.optimizerswitches) or (cs_create_pic in current_settings.moduleswitches)) and
 //    (getInstructionLength(a) > 2)) then
 //    (getInstructionLength(a) > 2)) then
 //    loadConstantPIC(list, size, a, reg)
 //    loadConstantPIC(list, size, a, reg)
@@ -736,7 +736,7 @@ begin
   a_load_store(list, op, reg, ref2);
   a_load_store(list, op, reg, ref2);
   { sign extend shortint if necessary (because there is
   { sign extend shortint if necessary (because there is
    no load instruction to sign extend an 8 bit value automatically)
    no load instruction to sign extend an 8 bit value automatically)
-   and mask out extra sign bits when loading from a smaller 
+   and mask out extra sign bits when loading from a smaller
    signed to a larger unsigned type (where it matters) }
    signed to a larger unsigned type (where it matters) }
   if (fromsize = OS_S8) then begin
   if (fromsize = OS_S8) then begin
     a_load_reg_reg(list, OS_8, OS_S8, reg, reg);
     a_load_reg_reg(list, OS_8, OS_S8, reg, reg);
@@ -784,10 +784,10 @@ begin
   {$ifdef extdebug}
   {$ifdef extdebug}
   list.concat(tai_comment.create(strpnew('a_load_subsetreg_reg subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + intToStr(sreg.startbit) + ' tosize = ' + cgsize2string(tosize))));
   list.concat(tai_comment.create(strpnew('a_load_subsetreg_reg subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + intToStr(sreg.startbit) + ' tosize = ' + cgsize2string(tosize))));
   {$endif}
   {$endif}
-  { do the extraction if required and then extend the sign correctly. (The latter is actually required only for signed subsets 
+  { do the extraction if required and then extend the sign correctly. (The latter is actually required only for signed subsets
   and if that subset is not >= the tosize). }
   and if that subset is not >= the tosize). }
   if (sreg.startbit <> 0) or
   if (sreg.startbit <> 0) or
-     (sreg.bitlen <> tcgsize2size[subsetsize]*8) then begin 
+     (sreg.bitlen <> tcgsize2size[subsetsize]*8) then begin
     list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, destreg, sreg.subsetreg, (64 - sreg.startbit) and 63, 64 - sreg.bitlen));
     list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, destreg, sreg.subsetreg, (64 - sreg.startbit) and 63, 64 - sreg.bitlen));
     if (subsetsize in [OS_S8..OS_S128]) then
     if (subsetsize in [OS_S8..OS_S128]) then
       if ((sreg.bitlen mod 8) = 0) then begin
       if ((sreg.bitlen mod 8) = 0) then begin
@@ -1062,10 +1062,10 @@ procedure tcgppc.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
 const
 const
   op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
   op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
   (A_NONE, A_MR, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
   (A_NONE, A_MR, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
-   A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR);
+   A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR, A_NONE, A_NONE);
   op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
   op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
   (A_NONE, A_MR, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
   (A_NONE, A_MR, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
-   A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
+   A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR, A_NONE, A_NONE);
 begin
 begin
   case op of
   case op of
     OP_NEG, OP_NOT:
     OP_NEG, OP_NOT:
@@ -1860,7 +1860,7 @@ begin
   end;
   end;
 
 
   { for ppc64/linux emit correct code which sets up a stack frame and then calls the
   { for ppc64/linux emit correct code which sets up a stack frame and then calls the
-  external method normally to ensure that the GOT/TOC will be loaded correctly if 
+  external method normally to ensure that the GOT/TOC will be loaded correctly if
   required.
   required.
 
 
   It's not really advantageous to use cg methods here because they are too specialized.
   It's not really advantageous to use cg methods here because they are too specialized.
@@ -1938,7 +1938,7 @@ procedure tcgppc.a_load_store(list: TAsmList; op: tasmop; reg: tregister;
         A_LD, A_LDU, A_STD, A_STDU, A_LWA :
         A_LD, A_LDU, A_STD, A_STDU, A_LWA :
            if ((ref.offset mod 4) <> 0) then begin
            if ((ref.offset mod 4) <> 0) then begin
             tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
             tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
-    
+
             if (ref.base <> NR_NO) then begin
             if (ref.base <> NR_NO) then begin
               a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
               a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
               ref.base := tmpreg;
               ref.base := tmpreg;

+ 2 - 2
compiler/sparc/cgcpu.pas

@@ -109,10 +109,10 @@ interface
 
 
     const
     const
       TOpCG2AsmOp : array[topcg] of TAsmOp=(
       TOpCG2AsmOp : array[topcg] of TAsmOp=(
-        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_SMUL,A_UMUL,A_NEG,A_NOT,A_OR,A_SRA,A_SLL,A_SRL,A_SUB,A_XOR
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_SMUL,A_UMUL,A_NEG,A_NOT,A_OR,A_SRA,A_SLL,A_SRL,A_SUB,A_XOR,A_NONE,A_NONE
       );
       );
       TOpCG2AsmOpWithFlags : array[topcg] of TAsmOp=(
       TOpCG2AsmOpWithFlags : array[topcg] of TAsmOp=(
-        A_NONE,A_MOV,A_ADDcc,A_ANDcc,A_UDIVcc,A_SDIVcc,A_SMULcc,A_UMULcc,A_NEG,A_NOT,A_ORcc,A_SRA,A_SLL,A_SRL,A_SUBcc,A_XORcc
+        A_NONE,A_MOV,A_ADDcc,A_ANDcc,A_UDIVcc,A_SDIVcc,A_SMULcc,A_UMULcc,A_NEG,A_NOT,A_ORcc,A_SRA,A_SLL,A_SRL,A_SUBcc,A_XORcc,A_NONE,A_NONE
       );
       );
       TOpCmp2AsmCond : array[topcmp] of TAsmCond=(C_NONE,
       TOpCmp2AsmCond : array[topcmp] of TAsmCond=(C_NONE,
         C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A
         C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A

+ 9 - 9
compiler/x86/cgx86.pas

@@ -159,7 +159,7 @@ unit cgx86;
     const
     const
       TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
       TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
                             A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
                             A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
-                            A_SAR,A_SHL,A_SHR,A_SUB,A_XOR);
+                            A_SAR,A_SHL,A_SHR,A_SUB,A_XOR,A_ROL,A_ROR);
 
 
       TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
       TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
           C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A);
           C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A);
@@ -1098,10 +1098,10 @@ unit cgx86;
         opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
         opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
           ( { scalar }
           ( { scalar }
             ( { OS_F32 }
             ( { OS_F32 }
-              A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
+              A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP,A_NOP,A_NOP
             ),
             ),
             ( { OS_F64 }
             ( { OS_F64 }
-              A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
+              A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP,A_NOP,A_NOP
             )
             )
           ),
           ),
           ( { vectorized/packed }
           ( { vectorized/packed }
@@ -1109,10 +1109,10 @@ unit cgx86;
               these
               these
             }
             }
             ( { OS_F32 }
             ( { OS_F32 }
-              A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS
+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS,A_NOP,A_NOP
             ),
             ),
             ( { OS_F64 }
             ( { OS_F64 }
-              A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD
+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD,A_NOP,A_NOP
             )
             )
           )
           )
         );
         );
@@ -1259,7 +1259,7 @@ unit cgx86;
                    end
                    end
             else
             else
               list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a,reg));
               list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a,reg));
-          OP_SHL,OP_SHR,OP_SAR:
+          OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
             begin
             begin
 {$ifdef x86_64}
 {$ifdef x86_64}
               if (a and 63) <> 0 Then
               if (a and 63) <> 0 Then
@@ -1375,7 +1375,7 @@ unit cgx86;
             else
             else
               list.concat(taicpu.op_const_ref(TOpCG2AsmOp[op],
               list.concat(taicpu.op_const_ref(TOpCG2AsmOp[op],
                 TCgSize2OpSize[size],a,tmpref));
                 TCgSize2OpSize[size],a,tmpref));
-          OP_SHL,OP_SHR,OP_SAR:
+          OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
             begin
             begin
               if (a and 31) <> 0 then
               if (a and 31) <> 0 then
                 list.concat(taicpu.op_const_ref(
                 list.concat(taicpu.op_const_ref(
@@ -1407,9 +1407,9 @@ unit cgx86;
             { special stuff, needs separate handling inside code }
             { special stuff, needs separate handling inside code }
             { generator                                          }
             { generator                                          }
             internalerror(200109233);
             internalerror(200109233);
-          OP_SHR,OP_SHL,OP_SAR:
+          OP_SHR,OP_SHL,OP_SAR,OP_ROL,OP_ROR:
             begin
             begin
-              { Use ecx to load the value, that allows beter coalescing }
+              { Use ecx to load the value, that allows better coalescing }
               getcpuregister(list,NR_ECX);
               getcpuregister(list,NR_ECX);
               a_load_reg_reg(list,size,OS_32,src,NR_ECX);
               a_load_reg_reg(list,size,OS_32,src,NR_ECX);
               list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,dst));
               list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,dst));

+ 28 - 0
rtl/i386/i386.inc

@@ -1537,3 +1537,31 @@ asm
 end;
 end;
 
 
 {$endif}
 {$endif}
+
+{$ifdef FPC_HAS_INTERNAL_ROX}
+{ the i386 cg doesn't support yet directly coded 64 bit rotates }
+function Ror(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 63);
+  end;
+
+
+function Ror(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 63;
+    Result:=(AValue shr Dist) or (AValue shl (64-Dist));
+  end;
+
+
+function Rol(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 63);
+  end;
+
+
+function Rol(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 63;
+    Result:=(AValue shl Dist) or (AValue shr (64-Dist));
+  end;
+{$endif FPC_HAS_INTERNAL_ROX}

+ 111 - 1
rtl/inc/generic.inc

@@ -2043,4 +2043,114 @@ procedure WriteBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
 begin
 begin
 end;
 end;
 
 
-{$endif}
+{$endif FPC_SYSTEM_HAS_MEM_BARRIER}
+
+{$ifndef FPC_HAS_INTERNAL_ROX}
+{$ifndef FPC_SYSTEM_HAS_ROX}
+
+function Ror(Const AValue : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 7);
+  end;
+  
+
+function Ror(Const AValue : Byte;Dist : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 7;
+    Result:=(AValue shr Dist) or (AValue shl (8-Dist));
+  end;
+
+  
+function Ror(Const AValue : Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 15);
+  end;
+
+  
+function Ror(Const AValue : Word;Dist : Byte): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 15;
+    Result:=(AValue shr Dist) or (AValue shl (16-Dist));
+  end;
+
+  
+function Ror(Const AValue : DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 31);
+  end;
+
+
+function Ror(Const AValue : DWord;Dist : Byte): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 31;
+    Result:=(AValue shr Dist) or (AValue shl (32-Dist));
+  end;
+  
+
+function Ror(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 63);
+  end;
+
+
+function Ror(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 63;
+    Result:=(AValue shr Dist) or (AValue shl (64-Dist));
+  end;
+
+
+function Rol(Const AValue : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 7);
+  end;
+
+
+function Rol(Const AValue : Byte;Dist : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 7;
+    Result:=(AValue shl Dist) or (AValue shr (8-Dist));
+  end;
+
+
+function Rol(Const AValue : Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 15);
+  end;
+
+
+function Rol(Const AValue : Word;Dist : Byte): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 15;
+    Result:=(AValue shl Dist) or (AValue shr (16-Dist));
+  end;
+
+
+function Rol(Const AValue : DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 31);
+  end;
+
+
+function Rol(Const AValue : DWord;Dist : Byte): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 31;
+    Result:=(AValue shl Dist) or (AValue shr (32-Dist));
+  end;
+
+
+function Rol(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 63);
+  end;
+
+
+function Rol(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 63;
+    Result:=(AValue shl Dist) or (AValue shr (64-Dist));
+  end;
+
+{$endif FPC_SYSTEM_HAS_ROX}
+{$endif FPC_HAS_INTERNAL_ROX}
+

+ 4 - 0
rtl/inc/innr.inc

@@ -73,6 +73,10 @@ const
    fpc_in_writestr_x        = 62;
    fpc_in_writestr_x        = 62;
    fpc_in_readstr_x         = 63;
    fpc_in_readstr_x         = 63;
    fpc_in_abs_long          = 64;
    fpc_in_abs_long          = 64;
+   fpc_in_ror_x             = 65;
+   fpc_in_ror_x_x           = 66;
+   fpc_in_rol_x             = 67;
+   fpc_in_rol_x_x           = 68;
 
 
 { Internal constant functions }
 { Internal constant functions }
    fpc_in_const_sqr        = 100;
    fpc_in_const_sqr        = 100;

+ 51 - 0
rtl/inc/systemh.inc

@@ -608,6 +608,57 @@ function NtoLE(const AValue: DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
 function NtoLE(const AValue: Int64): Int64;{$ifdef SYSTEMINLINE}inline;{$endif}
 function NtoLE(const AValue: Int64): Int64;{$ifdef SYSTEMINLINE}inline;{$endif}
 function NtoLE(const AValue: QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
 function NtoLE(const AValue: QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
 
 
+{$ifdef FPC_HAS_INTERNAL_ROX}
+function Ror(Const AValue : Byte): Byte;[internproc:fpc_in_ror_x];
+function Ror(Const AValue : Byte;Dist : Byte): Byte;[internproc:fpc_in_ror_x_x];
+function Ror(Const AValue : Word): Word;[internproc:fpc_in_ror_x];
+function Ror(Const AValue : Word;Dist : Byte): Word;[internproc:fpc_in_ror_x_x];
+function Ror(Const AValue : DWord): DWord;[internproc:fpc_in_ror_x];
+function Ror(Const AValue : DWord;Dist : Byte): DWord;[internproc:fpc_in_ror_x_x];
+{ the i386 cg doesn't support yet directly coded 64 bit rotates }
+{$ifndef cpui386}
+function Ror(Const AValue : QWord): QWord;[internproc:fpc_in_ror_x];
+function Ror(Const AValue : QWord;Dist : Byte): QWord;[internproc:fpc_in_ror_x_x];
+{$else cpui386}
+function Ror(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+{$endif cpui386}
+
+function Rol(Const AValue : Byte): Byte;[internproc:fpc_in_rol_x];
+function Rol(Const AValue : Byte;Dist : Byte): Byte;[internproc:fpc_in_rol_x_x];
+function Rol(Const AValue : Word): Word;[internproc:fpc_in_rol_x];
+function Rol(Const AValue : Word;Dist : Byte): Word;[internproc:fpc_in_rol_x_x];
+function Rol(Const AValue : DWord): DWord;[internproc:fpc_in_rol_x];
+function Rol(Const AValue : DWord;Dist : Byte): DWord;[internproc:fpc_in_rol_x_x];
+{ the i386 cg doesn't support yet directly coded 64 bit rotates }
+{$ifndef cpui386}
+function Rol(Const AValue : QWord): QWord;[internproc:fpc_in_rol_x];
+function Rol(Const AValue : QWord;Dist : Byte): QWord;[internproc:fpc_in_rol_x_x];
+{$else cpui386}
+function Rol(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+{$endif cpui386}
+
+{$else FPC_HAS_INTERNAL_ROX}
+function Ror(Const AValue : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : Byte;Dist : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : Word;Dist : Byte): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : DWord;Dist : Byte): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+
+function Rol(Const AValue : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : Byte;Dist : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : Word;Dist : Byte): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : DWord;Dist : Byte): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+{$endif FPC_HAS_INTERNAL_ROX}
+
 {$ifndef FPUNONE}
 {$ifndef FPUNONE}
 { float math routines }
 { float math routines }
 {$I mathh.inc}
 {$I mathh.inc}

+ 78 - 0
tests/test/trox1.pp

@@ -0,0 +1,78 @@
+procedure do_error(i : integer);
+  begin
+    writeln('Error: ',i);
+    halt(1);
+  end;
+
+var
+  b1,b2 : byte;
+  w1 : word;
+  d1 : dword;
+  q1 : qword;
+begin
+  b1:=1;
+  b2:=3;
+  b1:=ror(b1);
+  b1:=ror(b1,2);
+  b1:=ror(b1,b2);
+  if b1<>4 then
+    do_error(1000);
+
+  w1:=1;
+  b2:=3;
+  w1:=ror(w1);
+  w1:=ror(w1,2);
+  w1:=ror(w1,b2);
+  if w1<>$400 then
+    do_error(1001);
+
+  d1:=1;
+  b2:=3;
+  d1:=ror(d1);
+  d1:=ror(d1,2);
+  d1:=ror(d1,b2);
+  if d1<>$4000000 then
+    do_error(1002);
+
+  q1:=1;
+  b2:=3;
+  q1:=ror(q1);
+  q1:=ror(q1,2);
+  q1:=ror(q1,b2);
+  if q1<>$400000000000000 then
+    do_error(1003);
+
+  b1:=1;
+  b2:=3;
+  b1:=rol(b1);
+  b1:=rol(b1,2);
+  b1:=rol(b1,b2);
+  if b1<>$40 then
+    do_error(2000);
+
+  w1:=$8001;
+  b2:=3;
+  w1:=rol(w1);
+  w1:=rol(w1,2);
+  w1:=rol(w1,b2);
+  if w1<>$60 then
+    do_error(2001);
+
+  d1:=$80000001;
+  b2:=3;
+  d1:=rol(d1);
+  d1:=rol(d1,2);
+  d1:=rol(d1,b2);
+  if d1<>$60 then
+    do_error(2002);
+
+  q1:=$8000000000000001;
+  b2:=3;
+  q1:=rol(q1);
+  q1:=rol(q1,2);
+  q1:=rol(q1,b2);
+  if q1<>$60 then
+    do_error(2003);
+
+  writeln('ok');
+end.