Browse Source

+ ror/rol functions
+ internal compiler support for ror/rol on i386

git-svn-id: trunk@11466 -

florian 17 years ago
parent
commit
1afb1aa9cc

+ 1 - 0
.gitattributes

@@ -7697,6 +7697,7 @@ tests/test/trecreg2.pp svneol=native#text/plain
 tests/test/trecreg3.pp -text
 tests/test/trecreg4.pp svneol=native#text/plain
 tests/test/tresstr.pp svneol=native#text/plain
+tests/test/trox1.pp svneol=native#text/plain
 tests/test/trstr1.pp svneol=native#text/plain
 tests/test/trstr2.pp svneol=native#text/plain
 tests/test/trstr3.pp svneol=native#text/plain

+ 1 - 1
compiler/arm/cgcpu.pas

@@ -347,7 +347,7 @@ unit cgcpu;
     const
       op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
         (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_ORR,
-         A_NONE,A_NONE,A_NONE,A_SUB,A_EOR);
+         A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
 
 
     procedure tcgarm.a_op_const_reg_reg(list: TAsmList; op: TOpCg;

+ 4 - 2
compiler/cgbase.pas

@@ -102,7 +102,9 @@ interface
           OP_SHL,       { logical shift left       }
           OP_SHR,       { logical shift right      }
           OP_SUB,       { simple subtraction       }
-          OP_XOR        { simple exclusive or      }
+          OP_XOR,       { simple exclusive or      }
+          OP_ROL,       { rotate left              }
+          OP_ROR        { rotate right             }
         );
 
        {# Generic flag values - used for jump locations }
@@ -630,7 +632,7 @@ implementation
       const
         list: array[topcg] of boolean =
           (true,false,true,true,false,false,true,true,false,false,
-           true,false,false,false,false,true);
+           true,false,false,false,false,true,false,false);
       begin
         commutativeop := list[op];
       end;

+ 4 - 4
compiler/cgobj.pas

@@ -1579,7 +1579,7 @@ implementation
                   SL_SETZERO,
                   SL_SETMAX:
                     a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,tosreg,slopt);
-                  else                 
+                  else
                     a_load_subsetreg_subsetreg(list,subsetsize,subsetsize,fromsreg,tosreg);
                 end;
                 valuereg := makeregsize(list,valuereg,loadsize);
@@ -1611,7 +1611,7 @@ implementation
                   SL_SETZERO,
                   SL_SETMAX:
                     a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,tosreg,slopt);
-                  else                 
+                  else
                     a_load_subsetreg_subsetreg(list,subsetsize,subsetsize,fromsreg,tosreg);
                 end;
                 extra_value_reg := makeregsize(list,extra_value_reg,loadsize);
@@ -2465,7 +2465,7 @@ implementation
                if a = 0 then
                  op:=OP_NONE;
             end;
-        OP_SAR,OP_SHL,OP_SHR:
+        OP_SAR,OP_SHL,OP_SHR,OP_ROL,OP_ROR:
            begin
               if a = 0 then
                 op:=OP_NONE;
@@ -3870,7 +3870,7 @@ implementation
       begin
         internalerror(200807234);
       end;
-      
+
 
     function tcg.getflagregister(list: TAsmList; size: Tcgsize): Tregister;
       begin

+ 5 - 0
compiler/compinnr.inc

@@ -72,6 +72,11 @@ const
    in_writestr_x        = 62;
    in_readstr_x         = 63;
    in_abs_long          = 64;
+   in_ror_x             = 65;
+   in_ror_x_x           = 66;
+   in_rol_x             = 67;
+   in_rol_x_x           = 68;
+
 
 { Internal constant functions }
    in_const_sqr        = 100;

+ 6 - 4
compiler/m68k/cgcpu.pas

@@ -158,7 +158,9 @@ unit cgcpu;
        A_LSL,
        A_LSR,
        A_SUB,
-       A_EOR
+       A_EOR,
+       A_NONE,
+       A_NONE
       );
 
 
@@ -1306,13 +1308,13 @@ unit cgcpu;
 	        { size can't be negative }
 		if (localsize < 0) then
 		  internalerror(2006122601);
-	      
+	
                 { Not to complicate the code generator too much, and since some }
                 { of the systems only support this format, the localsize cannot }
                 { exceed 32K in size.                                           }
                 if (localsize > high(smallint)) then
                   CGMessage(cg_e_localsize_too_big);
-                
+
                 list.concat(taicpu.op_reg_const(A_LINK,S_W,NR_FRAME_POINTER_REG,-localsize));
 	      end
 	    else
@@ -1324,7 +1326,7 @@ unit cgcpu;
 		  two moves. So, use a link in #0 case too, for now. I'm not
 		  really sure tho', that LINK supports #0 disposition, but i
 		  see no reason why it shouldn't support it. (KB) }
-		  
+		
 	        { when localsize = 0, use two moves, instead of link }
 		r:=NR_FRAME_POINTER_REG;
 		rsp:=NR_STACK_POINTER_REG;

+ 84 - 20
compiler/ncginl.pas

@@ -54,6 +54,7 @@ interface
           procedure second_round_real; virtual;
           procedure second_trunc_real; virtual;
           procedure second_abs_long; virtual;
+          procedure second_rox; virtual;
        end;
 
 implementation
@@ -161,6 +162,11 @@ implementation
                    end;
               end;
 {$endif SUPPORT_MMX}
+            in_rol_x,
+            in_rol_x_x,
+            in_ror_x,
+            in_ror_x_x:
+              second_rox;
             else internalerror(9);
          end;
       end;
@@ -689,29 +695,87 @@ implementation
     end;
 
     procedure Tcginlinenode.second_get_caller_addr;
+      var
+        frame_ref:Treference;
+      begin
+        if current_procinfo.framepointer=NR_STACK_POINTER_REG then
+          begin
+            location_reset(location,LOC_REGISTER,OS_ADDR);
+            location.register:=cg.getaddressregister(current_asmdata.currasmlist);
+            reference_reset_base(frame_ref,NR_STACK_POINTER_REG,{current_procinfo.calc_stackframe_size}tg.lasttemp);
+            cg.a_load_ref_reg(current_asmdata.currasmlist,OS_ADDR,OS_ADDR,frame_ref,location.register);
+          end
+        else
+          begin
+            location_reset(location,LOC_REGISTER,OS_ADDR);
+            location.register:=cg.getaddressregister(current_asmdata.currasmlist);
+          {$ifdef cpu64bitaddr}
+            reference_reset_base(frame_ref,current_procinfo.framepointer,8);
+          {$else}
+            reference_reset_base(frame_ref,current_procinfo.framepointer,4);
+          {$endif}
+            cg.a_load_ref_reg(current_asmdata.currasmlist,OS_ADDR,OS_ADDR,frame_ref,location.register);
+          end;
+      end;
 
-    var frame_ref:Treference;
 
-    begin
-      if current_procinfo.framepointer=NR_STACK_POINTER_REG then
-        begin
-          location_reset(location,LOC_REGISTER,OS_ADDR);
-          location.register:=cg.getaddressregister(current_asmdata.currasmlist);
-          reference_reset_base(frame_ref,NR_STACK_POINTER_REG,{current_procinfo.calc_stackframe_size}tg.lasttemp);
-          cg.a_load_ref_reg(current_asmdata.currasmlist,OS_ADDR,OS_ADDR,frame_ref,location.register);
-        end
-      else
-        begin
-          location_reset(location,LOC_REGISTER,OS_ADDR);
-          location.register:=cg.getaddressregister(current_asmdata.currasmlist);
-        {$ifdef cpu64bitaddr}
-          reference_reset_base(frame_ref,current_procinfo.framepointer,8);
-        {$else}
-          reference_reset_base(frame_ref,current_procinfo.framepointer,4);
-        {$endif}
-          cg.a_load_ref_reg(current_asmdata.currasmlist,OS_ADDR,OS_ADDR,frame_ref,location.register);
+    procedure tcginlinenode.second_rox;
+      var
+        op : topcg;
+        hcountreg : tregister;
+        op1,op2 : tnode;
+      begin
+        { one or two parameters? }
+        if assigned(tcallparanode(left).right) then
+          begin
+            op1:=tcallparanode(tcallparanode(left).right).left;
+            op2:=tcallparanode(left).left;
+          end
+        else
+          op1:=left;
+
+        secondpass(op1);
+        { load left operator in a register }
+        location_copy(location,op1.location);
+        case inlinenumber of
+          in_ror_x,
+          in_ror_x_x:
+            op:=OP_ROR;
+          in_rol_x,
+          in_rol_x_x:
+            op:=OP_ROL;
         end;
-    end;
+        location_force_reg(current_asmdata.CurrAsmList,location,location.size,false);
+
+        if assigned(tcallparanode(left).right) then
+          begin
+             secondpass(op2);
+             { rotating by a constant directly coded: }
+             if op2.nodetype=ordconstn then
+               cg.a_op_const_reg(current_asmdata.CurrAsmList,op,location.size,
+                 tordconstnode(op2).value.uvalue and (resultdef.size*8-1),location.register)
+             else
+               begin
+                 location_force_reg(current_asmdata.CurrAsmList,op2.location,location.size,false);
+                 {
+                 if op2.location.loc<>LOC_REGISTER then
+                   begin
+                     hcountreg:=cg.getintregister(current_asmdata.CurrAsmList,
+                       op2.location.size);
+                     cg.a_load_loc_reg(current_asmdata.CurrAsmList,location.size,
+                       op2.location,hcountreg);
+                   end
+                 else
+                   hcountreg:=op2.location.register;
+                 }
+                 { do modulo 2 operation }
+                 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,op2.location.size,resultdef.size*8-1,op2.location.register);
+                 cg.a_op_reg_reg(current_asmdata.CurrAsmList,op,location.size,op2.location.register,location.register);
+               end;
+          end
+        else
+          cg.a_op_const_reg(current_asmdata.CurrAsmList,op,location.size,1,location.register);
+      end;
 
 begin
    cinlinenode:=tcginlinenode;

+ 21 - 3
compiler/ninl.pas

@@ -2434,7 +2434,20 @@ implementation
                 begin
                   resultdef:=voidpointertype;
                 end;
-               else
+              in_rol_x,
+              in_ror_x:
+                begin
+                  set_varstate(left,vs_read,[vsf_must_be_valid]);
+                  resultdef:=left.resultdef;
+                end;
+              in_rol_x_x,
+              in_ror_x_x:
+                begin
+                  set_varstate(tcallparanode(left).left,vs_read,[vsf_must_be_valid]);
+                  set_varstate(tcallparanode(tcallparanode(left).right).left,vs_read,[vsf_must_be_valid]);
+                  resultdef:=tcallparanode(tcallparanode(left).right).left.resultdef;
+                end;
+              else
                 internalerror(8);
             end;
           end;
@@ -2781,8 +2794,13 @@ implementation
              expectloc:=tcallparanode(left).left.expectloc;
            end;
 {$endif SUPPORT_UNALIGNED}
-          else
-            internalerror(89);
+         in_rol_x,
+         in_rol_x_x,
+         in_ror_x,
+         in_ror_x_x:
+           expectloc:=LOC_REGISTER;
+         else
+           internalerror(89);
           end;
        end;
 {$maxfpuregisters default}

+ 5 - 0
compiler/options.pas

@@ -2197,6 +2197,11 @@ begin
   def_system_macro('FPC_HAS_INTERNAL_ABS_LONG');
 {$endif}
 
+{ these cpus have an inline rol/ror implementaion }
+{$if defined(x86)}
+  def_system_macro('FPC_HAS_INTERNAL_ROX');
+{$endif}
+
 {$ifdef SUPPORT_UNALIGNED}
   def_system_macro('FPC_SUPPORTS_UNALIGNED');
   def_system_macro('FPC_UNALIGNED_FIXED');

+ 4 - 4
compiler/powerpc/cgcpu.pas

@@ -119,10 +119,10 @@ unit cgcpu;
 const
   TOpCG2AsmOpConstLo: Array[topcg] of TAsmOp = (A_NONE,A_MR,A_ADDI,A_ANDI_,A_DIVWU,
                         A_DIVW,A_MULLW, A_MULLW, A_NONE,A_NONE,A_ORI,
-                        A_SRAWI,A_SLWI,A_SRWI,A_SUBI,A_XORI);
+                        A_SRAWI,A_SLWI,A_SRWI,A_SUBI,A_XORI,A_NONE,A_NONE);
   TOpCG2AsmOpConstHi: Array[topcg] of TAsmOp = (A_NONE,A_MR,A_ADDIS,A_ANDIS_,
                         A_DIVWU,A_DIVW, A_MULLW,A_MULLW,A_NONE,A_NONE,
-                        A_ORIS,A_NONE, A_NONE,A_NONE,A_SUBIS,A_XORIS);
+                        A_ORIS,A_NONE, A_NONE,A_NONE,A_SUBIS,A_XORIS,A_NONE,A_NONE);
 
   implementation
 
@@ -353,7 +353,7 @@ const
           op := loadinstr[fromsize,ref2.index<>NR_NO,false];
           a_load_store(list,op,reg,ref2);
           { sign extend shortint if necessary (because there is
-	   no load instruction to sign extend an 8 bit value automatically) 
+	   no load instruction to sign extend an 8 bit value automatically)
 	   and mask out extra sign bits when loading from a smaller signed
 	   to a larger unsigned type }
           if fromsize = OS_S8 then
@@ -669,7 +669,7 @@ const
       const
         op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
           (A_NONE,A_MR,A_ADD,A_AND,A_DIVWU,A_DIVW,A_MULLW,A_MULLW,A_NEG,A_NOT,A_OR,
-           A_SRAW,A_SLW,A_SRW,A_SUB,A_XOR);
+           A_SRAW,A_SLW,A_SRW,A_SUB,A_XOR,A_NONE,A_NONE);
 
        begin
          if (op = OP_MOVE) then

+ 8 - 8
compiler/powerpc64/cgcpu.pas

@@ -678,7 +678,7 @@ begin
   if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
     internalerror(2002090902);
   { if PIC or basic optimizations are enabled, and the number of instructions which would be
-   required to load the value is greater than 2, store (and later load) the value from there } 
+   required to load the value is greater than 2, store (and later load) the value from there }
 //  if (((cs_opt_peephole in current_settings.optimizerswitches) or (cs_create_pic in current_settings.moduleswitches)) and
 //    (getInstructionLength(a) > 2)) then
 //    loadConstantPIC(list, size, a, reg)
@@ -736,7 +736,7 @@ begin
   a_load_store(list, op, reg, ref2);
   { sign extend shortint if necessary (because there is
    no load instruction to sign extend an 8 bit value automatically)
-   and mask out extra sign bits when loading from a smaller 
+   and mask out extra sign bits when loading from a smaller
    signed to a larger unsigned type (where it matters) }
   if (fromsize = OS_S8) then begin
     a_load_reg_reg(list, OS_8, OS_S8, reg, reg);
@@ -784,10 +784,10 @@ begin
   {$ifdef extdebug}
   list.concat(tai_comment.create(strpnew('a_load_subsetreg_reg subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + intToStr(sreg.startbit) + ' tosize = ' + cgsize2string(tosize))));
   {$endif}
-  { do the extraction if required and then extend the sign correctly. (The latter is actually required only for signed subsets 
+  { do the extraction if required and then extend the sign correctly. (The latter is actually required only for signed subsets
   and if that subset is not >= the tosize). }
   if (sreg.startbit <> 0) or
-     (sreg.bitlen <> tcgsize2size[subsetsize]*8) then begin 
+     (sreg.bitlen <> tcgsize2size[subsetsize]*8) then begin
     list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, destreg, sreg.subsetreg, (64 - sreg.startbit) and 63, 64 - sreg.bitlen));
     if (subsetsize in [OS_S8..OS_S128]) then
       if ((sreg.bitlen mod 8) = 0) then begin
@@ -1062,10 +1062,10 @@ procedure tcgppc.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
 const
   op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
   (A_NONE, A_MR, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
-   A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR);
+   A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR, A_NONE, A_NONE);
   op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
   (A_NONE, A_MR, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
-   A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
+   A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR, A_NONE, A_NONE);
 begin
   case op of
     OP_NEG, OP_NOT:
@@ -1860,7 +1860,7 @@ begin
   end;
 
   { for ppc64/linux emit correct code which sets up a stack frame and then calls the
-  external method normally to ensure that the GOT/TOC will be loaded correctly if 
+  external method normally to ensure that the GOT/TOC will be loaded correctly if
   required.
 
   It's not really advantageous to use cg methods here because they are too specialized.
@@ -1938,7 +1938,7 @@ procedure tcgppc.a_load_store(list: TAsmList; op: tasmop; reg: tregister;
         A_LD, A_LDU, A_STD, A_STDU, A_LWA :
            if ((ref.offset mod 4) <> 0) then begin
             tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
-    
+
             if (ref.base <> NR_NO) then begin
               a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
               ref.base := tmpreg;

+ 2 - 2
compiler/sparc/cgcpu.pas

@@ -109,10 +109,10 @@ interface
 
     const
       TOpCG2AsmOp : array[topcg] of TAsmOp=(
-        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_SMUL,A_UMUL,A_NEG,A_NOT,A_OR,A_SRA,A_SLL,A_SRL,A_SUB,A_XOR
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_SMUL,A_UMUL,A_NEG,A_NOT,A_OR,A_SRA,A_SLL,A_SRL,A_SUB,A_XOR,A_NONE,A_NONE
       );
       TOpCG2AsmOpWithFlags : array[topcg] of TAsmOp=(
-        A_NONE,A_MOV,A_ADDcc,A_ANDcc,A_UDIVcc,A_SDIVcc,A_SMULcc,A_UMULcc,A_NEG,A_NOT,A_ORcc,A_SRA,A_SLL,A_SRL,A_SUBcc,A_XORcc
+        A_NONE,A_MOV,A_ADDcc,A_ANDcc,A_UDIVcc,A_SDIVcc,A_SMULcc,A_UMULcc,A_NEG,A_NOT,A_ORcc,A_SRA,A_SLL,A_SRL,A_SUBcc,A_XORcc,A_NONE,A_NONE
       );
       TOpCmp2AsmCond : array[topcmp] of TAsmCond=(C_NONE,
         C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A

+ 9 - 9
compiler/x86/cgx86.pas

@@ -159,7 +159,7 @@ unit cgx86;
     const
       TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
                             A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
-                            A_SAR,A_SHL,A_SHR,A_SUB,A_XOR);
+                            A_SAR,A_SHL,A_SHR,A_SUB,A_XOR,A_ROL,A_ROR);
 
       TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
           C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A);
@@ -1098,10 +1098,10 @@ unit cgx86;
         opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
           ( { scalar }
             ( { OS_F32 }
-              A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
+              A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP,A_NOP,A_NOP
             ),
             ( { OS_F64 }
-              A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
+              A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP,A_NOP,A_NOP
             )
           ),
           ( { vectorized/packed }
@@ -1109,10 +1109,10 @@ unit cgx86;
               these
             }
             ( { OS_F32 }
-              A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS
+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS,A_NOP,A_NOP
             ),
             ( { OS_F64 }
-              A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD
+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD,A_NOP,A_NOP
             )
           )
         );
@@ -1259,7 +1259,7 @@ unit cgx86;
                    end
             else
               list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a,reg));
-          OP_SHL,OP_SHR,OP_SAR:
+          OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
             begin
 {$ifdef x86_64}
               if (a and 63) <> 0 Then
@@ -1375,7 +1375,7 @@ unit cgx86;
             else
               list.concat(taicpu.op_const_ref(TOpCG2AsmOp[op],
                 TCgSize2OpSize[size],a,tmpref));
-          OP_SHL,OP_SHR,OP_SAR:
+          OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
             begin
               if (a and 31) <> 0 then
                 list.concat(taicpu.op_const_ref(
@@ -1407,9 +1407,9 @@ unit cgx86;
             { special stuff, needs separate handling inside code }
             { generator                                          }
             internalerror(200109233);
-          OP_SHR,OP_SHL,OP_SAR:
+          OP_SHR,OP_SHL,OP_SAR,OP_ROL,OP_ROR:
             begin
-              { Use ecx to load the value, that allows beter coalescing }
+              { Use ecx to load the value, that allows better coalescing }
               getcpuregister(list,NR_ECX);
               a_load_reg_reg(list,size,OS_32,src,NR_ECX);
               list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,dst));

+ 28 - 0
rtl/i386/i386.inc

@@ -1537,3 +1537,31 @@ asm
 end;
 
 {$endif}
+
+{$ifdef FPC_HAS_INTERNAL_ROX}
+{ the i386 cg doesn't support yet directly coded 64 bit rotates }
+function Ror(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 63);
+  end;
+
+
+function Ror(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 63;
+    Result:=(AValue shr Dist) or (AValue shl (64-Dist));
+  end;
+
+
+function Rol(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 63);
+  end;
+
+
+function Rol(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 63;
+    Result:=(AValue shl Dist) or (AValue shr (64-Dist));
+  end;
+{$endif FPC_HAS_INTERNAL_ROX}

+ 111 - 1
rtl/inc/generic.inc

@@ -2043,4 +2043,114 @@ procedure WriteBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
 begin
 end;
 
-{$endif}
+{$endif FPC_SYSTEM_HAS_MEM_BARRIER}
+
+{$ifndef FPC_HAS_INTERNAL_ROX}
+{$ifndef FPC_SYSTEM_HAS_ROX}
+
+function Ror(Const AValue : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 7);
+  end;
+  
+
+function Ror(Const AValue : Byte;Dist : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 7;
+    Result:=(AValue shr Dist) or (AValue shl (8-Dist));
+  end;
+
+  
+function Ror(Const AValue : Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 15);
+  end;
+
+  
+function Ror(Const AValue : Word;Dist : Byte): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 15;
+    Result:=(AValue shr Dist) or (AValue shl (16-Dist));
+  end;
+
+  
+function Ror(Const AValue : DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 31);
+  end;
+
+
+function Ror(Const AValue : DWord;Dist : Byte): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 31;
+    Result:=(AValue shr Dist) or (AValue shl (32-Dist));
+  end;
+  
+
+function Ror(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shr 1) or (AValue shl 63);
+  end;
+
+
+function Ror(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 63;
+    Result:=(AValue shr Dist) or (AValue shl (64-Dist));
+  end;
+
+
+function Rol(Const AValue : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 7);
+  end;
+
+
+function Rol(Const AValue : Byte;Dist : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 7;
+    Result:=(AValue shl Dist) or (AValue shr (8-Dist));
+  end;
+
+
+function Rol(Const AValue : Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 15);
+  end;
+
+
+function Rol(Const AValue : Word;Dist : Byte): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 15;
+    Result:=(AValue shl Dist) or (AValue shr (16-Dist));
+  end;
+
+
+function Rol(Const AValue : DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 31);
+  end;
+
+
+function Rol(Const AValue : DWord;Dist : Byte): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 31;
+    Result:=(AValue shl Dist) or (AValue shr (32-Dist));
+  end;
+
+
+function Rol(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Result:=(AValue shl 1) or (AValue shr 63);
+  end;
+
+
+function Rol(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+  begin
+    Dist:=Dist and 63;
+    Result:=(AValue shl Dist) or (AValue shr (64-Dist));
+  end;
+
+{$endif FPC_SYSTEM_HAS_ROX}
+{$endif FPC_HAS_INTERNAL_ROX}
+

+ 4 - 0
rtl/inc/innr.inc

@@ -73,6 +73,10 @@ const
    fpc_in_writestr_x        = 62;
    fpc_in_readstr_x         = 63;
    fpc_in_abs_long          = 64;
+   fpc_in_ror_x             = 65;
+   fpc_in_ror_x_x           = 66;
+   fpc_in_rol_x             = 67;
+   fpc_in_rol_x_x           = 68;
 
 { Internal constant functions }
    fpc_in_const_sqr        = 100;

+ 51 - 0
rtl/inc/systemh.inc

@@ -608,6 +608,57 @@ function NtoLE(const AValue: DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
 function NtoLE(const AValue: Int64): Int64;{$ifdef SYSTEMINLINE}inline;{$endif}
 function NtoLE(const AValue: QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
 
+{$ifdef FPC_HAS_INTERNAL_ROX}
+function Ror(Const AValue : Byte): Byte;[internproc:fpc_in_ror_x];
+function Ror(Const AValue : Byte;Dist : Byte): Byte;[internproc:fpc_in_ror_x_x];
+function Ror(Const AValue : Word): Word;[internproc:fpc_in_ror_x];
+function Ror(Const AValue : Word;Dist : Byte): Word;[internproc:fpc_in_ror_x_x];
+function Ror(Const AValue : DWord): DWord;[internproc:fpc_in_ror_x];
+function Ror(Const AValue : DWord;Dist : Byte): DWord;[internproc:fpc_in_ror_x_x];
+{ the i386 cg doesn't support yet directly coded 64 bit rotates }
+{$ifndef cpui386}
+function Ror(Const AValue : QWord): QWord;[internproc:fpc_in_ror_x];
+function Ror(Const AValue : QWord;Dist : Byte): QWord;[internproc:fpc_in_ror_x_x];
+{$else cpui386}
+function Ror(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+{$endif cpui386}
+
+function Rol(Const AValue : Byte): Byte;[internproc:fpc_in_rol_x];
+function Rol(Const AValue : Byte;Dist : Byte): Byte;[internproc:fpc_in_rol_x_x];
+function Rol(Const AValue : Word): Word;[internproc:fpc_in_rol_x];
+function Rol(Const AValue : Word;Dist : Byte): Word;[internproc:fpc_in_rol_x_x];
+function Rol(Const AValue : DWord): DWord;[internproc:fpc_in_rol_x];
+function Rol(Const AValue : DWord;Dist : Byte): DWord;[internproc:fpc_in_rol_x_x];
+{ the i386 cg doesn't support yet directly coded 64 bit rotates }
+{$ifndef cpui386}
+function Rol(Const AValue : QWord): QWord;[internproc:fpc_in_rol_x];
+function Rol(Const AValue : QWord;Dist : Byte): QWord;[internproc:fpc_in_rol_x_x];
+{$else cpui386}
+function Rol(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+{$endif cpui386}
+
+{$else FPC_HAS_INTERNAL_ROX}
+function Ror(Const AValue : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : Byte;Dist : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : Word;Dist : Byte): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : DWord;Dist : Byte): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Ror(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+
+function Rol(Const AValue : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : Byte;Dist : Byte): Byte;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : Word;Dist : Byte): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : DWord;Dist : Byte): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+function Rol(Const AValue : QWord;Dist : Byte): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
+{$endif FPC_HAS_INTERNAL_ROX}
+
 {$ifndef FPUNONE}
 { float math routines }
 {$I mathh.inc}

+ 78 - 0
tests/test/trox1.pp

@@ -0,0 +1,78 @@
+procedure do_error(i : integer);
+  begin
+    writeln('Error: ',i);
+    halt(1);
+  end;
+
+var
+  b1,b2 : byte;
+  w1 : word;
+  d1 : dword;
+  q1 : qword;
+begin
+  b1:=1;
+  b2:=3;
+  b1:=ror(b1);
+  b1:=ror(b1,2);
+  b1:=ror(b1,b2);
+  if b1<>4 then
+    do_error(1000);
+
+  w1:=1;
+  b2:=3;
+  w1:=ror(w1);
+  w1:=ror(w1,2);
+  w1:=ror(w1,b2);
+  if w1<>$400 then
+    do_error(1001);
+
+  d1:=1;
+  b2:=3;
+  d1:=ror(d1);
+  d1:=ror(d1,2);
+  d1:=ror(d1,b2);
+  if d1<>$4000000 then
+    do_error(1002);
+
+  q1:=1;
+  b2:=3;
+  q1:=ror(q1);
+  q1:=ror(q1,2);
+  q1:=ror(q1,b2);
+  if q1<>$400000000000000 then
+    do_error(1003);
+
+  b1:=1;
+  b2:=3;
+  b1:=rol(b1);
+  b1:=rol(b1,2);
+  b1:=rol(b1,b2);
+  if b1<>$40 then
+    do_error(2000);
+
+  w1:=$8001;
+  b2:=3;
+  w1:=rol(w1);
+  w1:=rol(w1,2);
+  w1:=rol(w1,b2);
+  if w1<>$60 then
+    do_error(2001);
+
+  d1:=$80000001;
+  b2:=3;
+  d1:=rol(d1);
+  d1:=rol(d1,2);
+  d1:=rol(d1,b2);
+  if d1<>$60 then
+    do_error(2002);
+
+  q1:=$8000000000000001;
+  b2:=3;
+  q1:=rol(q1);
+  q1:=rol(q1,2);
+  q1:=rol(q1,b2);
+  if q1<>$60 then
+    do_error(2003);
+
+  writeln('ok');
+end.