Browse Source

* optimizations in tcg386.g_copyvaluepara_openarray in order to avoid the
second IMUL

git-svn-id: trunk@25828 -

nickysn 11 năm trước
mục cha
commit
aca22a59f7
1 tập tin đã thay đổi với 45 bổ sung và 16 xóa
  1. 45 16
      compiler/i386/cgcpu.pas

+ 45 - 16
compiler/i386/cgcpu.pas

@@ -411,7 +411,7 @@ unit cgcpu;
 
     procedure tcg386.g_copyvaluepara_openarray(list : TAsmList;const ref:treference;const lenloc:tlocation;elesize:tcgint;destreg:tregister);
       var
-        power,len  : longint;
+        power  : longint;
         opsize : topsize;
 {$ifndef __NOWINPECOFF__}
         again,ok : tasmlabel;
@@ -421,9 +421,21 @@ unit cgcpu;
         getcpuregister(list,NR_EDI);
         a_load_loc_reg(list,OS_INT,lenloc,NR_EDI);
         list.concat(Taicpu.op_reg(A_INC,S_L,NR_EDI));
-        { Now EDI contains (high+1). Copy it to ECX for later use. }
-        getcpuregister(list,NR_ECX);
-        list.concat(Taicpu.op_reg_reg(A_MOV,S_L,NR_EDI,NR_ECX));
+        { Now EDI contains (high+1). }
+
+        { special case handling for elesize=8, 4 and 2:
+          set ECX = (high+1) instead of ECX = (high+1)*elesize.
+
+          In the case of elesize=4 and 2, this allows us to avoid the SHR later.
+          In the case of elesize=8, we can later use a SHL ECX, 1 instead of
+          SHR ECX, 2 which is one byte shorter. }
+        if (elesize=8) or (elesize=4) or (elesize=2) then
+          begin
+            { Now EDI contains (high+1). Copy it to ECX for later use. }
+            getcpuregister(list,NR_ECX);
+            list.concat(Taicpu.op_reg_reg(A_MOV,S_L,NR_EDI,NR_ECX));
+          end;
+        { EDI := EDI * elesize }
         if (elesize<>1) then
          begin
            if ispowerof2(elesize, power) then
@@ -431,6 +443,12 @@ unit cgcpu;
            else
              list.concat(Taicpu.op_const_reg(A_IMUL,S_L,elesize,NR_EDI));
          end;
+        if (elesize<>8) and (elesize<>4) and (elesize<>2) then
+          begin
+            { Now EDI contains (high+1)*elesize. Copy it to ECX for later use. }
+            getcpuregister(list,NR_ECX);
+            list.concat(Taicpu.op_reg_reg(A_MOV,S_L,NR_EDI,NR_ECX));
+          end;
 {$ifndef __NOWINPECOFF__}
         { windows guards only a few pages for stack growing, }
         { so we have to access every page first              }
@@ -464,27 +482,38 @@ unit cgcpu;
         a_loadaddr_ref_reg(list,ref,NR_ESI);
 
         { calculate size }
-        len:=elesize;
         opsize:=S_B;
-        if (len and 3)=0 then
+        if elesize=8 then
+          begin
+            opsize:=S_L;
+            { ECX is number of qwords, convert to dwords }
+            list.concat(Taicpu.op_const_reg(A_SHL,S_L,1,NR_ECX))
+          end
+        else if elesize=4 then
+          begin
+            opsize:=S_L;
+            { ECX is already number of dwords, so no need to SHL/SHR }
+          end
+        else if elesize=2 then
+          begin
+            opsize:=S_W;
+            { ECX is already number of words, so no need to SHL/SHR }
+          end
+        else
+         if (elesize and 3)=0 then
          begin
            opsize:=S_L;
-           len:=len shr 2;
+           { ECX is number of bytes, convert to dwords }
+           list.concat(Taicpu.op_const_reg(A_SHR,S_L,2,NR_ECX))
          end
         else
-         if (len and 1)=0 then
+         if (elesize and 1)=0 then
           begin
             opsize:=S_W;
-            len:=len shr 1;
+            { ECX is number of bytes, convert to words }
+            list.concat(Taicpu.op_const_reg(A_SHR,S_L,1,NR_ECX))
           end;
 
-        if len>1 then
-          begin
-            if ispowerof2(len, power) then
-              list.concat(Taicpu.op_const_reg(A_SHL,S_L,power,NR_ECX))
-            else
-              list.concat(Taicpu.op_const_reg(A_IMUL,S_L,len,NR_ECX));
-          end;
         if ts_cld in current_settings.targetswitches then
           list.concat(Taicpu.op_none(A_CLD,S_NO));
         list.concat(Taicpu.op_none(A_REP,S_NO));