13 yıl önce · 2e0203b7a2
--- a/rtl/arm/arm.inc
+++ b/rtl/arm/arm.inc
@@ -224,62 +224,27 @@ asm
 
				   bxle  lr
			
 
				 {$endif}
			
 
				   // overlap?
			
 
				-  cmp r1,r0
			
 
				-  bls .Lnooverlap
			
 
				-  add r3,r0,r2
			
 
				-  cmp r3,r1
			
 
				-  bls .Lnooverlap
			
 
				-  // overlap, copy backward
			
 
				-.Loverlapped:
			
 
				-  subs r2,r2,#1
			
 
				-  ldrb r3,[r0,r2]
			
 
				-  strb r3,[r1,r2]
			
 
				-  bne .Loverlapped
			
 
				-{$if defined(cpuarmv3) or defined(cpuarmv4)}
			
 
				-  mov pc,lr
			
 
				-{$else}
			
 
				-  bx  lr
			
 
				-{$endif}
			
 
				-.Lnooverlap:
			
 
				-  // less then 16 bytes to copy?
			
 
				-  cmp r2,#8
			
 
				-  // yes, the forget about the whole optimizations
			
 
				-  // and do a bytewise copy
			
 
				-  blt .Lbyteloop
			
 
				-
			
 
				-  // both aligned?
			
 
				-  orr r3,r0,r1
			
 
				-  tst r3,#3
			
 
				-
			
 
				-  bne .Lbyteloop
			
 
				-(*
			
 
				-  // yes, then align
			
 
				-  // alignment to 4 byte boundries is enough
			
 
				-  ldrb ip,[r0],#1
			
 
				-  sub r2,r2,#1
			
 
				-  stb ip,[r1],#1
			
 
				-  tst r3,#2
			
 
				-  bne .Ldifferentaligned
			
 
				-  ldrh ip,[r0],#2
			
 
				-  sub r2,r2,#2
			
 
				-  sth ip,[r1],#2
			
 
				-
			
 
				-.Ldifferentaligned
			
 
				-  // qword aligned?
			
 
				-  orrs r3,r0,r1
			
 
				-  tst r3,#7
			
 
				-  bne .Ldwordloop
			
 
				-*)
			
 
				-  pld [r0,#32]
			
 
				+  subs   r3, r1, r0    // if (dest > source) and
			
 
				+  cmphi  r2, r3        //    (count > dest - src) then
			
 
				+  bhi    .Loverlapped  //   DoReverseByteCopy;
			
 
				+
			
 
				+  cmp r2,#8            // if (count < 8) then
			
 
				+  blt .Lbyteloop       //    DoForwardByteCopy;
			
 
				+  // Any way to avoid the above jump and fuse the next two instructions?
			
 
				+  tst   r0, #3         // if (source and 3) <> 0 or
			
 
				+  tsteq r1, #3         //    (dest and 3) <> 0 then
			
 
				+  bne   .Lbyteloop     //   DoForwardByteCopy;
			
 
				+
			
 
				+  pld   [r0,#32]
			
 
				 .Ldwordloop:
			
 
				-  sub r2,r2,#4
			
 
				-  ldr r3,[r0],#4
			
 
				+  ldmia r0!, {r3, ip}
			
 
				   // preload
			
 
				-  pld [r0,#64]
			
 
				-  cmp r2,#4
			
 
				-  str r3,[r1],#4
			
 
				-  bcs .Ldwordloop
			
 
				-  cmp r2,#0
			
 
				+  pld   [r0,#64]
			
 
				+  sub   r2,r2,#8
			
 
				+  cmp   r2, #8
			
 
				+  stmia r1!, {r3, ip}
			
 
				+  bge   .Ldwordloop
			
 
				+  cmp   r2,#0
			
 
				 {$if defined(cpuarmv3) or defined(cpuarmv4)}
			
 
				   moveq pc,lr
			
 
				 {$else}
			
@@ -295,6 +260,11 @@ asm
 
				 {$else}
			
 
				   bx  lr
			
 
				 {$endif}
			
 
				+.Loverlapped:
			
 
				+  subs r2,r2,#1
			
 
				+  ldrb r3,[r0,r2]
			
 
				+  strb r3,[r1,r2]
			
 
				+  bne .Loverlapped
			
 
				 end;
			
 
				 
			
 
				 procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
			
@@ -307,59 +277,24 @@ asm
 
				   bxle  lr
			
 
				 {$endif}
			
 
				   // overlap?
			
 
				-  cmp r1,r0
			
 
				-  bls .Lnooverlap
			
 
				-  add r3,r0,r2
			
 
				-  cmp r3,r1
			
 
				-  bls .Lnooverlap
			
 
				-  // overlap, copy backward
			
 
				-.Loverlapped:
			
 
				-  subs r2,r2,#1
			
 
				-  ldrb r3,[r0,r2]
			
 
				-  strb r3,[r1,r2]
			
 
				-  bne .Loverlapped
			
 
				-{$if defined(cpuarmv3) or defined(cpuarmv4)}
			
 
				-  mov pc,lr
			
 
				-{$else}
			
 
				-  bx  lr
			
 
				-{$endif}
			
 
				-.Lnooverlap:
			
 
				-  // less then 16 bytes to copy?
			
 
				-  cmp r2,#8
			
 
				-  // yes, the forget about the whole optimizations
			
 
				-  // and do a bytewise copy
			
 
				-  blt .Lbyteloop
			
 
				+  subs   r3, r1, r0    // if (dest > source) and
			
 
				+  cmphi  r2, r3        //    (count > dest - src) then
			
 
				+  bhi    .Loverlapped  //   DoReverseByteCopy;
			
 
				 
			
 
				-  // both aligned?
			
 
				-  orr r3,r0,r1
			
 
				-  tst r3,#3
			
 
				+  cmp r2,#8            // if (count < 8) then
			
 
				+  blt .Lbyteloop       //    DoForwardByteCopy;
			
 
				+  // Any way to avoid the above jump and fuse the next two instructions?
			
 
				+  tst   r0, #3         // if (source and 3) <> 0 or
			
 
				+  tsteq r1, #3         //    (dest and 3) <> 0 then
			
 
				+  bne   .Lbyteloop     //   DoForwardByteCopy;
			
 
				 
			
 
				-  bne .Lbyteloop
			
 
				-(*
			
 
				-  // yes, then align
			
 
				-  // alignment to 4 byte boundries is enough
			
 
				-  ldrb ip,[r0],#1
			
 
				-  sub r2,r2,#1
			
 
				-  stb ip,[r1],#1
			
 
				-  tst r3,#2
			
 
				-  bne .Ldifferentaligned
			
 
				-  ldrh ip,[r0],#2
			
 
				-  sub r2,r2,#2
			
 
				-  sth ip,[r1],#2
			
 
				-
			
 
				-.Ldifferentaligned
			
 
				-  // qword aligned?
			
 
				-  orrs r3,r0,r1
			
 
				-  tst r3,#7
			
 
				-  bne .Ldwordloop
			
 
				-*)
			
 
				 .Ldwordloop:
			
 
				-  sub r2,r2,#4
			
 
				-  ldr r3,[r0],#4
			
 
				-  cmp r2,#4
			
 
				-  str r3,[r1],#4
			
 
				-  bcs .Ldwordloop
			
 
				-  cmp r2,#0
			
 
				+  ldmia r0!, {r3, ip}
			
 
				+  sub   r2,r2,#8
			
 
				+  cmp   r2, #8
			
 
				+  stmia r1!, {r3, ip}
			
 
				+  bge   .Ldwordloop
			
 
				+  cmp   r2,#0
			
 
				 {$if defined(cpuarmv3) or defined(cpuarmv4)}
			
 
				   moveq pc,lr
			
 
				 {$else}
			
@@ -375,9 +310,13 @@ asm
 
				 {$else}
			
 
				   bx  lr
			
 
				 {$endif}
			
 
				+.Loverlapped:
			
 
				+  subs r2,r2,#1
			
 
				+  ldrb r3,[r0,r2]
			
 
				+  strb r3,[r1,r2]
			
 
				+  bne .Loverlapped
			
 
				 end;
			
 
				 
			
 
				-
			
 
				 const
			
 
				   moveproc : pointer = @move_blended;