21 years ago · 75f7724beb
--- a/rtl/x86_64/x86_64.inc
+++ b/rtl/x86_64/x86_64.inc
@@ -78,7 +78,7 @@ procedure Move(const source;var dest;count:longint);assembler;
 
				        rdx count
			
 
				      }
			
 
				      pushq %rbx
			
 
				-     prefetcht0 (%rsi)	// for more hopefully the hw prefetch will kick in
			
 
				+     prefetcht0 (%rsi)  // for more hopefully the hw prefetch will kick in
			
 
				      movq %rdi,%rax
			
 
				 
			
 
				      movl %edi,%ecx
			
@@ -140,10 +140,7 @@ procedure Move(const source;var dest;count:longint);assembler;
 
				      incq %rsi
			
 
				      loop .Lloop_1
			
 
				 
			
 
				-.Lende:
			
 
				-     sfence
			
 
				-     popq %rbx
			
 
				-     ret
			
 
				+     jmp .Lende
			
 
				 
			
 
				      { align destination }
			
 
				      { This is simpleminded. For bigger blocks it may make sense to align
			
@@ -165,8 +162,13 @@ procedure Move(const source;var dest;count:longint);assembler;
 
				 .Lsmall_alignment:
			
 
				      addq %r9,%rdx
			
 
				      jmp .Lhandle_7
			
 
				+
			
 
				+.Lende:
			
 
				+     sfence
			
 
				+     popq %rbx
			
 
				   end;
			
 
				 
			
 
				+
			
 
				 {$define FPC_SYSTEM_HAS_FILLCHAR}
			
 
				 Procedure FillChar(var x;count:longint;value:byte);assembler;
			
 
				   asm
			
@@ -180,7 +182,7 @@ Procedure FillChar(var x;count:longint;value:byte);assembler;
 
				     { expand byte value  }
			
 
				     movzbl %sil,%ecx
			
 
				     movabs $0x0101010101010101,%rax
			
 
				-    mul    %rcx		{ with rax, clobbers rdx }
			
 
				+    mul    %rcx         { with rax, clobbers rdx }
			
 
				 
			
 
				     { align dst }
			
 
				     movl  %edi,%r9d
			
@@ -191,7 +193,7 @@ Procedure FillChar(var x;count:longint;value:byte);assembler;
 
				      movq %r11,%rcx
			
 
				      movl $64,%r8d
			
 
				      shrq $6,%rcx
			
 
				-     jz	 .Lhandle_tail
			
 
				+     jz  .Lhandle_tail
			
 
				 
			
 
				 .Lloop_64:
			
 
				      movnti  %rax,(%rdi)
			
@@ -208,35 +210,37 @@ Procedure FillChar(var x;count:longint;value:byte);assembler;
 
				      { Handle tail in loops. The loops should be faster than hard
			
 
				         to predict jump tables. }
			
 
				 .Lhandle_tail:
			
 
				-     movl	%r11d,%ecx
			
 
				+     movl       %r11d,%ecx
			
 
				      andl    $56,%ecx
			
 
				      jz     .Lhandle_7
			
 
				-     shrl	$3,%ecx
			
 
				+     shrl       $3,%ecx
			
 
				 .Lloop_8:
			
 
				      movnti  %rax,(%rdi)
			
 
				      addq    $8,%rdi
			
 
				      loop    .Lloop_8
			
 
				 .Lhandle_7:
			
 
				-     movl	%r11d,%ecx
			
 
				-     andl	$7,%ecx
			
 
				+     movl       %r11d,%ecx
			
 
				+     andl       $7,%ecx
			
 
				      jz      .Lende
			
 
				 .Lloop_1:
			
 
				-     movb 	%al,(%rdi)
			
 
				-     addq	$1,%rdi
			
 
				-     loop	.Lloop_1
			
 
				+     movb       %al,(%rdi)
			
 
				+     addq       $1,%rdi
			
 
				+     loop       .Lloop_1
			
 
				+
			
 
				+     jmp .Lende
			
 
				 
			
 
				-.Lende:
			
 
				-     movq	%r10,%rax
			
 
				-     ret
			
 
				 .Lbad_alignment:
			
 
				      cmpq $7,%r11
			
 
				      jbe .Lhandle_7
			
 
				-     movnti %rax,(%rdi)	(* unaligned store *)
			
 
				+     movnti %rax,(%rdi) (* unaligned store *)
			
 
				      movq $8,%r8
			
 
				      subq %r9,%r8
			
 
				      addq %r8,%rdi
			
 
				      subq %r8,%r11
			
 
				      jmp .Lafter_bad_alignment
			
 
				+
			
 
				+.Lende:
			
 
				+     movq       %r10,%rax
			
 
				   end;
			
 
				 
			
 
				 {$define FPC_SYSTEM_HAS_DECLOCKED}
			
@@ -300,7 +304,10 @@ const
 
				 
			
 
				 {
			
 
				   $Log$
			
 
				-  Revision 1.6  2004-02-06 15:58:21  florian
			
 
				+  Revision 1.7  2004-02-23 15:52:15  peter
			
 
				+    * don't use ret
			
 
				+
			
 
				+  Revision 1.6  2004/02/06 15:58:21  florian
			
 
				     * fixed x86-64 assembler problems
			
 
				 
			
 
				   Revision 1.5  2004/02/05 01:16:12  florian