Browse Source

+ x86_64 assembler implementations of Move and FillChar. Does not use SIMD, so probably not the fastest for large move sizes, but for small to medium sizes it should be competitive.
* Extended the related test with checks for medium and large move sizes, to improve coverage for different code paths that are used depending on size.

git-svn-id: trunk@17249 -

sergei 14 years ago
parent
commit
6e09d76b07
2 changed files with 444 additions and 172 deletions
  1. 374 168
      rtl/x86_64/x86_64.inc
  2. 70 4
      tests/test/units/system/tmem.pp

+ 374 - 168
rtl/x86_64/x86_64.inc

@@ -72,181 +72,387 @@ asm
 .Lg_a_null:
 .Lg_a_null:
 end ['RAX'];
 end ['RAX'];
 
 
-(*
 {$define FPC_SYSTEM_HAS_MOVE}
 {$define FPC_SYSTEM_HAS_MOVE}
-procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;
-  asm
-     { rdi destination
-       rsi source
-       rdx count
-     }
-     pushq %rbx
-     prefetcht0 (%rsi)  // for more hopefully the hw prefetch will kick in
-     movq %rdi,%rax
-
-     movl %edi,%ecx
-     andl $7,%ecx
-     jnz  .Lbad_alignment
-.Lafter_bad_alignment:
-     movq %rdx,%rcx
-     movl $64,%ebx
-     shrq $6,%rcx
-     jz .Lhandle_tail
-
-.Lloop_64:
-     { no prefetch because we assume the hw prefetcher does it already
-       and we have no specific temporal hint to give. XXX or give a nta
-       hint for the source? }
-     movq (%rsi),%r11
-     movq 8(%rsi),%r8
-     movq 2*8(%rsi),%r9
-     movq 3*8(%rsi),%r10
-     movnti %r11,(%rdi)
-     movnti %r8,1*8(%rdi)
-     movnti %r9,2*8(%rdi)
-     movnti %r10,3*8(%rdi)
-
-     movq 4*8(%rsi),%r11
-     movq 5*8(%rsi),%r8
-     movq 6*8(%rsi),%r9
-     movq 7*8(%rsi),%r10
-     movnti %r11,4*8(%rdi)
-     movnti %r8,5*8(%rdi)
-     movnti %r9,6*8(%rdi)
-     movnti %r10,7*8(%rdi)
-
-     addq %rbx,%rsi
-     addq %rbx,%rdi
-     loop .Lloop_64
-
-.Lhandle_tail:
-     movl %edx,%ecx
-     andl $63,%ecx
-     shrl $3,%ecx
-     jz   .Lhandle_7
-     movl $8,%ebx
-.Lloop_8:
-     movq (%rsi),%r8
-     movnti %r8,(%rdi)
-     addq %rbx,%rdi
-     addq %rbx,%rsi
-     loop .Lloop_8
-
-.Lhandle_7:
-     movl %edx,%ecx
-     andl $7,%ecx
-     jz .Lende
-.Lloop_1:
-     movb (%rsi),%r8b
-     movb %r8b,(%rdi)
-     incq %rdi
-     incq %rsi
-     loop .Lloop_1
-
-     jmp .Lende
-
-     { align destination }
-     { This is simpleminded. For bigger blocks it may make sense to align
-        src and dst to their aligned subset and handle the rest separately }
-.Lbad_alignment:
-     movl $8,%r9d
-     subl %ecx,%r9d
-     movl %r9d,%ecx
-     subq %r9,%rdx
-     js   .Lsmall_alignment
-     jz   .Lsmall_alignment
-.Lalign_1:
-     movb (%rsi),%r8b
-     movb %r8b,(%rdi)
-     incq %rdi
-     incq %rsi
-     loop .Lalign_1
-     jmp .Lafter_bad_alignment
-.Lsmall_alignment:
-     addq %r9,%rdx
-     jmp .Lhandle_7
-
-.Lende:
-     sfence
-     popq %rbx
-  end;
-*)
+procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
+{ Linux: rdi source, rsi dest, rdx count
+  win64: rcx source, rdx dest, r8 count }
+asm
+{$ifndef win64}
+    mov    %rdx, %r8
+    mov    %rsi, %rdx
+    mov    %rdi, %rcx
+{$endif win64}
+
+    mov    %r8, %rax
+    sub    %rdx, %rcx            { rcx = src - dest }
+    jz     .Lquit                { exit if src=dest }
+    jnb    .L1                   { src>dest => forward move }
+
+    add    %rcx, %rax            { rcx is negative => r8+rcx > 0 if regions overlap }
+    jb     .Lback                { if no overlap, still do forward move }
+
+.L1:
+    cmp    $8, %r8
+    jl     .Lless8f              { signed compare, negative count not allowed }
+    test   $7, %dl
+    je     .Ldestaligned
+
+    test   $1, %dl               { align dest by moving first 1+2+4 bytes }
+    je     .L2f
+    mov    (%rcx,%rdx,1),%al
+    dec    %r8
+    mov    %al, (%rdx)
+    add    $1, %rdx
+.L2f:
+    test   $2, %dl
+    je     .L4f
+    mov    (%rcx,%rdx,1),%ax
+    sub    $2, %r8
+    mov    %ax, (%rdx)
+    add    $2, %rdx
+.L4f:
+    test   $4, %dl
+    je     .Ldestaligned
+    mov    (%rcx,%rdx,1),%eax
+    sub    $4, %r8
+    mov    %eax, (%rdx)
+    add    $4, %rdx
+
+.Ldestaligned:
+    mov    %r8, %r9
+    shr    $5, %r9
+    jne    .Lmore32
+
+.Ltail:
+    mov    %r8, %r9
+    shr    $3, %r9
+    je     .Lless8f
+
+    .balign 16
+.Lloop8f:                             { max. 8 iterations }
+    mov    (%rcx,%rdx,1),%rax
+    mov    %rax, (%rdx)
+    add    $8, %rdx
+    dec    %r9
+    jne    .Lloop8f
+    and    $7, %r8
+
+.Lless8f:
+    test   %r8, %r8
+    jle    .Lquit
+
+    .balign 16
+.Lloop1f:
+    mov    (%rcx,%rdx,1),%al
+    mov    %al,(%rdx)
+    inc    %rdx
+    dec    %r8
+    jne    .Lloop1f
+.Lquit:
+    retq
+
+
+.Lmore32:
+    cmp    $0x2000, %r9          { this limit must be processor-specific (1/2 L2 cache size) }
+    jnae   .Lloop32
+    cmp    $0x1000, %rcx         { but don't bother bypassing cache if src and dest }
+    jnb    .Lntloopf             { are close to each other }
+
+    .balign 16
+.Lloop32:
+    add    $32,%rdx
+    mov    -32(%rcx,%rdx,1),%rax
+    mov    -24(%rcx,%rdx,1),%r10
+    mov    %rax,-32(%rdx)
+    mov    %r10,-24(%rdx)
+    dec    %r9
+    mov    -16(%rcx,%rdx,1),%rax
+    mov    -8(%rcx,%rdx,1),%r10
+    mov    %rax,-16(%rdx)
+    mov    %r10,-8(%rdx)
+    jne    .Lloop32
+
+    and    $0x1f, %r8
+    jmpq   .Ltail
+
+.Lntloopf:
+    mov    $32, %eax
+
+    .balign 16
+.Lpref:
+    prefetchnta (%rcx,%rdx,1)
+    prefetchnta 0x40(%rcx,%rdx,1)
+    add    $0x80, %rdx
+    dec    %eax
+    jne    .Lpref
+
+    sub    $0x1000, %rdx
+    mov    $64, %eax
+
+    .balign 16
+.Loop64:
+    add    $64, %rdx
+    mov    -64(%rcx,%rdx,1), %r9
+    mov    -56(%rcx,%rdx,1), %r10
+    movnti %r9, -64(%rdx)
+    movnti %r10, -56(%rdx)
+
+    mov    -48(%rcx,%rdx,1), %r9
+    mov    -40(%rcx,%rdx,1), %r10
+    movnti %r9, -48(%rdx)
+    movnti %r10, -40(%rdx)
+    dec    %eax
+    mov    -32(%rcx,%rdx,1), %r9
+    mov    -24(%rcx,%rdx,1), %r10
+    movnti %r9, -32(%rdx)
+    movnti %r10, -24(%rdx)
+
+    mov    -16(%rcx,%rdx,1), %r9
+    mov    -8(%rcx,%rdx,1), %r10
+    movnti %r9, -16(%rdx)
+    movnti %r10, -8(%rdx)
+    jne    .Loop64
+
+    sub    $0x1000, %r8
+    cmp    $0x1000, %r8
+    jae    .Lntloopf
+
+    mfence
+    jmpq    .Ldestaligned        { go handle remaining bytes }
+
+{ backwards move }
+.Lback:
+    add    %r8, %rdx             { points to the end of dest }
+    cmp    $8, %r8
+    jl     .Lless8b              { signed compare, negative count not allowed }
+    test   $7, %dl
+    je     .Ldestalignedb
+    test   $1, %dl
+    je     .L2b
+    dec    %rdx
+    mov    (%rcx,%rdx,1), %al
+    dec    %r8
+    mov    %al, (%rdx)
+.L2b:
+    test   $2, %dl
+    je     .L4b
+    sub    $2, %rdx
+    mov    (%rcx,%rdx,1), %ax
+    sub    $2, %r8
+    mov    %ax, (%rdx)
+.L4b:
+    test   $4, %dl
+    je     .Ldestalignedb
+    sub    $4, %rdx
+    mov    (%rcx,%rdx,1), %eax
+    sub    $4, %r8
+    mov    %eax, (%rdx)
+
+.Ldestalignedb:
+    mov    %r8, %r9
+    shr    $5, %r9
+    jne    .Lmore32b
+
+.Ltailb:
+    mov    %r8, %r9
+    shr    $3, %r9
+    je     .Lless8b
+
+.Lloop8b:
+    sub    $8, %rdx
+    mov    (%rcx,%rdx,1), %rax
+    dec    %r9
+    mov    %rax, (%rdx)
+    jne    .Lloop8b
+    and    $7, %r8
+
+.Lless8b:
+    test   %r8, %r8
+    jle    .Lquit2
+
+    .balign 16
+.Lsmallb:
+    dec   %rdx
+    mov   (%rcx,%rdx,1), %al
+    dec   %r8
+    mov   %al,(%rdx)
+    jnz   .Lsmallb
+.Lquit2:
+    retq
+
+.Lmore32b:
+    cmp   $0x2000, %r9
+    jnae  .Lloop32b
+    cmp    $0xfffffffffffff000,%rcx
+    jb     .Lntloopb
+
+    .balign 16
+.Lloop32b:
+    sub    $32, %rdx
+    mov    24(%rcx,%rdx,1), %rax
+    mov    16(%rcx,%rdx,1), %r10
+    mov    %rax, 24(%rdx)
+    mov    %r10, 16(%rdx)
+    dec    %r9
+    mov    8(%rcx,%rdx,1),%rax
+    mov    (%rcx,%rdx,1), %r10
+    mov    %rax, 8(%rdx)
+    mov    %r10, (%rdx)
+    jne    .Lloop32b
+    and    $0x1f, %r8
+    jmpq   .Ltailb
+
+
+.Lntloopb:
+    mov    $32, %eax
+
+    .balign 16
+.Lprefb:
+    sub    $0x80, %rdx
+    prefetchnta (%rcx,%rdx,1)
+    prefetchnta 0x40(%rcx,%rdx,1)
+    dec    %eax
+    jnz    .Lprefb
+
+    add    $0x1000, %rdx
+    mov    $0x40, %eax
+
+    .balign 16
+.Lloop64b:
+    sub    $64, %rdx
+    mov    56(%rcx,%rdx,1), %r9
+    mov    48(%rcx,%rdx,1), %r10
+    movnti %r9, 56(%rdx)
+    movnti %r10, 48(%rdx)
+
+    mov    40(%rcx,%rdx,1), %r9
+    mov    32(%rcx,%rdx,1), %r10
+    movnti %r9, 40(%rdx)
+    movnti %r10, 32(%rdx)
+    dec    %eax
+    mov    24(%rcx,%rdx,1), %r9
+    mov    16(%rcx,%rdx,1), %r10
+    movnti %r9, 24(%rdx)
+    movnti %r10, 16(%rdx)
+
+    mov    8(%rcx,%rdx,1), %r9
+    mov    (%rcx,%rdx,1), %r10
+    movnti %r9, 8(%rdx)
+    movnti %r10, (%rdx)
+    jne    .Lloop64b
+
+    sub    $0x1000, %r8
+    cmp    $0x1000, %r8
+    jae    .Lntloopb
+    mfence
+    jmpq   .Ldestalignedb
+end;
 
 
-(*
 {$define FPC_SYSTEM_HAS_FILLCHAR}
 {$define FPC_SYSTEM_HAS_FILLCHAR}
-Procedure FillChar(var x;count:longint;value:byte);assembler;
+Procedure FillChar(var x;count:SizeInt;value:byte);assembler;nostackframe;
   asm
   asm
-    { rdi   destination
-      rsi   value (char)
-      rdx   count (bytes)
-    }
-    movq %rdi,%r10
-    movq %rdx,%r11
+{ win64: rcx dest, rdx count, r8b value
+  linux: rdi dest, rsi count, rdx value }
+{$ifndef win64}
+    mov    %rdx, %r8
+    mov    %rsi, %rdx
+    mov    %rdi, %rcx
+{$endif win64}
+
+    cmp    $8, %rdx
+    jl     .Ltiny
 
 
     { expand byte value  }
     { expand byte value  }
-    movzbl %sil,%ecx
-    movabs $0x0101010101010101,%rax
-    mul    %rcx         { with rax, clobbers rdx }
-
-    { align dst }
-    movl  %edi,%r9d
-    andl  $7,%r9d
-    jnz  .Lbad_alignment
-.Lafter_bad_alignment:
-
-     movq %r11,%rcx
-     movl $64,%r8d
-     shrq $6,%rcx
-     jz  .Lhandle_tail
-
-.Lloop_64:
-     movnti  %rax,(%rdi)
-     movnti  %rax,8(%rdi)
-     movnti  %rax,16(%rdi)
-     movnti  %rax,24(%rdi)
-     movnti  %rax,32(%rdi)
-     movnti  %rax,40(%rdi)
-     movnti  %rax,48(%rdi)
-     movnti  %rax,56(%rdi)
-     addq    %r8,%rdi
-     loop    .Lloop_64
-
-     { Handle tail in loops. The loops should be faster than hard
-        to predict jump tables. }
-.Lhandle_tail:
-     movl       %r11d,%ecx
-     andl    $56,%ecx
-     jz     .Lhandle_7
-     shrl       $3,%ecx
-.Lloop_8:
-     movnti  %rax,(%rdi)
-     addq    $8,%rdi
-     loop    .Lloop_8
-.Lhandle_7:
-     movl       %r11d,%ecx
-     andl       $7,%ecx
-     jz      .Lende
-.Lloop_1:
-     movb       %al,(%rdi)
-     addq       $1,%rdi
-     loop       .Lloop_1
-
-     jmp .Lende
-
-.Lbad_alignment:
-     cmpq $7,%r11
-     jbe .Lhandle_7
-     movnti %rax,(%rdi) (* unaligned store *)
-     movq $8,%r8
-     subq %r9,%r8
-     addq %r8,%rdi
-     subq %r8,%r11
-     jmp .Lafter_bad_alignment
-
-.Lende:
-     movq       %r10,%rax
+    movzbl %r8b, %r8
+    mov    $0x0101010101010101,%r9
+    imul   %r9, %r8
+
+    test   $7, %cl
+    je     .Laligned
+
+    { align dest to 8 bytes }
+    test   $1, %cl
+    je     .L2
+    movb   %r8b, (%rcx)
+    add    $1, %rcx
+    sub    $1, %rdx
+.L2:
+    test   $2, %cl
+    je     .L4
+    movw   %r8w, (%rcx)
+    add    $2, %rcx
+    sub    $2, %rdx
+.L4:
+    test   $4, %cl
+    je     .Laligned
+    movl   %r8d, (%rcx)
+    add    $4, %rcx
+    sub    $4, %rdx
+
+.Laligned:
+    mov    %rdx, %rax
+    and    $0x3f, %rdx
+    shr    $6, %rax
+    jne    .Lmore64
+
+.Lless64:
+    mov    %rdx, %rax
+    and    $7, %rdx
+    shr    $3, %rax
+    je     .Ltiny
+
+    .balign 16
+.Lloop8:                               { max. 8 iterations }
+    mov    %r8, (%rcx)
+    add    $8, %rcx
+    dec    %rax
+    jne    .Lloop8
+.Ltiny:
+    test   %rdx, %rdx
+    jle    .Lquit
+.Lloop1:
+    movb   %r8b, (%rcx)
+    inc    %rcx
+    dec    %rdx
+    jnz    .Lloop1
+.Lquit:
+    retq
+
+.Lmore64:
+    cmp    $0x2000,%rax
+    jae    .Lloop64nti
+
+    .balign 16
+.Lloop64:
+    add    $64, %rcx
+    mov    %r8, -64(%rcx)
+    mov    %r8, -56(%rcx)
+    mov    %r8, -48(%rcx)
+    mov    %r8, -40(%rcx)
+    dec    %rax
+    mov    %r8, -32(%rcx)
+    mov    %r8, -24(%rcx)
+    mov    %r8, -16(%rcx)
+    mov    %r8, -8(%rcx)
+    jne    .Lloop64
+    jmp    .Lless64
+
+    .balign 16
+.Lloop64nti:
+    add    $64, %rcx
+    movnti %r8, -64(%rcx)
+    movnti %r8, -56(%rcx)
+    movnti %r8, -48(%rcx)
+    movnti %r8, -40(%rcx)
+    dec    %rax
+    movnti %r8, -32(%rcx)
+    movnti %r8, -24(%rcx)
+    movnti %r8, -16(%rcx)
+    movnti %r8, -8(%rcx)
+    jnz    .Lloop64nti
+    mfence
+    jmp    .Lless64
   end;
   end;
-*)
 
 
 
 
 {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
 {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}

+ 70 - 4
tests/test/units/system/tmem.pp

@@ -16,9 +16,7 @@ var
   dst_arraybyte : array[1..MAX_TABLE] of byte;
   dst_arraybyte : array[1..MAX_TABLE] of byte;
   src_arraybyte : array[1..MAX_TABLE] of byte;
   src_arraybyte : array[1..MAX_TABLE] of byte;
   dst_arrayword : array[1..MAX_TABLE] of word;
   dst_arrayword : array[1..MAX_TABLE] of word;
-  src_arrayword : array[1..MAX_TABLE] of word;
   dst_arraylongword : array[1..MAX_TABLE] of longword;
   dst_arraylongword : array[1..MAX_TABLE] of longword;
-  src_arratlongword : array[1..MAX_TABLE] of longword;
   i: integer;
   i: integer;
 
 
 
 
@@ -70,6 +68,8 @@ procedure test_fillchar;
   for i := 1 to MAX_TABLE do
   for i := 1 to MAX_TABLE do
     dst_arraybyte[i] := DEFAULT_VALUE;
     dst_arraybyte[i] := DEFAULT_VALUE;
   fillchar(dst_arraybyte, -1, FILL_VALUE);
   fillchar(dst_arraybyte, -1, FILL_VALUE);
+  for i := 1 to MAX_TABLE do
+    test(dst_arraybyte[i], DEFAULT_VALUE);
   writeln('Passed!');
   writeln('Passed!');
  end;
  end;
 
 
@@ -103,7 +103,7 @@ begin
     test(dst_arraybyte[i], FILL_VALUE);
     test(dst_arraybyte[i], FILL_VALUE);
   writeln('Passed!');
   writeln('Passed!');
   { zero move count }
   { zero move count }
-  write('test move (zero count)...');
+  write('testing move (zero count)...');
   for i := 1 to MAX_TABLE do
   for i := 1 to MAX_TABLE do
   begin
   begin
     dst_arraybyte[i] := DEFAULT_VALUE;
     dst_arraybyte[i] := DEFAULT_VALUE;
@@ -114,11 +114,75 @@ begin
     test(dst_arraybyte[i], DEFAULT_VALUE);
     test(dst_arraybyte[i], DEFAULT_VALUE);
   writeln('Passed!');
   writeln('Passed!');
   { negative move count }
   { negative move count }
-  write('test move (negative count)...');
+  write('testing move (negative count)...');
   move(src_arraybyte,dst_arraybyte,-12);
   move(src_arraybyte,dst_arraybyte,-12);
   writeln('Passed!');
   writeln('Passed!');
 end;
 end;
 
 
+
+procedure test_move_large(size: longint);
+var
+  src, dst: PLongInt;
+  i: LongInt;
+begin
+  GetMem(src, size*sizeof(LongInt));
+  GetMem(dst, size*sizeof(LongInt));
+  write('testing move of ',size,' dwords ...');
+  for i := 0 to size-1 do
+  begin
+    src[i] := i;
+    dst[i] := -1;
+  end;
+  move(src[0], dst[2], (size-4)*sizeof(LongInt));
+  test(dst[0], -1);
+  test(dst[1], -1);
+  test(dst[size-1], -1);
+  test(dst[size-2], -1);
+  for i := 2 to size-3 do
+    test(dst[i], i-2);
+  writeln('Passed!');
+
+  // repeat with source and dest swapped (maybe move in opposite direction)
+  // current implementations detect that regions don't overlap and move forward,
+  // so this test is mostly redundant, but it does no harm anyway.
+  write('testing move of ',size,' dwords, opposite direction...');
+  for i := 0 to size-1 do
+  begin
+    dst[i] := i;
+    src[i] := -1;
+  end;
+  move(dst[0], src[2], (size-4)*sizeof(LongInt));
+  test(src[0], -1);
+  test(src[1], -1);
+  test(src[size-1], -1);
+  test(src[size-2], -1);
+  for i := 2 to size-3 do
+    test(src[i], i-2);
+  writeln('Passed!');
+
+  write('testing move of ',size,' dwords, overlapping forward...');
+  for i := 0 to size-1 do
+    src[i] := i;
+  move(src[0], src[100], (size-100)*sizeof(LongInt));
+  for i := 0 to 99 do
+    test(src[i], i);
+  for i := 100 to size-101 do
+    test(src[i], i-100);
+  writeln('Passed!');
+
+  write('testing move of ',size,' dwords, overlapping backward...');
+  for i := 0 to size-1 do
+    src[i] := i;
+  move(src[100], src[0], (size-100)*sizeof(LongInt));
+  for i := 0 to size-101 do
+    test(src[i], i+100);
+  for i := size-100 to size-1 do
+    test(src[i], i);
+  writeln('Passed!');
+  FreeMem(dst);
+  FreeMem(src);
+end;
+
 {$ifdef fpc}
 {$ifdef fpc}
 procedure test_fillword;
 procedure test_fillword;
  var
  var
@@ -271,6 +335,8 @@ end;
 begin
 begin
   test_fillchar;
   test_fillchar;
   test_move;
   test_move;
+  test_move_large(500);   // 512 longints=2048 bytes
+  test_move_large(500000);
 {$ifdef fpc}
 {$ifdef fpc}
   test_fillword;
   test_fillword;
   test_filldword;
   test_filldword;