{
    This file is part of the Free Pascal run time library.
    Copyright (c) 2002 by the Free Pascal development team

    Include file with set operations called by the compiler

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}
{$asmmode intel}

{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
procedure fpc_varset_add_sets(const set1,set2; var dest;size : ptrint); compilerproc; assembler; nostackframe;
{ Windows: rcx = set1, rdx = set2, r8 = dest, r9 = size
  Linux:   rdi = set1, rsi = set2, rdx = dest, rcx = size }
asm
    sub    size, 16
    jl     @Bytewise_Prepare { probably dead branch... }
@16x_Loop:
    movdqu xmm0, xmmword ptr [set1 + size]
    movdqu xmm1, xmmword ptr [set2 + size]
    por    xmm0, xmm1
    movdqu xmmword ptr [dest + size], xmm0
    sub    size, 16
    ja     @16x_Loop
    movdqu xmm0, xmmword ptr [set1] { Tail, just in case (if size is always divisible by 16, 16x_Loop can be altered to handle everything instead). }
    movdqu xmm1, xmmword ptr [set2]
    por    xmm0, xmm1
    movdqu xmmword ptr [dest], xmm0
    ret

@Bytewise_Prepare:
    add    size, 15
@Bytewise_Loop:
    movzx  eax, byte ptr [set1 + size]
    or     al, byte ptr [set2 + size]
    mov    byte ptr [dest + size], al
    sub    size, 1
    jae    @Bytewise_Loop
end;
{$define FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
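
{ What the helper above computes, as a minimal plain-Pascal sketch: a byte-wise
  OR (set union) over the first "size" bytes of both operands. Illustrative
  only; "varset_add_sets_ref" and "tbytearray" are made-up names, and the dummy
  array bound only exists to allow the variable typecast.

    procedure varset_add_sets_ref(const set1, set2; var dest; size: ptrint);
    type
      tbytearray = array[0..high(word)] of byte;
    var
      i: ptrint;
    begin
      for i := 0 to size - 1 do
        tbytearray(dest)[i] := tbytearray(set1)[i] or tbytearray(set2)[i];
    end;

  Aliasing dest with set1/set2 is harmless here because each byte is read
  before its destination is written. }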

{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
procedure fpc_varset_mul_sets(const set1,set2; var dest;size : ptrint); compilerproc; assembler; nostackframe;
{ Same as fpc_varset_add_sets but with 'and' instead of 'or'. }
asm
    sub    size, 16
    jl     @Bytewise_Prepare { probably dead branch... }
@16x_Loop:
    movdqu xmm0, xmmword ptr [set1 + size]
    movdqu xmm1, xmmword ptr [set2 + size]
    pand   xmm0, xmm1
    movdqu xmmword ptr [dest + size], xmm0
    sub    size, 16
    ja     @16x_Loop
    movdqu xmm0, xmmword ptr [set1] { Tail, just in case (if size is always divisible by 16, 16x_Loop can be altered to handle everything instead). }
    movdqu xmm1, xmmword ptr [set2]
    pand   xmm0, xmm1
    movdqu xmmword ptr [dest], xmm0
    ret

@Bytewise_Prepare:
    add    size, 15
@Bytewise_Loop:
    movzx  eax, byte ptr [set1 + size]
    and    al, byte ptr [set2 + size]
    mov    byte ptr [dest + size], al
    sub    size, 1
    jae    @Bytewise_Loop
end;
{$define FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
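
{ Sketch of the semantics, analogous to the union sketch above but combining
  bytes with AND (set intersection); names are illustrative only.

    procedure varset_mul_sets_ref(const set1, set2; var dest; size: ptrint);
    type
      tbytearray = array[0..high(word)] of byte;
    var
      i: ptrint;
    begin
      for i := 0 to size - 1 do
        tbytearray(dest)[i] := tbytearray(set1)[i] and tbytearray(set2)[i];
    end;
}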

{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
procedure fpc_varset_sub_sets(const set1,set2; var dest;size : ptrint); compilerproc; assembler; nostackframe;
{ Windows: rcx = set1, rdx = set2, r8 = dest, r9 = size
  Linux:   rdi = set1, rsi = set2, rdx = dest, rcx = size }
asm
    sub    size, 16
    jl     @Bytewise_Prepare { probably dead branch... }
    movdqu xmm1, xmmword ptr [set1] { Tail, just in case (if size is always divisible by 16, 16x_Loop can be altered to handle everything instead). }
    movdqu xmm2, xmmword ptr [set2] { Precalculated because operation is not idempotent and dest can be equal to set1/set2. }
    pandn  xmm2, xmm1
@16x_Loop:
    movdqu xmm1, xmmword ptr [set1 + size]
    movdqu xmm0, xmmword ptr [set2 + size]
    pandn  xmm0, xmm1
    movdqu xmmword ptr [dest + size], xmm0
    sub    size, 16
    ja     @16x_Loop
    movdqu xmmword ptr [dest], xmm2 { Write precalculated tail. }
    ret

@Bytewise_Prepare:
    add    size, 15
@Bytewise_Loop:
    movzx  eax, byte ptr [set2 + size]
    not    eax
    and    al, byte ptr [set1 + size]
    mov    byte ptr [dest + size], al
    sub    size, 1
    jae    @Bytewise_Loop
end;
{$define FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
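
{ What the helper above computes, as a minimal plain-Pascal sketch with
  illustrative names: set difference, i.e. each dest byte keeps the bits of
  set1 that are absent from set2.

    procedure varset_sub_sets_ref(const set1, set2; var dest; size: ptrint);
    type
      tbytearray = array[0..high(word)] of byte;
    var
      i: ptrint;
    begin
      for i := 0 to size - 1 do
        tbytearray(dest)[i] := tbytearray(set1)[i] and not tbytearray(set2)[i];
    end;

  The assembler version precalculates the first 16-byte chunk before the loop
  because, when dest aliases set1 or set2, the overlapping tail store could
  otherwise be computed from bytes the loop has already overwritten. }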

{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
procedure fpc_varset_symdif_sets(const set1,set2; var dest;size : ptrint); compilerproc; assembler; nostackframe;
{ Same as fpc_varset_sub_sets but with 'xor' instead of 'and not'.
  Windows: rcx = set1, rdx = set2, r8 = dest, r9 = size
  Linux:   rdi = set1, rsi = set2, rdx = dest, rcx = size }
asm
    sub    size, 16
    jl     @Bytewise_Prepare { probably dead branch... }
    movdqu xmm2, xmmword ptr [set1] { Tail, just in case (if size is always divisible by 16, 16x_Loop can be altered to handle everything instead). }
    movdqu xmm1, xmmword ptr [set2] { Precalculated because operation is not idempotent and dest can be equal to set1/set2. }
    pxor   xmm2, xmm1
@16x_Loop:
    movdqu xmm0, xmmword ptr [set1 + size]
    movdqu xmm1, xmmword ptr [set2 + size]
    pxor   xmm0, xmm1
    movdqu xmmword ptr [dest + size], xmm0
    sub    size, 16
    ja     @16x_Loop
    movdqu xmmword ptr [dest], xmm2 { Write precalculated tail. }
    ret

@Bytewise_Prepare:
    add    size, 15
@Bytewise_Loop:
    movzx  eax, byte ptr [set2 + size]
    xor    al, byte ptr [set1 + size]
    mov    byte ptr [dest + size], al
    sub    size, 1
    jae    @Bytewise_Loop
end;
{$define FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
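
{ Sketch of the semantics, analogous to the difference sketch above but
  combining bytes with XOR (symmetric difference: bits present in exactly one
  operand); names are illustrative only.

    procedure varset_symdif_sets_ref(const set1, set2; var dest; size: ptrint);
    type
      tbytearray = array[0..high(word)] of byte;
    var
      i: ptrint;
    begin
      for i := 0 to size - 1 do
        tbytearray(dest)[i] := tbytearray(set1)[i] xor tbytearray(set2)[i];
    end;
}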

{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
function fpc_varset_contains_sets(const set1,set2;size : ptrint):boolean; compilerproc; assembler; nostackframe;
{ Windows: rcx = set1, rdx = set2, r8 = size
  Linux:   rdi = set1, rsi = set2, rdx = size }
asm
    sub    size, 16
    jl     @Bytewise_Prepare { probably dead branch... }
    { Folds all 16-byte "set1 and not set2" chunks with OR and checks the final result for zero. Better for small enough sets. }
    movdqu xmm1, xmmword ptr [set1]
    movdqu xmm2, xmmword ptr [set2]
    pandn  xmm2, xmm1
@16x_Loop:
    movdqu xmm1, xmmword ptr [set1 + size]
    movdqu xmm0, xmmword ptr [set2 + size]
    pandn  xmm0, xmm1
    por    xmm2, xmm0
    sub    size, 16
    ja     @16x_Loop
    pxor   xmm0, xmm0
    pcmpeqb xmm0, xmm2
    pmovmskb ecx, xmm0
    xor    eax, eax
    inc    cx
    setz   al
    ret

@No:
    xor    eax, eax
    ret

@Bytewise_Prepare:
    add    size, 16
    neg    size
    sub    set1, size
    sub    set2, size
@Bytewise_Loop:
    movzx  eax, byte ptr [set2 + size]
    not    eax
    test   byte ptr [set1 + size], al
    jnz    @No
    inc    size
    jnz    @Bytewise_Loop
    mov    eax, $1
end;
{$define FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
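
{ What the helper above computes, as a minimal plain-Pascal sketch with
  illustrative names: True exactly when every bit of set1 is also present in
  set2, i.e. "set1 and not set2" is all zeroes.

    function varset_contains_sets_ref(const set1, set2; size: ptrint): boolean;
    type
      tbytearray = array[0..high(word)] of byte;
    var
      i: ptrint;
    begin
      varset_contains_sets_ref := true;
      for i := 0 to size - 1 do
        if (tbytearray(set1)[i] and not tbytearray(set2)[i]) <> 0 then
          begin
            varset_contains_sets_ref := false;
            exit;
          end;
    end;

  The vector path above folds the per-chunk "set1 and not set2" values
  together with OR and tests the accumulated result for zero once at the end. }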

{$asmmode att}