GR32.Types.SIMD.pas 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. unit GR32.Types.SIMD;
  2. (* ***** BEGIN LICENSE BLOCK *****
  3. * Version: MPL 1.1 or LGPL 2.1 with linking exception
  4. *
  5. * The contents of this file are subject to the Mozilla Public License Version
  6. * 1.1 (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. * http://www.mozilla.org/MPL/
  9. *
  10. * Software distributed under the License is distributed on an "AS IS" basis,
  11. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12. * for the specific language governing rights and limitations under the
  13. * License.
  14. *
  15. * Alternatively, the contents of this file may be used under the terms of the
  16. * Free Pascal modified version of the GNU Lesser General Public License
  17. * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
  18. * of this license are applicable instead of those above.
  19. * Please see the file LICENSE.txt for additional information concerning this
  20. * license.
  21. *
  22. * The Original Code is SIMD for Graphics32
  23. *
  24. * The Initial Developer of the Original Code is Anders Melander
  25. *
  26. * Portions created by the Initial Developer are Copyright (C) 2025
  27. * the Initial Developer. All Rights Reserved.
  28. *
  29. * ***** END LICENSE BLOCK ***** *)
  30. interface
  31. {$include GR32.inc}
  32. {$if not defined(PUREPASCAL)}
  //------------------------------------------------------------------------------
  //
  //      MXCSR rounding modes (SSE control/status register)
  //
  //------------------------------------------------------------------------------
  // Intended for use with the SSE2 CVTSS2SI instruction and its relatives.
  //
  // The three non-zero mode values below only use the bits $00002000 and
  // $00004000; MASK has exactly those two bits cleared and every other bit set,
  // so "value and MASK" strips the current mode and "or <mode>" installs a new
  // one.
  //------------------------------------------------------------------------------
  type
    MXCSR = record
    const
      MASK    = $FFFF9FFF;      // All bits except the two rounding-mode bits

      NEAREST = $00000000;      // Round (to nearest)
      DOWN    = $00002000;      // Floor (toward negative infinity)
      UP      = $00004000;      // Ceil  (toward positive infinity)
      TRUNC   = $00006000;      // Trunc (toward zero)
    end;
  //------------------------------------------------------------------------------
  //
  //      SSE4.1 rounding-control immediates
  //
  //------------------------------------------------------------------------------
  // Immediate operand values for the SSE4.1 ROUND[S/P][S/D] instructions.
  // One rounding selector (the first group) may be combined with one exception
  // selector (the second group).
  //------------------------------------------------------------------------------
  type
    SSE_ROUND = record
    const
      // Rounding selectors
      TO_NEAREST_INT = $00;     // Round
      TO_NEG_INF     = $01;     // Floor
      TO_POS_INF     = $02;     // Ceil
      TO_ZERO        = $03;     // Trunc
      CUR_DIRECTION  = $04;     // Use the rounding mode currently set in MXCSR

      // Exception selectors
      RAISE_EXC      = $00;     // Raise exceptions
      NO_EXC         = $08;     // Suppress exceptions
    end;
  65. //------------------------------------------------------------------------------
  66. //
  67. // SIMD constants
  68. //
  69. //------------------------------------------------------------------------------
  // All SIMD values are arrays of 4 elements.
  // Each element is 32 bits wide, so the element type is Single, Cardinal or Integer.
  72. //------------------------------------------------------------------------------
  //------------------------------------------------------------------------------
  // Unaligned constants
  //------------------------------------------------------------------------------
  // Plain typed constants: four identical 32-bit lanes each. No alignment is
  // requested for these declarations.
  //------------------------------------------------------------------------------
  const
    // 1.0 in every lane.
    SSE_FloatOne: array[0..3] of Single = (
      1.0, 1.0, 1.0, 1.0
    );

    // 256*256 = 65536.0 in every lane.
    SSE_Float256x256: array[0..3] of Single = (
      65536.0, 65536.0, 65536.0, 65536.0
    );

    // Every bit except the top (sign) bit set, in every lane.
    SSE_IntAbsMask: array[0..3] of Cardinal = (
      $7FFFFFFF, $7FFFFFFF, $7FFFFFFF, $7FFFFFFF
    );
  //------------------------------------------------------------------------------
  // Aligned constants
  //------------------------------------------------------------------------------
  // These are declared as procedures but are implemented as no-code assembly
  // routines: their bodies contain only an alignment directive and data.
  //------------------------------------------------------------------------------

  // 8 x $FF00 (eight 16-bit words)
  procedure SSE_FF00FF00_ALIGNED;

  // Bias table for x/255 ($7F * $8101 = $003FFF7F per dword)
  procedure SSE_003FFF7F_ALIGNED;

  // Pack table for PSHUFB: picks the low byte of each of 4 dwords
  procedure SSE_0C080400_ALIGNED;
  89. {$ifend}
  90. //------------------------------------------------------------------------------
  91. //------------------------------------------------------------------------------
  92. //------------------------------------------------------------------------------
  93. implementation
  {$if not defined(PUREPASCAL)}
  //------------------------------------------------------------------------------
  // SSE_FF00FF00_ALIGNED
  //------------------------------------------------------------------------------
  // Data-only routine: the body holds no instructions, just a 16-byte alignment
  // request followed by eight words of $FF00 (16 bytes in total).
  //
  // The routine is wrapped in the same PUREPASCAL condition as its declaration
  // in the interface section, so that no assembler block is compiled (and no
  // unreferenced private routine is left behind) in a PUREPASCAL build.
  //
  // NOTE(review): callers presumably use the address of this routine as the
  // address of the table. Confirm that the entry point itself ends up 16-byte
  // aligned, i.e. that the alignment directive does not insert padding between
  // the entry point and the data.
  //------------------------------------------------------------------------------
  procedure SSE_FF00FF00_ALIGNED; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$ifdef FPC}
          ALIGN 16
  {$else}
          .ALIGN 16
  {$endif}
          dw      $FF00, $FF00, $FF00, $FF00
          dw      $FF00, $FF00, $FF00, $FF00
  end;
  {$ifend}
  {$if not defined(PUREPASCAL)}
  //------------------------------------------------------------------------------
  // SSE_003FFF7F_ALIGNED
  //------------------------------------------------------------------------------
  // Aligned bias table for x/255: $7F * $8101 = $003FFF7F.
  //
  // Data-only routine: the body holds no instructions, just a 16-byte alignment
  // request followed by the byte sequence $7F, $FF, $3F, $00 repeated four
  // times (16 bytes). Each group of four bytes is the dword $003FFF7F stored
  // low byte first.
  //
  // The routine is wrapped in the same PUREPASCAL condition as its declaration
  // in the interface section, so that no assembler block is compiled (and no
  // unreferenced private routine is left behind) in a PUREPASCAL build.
  //
  // NOTE(review): callers presumably use the address of this routine as the
  // address of the table. Confirm that the entry point itself ends up 16-byte
  // aligned, i.e. that the alignment directive does not insert padding between
  // the entry point and the data.
  //------------------------------------------------------------------------------
  procedure SSE_003FFF7F_ALIGNED; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$ifdef FPC}
          ALIGN 16
  {$else}
          .ALIGN 16
  {$endif}
          db      $7F, $FF, $3F, $0
          db      $7F, $FF, $3F, $0
          db      $7F, $FF, $3F, $0
          db      $7F, $FF, $3F, $0
  end;
  {$ifend}
  {$if not defined(PUREPASCAL)}
  //------------------------------------------------------------------------------
  // SSE_0C080400_ALIGNED
  //------------------------------------------------------------------------------
  // Aligned pack table for PSHUFB: picks the low byte of each of 4 dwords.
  //
  // Data-only routine: the body holds no instructions, just a 16-byte alignment
  // request followed by the byte sequence $00, $04, $08, $0C repeated four
  // times (16 bytes). Each group of four bytes is the dword $0C080400 stored
  // low byte first; the values are the byte offsets of the lowest byte of
  // dwords 0, 1, 2 and 3 within a 16-byte vector.
  //
  // The routine is wrapped in the same PUREPASCAL condition as its declaration
  // in the interface section, so that no assembler block is compiled (and no
  // unreferenced private routine is left behind) in a PUREPASCAL build.
  //
  // NOTE(review): callers presumably use the address of this routine as the
  // address of the table. Confirm that the entry point itself ends up 16-byte
  // aligned, i.e. that the alignment directive does not insert padding between
  // the entry point and the data.
  //------------------------------------------------------------------------------
  procedure SSE_0C080400_ALIGNED; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
  asm
  {$ifdef FPC}
          ALIGN 16
  {$else}
          .ALIGN 16
  {$endif}
          db      $00, $04, $08, $0C
          db      $00, $04, $08, $0C
          db      $00, $04, $08, $0C
          db      $00, $04, $08, $0C
  end;
  {$ifend}
  130. end.