tvectorcall3.pp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. { %CPU=x86_64 }
  2. program vectorcall_stack_test;
  3. {$IFNDEF CPUX86_64}
  4. {$FATAL This test program can only be compiled on Windows or Linux 64-bit with an Intel processor }
  5. {$ENDIF}
  6. { This program can be compiled on Linux, and all the vectorcall
  7. routines should work the same, including the assembler routine.
  8. 'vectorcall' should be ignored by the compiler on this platform. }
  9. {$push}
  10. {$CODEALIGN RECORDMIN=16}
  11. {$PACKRECORDS C}
  12. type
  13. TM128 = record
  14. case Byte of
  15. 0: (M128_F32: array[0..3] of Single);
  16. 1: (M128_F64: array[0..1] of Double);
  17. end;
  18. {$CODEALIGN RECORDMIN=32}
  19. {$PACKRECORDS C}
  20. type
  21. TM256 = record
  22. case Byte of
  23. 0: (M256_F32: array[0..7] of Single);
  24. 1: (M256_F64: array[0..3] of Double);
  25. 2: (M256_M128: array[0..1] of TM128);
  26. end;
  27. {$pop}
  28. TVector4f = packed record
  29. case Byte of
  30. 0: (M128: TM128);
  31. 1: (X, Y, Z, W: Single);
  32. end;
  33. TVectorPair4f = packed record
  34. case Byte of
  35. 0: (M256: TM256);
  36. 1: (V: array[0..1] of TVector4f);
  37. 2: (X1, Y1, Z1, W1, X2, Y2, Z2, W2: Single);
  38. end;
  39. function TestFloat(TP: Single): Single; vectorcall; { vectorcall should have no effect on how this function behaves }
  40. begin
  41. TestFloat := TP * 1.5;
  42. end;
  43. function AddVectors(V1, V2: TVector4f): TVector4f; vectorcall;
  44. begin
  45. AddVectors.X := V1.X + V2.X;
  46. AddVectors.Y := V1.Y + V2.Y;
  47. AddVectors.Z := V1.Z + V2.Z;
  48. AddVectors.W := V1.W + V2.W;
  49. end;
  50. {$ASMMODE Intel}
  51. function AddVectorsAsm(V1, V2: TVector4f): TVector4f; vectorcall; assembler; nostackframe; inline; { The inline is for a future test }
  52. asm
  53. ADDPS XMM0, XMM1
  54. end;
  55. { Note: V1, V2 and the result will go on the stack until FPC fully supports 256-bit vectors }
  56. function AddVectors(V1, V2: TVectorPair4f): TVectorPair4f; vectorcall;
  57. var
  58. C: Integer;
  59. begin
  60. for C := 0 to 1 do
  61. begin
  62. AddVectors.V[C].X := V1.V[C].X + V2.V[C].X;
  63. AddVectors.V[C].Y := V1.V[C].Y + V2.V[C].Y;
  64. AddVectors.V[C].Z := V1.V[C].Z + V2.V[C].Z;
  65. AddVectors.V[C].W := V1.V[C].W + V2.V[C].W;
  66. end;
  67. end;
  68. var
  69. Vecs: array[0..1] of TVector4f; Res, ResAsm, Exp: TVector4f;
  70. Pairs: array[0..1] of TVectorPair4f; ResPair, ExpPair: TVectorPair4f;
  71. I: Integer;
  72. begin
  73. FillDWord(Vecs[0], 0, 8);
  74. Vecs[0].X := TestFloat(2.0);
  75. Vecs[0].Y := 1.0;
  76. Vecs[0].Z := -4.0;
  77. Vecs[0].W := 1.0;
  78. Vecs[1].X := 0.0;
  79. Vecs[1].Y := -2.0;
  80. Vecs[1].Z := TestFloat(4.0);
  81. Vecs[1].W := 0.0;
  82. Exp.X := 3.0;
  83. Exp.Y := -1.0;
  84. Exp.Z := 2.0;
  85. Exp.W := 1.0;
  86. Pairs[0].V[0].X := 1.0; Pairs[0].V[1].X := 5.0;
  87. Pairs[0].V[0].Y := 2.0; Pairs[0].V[1].Y := 6.0;
  88. Pairs[0].V[0].Z := 3.0; Pairs[0].V[1].Z := 7.0;
  89. Pairs[0].V[0].W := 4.0; Pairs[0].V[1].W := 8.0;
  90. Pairs[1].V[0].X := 9.0; Pairs[1].V[1].X := 13.0;
  91. Pairs[1].V[0].Y := 10.0; Pairs[1].V[1].Y := 14.0;
  92. Pairs[1].V[0].Z := 11.0; Pairs[1].V[1].Z := 15.0;
  93. Pairs[1].V[0].W := 12.0; Pairs[1].V[1].W := 16.0;
  94. ExpPair.V[0].X := 10.0; ExpPair.V[1].X := 18.0;
  95. ExpPair.V[0].Y := 12.0; ExpPair.V[1].Y := 20.0;
  96. ExpPair.V[0].Z := 14.0; ExpPair.V[1].Z := 22.0;
  97. ExpPair.V[0].W := 16.0; ExpPair.V[1].W := 24.0;
  98. WriteLn('Vecs[0] = (', Vecs[0].X, ', ', Vecs[0].Y, ', ', Vecs[0].Z, ', ', Vecs[0].W, ')');
  99. WriteLn('Vecs[1] = (', Vecs[1].X, ', ', Vecs[1].Y, ', ', Vecs[1].Z, ', ', Vecs[1].W, ')');
  100. Res := AddVectors(Vecs[0], Vecs[1]);
  101. ResAsm := AddVectorsAsm(Vecs[0], Vecs[1]);
  102. WriteLn('Result = (', Res.X, ', ', Res.Y, ', ', Res.Z, ', ', Res.W, ')');
  103. WriteLn('ResAsm = (', ResAsm.X, ', ', ResAsm.Y, ', ', ResAsm.Z, ', ', ResAsm.W, ')');
  104. WriteLn('Expected = (', Exp.X, ', ', Exp.Y, ', ', Exp.Z, ', ', Exp.W, ')');
  105. WriteLn('Pairs[0] = (', Pairs[0].V[0].X, ', ', Pairs[0].V[0].Y, ', ', Pairs[0].V[0].Z, ', ', Pairs[0].V[0].W, ', ', Pairs[0].V[1].X, ', ', Pairs[0].V[1].Y, ', ', Pairs[0].V[1].Z, ', ', Pairs[0].V[1].W, ')');
  106. WriteLn('Pairs[1] = (', Pairs[1].V[0].X, ', ', Pairs[1].V[0].Y, ', ', Pairs[1].V[0].Z, ', ', Pairs[1].V[0].W, ', ', Pairs[1].V[1].X, ', ', Pairs[1].V[1].Y, ', ', Pairs[1].V[1].Z, ', ', Pairs[1].V[1].W, ')');
  107. ResPair := AddVectors(Pairs[0], Pairs[1]);
  108. WriteLn('ResPair = (', ResPair.V[0].X, ', ', ResPair.V[0].Y, ', ', ResPair.V[0].Z, ', ', ResPair.V[0].W, ', ', ResPair.V[1].X, ', ', ResPair.V[1].Y, ', ', ResPair.V[1].Z, ', ', ResPair.V[1].W, ')');
  109. WriteLn('Expected = (', ExpPair.V[0].X, ', ', ExpPair.V[0].Y, ', ', ExpPair.V[0].Z, ', ', ExpPair.V[0].W, ', ', ExpPair.V[1].X, ', ', ExpPair.V[1].Y, ', ', ExpPair.V[1].Z, ', ', ExpPair.V[1].W, ')');
  110. for I := 0 to 3 do
  111. begin
  112. if Res.M128.M128_F32[I] <> Exp.M128.M128_F32[I] then
  113. begin
  114. WriteLn('FAILURE on Res.M128.M128_F32[', I, ']');
  115. Halt(1);
  116. end;
  117. if ResAsm.M128.M128_F32[I] <> Exp.M128.M128_F32[I] then
  118. begin
  119. WriteLn('FAILURE on ResAsm.M128.M128_F32[', I, ']');
  120. Halt(1);
  121. end;
  122. end;
  123. for I := 0 to 7 do
  124. begin
  125. if ResPair.M256.M256_F32[I] <> ExpPair.M256.M256_F32[I] then
  126. begin
  127. WriteLn('FAILURE on ResPair.M256.M256_F32[', I, ']');
  128. Halt(1);
  129. end;
  130. end;
  131. WriteLn('ok');
  132. end.