gtx_fast_trigonometry.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. #include <glm/gtc/type_precision.hpp>
  2. #include <glm/gtx/fast_trigonometry.hpp>
  3. #include <glm/gtx/integer.hpp>
  4. #include <glm/gtx/common.hpp>
  5. #include <glm/gtc/constants.hpp>
  6. #include <glm/gtc/ulp.hpp>
  7. #include <glm/gtc/vec1.hpp>
  8. #include <glm/trigonometric.hpp>
  9. #include <cmath>
  10. #include <ctime>
  11. #include <cstdio>
  12. #include <vector>
  13. namespace fastCos
  14. {
  15. int perf(bool NextFloat)
  16. {
  17. const float begin = -glm::pi<float>();
  18. const float end = glm::pi<float>();
  19. float result = 0.f;
  20. const std::clock_t timestamp1 = std::clock();
  21. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  22. result = glm::fastCos(i);
  23. const std::clock_t timestamp2 = std::clock();
  24. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  25. result = glm::cos(i);
  26. const std::clock_t timestamp3 = std::clock();
  27. const std::clock_t time_fast = timestamp2 - timestamp1;
  28. const std::clock_t time_default = timestamp3 - timestamp2;
  29. std::printf("fastCos Time %d clocks\n", static_cast<unsigned int>(time_fast));
  30. std::printf("cos Time %d clocks\n", static_cast<unsigned int>(time_default));
  31. return time_fast <= time_default ? 0 : 1;
  32. }
  33. }//namespace fastCos
  34. namespace fastSin
  35. {
  36. /*
  37. float sin(float x) {
  38. float temp;
  39. temp = (x + M_PI) / ((2 * M_PI) - M_PI);
  40. return limited_sin((x + M_PI) - ((2 * M_PI) - M_PI) * temp));
  41. }
  42. */
  43. int perf(bool NextFloat)
  44. {
  45. const float begin = -glm::pi<float>();
  46. const float end = glm::pi<float>();
  47. float result = 0.f;
  48. const std::clock_t timestamp1 = std::clock();
  49. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  50. result = glm::fastSin(i);
  51. const std::clock_t timestamp2 = std::clock();
  52. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  53. result = glm::sin(i);
  54. const std::clock_t timestamp3 = std::clock();
  55. const std::clock_t time_fast = timestamp2 - timestamp1;
  56. const std::clock_t time_default = timestamp3 - timestamp2;
  57. std::printf("fastSin Time %d clocks\n", static_cast<unsigned int>(time_fast));
  58. std::printf("sin Time %d clocks\n", static_cast<unsigned int>(time_default));
  59. return time_fast <= time_default ? 0 : 1;
  60. }
  61. }//namespace fastSin
  62. namespace fastTan
  63. {
  64. int perf(bool NextFloat)
  65. {
  66. const float begin = -glm::pi<float>();
  67. const float end = glm::pi<float>();
  68. float result = 0.f;
  69. const std::clock_t timestamp1 = std::clock();
  70. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  71. result = glm::fastTan(i);
  72. const std::clock_t timestamp2 = std::clock();
  73. for (float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  74. result = glm::tan(i);
  75. const std::clock_t timestamp3 = std::clock();
  76. const std::clock_t time_fast = timestamp2 - timestamp1;
  77. const std::clock_t time_default = timestamp3 - timestamp2;
  78. std::printf("fastTan Time %d clocks\n", static_cast<unsigned int>(time_fast));
  79. std::printf("tan Time %d clocks\n", static_cast<unsigned int>(time_default));
  80. return time_fast <= time_default ? 0 : 1;
  81. }
  82. }//namespace fastTan
  83. namespace fastAcos
  84. {
  85. int perf(bool NextFloat)
  86. {
  87. const float begin = -glm::pi<float>();
  88. const float end = glm::pi<float>();
  89. float result = 0.f;
  90. const std::clock_t timestamp1 = std::clock();
  91. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  92. result = glm::fastAcos(i);
  93. const std::clock_t timestamp2 = std::clock();
  94. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  95. result = glm::acos(i);
  96. const std::clock_t timestamp3 = std::clock();
  97. const std::clock_t time_fast = timestamp2 - timestamp1;
  98. const std::clock_t time_default = timestamp3 - timestamp2;
  99. std::printf("fastAcos Time %d clocks\n", static_cast<unsigned int>(time_fast));
  100. std::printf("acos Time %d clocks\n", static_cast<unsigned int>(time_default));
  101. return time_fast <= time_default ? 0 : 1;
  102. }
  103. }//namespace fastAcos
  104. namespace fastAsin
  105. {
  106. int perf(bool NextFloat)
  107. {
  108. const float begin = -glm::pi<float>();
  109. const float end = glm::pi<float>();
  110. float result = 0.f;
  111. const std::clock_t timestamp1 = std::clock();
  112. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  113. result = glm::fastAsin(i);
  114. const std::clock_t timestamp2 = std::clock();
  115. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  116. result = glm::asin(i);
  117. const std::clock_t timestamp3 = std::clock();
  118. const std::clock_t time_fast = timestamp2 - timestamp1;
  119. const std::clock_t time_default = timestamp3 - timestamp2;
  120. std::printf("fastAsin Time %d clocks\n", static_cast<unsigned int>(time_fast));
  121. std::printf("asin Time %d clocks\n", static_cast<unsigned int>(time_default));
  122. return time_fast <= time_default ? 0 : 1;
  123. }
  124. }//namespace fastAsin
  125. namespace fastAtan
  126. {
  127. int perf(bool NextFloat)
  128. {
  129. const float begin = -glm::pi<float>();
  130. const float end = glm::pi<float>();
  131. float result = 0.f;
  132. const std::clock_t timestamp1 = std::clock();
  133. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  134. result = glm::fastAtan(i);
  135. const std::clock_t timestamp2 = std::clock();
  136. for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
  137. result = glm::atan(i);
  138. const std::clock_t timestamp3 = std::clock();
  139. const std::clock_t time_fast = timestamp2 - timestamp1;
  140. const std::clock_t time_default = timestamp3 - timestamp2;
  141. std::printf("fastAtan Time %d clocks\n", static_cast<unsigned int>(time_fast));
  142. std::printf("atan Time %d clocks\n", static_cast<unsigned int>(time_default));
  143. return time_fast <= time_default ? 0 : 1;
  144. }
  145. }//namespace fastAtan
  146. namespace taylorCos
  147. {
  148. glm::vec4 const AngleShift(0.0f, glm::pi<float>() * 0.5f, glm::pi<float>() * 1.0f, glm::pi<float>() * 1.5f);
  149. template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
  150. GLM_FUNC_QUALIFIER vecType<T, P> taylorSeriesNewCos(vecType<T, P> const & x)
  151. {
  152. vecType<T, P> const Powed2(x * x);
  153. vecType<T, P> const Powed4(Powed2 * Powed2);
  154. vecType<T, P> const Powed6(Powed4 * Powed2);
  155. vecType<T, P> const Powed8(Powed4 * Powed4);
  156. return static_cast<T>(1)
  157. - Powed2 * static_cast<T>(0.5)
  158. + Powed4 * static_cast<T>(0.04166666666666666666666666666667)
  159. - Powed6 * static_cast<T>(0.00138888888888888888888888888889)
  160. + Powed8 * static_cast<T>(2.4801587301587301587301587301587e-5);
  161. }
  162. template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
  163. GLM_FUNC_QUALIFIER vecType<T, P> taylorSeriesNewCos6(vecType<T, P> const & x)
  164. {
  165. vecType<T, P> const Powed2(x * x);
  166. vecType<T, P> const Powed4(Powed2 * Powed2);
  167. vecType<T, P> const Powed6(Powed4 * Powed2);
  168. return static_cast<T>(1)
  169. - Powed2 * static_cast<T>(0.5)
  170. + Powed4 * static_cast<T>(0.04166666666666666666666666666667)
  171. - Powed6 * static_cast<T>(0.00138888888888888888888888888889);
  172. }
  173. template <glm::precision P, template <typename, glm::precision> class vecType>
  174. GLM_FUNC_QUALIFIER vecType<float, P> fastAbs(vecType<float, P> x)
  175. {
  176. int* Pointer = reinterpret_cast<int*>(&x[0]);
  177. Pointer[0] &= 0x7fffffff;
  178. Pointer[1] &= 0x7fffffff;
  179. Pointer[2] &= 0x7fffffff;
  180. Pointer[3] &= 0x7fffffff;
  181. return x;
  182. }
  183. template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
  184. GLM_FUNC_QUALIFIER vecType<T, P> fastCosNew(vecType<T, P> const & x)
  185. {
  186. vecType<T, P> const Angle0_PI(fastAbs(fmod(x + glm::pi<T>(), glm::two_pi<T>()) - glm::pi<T>()));
  187. return taylorSeriesNewCos6(x);
  188. /*
  189. vecType<bool, P> const FirstQuarterPi(lessThanEqual(Angle0_PI, vecType<T, P>(glm::half_pi<T>())));
  190. vecType<T, P> const RevertAngle(mix(vecType<T, P>(glm::pi<T>()), vecType<T, P>(0), FirstQuarterPi));
  191. vecType<T, P> const ReturnSign(mix(vecType<T, P>(-1), vecType<T, P>(1), FirstQuarterPi));
  192. vecType<T, P> const SectionAngle(RevertAngle - Angle0_PI);
  193. return ReturnSign * taylorSeriesNewCos(SectionAngle);
  194. */
  195. }
  196. int perf_fastCosNew(float Begin, float End, std::size_t Samples)
  197. {
  198. std::vector<glm::vec4> Results;
  199. Results.resize(Samples);
  200. float Steps = (End - Begin) / Samples;
  201. std::clock_t const TimeStampBegin = std::clock();
  202. for(std::size_t i = 0; i < Samples; ++i)
  203. Results[i] = fastCosNew(AngleShift + glm::vec4(Begin + Steps * i));
  204. std::clock_t const TimeStampEnd = std::clock();
  205. std::printf("fastCosNew %ld clocks\n", TimeStampEnd - TimeStampBegin);
  206. int Error = 0;
  207. for(std::size_t i = 0; i < Samples; ++i)
  208. Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
  209. return Error;
  210. }
  211. template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
  212. GLM_FUNC_QUALIFIER vecType<T, P> deterministic_fmod(vecType<T, P> const & x, T y)
  213. {
  214. return x - y * trunc(x / y);
  215. }
  216. template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
  217. GLM_FUNC_QUALIFIER vecType<T, P> fastCosDeterminisctic(vecType<T, P> const & x)
  218. {
  219. vecType<T, P> const Angle0_PI(abs(deterministic_fmod(x + glm::pi<T>(), glm::two_pi<T>()) - glm::pi<T>()));
  220. vecType<bool, P> const FirstQuarterPi(lessThanEqual(Angle0_PI, vecType<T, P>(glm::half_pi<T>())));
  221. vecType<T, P> const RevertAngle(mix(vecType<T, P>(glm::pi<T>()), vecType<T, P>(0), FirstQuarterPi));
  222. vecType<T, P> const ReturnSign(mix(vecType<T, P>(-1), vecType<T, P>(1), FirstQuarterPi));
  223. vecType<T, P> const SectionAngle(RevertAngle - Angle0_PI);
  224. return ReturnSign * taylorSeriesNewCos(SectionAngle);
  225. }
  226. int perf_fastCosDeterminisctic(float Begin, float End, std::size_t Samples)
  227. {
  228. std::vector<glm::vec4> Results;
  229. Results.resize(Samples);
  230. float Steps = (End - Begin) / Samples;
  231. std::clock_t const TimeStampBegin = std::clock();
  232. for(std::size_t i = 0; i < Samples; ++i)
  233. Results[i] = taylorCos::fastCosDeterminisctic(AngleShift + glm::vec4(Begin + Steps * i));
  234. std::clock_t const TimeStampEnd = std::clock();
  235. std::printf("fastCosDeterminisctic %ld clocks\n", TimeStampEnd - TimeStampBegin);
  236. int Error = 0;
  237. for(std::size_t i = 0; i < Samples; ++i)
  238. Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
  239. return Error;
  240. }
  241. template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
  242. GLM_FUNC_QUALIFIER vecType<T, P> taylorSeriesRefCos(vecType<T, P> const & x)
  243. {
  244. return static_cast<T>(1)
  245. - (x * x) / glm::factorial(static_cast<T>(2))
  246. + (x * x * x * x) / glm::factorial(static_cast<T>(4))
  247. - (x * x * x * x * x * x) / glm::factorial(static_cast<T>(6))
  248. + (x * x * x * x * x * x * x * x) / glm::factorial(static_cast<T>(8));
  249. }
  250. template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
  251. GLM_FUNC_QUALIFIER vecType<T, P> fastRefCos(vecType<T, P> const & x)
  252. {
  253. vecType<T, P> const Angle0_PI(glm::abs(fmod(x + glm::pi<T>(), glm::two_pi<T>()) - glm::pi<T>()));
  254. // return taylorSeriesRefCos(Angle0_PI);
  255. vecType<bool, P> const FirstQuarterPi(lessThanEqual(Angle0_PI, vecType<T, P>(glm::half_pi<T>())));
  256. vecType<T, P> const RevertAngle(mix(vecType<T, P>(glm::pi<T>()), vecType<T, P>(0), FirstQuarterPi));
  257. vecType<T, P> const ReturnSign(mix(vecType<T, P>(-1), vecType<T, P>(1), FirstQuarterPi));
  258. vecType<T, P> const SectionAngle(RevertAngle - Angle0_PI);
  259. return ReturnSign * taylorSeriesRefCos(SectionAngle);
  260. }
  261. int perf_fastCosRef(float Begin, float End, std::size_t Samples)
  262. {
  263. std::vector<glm::vec4> Results;
  264. Results.resize(Samples);
  265. float Steps = (End - Begin) / Samples;
  266. std::clock_t const TimeStampBegin = std::clock();
  267. for(std::size_t i = 0; i < Samples; ++i)
  268. Results[i] = taylorCos::fastRefCos(AngleShift + glm::vec4(Begin + Steps * i));
  269. std::clock_t const TimeStampEnd = std::clock();
  270. std::printf("fastCosRef %ld clocks\n", TimeStampEnd - TimeStampBegin);
  271. int Error = 0;
  272. for(std::size_t i = 0; i < Samples; ++i)
  273. Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
  274. return Error;
  275. }
  276. int perf_fastCosOld(float Begin, float End, std::size_t Samples)
  277. {
  278. std::vector<glm::vec4> Results;
  279. Results.resize(Samples);
  280. float Steps = (End - Begin) / Samples;
  281. std::clock_t const TimeStampBegin = std::clock();
  282. for(std::size_t i = 0; i < Samples; ++i)
  283. Results[i] = glm::fastCos(AngleShift + glm::vec4(Begin + Steps * i));
  284. std::clock_t const TimeStampEnd = std::clock();
  285. std::printf("fastCosOld %ld clocks\n", TimeStampEnd - TimeStampBegin);
  286. int Error = 0;
  287. for(std::size_t i = 0; i < Samples; ++i)
  288. Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
  289. return Error;
  290. }
  291. int perf_cos(float Begin, float End, std::size_t Samples)
  292. {
  293. std::vector<glm::vec4> Results;
  294. Results.resize(Samples);
  295. float Steps = (End - Begin) / Samples;
  296. std::clock_t const TimeStampBegin = std::clock();
  297. for(std::size_t i = 0; i < Samples; ++i)
  298. Results[i] = glm::cos(AngleShift + glm::vec4(Begin + Steps * i));
  299. std::clock_t const TimeStampEnd = std::clock();
  300. std::printf("cos %ld clocks\n", TimeStampEnd - TimeStampBegin);
  301. int Error = 0;
  302. for(std::size_t i = 0; i < Samples; ++i)
  303. Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
  304. return Error;
  305. }
  306. int perf(std::size_t const Samples)
  307. {
  308. int Error = 0;
  309. float const Begin = -glm::pi<float>();
  310. float const End = glm::pi<float>();
  311. Error += perf_cos(Begin, End, Samples);
  312. Error += perf_fastCosOld(Begin, End, Samples);
  313. Error += perf_fastCosRef(Begin, End, Samples);
  314. //Error += perf_fastCosNew(Begin, End, Samples);
  315. Error += perf_fastCosDeterminisctic(Begin, End, Samples);
  316. return Error;
  317. }
  318. int test()
  319. {
  320. int Error = 0;
  321. //for(float Angle = -4.0f * glm::pi<float>(); Angle < 4.0f * glm::pi<float>(); Angle += 0.1f)
  322. //for(float Angle = -720.0f; Angle < 720.0f; Angle += 0.1f)
  323. for(float Angle = 0.0f; Angle < 180.0f; Angle += 0.1f)
  324. {
  325. float const modAngle = std::fmod(glm::abs(Angle), 360.f);
  326. assert(modAngle >= 0.0f && modAngle <= 360.f);
  327. float const radAngle = glm::radians(modAngle);
  328. float const Cos0 = std::cos(radAngle);
  329. float const Cos1 = taylorCos::fastRefCos(glm::fvec1(radAngle)).x;
  330. Error += glm::abs(Cos1 - Cos0) < 0.1f ? 0 : 1;
  331. float const Cos2 = taylorCos::fastCosNew(glm::fvec1(radAngle)).x;
  332. //Error += glm::abs(Cos2 - Cos0) < 0.1f ? 0 : 1;
  333. assert(!Error);
  334. }
  335. return Error;
  336. }
  337. }//namespace taylorCos
  338. int main()
  339. {
  340. int Error(0);
  341. Error += ::taylorCos::test();
  342. Error += ::taylorCos::perf(1000);
  343. # ifdef NDEBUG
  344. ::fastCos::perf(false);
  345. ::fastSin::perf(false);
  346. ::fastTan::perf(false);
  347. ::fastAcos::perf(false);
  348. ::fastAsin::perf(false);
  349. ::fastAtan::perf(false);
  350. # endif//NDEBUG
  351. return Error;
  352. }