jan_math.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. /*
  2. ** Command & Conquer Renegade(tm)
  3. ** Copyright 2025 Electronic Arts Inc.
  4. **
  5. ** This program is free software: you can redistribute it and/or modify
  6. ** it under the terms of the GNU General Public License as published by
  7. ** the Free Software Foundation, either version 3 of the License, or
  8. ** (at your option) any later version.
  9. **
  10. ** This program is distributed in the hope that it will be useful,
  11. ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ** GNU General Public License for more details.
  14. **
  15. ** You should have received a copy of the GNU General Public License
  16. ** along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. This file contains some math functions that Jan worked on while visiting westwood
  20. between Meltdown and Siggraph 2001.
  21. */
  22. #include <iostream>
  23. #include <iomanip>
  24. #include <cmath>
  25. #include "p_timer.h"
  26. #include "wwmath.h"
  27. using namespace std;
  28. #undef for
  29. #define for if(false); else for
  30. /* (1-u)(1-u)(1-u)
  31. --------------- 1/6 0/6
  32. 6
  33. 3*u*u*u - 6u*u +4
  34. --------------- 4/6 1/6
  35. 6
  36. -3*u*u*u + 3*u*u + 3*u + 1
  37. --------------- 1/6 4/6
  38. 6
  39. u*u*u
  40. --------------- 0/6 1/6
  41. 6
  42. */
  43. const double pi = 3.1415926535897932384626433832795;
  44. const int SINTABLESIZE = 16;
  45. float sinTable[SINTABLESIZE+1][3];
  46. /*
  47. float sinTable[SINTABLESIZE+1][3] =
  48. {
  49. 0.f, 0.0327249329938f, 0.0654498139838f,
  50. 0.0980171403296f, 0.130584524375f, 0.16299423692f,
  51. 0.195090322016f, 0.22718650856f, 0.258968953094f,
  52. 0.290284677254f, 0.321600570763f, 0.352449632798f,
  53. 0.382683432365f, 0.412917427848f, 0.442536062284f,
  54. 0.471396736826f, 0.5002577f, 0.528360611467f,
  55. 0.55557023302f, 0.582780177536f, 0.609096734804f,
  56. 0.634393284164f, 0.659690191419f, 0.683966978964f,
  57. 0.707106781187f, 0.730247032377f, 0.752250161646f,
  58. 0.773010453363f, 0.79377112016f, 0.813288873188f,
  59. 0.831469612303f, 0.849650815039f, 0.866495135964f,
  60. 0.881921264348f, 0.897347904711f, 0.911356511164f,
  61. 0.923879532511f, 0.936403079986f, 0.947441092993f,
  62. 0.956940335732f, 0.966440181792f, 0.974401217255f,
  63. 0.980785280403f, 0.987169903631f, 0.991977366409f,
  64. 0.995184726672f, 0.998392578191f, 1.0000003005f,
  65. 1.f, 1.0000003005f, 0.998392578191f,
  66. };
  67. */
  68. void init_bez_sin()
  69. {
  70. for(int i=0; i<SINTABLESIZE+1; i++)
  71. {
  72. double x = sin(pi/2*(i*3)/(SINTABLESIZE*3));
  73. double y = sin(pi/2*(i*3+1)/(SINTABLESIZE*3)); // * 1.00053f;
  74. double z = sin(pi/2*(i*3+2)/(SINTABLESIZE*3)); // * 1.00053f;
  75. double w = sin(pi/2*(i*3+3)/(SINTABLESIZE*3));
  76. double mse = 4.0;
  77. double my = 1.0;
  78. double mz = 1.0;
  79. // * nu2 * nu;
  80. // nu2;
  81. // u * nu;
  82. // * u2 * u;
  83. double wy = 4.0;
  84. for(double dy=1.000178; dy<1.000537; dy+=0.0000001)
  85. {
  86. bool b = false;
  87. double wz = 4.0;
  88. for(double dz=1.000534; dz<1.000715; dz+=0.0000001)
  89. {
  90. double by = x * 8.0 / 27.0 +
  91. y * dy * 12.0 / 27.0 +
  92. z * dz * 6.0 / 27.0 +
  93. w / 27.0;
  94. double bz = x / 27.0 +
  95. y * dy * 6.0 / 27.0 +
  96. z * dz * 12.0 / 27.0 +
  97. w * 8.0 / 27.0;
  98. if(y*dy < 0.0 || y*dy > 1.0)
  99. continue;
  100. if(z*dz < 0.0 || z*dz > 1.0)
  101. continue;
  102. double yerr, zerr;
  103. // if(by > y)
  104. // yerr = by/y;
  105. // else
  106. yerr = y/by;
  107. // if(bz > z)
  108. // zerr = bz/z;
  109. // else
  110. zerr = z/bz;
  111. yerr -= 1.0;
  112. zerr -= 1.0;
  113. double se = yerr*yerr + zerr*zerr;
  114. if(mse > se)
  115. {
  116. wy = se;
  117. wz = se;
  118. mse = se;
  119. my = dy;
  120. mz = dz;
  121. b = false;
  122. }
  123. else
  124. {
  125. // if(se > wy)
  126. // b = true;
  127. // if(se > wz)
  128. // break;
  129. }
  130. }
  131. // if(b)
  132. // break;
  133. }
  134. sinTable[i][0] = float(x);
  135. sinTable[i][1] = float(y * my);
  136. sinTable[i][2] = float(z * mz);
  137. cout << setprecision(12);
  138. cout << setw(16) << x << "f, " << setw(16) << y*my << "f, " << setw(16) << z*mz << "f," << endl;
  139. }
  140. }
  141. const int ACOSTABLESIZE = 32;
  142. //float acosTable[ACOSTABLESIZE+1][3];
  143. float acosTable[ACOSTABLESIZE+1][3] =
  144. {
  145. 0, 0, 0,
  146. 0, 0, 0,
  147. 0, 0, 0,
  148. 0, 0, 0,
  149. 0, 0, 0,
  150. 0, 0, 0,
  151. 0, 0, 0,
  152. 0, 0, 0,
  153. 2.09439510239f, 2.07033956061f, 2.04678164613f,
  154. 2.02361292154f, 2.00044498842f, 1.97766674853f,
  155. 1.95519310129f, 1.93271994669f, 1.91055213001f,
  156. 1.88862003072f, 1.86668845355f, 1.84499333386f,
  157. 1.82347658194f, 1.80196011523f, 1.7806223216f,
  158. 1.75941271297f, 1.73820326172f, 1.71712229397f,
  159. 1.69612415796f, 1.67512622765f, 1.65421121018f,
  160. 1.63333708859f, 1.61246303408f, 1.59162963139f,
  161. 1.57079632679f, 1.54996304584f, 1.52912960772f,
  162. 1.508255565f, 1.48738138962f, 1.46646655946f,
  163. 1.44546849563f, 1.42447034954f, 1.40338938542f,
  164. 1.38217994062f, 1.36097042076f, 1.33963248005f,
  165. 1.31811607165f, 1.29659929615f, 1.27490425438f,
  166. 1.25297262287f, 1.23104063033f, 1.20887262684f,
  167. 1.1863995523f, 1.16392587108f, 1.14114765337f,
  168. 1.11797973205f, 1.09481095108f, 1.07125317716f,
  169. 1.0471975512f, 1.0231405f, 0.998586616212f,
  170. 0, 0, 0,
  171. 0, 0, 0,
  172. 0, 0, 0,
  173. 0, 0, 0,
  174. 0, 0, 0,
  175. 0, 0, 0,
  176. 0, 0, 0,
  177. 0, 0, 0
  178. };
  179. double calc_cos_plus_45(double cx)
  180. {
  181. double sx = sqrt(1-cx*cx);
  182. cx = cx*cos(pi/4) - sx*sin(pi/4);
  183. return cx;
  184. }
  185. void init_bez_acos()
  186. {
  187. for(int i=0; i<ACOSTABLESIZE; i++)
  188. {
  189. // if(i < ACOSTABLESIZE/4 || i > ACOSTABLESIZE/4*3)
  190. // continue;
  191. double x = acos(float(i*3)/(ACOSTABLESIZE*3)*2-1);
  192. double y = acos(float(i*3+1)/(ACOSTABLESIZE*3)*2-1); // * 1.00053f;
  193. double z = acos(float(i*3+2)/(ACOSTABLESIZE*3)*2-1); // * 1.00053f;
  194. double w;
  195. if(i != ACOSTABLESIZE-1)
  196. w = acos(float(i*3+3)/(ACOSTABLESIZE*3)*2-1);
  197. else
  198. w = 0.0;//-acos(float(i*3+1)/(ACOSTABLESIZE*3)*2-1);
  199. double mse = 4.0;
  200. double my = 1.0;
  201. double mz = 1.0;
  202. double wy = 4.0;
  203. for(double dy=0.9996944; dy<=1.0006926; dy+=16.0/8388608)
  204. {
  205. bool b = false;
  206. double wz = 4.0;
  207. for(double dz=0.9997872; dz<=1.0011092; dz+=16.0/8388608)
  208. {
  209. double by = x * 8.0 / 27.0 +
  210. y * dy * 12.0 / 27.0 +
  211. z * dz * 6.0 / 27.0 +
  212. w / 27.0;
  213. double bz = x / 27.0 +
  214. y * dy * 6.0 / 27.0 +
  215. z * dz * 12.0 / 27.0 +
  216. w * 8.0 / 27.0;
  217. if(y*dy < 0.0 || y*dy > pi)
  218. continue;
  219. if(z*dz < 0.0 || z*dz > pi)
  220. continue;
  221. double yerr, zerr;
  222. // if(by > y)
  223. // yerr = by/y;
  224. // else
  225. yerr = y/by;
  226. // if(bz > z)
  227. // zerr = bz/z;
  228. // else
  229. zerr = z/bz;
  230. yerr -= 1.0;
  231. zerr -= 1.0;
  232. double se = yerr*yerr + zerr*zerr;
  233. if(mse > se)
  234. {
  235. wy = se;
  236. wz = se;
  237. mse = se;
  238. my = dy;
  239. mz = dz;
  240. b = false;
  241. }
  242. else
  243. {
  244. // if(se > wy)
  245. // b = true;
  246. // if(se > wz)
  247. // break;
  248. }
  249. }
  250. // if(b)
  251. // break;
  252. }
  253. acosTable[i][0] = float(x);
  254. acosTable[i][1] = float(y * my);
  255. acosTable[i][2] = float(z * mz);
  256. cout << setprecision(12);
  257. cout << setw(16) << x << "f, " << setw(16) << y*my << "f, " << setw(16) << z*mz << "f," << endl;
  258. // cout << my << " " << mz << endl;
  259. }
  260. acosTable[ACOSTABLESIZE][0] = 0.f;
  261. }
  262. float bez_sin(float x)
  263. {
  264. x *= (SINTABLESIZE*4)/(pi*2);
  265. int ix0 = int(floor(x));
  266. float u = x - ix0;
  267. float nu = 1.f-u;
  268. float sign = 1.f;
  269. ix0 &= SINTABLESIZE*4-1;
  270. if(ix0 >= SINTABLESIZE*2)
  271. {
  272. sign = -1.f;
  273. ix0 -= SINTABLESIZE*2;
  274. }
  275. if(ix0 >= SINTABLESIZE)
  276. {
  277. ix0 = (SINTABLESIZE-1)-(ix0-SINTABLESIZE);
  278. float t = u;
  279. u = nu;
  280. nu = t;
  281. }
  282. int ix1 = ix0+1;
  283. // ix0 &= SINTABLESIZE-1;
  284. // ix1 &= SINTABLESIZE-1;
  285. float u2 = u*u;
  286. float nu2 = nu*nu;
  287. float r0 = sinTable[ix0][0] * nu2 * nu;
  288. float r1 = sinTable[ix0][1] * 3 * u * nu2;
  289. float r2 = sinTable[ix0][2] * 3 * u2 * nu;
  290. float r3 = sinTable[ix1][0] * u2 * u;
  291. r0 += r1;
  292. r2 += r3;
  293. return (r0+r2) * sign;
  294. }
  295. float bez_acos(float x)
  296. {
  297. // double sx = sqrt(1-x*x);
  298. // x = x*cos(pi/4) - sx*sin(pi/4);
  299. x = calc_cos_plus_45(x);
  300. x += 1.f;
  301. x *= ACOSTABLESIZE/2;
  302. int ix0 = int(floor(x));
  303. float u = x - ix0;
  304. float nu = 1.f-u;
  305. if(ix0 < 0)
  306. return pi-pi/4; // -infinite...
  307. if(ix0 >= ACOSTABLESIZE)
  308. return 0.f;
  309. int ix1 = ix0+1;
  310. float u2 = u*u;
  311. float nu2 = nu*nu;
  312. float r0 = acosTable[ix0][0] * nu2 * nu;
  313. float r1 = acosTable[ix0][1] * 3 * u * nu2;
  314. float r2 = acosTable[ix0][2] * 3 * u2 * nu;
  315. float r3 = acosTable[ix1][0] * u2 * u;
  316. r0 += r1;
  317. r2 += r3;
  318. return (r0+r2)-pi/4;
  319. }
  320. float mcos(float c, float m)
  321. {
  322. float ac = acos(c);
  323. float c1 = cos(ac*m);
  324. float c2 = cos(ac*(1-m));
  325. float c3 = c * cos(m);
  326. return c1;
  327. }
  328. float cheb_asin(float x)
  329. {
  330. float table[] =
  331. {
  332. 1.101460576f,
  333. +.5764093225f,
  334. -.09735417608f,
  335. +.003083774398f,
  336. -.000844493744f,
  337. +.005722363752f,
  338. -.005962135984f
  339. };
  340. float r = table[0] + table[1]*x;
  341. r -= sqrt(1-x*x);
  342. x = 2.f*x-1.f;
  343. float x2 = x;
  344. x *= x;
  345. for(int i=2; i<sizeof(table)/sizeof(table[0]); i++)
  346. {
  347. r += x * table[i];
  348. x *= x2;
  349. }
  350. return r;
  351. }
  352. float taylor_acos(float x)
  353. {
  354. /*
  355. (1) /(2*3),
  356. (3) /(2*4*5),
  357. (3*5) /(2*4*6*7),
  358. (3*5*7) /(2*4*6*8*9),
  359. (3*5*7*9) /(2*4*6*8*10*11),
  360. (3*5*7*9*11) /(2*4*6*8*10*12*13),
  361. (3*5*7*9*11*13) /(2*4*6*8*10*12*14*15),
  362. (3*5*7*9*11*13*15) /(2*4*6*8*10*12*14*16*17),
  363. (3*5*7*9*11*13*15*17) /(2*4*6*8*10*12*14*16*18*19),
  364. */
  365. float table[] =
  366. {
  367. 1.f,
  368. float(1) /(2*3),
  369. float(3) /(2*4*5),
  370. float(5) /(2*4*2*7),
  371. float(5*7) /(2*4*2*8*9),
  372. float(7*9) /(2*4*2*8*2*11),
  373. float(7*9*11) /(2*4*2*8*2*12*13),
  374. float(9*11*13) /(2*4*2*8*2*12*2*15),
  375. float(9*11*13*15) /(2*4*2*8*2*12*2*16*17),
  376. float(11*13*15*17) /(2*4*2*8*2*12*2*16*2*19),
  377. };
  378. // x = sqrt((1.f+x)/2.f);
  379. bool flip = false;
  380. if(x > 0.707106781186547524400844362104849f)
  381. {
  382. x = 0.707106781186547524400844362104849f - (x-0.707106781186547524400844362104849f);
  383. flip = true;
  384. }
  385. float r = 1.57079632679489661923132169163975f;
  386. float x2 = x*x;
  387. for(int i=0; i<sizeof(table)/sizeof(table[0]); i++)
  388. {
  389. r -= x * table[i];
  390. x *= x2;
  391. }
  392. if(flip)
  393. {
  394. r = 1.57079632679489661923132169163975f-r;
  395. }
  396. return r;
  397. }
  398. /*
  399. taylor acos
  400. 3 5 7 9 11
  401. %PI X 3 X 5 X 35 X 63 X
  402. --- - X - -- - ---- - ---- - ----- - ------
  403. 2 6 40 112 1152 2816
  404. taylor(sin(x),x,0,9);
  405. 3 5 7 9
  406. X X X X
  407. X - -- + --- - ---- + ------ + . . .
  408. 3! 5! 7! 9!
  409. taylor cos
  410. 2 4 6 8
  411. X X X X
  412. 1 - -- + -- - --- + ----- + . . .
  413. 2! 4! 6! 8!
  414. */
  415. int intChop(const float& f)
  416. {
  417. int a = *reinterpret_cast<const int*>(&f); // take bit pattern of float into a register
  418. int sign = (a>>31); // sign = 0xFFFFFFFF if original value is negative, 0 if positive
  419. int mantissa = (a&((1<<23)-1))|(1<<23); // extract mantissa and add the hidden bit
  420. int exponent = ((a&0x7fffffff)>>23)-127; // extract the exponent
  421. int r = (unsigned int(mantissa)<<8)>>(31-exponent); // ((1<<exponent)*mantissa)>>24 -- (we know that mantissa > (1<<24))
  422. return ((r ^ (sign)) - sign ) &~ (exponent>>31); // add original sign. If exponent was negative, make return value 0.
  423. }
  424. int intFloor (const float& f)
  425. {
  426. int a = *reinterpret_cast<const int*>(&f); // take bit pattern of float into a register
  427. int sign = (a>>31); // sign = 0xFFFFFFFF if original value is negative, 0 if positive
  428. a&=0x7fffffff; // we don't need the sign any more
  429. int exponent = (a>>23)-127; // extract the exponent
  430. int expsign = ~(exponent>>31); // 0xFFFFFFFF if exponent is positive, 0 otherwise
  431. int imask = ( (1<<(31-(exponent))))-1; // mask for true integer values
  432. int mantissa = (a&((1<<23)-1)); // extract mantissa (without the hidden bit)
  433. int r = (unsigned int(mantissa|(1<<23))<<8)>>(31-exponent); // ((1<<exponent)*(mantissa|hidden bit))>>24 -- (we know that mantissa > (1<<24))
  434. r = ((r & expsign) ^ (sign)) + ((!((mantissa<<8)&imask)&(expsign^((a-1)>>31)))&sign); // if (fabs(value)<1.0) value = 0; copy sign; if (value < 0 && value==(int)(value)) value++;
  435. return r;
  436. }
  437. const int CACHE_TRASH_BUFFER_SIZE = 4024000;
  438. int * CacheTrashBuffer = NULL;
  439. volatile int ReadVar;
  440. void trash_the_cache(void)
  441. {
  442. // read a million random bytes and overwrite them with a new value
  443. for (int j=0; j<1024000; j++) {
  444. int address = rand() % CACHE_TRASH_BUFFER_SIZE;
  445. ReadVar = CacheTrashBuffer[address];
  446. CacheTrashBuffer[address] = ReadVar+1;
  447. //cout<<ReadVar;
  448. }
  449. }
  450. int jan_main(int argc, char* argv[])
  451. {
  452. // init_bez_sin();
  453. init_bez_acos();
  454. CacheTrashBuffer = new int[CACHE_TRASH_BUFFER_SIZE];
  455. for (int i=0; i<CACHE_TRASH_BUFFER_SIZE; i++) {
  456. CacheTrashBuffer[i] = rand();
  457. }
  458. for(int i=0; i<10; i++)
  459. {
  460. // float foo = i/float(47) * pi * 2;
  461. float foo = float(i)/64.f;
  462. float r0 = 0.0f;
  463. float r1 = 0.0f;
  464. float r2 = 0.0f;
  465. unsigned long acos_cycles = 0;
  466. unsigned long bez_cycles = 0;
  467. unsigned long table_cycles = 0;
  468. unsigned long acos_sum = 0;
  469. unsigned long bez_sum = 0;
  470. unsigned long table_sum = 0;
  471. const int SAMPLE_COUNT = 20;
  472. {
  473. for (int j=0; j<SAMPLE_COUNT; j++) {
  474. foo = WWMath::Random_Float();
  475. acos_cycles = Get_CPU_Clock();
  476. r0 = acos(foo);
  477. acos_sum += Get_CPU_Clock() - acos_cycles;
  478. trash_the_cache();
  479. }
  480. }
  481. {
  482. for (int j=0; j<SAMPLE_COUNT; j++) {
  483. foo = WWMath::Random_Float();
  484. bez_cycles = Get_CPU_Clock();
  485. r1 = bez_acos(foo);
  486. bez_sum += Get_CPU_Clock() - bez_cycles;
  487. trash_the_cache();
  488. }
  489. }
  490. {
  491. for (int j=0; j<SAMPLE_COUNT; j++) {
  492. foo = WWMath::Random_Float();
  493. table_cycles = Get_CPU_Clock();
  494. r2 = WWMath::Fast_Acos(foo);
  495. table_sum += Get_CPU_Clock() - table_cycles;
  496. trash_the_cache();
  497. }
  498. }
  499. // cout << "x: " << setw(8) << foo << " sin(x): " << setw(8) << r0 << " bez_sin(x): " << setw(8) << r1 << " ratio: " << setw(8) << r0-r1 << endl;
  500. // cout << "x: " << setw(8) << foo << " acos(x): " << setw(8) << r0 << " bez_acos(x): " << setw(8) << r1 << " ratio: " << setw(8) << r0/r1 << endl;
  501. cout << "acos clocks: "<<acos_sum<<" bez clocks: "<<bez_sum<<" table clocks: "<<table_sum << endl;
  502. }
  503. delete CacheTrashBuffer;
  504. return 0;
  505. }