| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640 |
- /*
- ** Command & Conquer Renegade(tm)
- ** Copyright 2025 Electronic Arts Inc.
- **
- ** This program is free software: you can redistribute it and/or modify
- ** it under the terms of the GNU General Public License as published by
- ** the Free Software Foundation, either version 3 of the License, or
- ** (at your option) any later version.
- **
- ** This program is distributed in the hope that it will be useful,
- ** but WITHOUT ANY WARRANTY; without even the implied warranty of
- ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ** GNU General Public License for more details.
- **
- ** You should have received a copy of the GNU General Public License
- ** along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- /*
- This file contains some math functions that Jan worked on while visiting westwood
- between Meltdown and Siggraph 2001.
- */
- #include <iostream>
- #include <iomanip>
- #include <cmath>
- #include "p_timer.h"
- #include "wwmath.h"
- using namespace std;
- #undef for
- #define for if(false); else for
- /* (1-u)(1-u)(1-u)
- --------------- 1/6 0/6
- 6
- 3*u*u*u - 6u*u +4
- --------------- 4/6 1/6
- 6
- -3*u*u*u + 3*u*u + 3*u + 1
- --------------- 1/6 4/6
- 6
- u*u*u
- --------------- 0/6 1/6
- 6
- */
- const double pi = 3.1415926535897932384626433832795;
- const int SINTABLESIZE = 16;
- float sinTable[SINTABLESIZE+1][3];
- /*
- float sinTable[SINTABLESIZE+1][3] =
- {
- 0.f, 0.0327249329938f, 0.0654498139838f,
- 0.0980171403296f, 0.130584524375f, 0.16299423692f,
- 0.195090322016f, 0.22718650856f, 0.258968953094f,
- 0.290284677254f, 0.321600570763f, 0.352449632798f,
- 0.382683432365f, 0.412917427848f, 0.442536062284f,
- 0.471396736826f, 0.5002577f, 0.528360611467f,
- 0.55557023302f, 0.582780177536f, 0.609096734804f,
- 0.634393284164f, 0.659690191419f, 0.683966978964f,
- 0.707106781187f, 0.730247032377f, 0.752250161646f,
- 0.773010453363f, 0.79377112016f, 0.813288873188f,
- 0.831469612303f, 0.849650815039f, 0.866495135964f,
- 0.881921264348f, 0.897347904711f, 0.911356511164f,
- 0.923879532511f, 0.936403079986f, 0.947441092993f,
- 0.956940335732f, 0.966440181792f, 0.974401217255f,
- 0.980785280403f, 0.987169903631f, 0.991977366409f,
- 0.995184726672f, 0.998392578191f, 1.0000003005f,
- 1.f, 1.0000003005f, 0.998392578191f,
- };
- */
- void init_bez_sin()
- {
- for(int i=0; i<SINTABLESIZE+1; i++)
- {
- double x = sin(pi/2*(i*3)/(SINTABLESIZE*3));
- double y = sin(pi/2*(i*3+1)/(SINTABLESIZE*3)); // * 1.00053f;
- double z = sin(pi/2*(i*3+2)/(SINTABLESIZE*3)); // * 1.00053f;
- double w = sin(pi/2*(i*3+3)/(SINTABLESIZE*3));
- double mse = 4.0;
- double my = 1.0;
- double mz = 1.0;
- // * nu2 * nu;
- // nu2;
- // u * nu;
- // * u2 * u;
- double wy = 4.0;
-
- for(double dy=1.000178; dy<1.000537; dy+=0.0000001)
- {
- bool b = false;
- double wz = 4.0;
- for(double dz=1.000534; dz<1.000715; dz+=0.0000001)
- {
-
- double by = x * 8.0 / 27.0 +
- y * dy * 12.0 / 27.0 +
- z * dz * 6.0 / 27.0 +
- w / 27.0;
- double bz = x / 27.0 +
- y * dy * 6.0 / 27.0 +
- z * dz * 12.0 / 27.0 +
- w * 8.0 / 27.0;
- if(y*dy < 0.0 || y*dy > 1.0)
- continue;
- if(z*dz < 0.0 || z*dz > 1.0)
- continue;
- double yerr, zerr;
- // if(by > y)
- // yerr = by/y;
- // else
- yerr = y/by;
- // if(bz > z)
- // zerr = bz/z;
- // else
- zerr = z/bz;
- yerr -= 1.0;
- zerr -= 1.0;
- double se = yerr*yerr + zerr*zerr;
- if(mse > se)
- {
- wy = se;
- wz = se;
- mse = se;
- my = dy;
- mz = dz;
- b = false;
- }
- else
- {
- // if(se > wy)
- // b = true;
- // if(se > wz)
- // break;
- }
- }
- // if(b)
- // break;
- }
- sinTable[i][0] = float(x);
- sinTable[i][1] = float(y * my);
- sinTable[i][2] = float(z * mz);
- cout << setprecision(12);
- cout << setw(16) << x << "f, " << setw(16) << y*my << "f, " << setw(16) << z*mz << "f," << endl;
- }
- }
- const int ACOSTABLESIZE = 32;
- //float acosTable[ACOSTABLESIZE+1][3];
- float acosTable[ACOSTABLESIZE+1][3] =
- {
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 2.09439510239f, 2.07033956061f, 2.04678164613f,
- 2.02361292154f, 2.00044498842f, 1.97766674853f,
- 1.95519310129f, 1.93271994669f, 1.91055213001f,
- 1.88862003072f, 1.86668845355f, 1.84499333386f,
- 1.82347658194f, 1.80196011523f, 1.7806223216f,
- 1.75941271297f, 1.73820326172f, 1.71712229397f,
- 1.69612415796f, 1.67512622765f, 1.65421121018f,
- 1.63333708859f, 1.61246303408f, 1.59162963139f,
- 1.57079632679f, 1.54996304584f, 1.52912960772f,
- 1.508255565f, 1.48738138962f, 1.46646655946f,
- 1.44546849563f, 1.42447034954f, 1.40338938542f,
- 1.38217994062f, 1.36097042076f, 1.33963248005f,
- 1.31811607165f, 1.29659929615f, 1.27490425438f,
- 1.25297262287f, 1.23104063033f, 1.20887262684f,
- 1.1863995523f, 1.16392587108f, 1.14114765337f,
- 1.11797973205f, 1.09481095108f, 1.07125317716f,
- 1.0471975512f, 1.0231405f, 0.998586616212f,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0,
- 0, 0, 0
- };
- double calc_cos_plus_45(double cx)
- {
- double sx = sqrt(1-cx*cx);
- cx = cx*cos(pi/4) - sx*sin(pi/4);
- return cx;
- }
- void init_bez_acos()
- {
- for(int i=0; i<ACOSTABLESIZE; i++)
- {
- // if(i < ACOSTABLESIZE/4 || i > ACOSTABLESIZE/4*3)
- // continue;
- double x = acos(float(i*3)/(ACOSTABLESIZE*3)*2-1);
- double y = acos(float(i*3+1)/(ACOSTABLESIZE*3)*2-1); // * 1.00053f;
- double z = acos(float(i*3+2)/(ACOSTABLESIZE*3)*2-1); // * 1.00053f;
- double w;
- if(i != ACOSTABLESIZE-1)
- w = acos(float(i*3+3)/(ACOSTABLESIZE*3)*2-1);
- else
- w = 0.0;//-acos(float(i*3+1)/(ACOSTABLESIZE*3)*2-1);
- double mse = 4.0;
- double my = 1.0;
- double mz = 1.0;
- double wy = 4.0;
-
- for(double dy=0.9996944; dy<=1.0006926; dy+=16.0/8388608)
- {
- bool b = false;
- double wz = 4.0;
- for(double dz=0.9997872; dz<=1.0011092; dz+=16.0/8388608)
- {
- double by = x * 8.0 / 27.0 +
- y * dy * 12.0 / 27.0 +
- z * dz * 6.0 / 27.0 +
- w / 27.0;
- double bz = x / 27.0 +
- y * dy * 6.0 / 27.0 +
- z * dz * 12.0 / 27.0 +
- w * 8.0 / 27.0;
- if(y*dy < 0.0 || y*dy > pi)
- continue;
- if(z*dz < 0.0 || z*dz > pi)
- continue;
- double yerr, zerr;
- // if(by > y)
- // yerr = by/y;
- // else
- yerr = y/by;
- // if(bz > z)
- // zerr = bz/z;
- // else
- zerr = z/bz;
- yerr -= 1.0;
- zerr -= 1.0;
- double se = yerr*yerr + zerr*zerr;
- if(mse > se)
- {
- wy = se;
- wz = se;
- mse = se;
- my = dy;
- mz = dz;
- b = false;
- }
- else
- {
- // if(se > wy)
- // b = true;
- // if(se > wz)
- // break;
- }
- }
- // if(b)
- // break;
- }
- acosTable[i][0] = float(x);
- acosTable[i][1] = float(y * my);
- acosTable[i][2] = float(z * mz);
- cout << setprecision(12);
- cout << setw(16) << x << "f, " << setw(16) << y*my << "f, " << setw(16) << z*mz << "f," << endl;
- // cout << my << " " << mz << endl;
- }
- acosTable[ACOSTABLESIZE][0] = 0.f;
- }
- float bez_sin(float x)
- {
- x *= (SINTABLESIZE*4)/(pi*2);
- int ix0 = int(floor(x));
- float u = x - ix0;
- float nu = 1.f-u;
- float sign = 1.f;
- ix0 &= SINTABLESIZE*4-1;
- if(ix0 >= SINTABLESIZE*2)
- {
- sign = -1.f;
- ix0 -= SINTABLESIZE*2;
- }
- if(ix0 >= SINTABLESIZE)
- {
- ix0 = (SINTABLESIZE-1)-(ix0-SINTABLESIZE);
- float t = u;
- u = nu;
- nu = t;
- }
- int ix1 = ix0+1;
- // ix0 &= SINTABLESIZE-1;
- // ix1 &= SINTABLESIZE-1;
- float u2 = u*u;
- float nu2 = nu*nu;
- float r0 = sinTable[ix0][0] * nu2 * nu;
- float r1 = sinTable[ix0][1] * 3 * u * nu2;
- float r2 = sinTable[ix0][2] * 3 * u2 * nu;
- float r3 = sinTable[ix1][0] * u2 * u;
- r0 += r1;
- r2 += r3;
- return (r0+r2) * sign;
- }
- float bez_acos(float x)
- {
- // double sx = sqrt(1-x*x);
- // x = x*cos(pi/4) - sx*sin(pi/4);
- x = calc_cos_plus_45(x);
- x += 1.f;
- x *= ACOSTABLESIZE/2;
- int ix0 = int(floor(x));
- float u = x - ix0;
- float nu = 1.f-u;
- if(ix0 < 0)
- return pi-pi/4; // -infinite...
- if(ix0 >= ACOSTABLESIZE)
- return 0.f;
- int ix1 = ix0+1;
- float u2 = u*u;
- float nu2 = nu*nu;
- float r0 = acosTable[ix0][0] * nu2 * nu;
- float r1 = acosTable[ix0][1] * 3 * u * nu2;
- float r2 = acosTable[ix0][2] * 3 * u2 * nu;
- float r3 = acosTable[ix1][0] * u2 * u;
- r0 += r1;
- r2 += r3;
- return (r0+r2)-pi/4;
- }
- float mcos(float c, float m)
- {
- float ac = acos(c);
- float c1 = cos(ac*m);
- float c2 = cos(ac*(1-m));
- float c3 = c * cos(m);
- return c1;
- }
- float cheb_asin(float x)
- {
- float table[] =
- {
- 1.101460576f,
- +.5764093225f,
- -.09735417608f,
- +.003083774398f,
- -.000844493744f,
- +.005722363752f,
- -.005962135984f
- };
- float r = table[0] + table[1]*x;
- r -= sqrt(1-x*x);
- x = 2.f*x-1.f;
- float x2 = x;
- x *= x;
- for(int i=2; i<sizeof(table)/sizeof(table[0]); i++)
- {
- r += x * table[i];
- x *= x2;
- }
- return r;
- }
- float taylor_acos(float x)
- {
- /*
- (1) /(2*3),
- (3) /(2*4*5),
- (3*5) /(2*4*6*7),
- (3*5*7) /(2*4*6*8*9),
- (3*5*7*9) /(2*4*6*8*10*11),
- (3*5*7*9*11) /(2*4*6*8*10*12*13),
- (3*5*7*9*11*13) /(2*4*6*8*10*12*14*15),
- (3*5*7*9*11*13*15) /(2*4*6*8*10*12*14*16*17),
- (3*5*7*9*11*13*15*17) /(2*4*6*8*10*12*14*16*18*19),
- */
-
- float table[] =
- {
- 1.f,
- float(1) /(2*3),
- float(3) /(2*4*5),
- float(5) /(2*4*2*7),
- float(5*7) /(2*4*2*8*9),
- float(7*9) /(2*4*2*8*2*11),
- float(7*9*11) /(2*4*2*8*2*12*13),
- float(9*11*13) /(2*4*2*8*2*12*2*15),
- float(9*11*13*15) /(2*4*2*8*2*12*2*16*17),
- float(11*13*15*17) /(2*4*2*8*2*12*2*16*2*19),
- };
- // x = sqrt((1.f+x)/2.f);
- bool flip = false;
- if(x > 0.707106781186547524400844362104849f)
- {
- x = 0.707106781186547524400844362104849f - (x-0.707106781186547524400844362104849f);
- flip = true;
- }
-
- float r = 1.57079632679489661923132169163975f;
- float x2 = x*x;
- for(int i=0; i<sizeof(table)/sizeof(table[0]); i++)
- {
- r -= x * table[i];
- x *= x2;
- }
- if(flip)
- {
- r = 1.57079632679489661923132169163975f-r;
- }
- return r;
- }
- /*
- taylor acos
- 3 5 7 9 11
- %PI X 3 X 5 X 35 X 63 X
- --- - X - -- - ---- - ---- - ----- - ------
- 2 6 40 112 1152 2816
- taylor(sin(x),x,0,9);
- 3 5 7 9
- X X X X
- X - -- + --- - ---- + ------ + . . .
- 3! 5! 7! 9!
- taylor cos
- 2 4 6 8
- X X X X
- 1 - -- + -- - --- + ----- + . . .
- 2! 4! 6! 8!
- */
- int intChop(const float& f)
- {
- int a = *reinterpret_cast<const int*>(&f); // take bit pattern of float into a register
- int sign = (a>>31); // sign = 0xFFFFFFFF if original value is negative, 0 if positive
- int mantissa = (a&((1<<23)-1))|(1<<23); // extract mantissa and add the hidden bit
- int exponent = ((a&0x7fffffff)>>23)-127; // extract the exponent
- int r = (unsigned int(mantissa)<<8)>>(31-exponent); // ((1<<exponent)*mantissa)>>24 -- (we know that mantissa > (1<<24))
- return ((r ^ (sign)) - sign ) &~ (exponent>>31); // add original sign. If exponent was negative, make return value 0.
- }
- int intFloor (const float& f)
- {
- int a = *reinterpret_cast<const int*>(&f); // take bit pattern of float into a register
- int sign = (a>>31); // sign = 0xFFFFFFFF if original value is negative, 0 if positive
- a&=0x7fffffff; // we don't need the sign any more
- int exponent = (a>>23)-127; // extract the exponent
- int expsign = ~(exponent>>31); // 0xFFFFFFFF if exponent is positive, 0 otherwise
- int imask = ( (1<<(31-(exponent))))-1; // mask for true integer values
- int mantissa = (a&((1<<23)-1)); // extract mantissa (without the hidden bit)
- int r = (unsigned int(mantissa|(1<<23))<<8)>>(31-exponent); // ((1<<exponent)*(mantissa|hidden bit))>>24 -- (we know that mantissa > (1<<24))
- r = ((r & expsign) ^ (sign)) + ((!((mantissa<<8)&imask)&(expsign^((a-1)>>31)))&sign); // if (fabs(value)<1.0) value = 0; copy sign; if (value < 0 && value==(int)(value)) value++;
- return r;
- }
- const int CACHE_TRASH_BUFFER_SIZE = 4024000;
- int * CacheTrashBuffer = NULL;
- volatile int ReadVar;
- void trash_the_cache(void)
- {
- // read a million random bytes and overwrite them with a new value
- for (int j=0; j<1024000; j++) {
- int address = rand() % CACHE_TRASH_BUFFER_SIZE;
- ReadVar = CacheTrashBuffer[address];
- CacheTrashBuffer[address] = ReadVar+1;
- //cout<<ReadVar;
- }
- }
- int jan_main(int argc, char* argv[])
- {
- // init_bez_sin();
- init_bez_acos();
- CacheTrashBuffer = new int[CACHE_TRASH_BUFFER_SIZE];
- for (int i=0; i<CACHE_TRASH_BUFFER_SIZE; i++) {
- CacheTrashBuffer[i] = rand();
- }
- for(int i=0; i<10; i++)
- {
- // float foo = i/float(47) * pi * 2;
- float foo = float(i)/64.f;
- float r0 = 0.0f;
- float r1 = 0.0f;
- float r2 = 0.0f;
- unsigned long acos_cycles = 0;
- unsigned long bez_cycles = 0;
- unsigned long table_cycles = 0;
- unsigned long acos_sum = 0;
- unsigned long bez_sum = 0;
- unsigned long table_sum = 0;
- const int SAMPLE_COUNT = 20;
- {
- for (int j=0; j<SAMPLE_COUNT; j++) {
- foo = WWMath::Random_Float();
- acos_cycles = Get_CPU_Clock();
- r0 = acos(foo);
- acos_sum += Get_CPU_Clock() - acos_cycles;
- trash_the_cache();
- }
- }
-
- {
- for (int j=0; j<SAMPLE_COUNT; j++) {
- foo = WWMath::Random_Float();
- bez_cycles = Get_CPU_Clock();
- r1 = bez_acos(foo);
- bez_sum += Get_CPU_Clock() - bez_cycles;
- trash_the_cache();
- }
- }
- {
- for (int j=0; j<SAMPLE_COUNT; j++) {
- foo = WWMath::Random_Float();
- table_cycles = Get_CPU_Clock();
- r2 = WWMath::Fast_Acos(foo);
- table_sum += Get_CPU_Clock() - table_cycles;
- trash_the_cache();
- }
- }
-
- // cout << "x: " << setw(8) << foo << " sin(x): " << setw(8) << r0 << " bez_sin(x): " << setw(8) << r1 << " ratio: " << setw(8) << r0-r1 << endl;
- // cout << "x: " << setw(8) << foo << " acos(x): " << setw(8) << r0 << " bez_acos(x): " << setw(8) << r1 << " ratio: " << setw(8) << r0/r1 << endl;
- cout << "acos clocks: "<<acos_sum<<" bez clocks: "<<bez_sum<<" table clocks: "<<table_sum << endl;
- }
- delete CacheTrashBuffer;
- return 0;
- }
|