123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389 |
- /*
- Copyright (c) 2012 Advanced Micro Devices, Inc.
- This software is provided 'as-is', without any express or implied warranty.
- In no event will the authors be held liable for any damages arising from the use of this software.
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it freely,
- subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
- */
- //Originally written by Erwin Coumans
- #define NEW_PAIR_MARKER -1
- typedef struct
- {
- union
- {
- float4 m_min;
- float m_minElems[4];
- int m_minIndices[4];
- };
- union
- {
- float4 m_max;
- float m_maxElems[4];
- int m_maxIndices[4];
- };
- } btAabbCL;
- /// conservative test for overlap between two aabbs
- bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);
- bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)
- {
- bool overlap = true;
- overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
- overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
- overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
- return overlap;
- }
- bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);
- bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)
- {
- bool overlap = true;
- overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
- overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
- overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
- return overlap;
- }
- bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);
- bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)
- {
- bool overlap = true;
- overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
- overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
- overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
- return overlap;
- }
- __kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)
- {
- int i = get_global_id(0);
- if (i>=numUnsortedAabbs)
- return;
- int j = get_global_id(1);
- if (j>=numUnSortedAabbs2)
- return;
- __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];
- __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];
- if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))
- {
- int4 myPair;
-
- int xIndex = unsortedAabbPtr[0].m_minIndices[3];
- int yIndex = unsortedAabbPtr2[0].m_minIndices[3];
- if (xIndex>yIndex)
- {
- int tmp = xIndex;
- xIndex=yIndex;
- yIndex=tmp;
- }
-
- myPair.x = xIndex;
- myPair.y = yIndex;
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
- __kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
- {
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
- for (int j=i+1;j<numObjects;j++)
- {
- if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
- {
- int4 myPair;
- myPair.x = aabbs[i].m_minIndices[3];
- myPair.y = aabbs[j].m_minIndices[3];
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
- }
- __kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
- {
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
- for (int j=i+1;j<numObjects;j++)
- {
- if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis]))
- {
- break;
- }
- if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
- {
- int4 myPair;
- myPair.x = aabbs[i].m_minIndices[3];
- myPair.y = aabbs[j].m_minIndices[3];
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
- }
- __kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
- {
- int i = get_global_id(0);
- int localId = get_local_id(0);
- __local int numActiveWgItems[1];
- __local int breakRequest[1];
- if (localId==0)
- {
- numActiveWgItems[0] = 0;
- breakRequest[0] = 0;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- atomic_inc(numActiveWgItems);
- barrier(CLK_LOCAL_MEM_FENCE);
- int localBreak = 0;
- int j=i+1;
- do
- {
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (j<numObjects)
- {
- if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis]))
- {
- if (!localBreak)
- {
- atomic_inc(breakRequest);
- localBreak = 1;
- }
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (j>=numObjects && !localBreak)
- {
- atomic_inc(breakRequest);
- localBreak = 1;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (!localBreak)
- {
- if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
- {
- int4 myPair;
- myPair.x = aabbs[i].m_minIndices[3];
- myPair.y = aabbs[j].m_minIndices[3];
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
- j++;
- } while (breakRequest[0]<numActiveWgItems[0]);
- }
- __kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
- {
- int i = get_global_id(0);
- int localId = get_local_id(0);
- __local int numActiveWgItems[1];
- __local int breakRequest[1];
- __local btAabbCL localAabbs[128];// = aabbs[i];
-
- btAabbCL myAabb;
-
- myAabb = (i<numObjects)? aabbs[i]:aabbs[0];
- float testValue = myAabb.m_maxElems[axis];
-
- if (localId==0)
- {
- numActiveWgItems[0] = 0;
- breakRequest[0] = 0;
- }
- int localCount=0;
- int block=0;
- localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];
- localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0];
-
- barrier(CLK_LOCAL_MEM_FENCE);
- atomic_inc(numActiveWgItems);
- barrier(CLK_LOCAL_MEM_FENCE);
- int localBreak = 0;
-
- int j=i+1;
- do
- {
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (j<numObjects)
- {
- if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis]))
- {
- if (!localBreak)
- {
- atomic_inc(breakRequest);
- localBreak = 1;
- }
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (j>=numObjects && !localBreak)
- {
- atomic_inc(breakRequest);
- localBreak = 1;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (!localBreak)
- {
- if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))
- {
- int4 myPair;
- myPair.x = myAabb.m_minIndices[3];
- myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
- localCount++;
- if (localCount==64)
- {
- localCount = 0;
- block+=64;
- localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];
- localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0];
- }
- j++;
-
- } while (breakRequest[0]<numActiveWgItems[0]);
-
- }
- //http://stereopsis.com/radix.html
- unsigned int FloatFlip(float fl);
- unsigned int FloatFlip(float fl)
- {
- unsigned int f = *(unsigned int*)&fl;
- unsigned int mask = -(int)(f >> 31) | 0x80000000;
- return f ^ mask;
- }
- float IFloatFlip(unsigned int f);
- float IFloatFlip(unsigned int f)
- {
- unsigned int mask = ((f >> 31) - 1) | 0x80000000;
- unsigned int fl = f ^ mask;
- return *(float*)&fl;
- }
- __kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)
- {
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
- int src = destAabbs[i].m_maxIndices[3];
- destAabbs[i] = allAabbs[src];
- destAabbs[i].m_maxIndices[3] = src;
- }
- __kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)
- {
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
-
-
- sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);
- sortData[i].y = i;
-
- }
- __kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)
- {
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
-
- sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];
- }
- __kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)
- {
- int i = get_global_id(0);
- if (i>=numAabbs)
- return;
-
- btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];
-
- float4 s;
- s = (smallAabb.m_max+smallAabb.m_min)*0.5f;
- sum[i]=s;
- sum2[i]=s*s;
- }
|