123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- /*
- Copyright (c) 2012 Advanced Micro Devices, Inc.
- This software is provided 'as-is', without any express or implied warranty.
- In no event will the authors be held liable for any damages arising from the use of this software.
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it freely,
- subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
- */
- #include <stdio.h>
- #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
- #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
- #include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h"
- #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
- #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
- #include "Bullet3Common/b3CommandLineArgs.h"
- #include "Bullet3Common/b3MinMax.h"
// Minimal test harness shared by every test in this file.
int g_nPassed = 0;          // number of tests reported as passed by TEST_REPORT
int g_nFailed = 0;          // number of tests reported as failed by TEST_REPORT
bool g_testFailed = false;  // failure flag of the test that is currently running

// Clears the per-test failure flag; written as `TEST_INIT;` at the top of a test.
#define TEST_INIT g_testFailed = false;

// Marks the current test as failed when `x` evaluates to false.
// Wrapped in do/while(0) so the macro behaves as a single statement and cannot
// capture the `else` of an enclosing unbraced `if`.
#define TEST_ASSERT(x) \
	do \
	{ \
		if (!(x)) \
		{ \
			g_testFailed = true; \
		} \
	} while (0)

// Prints "[O] name" on success or "[X] name" on failure and bumps the matching
// global counter. Wrapped in do/while(0) so all of its statements stay together
// when the macro is used as the body of an unbraced `if`/`for`.
#define TEST_REPORT(testName) \
	do \
	{ \
		printf("[%s] %s\n", (g_testFailed) ? "X" : "O", testName); \
		if (g_testFailed) \
			g_nFailed++; \
		else \
			g_nPassed++; \
	} while (0)

// Rounds `num` up to the nearest multiple of `alignment` (unchanged when it already is one).
#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))
- cl_context g_context=0;
- cl_device_id g_device=0;
- cl_command_queue g_queue =0;
- const char* g_deviceName = 0;
- void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
- {
- //void* glCtx=0;
- //void* glDC = 0;
- int ciErrNum = 0;
- //bound search and radix sort only work on GPU right now (assume 32 or 64 width workgroup without barriers)
- cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
- g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
- oclCHECKERROR(ciErrNum, CL_SUCCESS);
- int numDev = b3OpenCLUtils::getNumDevices(g_context);
- if (numDev>0)
- {
- b3OpenCLDeviceInfo info;
- g_device= b3OpenCLUtils::getDevice(g_context,0);
- g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
- oclCHECKERROR(ciErrNum, CL_SUCCESS);
- b3OpenCLUtils::printDeviceInfo(g_device);
- b3OpenCLUtils::getDeviceInfo(g_device,&info);
- g_deviceName = info.m_deviceName;
- }
- }
- void exitCL()
- {
- clReleaseCommandQueue(g_queue);
- clReleaseContext(g_context);
- }
- inline void fillIntTest()
- {
- TEST_INIT;
- b3FillCL* fillCL = new b3FillCL(g_context,g_device,g_queue);
- int maxSize=1024*256;
- b3OpenCLArray<int> intBuffer(g_context,g_queue,maxSize);
- intBuffer.resize(maxSize);
-
- #define NUM_TESTS 7
- int dx = maxSize/NUM_TESTS;
- for (int iter=0;iter<NUM_TESTS;iter++)
- {
- int size = b3Min( 11+dx*iter, maxSize );
- int value = 2;
-
- int offset=0;
- fillCL->execute(intBuffer,value,size,offset);
- b3AlignedObjectArray<int> hostBuf2;
- hostBuf2.resize(size);
- fillCL->executeHost(hostBuf2,value,size,offset);
- b3AlignedObjectArray<int> hostBuf;
- intBuffer.copyToHost(hostBuf);
- for(int i=0; i<size; i++)
- {
- TEST_ASSERT( hostBuf[i] == hostBuf2[i] );
- TEST_ASSERT( hostBuf[i] == hostBuf2[i] );
- }
- }
-
- delete fillCL;
- TEST_REPORT( "fillIntTest" );
- }
/// Seeds the C runtime random generator (srand) with `seed`.
inline void seedRandom(int seed)
{
	srand(seed);
}
/// Draws a value between minV and maxV using rand().
/// The random fraction is (rand() % 10000) / 10000, i.e. a float in [0, 1),
/// so the result is minV + fraction * (maxV - minV), converted back to T.
template <typename T>
inline T getRandom(const T& minV, const T& maxV)
{
	const float fraction = (rand() % 10000) / 10000.f;
	const T span = maxV - minV;
	return static_cast<T>(minV + fraction * span);
}
- struct b3SortDataCompare
- {
- inline bool operator()(const b3SortData& first, const b3SortData& second) const
- {
- return (first.m_key < second.m_key) || (first.m_key==second.m_key && first.m_value < second.m_value);
- }
- };
- void boundSearchTest( )
- {
- TEST_INIT;
- int maxSize = 1024*256;
- int bucketSize = 256;
- b3OpenCLArray<b3SortData> srcCL(g_context,g_queue,maxSize);
- b3OpenCLArray<unsigned int> upperCL(g_context,g_queue,maxSize);
- b3OpenCLArray<unsigned int> lowerCL(g_context,g_queue,maxSize);
-
- b3AlignedObjectArray<b3SortData> srcHost;
- b3AlignedObjectArray<unsigned int> upperHost;
- b3AlignedObjectArray<unsigned int> lowerHost;
- b3AlignedObjectArray<unsigned int> upperHostCompare;
- b3AlignedObjectArray<unsigned int> lowerHostCompare;
-
- b3BoundSearchCL* search = new b3BoundSearchCL(g_context,g_device,g_queue, maxSize);
- int dx = maxSize/NUM_TESTS;
- for(int iter=0; iter<NUM_TESTS; iter++)
- {
-
- int size = b3Min( 128+dx*iter, maxSize );
- upperHost.resize(bucketSize);
- lowerHost.resize(bucketSize);
- upperHostCompare.resize(bucketSize);
- lowerHostCompare.resize(bucketSize);
- srcHost.resize(size);
- for(int i=0; i<size; i++)
- {
- b3SortData v;
- // v.m_key = i<2? 0 : 5;
- v.m_key = getRandom(0,bucketSize);
- v.m_value = i;
- srcHost.at(i) = v;
- }
- srcHost.quickSort(b3SortDataCompare());
- srcCL.copyFromHost(srcHost);
- {
-
- for(int i=0; i<bucketSize; i++)
- {
- lowerHost[i] = -1;
- lowerHostCompare[i] = -1;
- upperHost[i] = -1;
- upperHostCompare[i] = -1;
- }
- upperCL.copyFromHost(upperHost);
- lowerCL.copyFromHost(lowerHost);
- }
- search->execute(srcCL,size,upperCL,bucketSize,b3BoundSearchCL::BOUND_UPPER);
- search->execute(srcCL,size,lowerCL,bucketSize,b3BoundSearchCL::BOUND_LOWER);
- search->executeHost(srcHost,size,upperHostCompare,bucketSize,b3BoundSearchCL::BOUND_UPPER);
- search->executeHost(srcHost,size,lowerHostCompare,bucketSize,b3BoundSearchCL::BOUND_LOWER);
- lowerCL.copyToHost(lowerHost);
- upperCL.copyToHost(upperHost);
- for(int i=0; i<bucketSize; i++)
- {
- TEST_ASSERT(upperHostCompare[i] == upperHost[i]);
- TEST_ASSERT(lowerHostCompare[i] == lowerHost[i]);
- }
- /*
- for(int i=1; i<bucketSize; i++)
- {
- int lhi_1 = lowerHost[i-1];
- int lhi = lowerHost[i];
- for(int j=lhi_1; j<lhi; j++)
- //for(int j=lowerHost[i-1]; j<lowerHost[i]; j++)
- {
- TEST_ASSERT( srcHost[j].m_key < i );
- }
- }
- for(int i=0; i<bucketSize; i++)
- {
- int jMin = (i==0)?0:upperHost[i-1];
- for(int j=jMin; j<upperHost[i]; j++)
- {
- TEST_ASSERT( srcHost[j].m_key <= i );
- }
- }
- */
- for(int i=0; i<bucketSize; i++)
- {
- int lhi = lowerHost[i];
- int uhi = upperHost[i];
- for(int j=lhi; j<uhi; j++)
- {
- if ( srcHost[j].m_key != i )
- {
- printf("error %d != %d\n",srcHost[j].m_key,i);
- }
- TEST_ASSERT( srcHost[j].m_key == i );
- }
- }
- }
- delete search;
- TEST_REPORT( "boundSearchTest" );
- }
- void prefixScanTest()
- {
- TEST_INIT;
- int maxSize = 1024*256;
- b3AlignedObjectArray<unsigned int> buf0Host;
- b3AlignedObjectArray<unsigned int> buf1Host;
- b3OpenCLArray<unsigned int> buf2CL(g_context,g_queue,maxSize);
- b3OpenCLArray<unsigned int> buf3CL(g_context,g_queue,maxSize);
-
-
- b3PrefixScanCL* scan = new b3PrefixScanCL(g_context,g_device,g_queue,maxSize);
-
- int dx = maxSize/NUM_TESTS;
- for(int iter=0; iter<NUM_TESTS; iter++)
- {
- int size = b3Min( 128+dx*iter, maxSize );
- buf0Host.resize(size);
- buf1Host.resize(size);
- for(int i=0; i<size; i++)
- buf0Host[i] = 1;
-
- buf2CL.copyFromHost( buf0Host);
-
- unsigned int sumHost, sumGPU;
- scan->executeHost(buf0Host, buf1Host, size, &sumHost );
- scan->execute( buf2CL, buf3CL, size, &sumGPU );
- buf3CL.copyToHost(buf0Host);
-
- TEST_ASSERT( sumHost == sumGPU );
- for(int i=0; i<size; i++)
- TEST_ASSERT( buf1Host[i] == buf0Host[i] );
- }
- delete scan;
- TEST_REPORT( "scanTest" );
- }
- bool radixSortTest()
- {
- TEST_INIT;
-
- int maxSize = 1024*256;
- b3AlignedObjectArray<b3SortData> buf0Host;
- buf0Host.resize(maxSize);
- b3AlignedObjectArray<b3SortData> buf1Host;
- buf1Host.resize(maxSize );
- b3OpenCLArray<b3SortData> buf2CL(g_context,g_queue,maxSize);
- b3RadixSort32CL* sort = new b3RadixSort32CL(g_context,g_device,g_queue,maxSize);
- int dx = maxSize/NUM_TESTS;
- for(int iter=0; iter<NUM_TESTS; iter++)
- {
- int size = b3Min( 128+dx*iter, maxSize-512 );
- size = NEXTMULTIPLEOF( size, 512 );//not necessary
-
- buf0Host.resize(size);
- for(int i=0; i<size; i++)
- {
- b3SortData v;
- v.m_key = getRandom(0,0xff);
- v.m_value = i;
- buf0Host[i] = v;
- }
- buf2CL.copyFromHost( buf0Host);
-
- sort->executeHost( buf0Host);
- sort->execute(buf2CL);
- buf2CL.copyToHost(buf1Host);
-
- for(int i=0; i<size; i++)
- {
- TEST_ASSERT( buf0Host[i].m_value == buf1Host[i].m_value && buf0Host[i].m_key == buf1Host[i].m_key );
- }
- }
- delete sort;
- TEST_REPORT( "radixSort" );
- return g_testFailed;
- }
- int main(int argc, char** argv)
- {
- int preferredDeviceIndex = -1;
- int preferredPlatformIndex = -1;
- b3CommandLineArgs args(argc, argv);
- args.GetCmdLineArgument("deviceId", preferredDeviceIndex);
- args.GetCmdLineArgument("platformId", preferredPlatformIndex);
- initCL(preferredDeviceIndex,preferredPlatformIndex);
- fillIntTest();
- boundSearchTest();
- prefixScanTest();
- radixSortTest();
- exitCL();
- printf("%d tests passed, %d tests failed\n",g_nPassed, g_nFailed);
- printf("End, press <enter>\n");
- getchar();
- }
|