| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122 |
- /******************************************************************************
- Have to keep this as a separate file, so it won't be linked if unused.
- BC7 - If entire block has the same alpha value, then it's best when that alpha is equal to 255,
- because in that mode, RGB will have more precision.
- /******************************************************************************/
- #include "stdafx.h"
- #include "../../../../ThirdPartyLibs/begin.h"
- #if (WINDOWS && !ARM) || MAC
- #include "../../../../ThirdPartyLibs/BC7/ispc_texcomp/ispc_texcomp.h" // Windows and Mac link against precompiled lib's generated by Intel Compiler, so all we need is a header
- #else // other platforms need source
- #include "../../../../ThirdPartyLibs/BC7/ispc_texcomp/ispc_texcomp.cpp"
- #pragma warning(push)
- #pragma warning(disable:4715) // not all control paths return a value
- #include "../../../../ThirdPartyLibs/BC7/kernel.ispc.cpp"
- #pragma warning(pop)
- #endif
- #include "../../../../ThirdPartyLibs/end.h"
- namespace EE{
- /******************************************************************************/
- static struct BCThreads
- {
- Threads threads;
- Bool initialized;
- SyncLock lock;
- void init()
- {
- if(!initialized)
- {
- SyncLocker locker(lock);
- if(!initialized)
- {
- threads.create(false, Cpu.threads()-1); // -1 because we will do processing on the caller thread too
- initialized=true; // enable at the end
- }
- }
- }
- }BC;
- /******************************************************************************/
- struct Data
- {
- bc7_enc_settings settings;
- C Image &src;
- Image &dest;
- Int total_blocks, thread_blocks, threads;
- Data(C Image &src, Image &dest) : src(src), dest(dest)
- {
- total_blocks=src.h()/4;
- threads=Min(total_blocks, BC.threads.threads1()); // +1 because we will do processing on the caller thread too
- thread_blocks=total_blocks/threads;
- #if 0 // 3x slower and only small quality difference
- GetProfile_alpha_slow(&settings);
- #else
- GetProfile_alpha_basic(&settings);
- #endif
- }
- };
- /******************************************************************************/
- static void CompressBC7Block(IntPtr elm_index, Data &data, Int thread_index)
- {
- rgba_surface surf;
- Int block_start=elm_index*data.thread_blocks, y_start=block_start*4;
- surf.ptr =ConstCast(data.src.data()+y_start*data.src.pitch());
- surf.stride=data.src.pitch();
- surf.width =data.src.w ();
- surf.height=((elm_index==data.threads-1) ? data.src.h()-y_start : data.thread_blocks*4); // last thread must process all remaining blocks
- #if 1 // Intel
- CompressBlocksBC7(&surf, data.dest.data() + block_start*data.dest.pitch(), &data.settings);
- #else // DirectX
- REPD(by, surf.height/4)
- REPD(bx, surf.width /4)
- {
- XMVECTOR dx_rgba[4][4]; ASSERT(SIZE(XMVECTOR)==SIZE(Vec4));
- Int px=bx*4, py=by*4, // pixel
- xo[4], yo[4];
- REP(4)
- {
- xo[i]=px+i;
- yo[i]=py+i+y_start;
- }
- data.src.gather((Vec4*)&dx_rgba[0][0], xo, Elms(xo), yo, Elms(yo));
- DirectX::D3DXEncodeBC7(data.dest.data() + bx*16 + (by+block_start)*data.dest.pitch(), &dx_rgba[0][0], 0);
- }
- #endif
- }
- /******************************************************************************/
- Bool _CompressBC7(C Image &src, Image &dest)
- {
- Bool ok=false;
- if(dest.hwType()==IMAGE_BC7)
- {
- Image temp; C Image *s=&src;
- if(s->hwType()!=IMAGE_R8G8B8A8 || s->w()!=dest.hwW() || s->h()!=dest.hwH())
- {
- if(s->copyTry(temp, dest.hwW(), dest.hwH(), 1, IMAGE_R8G8B8A8, IMAGE_SOFT, 1, FILTER_NO_STRETCH, true))s=&temp;else return false; // we need to cover the area for entire HW size, to process partial and Pow2Padded blocks too
- }
- if(s->lockRead())
- {
- if(dest.lock(LOCK_WRITE))
- {
- ok=true;
- BC.init();
- Data data(*s, dest); // !! call after 'BC.init' !!
- BC.threads.process1(data.threads, CompressBC7Block, data, INT_MAX); // use all available threads, including this one
- dest.unlock();
- }
- s->unlock();
- }
- }
- return ok;
- }
- /******************************************************************************/
- }
- /******************************************************************************/
|