BC7.cpp 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. /******************************************************************************
  2. Have to keep this as a separate file, so it won't be linked if unused.
  3. BC7 - If an entire block has the same alpha value, it is best for that alpha to be 255,
  4. because in that mode RGB gets more precision.
  5. /******************************************************************************/
  6. #include "stdafx.h"
  7. #include "../../../../ThirdPartyLibs/begin.h"
  8. #if (WINDOWS && !ARM) || MAC
  9. #include "../../../../ThirdPartyLibs/BC7/ispc_texcomp/ispc_texcomp.h" // Windows and Mac link against precompiled lib's generated by Intel Compiler, so all we need is a header
  10. #else // other platforms need source
  11. #include "../../../../ThirdPartyLibs/BC7/ispc_texcomp/ispc_texcomp.cpp"
  12. #pragma warning(push)
  13. #pragma warning(disable:4715) // not all control paths return a value
  14. #include "../../../../ThirdPartyLibs/BC7/kernel.ispc.cpp"
  15. #pragma warning(pop)
  16. #endif
  17. #include "../../../../ThirdPartyLibs/end.h"
  18. namespace EE{
  19. /******************************************************************************/
  20. static struct BCThreads
  21. {
  22. Threads threads;
  23. Bool initialized;
  24. SyncLock lock;
  25. void init()
  26. {
  27. if(!initialized)
  28. {
  29. SyncLocker locker(lock);
  30. if(!initialized)
  31. {
  32. threads.create(false, Cpu.threads()-1); // -1 because we will do processing on the caller thread too
  33. initialized=true; // enable at the end
  34. }
  35. }
  36. }
  37. }BC;
  38. /******************************************************************************/
  39. struct Data
  40. {
  41. bc7_enc_settings settings;
  42. C Image &src;
  43. Image &dest;
  44. Int total_blocks, thread_blocks, threads;
  45. Data(C Image &src, Image &dest) : src(src), dest(dest)
  46. {
  47. total_blocks=src.h()/4;
  48. threads=Min(total_blocks, BC.threads.threads1()); // +1 because we will do processing on the caller thread too
  49. thread_blocks=total_blocks/threads;
  50. #if 0 // 3x slower and only small quality difference
  51. GetProfile_alpha_slow(&settings);
  52. #else
  53. GetProfile_alpha_basic(&settings);
  54. #endif
  55. }
  56. };
  57. /******************************************************************************/
  58. static void CompressBC7Block(IntPtr elm_index, Data &data, Int thread_index)
  59. {
  60. rgba_surface surf;
  61. Int block_start=elm_index*data.thread_blocks, y_start=block_start*4;
  62. surf.ptr =ConstCast(data.src.data()+y_start*data.src.pitch());
  63. surf.stride=data.src.pitch();
  64. surf.width =data.src.w ();
  65. surf.height=((elm_index==data.threads-1) ? data.src.h()-y_start : data.thread_blocks*4); // last thread must process all remaining blocks
  66. #if 1 // Intel
  67. CompressBlocksBC7(&surf, data.dest.data() + block_start*data.dest.pitch(), &data.settings);
  68. #else // DirectX
  69. REPD(by, surf.height/4)
  70. REPD(bx, surf.width /4)
  71. {
  72. XMVECTOR dx_rgba[4][4]; ASSERT(SIZE(XMVECTOR)==SIZE(Vec4));
  73. Int px=bx*4, py=by*4, // pixel
  74. xo[4], yo[4];
  75. REP(4)
  76. {
  77. xo[i]=px+i;
  78. yo[i]=py+i+y_start;
  79. }
  80. data.src.gather((Vec4*)&dx_rgba[0][0], xo, Elms(xo), yo, Elms(yo));
  81. DirectX::D3DXEncodeBC7(data.dest.data() + bx*16 + (by+block_start)*data.dest.pitch(), &dx_rgba[0][0], 0);
  82. }
  83. #endif
  84. }
  85. /******************************************************************************/
  86. Bool _CompressBC7(C Image &src, Image &dest)
  87. {
  88. Bool ok=false;
  89. if(dest.hwType()==IMAGE_BC7)
  90. {
  91. Image temp; C Image *s=&src;
  92. if(s->hwType()!=IMAGE_R8G8B8A8 || s->w()!=dest.hwW() || s->h()!=dest.hwH())
  93. {
  94. if(s->copyTry(temp, dest.hwW(), dest.hwH(), 1, IMAGE_R8G8B8A8, IMAGE_SOFT, 1, FILTER_NO_STRETCH, true))s=&temp;else return false; // we need to cover the area for entire HW size, to process partial and Pow2Padded blocks too
  95. }
  96. if(s->lockRead())
  97. {
  98. if(dest.lock(LOCK_WRITE))
  99. {
  100. ok=true;
  101. BC.init();
  102. Data data(*s, dest); // !! call after 'BC.init' !!
  103. BC.threads.process1(data.threads, CompressBC7Block, data, INT_MAX); // use all available threads, including this one
  104. dest.unlock();
  105. }
  106. s->unlock();
  107. }
  108. }
  109. return ok;
  110. }
  111. /******************************************************************************/
  112. }
  113. /******************************************************************************/