winAsmBlit.cc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. //-----------------------------------------------------------------------------
  2. // Copyright (c) 2013 GarageGames, LLC
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to
  6. // deal in the Software without restriction, including without limitation the
  7. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8. // sell copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20. // IN THE SOFTWARE.
  21. //-----------------------------------------------------------------------------
  22. #include "math/mMath.h"
  23. #include "graphics/dgl.h"
  24. #include "graphics/gBitmap.h"
  25. #if !defined(__MWERKS__) && defined(_MSC_VER)
  26. #define asm _asm
  27. #endif
  28. //--------------------------------------------------------------------------
  29. //void terrMipBlit_asm(U16 *dest, U32 destStride, U32 squareSize, const U8 *sourcePtr, U32 sourceStep, U32 sourceRowAdd)
  30. //{
  31. // //for(U32 k = 0; k < squareSize; k++)
  32. // //{
  33. // // for(U32 l = 0; l < squareSize; l++)
  34. // // {
  35. // // dest[l] = *((U16 *) sourcePtr);
  36. // // sourcePtr += sourceStep;
  37. // // }
  38. // // dest -= destStride;
  39. // // sourcePtr += sourceRowAdd;
  40. // //}
  41. // if(sourceStep == 2)
  42. // {
  43. // destStride <<= 1;
  44. // sourceRowAdd += squareSize << 1;
  45. // asm
  46. // {
  47. // push eax
  48. // push ebx
  49. // push ecx
  50. // push edx
  51. // push edi
  52. // push esi
  53. //
  54. // mov edx, squareSize
  55. // mov edi, dest
  56. // mov esi, sourcePtr
  57. // shr edx, 1
  58. // mov ecx, 0
  59. // mov ebx, 0
  60. // pixelLoop2:
  61. // mov eax, [esi+ebx*4]
  62. // mov [edi+ebx*4], eax
  63. // inc ebx
  64. // cmp ebx, edx
  65. // jnz pixelLoop2
  66. //
  67. // mov ebx, 0
  68. // inc ecx
  69. // sub edi, destStride
  70. // add esi, sourceRowAdd
  71. // cmp ecx, squareSize
  72. // jl pixelLoop2
  73. //
  74. // pop esi
  75. // pop edi
  76. // pop edx
  77. // pop ecx
  78. // pop ebx
  79. // pop eax
  80. // }
  81. // }
  82. // else if(sourceStep == -2)
  83. // {
  84. // destStride <<= 1;
  85. // asm
  86. // {
  87. // push eax
  88. // push ebx
  89. // push ecx
  90. // push edx
  91. // push edi
  92. // push esi
  93. //
  94. // mov edx, squareSize
  95. // mov edi, dest
  96. // mov esi, sourcePtr
  97. // shr edx, 1
  98. // mov ecx, 0
  99. // mov ebx, 0
  100. // pixelLoopNeg2:
  101. // mov eax, [esi-2]
  102. // sub esi, 4
  103. // ror eax, 16
  104. // mov [edi+ebx*4], eax
  105. // inc ebx
  106. // cmp ebx, edx
  107. // jnz pixelLoopNeg2
  108. //
  109. // mov ebx, 0
  110. // inc ecx
  111. // sub edi, destStride
  112. // add esi, sourceRowAdd
  113. // cmp ecx, squareSize
  114. // jl pixelLoopNeg2
  115. //
  116. // pop esi
  117. // pop edi
  118. // pop edx
  119. // pop ecx
  120. // pop ebx
  121. // pop eax
  122. // }
  123. // }
  124. // else
  125. // {
  126. // destStride = (destStride + squareSize) << 1;
  127. // asm
  128. // {
  129. // push eax
  130. // push ebx
  131. // push ecx
  132. // push edx
  133. // push edi
  134. // push esi
  135. //
  136. // mov eax, squareSize
  137. // mov edi, dest
  138. // mov esi, sourcePtr
  139. // lea edx, [edi + eax * 2]
  140. // mov ecx, 0 // row index
  141. // mov ebx, sourceStep
  142. // pixelLoop:
  143. // mov ax, [esi+ebx]
  144. // shl eax, 16
  145. // add edi, 4
  146. // mov ax, [esi]
  147. // lea esi, [esi+ebx*2]
  148. // mov [edi-4], eax
  149. // cmp edi, edx
  150. // jnz pixelLoop
  151. //
  152. // inc ecx
  153. // sub edi, destStride
  154. // mov eax, squareSize
  155. // add esi, sourceRowAdd
  156. // lea edx, [edi + eax * 2]
  157. // cmp ecx, squareSize
  158. // jl pixelLoop
  159. //
  160. // pop esi
  161. // pop edi
  162. // pop edx
  163. // pop ecx
  164. // pop ebx
  165. // pop eax
  166. // }
  167. // }
  168. //}
  169. //--------------------------------------------------------------------------
  // Builds one 16-bit 5:5:5:1 mip level by averaging every 2x2 group of source
  // texels into a single destination texel.
  //
  //   srcMip - source texels, read as rows of (2 * width) 16-bit values
  //   mip    - destination texels, written as rows of width 16-bit values
  //   height - number of destination rows
  //   width  - number of destination texels per row
  //
  // The three 5-bit colour fields (bits 15..11, 10..6 and 5..1) are averaged
  // independently with truncating division; bit 0 of every output texel is 0.
  void bitmapExtrude5551_asm(const void *srcMip, void *mip, U32 height, U32 width)
  {
     const U32 srcPitch = width << 1;             // source row length, in texels
     const U16 *upperRow = (const U16 *) srcMip;
     U16 *outRow = (U16 *) mip;

     for (U32 row = 0; row < height; ++row)
     {
        const U16 *lowerRow = upperRow + srcPitch;

        for (U32 col = 0; col < width; ++col)
        {
           const U32 left = col << 1;              // first source column of this 2x2 group
           const U32 topLeft     = upperRow[left];
           const U32 topRight    = upperRow[left + 1];
           const U32 bottomLeft  = lowerRow[left];
           const U32 bottomRight = lowerRow[left + 1];

           // Field shifts: 1 -> bits 5..1, 6 -> bits 10..6, 11 -> bits 15..11.
           U32 texel = 0;
           for (U32 shift = 1; shift <= 11; shift += 5)
           {
              const U32 total = ((topLeft     >> shift) & 0x1F) +
                                ((topRight    >> shift) & 0x1F) +
                                ((bottomLeft  >> shift) & 0x1F) +
                                ((bottomRight >> shift) & 0x1F);
              texel |= (total >> 2) << shift;
           }
           outRow[col] = (U16) texel;
        }

        upperRow = lowerRow + srcPitch;            // both source rows are consumed
        outRow += width;
     }
  }
  192. #if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
  193. //--------------------------------------------------------------------------
  // Halves a 3-bytes-per-pixel image in both dimensions: every 2x2 group of
  // source pixels is averaged (truncating, no rounding term) into one
  // destination pixel using MMX inline assembly.
  //
  //   srcMip    - source pixel data; rows are read as (srcWidth >> 1) * 2 pixels
  //   mip       - destination pixel data; rows are written as (srcWidth >> 1) pixels
  //   srcHeight - source height in pixels
  //   srcWidth  - source width in pixels
  //
  // Inputs with srcHeight == 1, srcWidth == 1, or a halved width of 1 or less
  // are forwarded unchanged to bitmapExtrudeRGB_c.
  //
  // NOTE(review): each result is stored with a 4-byte movd while the
  // destination pointer advances only 3 bytes, so the final store of a row
  // touches one byte beyond that row's last pixel (on the last row, one byte
  // beyond the output) -- confirm the destination buffer has slack.
  // NOTE(review): the loads at [eax+3] / [ebx+3] likewise start inside the
  // second pixel and may reach past the pixel pair being averaged -- confirm
  // the source buffer has slack after its last row.
  194. void bitmapExtrudeRGB_mmx(const void *srcMip, void *mip, U32 srcHeight, U32 srcWidth)
  195. {
  // A one-pixel-high or one-pixel-wide source has no 2x2 groups to average.
  196. if (srcHeight == 1 || srcWidth == 1) {
  197. bitmapExtrudeRGB_c(srcMip, mip, srcHeight, srcWidth);
  198. return;
  199. }
  // Destination dimensions.
  200. U32 width = srcWidth >> 1;
  201. U32 height = srcHeight >> 1;
  // Destination rows of zero or one pixel are also left to the C routine.
  202. if (width <= 1)
  203. {
  204. bitmapExtrudeRGB_c(srcMip, mip, srcHeight, srcWidth);
  205. return;
  206. }
  // 64-bit zero used as the memory operand of packuswb below.
  207. U64 ZERO = 0x0000000000000000;
  208. const U8 *src = (const U8 *) srcMip;
  209. U8 *dst = (U8 *) mip;
  // Row sizes in bytes (3 bytes per pixel).
  210. U32 srcStride = (width << 1) * 3;
  211. U32 dstStride = width * 3;
  // One iteration per destination row; the asm block walks the whole row.
  212. for(U32 y = 0; y < height; y++)
  213. {
  214. asm
  215. {
  // eax = upper source row, ebx = lower source row (eax + srcStride),
  // ecx = destination row, edx = destination pixels left in this row.
  216. mov eax, src
  217. mov ebx, eax
  218. add ebx, srcStride
  219. mov ecx, dst
  220. mov edx, width
  221. //--------------------------------------
  222. row_loop:
  // punpcklbw places each source byte in the high byte of a 16-bit word and
  // psrlw 8 shifts it down, leaving the source bytes zero-extended to words
  // regardless of what the register held before.
  // mm0 = upper-left pixel bytes.
  223. punpcklbw mm0, [eax]
  224. psrlw mm0, 8
  // Add the upper-right pixel (3 bytes further along the same row).
  225. punpcklbw mm1, [eax+3]
  226. psrlw mm1, 8
  227. paddw mm0, mm1
  // Add the lower-left pixel.
  228. punpcklbw mm1, [ebx]
  229. psrlw mm1, 8
  230. paddw mm0, mm1
  // Add the lower-right pixel.
  231. punpcklbw mm1, [ebx+3]
  232. psrlw mm1, 8
  233. paddw mm0, mm1
  // Divide the four-pixel sums by 4.
  234. psrlw mm0, 2
  235. //pxor mm1, mm1
  // Narrow the word averages back to bytes; the upper half comes from ZERO.
  236. packuswb mm0, ZERO // mm1
  // Stores 4 bytes; the fourth byte is overwritten by the next pixel's store.
  237. movd [ecx], mm0
  // Step two source pixels (6 bytes) and one destination pixel (3 bytes).
  238. add eax, 6
  239. add ebx, 6
  240. add ecx, 3
  241. dec edx
  242. jnz row_loop
  243. }
  // Two source rows were consumed, so src moves by twice the row stride.
  244. src += srcStride + srcStride; // advance to next line
  245. dst += dstStride;
  246. }
  // Leave MMX state so that x87 floating-point code can run afterwards.
  247. asm
  248. {
  249. emms
  250. }
  251. }
  252. //--------------------------------------------------------------------------
  // Converts 24-bit RGB pixels to 16-bit 5:5:5:1 pixels in place. Pixels are
  // converted two at a time in an MMX loop; a single trailing pixel (odd
  // count) is converted by scalar code.
  //
  //   src    - pixel buffer; read 3 bytes per pixel and overwritten from its
  //            start with 2 bytes per pixel
  //   pixels - number of pixels to convert
  //
  // Output layout per 16-bit pixel: top 5 bits of the first byte (red) in bits
  // 15..11, of the second byte (green) in bits 10..6, of the third byte (blue)
  // in bits 5..1, and bit 0 (alpha) always set.
  //
  // NOTE(review): the second pixel of each pair is fetched with a 4-byte movd
  // at [eax+3], which reads one byte beyond that pixel; for the last pair of an
  // even-length buffer that byte lies past pixels * 3 -- confirm buffer slack.
  253. void bitmapConvertRGB_to_5551_mmx(U8 *src, U32 pixels)
  254. {
  // 16-bit words, low to high: 0x2000, 0x0008, 0x2000, 0x0008 (see pmaddwd).
  255. U64 MULFACT = 0x0008200000082000; // RGB quad word multiplier
  256. U64 REDBLUE = 0x00f800f800f800f8; // Red-Blue mask
  257. U64 GREEN = 0x0000f8000000f800; // Green mask
  258. U64 ALPHA = 0x0000000000010001; // 100% Alpha mask
  259. U64 ZERO = 0x0000000000000000;
  // evenPixels is the number of pixel PAIRS; oddPixels is 1 if one pixel is left over.
  260. U32 evenPixels = pixels >> 1; // the MMX loop can only do an even number
  261. U32 oddPixels = pixels & 1; // of pixels since it processes 2 at a time
  // Output is written over the input; the 2-byte writes trail the 3-byte reads.
  262. U16 *dst = (U16*)src;
  263. if (evenPixels)
  264. {
  265. asm
  266. {
  // eax = read pointer, ebx = write pointer, edx = pixel pairs remaining.
  267. mov eax, src // YES, src = dst at start
  268. mov ebx, dst // convert image in place
  269. mov edx, evenPixels
  270. pixel_loop2:
  271. movd mm0, [eax] // get first 24-bit pixel
  272. movd mm1, [eax+3] // get second 24-bit pixel
  273. punpckldq mm0, mm1 // put second in high dword
  274. movq mm1, mm0 // save the original data
  // In each dword the low word keeps red's top 5 bits and the high word blue's.
  275. pand mm0, REDBLUE // mask out all but the 5MSBits of red and blue
  // Per dword: red * 2**13 + blue * 2**3, i.e. red in bits 20..16, blue in 10..6.
  276. pmaddwd mm0, MULFACT // multiply each word by
  277. // 2**13, 2**3, 2**13, 2**3 and add results
  // Green's top 5 bits already sit in bits 15..11, between red and blue.
  278. pand mm1, GREEN // mask out all but the 5MSBits of green
  279. por mm0, mm1 // combine the red, green, and blue bits
  // The 15 colour bits move from bits 20..6 down to bits 14..0.
  280. psrld mm0, 6 // shift into position
  281. packssdw mm0, ZERO // pack into single dword
  // Each 16-bit pixel is at most 0x7FFF here, so shifting the packed dword
  // left by one moves the colours to bits 15..1 without crossing pixels.
  282. pslld mm0, 1 // shift into final position
  283. por mm0, ALPHA // add the alpha bit
  // Write both converted pixels (4 bytes).
  284. movd [ebx], mm0
  // Consumed 6 input bytes, produced 4 output bytes.
  285. add eax, 6
  286. add ebx, 4
  287. dec edx
  288. jnz pixel_loop2
  // Publish the advanced pointers so the scalar tail continues from here.
  289. mov src, eax
  290. mov dst, ebx
  // Leave MMX state before returning to ordinary code.
  291. emms
  292. }
  293. }
  // Scalar conversion of the one remaining pixel, same layout as the MMX path.
  294. if (oddPixels)
  295. {
  296. U32 r = src[0] >> 3;
  297. U32 g = src[1] >> 3;
  298. U32 b = src[2] >> 3;
  299. *dst = (b << 1) | (g << 6) | (r << 11) | 1;
  300. }
  301. }
  302. #endif
  303. //--------------------------------------------------------------------------
  304. void PlatformBlitInit()
  305. {
  306. bitmapExtrude5551 = bitmapExtrude5551_asm;
  307. bitmapExtrudeRGB = bitmapExtrudeRGB_c;
  308. if (PlatformSystemInfo.processor.properties & CPU_PROP_MMX)
  309. {
  310. #if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
  311. bitmapExtrudeRGB = bitmapExtrudeRGB_mmx;
  312. bitmapConvertRGB_to_5551 = bitmapConvertRGB_to_5551_mmx;
  313. #endif
  314. }
  315. // terrMipBlit = terrMipBlit_asm;
  316. }