// winAsmBlit.cpp
  1. //-----------------------------------------------------------------------------
  2. // Copyright (c) 2012 GarageGames, LLC
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to
  6. // deal in the Software without restriction, including without limitation the
  7. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8. // sell copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20. // IN THE SOFTWARE.
  21. //-----------------------------------------------------------------------------
  22. #include "math/mMath.h"
  23. #include "gfx/bitmap/gBitmap.h"
  24. #include "gfx/bitmap/bitmapUtils.h"
  // Let the `asm { ... }` blocks in this file build under MSVC by mapping `asm`
  // onto that compiler's `_asm` keyword. Builds that define __MWERKS__ are left
  // untouched (presumably because that toolchain accepts `asm` as written).
  #if defined(_MSC_VER) && !defined(__MWERKS__)
  #define asm _asm
  #endif
  28. //--------------------------------------------------------------------------
  29. void bitmapExtrude5551_asm(const void *srcMip, void *mip, U32 height, U32 width)
  30. {
  31. const U16 *src = (const U16 *) srcMip;
  32. U16 *dst = (U16 *) mip;
  33. U32 stride = width << 1;
  34. for(U32 y = 0; y < height; y++)
  35. {
  36. for(U32 x = 0; x < width; x++)
  37. {
  38. U32 a = src[0];
  39. U32 b = src[1];
  40. U32 c = src[stride];
  41. U32 d = src[stride+1];
  42. dst[x] = ((((a >> 11) + (b >> 11) + (c >> 11) + (d >> 11)) >> 2) << 11) |
  43. ((( ((a >> 6) & 0x1f) + ((b >> 6) & 0x1f) + ((c >> 6) & 0x1f) + ((d >> 6) & 0x1F) ) >> 2) << 6) |
  44. ((( ((a >> 1) & 0x1F) + ((b >> 1) & 0x1F) + ((c >> 1) & 0x1f) + ((d >> 1) & 0x1f)) >> 2) << 1);
  45. src += 2;
  46. }
  47. src += stride;
  48. dst += width;
  49. }
  50. }
  51. #if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
  52. //--------------------------------------------------------------------------
  53. void bitmapExtrudeRGB_mmx(const void *srcMip, void *mip, U32 srcHeight, U32 srcWidth)
  54. {
  55. if (srcHeight == 1 || srcWidth == 1) {
  56. bitmapExtrudeRGB_c(srcMip, mip, srcHeight, srcWidth);
  57. return;
  58. }
  59. U32 width = srcWidth >> 1;
  60. U32 height = srcHeight >> 1;
  61. if (width <= 1)
  62. {
  63. bitmapExtrudeRGB_c(srcMip, mip, srcHeight, srcWidth);
  64. return;
  65. }
  66. U64 ZERO = 0x0000000000000000;
  67. const U8 *src = (const U8 *) srcMip;
  68. U8 *dst = (U8 *) mip;
  69. U32 srcStride = (width << 1) * 3;
  70. U32 dstStride = width * 3;
  71. for(U32 y = 0; y < height; y++)
  72. {
  73. asm
  74. {
  75. mov eax, src
  76. mov ebx, eax
  77. add ebx, srcStride
  78. mov ecx, dst
  79. mov edx, width
  80. //--------------------------------------
  81. row_loop:
  82. punpcklbw mm0, [eax]
  83. psrlw mm0, 8
  84. punpcklbw mm1, [eax+3]
  85. psrlw mm1, 8
  86. paddw mm0, mm1
  87. punpcklbw mm1, [ebx]
  88. psrlw mm1, 8
  89. paddw mm0, mm1
  90. punpcklbw mm1, [ebx+3]
  91. psrlw mm1, 8
  92. paddw mm0, mm1
  93. psrlw mm0, 2
  94. //pxor mm1, mm1
  95. packuswb mm0, ZERO // mm1
  96. movd [ecx], mm0
  97. add eax, 6
  98. add ebx, 6
  99. add ecx, 3
  100. dec edx
  101. jnz row_loop
  102. }
  103. src += srcStride + srcStride; // advance to next line
  104. dst += dstStride;
  105. }
  106. asm
  107. {
  108. emms
  109. }
  110. }
  111. //--------------------------------------------------------------------------
  112. void bitmapConvertRGB_to_5551_mmx(U8 *src, U32 pixels)
  113. {
  114. U64 MULFACT = 0x0008200000082000; // RGB quad word multiplier
  115. U64 REDBLUE = 0x00f800f800f800f8; // Red-Blue mask
  116. U64 GREEN = 0x0000f8000000f800; // Green mask
  117. U64 ALPHA = 0x0000000000010001; // 100% Alpha mask
  118. U64 ZERO = 0x0000000000000000;
  119. U32 evenPixels = pixels >> 1; // the MMX loop can only do an even number
  120. U32 oddPixels = pixels & 1; // of pixels since it processes 2 at a time
  121. U16 *dst = (U16*)src;
  122. if (evenPixels)
  123. {
  124. asm
  125. {
  126. mov eax, src // YES, src = dst at start
  127. mov ebx, dst // convert image in place
  128. mov edx, evenPixels
  129. pixel_loop2:
  130. movd mm0, [eax] // get first 24-bit pixel
  131. movd mm1, [eax+3] // get second 24-bit pixel
  132. punpckldq mm0, mm1 // put second in high dword
  133. movq mm1, mm0 // save the original data
  134. pand mm0, REDBLUE // mask out all but the 5MSBits of red and blue
  135. pmaddwd mm0, MULFACT // multiply each word by
  136. // 2**13, 2**3, 2**13, 2**3 and add results
  137. pand mm1, GREEN // mask out all but the 5MSBits of green
  138. por mm0, mm1 // combine the red, green, and blue bits
  139. psrld mm0, 6 // shift into position
  140. packssdw mm0, ZERO // pack into single dword
  141. pslld mm0, 1 // shift into final position
  142. por mm0, ALPHA // add the alpha bit
  143. movd [ebx], mm0
  144. add eax, 6
  145. add ebx, 4
  146. dec edx
  147. jnz pixel_loop2
  148. mov src, eax
  149. mov dst, ebx
  150. emms
  151. }
  152. }
  153. if (oddPixels)
  154. {
  155. U32 r = src[0] >> 3;
  156. U32 g = src[1] >> 3;
  157. U32 b = src[2] >> 3;
  158. *dst = (b << 1) | (g << 6) | (r << 11) | 1;
  159. }
  160. }
  161. #endif
  162. //--------------------------------------------------------------------------
  163. void PlatformBlitInit()
  164. {
  165. bitmapExtrude5551 = bitmapExtrude5551_asm;
  166. bitmapExtrudeRGB = bitmapExtrudeRGB_c;
  167. if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
  168. {
  169. #if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
  170. bitmapExtrudeRGB = bitmapExtrudeRGB_mmx;
  171. bitmapConvertRGB_to_5551 = bitmapConvertRGB_to_5551_mmx;
  172. #endif
  173. }
  174. }