filterAPI.cpp 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878
  1. 
  2. // zlib open source license
  3. //
  4. // Copyright (c) 2017 to 2025 David Forsgren Piuva
  5. //
  6. // This software is provided 'as-is', without any express or implied
  7. // warranty. In no event will the authors be held liable for any damages
  8. // arising from the use of this software.
  9. //
  10. // Permission is granted to anyone to use this software for any purpose,
  11. // including commercial applications, and to alter it and redistribute it
  12. // freely, subject to the following restrictions:
  13. //
  14. // 1. The origin of this software must not be misrepresented; you must not
  15. // claim that you wrote the original software. If you use this software
  16. // in a product, an acknowledgment in the product documentation would be
  17. // appreciated but is not required.
  18. //
  19. // 2. Altered source versions must be plainly marked as such, and must not be
  20. // misrepresented as being the original software.
  21. //
  22. // 3. This notice may not be removed or altered from any source
  23. // distribution.
  24. // TODO: Optimize and clean up using template programming to automatically unpack image data in advance for easy access.
  25. // Create reusable inline functions for fast pixel sampling in a separate header while prototyping.
  26. #include <cassert>
  27. #include "filterAPI.h"
  28. #include "imageAPI.h"
  29. #include "drawAPI.h"
  30. #include "../image/PackOrder.h"
  31. #include "../base/simd.h"
  32. namespace dsr {
  33. static inline U32x4 ColorRgbaI32_to_U32x4(const ColorRgbaI32& color) {
  34. return U32x4(color.red, color.green, color.blue, color.alpha);
  35. }
  36. static inline ColorRgbaI32 U32x4_to_ColorRgbaI32(const U32x4& color) {
  37. UVector4D vResult = color.get();
  38. return ColorRgbaI32(vResult.x, vResult.y, vResult.z, vResult.w);
  39. }
  40. // Uniform linear interpolation of colors from a 16-bit sub-pixel weight
  41. // Pre-condition0 <= fineRatio <= 65536
  42. // Post-condition: Returns colorA * (1 - (fineRatio / 65536)) + colorB * (fineRatio / 65536)
  43. static inline U32x4 mixColorsUniform(const U32x4 &colorA, const U32x4 &colorB, uint32_t fineRatio) {
  44. uint16_t ratio = (uint16_t)bitShiftRightImmediate<8>(fineRatio);
  45. uint16_t invRatio = 256 - ratio;
  46. U16x8 weightA = U16x8(invRatio);
  47. U16x8 weightB = U16x8(ratio);
  48. U32x4 lowMask(0x00FF00FFu);
  49. U16x8 lowColorA = U16x8(colorA & lowMask);
  50. U16x8 lowColorB = U16x8(colorB & lowMask);
  51. U32x4 highMask(0xFF00FF00u);
  52. U16x8 highColorA = bitShiftRightImmediate<8>(U16x8((colorA & highMask)));
  53. U16x8 highColorB = bitShiftRightImmediate<8>(U16x8((colorB & highMask)));
  54. U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32();
  55. U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
  56. return ((bitShiftRightImmediate<8>(lowColor) & lowMask) | (highColor & highMask));
  57. }
  // TODO: Use wrappers around images to get the needed information unpacked in advance for faster reading of pixels.
  // Both macros read from an image named "source", which must be visible where the macro is expanded.
  // READ_RGBAU8_CLAMP reads the pixel at (X, Y) using image_readPixel_clamp.
  #define READ_RGBAU8_CLAMP(X,Y) image_readPixel_clamp(source, (X), (Y))
  // READ_RGBAU8_CLAMP_SIMD performs the same read and converts the color into a U32x4.
  #define READ_RGBAU8_CLAMP_SIMD(X,Y) ColorRgbaI32_to_U32x4(image_readPixel_clamp(source, (X), (Y)))
  // Fixed-precision decimal system with 16-bit indices and 16-bit sub-pixel weights.
  // The distance of one whole pixel.
  static const uint32_t interpolationFullPixel = 1u << 16;
  // The distance of half a pixel.
  static const uint32_t interpolationHalfPixel = interpolationFullPixel >> 1;
  // Modulo mask keeping the sub-pixel weight of a value, which is at least 0 and less than interpolationFullPixel.
  static const uint32_t interpolationWeightMask = interpolationFullPixel - 1u;
  66. template <bool BILINEAR>
  67. static uint32_t samplePixel(const ImageRgbaU8& target, const ImageRgbaU8& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
  68. if (BILINEAR) {
  69. uint32_t upperRatio = 65536 - lowerRatio;
  70. uint32_t leftRatio = 65536 - rightRatio;
  71. U32x4 vUpperLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, upperY);
  72. U32x4 vUpperRightColor = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY);
  73. U32x4 vLowerLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, upperY + 1);
  74. U32x4 vLowerRightColor = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY + 1);
  75. U32x4 vLeftRatio = U32x4(leftRatio);
  76. U32x4 vRightRatio = U32x4(rightRatio);
  77. U32x4 vUpperColor = bitShiftRightImmediate<16>((vUpperLeftColor * vLeftRatio) + (vUpperRightColor * vRightRatio));
  78. U32x4 vLowerColor = bitShiftRightImmediate<16>((vLowerLeftColor * vLeftRatio) + (vLowerRightColor * vRightRatio));
  79. U32x4 vCenterColor = bitShiftRightImmediate<16>((vUpperColor * upperRatio) + (vLowerColor * lowerRatio));
  80. return image_saturateAndPack(target, U32x4_to_ColorRgbaI32(vCenterColor));
  81. } else {
  82. return image_saturateAndPack(target, image_readPixel_clamp(source, leftX, upperY));
  83. }
  84. }
  85. template <bool BILINEAR>
  86. static uint8_t samplePixel(const ImageU8& target, const ImageU8& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
  87. if (BILINEAR) {
  88. uint32_t upperRatio = 65536 - lowerRatio;
  89. uint32_t leftRatio = 65536 - rightRatio;
  90. uint32_t upperLeftLuma = image_readPixel_clamp(source, leftX, upperY);
  91. uint32_t upperRightLuma = image_readPixel_clamp(source, leftX + 1, upperY);
  92. uint32_t lowerLeftLuma = image_readPixel_clamp(source, leftX, upperY + 1);
  93. uint32_t lowerRightLuma = image_readPixel_clamp(source, leftX + 1, upperY + 1);
  94. uint32_t upperLuma = bitShiftRightImmediate<16>((upperLeftLuma * leftRatio) + (upperRightLuma * rightRatio));
  95. uint32_t lowerLuma = bitShiftRightImmediate<16>((lowerLeftLuma * leftRatio) + (lowerRightLuma * rightRatio));
  96. return bitShiftRightImmediate<16>((upperLuma * upperRatio) + (lowerLuma * lowerRatio));
  97. } else {
  98. return image_readPixel_clamp(source, leftX, upperY);
  99. }
  100. }
  101. // BILINEAR: Enables linear interpolation
  102. // scaleRegion:
  103. // The stretched location of the source image in the target image
  104. // Making it smaller than the target image will fill the outside with stretched pixels
  105. // Allowing the caller to crop away parts of the source image that aren't interesting
  106. // Can be used to round the region to a multiple of the input size for a fixed pixel size
  107. template <bool BILINEAR, typename IMAGE_TYPE, typename PIXEL_TYPE>
  108. static void resize_reference(const IMAGE_TYPE& target, const IMAGE_TYPE& source, const IRect& scaleRegion) {
  109. // Reference implementation
  110. // Offset in source pixels per target pixel
  111. int32_t offsetX = interpolationFullPixel * image_getWidth(source) / scaleRegion.width();
  112. int32_t offsetY = interpolationFullPixel * image_getHeight(source) / scaleRegion.height();
  113. int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
  114. int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
  115. if (BILINEAR) {
  116. startX -= interpolationHalfPixel;
  117. startY -= interpolationHalfPixel;
  118. }
  119. SafePointer<PIXEL_TYPE> targetRow = image_getSafePointer<PIXEL_TYPE>(target);
  120. int32_t readY = startY;
  121. for (int32_t y = 0; y < image_getHeight(target); y++) {
  122. int32_t naturalY = readY;
  123. if (naturalY < 0) { naturalY = 0; }
  124. uint32_t sampleY = (uint32_t)naturalY;
  125. uint32_t upperY = bitShiftRightImmediate<16>(sampleY);
  126. uint32_t lowerRatio = sampleY & interpolationWeightMask;
  127. SafePointer<PIXEL_TYPE> targetPixel = targetRow;
  128. int32_t readX = startX;
  129. for (int32_t x = 0; x < image_getWidth(target); x++) {
  130. int32_t naturalX = readX;
  131. if (naturalX < 0) { naturalX = 0; }
  132. uint32_t sampleX = (uint32_t)naturalX;
  133. uint32_t leftX = bitShiftRightImmediate<16>(sampleX);
  134. uint32_t rightRatio = sampleX & interpolationWeightMask;
  135. *targetPixel = samplePixel<BILINEAR>(target, source, leftX, upperY, rightRatio, lowerRatio);
  136. targetPixel += 1;
  137. readX += offsetX;
  138. }
  139. targetRow.increaseBytes(image_getStride(target));
  140. readY += offsetY;
  141. }
  142. }
  143. template <bool BILINEAR, bool SIMD_ALIGNED>
  144. static void resize_optimized(const ImageRgbaU8& target, const ImageRgbaU8& source, const IRect& scaleRegion) {
  145. // Get source information
  146. // Compare dimensions
  147. const bool sameWidth = image_getWidth(source) == scaleRegion.width() && scaleRegion.left() == 0;
  148. const bool sameHeight = image_getHeight(source) == scaleRegion.height() && scaleRegion.top() == 0;
  149. const bool samePackOrder = image_getPackOrderIndex(target) == image_getPackOrderIndex(source);
  150. if (sameWidth && sameHeight) {
  151. // No need to resize, just make a copy to save time
  152. draw_copy(target, source);
  153. } else if (sameWidth && (samePackOrder || BILINEAR)) {
  154. // Only vertical interpolation
  155. // Offset in source pixels per target pixel
  156. int32_t offsetY = interpolationFullPixel * image_getHeight(source) / scaleRegion.height();
  157. int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
  158. if (BILINEAR) {
  159. startY -= interpolationHalfPixel;
  160. }
  161. SafePointer<uint32_t> targetRow = image_getSafePointer<uint32_t>(target);
  162. int32_t readY = startY;
  163. for (int32_t y = 0; y < image_getHeight(target); y++) {
  164. int32_t naturalY = readY;
  165. if (naturalY < 0) { naturalY = 0; }
  166. uint32_t sampleY = (uint32_t)naturalY;
  167. uint32_t upperY = bitShiftRightImmediate<16>(sampleY);
  168. uint32_t lowerY = upperY + 1;
  169. if (upperY >= (uint32_t)image_getHeight(source)) upperY = image_getHeight(source) - 1;
  170. if (lowerY >= (uint32_t)image_getHeight(source)) lowerY = image_getHeight(source) - 1;
  171. if (BILINEAR) {
  172. uint32_t lowerRatio = sampleY & interpolationWeightMask;
  173. uint32_t upperRatio = 65536 - lowerRatio;
  174. SafePointer<uint32_t> targetPixel = targetRow;
  175. if (SIMD_ALIGNED) {
  176. SafePointer<const uint32_t> sourceRowUpper = image_getSafePointer<uint32_t>(source, upperY);
  177. SafePointer<const uint32_t> sourceRowLower = image_getSafePointer<uint32_t>(source, lowerY);
  178. for (int32_t x = 0; x < image_getWidth(target); x += 4) {
  179. ALIGN16 U32x4 vUpperPackedColor = U32x4::readAligned(sourceRowUpper, "resize_optimized @ read vUpperPackedColor");
  180. ALIGN16 U32x4 vLowerPackedColor = U32x4::readAligned(sourceRowLower, "resize_optimized @ read vLowerPackedColor");
  181. ALIGN16 U32x4 vCenterColor = mixColorsUniform(vUpperPackedColor, vLowerPackedColor, lowerRatio);
  182. vCenterColor.writeAligned(targetPixel, "resize_optimized @ write vCenterColor");
  183. sourceRowUpper += 4;
  184. sourceRowLower += 4;
  185. targetPixel += 4;
  186. }
  187. } else {
  188. for (int32_t x = 0; x < image_getWidth(target); x++) {
  189. ALIGN16 U32x4 vUpperColor = READ_RGBAU8_CLAMP_SIMD(x, upperY);
  190. ALIGN16 U32x4 vLowerColor = READ_RGBAU8_CLAMP_SIMD(x, lowerY);
  191. ALIGN16 U32x4 vCenterColor = bitShiftRightImmediate<16>((vUpperColor * upperRatio) + (vLowerColor * lowerRatio));
  192. ColorRgbaI32 finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
  193. *targetPixel = image_saturateAndPack(target, finalColor);
  194. targetPixel += 1;
  195. }
  196. }
  197. } else {
  198. SafePointer<const uint32_t> sourceRowUpper = image_getSafePointer<uint32_t>(source, upperY);
  199. // Nearest neighbor sampling from a same width can be done using one copy per row
  200. safeMemoryCopy(targetRow, sourceRowUpper, image_getWidth(source) * 4);
  201. }
  202. targetRow.increaseBytes(image_getStride(target));
  203. readY += offsetY;
  204. }
  205. } else if (sameHeight) {
  206. // Only horizontal interpolation
  207. // Offset in source pixels per target pixel
  208. int32_t offsetX = interpolationFullPixel * image_getWidth(source) / scaleRegion.width();
  209. int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
  210. if (BILINEAR) {
  211. startX -= interpolationHalfPixel;
  212. }
  213. SafePointer<uint32_t> targetRow = image_getSafePointer<uint32_t>(target);
  214. for (int32_t y = 0; y < image_getHeight(target); y++) {
  215. SafePointer<uint32_t> targetPixel = targetRow;
  216. int32_t readX = startX;
  217. for (int32_t x = 0; x < image_getWidth(target); x++) {
  218. int32_t naturalX = readX;
  219. if (naturalX < 0) { naturalX = 0; }
  220. uint32_t sampleX = (uint32_t)naturalX;
  221. uint32_t leftX = bitShiftRightImmediate<16>(sampleX);
  222. uint32_t rightX = leftX + 1;
  223. uint32_t rightRatio = sampleX & interpolationWeightMask;
  224. uint32_t leftRatio = 65536 - rightRatio;
  225. ColorRgbaI32 finalColor;
  226. if (BILINEAR) {
  227. ALIGN16 U32x4 vLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, y);
  228. ALIGN16 U32x4 vRightColor = READ_RGBAU8_CLAMP_SIMD(rightX, y);
  229. ALIGN16 U32x4 vCenterColor = bitShiftRightImmediate<16>((vLeftColor * leftRatio) + (vRightColor * rightRatio));
  230. finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
  231. } else {
  232. finalColor = READ_RGBAU8_CLAMP(leftX, y);
  233. }
  234. *targetPixel = image_saturateAndPack(target, finalColor);
  235. targetPixel += 1;
  236. readX += offsetX;
  237. }
  238. targetRow.increaseBytes(image_getStride(target));
  239. }
  240. } else {
  241. // Call the reference implementation
  242. resize_reference<BILINEAR, ImageRgbaU8, uint32_t>(target, source, scaleRegion);
  243. }
  244. }
  245. // Converting run-time flags into compile-time constants
  246. static void resize_aux(const ImageRgbaU8& target, const ImageRgbaU8& source, bool interpolate, const IRect& scaleRegion) {
  247. // If writing to padding is allowed and both images are 16-byte aligned with the same pack order
  248. if (!(image_isSubImage(source) || image_isSubImage(target))) {
  249. // SIMD resize allowed
  250. if (interpolate) {
  251. resize_optimized<true, true>(target, source, scaleRegion);
  252. } else {
  253. resize_optimized<false, true>(target, source, scaleRegion);
  254. }
  255. } else {
  256. // Non-SIMD resize
  257. if (interpolate) {
  258. resize_optimized<true, false>(target, source, scaleRegion);
  259. } else {
  260. resize_optimized<false, false>(target, source, scaleRegion);
  261. }
  262. }
  263. }
  264. // TODO: Optimize monochrome resizing.
  265. static void resize_aux(const ImageU8& target, const ImageU8& source, bool interpolate, const IRect& scaleRegion) {
  266. if (interpolate) {
  267. resize_reference<true, ImageU8, uint8_t>(target, source, scaleRegion);
  268. } else {
  269. resize_reference<false, ImageU8, uint8_t>(target, source, scaleRegion);
  270. }
  271. }
  272. // Creating an image to replacedImage with the same pack order as originalImage when applicable to the image format.
  273. static ImageRgbaU8 createWithSamePackOrder(const ImageRgbaU8& originalImage, int32_t width, int32_t height) {
  274. return image_create_RgbaU8_native(width, height, image_getPackOrderIndex(originalImage));
  275. }
  276. static ImageU8 createWithSamePackOrder(const ImageU8& originalImage, int32_t width, int32_t height) {
  277. return image_create_U8(width, height);
  278. }
  279. template <typename IMAGE_TYPE>
  280. void resizeToTarget(IMAGE_TYPE& target, const IMAGE_TYPE& source, bool interpolate) {
  281. IRect scaleRegion = image_getBound(target);
  282. if (image_getWidth(target) != image_getWidth(source) && image_getHeight(target) > image_getHeight(source)) {
  283. // Upscaling is faster in two steps by both reusing the horizontal interpolation and vectorizing the vertical interpolation.
  284. int tempWidth = image_getWidth(target);
  285. int tempHeight = image_getHeight(source);
  286. IRect tempScaleRegion = IRect(scaleRegion.left(), 0, scaleRegion.width(), image_getHeight(source));
  287. // Create a temporary buffer.
  288. IMAGE_TYPE newTempImage = createWithSamePackOrder(target, tempWidth, tempHeight);
  289. resize_aux(newTempImage, source, interpolate, tempScaleRegion);
  290. resize_aux(target, newTempImage, interpolate, scaleRegion);
  291. } else {
  292. // Downscaling or only changing one dimension is faster in one step.
  293. resize_aux(target, source, interpolate, scaleRegion);
  294. }
  295. }
  296. template <bool CONVERT_COLOR>
  297. static inline uint32_t convertRead(const ImageRgbaU8& target, const ImageRgbaU8& source, int x, int y) {
  298. uint32_t result = image_readPixel_clamp_packed(source, x, y);
  299. if (CONVERT_COLOR) {
  300. result = image_truncateAndPack(target, image_unpack(source, result));
  301. }
  302. return result;
  303. }
  304. // Used for drawing large pixels
  305. static inline void fillRectangle(const ImageRgbaU8& target, int pixelLeft, int pixelRight, int pixelTop, int pixelBottom, const uint32_t& packedColor) {
  306. SafePointer<uint32_t> targetRow = image_getSafePointer<uint32_t>(target, pixelTop) + pixelLeft;
  307. for (int y = pixelTop; y < pixelBottom; y++) {
  308. SafePointer<uint32_t> targetPixel = targetRow;
  309. for (int x = pixelLeft; x < pixelRight; x++) {
  310. *targetPixel = packedColor;
  311. targetPixel += 1;
  312. }
  313. targetRow.increaseBytes(image_getStride(target));
  314. }
  315. }
  316. template <bool CONVERT_COLOR>
  317. static void blockMagnify_reference(
  318. const ImageRgbaU8& target, const ImageRgbaU8& source,
  319. int pixelWidth, int pixelHeight, int clipWidth, int clipHeight) {
  320. int sourceY = 0;
  321. int maxSourceX = image_getWidth(source) - 1;
  322. int maxSourceY = image_getHeight(source) - 1;
  323. if (clipWidth > image_getWidth(target)) { clipWidth = image_getWidth(target); }
  324. if (clipHeight > image_getHeight(target)) { clipHeight = image_getHeight(target); }
  325. for (int32_t pixelTop = 0; pixelTop < clipHeight; pixelTop += pixelHeight) {
  326. int sourceX = 0;
  327. for (int32_t pixelLeft = 0; pixelLeft < clipWidth; pixelLeft += pixelWidth) {
  328. // Read the pixel once
  329. uint32_t sourceColor = convertRead<CONVERT_COLOR>(target, source, sourceX, sourceY);
  330. // Write to all target pixels in a conditionless loop
  331. fillRectangle(target, pixelLeft, pixelLeft + pixelWidth, pixelTop, pixelTop + pixelHeight, sourceColor);
  332. // Iterate and clamp the read coordinate
  333. sourceX++;
  334. if (sourceX > maxSourceX) { sourceX = maxSourceX; }
  335. }
  336. // Iterate and clamp the read coordinate
  337. sourceY++;
  338. if (sourceY > maxSourceY) { sourceY = maxSourceY; }
  339. }
  340. }
  341. // Pre-condition:
  342. // * The source and target images have the same pack order
  343. // * Both source and target are 16-byte aligned, but does not have to own their padding
  344. // * clipWidth % 2 == 0
  345. // * clipHeight % 2 == 0
  346. static void blockMagnify_2x2(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
  347. SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
  348. SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
  349. SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
  350. int blockTargetStride = image_getStride(target) * 2;
  351. for (int upperTargetY = 0; upperTargetY + 2 <= clipHeight; upperTargetY+=2) {
  352. // Carriage return
  353. SafePointer<const uint32_t> sourcePixel = sourceRow;
  354. SafePointer<uint32_t> targetPixelA = targetRowA;
  355. SafePointer<uint32_t> targetPixelB = targetRowB;
  356. // Write to whole multiples of 8 pixels
  357. int writeLeftX = 0;
  358. while (writeLeftX + 2 <= clipWidth) {
  359. // Read one pixel at a time
  360. uint32_t scalarValue = *sourcePixel;
  361. sourcePixel += 1;
  362. // Write to a whole block of pixels
  363. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue;
  364. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue;
  365. targetPixelA += 2;
  366. targetPixelB += 2;
  367. // Count
  368. writeLeftX += 2;
  369. }
  370. // Line feed
  371. sourceRow.increaseBytes(image_getStride(source));
  372. targetRowA.increaseBytes(blockTargetStride);
  373. targetRowB.increaseBytes(blockTargetStride);
  374. }
  375. }
  376. // Pre-condition:
  377. // * The source and target images have the same pack order
  378. // * Both source and target are 16-byte aligned, but does not have to own their padding
  379. // * clipWidth % 3 == 0
  380. // * clipHeight % 3 == 0
  381. static void blockMagnify_3x3(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
  382. SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
  383. SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
  384. SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
  385. SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
  386. int blockTargetStride = image_getStride(target) * 3;
  387. for (int upperTargetY = 0; upperTargetY + 3 <= clipHeight; upperTargetY+=3) {
  388. // Carriage return
  389. SafePointer<const uint32_t> sourcePixel = sourceRow;
  390. SafePointer<uint32_t> targetPixelA = targetRowA;
  391. SafePointer<uint32_t> targetPixelB = targetRowB;
  392. SafePointer<uint32_t> targetPixelC = targetRowC;
  393. int writeLeftX = 0;
  394. while (writeLeftX + 3 <= clipWidth) {
  395. // Read one pixel at a time
  396. uint32_t scalarValue = *sourcePixel;
  397. sourcePixel += 1;
  398. // Write to a whole block of pixels
  399. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue;
  400. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue;
  401. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue;
  402. targetPixelA += 3;
  403. targetPixelB += 3;
  404. targetPixelC += 3;
  405. // Count
  406. writeLeftX += 3;
  407. }
  408. // Line feed
  409. sourceRow.increaseBytes(image_getStride(source));
  410. targetRowA.increaseBytes(blockTargetStride);
  411. targetRowB.increaseBytes(blockTargetStride);
  412. targetRowC.increaseBytes(blockTargetStride);
  413. }
  414. }
  415. // Pre-condition:
  416. // * The source and target images have the same pack order
  417. // * Both source and target are 16-byte aligned, but does not have to own their padding
  418. // * clipWidth % 4 == 0
  419. // * clipHeight % 4 == 0
  420. static void blockMagnify_4x4(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
  421. SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
  422. SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
  423. SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
  424. SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
  425. SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
  426. int quadTargetStride = image_getStride(target) * 4;
  427. for (int upperTargetY = 0; upperTargetY + 4 <= clipHeight; upperTargetY+=4) {
  428. // Carriage return
  429. SafePointer<const uint32_t> sourcePixel = sourceRow;
  430. SafePointer<uint32_t> targetPixelA = targetRowA;
  431. SafePointer<uint32_t> targetPixelB = targetRowB;
  432. SafePointer<uint32_t> targetPixelC = targetRowC;
  433. SafePointer<uint32_t> targetPixelD = targetRowD;
  434. int writeLeftX = 0;
  435. while (writeLeftX + 4 <= clipWidth) {
  436. // Read one pixel at a time
  437. uint32_t scalarValue = *sourcePixel;
  438. sourcePixel += 1;
  439. // Convert scalar to SIMD vector of 4 repeated pixels
  440. ALIGN16 U32x4 sourcePixels = U32x4(scalarValue);
  441. // Write to 4x4 pixels using 4 SIMD writes
  442. sourcePixels.writeAligned(targetPixelA, "blockMagnify_4x4 @ write A");
  443. sourcePixels.writeAligned(targetPixelB, "blockMagnify_4x4 @ write B");
  444. sourcePixels.writeAligned(targetPixelC, "blockMagnify_4x4 @ write C");
  445. sourcePixels.writeAligned(targetPixelD, "blockMagnify_4x4 @ write D");
  446. targetPixelA += 4;
  447. targetPixelB += 4;
  448. targetPixelC += 4;
  449. targetPixelD += 4;
  450. // Count
  451. writeLeftX += 4;
  452. }
  453. // Line feed
  454. sourceRow.increaseBytes(image_getStride(source));
  455. targetRowA.increaseBytes(quadTargetStride);
  456. targetRowB.increaseBytes(quadTargetStride);
  457. targetRowC.increaseBytes(quadTargetStride);
  458. targetRowD.increaseBytes(quadTargetStride);
  459. }
  460. }
  461. // Pre-condition:
  462. // * The source and target images have the same pack order
  463. // * Both source and target are 16-byte aligned, but does not have to own their padding
  464. // * clipWidth % 5 == 0
  465. // * clipHeight % 5 == 0
  466. static void blockMagnify_5x5(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
  467. SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
  468. SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
  469. SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
  470. SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
  471. SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
  472. SafePointer<uint32_t> targetRowE = image_getSafePointer<uint32_t>(target, 4);
  473. int blockTargetStride = image_getStride(target) * 5;
  474. for (int upperTargetY = 0; upperTargetY + 5 <= clipHeight; upperTargetY+=5) {
  475. // Carriage return
  476. SafePointer<const uint32_t> sourcePixel = sourceRow;
  477. SafePointer<uint32_t> targetPixelA = targetRowA;
  478. SafePointer<uint32_t> targetPixelB = targetRowB;
  479. SafePointer<uint32_t> targetPixelC = targetRowC;
  480. SafePointer<uint32_t> targetPixelD = targetRowD;
  481. SafePointer<uint32_t> targetPixelE = targetRowE;
  482. int writeLeftX = 0;
  483. while (writeLeftX + 5 <= clipWidth) {
  484. // Read one pixel at a time
  485. uint32_t scalarValue = *sourcePixel;
  486. sourcePixel += 1;
  487. // Write to a whole block of pixels
  488. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue;
  489. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue;
  490. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue;
  491. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue;
  492. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue;
  493. targetPixelA += 5;
  494. targetPixelB += 5;
  495. targetPixelC += 5;
  496. targetPixelD += 5;
  497. targetPixelE += 5;
  498. // Count
  499. writeLeftX += 5;
  500. }
  501. // Line feed
  502. sourceRow.increaseBytes(image_getStride(source));
  503. targetRowA.increaseBytes(blockTargetStride);
  504. targetRowB.increaseBytes(blockTargetStride);
  505. targetRowC.increaseBytes(blockTargetStride);
  506. targetRowD.increaseBytes(blockTargetStride);
  507. targetRowE.increaseBytes(blockTargetStride);
  508. }
  509. }
  510. // Pre-condition:
  511. // * The source and target images have the same pack order
  512. // * Both source and target are 16-byte aligned, but does not have to own their padding
  513. // * clipWidth % 6 == 0
  514. // * clipHeight % 6 == 0
  515. static void blockMagnify_6x6(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
  516. SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
  517. SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
  518. SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
  519. SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
  520. SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
  521. SafePointer<uint32_t> targetRowE = image_getSafePointer<uint32_t>(target, 4);
  522. SafePointer<uint32_t> targetRowF = image_getSafePointer<uint32_t>(target, 5);
  523. int blockTargetStride = image_getStride(target) * 6;
  524. for (int upperTargetY = 0; upperTargetY + 6 <= clipHeight; upperTargetY+=6) {
  525. // Carriage return
  526. SafePointer<const uint32_t> sourcePixel = sourceRow;
  527. SafePointer<uint32_t> targetPixelA = targetRowA;
  528. SafePointer<uint32_t> targetPixelB = targetRowB;
  529. SafePointer<uint32_t> targetPixelC = targetRowC;
  530. SafePointer<uint32_t> targetPixelD = targetRowD;
  531. SafePointer<uint32_t> targetPixelE = targetRowE;
  532. SafePointer<uint32_t> targetPixelF = targetRowF;
  533. int writeLeftX = 0;
  534. while (writeLeftX + 6 <= clipWidth) {
  535. // Read one pixel at a time
  536. uint32_t scalarValue = *sourcePixel;
  537. sourcePixel += 1;
  538. // Write to a whole block of pixels
  539. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue;
  540. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue;
  541. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue;
  542. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue;
  543. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue;
  544. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue;
  545. targetPixelA += 6;
  546. targetPixelB += 6;
  547. targetPixelC += 6;
  548. targetPixelD += 6;
  549. targetPixelE += 6;
  550. targetPixelF += 6;
  551. // Count
  552. writeLeftX += 6;
  553. }
  554. // Line feed
  555. sourceRow.increaseBytes(image_getStride(source));
  556. targetRowA.increaseBytes(blockTargetStride);
  557. targetRowB.increaseBytes(blockTargetStride);
  558. targetRowC.increaseBytes(blockTargetStride);
  559. targetRowD.increaseBytes(blockTargetStride);
  560. targetRowE.increaseBytes(blockTargetStride);
  561. targetRowF.increaseBytes(blockTargetStride);
  562. }
  563. }
  564. // Pre-condition:
  565. // * The source and target images have the same pack order
  566. // * Both source and target are 16-byte aligned, but does not have to own their padding
  567. // * clipWidth % 7 == 0
  568. // * clipHeight % 7 == 0
  569. static void blockMagnify_7x7(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
  570. SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
  571. SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
  572. SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
  573. SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
  574. SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
  575. SafePointer<uint32_t> targetRowE = image_getSafePointer<uint32_t>(target, 4);
  576. SafePointer<uint32_t> targetRowF = image_getSafePointer<uint32_t>(target, 5);
  577. SafePointer<uint32_t> targetRowG = image_getSafePointer<uint32_t>(target, 6);
  578. int blockTargetStride = image_getStride(target) * 7;
  579. for (int upperTargetY = 0; upperTargetY + 7 <= clipHeight; upperTargetY+=7) {
  580. // Carriage return
  581. SafePointer<const uint32_t> sourcePixel = sourceRow;
  582. SafePointer<uint32_t> targetPixelA = targetRowA;
  583. SafePointer<uint32_t> targetPixelB = targetRowB;
  584. SafePointer<uint32_t> targetPixelC = targetRowC;
  585. SafePointer<uint32_t> targetPixelD = targetRowD;
  586. SafePointer<uint32_t> targetPixelE = targetRowE;
  587. SafePointer<uint32_t> targetPixelF = targetRowF;
  588. SafePointer<uint32_t> targetPixelG = targetRowG;
  589. int writeLeftX = 0;
  590. while (writeLeftX + 7 <= clipWidth) {
  591. // Read one pixel at a time
  592. uint32_t scalarValue = *sourcePixel;
  593. sourcePixel += 1;
  594. // Write to a whole block of pixels
  595. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue;
  596. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue;
  597. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue;
  598. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue;
  599. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue;
  600. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue;
  601. targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue;
  602. targetPixelA += 7;
  603. targetPixelB += 7;
  604. targetPixelC += 7;
  605. targetPixelD += 7;
  606. targetPixelE += 7;
  607. targetPixelF += 7;
  608. targetPixelG += 7;
  609. // Count
  610. writeLeftX += 7;
  611. }
  612. // Line feed
  613. sourceRow.increaseBytes(image_getStride(source));
  614. targetRowA.increaseBytes(blockTargetStride);
  615. targetRowB.increaseBytes(blockTargetStride);
  616. targetRowC.increaseBytes(blockTargetStride);
  617. targetRowD.increaseBytes(blockTargetStride);
  618. targetRowE.increaseBytes(blockTargetStride);
  619. targetRowF.increaseBytes(blockTargetStride);
  620. targetRowG.increaseBytes(blockTargetStride);
  621. }
  622. }
  623. // Pre-condition:
  624. // * The source and target images have the same pack order
  625. // * Both source and target are 16-byte aligned, but does not have to own their padding
  626. // * clipWidth % 8 == 0
  627. // * clipHeight % 8 == 0
  628. static void blockMagnify_8x8(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
  629. SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
  630. SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
  631. SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
  632. SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
  633. SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
  634. SafePointer<uint32_t> targetRowE = image_getSafePointer<uint32_t>(target, 4);
  635. SafePointer<uint32_t> targetRowF = image_getSafePointer<uint32_t>(target, 5);
  636. SafePointer<uint32_t> targetRowG = image_getSafePointer<uint32_t>(target, 6);
  637. SafePointer<uint32_t> targetRowH = image_getSafePointer<uint32_t>(target, 7);
  638. int blockTargetStride = image_getStride(target) * 8;
  639. for (int upperTargetY = 0; upperTargetY + 8 <= clipHeight; upperTargetY+=8) {
  640. // Carriage return
  641. SafePointer<const uint32_t> sourcePixel = sourceRow;
  642. SafePointer<uint32_t> targetPixelA = targetRowA;
  643. SafePointer<uint32_t> targetPixelB = targetRowB;
  644. SafePointer<uint32_t> targetPixelC = targetRowC;
  645. SafePointer<uint32_t> targetPixelD = targetRowD;
  646. SafePointer<uint32_t> targetPixelE = targetRowE;
  647. SafePointer<uint32_t> targetPixelF = targetRowF;
  648. SafePointer<uint32_t> targetPixelG = targetRowG;
  649. SafePointer<uint32_t> targetPixelH = targetRowH;
  650. int writeLeftX = 0;
  651. while (writeLeftX + 8 <= clipWidth) {
  652. // Read one pixel at a time
  653. uint32_t scalarValue = *sourcePixel;
  654. sourcePixel += 1;
  655. // Write to a whole block of pixels
  656. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue; targetPixelA[7] = scalarValue;
  657. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue; targetPixelB[7] = scalarValue;
  658. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue; targetPixelC[7] = scalarValue;
  659. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue; targetPixelD[7] = scalarValue;
  660. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue; targetPixelE[7] = scalarValue;
  661. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue; targetPixelF[7] = scalarValue;
  662. targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue; targetPixelG[7] = scalarValue;
  663. targetPixelH[0] = scalarValue; targetPixelH[1] = scalarValue; targetPixelH[2] = scalarValue; targetPixelH[3] = scalarValue; targetPixelH[4] = scalarValue; targetPixelH[5] = scalarValue; targetPixelH[6] = scalarValue; targetPixelH[7] = scalarValue;
  664. targetPixelA += 8;
  665. targetPixelB += 8;
  666. targetPixelC += 8;
  667. targetPixelD += 8;
  668. targetPixelE += 8;
  669. targetPixelF += 8;
  670. targetPixelG += 8;
  671. targetPixelH += 8;
  672. // Count
  673. writeLeftX += 8;
  674. }
  675. // Line feed
  676. sourceRow.increaseBytes(image_getStride(source));
  677. targetRowA.increaseBytes(blockTargetStride);
  678. targetRowB.increaseBytes(blockTargetStride);
  679. targetRowC.increaseBytes(blockTargetStride);
  680. targetRowD.increaseBytes(blockTargetStride);
  681. targetRowE.increaseBytes(blockTargetStride);
  682. targetRowF.increaseBytes(blockTargetStride);
  683. targetRowG.increaseBytes(blockTargetStride);
  684. targetRowH.increaseBytes(blockTargetStride);
  685. }
  686. }
  687. static void blackEdges(const ImageRgbaU8& target, int excludedWidth, int excludedHeight) {
  688. // Right side
  689. draw_rectangle(target, IRect(excludedWidth, 0, image_getWidth(target) - excludedWidth, excludedHeight), 0);
  690. // Bottom and corner
  691. draw_rectangle(target, IRect(0, excludedHeight, image_getWidth(target), image_getHeight(target) - excludedHeight), 0);
  692. }
  693. static void imageImpl_blockMagnify(const ImageRgbaU8& target, const ImageRgbaU8& source, int pixelWidth, int pixelHeight) {
  694. if (pixelWidth < 1) { pixelWidth = 1; }
  695. if (pixelHeight < 1) { pixelHeight = 1; }
  696. bool sameOrder = image_getPackOrderIndex(target) == image_getPackOrderIndex(source);
  697. // Find the part of source which fits into target with whole pixels
  698. int clipWidth = roundDown(min(image_getWidth(target), image_getWidth(source) * pixelWidth), pixelWidth);
  699. int clipHeight = roundDown(min(image_getHeight(target), image_getHeight(source) * pixelHeight), pixelHeight);
  700. if (sameOrder) {
  701. if (!(image_isSubImage(source) || image_isSubImage(target))) {
  702. if (pixelWidth == 2 && pixelHeight == 2) {
  703. blockMagnify_2x2(target, source, clipWidth, clipHeight);
  704. } else if (pixelWidth == 3 && pixelHeight == 3) {
  705. blockMagnify_3x3(target, source, clipWidth, clipHeight);
  706. } else if (pixelWidth == 4 && pixelHeight == 4) {
  707. blockMagnify_4x4(target, source, clipWidth, clipHeight);
  708. } else if (pixelWidth == 5 && pixelHeight == 5) {
  709. blockMagnify_5x5(target, source, clipWidth, clipHeight);
  710. } else if (pixelWidth == 6 && pixelHeight == 6) {
  711. blockMagnify_6x6(target, source, clipWidth, clipHeight);
  712. } else if (pixelWidth == 7 && pixelHeight == 7) {
  713. blockMagnify_7x7(target, source, clipWidth, clipHeight);
  714. } else if (pixelWidth == 8 && pixelHeight == 8) {
  715. blockMagnify_8x8(target, source, clipWidth, clipHeight);
  716. } else {
  717. blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  718. }
  719. } else {
  720. blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  721. }
  722. } else {
  723. blockMagnify_reference<true>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  724. }
  725. blackEdges(target, clipWidth, clipHeight);
  726. }
  727. static void mapRgbaU8(const ImageRgbaU8& target, const ImageGenRgbaU8& lambda, int startX, int startY) {
  728. const int targetWidth = image_getWidth(target);
  729. const int targetHeight = image_getHeight(target);
  730. const int targetStride = image_getStride(target);
  731. SafePointer<uint32_t> targetRow = image_getSafePointer<uint32_t>(target);
  732. for (int y = startY; y < targetHeight + startY; y++) {
  733. SafePointer<uint32_t> targetPixel = targetRow;
  734. for (int x = startX; x < targetWidth + startX; x++) {
  735. *targetPixel = image_saturateAndPack(target, lambda(x, y));
  736. targetPixel += 1;
  737. }
  738. targetRow.increaseBytes(targetStride);
  739. }
  740. }
  741. void filter_mapRgbaU8(const ImageRgbaU8 target, const ImageGenRgbaU8& lambda, int startX, int startY) {
  742. if (image_exists(target)) {
  743. mapRgbaU8(target, lambda, startX, startY);
  744. }
  745. }
  746. OrderedImageRgbaU8 filter_generateRgbaU8(int width, int height, const ImageGenRgbaU8& lambda, int startX, int startY) {
  747. OrderedImageRgbaU8 result = image_create_RgbaU8(width, height);
  748. filter_mapRgbaU8(result, lambda, startX, startY);
  749. return result;
  750. }
  751. template <typename IMAGE_TYPE, typename PIXEL_TYPE, int MIN_VALUE, int MAX_VALUE>
  752. static void mapMonochrome(const IMAGE_TYPE& target, const ImageGenI32& lambda, int startX, int startY) {
  753. const int targetWidth = image_getWidth(target);
  754. const int targetHeight = image_getHeight(target);
  755. const int targetStride = image_getStride(target);
  756. SafePointer<PIXEL_TYPE> targetRow = image_getSafePointer<PIXEL_TYPE>(target);
  757. for (int y = startY; y < targetHeight + startY; y++) {
  758. SafePointer<PIXEL_TYPE> targetPixel = targetRow;
  759. for (int x = startX; x < targetWidth + startX; x++) {
  760. int output = lambda(x, y);
  761. if (output < MIN_VALUE) { output = MIN_VALUE; }
  762. if (output > MAX_VALUE) { output = MAX_VALUE; }
  763. *targetPixel = output;
  764. targetPixel += 1;
  765. }
  766. targetRow.increaseBytes(targetStride);
  767. }
  768. }
  769. void filter_mapU8(const ImageU8 target, const ImageGenI32& lambda, int startX, int startY) {
  770. if (image_exists(target)) {
  771. mapMonochrome<ImageU8, uint8_t, 0, 255>(target, lambda, startX, startY);
  772. }
  773. }
  774. AlignedImageU8 filter_generateU8(int width, int height, const ImageGenI32& lambda, int startX, int startY) {
  775. AlignedImageU8 result = image_create_U8(width, height);
  776. filter_mapU8(result, lambda, startX, startY);
  777. return result;
  778. }
  779. void filter_mapU16(const ImageU16 target, const ImageGenI32& lambda, int startX, int startY) {
  780. if (image_exists(target)) {
  781. mapMonochrome<ImageU16, uint16_t, 0, 65535>(target, lambda, startX, startY);
  782. }
  783. }
  784. AlignedImageU16 filter_generateU16(int width, int height, const ImageGenI32& lambda, int startX, int startY) {
  785. AlignedImageU16 result = image_create_U16(width, height);
  786. filter_mapU16(result, lambda, startX, startY);
  787. return result;
  788. }
  789. static void mapF32(const ImageF32& target, const ImageGenF32& lambda, int startX, int startY) {
  790. const int targetWidth = image_getWidth(target);
  791. const int targetHeight = image_getHeight(target);
  792. const int targetStride = image_getStride(target);
  793. SafePointer<float> targetRow = image_getSafePointer<float>(target);
  794. for (int y = startY; y < targetHeight + startY; y++) {
  795. SafePointer<float> targetPixel = targetRow;
  796. for (int x = startX; x < targetWidth + startX; x++) {
  797. *targetPixel = lambda(x, y);
  798. targetPixel += 1;
  799. }
  800. targetRow.increaseBytes(targetStride);
  801. }
  802. }
  803. void filter_mapF32(const ImageF32 target, const ImageGenF32& lambda, int startX, int startY) {
  804. if (image_exists(target)) {
  805. mapF32(target, lambda, startX, startY);
  806. }
  807. }
  808. AlignedImageF32 filter_generateF32(int width, int height, const ImageGenF32& lambda, int startX, int startY) {
  809. AlignedImageF32 result = image_create_F32(width, height);
  810. filter_mapF32(result, lambda, startX, startY);
  811. return result;
  812. }
  813. // -------------------------------- Resize --------------------------------
  814. OrderedImageRgbaU8 filter_resize(const ImageRgbaU8 &source, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
  815. if (image_exists(source)) {
  816. OrderedImageRgbaU8 resultImage = image_create_RgbaU8(newWidth, newHeight);
  817. resizeToTarget<ImageRgbaU8>(resultImage, source, interpolation == Sampler::Linear);
  818. return resultImage;
  819. } else {
  820. return OrderedImageRgbaU8(); // Null gives null
  821. }
  822. }
  823. AlignedImageU8 filter_resize(const ImageU8 &source, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
  824. if (image_exists(source)) {
  825. AlignedImageU8 resultImage = image_create_U8(newWidth, newHeight);
  826. resizeToTarget<ImageU8>(resultImage, source, interpolation == Sampler::Linear);
  827. return resultImage;
  828. } else {
  829. return AlignedImageU8(); // Null gives null
  830. }
  831. }
  832. void filter_blockMagnify(const ImageRgbaU8 &target, const ImageRgbaU8& source, int pixelWidth, int pixelHeight) {
  833. if (image_exists(target) && image_exists(source)) {
  834. imageImpl_blockMagnify(target, source, pixelWidth, pixelHeight);
  835. }
  836. }
  837. }