draw.cpp 76 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576
  1. // zlib open source license
  2. // zlib open source license
  3. //
  4. // Copyright (c) 2018 to 2019 David Forsgren Piuva
  5. //
  6. // This software is provided 'as-is', without any express or implied
  7. // warranty. In no event will the authors be held liable for any damages
  8. // arising from the use of this software.
  9. //
  10. // Permission is granted to anyone to use this software for any purpose,
  11. // including commercial applications, and to alter it and redistribute it
  12. // freely, subject to the following restrictions:
  13. //
  14. // 1. The origin of this software must not be misrepresented; you must not
  15. // claim that you wrote the original software. If you use this software
  16. // in a product, an acknowledgment in the product documentation would be
  17. // appreciated but is not required.
  18. //
  19. // 2. Altered source versions must be plainly marked as such, and must not be
  20. // misrepresented as being the original software.
  21. //
  22. // 3. This notice may not be removed or altered from any source
  23. // distribution.
  24. #include "../base/simd.h"
  25. #include "draw.h"
  26. #include "internal/imageInternal.h"
  27. #include "../math/scalar.h"
  28. #include <limits>
  29. using namespace dsr;
  // Multiplies two bytes as normalized fractions, where 0..255 stands for 0.0..1.0.
  // Preconditions:
  //   0 <= a <= 255
  //   0 <= b <= 255
  // Postconditions:
  //   Returns the normalized product of a and b in the range 0..255.
  //   The result is never below zero and never above either input.
  // Examples:
  //   normalizedByteMultiplication(0, 0) = 0
  //   normalizedByteMultiplication(x, 0) = 0
  //   normalizedByteMultiplication(0, x) = 0
  //   normalizedByteMultiplication(x, 255) = x
  //   normalizedByteMultiplication(255, x) = x
  //   normalizedByteMultiplication(255, 255) = 255
  static inline uint32_t normalizedByteMultiplication(uint32_t a, uint32_t b) {
      // Fixed-point reciprocal of 255 with 24 fractional bits: 2^24 / 255 is roughly 65793.
      const uint32_t reciprocalOf255 = 65793u;
      // Half of 2^24, added so that the final shift rounds to the closest value instead of truncating.
      const uint32_t halfUnit = 8388608u;
      // The largest intermediate is 255 * 255 * 65793 + 8388608 = 4286578433, which fits in 32 unsigned bits.
      const uint32_t product = a * b;
      return (product * reciprocalOf255 + halfUnit) >> 24;
  }
  // Tells whether both bytes of a 16-bit value are the same,
  // which is when the value can be produced by repeating a single byte.
  // Equivalent to value % 257 == 0, because A + B * 256 = A * 257 when A = B.
  inline bool isUniformByteU16(uint16_t value) {
      const unsigned int lowByte = value & 0x00FFu;
      const unsigned int highByte = (value >> 8) & 0x00FFu;
      return lowByte == highByte;
  }
  57. // -------------------------------- Drawing shapes --------------------------------
  58. template <typename COLOR_TYPE>
  59. static inline void drawSolidRectangleAssign(ImageImpl &target, int left, int top, int right, int bottom, COLOR_TYPE color) {
  60. int leftBound = std::max(0, left);
  61. int topBound = std::max(0, top);
  62. int rightBound = std::min(right, target.width);
  63. int bottomBound = std::min(bottom, target.height);
  64. int stride = target.stride;
  65. SafePointer<COLOR_TYPE> rowData = imageInternal::getSafeData<COLOR_TYPE>(target, topBound);
  66. rowData += leftBound;
  67. for (int y = topBound; y < bottomBound; y++) {
  68. SafePointer<COLOR_TYPE> pixelData = rowData;
  69. for (int x = leftBound; x < rightBound; x++) {
  70. pixelData.get() = color;
  71. pixelData += 1;
  72. }
  73. rowData.increaseBytes(stride);
  74. }
  75. }
  76. template <typename COLOR_TYPE>
  77. static inline void drawSolidRectangleMemset(ImageImpl &target, int left, int top, int right, int bottom, uint8_t uniformByte) {
  78. int leftBound = std::max(0, left);
  79. int topBound = std::max(0, top);
  80. int rightBound = std::min(right, target.width);
  81. int bottomBound = std::min(bottom, target.height);
  82. if (rightBound > leftBound && bottomBound > topBound) {
  83. int stride = target.stride;
  84. SafePointer<COLOR_TYPE> rowData = imageInternal::getSafeData<COLOR_TYPE>(target, topBound);
  85. rowData += leftBound;
  86. int filledWidth = rightBound - leftBound;
  87. int rowSize = filledWidth * sizeof(COLOR_TYPE);
  88. int rowCount = bottomBound - topBound;
  89. if (!target.isSubImage && filledWidth == target.width) {
  90. // Write over any padding for parent images owning the whole buffer.
  91. // Including parent images with sub-images using the same data
  92. // because no child image may display the parent-image's padding bytes.
  93. safeMemorySet(rowData, uniformByte, (stride * (rowCount - 1)) + rowSize);
  94. } else if (rowSize == stride) {
  95. // When the filled row stretches all the way from left to right in the main allocation
  96. // there's no unseen pixels being overwritten in other images sharing the buffer.
  97. // This case handles sub-images that uses the full width of
  98. // the parent image which doesn't have any padding.
  99. safeMemorySet(rowData, uniformByte, rowSize * rowCount);
  100. } else {
  101. // Fall back on using one memset operation per row.
  102. // This case is for sub-images that must preserve interleaved pixel rows belonging
  103. // to other images that aren't visible and therefore not owned by this image.
  104. for (int y = topBound; y < bottomBound; y++) {
  105. safeMemorySet(rowData, uniformByte, rowSize);
  106. rowData.increaseBytes(stride);
  107. }
  108. }
  109. }
  110. }
  111. void dsr::imageImpl_draw_solidRectangle(ImageU8Impl& image, const IRect& bound, int color) {
  112. if (color < 0) { color = 0; }
  113. if (color > 255) { color = 255; }
  114. drawSolidRectangleMemset<uint8_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), color);
  115. }
  116. void dsr::imageImpl_draw_solidRectangle(ImageU16Impl& image, const IRect& bound, int color) {
  117. if (color < 0) { color = 0; }
  118. if (color > 65535) { color = 65535; }
  119. uint16_t uColor = color;
  120. if (isUniformByteU16(uColor)) {
  121. drawSolidRectangleMemset<uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), 0);
  122. } else {
  123. drawSolidRectangleAssign<uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), uColor);
  124. }
  125. }
  126. void dsr::imageImpl_draw_solidRectangle(ImageF32Impl& image, const IRect& bound, float color) {
  127. if (color == 0.0f) {
  128. drawSolidRectangleMemset<float>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), 0);
  129. } else {
  130. drawSolidRectangleAssign<float>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), color);
  131. }
  132. }
  133. void dsr::imageImpl_draw_solidRectangle(ImageRgbaU8Impl& image, const IRect& bound, const ColorRgbaI32& color) {
  134. Color4xU8 packedColor = image.packRgba(color.saturate());
  135. if (packedColor.isUniformByte()) {
  136. drawSolidRectangleMemset<Color4xU8>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), packedColor.channels[0]);
  137. } else {
  138. drawSolidRectangleAssign<Color4xU8>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), packedColor);
  139. }
  140. }
  // Draws a line of single pixels from (x1, y1) to (x2, y2), including both end points.
  //   IMAGE_TYPE must provide a static writePixel(target, x, y, color).
  // Coordinates are not clipped here; every pixel of the line is handed to writePixel.
  // NOTE(review): writePixel is presumably responsible for ignoring pixels outside of the image; verify.
  // Straight lines are drawn left to right or top to bottom.
  // Diagonal lines step one pixel at a time along the longer axis,
  // starting from the end point with the lowest coordinate on that axis.
  template <typename IMAGE_TYPE, typename COLOR_TYPE>
  inline void drawLineSuper(IMAGE_TYPE &target, int x1, int y1, int x2, int y2, COLOR_TYPE color) {
      if (y1 == y2) {
          // Horizontal line or a single point
          const int firstColumn = std::min(x1, x2);
          const int lastColumn = std::max(x1, x2);
          for (int column = firstColumn; column <= lastColumn; ++column) {
              IMAGE_TYPE::writePixel(target, column, y1, color);
          }
          return;
      }
      if (x1 == x2) {
          // Vertical line
          const int firstRow = std::min(y1, y2);
          const int lastRow = std::max(y1, y2);
          for (int row = firstRow; row <= lastRow; ++row) {
              IMAGE_TYPE::writePixel(target, x1, row, color);
          }
          return;
      }
      if (std::abs(y2 - y1) >= std::abs(x2 - x1)) {
          // Taller than wide: visit every row from the upper end point to the lower one.
          const bool reversed = y2 < y1;
          const int startX = reversed ? x2 : x1;
          const int startY = reversed ? y2 : y1;
          const int endX = reversed ? x1 : x2;
          const int endY = reversed ? y1 : y2;
          assert(endY > startY);
          const bool goingRight = endX > startX;
          const int sideStep = goingRight ? 1 : -1;
          const int tilt = (goingRight ? (endX - startX) : (startX - endX)) * 2;
          const int maxError = endY - startY;
          int error = 0;
          int x = startX;
          for (int y = startY; y <= endY; ++y) {
              IMAGE_TYPE::writePixel(target, x, y, color);
              error += tilt;
              if (error >= maxError) {
                  // The accumulated drift reached half a pixel, so move one column towards the end point.
                  x += sideStep;
                  error -= maxError * 2;
              }
          }
      } else {
          // Wider than tall: visit every column from the left end point to the right one.
          const bool reversed = x2 < x1;
          const int startX = reversed ? x2 : x1;
          const int startY = reversed ? y2 : y1;
          const int endX = reversed ? x1 : x2;
          const int endY = reversed ? y1 : y2;
          assert(endX > startX);
          const bool goingDown = endY > startY;
          const int sideStep = goingDown ? 1 : -1;
          const int tilt = (goingDown ? (endY - startY) : (startY - endY)) * 2;
          const int maxError = endX - startX;
          int error = 0;
          int y = startY;
          for (int x = startX; x <= endX; ++x) {
              IMAGE_TYPE::writePixel(target, x, y, color);
              error += tilt;
              if (error >= maxError) {
                  // The accumulated drift reached half a pixel, so move one row towards the end point.
                  y += sideStep;
                  error -= maxError * 2;
              }
          }
      }
  }
  239. void dsr::imageImpl_draw_line(ImageU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
  240. if (color < 0) { color = 0; }
  241. if (color > 255) { color = 255; }
  242. drawLineSuper<ImageU8Impl, uint8_t>(image, x1, y1, x2, y2, color);
  243. }
  244. void dsr::imageImpl_draw_line(ImageU16Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
  245. if (color < 0) { color = 0; }
  246. if (color > 65535) { color = 65535; }
  247. drawLineSuper<ImageU16Impl, uint16_t>(image, x1, y1, x2, y2, color);
  248. }
  249. void dsr::imageImpl_draw_line(ImageF32Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color) {
  250. drawLineSuper<ImageF32Impl, float>(image, x1, y1, x2, y2, color);
  251. }
  252. void dsr::imageImpl_draw_line(ImageRgbaU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color) {
  253. drawLineSuper<ImageRgbaU8Impl, Color4xU8>(image, x1, y1, x2, y2, image.packRgba(color.saturate()));
  254. }
  255. // -------------------------------- Drawing images --------------------------------
  256. // A packet with the dimensions of an image
  257. struct ImageDimensions {
  258. // width is the number of used pixels on each row.
  259. // height is the number of rows.
  260. // stride is the byte offset from one row to another including any padding.
  261. // pixelSize is the byte offset from one pixel to another from left to right.
  262. int32_t width, height, stride, pixelSize;
  263. ImageDimensions() : width(0), height(0), stride(0), pixelSize(0) {}
  264. ImageDimensions(const ImageImpl& image) :
  265. width(image.width), height(image.height), stride(image.stride), pixelSize(image.pixelSize) {}
  266. };
  267. struct ImageWriter : public ImageDimensions {
  268. uint8_t *data;
  269. ImageWriter(const ImageDimensions &dimensions, uint8_t *data) :
  270. ImageDimensions(dimensions), data(data) {}
  271. };
  272. struct ImageReader : public ImageDimensions {
  273. const uint8_t *data;
  274. ImageReader(const ImageDimensions &dimensions, const uint8_t *data) :
  275. ImageDimensions(dimensions), data(data) {}
  276. };
  277. static ImageWriter getWriter(ImageImpl &image) {
  278. return ImageWriter(ImageDimensions(image), buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset);
  279. }
  280. static ImageReader getReader(const ImageImpl &image) {
  281. return ImageReader(ImageDimensions(image), buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset);
  282. }
  283. static ImageImpl getGenericSubImage(const ImageImpl &image, int32_t left, int32_t top, int32_t width, int32_t height) {
  284. assert(left >= 0 && top >= 0 && width >= 1 && height >= 1 && left + width <= image.width && top + height <= image.height);
  285. intptr_t newOffset = image.startOffset + (left * image.pixelSize) + (top * image.stride);
  286. return ImageImpl(width, height, image.stride, image.pixelSize, image.buffer, newOffset);
  287. }
  288. struct ImageIntersection {
  289. ImageWriter subTarget;
  290. ImageReader subSource;
  291. ImageIntersection(const ImageWriter &subTarget, const ImageReader &subSource) :
  292. subTarget(subTarget), subSource(subSource) {}
  293. static bool canCreate(ImageImpl &target, const ImageImpl &source, int32_t left, int32_t top) {
  294. int32_t targetRegionRight = left + source.width;
  295. int32_t targetRegionBottom = top + source.height;
  296. return left < target.width && top < target.height && targetRegionRight > 0 && targetRegionBottom > 0;
  297. }
  298. // Only call if canCreate passed with the same arguments
  299. static ImageIntersection create(ImageImpl &target, const ImageImpl &source, int32_t left, int32_t top) {
  300. int32_t targetRegionRight = left + source.width;
  301. int32_t targetRegionBottom = top + source.height;
  302. assert(ImageIntersection::canCreate(target, source, left, top));
  303. // Check if the source has to be clipped
  304. if (left < 0 || top < 0 || targetRegionRight > target.width || targetRegionBottom > target.height) {
  305. int32_t clipLeft = std::max(0, -left);
  306. int32_t clipTop = std::max(0, -top);
  307. int32_t clipRight = std::max(0, targetRegionRight - target.width);
  308. int32_t clipBottom = std::max(0, targetRegionBottom - target.height);
  309. int32_t newWidth = source.width - (clipLeft + clipRight);
  310. int32_t newHeight = source.height - (clipTop + clipBottom);
  311. assert(newWidth > 0 && newHeight > 0);
  312. // Partial drawing
  313. ImageImpl subTarget = getGenericSubImage(target, left + clipLeft, top + clipTop, newWidth, newHeight);
  314. ImageImpl subSource = getGenericSubImage(source, clipLeft, clipTop, newWidth, newHeight);
  315. return ImageIntersection(getWriter(subTarget), getReader(subSource));
  316. } else {
  317. // Full drawing
  318. ImageImpl subTarget = getGenericSubImage(target, left, top, source.width, source.height);
  319. return ImageIntersection(getWriter(subTarget), getReader(source));
  320. }
  321. }
  322. };
  /* Executes OPERATION once for each row of READER, from top to bottom.
     OPERATION may use:
       targetRow, a uint8_t pointer to the start of the current row in WRITER.
       sourceRow, a const uint8_t pointer to the start of the current row in READER.
     The row count comes from READER.height, so WRITER must have at least as many rows. */
  #define ITERATE_ROWS(WRITER, READER, OPERATION) \
  { \
      uint8_t *targetRow = (WRITER).data; \
      const uint8_t *sourceRow = (READER).data; \
      for (int32_t y = 0; y < (READER).height; ++y) { \
          OPERATION; \
          targetRow += (WRITER).stride; \
          sourceRow += (READER).stride; \
      } \
  }
  /* Executes OPERATION once for each pixel of READER, row by row from top to bottom and left to right.
     OPERATION may use:
       targetRow and sourceRow, pointing to the start of the current row in WRITER and READER.
       targetPixel, a uint8_t pointer to the first byte of the current pixel in WRITER.
       sourcePixel, a const uint8_t pointer to the first byte of the current pixel in READER.
     The pixel count comes from READER.width and READER.height, so WRITER must be at least as large.
     Each side advances by its own pixelSize and stride, so the two may have different pixel formats. */
  #define ITERATE_PIXELS(WRITER, READER, OPERATION) \
  { \
      uint8_t *targetRow = (WRITER).data; \
      const uint8_t *sourceRow = (READER).data; \
      for (int32_t y = 0; y < (READER).height; ++y) { \
          uint8_t *targetPixel = targetRow; \
          const uint8_t *sourcePixel = sourceRow; \
          for (int32_t x = 0; x < (READER).width; ++x) { \
              {OPERATION;} \
              targetPixel += (WRITER).pixelSize; \
              sourcePixel += (READER).pixelSize; \
          } \
          targetRow += (WRITER).stride; \
          sourceRow += (READER).stride; \
      } \
  }
  /* Executes OPERATION once for each pixel location shared by two pairs of images.
     The iterated region is limited to the smallest width and the smallest height of READER1 and READER2,
     which OPERATION may read as minWidth and minHeight.
     OPERATION may use targetRow1, targetRow2, sourceRow1 and sourceRow2 for the current row,
     and targetPixel1, targetPixel2, sourcePixel1 and sourcePixel2 for the current pixel.
     Each image advances by its own pixelSize and stride. */
  #define ITERATE_PIXELS_2(WRITER1, READER1, WRITER2, READER2, OPERATION) \
  { \
      uint8_t *targetRow1 = (WRITER1).data; \
      uint8_t *targetRow2 = (WRITER2).data; \
      const uint8_t *sourceRow1 = (READER1).data; \
      const uint8_t *sourceRow2 = (READER2).data; \
      int minWidth = std::min((READER1).width, (READER2).width); \
      int minHeight = std::min((READER1).height, (READER2).height); \
      for (int32_t y = 0; y < minHeight; ++y) { \
          uint8_t *targetPixel1 = targetRow1; \
          uint8_t *targetPixel2 = targetRow2; \
          const uint8_t *sourcePixel1 = sourceRow1; \
          const uint8_t *sourcePixel2 = sourceRow2; \
          for (int32_t x = 0; x < minWidth; ++x) { \
              {OPERATION;} \
              targetPixel1 += (WRITER1).pixelSize; \
              targetPixel2 += (WRITER2).pixelSize; \
              sourcePixel1 += (READER1).pixelSize; \
              sourcePixel2 += (READER2).pixelSize; \
          } \
          targetRow1 += (WRITER1).stride; \
          targetRow2 += (WRITER2).stride; \
          sourceRow1 += (READER1).stride; \
          sourceRow2 += (READER2).stride; \
      } \
  }
  /* Executes OPERATION once for each pixel location shared by three pairs of images.
     The iterated region is limited to the smallest width and the smallest height of the three readers,
     which OPERATION may read as minWidth and minHeight.
     OPERATION may use targetRow1..3 and sourceRow1..3 for the current row,
     and targetPixel1..3 and sourcePixel1..3 for the current pixel.
     Each image advances by its own pixelSize and stride. */
  #define ITERATE_PIXELS_3(WRITER1, READER1, WRITER2, READER2, WRITER3, READER3, OPERATION) \
  { \
      uint8_t *targetRow1 = (WRITER1).data; \
      uint8_t *targetRow2 = (WRITER2).data; \
      uint8_t *targetRow3 = (WRITER3).data; \
      const uint8_t *sourceRow1 = (READER1).data; \
      const uint8_t *sourceRow2 = (READER2).data; \
      const uint8_t *sourceRow3 = (READER3).data; \
      int minWidth = std::min(std::min((READER1).width, (READER2).width), (READER3).width); \
      int minHeight = std::min(std::min((READER1).height, (READER2).height), (READER3).height); \
      for (int32_t y = 0; y < minHeight; ++y) { \
          uint8_t *targetPixel1 = targetRow1; \
          uint8_t *targetPixel2 = targetRow2; \
          uint8_t *targetPixel3 = targetRow3; \
          const uint8_t *sourcePixel1 = sourceRow1; \
          const uint8_t *sourcePixel2 = sourceRow2; \
          const uint8_t *sourcePixel3 = sourceRow3; \
          for (int32_t x = 0; x < minWidth; ++x) { \
              {OPERATION;} \
              targetPixel1 += (WRITER1).pixelSize; \
              targetPixel2 += (WRITER2).pixelSize; \
              targetPixel3 += (WRITER3).pixelSize; \
              sourcePixel1 += (READER1).pixelSize; \
              sourcePixel2 += (READER2).pixelSize; \
              sourcePixel3 += (READER3).pixelSize; \
          } \
          targetRow1 += (WRITER1).stride; \
          targetRow2 += (WRITER2).stride; \
          targetRow3 += (WRITER3).stride; \
          sourceRow1 += (READER1).stride; \
          sourceRow2 += (READER2).stride; \
          sourceRow3 += (READER3).stride; \
      } \
  }
  // Converts a floating-point intensity into an integer in the range 0..255.
  //   Values above 255 give 255.
  //   Negative values and NaN give 0.
  //   Other values are rounded to the closest integer.
  static inline int saturateFloat(float value) {
      if (value > 255.0f) {
          // Too large
          return 255;
      }
      if (value >= 0.0f) {
          // Within range, where adding a half before truncating rounds to the closest integer
          return static_cast<int>(value + 0.5f);
      }
      // Negative, or NaN which fails every comparison
      return 0;
  }
  421. // Copy data from one image region to another of the same size.
  422. // Packing order is reinterpreted without conversion.
  423. static void copyImageData(ImageWriter writer, ImageReader reader) {
  424. assert(writer.width == reader.width && writer.height == reader.height && writer.pixelSize == reader.pixelSize);
  425. ITERATE_ROWS(writer, reader, std::memcpy(targetRow, sourceRow, reader.width * reader.pixelSize));
  426. }
  427. void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top) {
  428. if (ImageIntersection::canCreate(target, source, left, top)) {
  429. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  430. if (target.packOrder == source.packOrder) {
  431. // No conversion needed
  432. copyImageData(intersection.subTarget, intersection.subSource);
  433. } else {
  434. // Read and repack to convert between different color formats
  435. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  436. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  437. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  438. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  439. targetPixel[target.packOrder.alphaIndex] = sourcePixel[source.packOrder.alphaIndex];
  440. );
  441. }
  442. }
  443. }
  444. void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
  445. if (ImageIntersection::canCreate(target, source, left, top)) {
  446. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  447. copyImageData(intersection.subTarget, intersection.subSource);
  448. }
  449. }
  450. void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
  451. if (ImageIntersection::canCreate(target, source, left, top)) {
  452. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  453. copyImageData(intersection.subTarget, intersection.subSource);
  454. }
  455. }
  456. void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
  457. if (ImageIntersection::canCreate(target, source, left, top)) {
  458. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  459. copyImageData(intersection.subTarget, intersection.subSource);
  460. }
  461. }
  462. void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
  463. if (ImageIntersection::canCreate(target, source, left, top)) {
  464. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  465. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  466. uint8_t luma = *sourcePixel;
  467. targetPixel[target.packOrder.redIndex] = luma;
  468. targetPixel[target.packOrder.greenIndex] = luma;
  469. targetPixel[target.packOrder.blueIndex] = luma;
  470. targetPixel[target.packOrder.alphaIndex] = 255;
  471. );
  472. }
  473. }
  474. void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
  475. if (ImageIntersection::canCreate(target, source, left, top)) {
  476. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  477. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  478. int luma = *((const uint16_t*)sourcePixel);
  479. if (luma > 255) { luma = 255; }
  480. targetPixel[target.packOrder.redIndex] = luma;
  481. targetPixel[target.packOrder.greenIndex] = luma;
  482. targetPixel[target.packOrder.blueIndex] = luma;
  483. targetPixel[target.packOrder.alphaIndex] = 255;
  484. );
  485. }
  486. }
  487. void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
  488. if (ImageIntersection::canCreate(target, source, left, top)) {
  489. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  490. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  491. int luma = saturateFloat(*((const float*)sourcePixel));
  492. targetPixel[target.packOrder.redIndex] = luma;
  493. targetPixel[target.packOrder.greenIndex] = luma;
  494. targetPixel[target.packOrder.blueIndex] = luma;
  495. targetPixel[target.packOrder.alphaIndex] = 255;
  496. );
  497. }
  498. }
  499. void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
  500. if (ImageIntersection::canCreate(target, source, left, top)) {
  501. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  502. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  503. *targetPixel = saturateFloat(*((const float*)sourcePixel));
  504. );
  505. }
  506. }
  507. void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
  508. if (ImageIntersection::canCreate(target, source, left, top)) {
  509. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  510. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  511. int luma = *((const uint16_t*)sourcePixel);
  512. if (luma > 255) { luma = 255; }
  513. *targetPixel = luma;
  514. );
  515. }
  516. }
  517. void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
  518. if (ImageIntersection::canCreate(target, source, left, top)) {
  519. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  520. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  521. *((uint16_t*)targetPixel) = *sourcePixel;
  522. );
  523. }
  524. }
  525. void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
  526. if (ImageIntersection::canCreate(target, source, left, top)) {
  527. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  528. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  529. int luma = *((const float*)sourcePixel);
  530. if (luma < 0) { luma = 0; }
  531. if (luma > 65535) { luma = 65535; }
  532. *((uint16_t*)targetPixel) = *sourcePixel;
  533. );
  534. }
  535. }
  536. void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
  537. if (ImageIntersection::canCreate(target, source, left, top)) {
  538. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  539. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  540. *((float*)targetPixel) = (float)(*sourcePixel);
  541. );
  542. }
  543. }
  544. void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
  545. if (ImageIntersection::canCreate(target, source, left, top)) {
  546. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  547. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  548. int luma = *((const uint16_t*)sourcePixel);
  549. if (luma > 255) { luma = 255; }
  550. *((float*)targetPixel) = (float)luma;
  551. );
  552. }
  553. }
  554. void dsr::imageImpl_drawAlphaFilter(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top) {
  555. if (ImageIntersection::canCreate(target, source, left, top)) {
  556. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  557. // Read and repack to convert between different color formats
  558. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  559. // Optimized for anti-aliasing, where most alpha values are 0 or 255
  560. uint32_t sourceRatio = sourcePixel[source.packOrder.alphaIndex];
  561. if (sourceRatio > 0) {
  562. if (sourceRatio == 255) {
  563. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  564. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  565. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  566. targetPixel[target.packOrder.alphaIndex] = 255;
  567. } else {
  568. uint32_t targetRatio = 255 - sourceRatio;
  569. targetPixel[target.packOrder.redIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.redIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.redIndex], sourceRatio);
  570. targetPixel[target.packOrder.greenIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.greenIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.greenIndex], sourceRatio);
  571. targetPixel[target.packOrder.blueIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.blueIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.blueIndex], sourceRatio);
  572. targetPixel[target.packOrder.alphaIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.alphaIndex], targetRatio) + sourceRatio;
  573. }
  574. }
  575. );
  576. }
  577. }
  578. void dsr::imageImpl_drawMaxAlpha(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top, int32_t sourceAlphaOffset) {
  579. if (ImageIntersection::canCreate(target, source, left, top)) {
  580. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  581. // Read and repack to convert between different color formats
  582. if (sourceAlphaOffset == 0) {
  583. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  584. int sourceAlpha = sourcePixel[source.packOrder.alphaIndex];
  585. if (sourceAlpha > targetPixel[target.packOrder.alphaIndex]) {
  586. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  587. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  588. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  589. targetPixel[target.packOrder.alphaIndex] = sourceAlpha;
  590. }
  591. );
  592. } else {
  593. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  594. int sourceAlpha = sourcePixel[source.packOrder.alphaIndex];
  595. if (sourceAlpha > 0) {
  596. sourceAlpha += sourceAlphaOffset;
  597. if (sourceAlpha > targetPixel[target.packOrder.alphaIndex]) {
  598. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  599. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  600. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  601. if (sourceAlpha < 0) { sourceAlpha = 0; }
  602. if (sourceAlpha > 255) { sourceAlpha = 255; }
  603. targetPixel[target.packOrder.alphaIndex] = sourceAlpha;
  604. }
  605. }
  606. );
  607. }
  608. }
  609. }
  610. void dsr::imageImpl_drawAlphaClip(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top, int32_t threshold) {
  611. if (ImageIntersection::canCreate(target, source, left, top)) {
  612. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  613. // Read and repack to convert between different color formats
  614. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  615. if (sourcePixel[source.packOrder.alphaIndex] > threshold) {
  616. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  617. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  618. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  619. targetPixel[target.packOrder.alphaIndex] = 255;
  620. }
  621. );
  622. }
  623. }
  624. template <bool FULL_ALPHA>
  625. static void drawSilhouette_template(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
  626. if (ImageIntersection::canCreate(target, source, left, top)) {
  627. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  628. // Read and repack to convert between different color formats
  629. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  630. uint32_t sourceRatio;
  631. if (FULL_ALPHA) {
  632. sourceRatio = *sourcePixel;
  633. } else {
  634. sourceRatio = normalizedByteMultiplication(*sourcePixel, color.alpha);
  635. }
  636. if (sourceRatio > 0) {
  637. if (sourceRatio == 255) {
  638. targetPixel[target.packOrder.redIndex] = color.red;
  639. targetPixel[target.packOrder.greenIndex] = color.green;
  640. targetPixel[target.packOrder.blueIndex] = color.blue;
  641. targetPixel[target.packOrder.alphaIndex] = 255;
  642. } else {
  643. uint32_t targetRatio = 255 - sourceRatio;
  644. targetPixel[target.packOrder.redIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.redIndex], targetRatio) + normalizedByteMultiplication(color.red, sourceRatio);
  645. targetPixel[target.packOrder.greenIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.greenIndex], targetRatio) + normalizedByteMultiplication(color.green, sourceRatio);
  646. targetPixel[target.packOrder.blueIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.blueIndex], targetRatio) + normalizedByteMultiplication(color.blue, sourceRatio);
  647. targetPixel[target.packOrder.alphaIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.alphaIndex], targetRatio) + sourceRatio;
  648. }
  649. }
  650. );
  651. }
  652. }
  653. void dsr::imageImpl_drawSilhouette(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
  654. if (color.alpha > 0) {
  655. ColorRgbaI32 saturatedColor = color.saturate();
  656. if (color.alpha < 255) {
  657. drawSilhouette_template<false>(target, source, saturatedColor, left, top);
  658. } else {
  659. drawSilhouette_template<true>(target, source, saturatedColor, left, top);
  660. }
  661. }
  662. }
  663. void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, int32_t left, int32_t top, int32_t sourceHeightOffset) {
  664. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  665. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  666. ITERATE_PIXELS(intersectionH.subTarget, intersectionH.subSource,
  667. int32_t newHeight = *((const uint16_t*)sourcePixel);
  668. if (newHeight > 0) {
  669. newHeight += sourceHeightOffset;
  670. if (newHeight < 0) { newHeight = 0; }
  671. if (newHeight > 65535) { newHeight = 65535; }
  672. if (newHeight > 0 && newHeight > *((uint16_t*)targetPixel)) {
  673. *((uint16_t*)targetPixel) = newHeight;
  674. }
  675. }
  676. );
  677. }
  678. }
  679. void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
  680. int32_t left, int32_t top, int32_t sourceHeightOffset) {
  681. assert(sourceA.width == sourceHeight.width);
  682. assert(sourceA.height == sourceHeight.height);
  683. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  684. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  685. ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
  686. ITERATE_PIXELS_2(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource,
  687. int32_t newHeight = *((const uint16_t*)sourcePixel1);
  688. if (newHeight > 0) {
  689. newHeight += sourceHeightOffset;
  690. if (newHeight < 0) { newHeight = 0; }
  691. if (newHeight > 65535) { newHeight = 65535; }
  692. if (newHeight > *((uint16_t*)targetPixel1)) {
  693. *((uint16_t*)targetPixel1) = newHeight;
  694. targetPixel2[targetA.packOrder.redIndex] = sourcePixel2[sourceA.packOrder.redIndex];
  695. targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
  696. targetPixel2[targetA.packOrder.blueIndex] = sourcePixel2[sourceA.packOrder.blueIndex];
  697. targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
  698. }
  699. }
  700. );
  701. }
  702. }
  703. void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
  704. ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left, int32_t top, int32_t sourceHeightOffset) {
  705. assert(sourceA.width == sourceHeight.width);
  706. assert(sourceA.height == sourceHeight.height);
  707. assert(sourceB.width == sourceHeight.width);
  708. assert(sourceB.height == sourceHeight.height);
  709. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  710. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  711. ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
  712. ImageIntersection intersectionB = ImageIntersection::create(targetB, sourceB, left, top);
  713. ITERATE_PIXELS_3(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource, intersectionB.subTarget, intersectionB.subSource,
  714. int32_t newHeight = *((const uint16_t*)sourcePixel1);
  715. if (newHeight > 0) {
  716. newHeight += sourceHeightOffset;
  717. if (newHeight < 0) { newHeight = 0; }
  718. if (newHeight > 65535) { newHeight = 65535; }
  719. if (newHeight > *((uint16_t*)targetPixel1)) {
  720. *((uint16_t*)targetPixel1) = newHeight;
  721. targetPixel2[targetA.packOrder.redIndex] = sourcePixel2[sourceA.packOrder.redIndex];
  722. targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
  723. targetPixel2[targetA.packOrder.blueIndex] = sourcePixel2[sourceA.packOrder.blueIndex];
  724. targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
  725. targetPixel3[targetB.packOrder.redIndex] = sourcePixel3[sourceB.packOrder.redIndex];
  726. targetPixel3[targetB.packOrder.greenIndex] = sourcePixel3[sourceB.packOrder.greenIndex];
  727. targetPixel3[targetB.packOrder.blueIndex] = sourcePixel3[sourceB.packOrder.blueIndex];
  728. targetPixel3[targetB.packOrder.alphaIndex] = sourcePixel3[sourceB.packOrder.alphaIndex];
  729. }
  730. }
  731. );
  732. }
  733. }
  734. void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, int32_t left, int32_t top, float sourceHeightOffset) {
  735. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  736. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  737. ITERATE_PIXELS(intersectionH.subTarget, intersectionH.subSource,
  738. float newHeight = *((const float*)sourcePixel);
  739. if (newHeight > -std::numeric_limits<float>::infinity()) {
  740. newHeight += sourceHeightOffset;
  741. if (newHeight > *((float*)targetPixel)) {
  742. *((float*)targetPixel) = newHeight;
  743. }
  744. }
  745. );
  746. }
  747. }
  748. void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
  749. int32_t left, int32_t top, float sourceHeightOffset) {
  750. assert(sourceA.width == sourceHeight.width);
  751. assert(sourceA.height == sourceHeight.height);
  752. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  753. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  754. ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
  755. ITERATE_PIXELS_2(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource,
  756. float newHeight = *((const float*)sourcePixel1);
  757. if (newHeight > -std::numeric_limits<float>::infinity()) {
  758. newHeight += sourceHeightOffset;
  759. if (newHeight > *((float*)targetPixel1)) {
  760. *((float*)targetPixel1) = newHeight;
  761. targetPixel2[targetA.packOrder.redIndex] = sourcePixel2[sourceA.packOrder.redIndex];
  762. targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
  763. targetPixel2[targetA.packOrder.blueIndex] = sourcePixel2[sourceA.packOrder.blueIndex];
  764. targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
  765. }
  766. }
  767. );
  768. }
  769. }
  770. void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
  771. ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left, int32_t top, float sourceHeightOffset) {
  772. assert(sourceA.width == sourceHeight.width);
  773. assert(sourceA.height == sourceHeight.height);
  774. assert(sourceB.width == sourceHeight.width);
  775. assert(sourceB.height == sourceHeight.height);
  776. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  777. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  778. ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
  779. ImageIntersection intersectionB = ImageIntersection::create(targetB, sourceB, left, top);
  780. ITERATE_PIXELS_3(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource, intersectionB.subTarget, intersectionB.subSource,
  781. float newHeight = *((const float*)sourcePixel1);
  782. if (newHeight > -std::numeric_limits<float>::infinity()) {
  783. newHeight += sourceHeightOffset;
  784. if (newHeight > *((float*)targetPixel1)) {
  785. *((float*)targetPixel1) = newHeight;
  786. targetPixel2[targetA.packOrder.redIndex] = sourcePixel2[sourceA.packOrder.redIndex];
  787. targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
  788. targetPixel2[targetA.packOrder.blueIndex] = sourcePixel2[sourceA.packOrder.blueIndex];
  789. targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
  790. targetPixel3[targetB.packOrder.redIndex] = sourcePixel3[sourceB.packOrder.redIndex];
  791. targetPixel3[targetB.packOrder.greenIndex] = sourcePixel3[sourceB.packOrder.greenIndex];
  792. targetPixel3[targetB.packOrder.blueIndex] = sourcePixel3[sourceB.packOrder.blueIndex];
  793. targetPixel3[targetB.packOrder.alphaIndex] = sourcePixel3[sourceB.packOrder.alphaIndex];
  794. }
  795. }
  796. );
  797. }
  798. }
  799. // -------------------------------- Resize --------------------------------
  800. static inline U32x4 ColorRgbaI32_to_U32x4(const ColorRgbaI32& color) {
  801. return U32x4(color.red, color.green, color.blue, color.alpha);
  802. }
  803. static inline ColorRgbaI32 U32x4_to_ColorRgbaI32(const U32x4& color) {
  804. UVector4D vResult = color.get();
  805. return ColorRgbaI32(vResult.x, vResult.y, vResult.z, vResult.w);
  806. }
  807. // Uniform linear interpolation of colors from a 16-bit sub-pixel weight
  808. // Pre-condition0 <= fineRatio <= 65536
  809. // Post-condition: Returns colorA * (1 - (fineRatio / 65536)) + colorB * (fineRatio / 65536)
  810. static inline U32x4 mixColorsUniform(const U32x4 &colorA, const U32x4 &colorB, uint32_t fineRatio) {
  811. uint16_t ratio = fineRatio >> 8;
  812. uint16_t invRatio = 256 - ratio;
  813. U16x8 weightA = U16x8(invRatio);
  814. U16x8 weightB = U16x8(ratio);
  815. U32x4 lowMask(0x00FF00FFu);
  816. U16x8 lowColorA = U16x8(colorA & lowMask);
  817. U16x8 lowColorB = U16x8(colorB & lowMask);
  818. U32x4 highMask(0xFF00FF00u);
  819. U16x8 highColorA = U16x8((colorA & highMask) >> 8);
  820. U16x8 highColorB = U16x8((colorB & highMask) >> 8);
  821. U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32();
  822. U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
  823. return (((lowColor >> 8) & lowMask) | (highColor & highMask));
  824. }
  // Reads the pixel at (X, Y) using readPixel_clamp and unpacks it into separate channels.
  // Relies on an ImageRgbaU8Impl named "source" being available where the macro is used.
  #define READ_RGBAU8_CLAMP(X,Y) ImageRgbaU8Impl::unpackRgba(ImageRgbaU8Impl::readPixel_clamp(source, (X), (Y)), source.packOrder)
  // The same read as READ_RGBAU8_CLAMP, with the channels placed in a U32x4.
  #define READ_RGBAU8_CLAMP_SIMD(X,Y) ColorRgbaI32_to_U32x4(READ_RGBAU8_CLAMP((X), (Y)))
  // Fixed-precision decimal system with 16-bit indices and 16-bit sub-pixel weights
  // One whole pixel is 65536 sub-pixel steps
  static const uint32_t interpolationFullPixel = 1u << 16;
  static const uint32_t interpolationHalfPixel = interpolationFullPixel >> 1;
  // Modulo mask for values greater than or equal to 0 and less than interpolationFullPixel
  static const uint32_t interpolationWeightMask = interpolationFullPixel - 1u;
  832. template <bool BILINEAR>
  833. static uint32_t samplePixel(const ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
  834. if (BILINEAR) {
  835. uint32_t upperRatio = 65536 - lowerRatio;
  836. uint32_t leftRatio = 65536 - rightRatio;
  837. U32x4 vUpperLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, upperY);
  838. U32x4 vUpperRightColor = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY);
  839. U32x4 vLowerLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, upperY + 1);
  840. U32x4 vLowerRightColor = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY + 1);
  841. U32x4 vLeftRatio = U32x4(leftRatio);
  842. U32x4 vRightRatio = U32x4(rightRatio);
  843. U32x4 vUpperColor = ((vUpperLeftColor * vLeftRatio) + (vUpperRightColor * vRightRatio)) >> 16;
  844. U32x4 vLowerColor = ((vLowerLeftColor * vLeftRatio) + (vLowerRightColor * vRightRatio)) >> 16;
  845. U32x4 vCenterColor = ((vUpperColor * upperRatio) + (vLowerColor * lowerRatio)) >> 16;
  846. return (target.packRgba(U32x4_to_ColorRgbaI32(vCenterColor))).packed;
  847. } else {
  848. return (target.packRgba(READ_RGBAU8_CLAMP(leftX, upperY))).packed;
  849. }
  850. }
  851. template <bool BILINEAR>
  852. static uint8_t samplePixel(const ImageU8Impl& target, const ImageU8Impl& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
  853. if (BILINEAR) {
  854. uint32_t upperRatio = 65536 - lowerRatio;
  855. uint32_t leftRatio = 65536 - rightRatio;
  856. uint32_t upperLeftLuma = ImageU8Impl::readPixel_clamp(source, leftX, upperY);
  857. uint32_t upperRightLuma = ImageU8Impl::readPixel_clamp(source, leftX + 1, upperY);
  858. uint32_t lowerLeftLuma = ImageU8Impl::readPixel_clamp(source, leftX, upperY + 1);
  859. uint32_t lowerRightLuma = ImageU8Impl::readPixel_clamp(source, leftX + 1, upperY + 1);
  860. uint32_t upperLuma = ((upperLeftLuma * leftRatio) + (upperRightLuma * rightRatio)) >> 16;
  861. uint32_t lowerLuma = ((lowerLeftLuma * leftRatio) + (lowerRightLuma * rightRatio)) >> 16;
  862. return ((upperLuma * upperRatio) + (lowerLuma * lowerRatio)) >> 16;
  863. } else {
  864. return ImageU8Impl::readPixel_clamp(source, leftX, upperY);
  865. }
  866. }
  867. // BILINEAR: Enables linear interpolation
  868. // scaleRegion:
  869. // The stretched location of the source image in the target image
  870. // Making it smaller than the target image will fill the outside with stretched pixels
  871. // Allowing the caller to crop away parts of the source image that aren't interesting
  872. // Can be used to round the region to a multiple of the input size for a fixed pixel size
  873. template <bool BILINEAR, typename IMAGE_TYPE, typename PIXEL_TYPE>
  874. static void resize_reference(IMAGE_TYPE& target, const IMAGE_TYPE& source, const IRect& scaleRegion) {
  875. // Reference implementation
  876. // Offset in source pixels per target pixel
  877. int32_t offsetX = interpolationFullPixel * source.width / scaleRegion.width();
  878. int32_t offsetY = interpolationFullPixel * source.height / scaleRegion.height();
  879. int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
  880. int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
  881. if (BILINEAR) {
  882. startX -= interpolationHalfPixel;
  883. startY -= interpolationHalfPixel;
  884. }
  885. SafePointer<PIXEL_TYPE> targetRow = imageInternal::getSafeData<PIXEL_TYPE>(target);
  886. int32_t readY = startY;
  887. for (int32_t y = 0; y < target.height; y++) {
  888. int32_t naturalY = readY;
  889. if (naturalY < 0) { naturalY = 0; }
  890. uint32_t sampleY = (uint32_t)naturalY;
  891. uint32_t upperY = sampleY >> 16;
  892. uint32_t lowerRatio = sampleY & interpolationWeightMask;
  893. SafePointer<PIXEL_TYPE> targetPixel = targetRow;
  894. int32_t readX = startX;
  895. for (int32_t x = 0; x < target.width; x++) {
  896. int32_t naturalX = readX;
  897. if (naturalX < 0) { naturalX = 0; }
  898. uint32_t sampleX = (uint32_t)naturalX;
  899. uint32_t leftX = sampleX >> 16;
  900. uint32_t rightRatio = sampleX & interpolationWeightMask;
  901. *targetPixel = samplePixel<BILINEAR>(target, source, leftX, upperY, rightRatio, lowerRatio);
  902. targetPixel += 1;
  903. readX += offsetX;
  904. }
  905. targetRow.increaseBytes(target.stride);
  906. readY += offsetY;
  907. }
  908. }
  909. // BILINEAR: Enables linear interpolation
  910. // SIMD_ALIGNED: Each line starts 16-byte aligned, has a stride divisible with 16-bytes and is allowed to overwrite padding.
  911. template <bool BILINEAR, bool SIMD_ALIGNED>
  912. static void resize_optimized(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, const IRect& scaleRegion) {
  913. // Get source information
  914. // Compare dimensions
  915. const bool sameWidth = source.width == scaleRegion.width() && scaleRegion.left() == 0;
  916. const bool sameHeight = source.height == scaleRegion.height() && scaleRegion.top() == 0;
  917. const bool samePackOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex;
  918. if (sameWidth && sameHeight) {
  919. // No need to resize, just make a copy to save time
  920. imageImpl_drawCopy(target, source);
  921. } else if (sameWidth && (samePackOrder || BILINEAR)) {
  922. // Only vertical interpolation
  923. // Offset in source pixels per target pixel
  924. int32_t offsetY = interpolationFullPixel * source.height / scaleRegion.height();
  925. int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
  926. if (BILINEAR) {
  927. startY -= interpolationHalfPixel;
  928. }
  929. SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
  930. int32_t readY = startY;
  931. for (int32_t y = 0; y < target.height; y++) {
  932. int32_t naturalY = readY;
  933. if (naturalY < 0) { naturalY = 0; }
  934. uint32_t sampleY = (uint32_t)naturalY;
  935. uint32_t upperY = sampleY >> 16;
  936. uint32_t lowerY = upperY + 1;
  937. if (upperY >= (uint32_t)source.height) upperY = source.height - 1;
  938. if (lowerY >= (uint32_t)source.height) lowerY = source.height - 1;
  939. if (BILINEAR) {
  940. uint32_t lowerRatio = sampleY & interpolationWeightMask;
  941. uint32_t upperRatio = 65536 - lowerRatio;
  942. SafePointer<uint32_t> targetPixel = targetRow;
  943. if (SIMD_ALIGNED) {
  944. const SafePointer<uint32_t> sourceRowUpper = imageInternal::getSafeData<uint32_t>(source, upperY);
  945. const SafePointer<uint32_t> sourceRowLower = imageInternal::getSafeData<uint32_t>(source, lowerY);
  946. for (int32_t x = 0; x < target.width; x += 4) {
  947. ALIGN16 U32x4 vUpperPackedColor = U32x4::readAligned(sourceRowUpper, "resize_optimized @ read vUpperPackedColor");
  948. ALIGN16 U32x4 vLowerPackedColor = U32x4::readAligned(sourceRowLower, "resize_optimized @ read vLowerPackedColor");
  949. ALIGN16 U32x4 vCenterColor = mixColorsUniform(vUpperPackedColor, vLowerPackedColor, lowerRatio);
  950. vCenterColor.writeAligned(targetPixel, "resize_optimized @ write vCenterColor");
  951. sourceRowUpper += 4;
  952. sourceRowLower += 4;
  953. targetPixel += 4;
  954. }
  955. } else {
  956. for (int32_t x = 0; x < target.width; x++) {
  957. ALIGN16 U32x4 vUpperColor = READ_RGBAU8_CLAMP_SIMD(x, upperY);
  958. ALIGN16 U32x4 vLowerColor = READ_RGBAU8_CLAMP_SIMD(x, lowerY);
  959. ALIGN16 U32x4 vCenterColor = ((vUpperColor * upperRatio) + (vLowerColor * lowerRatio)) >> 16;
  960. ColorRgbaI32 finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
  961. *targetPixel = target.packRgba(finalColor).packed;
  962. targetPixel += 1;
  963. }
  964. }
  965. } else {
  966. const SafePointer<uint32_t> sourceRowUpper = imageInternal::getSafeData<uint32_t>(source, upperY);
  967. // Nearest neighbor sampling from a same width can be done using one copy per row
  968. safeMemoryCopy(targetRow, sourceRowUpper, source.width * 4);
  969. }
  970. targetRow.increaseBytes(target.stride);
  971. readY += offsetY;
  972. }
  973. } else if (sameHeight) {
  974. // Only horizontal interpolation
  975. // Offset in source pixels per target pixel
  976. int32_t offsetX = interpolationFullPixel * source.width / scaleRegion.width();
  977. int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
  978. if (BILINEAR) {
  979. startX -= interpolationHalfPixel;
  980. }
  981. SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
  982. for (int32_t y = 0; y < target.height; y++) {
  983. SafePointer<uint32_t> targetPixel = targetRow;
  984. int32_t readX = startX;
  985. for (int32_t x = 0; x < target.width; x++) {
  986. int32_t naturalX = readX;
  987. if (naturalX < 0) { naturalX = 0; }
  988. uint32_t sampleX = (uint32_t)naturalX;
  989. uint32_t leftX = sampleX >> 16;
  990. uint32_t rightX = leftX + 1;
  991. uint32_t rightRatio = sampleX & interpolationWeightMask;
  992. uint32_t leftRatio = 65536 - rightRatio;
  993. ColorRgbaI32 finalColor;
  994. if (BILINEAR) {
  995. ALIGN16 U32x4 vLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, y);
  996. ALIGN16 U32x4 vRightColor = READ_RGBAU8_CLAMP_SIMD(rightX, y);
  997. ALIGN16 U32x4 vCenterColor = ((vLeftColor * leftRatio) + (vRightColor * rightRatio)) >> 16;
  998. finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
  999. } else {
  1000. finalColor = READ_RGBAU8_CLAMP(leftX, y);
  1001. }
  1002. *targetPixel = target.packRgba(finalColor).packed;
  1003. targetPixel += 1;
  1004. readX += offsetX;
  1005. }
  1006. targetRow.increaseBytes(target.stride);
  1007. }
  1008. } else {
  1009. // Call the reference implementation
  1010. resize_reference<BILINEAR, ImageRgbaU8Impl, uint32_t>(target, source, scaleRegion);
  1011. }
  1012. }
  1013. // Returns true iff each line start in image is aligned with 16 bytes
  1014. // Often not the case for sub-images, even if the parent image is aligned
  1015. static bool imageIs16ByteAligned(const ImageImpl& image) {
  1016. return (uint32_t)((image.stride & 15) == 0 && ((uintptr_t)(imageInternal::getSafeData<uint8_t>(image).getUnsafe()) & 15) == 0);
  1017. }
  1018. // Converting run-time flags into compile-time constants
  1019. static void resize_aux(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate, bool paddWrite, const IRect& scaleRegion) {
  1020. // If writing to padding is allowed and both images are 16-byte aligned with the same pack order
  1021. if (paddWrite && imageIs16ByteAligned(source) && imageIs16ByteAligned(target)) {
  1022. // SIMD resize allowed
  1023. if (interpolate) {
  1024. resize_optimized<true, true>(target, source, scaleRegion);
  1025. } else {
  1026. resize_optimized<false, true>(target, source, scaleRegion);
  1027. }
  1028. } else {
  1029. // Non-SIMD resize
  1030. if (interpolate) {
  1031. resize_optimized<true, false>(target, source, scaleRegion);
  1032. } else {
  1033. resize_optimized<false, false>(target, source, scaleRegion);
  1034. }
  1035. }
  1036. }
  1037. // TODO: Optimize monochrome resizing.
  1038. static void resize_aux(ImageU8Impl& target, const ImageU8Impl& source, bool interpolate, bool paddWrite, const IRect& scaleRegion) {
  1039. if (interpolate) {
  1040. resize_reference<true, ImageU8Impl, uint8_t>(target, source, scaleRegion);
  1041. } else {
  1042. resize_reference<false, ImageU8Impl, uint8_t>(target, source, scaleRegion);
  1043. }
  1044. }
  1045. // Creating an image to replacedImage with the same pack order as originalImage when applicable to the image format.
  1046. static ImageRgbaU8Impl createWithSamePackOrder(const ImageRgbaU8Impl& originalImage, int32_t width, int32_t height) {
  1047. return ImageRgbaU8Impl(width, height, originalImage.packOrder.packOrderIndex);
  1048. }
  1049. static ImageU8Impl createWithSamePackOrder(const ImageU8Impl& originalImage, int32_t width, int32_t height) {
  1050. return ImageU8Impl(width, height);
  1051. }
  1052. template <typename IMAGE_TYPE>
  1053. void resizeToTarget(IMAGE_TYPE& target, const IMAGE_TYPE& source, bool interpolate) {
  1054. IRect scaleRegion = imageInternal::getBound(target);
  1055. if (target.width != source.width && target.height > source.height) {
  1056. // Upscaling is faster in two steps by both reusing the horizontal interpolation and vectorizing the vertical interpolation.
  1057. int tempWidth = target.width;
  1058. int tempHeight = source.height;
  1059. IRect tempScaleRegion = IRect(scaleRegion.left(), 0, scaleRegion.width(), source.height);
  1060. // Create a temporary buffer.
  1061. IMAGE_TYPE newTempImage = createWithSamePackOrder(target, tempWidth, tempHeight);
  1062. resize_aux(newTempImage, source, interpolate, true, tempScaleRegion);
  1063. resize_aux(target, newTempImage, interpolate, true, scaleRegion);
  1064. } else {
  1065. // Downscaling or only changing one dimension is faster in one step.
  1066. resize_aux(target, source, interpolate, true, scaleRegion);
  1067. }
  1068. }
  1069. void dsr::imageImpl_resizeToTarget(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate) {
  1070. resizeToTarget<ImageRgbaU8Impl>(target, source, interpolate);
  1071. }
  1072. void dsr::imageImpl_resizeToTarget(ImageU8Impl& target, const ImageU8Impl& source, bool interpolate) {
  1073. resizeToTarget<ImageU8Impl>(target, source, interpolate);
  1074. }
  1075. template <bool CONVERT_COLOR>
  1076. static inline Color4xU8 convertRead(const ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int x, int y) {
  1077. Color4xU8 result = ImageRgbaU8Impl::readPixel_clamp(source, x, y);
  1078. if (CONVERT_COLOR) {
  1079. result = target.packRgba(ImageRgbaU8Impl::unpackRgba(result, source.packOrder));
  1080. }
  1081. return result;
  1082. }
  1083. // Used for drawing large pixels
  1084. static inline void fillRectangle(ImageRgbaU8Impl& target, int pixelLeft, int pixelRight, int pixelTop, int pixelBottom, const Color4xU8& packedColor) {
  1085. SafePointer<Color4xU8> targetRow = imageInternal::getSafeData<Color4xU8>(target, pixelTop) + pixelLeft;
  1086. for (int y = pixelTop; y < pixelBottom; y++) {
  1087. SafePointer<Color4xU8> targetPixel = targetRow;
  1088. for (int x = pixelLeft; x < pixelRight; x++) {
  1089. *targetPixel = packedColor;
  1090. targetPixel += 1;
  1091. }
  1092. targetRow.increaseBytes(target.stride);
  1093. }
  1094. }
  1095. template <bool CONVERT_COLOR>
  1096. static void blockMagnify_reference(
  1097. ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source,
  1098. int pixelWidth, int pixelHeight, int clipWidth, int clipHeight) {
  1099. int sourceY = 0;
  1100. int maxSourceX = source.width - 1;
  1101. int maxSourceY = source.height - 1;
  1102. if (clipWidth > target.width) { clipWidth = target.width; }
  1103. if (clipHeight > target.height) { clipHeight = target.height; }
  1104. for (int32_t pixelTop = 0; pixelTop < clipHeight; pixelTop += pixelHeight) {
  1105. int sourceX = 0;
  1106. for (int32_t pixelLeft = 0; pixelLeft < clipWidth; pixelLeft += pixelWidth) {
  1107. // Read the pixel once
  1108. Color4xU8 sourceColor = convertRead<CONVERT_COLOR>(target, source, sourceX, sourceY);
  1109. // Write to all target pixels in a conditionless loop
  1110. fillRectangle(target, pixelLeft, pixelLeft + pixelWidth, pixelTop, pixelTop + pixelHeight, sourceColor);
  1111. // Iterate and clamp the read coordinate
  1112. sourceX++;
  1113. if (sourceX > maxSourceX) { sourceX = maxSourceX; }
  1114. }
  1115. // Iterate and clamp the read coordinate
  1116. sourceY++;
  1117. if (sourceY > maxSourceY) { sourceY = maxSourceY; }
  1118. }
  1119. }
  1120. // Pre-condition:
  1121. // * The source and target images have the same pack order
  1122. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1123. // * clipWidth % 2 == 0
  1124. // * clipHeight % 2 == 0
  1125. static void blockMagnify_2x2(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1126. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1127. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1128. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1129. int blockTargetStride = target.stride * 2;
  1130. for (int upperTargetY = 0; upperTargetY + 2 <= clipHeight; upperTargetY+=2) {
  1131. // Carriage return
  1132. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1133. SafePointer<uint32_t> targetPixelA = targetRowA;
  1134. SafePointer<uint32_t> targetPixelB = targetRowB;
  1135. // Write to whole multiples of 8 pixels
  1136. int writeLeftX = 0;
  1137. while (writeLeftX + 2 <= clipWidth) {
  1138. // Read one pixel at a time
  1139. uint32_t scalarValue = *sourcePixel;
  1140. sourcePixel += 1;
  1141. // Write to a whole block of pixels
  1142. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue;
  1143. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue;
  1144. targetPixelA += 2;
  1145. targetPixelB += 2;
  1146. // Count
  1147. writeLeftX += 2;
  1148. }
  1149. // Line feed
  1150. sourceRow.increaseBytes(source.stride);
  1151. targetRowA.increaseBytes(blockTargetStride);
  1152. targetRowB.increaseBytes(blockTargetStride);
  1153. }
  1154. }
  1155. // Pre-condition:
  1156. // * The source and target images have the same pack order
  1157. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1158. // * clipWidth % 3 == 0
  1159. // * clipHeight % 3 == 0
  1160. static void blockMagnify_3x3(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1161. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1162. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1163. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1164. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1165. int blockTargetStride = target.stride * 3;
  1166. for (int upperTargetY = 0; upperTargetY + 3 <= clipHeight; upperTargetY+=3) {
  1167. // Carriage return
  1168. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1169. SafePointer<uint32_t> targetPixelA = targetRowA;
  1170. SafePointer<uint32_t> targetPixelB = targetRowB;
  1171. SafePointer<uint32_t> targetPixelC = targetRowC;
  1172. int writeLeftX = 0;
  1173. while (writeLeftX + 3 <= clipWidth) {
  1174. // Read one pixel at a time
  1175. uint32_t scalarValue = *sourcePixel;
  1176. sourcePixel += 1;
  1177. // Write to a whole block of pixels
  1178. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue;
  1179. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue;
  1180. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue;
  1181. targetPixelA += 3;
  1182. targetPixelB += 3;
  1183. targetPixelC += 3;
  1184. // Count
  1185. writeLeftX += 3;
  1186. }
  1187. // Line feed
  1188. sourceRow.increaseBytes(source.stride);
  1189. targetRowA.increaseBytes(blockTargetStride);
  1190. targetRowB.increaseBytes(blockTargetStride);
  1191. targetRowC.increaseBytes(blockTargetStride);
  1192. }
  1193. }
  1194. // Pre-condition:
  1195. // * The source and target images have the same pack order
  1196. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1197. // * clipWidth % 4 == 0
  1198. // * clipHeight % 4 == 0
  1199. static void blockMagnify_4x4(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1200. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1201. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1202. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1203. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1204. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1205. int quadTargetStride = target.stride * 4;
  1206. for (int upperTargetY = 0; upperTargetY + 4 <= clipHeight; upperTargetY+=4) {
  1207. // Carriage return
  1208. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1209. SafePointer<uint32_t> targetPixelA = targetRowA;
  1210. SafePointer<uint32_t> targetPixelB = targetRowB;
  1211. SafePointer<uint32_t> targetPixelC = targetRowC;
  1212. SafePointer<uint32_t> targetPixelD = targetRowD;
  1213. int writeLeftX = 0;
  1214. while (writeLeftX + 4 <= clipWidth) {
  1215. // Read one pixel at a time
  1216. uint32_t scalarValue = *sourcePixel;
  1217. sourcePixel += 1;
  1218. // Convert scalar to SIMD vector of 4 repeated pixels
  1219. ALIGN16 U32x4 sourcePixels = U32x4(scalarValue);
  1220. // Write to 4x4 pixels using 4 SIMD writes
  1221. sourcePixels.writeAligned(targetPixelA, "blockMagnify_4x4 @ write A");
  1222. sourcePixels.writeAligned(targetPixelB, "blockMagnify_4x4 @ write B");
  1223. sourcePixels.writeAligned(targetPixelC, "blockMagnify_4x4 @ write C");
  1224. sourcePixels.writeAligned(targetPixelD, "blockMagnify_4x4 @ write D");
  1225. targetPixelA += 4;
  1226. targetPixelB += 4;
  1227. targetPixelC += 4;
  1228. targetPixelD += 4;
  1229. // Count
  1230. writeLeftX += 4;
  1231. }
  1232. // Line feed
  1233. sourceRow.increaseBytes(source.stride);
  1234. targetRowA.increaseBytes(quadTargetStride);
  1235. targetRowB.increaseBytes(quadTargetStride);
  1236. targetRowC.increaseBytes(quadTargetStride);
  1237. targetRowD.increaseBytes(quadTargetStride);
  1238. }
  1239. }
  1240. // Pre-condition:
  1241. // * The source and target images have the same pack order
  1242. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1243. // * clipWidth % 5 == 0
  1244. // * clipHeight % 5 == 0
  1245. static void blockMagnify_5x5(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1246. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1247. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1248. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1249. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1250. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1251. SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
  1252. int blockTargetStride = target.stride * 5;
  1253. for (int upperTargetY = 0; upperTargetY + 5 <= clipHeight; upperTargetY+=5) {
  1254. // Carriage return
  1255. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1256. SafePointer<uint32_t> targetPixelA = targetRowA;
  1257. SafePointer<uint32_t> targetPixelB = targetRowB;
  1258. SafePointer<uint32_t> targetPixelC = targetRowC;
  1259. SafePointer<uint32_t> targetPixelD = targetRowD;
  1260. SafePointer<uint32_t> targetPixelE = targetRowE;
  1261. int writeLeftX = 0;
  1262. while (writeLeftX + 5 <= clipWidth) {
  1263. // Read one pixel at a time
  1264. uint32_t scalarValue = *sourcePixel;
  1265. sourcePixel += 1;
  1266. // Write to a whole block of pixels
  1267. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue;
  1268. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue;
  1269. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue;
  1270. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue;
  1271. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue;
  1272. targetPixelA += 5;
  1273. targetPixelB += 5;
  1274. targetPixelC += 5;
  1275. targetPixelD += 5;
  1276. targetPixelE += 5;
  1277. // Count
  1278. writeLeftX += 5;
  1279. }
  1280. // Line feed
  1281. sourceRow.increaseBytes(source.stride);
  1282. targetRowA.increaseBytes(blockTargetStride);
  1283. targetRowB.increaseBytes(blockTargetStride);
  1284. targetRowC.increaseBytes(blockTargetStride);
  1285. targetRowD.increaseBytes(blockTargetStride);
  1286. targetRowE.increaseBytes(blockTargetStride);
  1287. }
  1288. }
  1289. // Pre-condition:
  1290. // * The source and target images have the same pack order
  1291. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1292. // * clipWidth % 6 == 0
  1293. // * clipHeight % 6 == 0
  1294. static void blockMagnify_6x6(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1295. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1296. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1297. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1298. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1299. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1300. SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
  1301. SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
  1302. int blockTargetStride = target.stride * 6;
  1303. for (int upperTargetY = 0; upperTargetY + 6 <= clipHeight; upperTargetY+=6) {
  1304. // Carriage return
  1305. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1306. SafePointer<uint32_t> targetPixelA = targetRowA;
  1307. SafePointer<uint32_t> targetPixelB = targetRowB;
  1308. SafePointer<uint32_t> targetPixelC = targetRowC;
  1309. SafePointer<uint32_t> targetPixelD = targetRowD;
  1310. SafePointer<uint32_t> targetPixelE = targetRowE;
  1311. SafePointer<uint32_t> targetPixelF = targetRowF;
  1312. int writeLeftX = 0;
  1313. while (writeLeftX + 6 <= clipWidth) {
  1314. // Read one pixel at a time
  1315. uint32_t scalarValue = *sourcePixel;
  1316. sourcePixel += 1;
  1317. // Write to a whole block of pixels
  1318. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue;
  1319. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue;
  1320. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue;
  1321. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue;
  1322. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue;
  1323. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue;
  1324. targetPixelA += 6;
  1325. targetPixelB += 6;
  1326. targetPixelC += 6;
  1327. targetPixelD += 6;
  1328. targetPixelE += 6;
  1329. targetPixelF += 6;
  1330. // Count
  1331. writeLeftX += 6;
  1332. }
  1333. // Line feed
  1334. sourceRow.increaseBytes(source.stride);
  1335. targetRowA.increaseBytes(blockTargetStride);
  1336. targetRowB.increaseBytes(blockTargetStride);
  1337. targetRowC.increaseBytes(blockTargetStride);
  1338. targetRowD.increaseBytes(blockTargetStride);
  1339. targetRowE.increaseBytes(blockTargetStride);
  1340. targetRowF.increaseBytes(blockTargetStride);
  1341. }
  1342. }
  1343. // Pre-condition:
  1344. // * The source and target images have the same pack order
  1345. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1346. // * clipWidth % 7 == 0
  1347. // * clipHeight % 7 == 0
  1348. static void blockMagnify_7x7(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1349. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1350. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1351. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1352. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1353. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1354. SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
  1355. SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
  1356. SafePointer<uint32_t> targetRowG = imageInternal::getSafeData<uint32_t>(target, 6);
  1357. int blockTargetStride = target.stride * 7;
  1358. for (int upperTargetY = 0; upperTargetY + 7 <= clipHeight; upperTargetY+=7) {
  1359. // Carriage return
  1360. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1361. SafePointer<uint32_t> targetPixelA = targetRowA;
  1362. SafePointer<uint32_t> targetPixelB = targetRowB;
  1363. SafePointer<uint32_t> targetPixelC = targetRowC;
  1364. SafePointer<uint32_t> targetPixelD = targetRowD;
  1365. SafePointer<uint32_t> targetPixelE = targetRowE;
  1366. SafePointer<uint32_t> targetPixelF = targetRowF;
  1367. SafePointer<uint32_t> targetPixelG = targetRowG;
  1368. int writeLeftX = 0;
  1369. while (writeLeftX + 7 <= clipWidth) {
  1370. // Read one pixel at a time
  1371. uint32_t scalarValue = *sourcePixel;
  1372. sourcePixel += 1;
  1373. // Write to a whole block of pixels
  1374. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue;
  1375. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue;
  1376. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue;
  1377. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue;
  1378. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue;
  1379. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue;
  1380. targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue;
  1381. targetPixelA += 7;
  1382. targetPixelB += 7;
  1383. targetPixelC += 7;
  1384. targetPixelD += 7;
  1385. targetPixelE += 7;
  1386. targetPixelF += 7;
  1387. targetPixelG += 7;
  1388. // Count
  1389. writeLeftX += 7;
  1390. }
  1391. // Line feed
  1392. sourceRow.increaseBytes(source.stride);
  1393. targetRowA.increaseBytes(blockTargetStride);
  1394. targetRowB.increaseBytes(blockTargetStride);
  1395. targetRowC.increaseBytes(blockTargetStride);
  1396. targetRowD.increaseBytes(blockTargetStride);
  1397. targetRowE.increaseBytes(blockTargetStride);
  1398. targetRowF.increaseBytes(blockTargetStride);
  1399. targetRowG.increaseBytes(blockTargetStride);
  1400. }
  1401. }
  1402. // Pre-condition:
  1403. // * The source and target images have the same pack order
  1404. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1405. // * clipWidth % 8 == 0
  1406. // * clipHeight % 8 == 0
  1407. static void blockMagnify_8x8(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1408. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1409. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1410. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1411. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1412. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1413. SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
  1414. SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
  1415. SafePointer<uint32_t> targetRowG = imageInternal::getSafeData<uint32_t>(target, 6);
  1416. SafePointer<uint32_t> targetRowH = imageInternal::getSafeData<uint32_t>(target, 7);
  1417. int blockTargetStride = target.stride * 8;
  1418. for (int upperTargetY = 0; upperTargetY + 8 <= clipHeight; upperTargetY+=8) {
  1419. // Carriage return
  1420. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1421. SafePointer<uint32_t> targetPixelA = targetRowA;
  1422. SafePointer<uint32_t> targetPixelB = targetRowB;
  1423. SafePointer<uint32_t> targetPixelC = targetRowC;
  1424. SafePointer<uint32_t> targetPixelD = targetRowD;
  1425. SafePointer<uint32_t> targetPixelE = targetRowE;
  1426. SafePointer<uint32_t> targetPixelF = targetRowF;
  1427. SafePointer<uint32_t> targetPixelG = targetRowG;
  1428. SafePointer<uint32_t> targetPixelH = targetRowH;
  1429. int writeLeftX = 0;
  1430. while (writeLeftX + 8 <= clipWidth) {
  1431. // Read one pixel at a time
  1432. uint32_t scalarValue = *sourcePixel;
  1433. sourcePixel += 1;
  1434. // Write to a whole block of pixels
  1435. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue; targetPixelA[7] = scalarValue;
  1436. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue; targetPixelB[7] = scalarValue;
  1437. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue; targetPixelC[7] = scalarValue;
  1438. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue; targetPixelD[7] = scalarValue;
  1439. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue; targetPixelE[7] = scalarValue;
  1440. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue; targetPixelF[7] = scalarValue;
  1441. targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue; targetPixelG[7] = scalarValue;
  1442. targetPixelH[0] = scalarValue; targetPixelH[1] = scalarValue; targetPixelH[2] = scalarValue; targetPixelH[3] = scalarValue; targetPixelH[4] = scalarValue; targetPixelH[5] = scalarValue; targetPixelH[6] = scalarValue; targetPixelH[7] = scalarValue;
  1443. targetPixelA += 8;
  1444. targetPixelB += 8;
  1445. targetPixelC += 8;
  1446. targetPixelD += 8;
  1447. targetPixelE += 8;
  1448. targetPixelF += 8;
  1449. targetPixelG += 8;
  1450. targetPixelH += 8;
  1451. // Count
  1452. writeLeftX += 8;
  1453. }
  1454. // Line feed
  1455. sourceRow.increaseBytes(source.stride);
  1456. targetRowA.increaseBytes(blockTargetStride);
  1457. targetRowB.increaseBytes(blockTargetStride);
  1458. targetRowC.increaseBytes(blockTargetStride);
  1459. targetRowD.increaseBytes(blockTargetStride);
  1460. targetRowE.increaseBytes(blockTargetStride);
  1461. targetRowF.increaseBytes(blockTargetStride);
  1462. targetRowG.increaseBytes(blockTargetStride);
  1463. targetRowH.increaseBytes(blockTargetStride);
  1464. }
  1465. }
  1466. static void blackEdges(ImageRgbaU8Impl& target, int excludedWidth, int excludedHeight) {
  1467. // Right side
  1468. drawSolidRectangleMemset<Color4xU8>(target, excludedWidth, 0, target.width, excludedHeight, 0);
  1469. // Bottom and corner
  1470. drawSolidRectangleMemset<Color4xU8>(target, 0, excludedHeight, target.width, target.height, 0);
  1471. }
  1472. void dsr::imageImpl_blockMagnify(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int pixelWidth, int pixelHeight) {
  1473. if (pixelWidth < 1) { pixelWidth = 1; }
  1474. if (pixelHeight < 1) { pixelHeight = 1; }
  1475. bool sameOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex;
  1476. // Find the part of source which fits into target with whole pixels
  1477. int clipWidth = roundDown(std::min(target.width, source.width * pixelWidth), pixelWidth);
  1478. int clipHeight = roundDown(std::min(target.height, source.height * pixelHeight), pixelHeight);
  1479. if (sameOrder) {
  1480. if (imageIs16ByteAligned(source) && imageIs16ByteAligned(target)) {
  1481. if (pixelWidth == 2 && pixelHeight == 2) {
  1482. blockMagnify_2x2(target, source, clipWidth, clipHeight);
  1483. } else if (pixelWidth == 3 && pixelHeight == 3) {
  1484. blockMagnify_3x3(target, source, clipWidth, clipHeight);
  1485. } else if (pixelWidth == 4 && pixelHeight == 4) {
  1486. blockMagnify_4x4(target, source, clipWidth, clipHeight);
  1487. } else if (pixelWidth == 5 && pixelHeight == 5) {
  1488. blockMagnify_5x5(target, source, clipWidth, clipHeight);
  1489. } else if (pixelWidth == 6 && pixelHeight == 6) {
  1490. blockMagnify_6x6(target, source, clipWidth, clipHeight);
  1491. } else if (pixelWidth == 7 && pixelHeight == 7) {
  1492. blockMagnify_7x7(target, source, clipWidth, clipHeight);
  1493. } else if (pixelWidth == 8 && pixelHeight == 8) {
  1494. blockMagnify_8x8(target, source, clipWidth, clipHeight);
  1495. } else {
  1496. blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  1497. }
  1498. } else {
  1499. blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  1500. }
  1501. } else {
  1502. blockMagnify_reference<true>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  1503. }
  1504. blackEdges(target, clipWidth, clipHeight);
  1505. }