draw.cpp 77 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598
  1. // zlib open source license
  2. //
  3. // Copyright (c) 2018 to 2019 David Forsgren Piuva
  4. //
  5. // This software is provided 'as-is', without any express or implied
  6. // warranty. In no event will the authors be held liable for any damages
  7. // arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any purpose,
  10. // including commercial applications, and to alter it and redistribute it
  11. // freely, subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented; you must not
  14. // claim that you wrote the original software. If you use this software
  15. // in a product, an acknowledgment in the product documentation would be
  16. // appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such, and must not be
  19. // misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source
  22. // distribution.
  23. #include "../base/simdExtra.h"
  24. #include "draw.h"
  25. #include "internal/imageInternal.h"
  26. #include "../math/scalar.h"
  27. #include <limits>
  28. using namespace dsr;
  29. // Preconditions:
  30. // 0 <= a <= 255
  31. // 0 <= b <= 255
  32. // Postconditions:
  33. // Returns the normalized multiplication of a and b, where the 0..255 range represents decimal values from 0.0 to 1.0.
  34. // The result may not be less than zero or larger than any of the inputs.
  35. // Examples:
  36. // normalizedByteMultiplication(0, 0) = 0
  37. // normalizedByteMultiplication(x, 0) = 0
  38. // normalizedByteMultiplication(0, x) = 0
  39. // normalizedByteMultiplication(x, 255) = x
  40. // normalizedByteMultiplication(255, x) = x
  41. // normalizedByteMultiplication(255, 255) = 255
  42. static inline uint32_t normalizedByteMultiplication(uint32_t a, uint32_t b) {
  43. // Approximate the reciprocal of an unsigned byte's maximum value 255 for normalization
  44. // 256³ / 255 ≈ 65793
  45. // Truncation goes down, so add half a unit before rounding to get the closest value
  46. // 2^24 / 2 = 8388608
  47. // No overflow for unsigned 32-bit integers
  48. // 255² * 65793 + 8388608 = 4286578433 < 2^32
  49. return (a * b * 65793 + 8388608) >> 24;
  50. }
  51. // True iff high and low bytes are equal
  52. // Equivalent to value % 257 == 0 because A + B * 256 = A * 257 when A = B.
  53. inline bool isUniformByteU16(uint16_t value) {
  54. return (value & 0x00FF) == ((value & 0xFF00) >> 8);
  55. }
  56. // -------------------------------- Drawing shapes --------------------------------
  57. template <typename COLOR_TYPE>
  58. static inline void drawSolidRectangleAssign(ImageImpl &target, int left, int top, int right, int bottom, COLOR_TYPE color) {
  59. int leftBound = std::max(0, left);
  60. int topBound = std::max(0, top);
  61. int rightBound = std::min(right, target.width);
  62. int bottomBound = std::min(bottom, target.height);
  63. int stride = target.stride;
  64. SafePointer<COLOR_TYPE> rowData = imageInternal::getSafeData<COLOR_TYPE>(target, topBound);
  65. rowData += leftBound;
  66. for (int y = topBound; y < bottomBound; y++) {
  67. SafePointer<COLOR_TYPE> pixelData = rowData;
  68. for (int x = leftBound; x < rightBound; x++) {
  69. pixelData.get() = color;
  70. pixelData += 1;
  71. }
  72. rowData.increaseBytes(stride);
  73. }
  74. }
  75. template <typename COLOR_TYPE>
  76. static inline void drawSolidRectangleMemset(ImageImpl &target, int left, int top, int right, int bottom, uint8_t uniformByte) {
  77. int leftBound = std::max(0, left);
  78. int topBound = std::max(0, top);
  79. int rightBound = std::min(right, target.width);
  80. int bottomBound = std::min(bottom, target.height);
  81. if (rightBound > leftBound && bottomBound > topBound) {
  82. int stride = target.stride;
  83. SafePointer<COLOR_TYPE> rowData = imageInternal::getSafeData<COLOR_TYPE>(target, topBound);
  84. rowData += leftBound;
  85. int filledWidth = rightBound - leftBound;
  86. int rowSize = filledWidth * sizeof(COLOR_TYPE);
  87. int rowCount = bottomBound - topBound;
  88. if (!target.isSubImage && filledWidth == target.width) {
  89. // Write over any padding for parent images owning the whole buffer.
  90. // Including parent images with sub-images using the same data
  91. // because no child image may display the parent-image's padding bytes.
  92. safeMemorySet(rowData, uniformByte, (stride * (rowCount - 1)) + rowSize);
  93. } else if (rowSize == stride) {
  94. // When the filled row stretches all the way from left to right in the main allocation
  95. // there's no unseen pixels being overwritten in other images sharing the buffer.
  96. // This case handles sub-images that uses the full width of
  97. // the parent image which doesn't have any padding.
  98. safeMemorySet(rowData, uniformByte, rowSize * rowCount);
  99. } else {
  100. // Fall back on using one memset operation per row.
  101. // This case is for sub-images that must preserve interleaved pixel rows belonging
  102. // to other images that aren't visible and therefore not owned by this image.
  103. for (int y = topBound; y < bottomBound; y++) {
  104. safeMemorySet(rowData, uniformByte, rowSize);
  105. rowData.increaseBytes(stride);
  106. }
  107. }
  108. }
  109. }
  110. void dsr::imageImpl_draw_solidRectangle(ImageU8Impl& image, const IRect& bound, int color) {
  111. if (color < 0) { color = 0; }
  112. if (color > 255) { color = 255; }
  113. drawSolidRectangleMemset<uint8_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), color);
  114. }
  115. void dsr::imageImpl_draw_solidRectangle(ImageU16Impl& image, const IRect& bound, int color) {
  116. if (color < 0) { color = 0; }
  117. if (color > 65535) { color = 65535; }
  118. uint16_t uColor = color;
  119. if (isUniformByteU16(uColor)) {
  120. drawSolidRectangleMemset<uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), 0);
  121. } else {
  122. drawSolidRectangleAssign<uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), uColor);
  123. }
  124. }
  125. void dsr::imageImpl_draw_solidRectangle(ImageF32Impl& image, const IRect& bound, float color) {
  126. if (color == 0.0f) {
  127. drawSolidRectangleMemset<float>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), 0);
  128. } else {
  129. drawSolidRectangleAssign<float>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), color);
  130. }
  131. }
  132. void dsr::imageImpl_draw_solidRectangle(ImageRgbaU8Impl& image, const IRect& bound, const ColorRgbaI32& color) {
  133. Color4xU8 packedColor = image.packRgba(color.saturate());
  134. if (packedColor.isUniformByte()) {
  135. drawSolidRectangleMemset<Color4xU8>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), packedColor.channels[0]);
  136. } else {
  137. drawSolidRectangleAssign<Color4xU8>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), packedColor);
  138. }
  139. }
  140. template <typename IMAGE_TYPE, typename COLOR_TYPE>
  141. inline void drawLineSuper(IMAGE_TYPE &target, int x1, int y1, int x2, int y2, COLOR_TYPE color) {
  142. if (y1 == y2) {
  143. // Sideways
  144. int left = std::min(x1, x2);
  145. int right = std::max(x1, x2);
  146. for (int x = left; x <= right; x++) {
  147. IMAGE_TYPE::writePixel(target, x, y1, color);
  148. }
  149. } else if (x1 == x2) {
  150. // Down
  151. int top = std::min(y1, y2);
  152. int bottom = std::max(y1, y2);
  153. for (int y = top; y <= bottom; y++) {
  154. IMAGE_TYPE::writePixel(target, x1, y, color);
  155. }
  156. } else {
  157. if (std::abs(y2 - y1) >= std::abs(x2 - x1)) {
  158. if (y2 < y1) {
  159. swap(x1, x2);
  160. swap(y1, y2);
  161. }
  162. assert(y2 > y1);
  163. if (x2 > x1) {
  164. // Down right
  165. int x = x1;
  166. int y = y1;
  167. int tilt = (x2 - x1) * 2;
  168. int maxError = y2 - y1;
  169. int error = 0;
  170. while (y <= y2) {
  171. IMAGE_TYPE::writePixel(target, x, y, color);
  172. error += tilt;
  173. if (error >= maxError) {
  174. x++;
  175. error -= maxError * 2;
  176. }
  177. y++;
  178. }
  179. } else {
  180. // Down left
  181. int x = x1;
  182. int y = y1;
  183. int tilt = (x1 - x2) * 2;
  184. int maxError = y2 - y1;
  185. int error = 0;
  186. while (y <= y2) {
  187. IMAGE_TYPE::writePixel(target, x, y, color);
  188. error += tilt;
  189. if (error >= maxError) {
  190. x--;
  191. error -= maxError * 2;
  192. }
  193. y++;
  194. }
  195. }
  196. } else {
  197. if (x2 < x1) {
  198. swap(x1, x2);
  199. swap(y1, y2);
  200. }
  201. assert(x2 > x1);
  202. if (y2 > y1) {
  203. // Down right
  204. int x = x1;
  205. int y = y1;
  206. int tilt = (y2 - y1) * 2;
  207. int maxError = x2 - x1;
  208. int error = 0;
  209. while (x <= x2) {
  210. IMAGE_TYPE::writePixel(target, x, y, color);
  211. error += tilt;
  212. if (error >= maxError) {
  213. y++;
  214. error -= maxError * 2;
  215. }
  216. x++;
  217. }
  218. } else {
  219. // Up right
  220. int x = x1;
  221. int y = y1;
  222. int tilt = (y1 - y2) * 2;
  223. int maxError = x2 - x1;
  224. int error = 0;
  225. while (x <= x2) {
  226. IMAGE_TYPE::writePixel(target, x, y, color);
  227. error += tilt;
  228. if (error >= maxError) {
  229. y--;
  230. error -= maxError * 2;
  231. }
  232. x++;
  233. }
  234. }
  235. }
  236. }
  237. }
  238. void dsr::imageImpl_draw_line(ImageU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
  239. if (color < 0) { color = 0; }
  240. if (color > 255) { color = 255; }
  241. drawLineSuper<ImageU8Impl, uint8_t>(image, x1, y1, x2, y2, color);
  242. }
  243. void dsr::imageImpl_draw_line(ImageU16Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
  244. if (color < 0) { color = 0; }
  245. if (color > 65535) { color = 65535; }
  246. drawLineSuper<ImageU16Impl, uint16_t>(image, x1, y1, x2, y2, color);
  247. }
  248. void dsr::imageImpl_draw_line(ImageF32Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color) {
  249. drawLineSuper<ImageF32Impl, float>(image, x1, y1, x2, y2, color);
  250. }
  251. void dsr::imageImpl_draw_line(ImageRgbaU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color) {
  252. drawLineSuper<ImageRgbaU8Impl, Color4xU8>(image, x1, y1, x2, y2, image.packRgba(color.saturate()));
  253. }
  254. // -------------------------------- Drawing images --------------------------------
  255. // A packet with the dimensions of an image
  256. struct ImageDimensions {
  257. // width is the number of used pixels on each row.
  258. // height is the number of rows.
  259. // stride is the byte offset from one row to another including any padding.
  260. // pixelSize is the byte offset from one pixel to another from left to right.
  261. int32_t width, height, stride, pixelSize;
  262. ImageDimensions() : width(0), height(0), stride(0), pixelSize(0) {}
  263. ImageDimensions(const ImageImpl& image) :
  264. width(image.width), height(image.height), stride(image.stride), pixelSize(image.pixelSize) {}
  265. };
  266. struct ImageWriter : public ImageDimensions {
  267. uint8_t *data;
  268. ImageWriter(const ImageDimensions &dimensions, uint8_t *data) :
  269. ImageDimensions(dimensions), data(data) {}
  270. };
  271. struct ImageReader : public ImageDimensions {
  272. const uint8_t *data;
  273. ImageReader(const ImageDimensions &dimensions, const uint8_t *data) :
  274. ImageDimensions(dimensions), data(data) {}
  275. };
  276. static ImageWriter getWriter(ImageImpl &image) {
  277. return ImageWriter(ImageDimensions(image), buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset);
  278. }
  279. static ImageReader getReader(const ImageImpl &image) {
  280. return ImageReader(ImageDimensions(image), buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset);
  281. }
  282. static ImageImpl getGenericSubImage(const ImageImpl &image, int32_t left, int32_t top, int32_t width, int32_t height) {
  283. assert(left >= 0 && top >= 0 && width >= 1 && height >= 1 && left + width <= image.width && top + height <= image.height);
  284. intptr_t newOffset = image.startOffset + (left * image.pixelSize) + (top * image.stride);
  285. return ImageImpl(width, height, image.stride, image.pixelSize, image.buffer, newOffset);
  286. }
  287. struct ImageIntersection {
  288. ImageWriter subTarget;
  289. ImageReader subSource;
  290. ImageIntersection(const ImageWriter &subTarget, const ImageReader &subSource) :
  291. subTarget(subTarget), subSource(subSource) {}
  292. static bool canCreate(ImageImpl &target, const ImageImpl &source, int32_t left, int32_t top) {
  293. int32_t targetRegionRight = left + source.width;
  294. int32_t targetRegionBottom = top + source.height;
  295. return left < target.width && top < target.height && targetRegionRight > 0 && targetRegionBottom > 0;
  296. }
  297. // Only call if canCreate passed with the same arguments
  298. static ImageIntersection create(ImageImpl &target, const ImageImpl &source, int32_t left, int32_t top) {
  299. int32_t targetRegionRight = left + source.width;
  300. int32_t targetRegionBottom = top + source.height;
  301. assert(ImageIntersection::canCreate(target, source, left, top));
  302. // Check if the source has to be clipped
  303. if (left < 0 || top < 0 || targetRegionRight > target.width || targetRegionBottom > target.height) {
  304. int32_t clipLeft = std::max(0, -left);
  305. int32_t clipTop = std::max(0, -top);
  306. int32_t clipRight = std::max(0, targetRegionRight - target.width);
  307. int32_t clipBottom = std::max(0, targetRegionBottom - target.height);
  308. int32_t newWidth = source.width - (clipLeft + clipRight);
  309. int32_t newHeight = source.height - (clipTop + clipBottom);
  310. assert(newWidth > 0 && newHeight > 0);
  311. // Partial drawing
  312. ImageImpl subTarget = getGenericSubImage(target, left + clipLeft, top + clipTop, newWidth, newHeight);
  313. ImageImpl subSource = getGenericSubImage(source, clipLeft, clipTop, newWidth, newHeight);
  314. return ImageIntersection(getWriter(subTarget), getReader(subSource));
  315. } else {
  316. // Full drawing
  317. ImageImpl subTarget = getGenericSubImage(target, left, top, source.width, source.height);
  318. return ImageIntersection(getWriter(subTarget), getReader(source));
  319. }
  320. }
  321. };
  322. #define ITERATE_ROWS(WRITER, READER, OPERATION) \
  323. { \
  324. uint8_t *targetRow = WRITER.data; \
  325. const uint8_t *sourceRow = READER.data; \
  326. for (int32_t y = 0; y < READER.height; y++) { \
  327. OPERATION; \
  328. targetRow += WRITER.stride; \
  329. sourceRow += READER.stride; \
  330. } \
  331. }
  332. #define ITERATE_PIXELS(WRITER, READER, OPERATION) \
  333. { \
  334. uint8_t *targetRow = WRITER.data; \
  335. const uint8_t *sourceRow = READER.data; \
  336. for (int32_t y = 0; y < READER.height; y++) { \
  337. uint8_t *targetPixel = targetRow; \
  338. const uint8_t *sourcePixel = sourceRow; \
  339. for (int32_t x = 0; x < READER.width; x++) { \
  340. {OPERATION;} \
  341. targetPixel += WRITER.pixelSize; \
  342. sourcePixel += READER.pixelSize; \
  343. } \
  344. targetRow += WRITER.stride; \
  345. sourceRow += READER.stride; \
  346. } \
  347. }
  348. #define ITERATE_PIXELS_2(WRITER1, READER1, WRITER2, READER2, OPERATION) \
  349. { \
  350. uint8_t *targetRow1 = WRITER1.data; \
  351. uint8_t *targetRow2 = WRITER2.data; \
  352. const uint8_t *sourceRow1 = READER1.data; \
  353. const uint8_t *sourceRow2 = READER2.data; \
  354. int minWidth = std::min(READER1.width, READER2.width); \
  355. int minHeight = std::min(READER1.height, READER2.height); \
  356. for (int32_t y = 0; y < minHeight; y++) { \
  357. uint8_t *targetPixel1 = targetRow1; \
  358. uint8_t *targetPixel2 = targetRow2; \
  359. const uint8_t *sourcePixel1 = sourceRow1; \
  360. const uint8_t *sourcePixel2 = sourceRow2; \
  361. for (int32_t x = 0; x < minWidth; x++) { \
  362. {OPERATION;} \
  363. targetPixel1 += WRITER1.pixelSize; \
  364. targetPixel2 += WRITER2.pixelSize; \
  365. sourcePixel1 += READER1.pixelSize; \
  366. sourcePixel2 += READER2.pixelSize; \
  367. } \
  368. targetRow1 += WRITER1.stride; \
  369. targetRow2 += WRITER2.stride; \
  370. sourceRow1 += READER1.stride; \
  371. sourceRow2 += READER2.stride; \
  372. } \
  373. }
  374. #define ITERATE_PIXELS_3(WRITER1, READER1, WRITER2, READER2, WRITER3, READER3, OPERATION) \
  375. { \
  376. uint8_t *targetRow1 = WRITER1.data; \
  377. uint8_t *targetRow2 = WRITER2.data; \
  378. uint8_t *targetRow3 = WRITER3.data; \
  379. const uint8_t *sourceRow1 = READER1.data; \
  380. const uint8_t *sourceRow2 = READER2.data; \
  381. const uint8_t *sourceRow3 = READER3.data; \
  382. int minWidth = std::min(std::min(READER1.width, READER2.width), READER3.width); \
  383. int minHeight = std::min(std::min(READER1.height, READER2.height), READER3.height); \
  384. for (int32_t y = 0; y < minHeight; y++) { \
  385. uint8_t *targetPixel1 = targetRow1; \
  386. uint8_t *targetPixel2 = targetRow2; \
  387. uint8_t *targetPixel3 = targetRow3; \
  388. const uint8_t *sourcePixel1 = sourceRow1; \
  389. const uint8_t *sourcePixel2 = sourceRow2; \
  390. const uint8_t *sourcePixel3 = sourceRow3; \
  391. for (int32_t x = 0; x < minWidth; x++) { \
  392. {OPERATION;} \
  393. targetPixel1 += WRITER1.pixelSize; \
  394. targetPixel2 += WRITER2.pixelSize; \
  395. targetPixel3 += WRITER3.pixelSize; \
  396. sourcePixel1 += READER1.pixelSize; \
  397. sourcePixel2 += READER2.pixelSize; \
  398. sourcePixel3 += READER3.pixelSize; \
  399. } \
  400. targetRow1 += WRITER1.stride; \
  401. targetRow2 += WRITER2.stride; \
  402. targetRow3 += WRITER3.stride; \
  403. sourceRow1 += READER1.stride; \
  404. sourceRow2 += READER2.stride; \
  405. sourceRow3 += READER3.stride; \
  406. } \
  407. }
  408. static inline int saturateFloat(float value) {
  409. if (!(value >= 0.0f)) {
  410. // NaN or negative
  411. return 0;
  412. } else if (value > 255.0f) {
  413. // Too large
  414. return 255;
  415. } else {
  416. // Round to closest
  417. return (int)(value + 0.5f);
  418. }
  419. }
  420. // Copy data from one image region to another of the same size.
  421. // Packing order is reinterpreted without conversion.
  422. static void copyImageData(ImageWriter writer, ImageReader reader) {
  423. assert(writer.width == reader.width && writer.height == reader.height && writer.pixelSize == reader.pixelSize);
  424. ITERATE_ROWS(writer, reader, std::memcpy(targetRow, sourceRow, reader.width * reader.pixelSize));
  425. }
  426. void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top) {
  427. if (ImageIntersection::canCreate(target, source, left, top)) {
  428. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  429. if (target.packOrder == source.packOrder) {
  430. // No conversion needed
  431. copyImageData(intersection.subTarget, intersection.subSource);
  432. } else {
  433. // Read and repack to convert between different color formats
  434. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  435. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  436. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  437. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  438. targetPixel[target.packOrder.alphaIndex] = sourcePixel[source.packOrder.alphaIndex];
  439. );
  440. }
  441. }
  442. }
  443. void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
  444. if (ImageIntersection::canCreate(target, source, left, top)) {
  445. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  446. copyImageData(intersection.subTarget, intersection.subSource);
  447. }
  448. }
  449. void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
  450. if (ImageIntersection::canCreate(target, source, left, top)) {
  451. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  452. copyImageData(intersection.subTarget, intersection.subSource);
  453. }
  454. }
  455. void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
  456. if (ImageIntersection::canCreate(target, source, left, top)) {
  457. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  458. copyImageData(intersection.subTarget, intersection.subSource);
  459. }
  460. }
  461. void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
  462. if (ImageIntersection::canCreate(target, source, left, top)) {
  463. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  464. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  465. uint8_t luma = *sourcePixel;
  466. targetPixel[target.packOrder.redIndex] = luma;
  467. targetPixel[target.packOrder.greenIndex] = luma;
  468. targetPixel[target.packOrder.blueIndex] = luma;
  469. targetPixel[target.packOrder.alphaIndex] = 255;
  470. );
  471. }
  472. }
  473. void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
  474. if (ImageIntersection::canCreate(target, source, left, top)) {
  475. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  476. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  477. int luma = *((const uint16_t*)sourcePixel);
  478. if (luma > 255) { luma = 255; }
  479. targetPixel[target.packOrder.redIndex] = luma;
  480. targetPixel[target.packOrder.greenIndex] = luma;
  481. targetPixel[target.packOrder.blueIndex] = luma;
  482. targetPixel[target.packOrder.alphaIndex] = 255;
  483. );
  484. }
  485. }
  486. void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
  487. if (ImageIntersection::canCreate(target, source, left, top)) {
  488. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  489. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  490. int luma = saturateFloat(*((const float*)sourcePixel));
  491. targetPixel[target.packOrder.redIndex] = luma;
  492. targetPixel[target.packOrder.greenIndex] = luma;
  493. targetPixel[target.packOrder.blueIndex] = luma;
  494. targetPixel[target.packOrder.alphaIndex] = 255;
  495. );
  496. }
  497. }
  498. void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
  499. if (ImageIntersection::canCreate(target, source, left, top)) {
  500. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  501. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  502. *targetPixel = saturateFloat(*((const float*)sourcePixel));
  503. );
  504. }
  505. }
  506. void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
  507. if (ImageIntersection::canCreate(target, source, left, top)) {
  508. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  509. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  510. int luma = *((const uint16_t*)sourcePixel);
  511. if (luma > 255) { luma = 255; }
  512. *targetPixel = luma;
  513. );
  514. }
  515. }
  516. void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
  517. if (ImageIntersection::canCreate(target, source, left, top)) {
  518. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  519. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  520. *((uint16_t*)targetPixel) = *sourcePixel;
  521. );
  522. }
  523. }
  524. void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
  525. if (ImageIntersection::canCreate(target, source, left, top)) {
  526. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  527. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  528. int luma = *((const float*)sourcePixel);
  529. if (luma < 0) { luma = 0; }
  530. if (luma > 65535) { luma = 65535; }
  531. *((uint16_t*)targetPixel) = *sourcePixel;
  532. );
  533. }
  534. }
  535. void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
  536. if (ImageIntersection::canCreate(target, source, left, top)) {
  537. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  538. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  539. *((float*)targetPixel) = (float)(*sourcePixel);
  540. );
  541. }
  542. }
  543. void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
  544. if (ImageIntersection::canCreate(target, source, left, top)) {
  545. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  546. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  547. int luma = *((const uint16_t*)sourcePixel);
  548. if (luma > 255) { luma = 255; }
  549. *((float*)targetPixel) = (float)luma;
  550. );
  551. }
  552. }
  553. void dsr::imageImpl_drawAlphaFilter(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top) {
  554. if (ImageIntersection::canCreate(target, source, left, top)) {
  555. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  556. // Read and repack to convert between different color formats
  557. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  558. // Optimized for anti-aliasing, where most alpha values are 0 or 255
  559. uint32_t sourceRatio = sourcePixel[source.packOrder.alphaIndex];
  560. if (sourceRatio > 0) {
  561. if (sourceRatio == 255) {
  562. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  563. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  564. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  565. targetPixel[target.packOrder.alphaIndex] = 255;
  566. } else {
  567. uint32_t targetRatio = 255 - sourceRatio;
  568. targetPixel[target.packOrder.redIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.redIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.redIndex], sourceRatio);
  569. targetPixel[target.packOrder.greenIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.greenIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.greenIndex], sourceRatio);
  570. targetPixel[target.packOrder.blueIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.blueIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.blueIndex], sourceRatio);
  571. targetPixel[target.packOrder.alphaIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.alphaIndex], targetRatio) + sourceRatio;
  572. }
  573. }
  574. );
  575. }
  576. }
  577. void dsr::imageImpl_drawMaxAlpha(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top, int32_t sourceAlphaOffset) {
  578. if (ImageIntersection::canCreate(target, source, left, top)) {
  579. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  580. // Read and repack to convert between different color formats
  581. if (sourceAlphaOffset == 0) {
  582. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  583. int sourceAlpha = sourcePixel[source.packOrder.alphaIndex];
  584. if (sourceAlpha > targetPixel[target.packOrder.alphaIndex]) {
  585. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  586. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  587. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  588. targetPixel[target.packOrder.alphaIndex] = sourceAlpha;
  589. }
  590. );
  591. } else {
  592. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  593. int sourceAlpha = sourcePixel[source.packOrder.alphaIndex];
  594. if (sourceAlpha > 0) {
  595. sourceAlpha += sourceAlphaOffset;
  596. if (sourceAlpha > targetPixel[target.packOrder.alphaIndex]) {
  597. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  598. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  599. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  600. if (sourceAlpha < 0) { sourceAlpha = 0; }
  601. if (sourceAlpha > 255) { sourceAlpha = 255; }
  602. targetPixel[target.packOrder.alphaIndex] = sourceAlpha;
  603. }
  604. }
  605. );
  606. }
  607. }
  608. }
  609. void dsr::imageImpl_drawAlphaClip(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top, int32_t threshold) {
  610. if (ImageIntersection::canCreate(target, source, left, top)) {
  611. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  612. // Read and repack to convert between different color formats
  613. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  614. if (sourcePixel[source.packOrder.alphaIndex] > threshold) {
  615. targetPixel[target.packOrder.redIndex] = sourcePixel[source.packOrder.redIndex];
  616. targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
  617. targetPixel[target.packOrder.blueIndex] = sourcePixel[source.packOrder.blueIndex];
  618. targetPixel[target.packOrder.alphaIndex] = 255;
  619. }
  620. );
  621. }
  622. }
  623. template <bool FULL_ALPHA>
  624. static void drawSilhouette_template(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
  625. if (ImageIntersection::canCreate(target, source, left, top)) {
  626. ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
  627. // Read and repack to convert between different color formats
  628. ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
  629. uint32_t sourceRatio;
  630. if (FULL_ALPHA) {
  631. sourceRatio = *sourcePixel;
  632. } else {
  633. sourceRatio = normalizedByteMultiplication(*sourcePixel, color.alpha);
  634. }
  635. if (sourceRatio > 0) {
  636. if (sourceRatio == 255) {
  637. targetPixel[target.packOrder.redIndex] = color.red;
  638. targetPixel[target.packOrder.greenIndex] = color.green;
  639. targetPixel[target.packOrder.blueIndex] = color.blue;
  640. targetPixel[target.packOrder.alphaIndex] = 255;
  641. } else {
  642. uint32_t targetRatio = 255 - sourceRatio;
  643. targetPixel[target.packOrder.redIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.redIndex], targetRatio) + normalizedByteMultiplication(color.red, sourceRatio);
  644. targetPixel[target.packOrder.greenIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.greenIndex], targetRatio) + normalizedByteMultiplication(color.green, sourceRatio);
  645. targetPixel[target.packOrder.blueIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.blueIndex], targetRatio) + normalizedByteMultiplication(color.blue, sourceRatio);
  646. targetPixel[target.packOrder.alphaIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.alphaIndex], targetRatio) + sourceRatio;
  647. }
  648. }
  649. );
  650. }
  651. }
  652. void dsr::imageImpl_drawSilhouette(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
  653. if (color.alpha > 0) {
  654. ColorRgbaI32 saturatedColor = color.saturate();
  655. if (color.alpha < 255) {
  656. drawSilhouette_template<false>(target, source, saturatedColor, left, top);
  657. } else {
  658. drawSilhouette_template<true>(target, source, saturatedColor, left, top);
  659. }
  660. }
  661. }
  662. void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, int32_t left, int32_t top, int32_t sourceHeightOffset) {
  663. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  664. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  665. ITERATE_PIXELS(intersectionH.subTarget, intersectionH.subSource,
  666. int32_t newHeight = *((const uint16_t*)sourcePixel);
  667. if (newHeight > 0) {
  668. newHeight += sourceHeightOffset;
  669. if (newHeight < 0) { newHeight = 0; }
  670. if (newHeight > 65535) { newHeight = 65535; }
  671. if (newHeight > 0 && newHeight > *((uint16_t*)targetPixel)) {
  672. *((uint16_t*)targetPixel) = newHeight;
  673. }
  674. }
  675. );
  676. }
  677. }
  678. void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
  679. int32_t left, int32_t top, int32_t sourceHeightOffset) {
  680. assert(sourceA.width == sourceHeight.width);
  681. assert(sourceA.height == sourceHeight.height);
  682. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  683. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  684. ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
  685. ITERATE_PIXELS_2(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource,
  686. int32_t newHeight = *((const uint16_t*)sourcePixel1);
  687. if (newHeight > 0) {
  688. newHeight += sourceHeightOffset;
  689. if (newHeight < 0) { newHeight = 0; }
  690. if (newHeight > 65535) { newHeight = 65535; }
  691. if (newHeight > *((uint16_t*)targetPixel1)) {
  692. *((uint16_t*)targetPixel1) = newHeight;
  693. targetPixel2[targetA.packOrder.redIndex] = sourcePixel2[sourceA.packOrder.redIndex];
  694. targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
  695. targetPixel2[targetA.packOrder.blueIndex] = sourcePixel2[sourceA.packOrder.blueIndex];
  696. targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
  697. }
  698. }
  699. );
  700. }
  701. }
  702. void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
  703. ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left, int32_t top, int32_t sourceHeightOffset) {
  704. assert(sourceA.width == sourceHeight.width);
  705. assert(sourceA.height == sourceHeight.height);
  706. assert(sourceB.width == sourceHeight.width);
  707. assert(sourceB.height == sourceHeight.height);
  708. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  709. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  710. ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
  711. ImageIntersection intersectionB = ImageIntersection::create(targetB, sourceB, left, top);
  712. ITERATE_PIXELS_3(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource, intersectionB.subTarget, intersectionB.subSource,
  713. int32_t newHeight = *((const uint16_t*)sourcePixel1);
  714. if (newHeight > 0) {
  715. newHeight += sourceHeightOffset;
  716. if (newHeight < 0) { newHeight = 0; }
  717. if (newHeight > 65535) { newHeight = 65535; }
  718. if (newHeight > *((uint16_t*)targetPixel1)) {
  719. *((uint16_t*)targetPixel1) = newHeight;
  720. targetPixel2[targetA.packOrder.redIndex] = sourcePixel2[sourceA.packOrder.redIndex];
  721. targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
  722. targetPixel2[targetA.packOrder.blueIndex] = sourcePixel2[sourceA.packOrder.blueIndex];
  723. targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
  724. targetPixel3[targetB.packOrder.redIndex] = sourcePixel3[sourceB.packOrder.redIndex];
  725. targetPixel3[targetB.packOrder.greenIndex] = sourcePixel3[sourceB.packOrder.greenIndex];
  726. targetPixel3[targetB.packOrder.blueIndex] = sourcePixel3[sourceB.packOrder.blueIndex];
  727. targetPixel3[targetB.packOrder.alphaIndex] = sourcePixel3[sourceB.packOrder.alphaIndex];
  728. }
  729. }
  730. );
  731. }
  732. }
  733. void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, int32_t left, int32_t top, float sourceHeightOffset) {
  734. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  735. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  736. ITERATE_PIXELS(intersectionH.subTarget, intersectionH.subSource,
  737. float newHeight = *((const float*)sourcePixel);
  738. if (newHeight > -std::numeric_limits<float>::infinity()) {
  739. newHeight += sourceHeightOffset;
  740. if (newHeight > *((float*)targetPixel)) {
  741. *((float*)targetPixel) = newHeight;
  742. }
  743. }
  744. );
  745. }
  746. }
  747. void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
  748. int32_t left, int32_t top, float sourceHeightOffset) {
  749. assert(sourceA.width == sourceHeight.width);
  750. assert(sourceA.height == sourceHeight.height);
  751. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  752. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  753. ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
  754. ITERATE_PIXELS_2(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource,
  755. float newHeight = *((const float*)sourcePixel1);
  756. if (newHeight > -std::numeric_limits<float>::infinity()) {
  757. newHeight += sourceHeightOffset;
  758. if (newHeight > *((float*)targetPixel1)) {
  759. *((float*)targetPixel1) = newHeight;
  760. targetPixel2[targetA.packOrder.redIndex] = sourcePixel2[sourceA.packOrder.redIndex];
  761. targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
  762. targetPixel2[targetA.packOrder.blueIndex] = sourcePixel2[sourceA.packOrder.blueIndex];
  763. targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
  764. }
  765. }
  766. );
  767. }
  768. }
  769. void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
  770. ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left, int32_t top, float sourceHeightOffset) {
  771. assert(sourceA.width == sourceHeight.width);
  772. assert(sourceA.height == sourceHeight.height);
  773. assert(sourceB.width == sourceHeight.width);
  774. assert(sourceB.height == sourceHeight.height);
  775. if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
  776. ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
  777. ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
  778. ImageIntersection intersectionB = ImageIntersection::create(targetB, sourceB, left, top);
  779. ITERATE_PIXELS_3(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource, intersectionB.subTarget, intersectionB.subSource,
  780. float newHeight = *((const float*)sourcePixel1);
  781. if (newHeight > -std::numeric_limits<float>::infinity()) {
  782. newHeight += sourceHeightOffset;
  783. if (newHeight > *((float*)targetPixel1)) {
  784. *((float*)targetPixel1) = newHeight;
  785. targetPixel2[targetA.packOrder.redIndex] = sourcePixel2[sourceA.packOrder.redIndex];
  786. targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
  787. targetPixel2[targetA.packOrder.blueIndex] = sourcePixel2[sourceA.packOrder.blueIndex];
  788. targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
  789. targetPixel3[targetB.packOrder.redIndex] = sourcePixel3[sourceB.packOrder.redIndex];
  790. targetPixel3[targetB.packOrder.greenIndex] = sourcePixel3[sourceB.packOrder.greenIndex];
  791. targetPixel3[targetB.packOrder.blueIndex] = sourcePixel3[sourceB.packOrder.blueIndex];
  792. targetPixel3[targetB.packOrder.alphaIndex] = sourcePixel3[sourceB.packOrder.alphaIndex];
  793. }
  794. }
  795. );
  796. }
  797. }
  798. // -------------------------------- Resize --------------------------------
  799. static inline U32x4 ColorRgbaI32_to_U32x4(const ColorRgbaI32& color) {
  800. return U32x4(color.red, color.green, color.blue, color.alpha);
  801. }
  802. static inline ColorRgbaI32 U32x4_to_ColorRgbaI32(const U32x4& color) {
  803. UVector4D vResult = color.get();
  804. return ColorRgbaI32(vResult.x, vResult.y, vResult.z, vResult.w);
  805. }
  806. // Uniform linear interpolation of colors from a 16-bit sub-pixel weight
  807. // Pre-condition0 <= fineRatio <= 65536
  808. // Post-condition: Returns colorA * (1 - (fineRatio / 65536)) + colorB * (fineRatio / 65536)
  809. static inline U32x4 mixColorsUniform(const U32x4 &colorA, const U32x4 &colorB, uint32_t fineRatio) {
  810. uint16_t ratio = fineRatio >> 8;
  811. uint16_t invRatio = 256 - ratio;
  812. U16x8 weightA = U16x8(invRatio);
  813. U16x8 weightB = U16x8(ratio);
  814. U32x4 lowMask(0x00FF00FFu);
  815. U16x8 lowColorA = U16x8(colorA & lowMask);
  816. U16x8 lowColorB = U16x8(colorB & lowMask);
  817. U32x4 highMask(0xFF00FF00u);
  818. U16x8 highColorA = U16x8((colorA & highMask) >> 8);
  819. U16x8 highColorB = U16x8((colorB & highMask) >> 8);
  820. U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32();
  821. U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
  822. return (((lowColor >> 8) & lowMask) | (highColor & highMask));
  823. }
  824. #define READ_RGBAU8_CLAMP(X,Y) ImageRgbaU8Impl::unpackRgba(ImageRgbaU8Impl::readPixel_clamp(source, X, Y), source.packOrder)
  825. #define READ_RGBAU8_CLAMP_SIMD(X,Y) ColorRgbaI32_to_U32x4(READ_RGBAU8_CLAMP(X,Y))
  826. // Fixed-precision decimal system with 16-bit indices and 16-bit sub-pixel weights
  827. static const uint32_t interpolationFullPixel = 65536;
  828. static const uint32_t interpolationHalfPixel = interpolationFullPixel / 2;
  829. // Modulo mask for values greater than or equal to 0 and lesser than interpolationFullPixel
  830. static const uint32_t interpolationWeightMask = interpolationFullPixel - 1;
  831. template <bool BILINEAR>
  832. static uint32_t samplePixel(const ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
  833. if (BILINEAR) {
  834. uint32_t upperRatio = 65536 - lowerRatio;
  835. uint32_t leftRatio = 65536 - rightRatio;
  836. U32x4 vUpperLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, upperY);
  837. U32x4 vUpperRightColor = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY);
  838. U32x4 vLowerLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, upperY + 1);
  839. U32x4 vLowerRightColor = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY + 1);
  840. U32x4 vLeftRatio = U32x4(leftRatio);
  841. U32x4 vRightRatio = U32x4(rightRatio);
  842. U32x4 vUpperColor = ((vUpperLeftColor * vLeftRatio) + (vUpperRightColor * vRightRatio)) >> 16;
  843. U32x4 vLowerColor = ((vLowerLeftColor * vLeftRatio) + (vLowerRightColor * vRightRatio)) >> 16;
  844. U32x4 vCenterColor = ((vUpperColor * upperRatio) + (vLowerColor * lowerRatio)) >> 16;
  845. return (target.packRgba(U32x4_to_ColorRgbaI32(vCenterColor))).packed;
  846. } else {
  847. return (target.packRgba(READ_RGBAU8_CLAMP(leftX, upperY))).packed;
  848. }
  849. }
  850. template <bool BILINEAR>
  851. static uint8_t samplePixel(const ImageU8Impl& target, const ImageU8Impl& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
  852. if (BILINEAR) {
  853. uint32_t upperRatio = 65536 - lowerRatio;
  854. uint32_t leftRatio = 65536 - rightRatio;
  855. uint32_t upperLeftLuma = ImageU8Impl::readPixel_clamp(source, leftX, upperY);
  856. uint32_t upperRightLuma = ImageU8Impl::readPixel_clamp(source, leftX + 1, upperY);
  857. uint32_t lowerLeftLuma = ImageU8Impl::readPixel_clamp(source, leftX, upperY + 1);
  858. uint32_t lowerRightLuma = ImageU8Impl::readPixel_clamp(source, leftX + 1, upperY + 1);
  859. uint32_t upperLuma = ((upperLeftLuma * leftRatio) + (upperRightLuma * rightRatio)) >> 16;
  860. uint32_t lowerLuma = ((lowerLeftLuma * leftRatio) + (lowerRightLuma * rightRatio)) >> 16;
  861. return ((upperLuma * upperRatio) + (lowerLuma * lowerRatio)) >> 16;
  862. } else {
  863. return ImageU8Impl::readPixel_clamp(source, leftX, upperY);
  864. }
  865. }
  866. // BILINEAR: Enables linear interpolation
  867. // scaleRegion:
  868. // The stretched location of the source image in the target image
  869. // Making it smaller than the target image will fill the outside with stretched pixels
  870. // Allowing the caller to crop away parts of the source image that aren't interesting
  871. // Can be used to round the region to a multiple of the input size for a fixed pixel size
  872. template <bool BILINEAR, typename IMAGE_TYPE, typename PIXEL_TYPE>
  873. static void resize_reference(IMAGE_TYPE& target, const IMAGE_TYPE& source, const IRect& scaleRegion) {
  874. // Reference implementation
  875. // Offset in source pixels per target pixel
  876. int32_t offsetX = interpolationFullPixel * source.width / scaleRegion.width();
  877. int32_t offsetY = interpolationFullPixel * source.height / scaleRegion.height();
  878. int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
  879. int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
  880. if (BILINEAR) {
  881. startX -= interpolationHalfPixel;
  882. startY -= interpolationHalfPixel;
  883. }
  884. SafePointer<PIXEL_TYPE> targetRow = imageInternal::getSafeData<PIXEL_TYPE>(target);
  885. int32_t readY = startY;
  886. for (int32_t y = 0; y < target.height; y++) {
  887. int32_t naturalY = readY;
  888. if (naturalY < 0) { naturalY = 0; }
  889. uint32_t sampleY = (uint32_t)naturalY;
  890. uint32_t upperY = sampleY >> 16;
  891. uint32_t lowerRatio = sampleY & interpolationWeightMask;
  892. SafePointer<PIXEL_TYPE> targetPixel = targetRow;
  893. int32_t readX = startX;
  894. for (int32_t x = 0; x < target.width; x++) {
  895. int32_t naturalX = readX;
  896. if (naturalX < 0) { naturalX = 0; }
  897. uint32_t sampleX = (uint32_t)naturalX;
  898. uint32_t leftX = sampleX >> 16;
  899. uint32_t rightRatio = sampleX & interpolationWeightMask;
  900. *targetPixel = samplePixel<BILINEAR>(target, source, leftX, upperY, rightRatio, lowerRatio);
  901. targetPixel += 1;
  902. readX += offsetX;
  903. }
  904. targetRow.increaseBytes(target.stride);
  905. readY += offsetY;
  906. }
  907. }
  908. // BILINEAR: Enables linear interpolation
  909. // SIMD_ALIGNED: Each line starts 16-byte aligned, has a stride divisible with 16-bytes and is allowed to overwrite padding.
  910. template <bool BILINEAR, bool SIMD_ALIGNED>
  911. static void resize_optimized(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, const IRect& scaleRegion) {
  912. // Get source information
  913. // Compare dimensions
  914. const bool sameWidth = source.width == scaleRegion.width() && scaleRegion.left() == 0;
  915. const bool sameHeight = source.height == scaleRegion.height() && scaleRegion.top() == 0;
  916. const bool samePackOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex;
  917. if (sameWidth && sameHeight) {
  918. // No need to resize, just make a copy to save time
  919. imageImpl_drawCopy(target, source);
  920. } else if (sameWidth && (samePackOrder || BILINEAR)) {
  921. // Only vertical interpolation
  922. // Offset in source pixels per target pixel
  923. int32_t offsetY = interpolationFullPixel * source.height / scaleRegion.height();
  924. int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
  925. if (BILINEAR) {
  926. startY -= interpolationHalfPixel;
  927. }
  928. SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
  929. int32_t readY = startY;
  930. for (int32_t y = 0; y < target.height; y++) {
  931. int32_t naturalY = readY;
  932. if (naturalY < 0) { naturalY = 0; }
  933. uint32_t sampleY = (uint32_t)naturalY;
  934. uint32_t upperY = sampleY >> 16;
  935. uint32_t lowerY = upperY + 1;
  936. if (upperY >= (uint32_t)source.height) upperY = source.height - 1;
  937. if (lowerY >= (uint32_t)source.height) lowerY = source.height - 1;
  938. if (BILINEAR) {
  939. uint32_t lowerRatio = sampleY & interpolationWeightMask;
  940. uint32_t upperRatio = 65536 - lowerRatio;
  941. SafePointer<uint32_t> targetPixel = targetRow;
  942. if (SIMD_ALIGNED) {
  943. const SafePointer<uint32_t> sourceRowUpper = imageInternal::getSafeData<uint32_t>(source, upperY);
  944. const SafePointer<uint32_t> sourceRowLower = imageInternal::getSafeData<uint32_t>(source, lowerY);
  945. for (int32_t x = 0; x < target.width; x += 4) {
  946. ALIGN16 U32x4 vUpperPackedColor = U32x4::readAligned(sourceRowUpper, "resize_optimized @ read vUpperPackedColor");
  947. ALIGN16 U32x4 vLowerPackedColor = U32x4::readAligned(sourceRowLower, "resize_optimized @ read vLowerPackedColor");
  948. ALIGN16 U32x4 vCenterColor = mixColorsUniform(vUpperPackedColor, vLowerPackedColor, lowerRatio);
  949. vCenterColor.writeAligned(targetPixel, "resize_optimized @ write vCenterColor");
  950. sourceRowUpper += 4;
  951. sourceRowLower += 4;
  952. targetPixel += 4;
  953. }
  954. } else {
  955. for (int32_t x = 0; x < target.width; x++) {
  956. ALIGN16 U32x4 vUpperColor = READ_RGBAU8_CLAMP_SIMD(x, upperY);
  957. ALIGN16 U32x4 vLowerColor = READ_RGBAU8_CLAMP_SIMD(x, lowerY);
  958. ALIGN16 U32x4 vCenterColor = ((vUpperColor * upperRatio) + (vLowerColor * lowerRatio)) >> 16;
  959. ColorRgbaI32 finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
  960. *targetPixel = target.packRgba(finalColor).packed;
  961. targetPixel += 1;
  962. }
  963. }
  964. } else {
  965. const SafePointer<uint32_t> sourceRowUpper = imageInternal::getSafeData<uint32_t>(source, upperY);
  966. // Nearest neighbor sampling from a same width can be done using one copy per row
  967. safeMemoryCopy(targetRow, sourceRowUpper, source.width * 4);
  968. }
  969. targetRow.increaseBytes(target.stride);
  970. readY += offsetY;
  971. }
  972. } else if (sameHeight) {
  973. // Only horizontal interpolation
  974. // Offset in source pixels per target pixel
  975. int32_t offsetX = interpolationFullPixel * source.width / scaleRegion.width();
  976. int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
  977. if (BILINEAR) {
  978. startX -= interpolationHalfPixel;
  979. }
  980. SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
  981. for (int32_t y = 0; y < target.height; y++) {
  982. SafePointer<uint32_t> targetPixel = targetRow;
  983. int32_t readX = startX;
  984. for (int32_t x = 0; x < target.width; x++) {
  985. int32_t naturalX = readX;
  986. if (naturalX < 0) { naturalX = 0; }
  987. uint32_t sampleX = (uint32_t)naturalX;
  988. uint32_t leftX = sampleX >> 16;
  989. uint32_t rightX = leftX + 1;
  990. uint32_t rightRatio = sampleX & interpolationWeightMask;
  991. uint32_t leftRatio = 65536 - rightRatio;
  992. ColorRgbaI32 finalColor;
  993. if (BILINEAR) {
  994. ALIGN16 U32x4 vLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, y);
  995. ALIGN16 U32x4 vRightColor = READ_RGBAU8_CLAMP_SIMD(rightX, y);
  996. ALIGN16 U32x4 vCenterColor = ((vLeftColor * leftRatio) + (vRightColor * rightRatio)) >> 16;
  997. finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
  998. } else {
  999. finalColor = READ_RGBAU8_CLAMP(leftX, y);
  1000. }
  1001. *targetPixel = target.packRgba(finalColor).packed;
  1002. targetPixel += 1;
  1003. readX += offsetX;
  1004. }
  1005. targetRow.increaseBytes(target.stride);
  1006. }
  1007. } else {
  1008. // Call the reference implementation
  1009. resize_reference<BILINEAR, ImageRgbaU8Impl, uint32_t>(target, source, scaleRegion);
  1010. }
  1011. }
  1012. // Returns true iff each line start in image is aligned with 16 bytes
  1013. // Often not the case for sub-images, even if the parent image is aligned
  1014. static bool imageIs16ByteAligned(const ImageImpl& image) {
  1015. return (uint32_t)((image.stride & 15) == 0 && ((uintptr_t)(imageInternal::getSafeData<uint8_t>(image).getUnsafe()) & 15) == 0);
  1016. }
  1017. // Converting run-time flags into compile-time constants
  1018. static void resize_aux(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate, bool paddWrite, const IRect& scaleRegion) {
  1019. // If writing to padding is allowed and both images are 16-byte aligned with the same pack order
  1020. if (paddWrite && imageIs16ByteAligned(source) && imageIs16ByteAligned(target)) {
  1021. // SIMD resize allowed
  1022. if (interpolate) {
  1023. resize_optimized<true, true>(target, source, scaleRegion);
  1024. } else {
  1025. resize_optimized<false, true>(target, source, scaleRegion);
  1026. }
  1027. } else {
  1028. // Non-SIMD resize
  1029. if (interpolate) {
  1030. resize_optimized<true, false>(target, source, scaleRegion);
  1031. } else {
  1032. resize_optimized<false, false>(target, source, scaleRegion);
  1033. }
  1034. }
  1035. }
  1036. // TODO: Optimize monochrome resizing.
  1037. static void resize_aux(ImageU8Impl& target, const ImageU8Impl& source, bool interpolate, bool paddWrite, const IRect& scaleRegion) {
  1038. if (interpolate) {
  1039. resize_reference<true, ImageU8Impl, uint8_t>(target, source, scaleRegion);
  1040. } else {
  1041. resize_reference<false, ImageU8Impl, uint8_t>(target, source, scaleRegion);
  1042. }
  1043. }
  1044. // Creating an image to replacedImage with the same pack order as originalImage when applicable to the image format.
  1045. static ImageRgbaU8Impl createWithSamePackOrder(const ImageRgbaU8Impl& originalImage, int32_t width, int32_t height) {
  1046. return ImageRgbaU8Impl(width, height, originalImage.packOrder.packOrderIndex, DSR_DEFAULT_ALIGNMENT);
  1047. }
  1048. static ImageU8Impl createWithSamePackOrder(const ImageU8Impl& originalImage, int32_t width, int32_t height) {
  1049. return ImageU8Impl(width, height, DSR_DEFAULT_ALIGNMENT);
  1050. }
  1051. template <typename IMAGE_TYPE>
  1052. void resizeToTarget(IMAGE_TYPE& target, const IMAGE_TYPE& source, bool interpolate) {
  1053. IRect scaleRegion = imageInternal::getBound(target);
  1054. if (target.width != source.width && target.height > source.height) {
  1055. // Upscaling is faster in two steps by both reusing the horizontal interpolation and vectorizing the vertical interpolation.
  1056. int tempWidth = target.width;
  1057. int tempHeight = source.height;
  1058. IRect tempScaleRegion = IRect(scaleRegion.left(), 0, scaleRegion.width(), source.height);
  1059. // Create a temporary buffer.
  1060. IMAGE_TYPE newTempImage = createWithSamePackOrder(target, tempWidth, tempHeight);
  1061. resize_aux(newTempImage, source, interpolate, true, tempScaleRegion);
  1062. resize_aux(target, newTempImage, interpolate, true, scaleRegion);
  1063. } else {
  1064. // Downscaling or only changing one dimension is faster in one step.
  1065. resize_aux(target, source, interpolate, true, scaleRegion);
  1066. }
  1067. }
  1068. void dsr::imageImpl_resizeToTarget(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate) {
  1069. resizeToTarget<ImageRgbaU8Impl>(target, source, interpolate);
  1070. }
  1071. void dsr::imageImpl_resizeToTarget(ImageU8Impl& target, const ImageU8Impl& source, bool interpolate) {
  1072. resizeToTarget<ImageU8Impl>(target, source, interpolate);
  1073. }
  1074. template <bool CONVERT_COLOR>
  1075. static inline Color4xU8 convertRead(const ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int x, int y) {
  1076. Color4xU8 result = ImageRgbaU8Impl::readPixel_clamp(source, x, y);
  1077. if (CONVERT_COLOR) {
  1078. result = target.packRgba(ImageRgbaU8Impl::unpackRgba(result, source.packOrder));
  1079. }
  1080. return result;
  1081. }
  1082. // Used for drawing large pixels
  1083. static inline void fillRectangle(ImageRgbaU8Impl& target, int pixelLeft, int pixelRight, int pixelTop, int pixelBottom, const Color4xU8& packedColor) {
  1084. SafePointer<Color4xU8> targetRow = imageInternal::getSafeData<Color4xU8>(target, pixelTop) + pixelLeft;
  1085. for (int y = pixelTop; y < pixelBottom; y++) {
  1086. SafePointer<Color4xU8> targetPixel = targetRow;
  1087. for (int x = pixelLeft; x < pixelRight; x++) {
  1088. *targetPixel = packedColor;
  1089. targetPixel += 1;
  1090. }
  1091. targetRow.increaseBytes(target.stride);
  1092. }
  1093. }
  1094. template <bool CONVERT_COLOR>
  1095. static void blockMagnify_reference(
  1096. ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source,
  1097. int pixelWidth, int pixelHeight, int clipWidth, int clipHeight) {
  1098. int sourceY = 0;
  1099. int maxSourceX = source.width - 1;
  1100. int maxSourceY = source.height - 1;
  1101. if (clipWidth > target.width) { clipWidth = target.width; }
  1102. if (clipHeight > target.height) { clipHeight = target.height; }
  1103. for (int32_t pixelTop = 0; pixelTop < clipHeight; pixelTop += pixelHeight) {
  1104. int sourceX = 0;
  1105. for (int32_t pixelLeft = 0; pixelLeft < clipWidth; pixelLeft += pixelWidth) {
  1106. // Read the pixel once
  1107. Color4xU8 sourceColor = convertRead<CONVERT_COLOR>(target, source, sourceX, sourceY);
  1108. // Write to all target pixels in a conditionless loop
  1109. fillRectangle(target, pixelLeft, pixelLeft + pixelWidth, pixelTop, pixelTop + pixelHeight, sourceColor);
  1110. // Iterate and clamp the read coordinate
  1111. sourceX++;
  1112. if (sourceX > maxSourceX) { sourceX = maxSourceX; }
  1113. }
  1114. // Iterate and clamp the read coordinate
  1115. sourceY++;
  1116. if (sourceY > maxSourceY) { sourceY = maxSourceY; }
  1117. }
  1118. }
  1119. // Pre-condition:
  1120. // * The source and target images have the same pack order
  1121. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1122. // * clipWidth % 2 == 0
  1123. // * clipHeight % 2 == 0
  1124. static void blockMagnify_2x2(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1125. #ifdef USE_SIMD_EXTRA
  1126. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1127. SafePointer<uint32_t> upperTargetRow = imageInternal::getSafeData<uint32_t>(target, 0);
  1128. SafePointer<uint32_t> lowerTargetRow = imageInternal::getSafeData<uint32_t>(target, 1);
  1129. int doubleTargetStride = target.stride * 2;
  1130. for (int upperTargetY = 0; upperTargetY + 2 <= clipHeight; upperTargetY+=2) {
  1131. // Carriage return
  1132. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1133. SafePointer<uint32_t> upperTargetPixel = upperTargetRow;
  1134. SafePointer<uint32_t> lowerTargetPixel = lowerTargetRow;
  1135. // Write to whole multiples of 8 pixels
  1136. int writeLeftX = 0;
  1137. while (writeLeftX + 8 <= clipWidth) {
  1138. // Read pixels
  1139. ALIGN16 SIMD_U32x4 sourcePixels = U32x4::readAligned(sourcePixel, "blockMagnify_2x2 @ whole sourcePixels").v;
  1140. sourcePixel += 4;
  1141. // Double the pixels by zipping with itself
  1142. ALIGN16 SIMD_U32x4x2 doubledPixels = ZIP_U32_SIMD(sourcePixels, sourcePixels);
  1143. // Write lower part
  1144. U32x4(doubledPixels.val[0]).writeAligned(upperTargetPixel, "blockMagnify_2x2 @ write upper left #1");
  1145. upperTargetPixel += 4;
  1146. U32x4(doubledPixels.val[0]).writeAligned(lowerTargetPixel, "blockMagnify_2x2 @ write lower left #1");
  1147. lowerTargetPixel += 4;
  1148. // Write upper part
  1149. U32x4(doubledPixels.val[1]).writeAligned(upperTargetPixel, "blockMagnify_2x2 @ write upper right #1");
  1150. upperTargetPixel += 4;
  1151. U32x4(doubledPixels.val[1]).writeAligned(lowerTargetPixel, "blockMagnify_2x2 @ write lower right #1");
  1152. lowerTargetPixel += 4;
  1153. // Count
  1154. writeLeftX += 8;
  1155. }
  1156. // Fill the last pixels using scalar operations to avoid going out of bound
  1157. while (writeLeftX + 2 <= clipWidth) {
  1158. // Read one pixel
  1159. uint32_t sourceColor = *sourcePixel;
  1160. // Write 2x2 pixels
  1161. *upperTargetPixel = sourceColor; upperTargetPixel += 1;
  1162. *upperTargetPixel = sourceColor; upperTargetPixel += 1;
  1163. *lowerTargetPixel = sourceColor; lowerTargetPixel += 1;
  1164. *lowerTargetPixel = sourceColor; lowerTargetPixel += 1;
  1165. // Count
  1166. writeLeftX += 2;
  1167. }
  1168. // Line feed
  1169. sourceRow.increaseBytes(source.stride);
  1170. upperTargetRow.increaseBytes(doubleTargetStride);
  1171. lowerTargetRow.increaseBytes(doubleTargetStride);
  1172. }
  1173. #else
  1174. blockMagnify_reference<false>(target, source, 2, 2, clipWidth, clipHeight);
  1175. #endif
  1176. }
  1177. // Pre-condition:
  1178. // * The source and target images have the same pack order
  1179. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1180. // * clipWidth % 3 == 0
  1181. // * clipHeight % 3 == 0
  1182. static void blockMagnify_3x3(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1183. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1184. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1185. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1186. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1187. int blockTargetStride = target.stride * 3;
  1188. for (int upperTargetY = 0; upperTargetY + 3 <= clipHeight; upperTargetY+=3) {
  1189. // Carriage return
  1190. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1191. SafePointer<uint32_t> targetPixelA = targetRowA;
  1192. SafePointer<uint32_t> targetPixelB = targetRowB;
  1193. SafePointer<uint32_t> targetPixelC = targetRowC;
  1194. int writeLeftX = 0;
  1195. while (writeLeftX + 3 <= clipWidth) {
  1196. // Read one pixel at a time
  1197. uint32_t scalarValue = *sourcePixel;
  1198. sourcePixel += 1;
  1199. // Write to a whole block of pixels
  1200. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue;
  1201. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue;
  1202. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue;
  1203. targetPixelA += 3;
  1204. targetPixelB += 3;
  1205. targetPixelC += 3;
  1206. // Count
  1207. writeLeftX += 3;
  1208. }
  1209. // Line feed
  1210. sourceRow.increaseBytes(source.stride);
  1211. targetRowA.increaseBytes(blockTargetStride);
  1212. targetRowB.increaseBytes(blockTargetStride);
  1213. targetRowC.increaseBytes(blockTargetStride);
  1214. }
  1215. }
  1216. // Pre-condition:
  1217. // * The source and target images have the same pack order
  1218. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1219. // * clipWidth % 4 == 0
  1220. // * clipHeight % 4 == 0
  1221. static void blockMagnify_4x4(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1222. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1223. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1224. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1225. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1226. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1227. int quadTargetStride = target.stride * 4;
  1228. for (int upperTargetY = 0; upperTargetY + 4 <= clipHeight; upperTargetY+=4) {
  1229. // Carriage return
  1230. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1231. SafePointer<uint32_t> targetPixelA = targetRowA;
  1232. SafePointer<uint32_t> targetPixelB = targetRowB;
  1233. SafePointer<uint32_t> targetPixelC = targetRowC;
  1234. SafePointer<uint32_t> targetPixelD = targetRowD;
  1235. int writeLeftX = 0;
  1236. while (writeLeftX + 4 <= clipWidth) {
  1237. // Read one pixel at a time
  1238. uint32_t scalarValue = *sourcePixel;
  1239. sourcePixel += 1;
  1240. // Convert scalar to SIMD vector of 4 repeated pixels
  1241. ALIGN16 U32x4 sourcePixels = U32x4(scalarValue);
  1242. // Write to 4x4 pixels using 4 SIMD writes
  1243. sourcePixels.writeAligned(targetPixelA, "blockMagnify_4x4 @ write A");
  1244. sourcePixels.writeAligned(targetPixelB, "blockMagnify_4x4 @ write B");
  1245. sourcePixels.writeAligned(targetPixelC, "blockMagnify_4x4 @ write C");
  1246. sourcePixels.writeAligned(targetPixelD, "blockMagnify_4x4 @ write D");
  1247. targetPixelA += 4;
  1248. targetPixelB += 4;
  1249. targetPixelC += 4;
  1250. targetPixelD += 4;
  1251. // Count
  1252. writeLeftX += 4;
  1253. }
  1254. // Line feed
  1255. sourceRow.increaseBytes(source.stride);
  1256. targetRowA.increaseBytes(quadTargetStride);
  1257. targetRowB.increaseBytes(quadTargetStride);
  1258. targetRowC.increaseBytes(quadTargetStride);
  1259. targetRowD.increaseBytes(quadTargetStride);
  1260. }
  1261. }
  1262. // Pre-condition:
  1263. // * The source and target images have the same pack order
  1264. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1265. // * clipWidth % 5 == 0
  1266. // * clipHeight % 5 == 0
  1267. static void blockMagnify_5x5(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1268. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1269. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1270. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1271. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1272. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1273. SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
  1274. int blockTargetStride = target.stride * 5;
  1275. for (int upperTargetY = 0; upperTargetY + 5 <= clipHeight; upperTargetY+=5) {
  1276. // Carriage return
  1277. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1278. SafePointer<uint32_t> targetPixelA = targetRowA;
  1279. SafePointer<uint32_t> targetPixelB = targetRowB;
  1280. SafePointer<uint32_t> targetPixelC = targetRowC;
  1281. SafePointer<uint32_t> targetPixelD = targetRowD;
  1282. SafePointer<uint32_t> targetPixelE = targetRowE;
  1283. int writeLeftX = 0;
  1284. while (writeLeftX + 5 <= clipWidth) {
  1285. // Read one pixel at a time
  1286. uint32_t scalarValue = *sourcePixel;
  1287. sourcePixel += 1;
  1288. // Write to a whole block of pixels
  1289. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue;
  1290. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue;
  1291. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue;
  1292. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue;
  1293. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue;
  1294. targetPixelA += 5;
  1295. targetPixelB += 5;
  1296. targetPixelC += 5;
  1297. targetPixelD += 5;
  1298. targetPixelE += 5;
  1299. // Count
  1300. writeLeftX += 5;
  1301. }
  1302. // Line feed
  1303. sourceRow.increaseBytes(source.stride);
  1304. targetRowA.increaseBytes(blockTargetStride);
  1305. targetRowB.increaseBytes(blockTargetStride);
  1306. targetRowC.increaseBytes(blockTargetStride);
  1307. targetRowD.increaseBytes(blockTargetStride);
  1308. targetRowE.increaseBytes(blockTargetStride);
  1309. }
  1310. }
  1311. // Pre-condition:
  1312. // * The source and target images have the same pack order
  1313. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1314. // * clipWidth % 6 == 0
  1315. // * clipHeight % 6 == 0
  1316. static void blockMagnify_6x6(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1317. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1318. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1319. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1320. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1321. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1322. SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
  1323. SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
  1324. int blockTargetStride = target.stride * 6;
  1325. for (int upperTargetY = 0; upperTargetY + 6 <= clipHeight; upperTargetY+=6) {
  1326. // Carriage return
  1327. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1328. SafePointer<uint32_t> targetPixelA = targetRowA;
  1329. SafePointer<uint32_t> targetPixelB = targetRowB;
  1330. SafePointer<uint32_t> targetPixelC = targetRowC;
  1331. SafePointer<uint32_t> targetPixelD = targetRowD;
  1332. SafePointer<uint32_t> targetPixelE = targetRowE;
  1333. SafePointer<uint32_t> targetPixelF = targetRowF;
  1334. int writeLeftX = 0;
  1335. while (writeLeftX + 6 <= clipWidth) {
  1336. // Read one pixel at a time
  1337. uint32_t scalarValue = *sourcePixel;
  1338. sourcePixel += 1;
  1339. // Write to a whole block of pixels
  1340. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue;
  1341. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue;
  1342. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue;
  1343. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue;
  1344. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue;
  1345. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue;
  1346. targetPixelA += 6;
  1347. targetPixelB += 6;
  1348. targetPixelC += 6;
  1349. targetPixelD += 6;
  1350. targetPixelE += 6;
  1351. targetPixelF += 6;
  1352. // Count
  1353. writeLeftX += 6;
  1354. }
  1355. // Line feed
  1356. sourceRow.increaseBytes(source.stride);
  1357. targetRowA.increaseBytes(blockTargetStride);
  1358. targetRowB.increaseBytes(blockTargetStride);
  1359. targetRowC.increaseBytes(blockTargetStride);
  1360. targetRowD.increaseBytes(blockTargetStride);
  1361. targetRowE.increaseBytes(blockTargetStride);
  1362. targetRowF.increaseBytes(blockTargetStride);
  1363. }
  1364. }
  1365. // Pre-condition:
  1366. // * The source and target images have the same pack order
  1367. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1368. // * clipWidth % 7 == 0
  1369. // * clipHeight % 7 == 0
  1370. static void blockMagnify_7x7(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1371. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1372. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1373. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1374. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1375. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1376. SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
  1377. SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
  1378. SafePointer<uint32_t> targetRowG = imageInternal::getSafeData<uint32_t>(target, 6);
  1379. int blockTargetStride = target.stride * 7;
  1380. for (int upperTargetY = 0; upperTargetY + 7 <= clipHeight; upperTargetY+=7) {
  1381. // Carriage return
  1382. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1383. SafePointer<uint32_t> targetPixelA = targetRowA;
  1384. SafePointer<uint32_t> targetPixelB = targetRowB;
  1385. SafePointer<uint32_t> targetPixelC = targetRowC;
  1386. SafePointer<uint32_t> targetPixelD = targetRowD;
  1387. SafePointer<uint32_t> targetPixelE = targetRowE;
  1388. SafePointer<uint32_t> targetPixelF = targetRowF;
  1389. SafePointer<uint32_t> targetPixelG = targetRowG;
  1390. int writeLeftX = 0;
  1391. while (writeLeftX + 7 <= clipWidth) {
  1392. // Read one pixel at a time
  1393. uint32_t scalarValue = *sourcePixel;
  1394. sourcePixel += 1;
  1395. // Write to a whole block of pixels
  1396. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue;
  1397. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue;
  1398. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue;
  1399. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue;
  1400. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue;
  1401. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue;
  1402. targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue;
  1403. targetPixelA += 7;
  1404. targetPixelB += 7;
  1405. targetPixelC += 7;
  1406. targetPixelD += 7;
  1407. targetPixelE += 7;
  1408. targetPixelF += 7;
  1409. targetPixelG += 7;
  1410. // Count
  1411. writeLeftX += 7;
  1412. }
  1413. // Line feed
  1414. sourceRow.increaseBytes(source.stride);
  1415. targetRowA.increaseBytes(blockTargetStride);
  1416. targetRowB.increaseBytes(blockTargetStride);
  1417. targetRowC.increaseBytes(blockTargetStride);
  1418. targetRowD.increaseBytes(blockTargetStride);
  1419. targetRowE.increaseBytes(blockTargetStride);
  1420. targetRowF.increaseBytes(blockTargetStride);
  1421. targetRowG.increaseBytes(blockTargetStride);
  1422. }
  1423. }
  1424. // Pre-condition:
  1425. // * The source and target images have the same pack order
  1426. // * Both source and target are 16-byte aligned, but does not have to own their padding
  1427. // * clipWidth % 8 == 0
  1428. // * clipHeight % 8 == 0
  1429. static void blockMagnify_8x8(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
  1430. const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
  1431. SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
  1432. SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
  1433. SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
  1434. SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
  1435. SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
  1436. SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
  1437. SafePointer<uint32_t> targetRowG = imageInternal::getSafeData<uint32_t>(target, 6);
  1438. SafePointer<uint32_t> targetRowH = imageInternal::getSafeData<uint32_t>(target, 7);
  1439. int blockTargetStride = target.stride * 8;
  1440. for (int upperTargetY = 0; upperTargetY + 8 <= clipHeight; upperTargetY+=8) {
  1441. // Carriage return
  1442. const SafePointer<uint32_t> sourcePixel = sourceRow;
  1443. SafePointer<uint32_t> targetPixelA = targetRowA;
  1444. SafePointer<uint32_t> targetPixelB = targetRowB;
  1445. SafePointer<uint32_t> targetPixelC = targetRowC;
  1446. SafePointer<uint32_t> targetPixelD = targetRowD;
  1447. SafePointer<uint32_t> targetPixelE = targetRowE;
  1448. SafePointer<uint32_t> targetPixelF = targetRowF;
  1449. SafePointer<uint32_t> targetPixelG = targetRowG;
  1450. SafePointer<uint32_t> targetPixelH = targetRowH;
  1451. int writeLeftX = 0;
  1452. while (writeLeftX + 8 <= clipWidth) {
  1453. // Read one pixel at a time
  1454. uint32_t scalarValue = *sourcePixel;
  1455. sourcePixel += 1;
  1456. // Write to a whole block of pixels
  1457. targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue; targetPixelA[7] = scalarValue;
  1458. targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue; targetPixelB[7] = scalarValue;
  1459. targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue; targetPixelC[7] = scalarValue;
  1460. targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue; targetPixelD[7] = scalarValue;
  1461. targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue; targetPixelE[7] = scalarValue;
  1462. targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue; targetPixelF[7] = scalarValue;
  1463. targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue; targetPixelG[7] = scalarValue;
  1464. targetPixelH[0] = scalarValue; targetPixelH[1] = scalarValue; targetPixelH[2] = scalarValue; targetPixelH[3] = scalarValue; targetPixelH[4] = scalarValue; targetPixelH[5] = scalarValue; targetPixelH[6] = scalarValue; targetPixelH[7] = scalarValue;
  1465. targetPixelA += 8;
  1466. targetPixelB += 8;
  1467. targetPixelC += 8;
  1468. targetPixelD += 8;
  1469. targetPixelE += 8;
  1470. targetPixelF += 8;
  1471. targetPixelG += 8;
  1472. targetPixelH += 8;
  1473. // Count
  1474. writeLeftX += 8;
  1475. }
  1476. // Line feed
  1477. sourceRow.increaseBytes(source.stride);
  1478. targetRowA.increaseBytes(blockTargetStride);
  1479. targetRowB.increaseBytes(blockTargetStride);
  1480. targetRowC.increaseBytes(blockTargetStride);
  1481. targetRowD.increaseBytes(blockTargetStride);
  1482. targetRowE.increaseBytes(blockTargetStride);
  1483. targetRowF.increaseBytes(blockTargetStride);
  1484. targetRowG.increaseBytes(blockTargetStride);
  1485. targetRowH.increaseBytes(blockTargetStride);
  1486. }
  1487. }
  1488. static void blackEdges(ImageRgbaU8Impl& target, int excludedWidth, int excludedHeight) {
  1489. // Right side
  1490. drawSolidRectangleMemset<Color4xU8>(target, excludedWidth, 0, target.width, excludedHeight, 0);
  1491. // Bottom and corner
  1492. drawSolidRectangleMemset<Color4xU8>(target, 0, excludedHeight, target.width, target.height, 0);
  1493. }
  1494. void dsr::imageImpl_blockMagnify(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int pixelWidth, int pixelHeight) {
  1495. if (pixelWidth < 1) { pixelWidth = 1; }
  1496. if (pixelHeight < 1) { pixelHeight = 1; }
  1497. bool sameOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex;
  1498. // Find the part of source which fits into target with whole pixels
  1499. int clipWidth = roundDown(std::min(target.width, source.width * pixelWidth), pixelWidth);
  1500. int clipHeight = roundDown(std::min(target.height, source.height * pixelHeight), pixelHeight);
  1501. if (sameOrder) {
  1502. if (imageIs16ByteAligned(source) && imageIs16ByteAligned(target)) {
  1503. if (pixelWidth == 2 && pixelHeight == 2) {
  1504. blockMagnify_2x2(target, source, clipWidth, clipHeight);
  1505. } else if (pixelWidth == 3 && pixelHeight == 3) {
  1506. blockMagnify_3x3(target, source, clipWidth, clipHeight);
  1507. } else if (pixelWidth == 4 && pixelHeight == 4) {
  1508. blockMagnify_4x4(target, source, clipWidth, clipHeight);
  1509. } else if (pixelWidth == 5 && pixelHeight == 5) {
  1510. blockMagnify_5x5(target, source, clipWidth, clipHeight);
  1511. } else if (pixelWidth == 6 && pixelHeight == 6) {
  1512. blockMagnify_6x6(target, source, clipWidth, clipHeight);
  1513. } else if (pixelWidth == 7 && pixelHeight == 7) {
  1514. blockMagnify_7x7(target, source, clipWidth, clipHeight);
  1515. } else if (pixelWidth == 8 && pixelHeight == 8) {
  1516. blockMagnify_8x8(target, source, clipWidth, clipHeight);
  1517. } else {
  1518. blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  1519. }
  1520. } else {
  1521. blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  1522. }
  1523. } else {
  1524. blockMagnify_reference<true>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
  1525. }
  1526. blackEdges(target, clipWidth, clipHeight);
  1527. }