ImageRgbaU8.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. // zlib open source license
  2. //
  3. // Copyright (c) 2017 to 2023 David Forsgren Piuva
  4. //
  5. // This software is provided 'as-is', without any express or implied
  6. // warranty. In no event will the authors be held liable for any damages
  7. // arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any purpose,
  10. // including commercial applications, and to alter it and redistribute it
  11. // freely, subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented; you must not
  14. // claim that you wrote the original software. If you use this software
  15. // in a product, an acknowledgment in the product documentation would be
  16. // appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such, and must not be
  19. // misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source
  22. // distribution.
  23. #include "ImageRgbaU8.h"
  24. #include "internal/imageInternal.h"
  25. #include "internal/imageTemplate.h"
  26. #include "draw.h"
  27. #include <algorithm>
  28. #include "../base/simd.h"
  29. using namespace dsr;
  30. static const int pixelSize = 4;
  31. IMAGE_DEFINITION(ImageRgbaU8Impl, pixelSize, Color4xU8, uint8_t);
  32. ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset, const PackOrder &packOrder) :
  33. ImageImpl(newWidth, newHeight, newStride, sizeof(Color4xU8), buffer, startOffset), packOrder(packOrder) {
  34. assert(buffer_getSize(buffer) - startOffset >= imageInternal::getUsedBytes(this));
  35. this->initializeRgbaImage();
  36. }
  37. ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight) :
  38. ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), DSR_MAXIMUM_ALIGNMENT), sizeof(Color4xU8)) {
  39. this->initializeRgbaImage();
  40. }
  41. // Native canvas constructor
  42. ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex) :
  43. ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), DSR_MAXIMUM_ALIGNMENT), sizeof(Color4xU8)) {
  44. this->packOrder = PackOrder::getPackOrder(packOrderIndex);
  45. this->initializeRgbaImage();
  46. }
  47. bool ImageRgbaU8Impl::isTexture() const {
  48. return this->texture.exists();
  49. }
  50. bool ImageRgbaU8Impl::isTexture(const ImageRgbaU8Impl* image) {
  51. return image ? image->texture.exists() : false;
  52. }
  53. ImageRgbaU8Impl ImageRgbaU8Impl::getWithoutPadding() const {
  54. if (this->stride == this->width * pixelSize) {
  55. // No padding
  56. return *this;
  57. } else {
  58. // Copy each row without padding
  59. ImageRgbaU8Impl result = ImageRgbaU8Impl(this->width, this->height, this->packOrder.packOrderIndex);
  60. const SafePointer<uint8_t> sourceRow = imageInternal::getSafeData<uint8_t>(*this);
  61. int32_t sourceStride = this->stride;
  62. SafePointer<uint8_t> targetRow = imageInternal::getSafeData<uint8_t>(result);
  63. int32_t targetStride = result.stride;
  64. for (int32_t y = 0; y < this->height; y++) {
  65. safeMemoryCopy(targetRow, sourceRow, targetStride);
  66. sourceRow += sourceStride;
  67. targetRow += targetStride;
  68. }
  69. return result;
  70. }
  71. }
  72. static void extractChannel(SafePointer<uint8_t> targetData, int targetStride, const SafePointer<uint8_t> sourceData, int sourceStride, int sourceChannels, int channelIndex, int width, int height) {
  73. const SafePointer<uint8_t> sourceRow = sourceData + channelIndex;
  74. SafePointer<uint8_t> targetRow = targetData;
  75. for (int y = 0; y < height; y++) {
  76. const SafePointer<uint8_t> sourceElement = sourceRow;
  77. SafePointer<uint8_t> targetElement = targetRow;
  78. for (int x = 0; x < width; x++) {
  79. *targetElement = *sourceElement; // Copy one channel from the soruce
  80. sourceElement += sourceChannels; // Jump to the same channel in the next source pixel
  81. targetElement += 1; // Jump to the next monochrome target pixel
  82. }
  83. sourceRow.increaseBytes(sourceStride);
  84. targetRow.increaseBytes(targetStride);
  85. }
  86. }
  87. ImageU8Impl ImageRgbaU8Impl::getChannel(int32_t channelIndex) const {
  88. // Warning for debug mode
  89. assert(channelIndex >= 0 && channelIndex < channelCount);
  90. // Safety for release mode
  91. if (channelIndex < 0) { channelIndex = 0; }
  92. if (channelIndex > channelCount) { channelIndex = channelCount; }
  93. ImageU8Impl result(this->width, this->height);
  94. extractChannel(imageInternal::getSafeData<uint8_t>(result), result.stride, imageInternal::getSafeData<uint8_t>(*this), this->stride, channelCount, channelIndex, this->width, this->height);
  95. return result;
  96. }
  97. static const int32_t smallestSizeGroup = 5;
  98. static const int32_t largestSizeGroup = 14;
  99. static int32_t getSizeGroup(int32_t size) {
  100. int32_t group = -1;
  101. if (size == 1) {
  102. group = 0; // Too small for 16-byte alignment!
  103. } else if (size == 2) {
  104. group = 1; // Too small for 16-byte alignment! (SSE2)
  105. } else if (size == 4) {
  106. group = 2; // Too small for 32-byte alignment! (AVX2)
  107. } else if (size == 8) {
  108. group = 3; // Too small for 64-byte alignment! (AVX3)
  109. } else if (size == 16) {
  110. group = 4; // Too small for 128-byte alignment!
  111. } else if (size == 32) {
  112. group = 5; // Smallest allowed texture dimension, allowing 1024-bit SIMD.
  113. } else if (size == 64) {
  114. group = 6;
  115. } else if (size == 128) {
  116. group = 7;
  117. } else if (size == 256) {
  118. group = 8;
  119. } else if (size == 512) {
  120. group = 9;
  121. } else if (size == 1024) {
  122. group = 10;
  123. } else if (size == 2048) {
  124. group = 11;
  125. } else if (size == 4096) {
  126. group = 12;
  127. } else if (size == 8192) {
  128. group = 13;
  129. } else if (size == 16384) {
  130. group = 14; // Largest allowed texture dimension
  131. } // Higher dimensions should return -1, so that initializeRgbaImage avoids initializing the image as a texture and isTexture returns false
  132. return group;
  133. }
  134. inline int32_t sizeFromGroup(int32_t group) {
  135. return 1 << group;
  136. }
  137. // Round the size down, unless it is already too small.
  138. static int32_t roundSize(int32_t size) {
  139. for (int groupIndex = smallestSizeGroup; groupIndex < largestSizeGroup; groupIndex++) {
  140. int currentSize = sizeFromGroup(groupIndex);
  141. if (size < currentSize) {
  142. return currentSize;
  143. }
  144. }
  145. return sizeFromGroup(largestSizeGroup);
  146. }
  147. static int32_t getPyramidSize(int32_t width, int32_t height, int32_t levels) {
  148. uint32_t result = 0;
  149. uint32_t byteCount = width * height * pixelSize;
  150. for (int32_t l = 0; l < levels; l++) {
  151. result += byteCount; // Add image size to pyramid size
  152. byteCount = byteCount >> 2; // Divide size by 4
  153. }
  154. return (int32_t)result;
  155. }
  156. inline U32xX averageColor(const U32xX &colorA, const U32xX &colorB) {
  157. // TODO: Expand to 16 bits or use built in average intrinsics for full bit depth.
  158. // 7-bit precision for speed.
  159. return reinterpret_U32FromU8(reinterpret_U8FromU32((colorA >> 1) & U32xX(0b01111111011111110111111101111111)) + reinterpret_U8FromU32((colorB >> 1) & U32xX(0b01111111011111110111111101111111)));
  160. }
  161. inline U32xX pairwiseAverageColor(const U32xX &colorA, const U32xX &colorB) {
  162. // TODO: Vectorize with 32-bit unzipping of pixels and 8-bit average of channels.
  163. // Reference implementation
  164. ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsA[laneCountX_8Bit];
  165. ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsB[laneCountX_8Bit];
  166. ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsR[laneCountX_8Bit];
  167. colorA.writeAlignedUnsafe((uint32_t*)elementsA);
  168. colorB.writeAlignedUnsafe((uint32_t*)elementsB);
  169. int32_t halfPixels = laneCountX_32Bit / 2;
  170. for (int p = 0; p < halfPixels; p++) {
  171. for (int c = 0; c < 4; c++) {
  172. elementsR[p * 4 + c] = uint8_t((uint16_t(elementsA[p * 8 + c]) + uint16_t(elementsA[p * 8 + 4 + c])) >> 1);
  173. elementsR[(p + halfPixels) * 4 + c] = uint8_t((uint16_t(elementsB[p * 8 + c]) + uint16_t(elementsB[p * 8 + 4 + c])) >> 1);
  174. }
  175. }
  176. return U32xX::readAlignedUnsafe((uint32_t*)elementsR);
  177. }
  178. static void downScaleByTwo(SafePointer<uint32_t> targetData, const SafePointer<uint32_t> sourceData, int32_t targetWidth, int32_t targetHeight, int32_t targetStride) {
  179. int32_t sourceStride = targetStride * 2;
  180. int32_t doubleSourceStride = sourceStride * 2;
  181. SafePointer<uint32_t> targetRow = targetData;
  182. const SafePointer<uint32_t> sourceRow = sourceData;
  183. for (int32_t y = 0; y < targetHeight; y++) {
  184. const SafePointer<uint32_t> upperSourcePixel = sourceRow;
  185. const SafePointer<uint32_t> lowerSourcePixel = sourceRow;
  186. lowerSourcePixel.increaseBytes(sourceStride);
  187. SafePointer<uint32_t> targetPixel = targetRow;
  188. for (int32_t x = 0; x < targetWidth; x += laneCountX_32Bit) {
  189. U32xX upperLeft = U32xX::readAligned(upperSourcePixel, "upperLeftSource in downScaleByTwo");
  190. U32xX upperRight = U32xX::readAligned(lowerSourcePixel + laneCountX_32Bit, "upperLeftSource in downScaleByTwo");
  191. U32xX lowerLeft = U32xX::readAligned(lowerSourcePixel, "upperLeftSource in downScaleByTwo");
  192. U32xX lowerRight = U32xX::readAligned(lowerSourcePixel + laneCountX_32Bit, "upperLeftSource in downScaleByTwo");
  193. U32xX upperAverage = pairwiseAverageColor(upperLeft, upperRight);
  194. U32xX lowerAverage = pairwiseAverageColor(lowerLeft, lowerRight);
  195. U32xX finalAverage = averageColor(upperAverage, lowerAverage);
  196. finalAverage.writeAligned(targetPixel, "average result in downScaleByTwo");
  197. targetPixel += laneCountX_32Bit;
  198. upperSourcePixel += laneCountX_32Bit * 2;
  199. lowerSourcePixel += laneCountX_32Bit * 2;
  200. }
  201. targetRow.increaseBytes(targetStride);
  202. sourceRow.increaseBytes(doubleSourceStride);
  203. }
  204. }
  205. static void updatePyramid(TextureRgba &texture, int32_t layerCount) {
  206. // Downscale each following layer from the previous.
  207. for (int32_t targetIndex = 1; targetIndex < layerCount; targetIndex++) {
  208. int32_t sourceIndex = targetIndex - 1;
  209. int32_t targetWidth = texture.mips[targetIndex].width;
  210. int32_t targetHeight = texture.mips[targetIndex].height;
  211. downScaleByTwo(texture.data + texture.mips[targetIndex].startOffset, texture.data + texture.mips[sourceIndex].startOffset, targetWidth, targetHeight, targetWidth * pixelSize);
  212. }
  213. texture.layerCount = layerCount;
  214. }
  215. TextureRgbaLayer::TextureRgbaLayer() {}
  216. TextureRgbaLayer::TextureRgbaLayer(uint32_t startOffset, int32_t width, int32_t height) :
  217. startOffset(startOffset),
  218. widthShift(getSizeGroup(width)),
  219. widthMask(width - 1),
  220. heightMask(height - 1),
  221. width(width),
  222. height(height),
  223. subWidth(width * 256),
  224. subHeight(height * 256) {}
  225. void ImageRgbaU8Impl::generatePyramidStructure(int32_t layerCount) {
  226. int32_t currentWidth = this->width;
  227. int32_t currentHeight = this->height;
  228. // Allocate smaller pyramid images within the buffer
  229. uint32_t currentStart = 0;
  230. for (int32_t m = 0; m < layerCount; m++) {
  231. this->texture.mips[m] = TextureRgbaLayer(currentStart, currentWidth, currentHeight);
  232. currentStart += currentWidth * currentHeight;
  233. currentWidth /= 2;
  234. currentHeight /= 2;
  235. }
  236. // Fill unused mip levels with duplicates of the last mip level
  237. for (int32_t m = layerCount; m < MIP_BIN_COUNT; m++) {
  238. // m - 1 is never negative, because layerCount is clamped to at least 1 and nobody would choose zero for MIP_BIN_COUNT.
  239. this->texture.mips[m] = this->texture.mips[m - 1];
  240. }
  241. this->texture.layerCount = layerCount;
  242. this->texture.data = imageInternal::getSafeData<uint32_t>(*this);
  243. }
  244. void ImageRgbaU8Impl::removePyramidStructure() {
  245. // The mip layers have offsets relative to the texture's data pointer, which is already compensating for any offset from any parent image.
  246. for (int32_t m = 0; m < MIP_BIN_COUNT; m++) {
  247. this->texture.mips[m] = TextureRgbaLayer(0, this->width, this->height);
  248. }
  249. // Declare the old pyramid invalid so that it will not be displayed while rendering, but keep the extra memory for next time it is generated.
  250. this->texture.layerCount = 1;
  251. this->texture.data = imageInternal::getSafeData<uint32_t>(*this);
  252. }
  253. void ImageRgbaU8Impl::makeIntoTexture() {
  254. // Check if the image is a valid texture.
  255. if (!this->isTexture()) {
  256. // Get valid dimensions.
  257. int newWidth = roundSize(this->width);
  258. int newHeight = roundSize(this->height);
  259. // Create a new image with the correct dimensions.
  260. ImageRgbaU8Impl result = ImageRgbaU8Impl(newWidth, newHeight);
  261. // Resize the image content with bi-linear interpolation.
  262. imageImpl_resizeToTarget(result, *this, true);
  263. // Take over the new image's content.
  264. this->buffer = result.buffer;
  265. this->width = result.width;
  266. this->height = result.height;
  267. this->stride = result.stride;
  268. this->startOffset = 0; // Starts from the beginning.
  269. this->isSubImage = false; // No longer sharing buffer with any parent image.
  270. }
  271. }
  272. void ImageRgbaU8Impl::generatePyramid() {
  273. int32_t fullSizeGroup = getSizeGroup(std::min(this->width, this->height));
  274. int32_t layerCount = std::min(std::max(fullSizeGroup - smallestSizeGroup, 1), MIP_BIN_COUNT);
  275. if (this->texture.layerCount > 1) {
  276. // Regenerate smaller images without wasting time with any redundant checks,
  277. // because the image has already been approved the first time it had the pyramid allocated.
  278. updatePyramid(this->texture, layerCount);
  279. } else {
  280. // In the event of having to correct a bad image into a valid texture, there will be two reallocations.
  281. this->makeIntoTexture();
  282. Buffer oldBuffer = this->buffer;
  283. SafePointer<uint32_t> oldData = buffer_getSafeData<uint32_t>(oldBuffer, "Pyramid generation source") + this->startOffset;
  284. this->buffer = buffer_create(getPyramidSize(this->width, this->height, layerCount));
  285. this->generatePyramidStructure(layerCount);
  286. // Copy the image's old content while assuming that there is no padding.
  287. safeMemoryCopy(this->texture.data + this->texture.mips[0].startOffset, oldData, this->width * this->height * pixelSize);
  288. // Generate smaller images.
  289. updatePyramid(this->texture, layerCount);
  290. // Once an image had a pyramid generated, the new buffer will remain for as long as the image exists.
  291. this->texture.layerCount = layerCount;
  292. // Remove start offset because the old data has been cloned to create the new pyramid image.
  293. this->startOffset = 0;
  294. }
  295. }
  296. void ImageRgbaU8Impl::removePyramid() {
  297. // Duplicate the original image when no longer showing the pyramid.
  298. this->removePyramidStructure();
  299. }
  300. void ImageRgbaU8Impl::initializeRgbaImage() {
  301. // If the image fills the criterias of a texture
  302. if (getSizeGroup(this->width) >= smallestSizeGroup
  303. && getSizeGroup(this->height) >= smallestSizeGroup
  304. && this->stride == this->width * pixelSize) {
  305. // Initialize each mip bin to show the original image
  306. this->removePyramidStructure();
  307. }
  308. };
  309. Color4xU8 ImageRgbaU8Impl::packRgba(uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) const {
  310. return Color4xU8(this->packOrder.packRgba(red, green, blue, alpha));
  311. }
  312. Color4xU8 ImageRgbaU8Impl::packRgba(ColorRgbaI32 color) const {
  313. return Color4xU8(this->packOrder.packRgba(color.red, color.green, color.blue, color.alpha));
  314. }
  315. ColorRgbaI32 ImageRgbaU8Impl::unpackRgba(Color4xU8 rgba, const PackOrder& order) {
  316. return ColorRgbaI32(
  317. getRed(rgba.packed, order),
  318. getGreen(rgba.packed, order),
  319. getBlue(rgba.packed, order),
  320. getAlpha(rgba.packed, order)
  321. );
  322. }
  323. ColorRgbaI32 ImageRgbaU8Impl::unpackRgba(Color4xU8 rgba) const {
  324. return unpackRgba(rgba, this->packOrder);
  325. }
  326. Color4xU8 ImageRgbaU8Impl::packRgb(uint8_t red, uint8_t green, uint8_t blue) const {
  327. return Color4xU8(this->packOrder.packRgba(red, green, blue, 255));
  328. }
  329. Color4xU8 ImageRgbaU8Impl::packRgb(ColorRgbI32 color) const {
  330. return Color4xU8(this->packOrder.packRgba(color.red, color.green, color.blue, 255));
  331. }
  332. ColorRgbI32 ImageRgbaU8Impl::unpackRgb(Color4xU8 rgb, const PackOrder& order) {
  333. return ColorRgbI32(
  334. getRed(rgb.packed, order),
  335. getGreen(rgb.packed, order),
  336. getBlue(rgb.packed, order)
  337. );
  338. }
  339. ColorRgbI32 ImageRgbaU8Impl::unpackRgb(Color4xU8 rgb) const {
  340. return unpackRgb(rgb, this->packOrder);
  341. }