| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530 |
-
- // zlib open source license
- //
- // Copyright (c) 2025 David Forsgren Piuva
- //
- // This software is provided 'as-is', without any express or implied
- // warranty. In no event will the authors be held liable for any damages
- // arising from the use of this software.
- //
- // Permission is granted to anyone to use this software for any purpose,
- // including commercial applications, and to alter it and redistribute it
- // freely, subject to the following restrictions:
- //
- // 1. The origin of this software must not be misrepresented; you must not
- // claim that you wrote the original software. If you use this software
- // in a product, an acknowledgment in the product documentation would be
- // appreciated but is not required.
- //
- // 2. Altered source versions must be plainly marked as such, and must not be
- // misrepresented as being the original software.
- //
- // 3. This notice may not be removed or altered from any source
- // distribution.
- // Everything stored directly in the image types is immutable to allow value types to behave like reference types using the data that they point to.
- // Image types can not be dynamically casted, because the inheritance is entirely static without any virtual functions.
- // TODO: Create a fast way to generate masks from an exponential scale floating mip level taken from sampling distances.
- // float samplingDistance (input expressed as some kind of distance in the uv coordinates between two adjacent pixels)
- // uint32_t tileXYMask (tiling should be applied to X and Y using the same mask after limiting to 16 bit integers)
- // uint32_t maxLevelMask
- // So how do we get the weights without shifting bits by the actual bit offset?
- // Maybe add one to the mask to get a single bit and then multiply.
- /*
- TODO: Try to handle negative texture coordinates and let positive UV be an optimization flag to enable when known to be valid.
- Convert to int32_t with less range and convert to unsigned correctly in modulo of 24 bits.
- // Use leading zeroes to create a mask, which can be turned into a power of two by adding one.
- // 0001000000000000 -> 0001111111111111
- // 0001011001000100 -> 0001111111111111
- // 0001111111111111 -> 0001111111111111
- // 0000010000000000 -> 0000011111111111
- // 0000010110010001 -> 0000011111111111
- // 0000011111111111 -> 0000011111111111
- // 0000000000100000 -> 0000000000111111
- // 0000000000101100 -> 0000000000111111
- // 0000000000111111 -> 0000000000111111
- uint16_t maskFromLeadingZeroes(uint16_t value) {
- // Turning 10 into 11
- uint16_t result = value | (value >> 1);
- // Turning 1100 into 1111
- result = result | (result >> 2);
- // Turning 11110000 into 11111111
- result = result | (result >> 4);
- // Turning 1111111100000000 into 1111111111111111
- result = result | (result >> 8);
- }
- Generate masks for sampling a specific texture at a specific mip level.
- They can then be reused for multiple samples.
- Pre-condition:
- 0.0f < samplingDistance
- Use min, max, absm et cetera to create a positive sampling distance.
- void createMasks(float samplingDistance) {
- uint32_t density = truncateToU32(reciprocal(samplingDistance));
- // Intel SSE2 does not have dynamic offset bit shifts, because it can only shift by constant bit offsets or dynamic byte offsets.
- // SSE2, AVX2 and NEON have low 16-bit unsigned multiplication.
- // _mm_mullo_epi16, _mm256_mullo_epi16 and vmulq_u16
- // Using lower bits might however not be enough and might take more time than simply shifting with scalar operations.
- // Then we might as well use SIMD comparisons and make bit masks the way to implement it on all platforms.
- // Because returning 1 can be used to return a mask as a fallback.
- // And one can also create many overloads for direct selection without the mask in between for future optimization.
- // Let textures created from images have 4 mip levels by default, and allow increasing the maximum depth with an optional argument.
- // Then make three comparisons to select a mip level.
- uint16_t mask = maskFromLeadingZeroes(density);
- // scale is a power of two 16-bit integer used to multiply uv coordinates.
- // But SSE2 also does not have 32-bit integer multiplication, so stay in 16 bits or use bit shifts!
- // Split into whole pixels and weights before the multiplication somehow.
- uint16_t scale = mask + 1;
- // Cast directly to uint16_t with saturation.
- tileXMask = texture.minimumWidth * scale;
- tileYMask = texture.minimumHeight * scale;
- startOffset = texture.startOffsetMask * scale * scale;
- }
- */
- #ifndef DFPSR_API_TEXTURE
- #define DFPSR_API_TEXTURE
- #include "../image/Texture.h"
- #include "../image/Image.h"
- #ifndef NDEBUG
- #include "../api/stringAPI.h"
- #endif
- #include "../base/DsrTraits.h"
- namespace dsr {
- // Post-condition: Returns true iff texture exists.
- inline bool texture_exists(const Texture &texture) { return texture.impl_buffer.isNotNull(); }
- // Post-condition: Returns the width in pixels for the highest resolution at mip level 0.
- inline int32_t texture_getMaxWidth(const Texture &texture) { return int32_t(1) << texture.impl_log2width; }
- // Post-condition: Returns the width in pixels for the resolution at mipLevel.
- inline int32_t texture_getWidth(const Texture &texture, uint32_t mipLevel) { return int32_t(1) << (texture.impl_log2width - mipLevel); }
- // Post-condition: Returns the height in pixels for the highest resolution at mip level 0.
- inline int32_t texture_getMaxHeight(const Texture &texture) { return int32_t(1) << texture.impl_log2height; }
- // Post-condition: Returns the height in pixels for the resolution at mipLevel.
- inline int32_t texture_getHeight(const Texture &texture, uint32_t mipLevel) { return int32_t(1) << (texture.impl_log2height - mipLevel); }
- // Get the maximum mip level, with zero overhead.
- // Post-condition: Returns an index to the highest mip level.
- inline int32_t texture_getSmallestMipLevel(const TextureRgbaU8& texture) { return texture.impl_maxMipLevel; }
- // Get the number of mip levels, or zero if the texture does not exist.
- // Useful for looping over all mip levels in a texture, by automatically skipping texture with no mip levels.
- // Post-condition: Returns the number of mip levels.
- inline int32_t texture_getMipLevelCount(const TextureRgbaU8& texture) { return texture_exists(texture) ? texture.impl_maxMipLevel + 1 : 0; }
- // Post-condition: Returns true iff texture has more than one mip level, so that updating the highest resolution needs to update lower layers.
- inline bool texture_hasPyramid(const Texture &texture) { return texture.impl_maxMipLevel != 0; }
- // Side-effect: Update all lower resolutions from the highest resolution using a basic linear average.
- void texture_generatePyramid(const TextureRgbaU8& texture);
- // mipLevel starts from 0 at the highest resolution and ends with the lowest resolution.
- // Pre-condition:
- // 0 <= mipLevel <= 15
- // Post-condition:
- // Returns the number of pixels from lower resolutions before the start of mipLevel.
- // Always returns 0 when there is only one mip level available.
- template<
- bool HIGHEST_RESOLUTION = false,
- typename U, // uint32_t, U32x4, U32x8, U32xX
- DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))>
- inline U texture_getPixelOffsetToLayer(const TextureRgbaU8 &texture, U mipLevel) {
- if (HIGHEST_RESOLUTION) {
- return U(texture.impl_startOffset);
- } else {
- return U(texture.impl_startOffset) & (U(texture.impl_maxLevelMask) >> bitShiftLeftImmediate<1>(mipLevel));
- }
- }
- // mipLevel starts from 0 at the highest resolution and ends with the lowest resolution.
- // Optimization arguments:
- // * SQUARE can be set to true when you know in compile time that texture has the same width and height.
- // * SINGLE_LAYER can be set to true when you know in compile time that there will only be a single resolution in the texture.
- // * XY_INSIDE can be set to true if you know that the pixel coordinates will always be within texture bounds (0 <= x < width, 0 <= y < height) without tiling.
- // * MIP_INSIDE can be set to true if you know that the mip level will always be within used indices (mipLevel <= texture_getSmallestMipLevel(texture)) without clamping.
- // Either way, mipLevel must always be within the 0..15 range, because dynamic bit shifting might truncate offsets that are too big.
- // * HIGHEST_RESOLUTION can be set to true if you want to ignore mipLevel and always sample the highest resolution at mipLevel 0.
- // Pre-condition:
- // mipLevel <= 15
- // Post-condition:
- // Returns the number of pixels before the pixel at (x, y) in mipLevel.
- template<
- bool SQUARE = false, // Width and height must be the same.
- bool SINGLE_LAYER = false, // Demanding that the texture only has a single layer.
- bool XY_INSIDE = false, // No pixels may be sampled outside.
- bool MIP_INSIDE = false, // Mip level may not go outside of existing layer indices.
- bool HIGHEST_RESOLUTION = false, // Ignoring any lower layers.
- typename U, // uint32_t, U32x4, U32x8, U32xX
- DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))>
- inline U texture_getPixelOffset(const TextureRgbaU8 &texture, U x, U y, U mipLevel) {
- // TODO: Reuse the tile masks when sampling a whole neighborhood for bi-linear sampling.
- // Clamp the mip-level using bitwise operations in a logarithmic scale, by masking out excess bits with zeroes and filling missing bits with ones.
- U tileMaskX = U(texture.impl_maxWidthAndMask );
- U tileMaskY = U(texture.impl_maxHeightAndMask);
- if (!HIGHEST_RESOLUTION) {
- tileMaskX = tileMaskX >> mipLevel;
- tileMaskY = tileMaskY >> mipLevel;
- }
- if (!MIP_INSIDE) {
- // If the mip level index might be higher than what is used in the texture, make sure that the tile masks have at least enough bits for the lowest texture resolution.
- tileMaskX = tileMaskX | texture.impl_minWidthOrMask;
- if (!SQUARE) {
- tileMaskY = tileMaskY | texture.impl_minHeightOrMask;
- }
- }
- U log2PixelStride = U(texture.impl_log2width);
- if (!HIGHEST_RESOLUTION) {
- log2PixelStride = log2PixelStride - mipLevel;
- }
- if (!XY_INSIDE) {
- x = x & tileMaskX;
- if (SQUARE) {
- // Apply the same mask to both for square images, so that the other mask can be optimized away.
- y = y & tileMaskX;
- } else {
- // Apply a separate mask for Y coordinates when the texture might not be square.
- y = y & tileMaskY;
- }
- }
- U coordinateOffset = ((y << log2PixelStride) | x);
- #ifndef NDEBUG
- // In debug mode, wrong use of optimization arguments will throw errors.
- if (SQUARE && (texture.impl_log2width != texture.impl_log2height)) {
- throwError(U"texture_getPixelOffset was told that the texture would have square dimensions using SQUARE, but ", texture_getMaxWidth(texture), U"x", texture_getMaxHeight(texture), U" is not square!\n");
- }
- if (SINGLE_LAYER && (texture_getSmallestMipLevel(texture) > 0)) {
- throwError(U"texture_getPixelOffset was told that the texture would only have a single layer using SINGLE_LAYER, but it has ", texture_getSmallestMipLevel(texture) + 1, U" layers!\n");
- }
- if (XY_INSIDE && !(allLanesEqual(x & ~tileMaskX, U(0)) && allLanesEqual(y & ~tileMaskY, U(0)))) {
- throwError(U"texture_getPixelOffset was told that the pixel coordinates would stay inside using XY_INSIDE, but the coordinate (", x, U", ", y, U") is not within", texture_getMaxWidth(texture), U"x", texture_getMaxHeight(texture), U" pixels!\n");
- }
- if (!HIGHEST_RESOLUTION) {
- if (!allLanesLesserOrEqual(mipLevel, U(15u))) {
- throwError(U"texture_getPixelOffset got mip level ", mipLevel, U", which is not within the fixed range of 0..15!\n");
- }
- if (MIP_INSIDE) {
- if (!allLanesLesserOrEqual(mipLevel, U(texture_getSmallestMipLevel(texture)))) {
- throwError(U"texture_getPixelOffset was told that the mip level would stay within valid indices using MIP_INSIDE, but mip level ", mipLevel, U" is not within 0..", texture_getSmallestMipLevel(texture), U"!\n");
- }
- }
- }
- #endif
- if (SINGLE_LAYER) {
- return coordinateOffset;
- } else {
- U startOffset = texture_getPixelOffsetToLayer<HIGHEST_RESOLUTION, U>(texture, mipLevel);
- return startOffset + coordinateOffset;
- }
- }
- template<
- bool SQUARE = false,
- bool SINGLE_LAYER = false,
- bool XY_INSIDE = false,
- bool MIP_INSIDE = false,
- bool HIGHEST_RESOLUTION = false,
- typename U, // uint32_t, U32x4, U32x8, U32xX
- DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))>
- inline U texture_readPixel(const TextureRgbaU8 &texture, U x, U y, U mipLevel) {
- #ifndef NDEBUG
- if (!texture_exists(texture)) {
- throwError(U"Tried to read pixels from a texture that does not exist!\n");
- }
- if (!HIGHEST_RESOLUTION) {
- if (!allLanesLesserOrEqual(mipLevel, U(15u))) {
- throwError(U"Tried to read pixels from mip level ", mipLevel, U", which is outside of the allowed 4-bit range 0..4!\n");
- }
- }
- #endif
- SafePointer<uint32_t> data = texture.impl_buffer.getSafe<uint32_t>("RgbaU8 pyramid pixel buffer for pixel reading");
- return gather_U32(data, texture_getPixelOffset<SQUARE, SINGLE_LAYER, XY_INSIDE, MIP_INSIDE, HIGHEST_RESOLUTION, U>(texture, x, y, mipLevel));
- }
- // Pre-condition:
- // 0 <= mipLevel <= 15
- inline void texture_writePixel(const TextureRgbaU8 &texture, uint32_t x, uint32_t y, uint32_t mipLevel, uint32_t packedColor) {
- #ifndef NDEBUG
- if (!texture_exists(texture)) {
- throwError(U"Tried to write a pixel to a texture that does not exist!\n");
- }
- if (mipLevel > 15u) {
- throwError(U"Tried to write a pixel to mip level ", mipLevel, U", which is outside of the allowed 4-bit range 0..4!\n");
- }
- #endif
- SafePointer<uint32_t> data = texture.impl_buffer.getSafe<uint32_t>("RgbaU8 pyramid pixel buffer for pixel writing");
- data[texture_getPixelOffset<false, false, false, false, false, uint32_t>(texture, x, y, mipLevel)] = packedColor;
- }
- // TODO: Use these template arguments in RgbaMultiply.h to improve performance for square textures with at least 4 mip levels and UV coordinates inside of the texture.
- // TODO: Can EXISTS be an argument to disable when non-existing images should be replaced with U(255u) for fast prototyping?
- // Sample the nearest pixel in a normalized UV scale where one unit equals one lap around the image.
- // Pre-condition:
- // 0.0f <= u, 0.0f <= v
- // Negative texture coordinates are not allowed, because they are converted to unsigned integers for bitwise operations.
- template<
- bool SQUARE = false,
- bool SINGLE_LAYER = false,
- bool MIP_INSIDE = false,
- bool HIGHEST_RESOLUTION = false,
- typename U, // uint32_t, U32x4, U32x8, U32xX
- typename F, // float, F32x4, F32x8, F32xX, F32xF
- DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U) && DSR_CHECK_PROPERTY(DsrTrait_Any_F32, F))>
- inline U texture_sample_nearest(const TextureRgbaU8 &texture, F u, F v, U mipLevel) {
- U scaleU = U(1u) << U(texture.impl_log2width );
- U scaleV = U(1u) << U(texture.impl_log2height);
- if (!HIGHEST_RESOLUTION) {
- scaleU = scaleU >> mipLevel;
- scaleV = scaleV >> mipLevel;
- }
- U xPixel = truncateToU32(u * floatFromU32(scaleU));
- U yPixel = truncateToU32(v * floatFromU32(scaleV));
- return texture_readPixel<SQUARE, SINGLE_LAYER, false, MIP_INSIDE, HIGHEST_RESOLUTION, U>(texture, xPixel, yPixel, mipLevel);
- }
- // Returns (colorA * weightA + colorB * weightB) / 256 as bytes
- // weightA and weightB should contain pairs of the same 16-bit weights for each of the 4 pixels in the corresponding A and B colors
- template <typename U32, typename U16, DSR_ENABLE_IF(
- DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U32) &&
- DSR_CHECK_PROPERTY(DsrTrait_Any_U16, U16)
- )>
- inline U32 weightColors(const U32 &colorA, const U16 &weightA, const U32 &colorB, const U16 &weightB) {
- U32 lowMask(0x00FF00FFu);
- U16 lowColorA = reinterpret_U16FromU32(colorA & lowMask);
- U16 lowColorB = reinterpret_U16FromU32(colorB & lowMask);
- U32 highMask(0xFF00FF00u);
- U16 highColorA = reinterpret_U16FromU32(bitShiftRightImmediate<8>(colorA & highMask));
- U16 highColorB = reinterpret_U16FromU32(bitShiftRightImmediate<8>(colorB & highMask));
- U32 lowColor = reinterpret_U32FromU16(((lowColorA * weightA) + (lowColorB * weightB)));
- U32 highColor = reinterpret_U32FromU16(((highColorA * weightA) + (highColorB * weightB)));
- return ((bitShiftRightImmediate<8>(lowColor) & lowMask) | (highColor & highMask));
- }
- // The more significant bits must be zero so that the lower bits can fill the space.
- // lowBits[x] < 2^16
- template <typename U32, DSR_ENABLE_IF(
- DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U32)
- )>
- inline auto repeatAs16Bits(const U32 &lowBits) {
- return reinterpret_U16FromU32(lowBits | bitShiftLeftImmediate<16>(lowBits));
- }
- // Returns 256 - weight
- template <typename U16, DSR_ENABLE_IF(
- DSR_CHECK_PROPERTY(DsrTrait_Any_U16, U16)
- )>
- inline U16 invertWeight(const U16 &weight) {
- return U16(0x01000100u) - weight;
- }
- /* TODO: Use for anisotropic or tri-linear sampling.
- template <typename U32, typename U16>
- inline U32 mix_L(const U32 &colorA, const U32 &colorB, const U32 &weight) {
- // Get inverse weights
- U16 weightB = repeatAs16Bits(weight);
- U16 weightA = invertWeight(weightB);
- // Multiply
- return weightColors(colorA, weightA, colorB, weightB);
- }
- */
- template <typename U32, typename U16>
- inline U32 mix_BL(const U32 &colorA, const U32 &colorB, const U32 &colorC, const U32 &colorD, const U32 &weightX, const U32 &weightY) {
- // Get inverse weights
- U16 weightXR = repeatAs16Bits<U32>(weightX);
- U16 weightYB = repeatAs16Bits<U32>(weightY);
- U16 weightXL = invertWeight<U16>(weightXR);
- U16 weightYT = invertWeight<U16>(weightYB);
- // Multiply
- return weightColors<U32, U16>(weightColors(colorA, weightXL, colorB, weightXR), weightYT, weightColors(colorC, weightXL, colorD, weightXR), weightYB);
- }
- template<
- bool SQUARE = false,
- bool SINGLE_LAYER = false,
- bool MIP_INSIDE = false,
- bool HIGHEST_RESOLUTION = false,
- typename U32, // uint32_t, U32x4, U32x8, U32xX
- typename U16, // uint32_t, U32x4, U32x8, U32xX
- typename F32, // float, F32x4, F32x8, F32xX, F32xF
- DSR_ENABLE_IF(
- DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U32) &&
- DSR_CHECK_PROPERTY(DsrTrait_Any_U16, U16) &&
- DSR_CHECK_PROPERTY(DsrTrait_Any_F32, F32)
- )>
- inline U32 texture_sample_bilinear(const TextureRgbaU8 &texture, F32 u, F32 v, U32 mipLevel) {
- U32 scaleU = U32(256u) << U32(texture.impl_log2width );
- U32 scaleV = U32(256u) << U32(texture.impl_log2height);
- if (!HIGHEST_RESOLUTION) {
- scaleU = scaleU >> mipLevel;
- scaleV = scaleV >> mipLevel;
- }
- // Convert from the normalized 0..1 scale to a 0..size*256 scale for 8 bits of sub-pixel precision.
- // Half a pixel is subtracted so that the seam between bi-linear patches end up at the center of texels.
- U32 subCenterX = truncateToU32(u * floatFromU32(scaleU)) - U32(128);
- U32 subCenterY = truncateToU32(v * floatFromU32(scaleV)) - U32(128);
- // Get the remainders as interpolation weights.
- U32 weightX = subCenterX & 0xFF;
- U32 weightY = subCenterY & 0xFF;
- // Divide and truncate sub-pixel coordinates to get whole pixel coordinates.
- U32 pixelLeft = bitShiftRightImmediate<8>(subCenterX);
- U32 pixelTop = bitShiftRightImmediate<8>(subCenterY);
- U32 pixelRight = pixelLeft + 1;
- U32 pixelBottom = pixelTop + 1;
- // Generate pixel tiling masks.
- U32 tileMaskX = U32(texture.impl_maxWidthAndMask );
- U32 tileMaskY = U32(texture.impl_maxHeightAndMask);
- if (!HIGHEST_RESOLUTION) {
- tileMaskX = tileMaskX >> mipLevel;
- tileMaskY = tileMaskY >> mipLevel;
- }
- if (!MIP_INSIDE) {
- tileMaskX = tileMaskX | texture.impl_minWidthOrMask;
- if (!SQUARE) {
- tileMaskY = tileMaskY | texture.impl_minHeightOrMask;
- }
- }
- // Get the stride.
- U32 log2PixelStride = U32(texture.impl_log2width);
- if (!HIGHEST_RESOLUTION) {
- log2PixelStride = log2PixelStride - mipLevel;
- }
- // Apply tiling masks
- pixelLeft = pixelLeft & tileMaskX;
- pixelRight = pixelRight & tileMaskX;
- if (SQUARE) {
- // Apply the same mask to both for square images, so that the other mask can be optimized away.
- pixelTop = pixelTop & tileMaskX;
- pixelBottom = pixelBottom & tileMaskX;
- } else {
- // Apply a separate mask for Y coordinates when the texture might not be square.
- pixelTop = pixelTop & tileMaskY;
- pixelBottom = pixelBottom & tileMaskY;
- }
- #ifndef NDEBUG
- // In debug mode, wrong use of optimization arguments will throw errors.
- if (SQUARE && (texture.impl_log2width != texture.impl_log2height)) {
- throwError(U"texture_getPixelOffset was told that the texture would have square dimensions using SQUARE, but ", texture_getMaxWidth(texture), U"x", texture_getMaxHeight(texture), U" is not square!\n");
- }
- if (SINGLE_LAYER && (texture_getSmallestMipLevel(texture) > 0)) {
- throwError(U"texture_getPixelOffset was told that the texture would only have a single layer using SINGLE_LAYER, but it has ", texture_getSmallestMipLevel(texture) + 1, U" layers!\n");
- }
- if (!HIGHEST_RESOLUTION) {
- if (!allLanesLesserOrEqual(mipLevel, U32(15u))) {
- throwError(U"texture_getPixelOffset got mip level ", mipLevel, U", which is not within the fixed range of 0..15!\n");
- }
- if (MIP_INSIDE) {
- if (!allLanesLesserOrEqual(mipLevel, U32(texture_getSmallestMipLevel(texture)))) {
- throwError(U"texture_getPixelOffset was told that the mip level would stay within valid indices using MIP_INSIDE, but mip level ", mipLevel, U" is not within 0..", texture_getSmallestMipLevel(texture), U"!\n");
- }
- }
- }
- #endif
- U32 upperOffset = pixelTop << log2PixelStride;
- U32 bottomOffset = pixelBottom << log2PixelStride;
- U32 upperLeftOffset = upperOffset | pixelLeft;
- U32 upperRightOffset = upperOffset | pixelRight;
- U32 bottomLeftOffset = bottomOffset | pixelLeft;
- U32 bottomRightOffset = bottomOffset | pixelRight;
- if (!SINGLE_LAYER) {
- U32 layerStartOffset = texture_getPixelOffsetToLayer<HIGHEST_RESOLUTION, U32>(texture, mipLevel);
- upperLeftOffset = upperLeftOffset + layerStartOffset;
- upperRightOffset = upperRightOffset + layerStartOffset;
- bottomLeftOffset = bottomLeftOffset + layerStartOffset;
- bottomRightOffset = bottomRightOffset + layerStartOffset;
- }
- SafePointer<uint32_t> data = texture.impl_buffer.getSafe<uint32_t>("RgbaU8 pyramid pixel buffer for bi-linear pixel sampling");
- U32 upperLeftColor = gather_U32(data, upperLeftOffset );
- U32 upperRightColor = gather_U32(data, upperRightOffset );
- U32 bottomLeftColor = gather_U32(data, bottomLeftOffset );
- U32 bottomRightColor = gather_U32(data, bottomRightOffset);
- return mix_BL<U32, U16>(upperLeftColor, upperRightColor, bottomLeftColor, bottomRightColor, weightX, weightY);
- }
- // resolutions is the maximum number of resolutions to create.
- // The actual number of layers in the texture is limited by the most narrow dimension.
- // A texture of 16x4 pixels can have up to three resolutions, 4x1, 8x2 and 16x4.
- // A texture of 8x8 pixels can have up to four resolutions, 1x1, 2x2, 4x4 and 8x8.
- // Pre-condition:
- // 1 <= width <= 32768
- // 1 <= height <= 32768
- // 0 <= resolutions <= 16
- // Post-condition:
- // Returns a pyramid image of the smallest power of two size capable of storing width x height pixels, by scaling up the resolution with interpolation if needed.
- TextureRgbaU8 texture_create_RgbaU8(int32_t width, int32_t height, int32_t resolutions);
- // Pre-condition:
- // 1 <= width <= 32768
- // 1 <= height <= 32768
- // 1 <= resolutions
- // Post-condition:
- // Returns a pyramid image created from image, or an empty pyramid if the image is empty.
- TextureRgbaU8 texture_create_RgbaU8(const ImageRgbaU8& image, int32_t resolutions);
- // Get a layer from the texture as an image.
- // Pre-condition:
- // texture_exists(texture)
- // 0 <= mipLevel <= texture_getSmallestMipLevel(texture)
- // Post-condition:
- // Returns an unaligned RGBA image sharing pixel data with the requested texture layer.
- ImageRgbaU8 texture_getMipLevelImage(const TextureRgbaU8& texture, int32_t mipLevel);
- // TODO: Pre-calculate the pixel offset, float scales and tile masks and merge into a reusable multi-layer sampling method.
- // Because dynamic bit shifts can not be vectorized on Intel processors and would be the same for 2x2 pixels anyway.
- // The hard part will be to implement it for ARM SVE with variable width vectors, so maybe calculate the
- // 2x2 derivation sparsely, interpolate a floating mip level and do comparisons vectorized with blend instructions to select masks.
- template <typename F>
- inline uint32_t texture_getMipLevelIndex(const TextureRgbaU8 &source, const F &u, const F &v) {
- // TODO: Support reading elements from SIMD vectors of any size somehow. Can use SVE's maximum size of 2048 bits as the space to allocate in advance to aligned stack memory.
- // Assume that U is at least 128 bits wide and reuse the result for additional pixels if there is more.
- auto ua = u.get();
- auto va = v.get();
- float offsetUX = fabs(ua.x - ua.y); // Left U - Right U
- float offsetUY = fabs(ua.x - ua.z); // Top U - Bottom U
- float offsetVX = fabs(va.x - va.y); // Left V - Right V
- float offsetVY = fabs(va.x - va.z); // Top V - Bottom V
- float offsetU = max(offsetUX, offsetUY) * source.impl_floatMaxWidth;
- float offsetV = max(offsetVX, offsetVY) * source.impl_floatMaxHeight;
- float offset = max(offsetU, offsetV);
- int result = 0;
- // TODO: Can count leading zeroes be used with integers to use all available mip levels?
- // It would make MIP_INSIDE useless for optimization.
- if (offset > 2.0f) { result = 1; }
- if (offset > 4.0f) { result = 2; }
- if (offset > 8.0f) { result = 3; }
- if (offset > 16.0f) { result = 4; }
- // TODO: Should it be possible to configure the number of mip levels?
- return result;
- }
- // TODO: Optimize using template arguments.
- // Pre-conditions:
- // 0 <= mipLevel <= texture_getSmallestMipLevel(texture)
- // Post-condition:
- // Returns a safe pointer to the first pixel at mipLevel in texture.
- template <typename U = uint32_t>
- inline SafePointer<U> texture_getSafePointer(const TextureRgbaU8& texture, uint32_t mipLevel) {
- // Get a pointer to the start of the image.
- return texture.impl_buffer.getSafe<U>("RgbaU8 pyramid pixel buffer").increaseBytes(texture_getPixelOffsetToLayer(texture, mipLevel) * sizeof(uint32_t));
- }
- // TODO: Optimize using template arguments.
- // Pre-conditions:
- // 0 <= mipLevel <= texture_getSmallestMipLevel(texture)
- // 0 <= rowIndex < (1 << mipLevel)
- // Post-condition:
- // Returns a safe pointer to the first pixel at rowIndex in mipLevel in texture.
- template <typename U = uint32_t>
- inline SafePointer<U> texture_getSafePointer(const TextureRgbaU8& texture, int32_t mipLevel, int32_t rowIndex) {
- return texture_getSafePointer<U>(texture, mipLevel).increaseBytes(texture_getWidth(texture, mipLevel) * sizeof(uint32_t) * rowIndex);
- }
- }
- #endif
|