| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582 |
- /*
- Copyright (c) 2015, Intel Corporation
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include "ispc_texcomp.h"
- #include "kernel_astc_ispc.h"
- #include <cassert>
- #include <cstring>
- #include <algorithm>
- #include <vector>
- #include <limits>
- void GetProfile_astc_fast(astc_enc_settings* settings, int block_width, int block_height)
- {
- settings->block_width = block_width;
- settings->block_height = block_height;
- settings->channels = 3;
- settings->fastSkipTreshold = 5;
- settings->refineIterations = 2;
- }
- void GetProfile_astc_alpha_fast(astc_enc_settings* settings, int block_width, int block_height)
- {
- settings->block_width = block_width;
- settings->block_height = block_height;
- settings->channels = 4;
- settings->fastSkipTreshold = 5;
- settings->refineIterations = 2;
- }
- void GetProfile_astc_alpha_slow(astc_enc_settings* settings, int block_width, int block_height)
- {
- settings->block_width = block_width;
- settings->block_height = block_height;
- settings->channels = 4;
- settings->fastSkipTreshold = 64;
- settings->refineIterations = 2;
- }
// Host-side description of one ASTC block, consumed by pack_block().
//
// pack_block_c() reinterprets an ispc::astc_block pointer as this type, so the
// member order and types must not be changed independently of the ISPC
// declaration.
struct astc_block
{
    // --- weight grid ---
    int width;                    // weight grid width  (weights = width * height * planes)
    int height;                   // weight grid height
    bool dual_plane;              // true: two weight planes are stored
    int weight_range;             // row of range_table used to encode the weights
    uint8_t weights[64];          // quantized weights; pack_block() asserts at most 64 are used
    int color_component_selector; // 2-bit value written only when dual_plane is set

    // --- partitioning ---
    int partitions;               // partition count; stored as (partitions - 1) in 2 bits
    int partition_id;             // 10-bit value written when partitions > 1

    // --- color endpoints ---
    int color_endpoint_pairs;     // not read by the packing code in this file
    int channels;                 // not read by the packing code in this file
    int color_endpoint_modes[4];  // one endpoint mode per partition
    int endpoint_range;           // row of range_table used to encode the endpoints
    uint8_t endpoints[18];        // quantized endpoint values (2 per endpoint pair)
};
// Returns true when `value` fits in an unsigned field of `bits` bits,
// i.e. when 0 <= value < 2^bits.
bool can_store(int value, int bits)
{
    // The sign test comes first so the shift is only evaluated for
    // non-negative values, exactly as in a chain of early returns.
    return value >= 0 && value < (1 << bits);
}
- int pack_block_mode(astc_block* block)
- {
- int block_mode = 0;
- int D = !!block->dual_plane;
- int H = !!(block->weight_range >= 6);
- int DH = D * 2 + H;
- int R = block->weight_range + 2 - ((H > 0) ? 6 : 0);
- R = R / 2 + R % 2 * 4;
- if (can_store(block->width - 4, 2) && can_store(block->height - 2, 2))
- {
- int B = block->width - 4;
- int A = block->height - 2;
- block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | (R & 3);
- }
- if (can_store(block->width - 8, 2) && can_store(block->height - 2, 2))
- {
- int B = block->width - 8;
- int A = block->height - 2;
- block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | 4 | (R & 3);
- }
- if (can_store(block->width - 2, 2) && can_store(block->height - 8, 2))
- {
- int A = block->width - 2;
- int B = block->height - 8;
- block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | 8 | (R & 3);
- }
- if (can_store(block->width - 2, 2) && can_store(block->height - 6, 1))
- {
- int A = block->width - 2;
- int B = block->height - 6;
- block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | 12 | (R & 3);
- }
- if (can_store(block->width - 2, 1) && can_store(block->height - 2, 2))
- {
- int B = block->width;
- int A = block->height - 2;
- block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | 12 | (R & 3);
- }
- if (DH == 0 && can_store(block->width - 6, 2) && can_store(block->height - 6, 2))
- {
- int A = block->width - 6;
- int B = block->height - 6;
- block_mode = (B << 9) | 256 | (A << 5) | (R << 2);
- }
- return block_mode;
- }
// Quantization ranges, indexed by range number. Each row gives the number of
// plain bits and whether the encoding additionally carries a trit or a quint:
//   levels = (1 + 2 * trit + 4 * quint) << bits
int range_table[][3] =
{
    // bits, trit, quint
    { 1, 0, 0 }, //  0:   2 levels
    { 0, 1, 0 }, //  1:   3 levels
    { 2, 0, 0 }, //  2:   4 levels
    { 0, 0, 1 }, //  3:   5 levels
    { 1, 1, 0 }, //  4:   6 levels
    { 3, 0, 0 }, //  5:   8 levels
    { 1, 0, 1 }, //  6:  10 levels
    { 2, 1, 0 }, //  7:  12 levels
    { 4, 0, 0 }, //  8:  16 levels
    { 2, 0, 1 }, //  9:  20 levels
    { 3, 1, 0 }, // 10:  24 levels
    { 5, 0, 0 }, // 11:  32 levels
    { 3, 0, 1 }, // 12:  40 levels
    { 4, 1, 0 }, // 13:  48 levels
    { 6, 0, 0 }, // 14:  64 levels
    { 4, 0, 1 }, // 15:  80 levels
    { 5, 1, 0 }, // 16:  96 levels
    { 7, 0, 0 }, // 17: 128 levels
    { 5, 0, 1 }, // 18: 160 levels
    { 6, 1, 0 }, // 19: 192 levels
    { 8, 0, 0 }, // 20: 256 levels
};
- int get_levels(int range)
- {
- return (1 + 2 * range_table[range][1] + 4 * range_table[range][2]) << range_table[range][0];
- }
- int sequence_bits(int count, int range)
- {
- int bits = count * range_table[range][0];
- bits += (count * range_table[range][1] * 8 + 4) / 5;
- bits += (count * range_table[range][2] * 7 + 2) / 3;
- return bits;
- }
// ORs the low `bits` bits of `value` into the bit stream `data` at bit offset
// `*pos`, then advances `*pos` by `bits`.
//
// data  : destination buffer, addressed as bytes. The four bytes starting at
//         byte (*pos / 8) are read and written, so the caller must make sure
//         that window lies inside the buffer.
// pos   : in/out bit cursor.
// bits  : field width, 0..25 (the field plus a sub-byte shift of up to 7 must
//         fit in one 32-bit word).
// value : field contents; bits above `bits` are discarded so an over-wide
//         value cannot disturb neighbouring fields.
//
// Existing bits are only ever set, never cleared, so the target area must be
// zero beforehand. Bits are counted LSB-first within each byte, which matches
// the 32-bit word arithmetic below on little-endian hosts.
void set_bits(uint32_t data[4], int* pos, int bits, uint32_t value)
{
    assert(0 <= bits && bits <= 25);

    uint8_t* const window = reinterpret_cast<uint8_t*>(data) + *pos / 8;
    const uint32_t mask = (1u << bits) - 1u;

    // The window is generally not 4-byte aligned, so go through memcpy rather
    // than dereferencing a misaligned uint32_t pointer.
    uint32_t word;
    std::memcpy(&word, window, sizeof(word));
    word |= (value & mask) << (*pos % 8);
    std::memcpy(window, &word, sizeof(word));

    *pos += bits;
}
// Extracts the bit field input[a:b] (both ends inclusive, a >= b) and returns
// it right-aligned.
//
// The mask is built in unsigned arithmetic and the full-width case is handled
// separately, so fields of 31 and 32 bits are well defined (a signed
// `(1 << width) - 1` overflows for those widths).
uint32_t get_field(uint32_t input, int a, int b)
{
    assert(a >= b);
    assert(0 <= b && a <= 31);

    const int width = a - b + 1;
    const uint32_t mask = (width >= 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);

    return (input >> b) & mask;
}
- uint32_t get_bit(uint32_t input, int a)
- {
- return get_field(input, a, a);
- }
- void pack_five_trits(uint32_t data[4], int sequence[5], int* pos, int n)
- {
- int t[5];
- int m[5];
- for (int i = 0; i < 5; i++)
- {
- t[i] = sequence[i] >> n;
- m[i] = sequence[i] - (t[i] << n);
- }
- int C;
- if (t[1] == 2 && t[2] == 2)
- {
- C = 3 * 4 + t[0];
- }
- else if (t[2] == 2)
- {
- C = t[1] * 16 + t[0] * 4 + 3;
- }
- else
- {
- C = t[2] * 16 + t[1] * 4 + t[0];
- }
- int T;
- if (t[3] == 2 && t[4] == 2)
- {
- T = get_field(C, 4, 2) * 32 + 7 * 4 + get_field(C, 1, 0);
- }
- else
- {
- T = get_field(C, 4, 0);
- if (t[4] == 2)
- {
- T += t[3] * 128 + 3 * 32;
- }
- else
- {
- T += t[4] * 128 + t[3] * 32;
- }
- }
- uint32_t pack1 = 0;
- pack1 |= m[0];
- pack1 |= get_field(T, 1, 0) << n;
- pack1 |= m[1] << (2 + n);
- uint32_t pack2 = 0;
- pack2 |= get_field(T, 3, 2);
- pack2 |= m[2] << 2;
- pack2 |= get_field(T, 4, 4) << (2 + n);
- pack2 |= m[3] << (3 + n);
- pack2 |= get_field(T, 6, 5) << (3 + n * 2);
- pack2 |= m[4] << (5 + n * 2);
- pack2 |= get_field(T, 7, 7) << (5 + n * 3);
- set_bits(data, pos, 2 + n * 2, pack1);
- set_bits(data, pos, 6 + n * 3, pack2);
- }
- void pack_three_quint(uint32_t data[4], int sequence[3], int* pos, int n)
- {
- int q[3];
- int m[3];
- for (int i = 0; i < 3; i++)
- {
- q[i] = sequence[i] >> n;
- m[i] = sequence[i] - (q[i] << n);
- }
- int Q;
- if (q[0] == 4 && q[1] == 4)
- {
- Q = get_field(q[2], 1, 0) * 8 + 3 * 2 + get_bit(q[2], 2);
- }
- else
- {
- int C;
- if (q[1] == 4)
- {
- C = (q[0] << 3) + 5;
- }
- else
- {
- C = (q[1] << 3) + q[0];
- }
- if (q[2] == 4)
- {
- Q = get_field(~C, 2, 1) * 32 + get_field(C, 4, 3) * 8 + 3 * 2 + get_bit(C, 0);
- }
- else
- {
- Q = q[2] * 32 + get_field(C, 4, 0);
- }
- }
- uint32_t pack = 0;
- pack |= m[0];
- pack |= get_field(Q, 2, 0) << n;
- pack |= m[1] << (3 + n);
- pack |= get_field(Q, 4, 3) << (3 + n * 2);
- pack |= m[2] << (5 + n * 2);
- pack |= get_field(Q, 6, 5) << (5 + n * 3);
- set_bits(data, pos, 7 + n * 3, pack);
- }
- void pack_integer_sequence(uint32_t output_data[4], uint8_t sequence[], int pos, int count, int range)
- {
- int n = range_table[range][0];
- int bits = sequence_bits(count, range);
- int pos0 = pos;
- uint32_t data[5] = { 0 };
- if (range_table[range][1] == 1)
- {
- for (int j = 0; j < (count + 4) / 5; j++)
- {
- int temp[5] = { 0 };
- for (int i = 0; i < std::min(count - j * 5, 5); i++) temp[i] = sequence[j * 5 + i];
- pack_five_trits(data, temp, &pos, n);
- }
- }
- else if (range_table[range][2] == 1)
- {
- for (int j = 0; j < (count + 2) / 3; j++)
- {
- int temp[3] = { 0 };
- for (int i = 0; i < std::min(count - j * 3, 3); i++) temp[i] = sequence[j * 3 + i];
- pack_three_quint(data, temp, &pos, n);
- }
- }
- else
- {
- for (int i = 0; i < count; i++)
- {
- set_bits(data, &pos, n, sequence[i]);
- }
- }
- if (pos0 + bits < 96) data[3] = 0;
- if (pos0 + bits < 64) data[2] = 0;
- if (pos0 + bits < 32) data[1] = 0;
- data[(pos0 + bits) / 32] &= (1 << ((pos0 + bits) % 32)) - 1;
- for (int k = 0; k < 4; k++) output_data[k] |= data[k];
- }
// Returns `input` with its 32 bits in reverse order (bit 0 <-> bit 31,
// bit 1 <-> bit 30, ...).
uint32_t reverse_bits_32(uint32_t input)
{
    uint32_t v = input;

    // Swap neighbouring groups of 1, 2, 4, 8 and finally 16 bits. The stages
    // are independent of each other, so their order does not matter.
    v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
    v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
    v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
    v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);
    v = (v >> 16) | (v << 16);

    return v;
}
- void pack_block(uint32_t data[4], astc_block* block)
- {
- memset(data, 0, 16);
- int pos = 0;
- set_bits(data, &pos, 11, pack_block_mode(block));
- int num_weights = block->width * block->height * (block->dual_plane ? 2 : 1);
- int weight_bits = sequence_bits(num_weights, block->weight_range);
- int extra_bits = 0;
- assert(num_weights <= 64);
- assert(24 <= weight_bits && weight_bits <= 96);
- set_bits(data, &pos, 2, block->partitions - 1);
- if (block->partitions > 1)
- {
- set_bits(data, &pos, 10, block->partition_id);
- int min_cem = 16;
- int max_cem = 0;
- for (int j = 0; j < block->partitions; j++)
- {
- min_cem = std::min(min_cem, block->color_endpoint_modes[j]);
- max_cem = std::max(max_cem, block->color_endpoint_modes[j]);
- }
- assert(max_cem / 4 <= min_cem / 4 + 1);
- int CEM = block->color_endpoint_modes[0] << 2;
- if (max_cem != min_cem)
- {
- CEM = std::min(3, min_cem / 4 + 1);
- for (int j = 0; j < block->partitions; j++)
- {
- int c = block->color_endpoint_modes[j] / 4 - ((CEM & 3) - 1);
- int m = block->color_endpoint_modes[j] % 4;
- assert(c == 0 || c == 1);
- CEM |= c << (2 + j);
- CEM |= m << (2 + block->partitions + 2 * j);
- }
- extra_bits = 3 * block->partitions - 4;
- int pos2 = 128 - weight_bits - extra_bits;
- set_bits(data, &pos2, extra_bits, CEM >> 6);
- }
-
- set_bits(data, &pos, 6, CEM & 63);
- }
- else
- {
- set_bits(data, &pos, 4, block->color_endpoint_modes[0]);
- }
-
- if (block->dual_plane)
- {
- assert(block->partitions < 4);
- extra_bits += 2;
- int pos2 = 128 - weight_bits - extra_bits;
- set_bits(data, &pos2, 2, block->color_component_selector);
- }
- int config_bits = pos + extra_bits;
- int remaining_bits = 128 - config_bits - weight_bits;
- int num_cem_pairs = 0;
- for (int j = 0; j < block->partitions; j++) num_cem_pairs += 1 + block->color_endpoint_modes[j] / 4;
- assert(num_cem_pairs <= 9);
- int endpoint_range = -1;
- for (int range = 20; range>0; range--)
- {
- int bits = sequence_bits(2 * num_cem_pairs, range);
- if (bits <= remaining_bits)
- {
- endpoint_range = range;
- break;
- }
- }
- assert(endpoint_range >= 4);
- assert(block->endpoint_range == endpoint_range);
- pack_integer_sequence(data, block->endpoints, pos, 2 * num_cem_pairs, endpoint_range);
-
- uint32_t rdata[4] = { 0, 0, 0, 0 };
- pack_integer_sequence(rdata, block->weights, 0, num_weights, block->weight_range);
- for (int i = 0; i < 4; i++) data[i] |= reverse_bits_32(rdata[3 - i]);
- }
- void atsc_rank(const rgba_surface* src, int xx, int yy, uint32_t* mode_buffer, astc_enc_settings* settings)
- {
- ispc::astc_rank_ispc((ispc::rgba_surface*)src, xx, yy, mode_buffer, (ispc::astc_enc_settings*)settings);
- }
- extern "C" void pack_block_c(uint32_t data[4], ispc::astc_block* block)
- {
- assert(sizeof(ispc::astc_block) == sizeof(astc_block));
- pack_block(data, (astc_block*)block);
- }
- void setup_list_context(ispc::astc_enc_context* ctx, uint32_t packed_mode)
- {
- ctx->width = 2 + get_field(packed_mode, 15, 13); // 2..8 <= 2^3
- ctx->height = 2 + get_field(packed_mode, 18, 16); // 2..8 <= 2^3
- ctx->dual_plane = !!get_field(packed_mode, 19, 19); // 0 or 1
- ctx->partitions = 1;
-
- int color_endpoint_modes0 = get_field(packed_mode, 7, 6) * 2 + 6; // 6, 8, 10 or 12
- ctx->color_endpoint_pairs = 1 + (color_endpoint_modes0 / 4);
- ctx->channels = (color_endpoint_modes0 > 8) ? 4 : 3;
- }
- void astc_encode(const rgba_surface* src, float* block_scores, uint8_t* dst, uint64_t* list, astc_enc_settings* settings)
- {
- ispc::astc_enc_context list_context;
- setup_list_context(&list_context, uint32_t(list[1] & 0xFFFFFFFF));
- assert(sizeof(ispc::rgba_surface) == sizeof(rgba_surface));
- assert(sizeof(ispc::astc_enc_settings) == sizeof(astc_enc_settings));
- ispc::astc_encode_ispc((ispc::rgba_surface*)src, block_scores, dst, list, &list_context, (ispc::astc_enc_settings*)settings);
- }
- void CompressBlocksASTC(const rgba_surface* src, uint8_t* dst, astc_enc_settings* settings)
- {
- assert(src->height % settings->block_height == 0);
- assert(src->width % settings->block_width == 0);
-
- assert(settings->block_height <= 8);
- assert(settings->block_width <= 8);
-
- int tex_width = src->width / settings->block_width;
- int programCount = ispc::get_programCount();
- std::vector<float> block_scores(tex_width * src->height / settings->block_height);
- for (int yy = 0; yy < src->height / settings->block_height; yy++)
- for (int xx = 0; xx < tex_width; xx++)
- {
- block_scores[yy * tex_width + xx] = std::numeric_limits<float>::infinity();
- }
- int mode_list_size = 3334;
- int list_size = programCount;
- std::vector<uint64_t> mode_lists(list_size * mode_list_size);
- std::vector<uint32_t> mode_buffer(programCount * settings->fastSkipTreshold);
- for (int yy = 0; yy < src->height / settings->block_height; yy++)
- for (int _x = 0; _x < (tex_width + programCount - 1) / programCount; _x++)
- {
- int xx = _x * programCount;
- atsc_rank(src, xx, yy, mode_buffer.data(), settings);
-
- for (int i = 0; i < settings->fastSkipTreshold; i++)
- for (int k = 0; k < programCount; k++)
- {
- if (xx + k >= tex_width) continue;
-
- uint32_t offset = (yy << 16) + (xx + k);
- uint32_t mode = mode_buffer[programCount * i + k];
- int mode_bin = mode >> 20;
- uint64_t* mode_list = &mode_lists[list_size * mode_bin];
- if (*mode_list < programCount - 1)
- {
- int index = int(mode_list[0] + 1);
- mode_list[0] = index;
- mode_list[index] = (uint64_t(offset) << 32) + mode;
- }
- else
- {
- mode_list[0] = (uint64_t(offset) << 32) + mode;
- astc_encode(src, block_scores.data(), dst, mode_list, settings);
- memset(mode_list, 0, list_size * sizeof(uint64_t));
- }
- }
- }
- for (int mode_bin = 0; mode_bin < mode_list_size; mode_bin++)
- {
- uint64_t* mode_list = &mode_lists[list_size * mode_bin];
- if (mode_list[0] == 0) continue;
- mode_list[0] = 0;
- astc_encode(src, block_scores.data(), dst, mode_list, settings);
- memset(mode_list, 0, list_size * sizeof(uint64_t));
- }
- }
|