1 /*
2 * Copyright (C) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "image_compressor.h"
17
18 #include <unistd.h>
19 #include <fstream>
20
21 #include "securec.h"
22 #include "media_errors.h"
23 #include "image_log.h"
24
25 #undef LOG_DOMAIN
26 #define LOG_DOMAIN LOG_TAG_DOMAIN_ID_PLUGIN
27
28 #undef LOG_TAG
29 #define LOG_TAG "ClAstcEnc"
30
31 namespace OHOS {
32 namespace ImagePlugin {
33 namespace AstcEncBasedCl {
// ---- Size limits ----
// Largest accepted image dimensions, in pixels.
constexpr int MAX_WIDTH = 8192;
constexpr int MAX_HEIGHT = 8192;
// Upper bound for a single allocation: 10,000,000 bytes (10 MB, decimal).
constexpr size_t MAX_MALLOC_BYTES = 10 * 1000 * 1000;

// ---- Output container layout ----
// NOTE(review): 0x5CA1AB13 matches the magic number of the .astc file header - confirm against the writer code.
constexpr int MAGIC_FILE_CONSTANT = 0x5CA1AB13;
constexpr int TEXTURE_HEAD_BYTES = 16;
constexpr int TEXTURE_BLOCK_BYTES = 16;
// Same value as the DIM macro defined inside the OpenCL program source below.
constexpr int DIM = 4;

// ---- Byte packing helpers ----
constexpr uint8_t BYTES_MASK = 0xFF;
constexpr uint8_t BIT_SHIFT_8BITS = 8;
constexpr uint8_t BIT_SHIFT_16BITS = 16;
constexpr uint8_t BIT_SHIFT_24BITS = 24;

// ---- OpenCL dispatch parameters (presumably NDRange dimension count and local size - verify at the call site) ----
constexpr uint8_t GLOBAL_WH_NUM_CL = 2;
constexpr size_t WORK_GROUP_SIZE = 8;
47
48 const char *g_programSource = R"(
// Notice: the code from line 42 to line 1266 is written in the OpenCL C language.
// OpenCL C only supports C-language style; constexpr and static_cast are not supported on some platforms.
// ---------------------------------------------------------------------------
// Block geometry and weight-grid sizes
// ---------------------------------------------------------------------------
#define DIM                       (4)
#define BLOCK_SIZE                (16)
#define X_GRIDS                   (4)
#define Y_GRIDS                   (4)
#define SMALL_VALUE               (0.00001f) // epsilon used to avoid division by zero
#define BLOCK_MAX_WEIGHTS         (64)
#define BLOCK_MAX_WEIGHTS_SHORT   (64)
#define BLOCK_MAX_WEIGHTS_FLOAT   (64.0f)
#define BLOCK_MAX_WEIGHTS_2PLANE  (32)
#define WEIGHTS_PLANE2_OFFSET     (32)

// ---------------------------------------------------------------------------
// Color endpoint mode (CEM) identifiers and pixel range
// ---------------------------------------------------------------------------
#define CEM_LDR_RGB_DIRECT        (8)
#define CEM_LDR_RGB_BASE_OFFSET   (9)
#define CEM_LDR_RGBA_DIRECT       (12)
#define CEM_LDR_RGBA_BASE_OFFSET  (13)
#define PIXEL_MAX_VALUE           (255.0f)

// ---------------------------------------------------------------------------
// Quantization level indices: QUANT_n is the row index of the n-level
// quantization mode in the per-level lookup tables; QUANT_MAX is the row count.
// ---------------------------------------------------------------------------
#define QUANT_2    (0)
#define QUANT_3    (1)
#define QUANT_4    (2)
#define QUANT_5    (3)
#define QUANT_6    (4)
#define QUANT_8    (5)
#define QUANT_10   (6)
#define QUANT_12   (7)
#define QUANT_16   (8)
#define QUANT_20   (9)
#define QUANT_24   (10)
#define QUANT_32   (11)
#define QUANT_40   (12)
#define QUANT_48   (13)
#define QUANT_64   (14)
#define QUANT_80   (15)
#define QUANT_96   (16)
#define QUANT_128  (17)
#define QUANT_160  (18)
#define QUANT_192  (19)
#define QUANT_256  (20)
#define QUANT_MAX  (21)

// ---------------------------------------------------------------------------
// Lookup-table dimensions and partition limits
// ---------------------------------------------------------------------------
#define WEIGHT_RANGE_6          (6)
#define WEIGHT_QUANTIZE_NUM     (32)  // entries per row of the weight tables
#define COLOR_NUM               (256) // entries per row of the color tables
#define MAX_PARTITION_COUNT     (4)
#define PARTITION_COUNT         (2)
#define MAX_BLOCK_SIZE          (32)
#define WEIGHT_QUANTIZE_GROUP   (12)  // rows in the weight tables
#define SECOND_PARTITION_INDEX  (1)

// ---------------------------------------------------------------------------
// Named literals and endpoint component indices
// ---------------------------------------------------------------------------
#define START_INDEX          (0)
#define FLOAT_ZERO           (0.0f)
#define FLOAT_ONE            (1.0f)
#define INT_ZERO             (0)
#define INT_ONE              (1)
#define SHORT_ZERO           (0)
#define UINT_ZERO            (0)
#define UINT_ONE             (1u)
#define EP0_INDEX            (0)
#define EP1_INDEX            (1)
#define END_POINT_NUM        (2)
#define EP0_R_INDEX          (0)
#define EP1_R_INDEX          (1)
#define EP0_G_INDEX          (2)
#define EP1_G_INDEX          (3)
#define EP0_B_INDEX          (4)
#define EP1_B_INDEX          (5)
#define EP0_A_INDEX          (6)
#define EP1_A_INDEX          (7)
#define COLOR_COMPONENT_NUM  (8)
#define QUANTIZE_WEIGHT_MIN  (0)

// ---------------------------------------------------------------------------
// Trit / quint packing parameters and element indices
// ---------------------------------------------------------------------------
#define TRIT_MSB_SIZE     (8)
#define TRIT_BLOCK_SIZE   (5)
#define TRIT_ROUND_NUM    (4)
#define QUINT_MSB_SIZE    (7)
#define QUINT_BLOCK_SIZE  (3)
#define QUINT_ROUND_NUM   (2)
#define ISE_0             (0)
#define ISE_1             (1)
#define ISE_2             (2)
#define ISE_3             (3)
#define ISE_4             (4)

// ---------------------------------------------------------------------------
// Weight slot indices 0..15
// ---------------------------------------------------------------------------
#define WEIGHT_0   (0)
#define WEIGHT_1   (1)
#define WEIGHT_2   (2)
#define WEIGHT_3   (3)
#define WEIGHT_4   (4)
#define WEIGHT_5   (5)
#define WEIGHT_6   (6)
#define WEIGHT_7   (7)
#define WEIGHT_8   (8)
#define WEIGHT_9   (9)
#define WEIGHT_10  (10)
#define WEIGHT_11  (11)
#define WEIGHT_12  (12)
#define WEIGHT_13  (13)
#define WEIGHT_14  (14)
#define WEIGHT_15  (15)

// ---------------------------------------------------------------------------
// Bit positions and masks used when packing the output block
// ---------------------------------------------------------------------------
#define BYTE_1_POS         (8)
#define BYTE_2_POS         (16)
#define BYTE_3_POS         (24)
#define BYTE_MASK          (0xFFu)
#define CEM_POS            (13)
#define COLOR_EP_POS       (17)
#define COLOR_EP_HIGH_BIT  (15)
#define MASK_FOR_4BITS     (0xFu)
#define MASK_FOR_15BITS    (0x7FFFu)
#define MASK_FOR_17BITS    (0x1FFFFu)

// ---------------------------------------------------------------------------
// Block-mode field offsets, masks and bit positions
// NOTE(review): BLOCK_WIDTH_POS and BLOCK_HEIGHT_POS share the value 5u;
// confirm this is intended where the block mode is assembled.
// ---------------------------------------------------------------------------
#define HEIGHT_BITS_OFFSET       (2)
#define WIDTH_BITS_OFFSET        (4)
#define MASK_FOR_2BITS           (0x3u)
#define MASK_FOR_1BITS           (0x1u)
#define WEIGHT_METHOD_OFFSET     (2u)
#define WEIGHT_METHOD_RIGHT_BIT  (1)
#define WEIGHT_METHOD_POS        (4u)
#define BLOCK_WIDTH_POS          (5u)
#define BLOCK_HEIGHT_POS         (5u)
#define WEIGHT_PRECISION_POS     (9u)
#define IS_DUALPLANE_POS         (10u)
172
// Partition description of a block.
//   partid  - partition identifier (not read by GetPart)
//   bitmaps - PARTITION_COUNT 32-bit masks; GetPart reads bit i of
//             bitmaps[SECOND_PARTITION_INDEX]
typedef struct {
    int partid;
    uint bitmaps[PARTITION_COUNT];
} PartInfo;

// Returns bit `i` of the second partition bitmap, i.e. 1 when element `i`
// is flagged in bitmaps[SECOND_PARTITION_INDEX] and 0 otherwise.
//
// partInfo - partition description to query; must not be null
// i        - bit index, valid range [0, MAX_BLOCK_SIZE)
//
// Any index outside the valid range yields 0. The lower bound is checked
// explicitly: OpenCL C masks the shift count to its low bits, so a negative
// index would otherwise silently select an unrelated bit of the mask.
int GetPart(PartInfo* partInfo, int i)
{
    if (i < 0 || i >= MAX_BLOCK_SIZE) {
        return 0;
    }
    uint secondPartitionMask = partInfo->bitmaps[SECOND_PARTITION_INDEX];
    return (int)((secondPartitionMask >> i) & MASK_FOR_1BITS);
}
185
// Weight scramble table: WEIGHT_QUANTIZE_GROUP rows of WEIGHT_QUANTIZE_NUM
// entries, stored flat. Each row is written below as two lines of 16 values;
// a row for an n-level mode holds n meaningful entries followed by zero padding.
__constant short g_scrambleTable[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
    // 2 levels
    0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 3 levels
    0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 4 levels
    0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 5 levels
    0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 6 levels
    0, 2, 4, 5, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 8 levels
    0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 10 levels
    0, 2, 4, 6, 8, 9, 7, 5, 3, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 12 levels
    0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 16 levels
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 20 levels
    0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17,
    13, 9, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 24 levels
    0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21,
    13, 5, 19, 11, 3, 17, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    // 32 levels
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
};
213
// Weight unquantization table: WEIGHT_QUANTIZE_GROUP rows of
// WEIGHT_QUANTIZE_NUM entries, stored flat. Each row is written below as two
// lines of 16 values; a row for an n-level mode holds n values in the range
// 0..64 followed by zero padding.
__constant short g_weightUnquant[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
    // 2 levels
    0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 3 levels
    0, 32, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 4 levels
    0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 5 levels
    0, 16, 32, 48, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 6 levels
    0, 64, 12, 52, 25, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 8 levels
    0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 10 levels
    0, 64, 7, 57, 14, 50, 21, 43, 28, 36, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 12 levels
    0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 16 levels
    0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 20 levels
    0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38,
    13, 51, 29, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 24 levels
    0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37,
    5, 59, 13, 51, 22, 42, 30, 34, 0, 0, 0, 0, 0, 0, 0, 0,
    // 32 levels
    0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
    34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
};
// Packed integer for every trit combination: 243 (= 3^5) entries, each value
// fitting in 8 bits. Written as three groups of nine rows with nine values each.
__constant short g_integerFromTrits[243] = {
    // entries 0..80
    0, 1, 2, 4, 5, 6, 8, 9, 10,
    16, 17, 18, 20, 21, 22, 24, 25, 26,
    3, 7, 15, 19, 23, 27, 12, 13, 14,
    32, 33, 34, 36, 37, 38, 40, 41, 42,
    48, 49, 50, 52, 53, 54, 56, 57, 58,
    35, 39, 47, 51, 55, 59, 44, 45, 46,
    64, 65, 66, 68, 69, 70, 72, 73, 74,
    80, 81, 82, 84, 85, 86, 88, 89, 90,
    67, 71, 79, 83, 87, 91, 76, 77, 78,

    // entries 81..161
    128, 129, 130, 132, 133, 134, 136, 137, 138,
    144, 145, 146, 148, 149, 150, 152, 153, 154,
    131, 135, 143, 147, 151, 155, 140, 141, 142,
    160, 161, 162, 164, 165, 166, 168, 169, 170,
    176, 177, 178, 180, 181, 182, 184, 185, 186,
    163, 167, 175, 179, 183, 187, 172, 173, 174,
    192, 193, 194, 196, 197, 198, 200, 201, 202,
    208, 209, 210, 212, 213, 214, 216, 217, 218,
    195, 199, 207, 211, 215, 219, 204, 205, 206,

    // entries 162..242
    96, 97, 98, 100, 101, 102, 104, 105, 106,
    112, 113, 114, 116, 117, 118, 120, 121, 122,
    99, 103, 111, 115, 119, 123, 108, 109, 110,
    224, 225, 226, 228, 229, 230, 232, 233, 234,
    240, 241, 242, 244, 245, 246, 248, 249, 250,
    227, 231, 239, 243, 247, 251, 236, 237, 238,
    28, 29, 30, 60, 61, 62, 92, 93, 94,
    156, 157, 158, 188, 189, 190, 220, 221, 222,
    31, 63, 127, 159, 191, 255, 252, 253, 254
};
271
// Composition of each quantization level, three integers per level stored flat:
//   { bits, trits, quints }
// The level size equals 2^bits * 3^trits * 5^quints (e.g. 12 = 2^2 * 3).
__constant int g_bitsTritsQuintsTable[QUANT_MAX * 3] = {
    // bits, trits, quints
    1, 0, 0,    // QUANT_2
    0, 1, 0,    // QUANT_3
    2, 0, 0,    // QUANT_4
    0, 0, 1,    // QUANT_5
    1, 1, 0,    // QUANT_6
    3, 0, 0,    // QUANT_8
    1, 0, 1,    // QUANT_10
    2, 1, 0,    // QUANT_12
    4, 0, 0,    // QUANT_16
    2, 0, 1,    // QUANT_20
    3, 1, 0,    // QUANT_24
    5, 0, 0,    // QUANT_32
    3, 0, 1,    // QUANT_40
    4, 1, 0,    // QUANT_48
    6, 0, 0,    // QUANT_64
    4, 0, 1,    // QUANT_80
    5, 1, 0,    // QUANT_96
    7, 0, 0,    // QUANT_128
    5, 0, 1,    // QUANT_160
    6, 1, 0,    // QUANT_192
    8, 0, 0     // QUANT_256
};
295
// Packed integer for every quint combination: 125 (= 5^3) entries, each value
// fitting in 7 bits. Written as 25 rows of five values.
__constant short g_integerFromQuints[125] = {
    0, 1, 2, 3, 4,
    8, 9, 10, 11, 12,
    16, 17, 18, 19, 20,
    24, 25, 26, 27, 28,
    5, 13, 21, 29, 6,

    32, 33, 34, 35, 36,
    40, 41, 42, 43, 44,
    48, 49, 50, 51, 52,
    56, 57, 58, 59, 60,
    37, 45, 53, 61, 14,

    64, 65, 66, 67, 68,
    72, 73, 74, 75, 76,
    80, 81, 82, 83, 84,
    88, 89, 90, 91, 92,
    69, 77, 85, 93, 22,

    96, 97, 98, 99, 100,
    104, 105, 106, 107, 108,
    112, 113, 114, 115, 116,
    120, 121, 122, 123, 124,
    101, 109, 117, 125, 30,

    102, 103, 70, 71, 38,
    110, 111, 78, 79, 46,
    118, 119, 86, 87, 54,
    126, 127, 94, 95, 62,
    39, 47, 55, 63, 31
};
307
// Color quantization lookup: QUANT_MAX sections of COLOR_NUM entries, stored
// flat in QUANT_2 .. QUANT_256 order. Within a section, entry v (0..255) holds
// the quantized code for 8-bit value v; each section is 16 rows of 16 values.
__constant short g_colorQuantTables[QUANT_MAX * COLOR_NUM] = {
    // ---- QUANT_2 (2 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // ---- QUANT_3 (3 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // ---- QUANT_4 (4 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // ---- QUANT_5 (5 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    // ---- QUANT_6 (6 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // ---- QUANT_8 (8 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    // ---- QUANT_10 (10 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // ---- QUANT_12 (12 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // ---- QUANT_16 (16 levels) ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    // ---- QUANT_20 (20 levels) ----
    0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
    19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1,
    // ---- QUANT_24 (24 levels) ----
    0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
    23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
    // ---- QUANT_32 (32 levels) ----
    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
    2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10,
    10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
    14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
    16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
    18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
    19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
    21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
    23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
    25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
    29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
    // ---- QUANT_40 (40 levels) ----
    0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
    16, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
    2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18,
    18, 26, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 4, 4,
    4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
    20, 20, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
    6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22,
    22, 22, 30, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38,
    39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 31, 23, 23,
    23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
    7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 21, 21,
    21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5,
    5, 5, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 27, 19,
    19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
    3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 17,
    17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1,
    // ---- QUANT_48 (48 levels) ----
    0, 0, 0, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 2, 2,
    2, 2, 2, 18, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 4, 4,
    4, 4, 4, 4, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
    6, 6, 6, 6, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
    8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 10,
    10, 10, 10, 10, 26, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 12,
    12, 12, 12, 12, 12, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
    14, 14, 14, 14, 14, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
    47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 15, 15, 15, 15, 15,
    15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 13, 13, 13, 13, 13,
    13, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11,
    11, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9,
    9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 7, 7, 7, 7,
    7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 5, 5, 5, 5,
    5, 5, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 19, 3, 3, 3,
    3, 3, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 17, 1, 1, 1,
    // ---- QUANT_64 (64 levels) ----
    0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
    4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
    8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
    12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16,
    16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
    20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
    24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
    28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
    32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
    36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
    40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
    44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
    47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
    51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
    55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
    59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63,
    // ---- QUANT_80 (80 levels) ----
    0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 48, 64, 64, 64, 2,
    2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 50, 66, 66, 66, 4,
    4, 4, 20, 20, 20, 36, 36, 36, 36, 52, 52, 52, 68, 68, 68, 6,
    6, 6, 22, 22, 22, 38, 38, 38, 38, 54, 54, 54, 70, 70, 70, 8,
    8, 8, 24, 24, 24, 24, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
    10, 10, 26, 26, 26, 26, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
    12, 12, 12, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
    14, 14, 14, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
    79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 15, 15, 15,
    15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 13, 13, 13,
    13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 27, 27, 27, 27, 11, 11,
    11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 25, 25, 25, 25, 9, 9,
    9, 71, 71, 71, 55, 55, 55, 39, 39, 39, 39, 23, 23, 23, 7, 7,
    7, 69, 69, 69, 53, 53, 53, 37, 37, 37, 37, 21, 21, 21, 5, 5,
    5, 67, 67, 67, 51, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
    3, 65, 65, 65, 49, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1,
    // ---- QUANT_96 (96 levels) ----
    0, 0, 32, 32, 64, 64, 64, 2, 2, 2, 34, 34, 66, 66, 66, 4,
    4, 4, 36, 36, 68, 68, 68, 6, 6, 6, 38, 38, 70, 70, 70, 8,
    8, 8, 40, 40, 40, 72, 72, 10, 10, 10, 42, 42, 42, 74, 74, 12,
    12, 12, 44, 44, 44, 76, 76, 14, 14, 14, 46, 46, 46, 78, 78, 16,
    16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
    20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
    24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
    28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
    95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
    91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
    87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
    83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
    17, 79, 79, 47, 47, 47, 15, 15, 15, 77, 77, 45, 45, 45, 13, 13,
    13, 75, 75, 43, 43, 43, 11, 11, 11, 73, 73, 41, 41, 41, 9, 9,
    9, 71, 71, 71, 39, 39, 7, 7, 7, 69, 69, 69, 37, 37, 5, 5,
    5, 67, 67, 67, 35, 35, 3, 3, 3, 65, 65, 65, 33, 33, 1, 1,
    // ---- QUANT_128 (128 levels) ----
    0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
    8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
    16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23,
    24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
    32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39,
    40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
    48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55,
    56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
    64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
    72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
    80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
    88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
    96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
    104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
    112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
    120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
    // ---- QUANT_160 (160 levels) ----
    0, 32, 32, 64, 96, 96, 128, 128, 2, 34, 34, 66, 98, 98, 130, 130,
    4, 36, 36, 68, 100, 100, 132, 132, 6, 38, 38, 70, 102, 102, 134, 134,
    8, 40, 40, 72, 104, 104, 136, 136, 10, 42, 42, 74, 106, 106, 138, 138,
    12, 44, 44, 76, 108, 108, 140, 140, 14, 46, 46, 78, 110, 110, 142, 142,
    16, 48, 48, 80, 112, 112, 144, 144, 18, 50, 50, 82, 114, 114, 146, 146,
    20, 52, 52, 84, 116, 116, 148, 148, 22, 54, 54, 86, 118, 118, 150, 150,
    24, 56, 56, 88, 120, 120, 152, 152, 26, 58, 58, 90, 122, 122, 154, 154,
    28, 60, 60, 92, 124, 124, 156, 156, 30, 62, 62, 94, 126, 126, 158, 158,
    159, 159, 127, 127, 95, 63, 63, 31, 157, 157, 125, 125, 93, 61, 61, 29,
    155, 155, 123, 123, 91, 59, 59, 27, 153, 153, 121, 121, 89, 57, 57, 25,
    151, 151, 119, 119, 87, 55, 55, 23, 149, 149, 117, 117, 85, 53, 53, 21,
    147, 147, 115, 115, 83, 51, 51, 19, 145, 145, 113, 113, 81, 49, 49, 17,
    143, 143, 111, 111, 79, 47, 47, 15, 141, 141, 109, 109, 77, 45, 45, 13,
    139, 139, 107, 107, 75, 43, 43, 11, 137, 137, 105, 105, 73, 41, 41, 9,
    135, 135, 103, 103, 71, 39, 39, 7, 133, 133, 101, 101, 69, 37, 37, 5,
    131, 131, 99, 99, 67, 35, 35, 3, 129, 129, 97, 97, 65, 33, 33, 1,
    // ---- QUANT_192 (192 levels) ----
    0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
    8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
    16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
    24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
    32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
    40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
    48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
    56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
    191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
    183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
    175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
    167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
    159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
    151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
    143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
    135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1,
    // ---- QUANT_256 (256 levels, identity mapping) ----
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
667
668 __constant short color_unquant_tables[QUANT_MAX][COLOR_NUM] = {
669 {
670 0, 255
671 },
672 {
673 0, 128, 255
674 },
675 {
676 0, 85, 170, 255
677 },
678 {
679 0, 64, 128, 192, 255
680 },
681 {
682 0, 255, 51, 204, 102, 153
683 },
684 {
685 0, 36, 73, 109, 146, 182, 219, 255
686 },
687 {
688 0, 255, 28, 227, 56, 199, 84, 171, 113, 142
689 },
690 {
691 0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139
692 },
693 { // 16
694 0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255
695 },
696 { // 20
697 0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
698 54, 201, 121, 134
699 },
700 { // 24
701 0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
702 22, 233, 55, 200, 88, 167, 121, 134
703 },
704 { // 32
705 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
706 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255
707 },
708 { // 40
709 0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
710 13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
711 26, 229, 58, 197, 91, 164, 123, 132
712 },
713 { // 48
714 0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
715 5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
716 11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131
717 },
718 { // 64
719 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
720 65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
721 130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
722 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255
723 },
724 { // 80
725 0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
726 3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
727 6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
728 9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
729 13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130
730 },
731 { // 96
732 0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
733 64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
734 2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
735 67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
736 5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
737 70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
738 },
739 { // 128
740 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
741 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
742 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
743 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
744 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
745 161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
746 193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
747 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255
748 },
749 { // 160
750 0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
751 64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
752 1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
753 65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
754 3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
755 67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
756 4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
757 68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
758 6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
759 70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
760 },
761 {
762 0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
763 32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
764 64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
765 96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
766 1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
767 33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
768 65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
769 97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
770 2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
771 34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
772 66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
773 98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129
774 },
775 {
776 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
777 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
778 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
779 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
780 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
781 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
782 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
783 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
784 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
785 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
786 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
787 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
788 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
789 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
790 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
791 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
792 }
793 };
794
795 void Swap(float4* lhs, float4* rhs)
796 {
797 if ((lhs == NULL) || (rhs == NULL)) {
798 return;
799 }
800 float4 tmp = *lhs;
801 *lhs = *rhs;
802 *rhs = tmp;
803 }
804
805 void FindMinMax(float4* texels, float4 ptMean, float4 vecK, float4* e0, float4* e1)
806 {
807 if ((texels == NULL) || (e0 == NULL) || (e1 == NULL)) {
808 return;
809 }
810 float a = 1e31f; // max float is clipped to 1e31f
811 float b = -1e31f; // min float is clipped to -1e31f
812 for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
813 float t = dot(texels[i] - ptMean, vecK);
814 a = min(a, t);
815 b = max(b, t);
816 }
817 *e0 = clamp(vecK * a + ptMean, 0.0f, 255.0f); // 8bit max is 255.0f
818 *e1 = clamp(vecK * b + ptMean, 0.0f, 255.0f); // 8bit max is 255.0f
819 // if the direction_vector ends up pointing from light to dark, FLIP IT!
820 // this will make the endpoint the darkest one;
821 float4 e0u = round(*e0);
822 float4 e1u = round(*e1);
823 if (e0u.x + e0u.y + e0u.z > e1u.x + e1u.y + e1u.z) {
824 Swap(e0, e1);
825 }
826 }
827
828 void MaxAccumulationPixelDirection(float4* texels, float4 ptMean, float4* e0, float4* e1, bool hasAlpha)
829 {
830 if ((texels == NULL) || (e0 == NULL) || (e1 == NULL)) {
831 return;
832 }
833 float4 sumR = (float4)(FLOAT_ZERO);
834 float4 sumG = (float4)(FLOAT_ZERO);
835 float4 sumB = (float4)(FLOAT_ZERO);
836 float4 sumA = (float4)(FLOAT_ZERO);
837 for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
838 float4 dt = texels[i] - ptMean;
839 sumR += (dt.x > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
840 sumG += (dt.y > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
841 sumB += (dt.z > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
842 sumA += (dt.w > FLOAT_ZERO) ? dt : (float4)(FLOAT_ZERO);
843 }
844 float dotR = dot(sumR, sumR);
845 float dotG = dot(sumG, sumG);
846 float dotB = dot(sumB, sumB);
847 float dotA = dot(sumA, sumA);
848 float maxDot = dotR;
849 float4 vecK = sumR;
850 if (dotG > maxDot) {
851 vecK = sumG;
852 maxDot = dotG;
853 }
854 if (dotB > maxDot) {
855 vecK = sumB;
856 maxDot = dotB;
857 }
858 if (hasAlpha && dotA > maxDot) {
859 vecK = sumA;
860 maxDot = dotA;
861 }
862 // safe normalize
863 float lenk = length(vecK);
864 vecK = (lenk < SMALL_VALUE) ? vecK : normalize(vecK);
865 FindMinMax(texels, ptMean, vecK, e0, e1);
866 }
867
868 void EncodeColorNormal(short quantLevel, float4 e0, float4 e1, short* endpointQuantized)
869 {
870 if (endpointQuantized == NULL) {
871 return;
872 }
873 int4 e0q = (int4)((int)(round(e0.x)), (int)(round(e0.y)),
874 (int)(round(e0.z)), (int)(round(e0.w)));
875 int4 e1q = (int4)((int)(round(e1.x)), (int)(round(e1.y)),
876 (int)(round(e1.z)), (int)(round(e1.w)));
877 endpointQuantized[EP0_R_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.x];
878 endpointQuantized[EP1_R_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.x];
879 endpointQuantized[EP0_G_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.y];
880 endpointQuantized[EP1_G_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.y];
881 endpointQuantized[EP0_B_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.z];
882 endpointQuantized[EP1_B_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.z];
883 endpointQuantized[EP0_A_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e0q.w];
884 endpointQuantized[EP1_A_INDEX] = g_colorQuantTables[quantLevel * COLOR_NUM + e1q.w];
885 }
886
887 void DecodeColor(short quantLevel, short endpointQuantized[COLOR_COMPONENT_NUM], float4* e0, float4* e1)
888 {
889 if ((endpointQuantized == NULL) || (e0 == NULL) || (e1 == NULL)) {
890 return;
891 }
892 (*e0).x = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_R_INDEX]]);
893 (*e1).x = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_R_INDEX]]);
894 (*e0).y = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_G_INDEX]]);
895 (*e1).y = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_G_INDEX]]);
896 (*e0).z = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_B_INDEX]]);
897 (*e1).z = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_B_INDEX]]);
898 (*e0).w = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_A_INDEX]]);
899 (*e1).w = (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_A_INDEX]]);
900 }
901
902 // calculate quantize weights
903 short QuantizeWeight(uint weightRange, float weight)
904 {
905 short q = (short)(round(weight * ((float)(weightRange))));
906 return clamp(q, (short)(QUANTIZE_WEIGHT_MIN), (short)(weightRange));
907 }
908
909 void CalculateNormalWeights(int part, PartInfo* partInfo, float4* texels,
910 float4 endPoint[END_POINT_NUM], float* projw)
911 {
912 if ((partInfo == NULL) || (texels == NULL) || (endPoint == NULL) || (projw == NULL)) {
913 return;
914 }
915 int i = START_INDEX;
916 float4 vecK = endPoint[EP1_INDEX] - endPoint[EP0_INDEX];
917 if (length(vecK) < SMALL_VALUE && !partInfo) {
918 for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
919 projw[i] = FLOAT_ZERO;
920 }
921 } else {
922 vecK = normalize(vecK);
923 float minw = 1e31f; // max float is clipped to 1e31f
924 float maxw = -1e31f; // min float is clipped to -1e31f
925 for (i = START_INDEX; i < BLOCK_SIZE; ++i) {
926 if ((!partInfo) || (GetPart(partInfo, i) == part)) {
927 float w = dot(vecK, texels[i] - endPoint[EP0_INDEX]);
928 minw = min(w, minw);
929 maxw = max(w, maxw);
930 projw[i] = w;
931 }
932 }
933 float invlen = maxw - minw;
934 invlen = max(SMALL_VALUE, invlen);
935 invlen = FLOAT_ONE / invlen; // invlen min is SMALL_VALUE, not zero
936 for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
937 if ((!partInfo) || (GetPart(partInfo, i) == part)) {
938 projw[i] = (projw[i] - minw) * invlen;
939 }
940 }
941 }
942 }
943
944 void QuantizeWeights(float projw[X_GRIDS * Y_GRIDS], uint weightRange, short* weights)
945 {
946 for (int i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
947 weights[i] = QuantizeWeight(weightRange, projw[i]);
948 }
949 }
950
951 void CalculateQuantizedWeights(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM], short* weights)
952 {
953 if ((texels == NULL) || (endPoint == NULL) || (weights == NULL)) {
954 return;
955 }
956 float projw[X_GRIDS * Y_GRIDS];
957 CalculateNormalWeights(INT_ZERO, NULL, texels, endPoint, projw);
958 QuantizeWeights(projw, weightRange, weights);
959 }
960
961 void Orbits8Ptr(uint4* outputs, uint* bitoffset, uint number, uint bitcount)
962 {
963 if ((outputs == NULL) || (bitoffset == NULL)) {
964 return;
965 }
966 uint newpos = *bitoffset + bitcount;
967 uint nidx = newpos >> 5; // split low bits (5 bits) to get high bits
968 uint uidx = *bitoffset >> 5; // split low bits (5 bits) to get high bits
969 uint bitIdx = *bitoffset & 31u; // split low bits to get low bits (31 for mask 5 bits)
970 if (uidx == 0) { // high bits is 0 for x
971 (*outputs).x |= (number << bitIdx);
972 (*outputs).y |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
973 } else if (uidx == 1) { // high bits is 1 for y
974 (*outputs).y |= (number << bitIdx);
975 (*outputs).z |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
976 } else if (uidx == 2) { // high bits is 2 for z
977 (*outputs).z |= (number << bitIdx);
978 (*outputs).w |= (nidx > uidx) ? (number >> (32u - bitIdx)) : UINT_ZERO; // uint 32 bits
979 }
980 *bitoffset = newpos;
981 }
982
983 void SplitHighLow(uint n, uint i, int* high, uint* low)
984 {
985 uint low_mask = (UINT_ONE << i) - UINT_ONE;
986 *low = n & low_mask;
987 *high = ((int)(n >> i)) & 0xFF; // mask 0xFF to get low 8 bits
988 }
989
// Reverse the bit order of the low 8 bits of p (bit 0 <-> bit 7, bit 1 <-> bit 6, ...).
// Bits of p above the low byte do not influence the result.
uint ReverseByte(uint p)
{
    // swap the two nibbles
    const uint nibbles = ((p & 0xFu) << 4) | ((p >> 4) & 0xFu);
    // swap the bit pairs inside each nibble
    const uint pairs = ((nibbles & 0x33u) << 2) | ((nibbles >> 2) & 0x33u);
    // swap neighbouring bits
    return ((pairs & 0x55u) << 1) | ((pairs >> 1) & 0x55u);
}
997
998 void EncodeTrits(uint bitcount, uint tritInput[TRIT_BLOCK_SIZE], uint4* outputs, uint* outpos)
999 {
1000 int t0;
1001 int t1;
1002 int t2;
1003 int t3;
1004 int t4;
1005 uint m0;
1006 uint m1;
1007 uint m2;
1008 uint m3;
1009 uint m4;
1010 SplitHighLow(tritInput[ISE_0], bitcount, &t0, &m0);
1011 SplitHighLow(tritInput[ISE_1], bitcount, &t1, &m1);
1012 SplitHighLow(tritInput[ISE_2], bitcount, &t2, &m2);
1013 SplitHighLow(tritInput[ISE_3], bitcount, &t3, &m3);
1014 SplitHighLow(tritInput[ISE_4], bitcount, &t4, &m4);
1015 ushort packhigh = (ushort)(
1016 g_integerFromTrits[t4 * 81 + t3 * 27 + t2 * 9 + t1 * 3 + t0]); // trits for 3 9 27 81
1017 Orbits8Ptr(outputs, outpos, m0, bitcount);
1018 Orbits8Ptr(outputs, outpos, packhigh & 3u, 2u); // low 2bits (mask 3u) offset 2u
1019
1020 Orbits8Ptr(outputs, outpos, m1, bitcount);
1021 Orbits8Ptr(outputs, outpos, (packhigh >> 2) & 3u, 2u); // right shift 2 bits for low 2bits (mask 3u) offset 2u
1022
1023 Orbits8Ptr(outputs, outpos, m2, bitcount);
1024 Orbits8Ptr(outputs, outpos, (packhigh >> 4) & 1u, 1u); // right shift 4 bits for low 1bits (mask 1u) offset 1u
1025
1026 Orbits8Ptr(outputs, outpos, m3, bitcount);
1027 Orbits8Ptr(outputs, outpos, (packhigh >> 5) & 3u, 2u); // right shift 5 bits for low 2bits (mask 3u) offset 2u
1028
1029 Orbits8Ptr(outputs, outpos, m4, bitcount);
1030 Orbits8Ptr(outputs, outpos, (packhigh >> 7) & 1u, 1u); // right shift 7 bits for low 1bits (mask 1u) offset 1u
1031 }
1032
1033 void EncodeQuints(uint bitcount, uint quintInput[QUINT_BLOCK_SIZE], uint4* outputs, uint* outpos)
1034 {
1035 int q0;
1036 int q1;
1037 int q2;
1038 uint m0;
1039 uint m1;
1040 uint m2;
1041 SplitHighLow(quintInput[ISE_0], bitcount, &q0, &m0);
1042 SplitHighLow(quintInput[ISE_1], bitcount, &q1, &m1);
1043 SplitHighLow(quintInput[ISE_2], bitcount, &q2, &m2);
1044 ushort packhigh = (ushort)(g_integerFromQuints[q2 * 25 + q1 * 5 + q0]); // Quints 5 25
1045 Orbits8Ptr(outputs, outpos, m0, bitcount);
1046 Orbits8Ptr(outputs, outpos, packhigh & 7u, 3u); // low 3bits (mask 7u) offset 3u
1047 Orbits8Ptr(outputs, outpos, m1, bitcount);
1048 Orbits8Ptr(outputs, outpos, (packhigh >> 3) & 3u, 2u); // right shift 3 bits for low 2bits (mask 3u) offset 2u
1049 Orbits8Ptr(outputs, outpos, m2, bitcount);
1050 Orbits8Ptr(outputs, outpos, (packhigh >> 5) & 3u, 2u); // right shift 5 bits for low 2bits (mask 3u) offset 2u
1051 }
1052
1053 void BiseEndpoints(short numbers[COLOR_COMPONENT_NUM], int range, uint4* outputs, bool hasAlpha, uint* bitPos)
1054 {
1055 uint bits = (uint)(g_bitsTritsQuintsTable[range * 3 + 0]); // Quints 3 offset 0
1056 uint trits = (uint)(g_bitsTritsQuintsTable[range * 3 + 1]); // Quints 3 offset 1
1057 uint quints = (uint)(g_bitsTritsQuintsTable[range * 3 + 2]); // Quints 3 offset 2
1058 uint count = hasAlpha ? 8u : 6u; // RGBA 4x2 = 8 or RGB 3x2 = 6
1059 if (trits == UINT_ONE) {
1060 uint tritsInput[TRIT_BLOCK_SIZE];
1061 tritsInput[ISE_0] = numbers[EP0_R_INDEX];
1062 tritsInput[ISE_1] = numbers[EP1_R_INDEX];
1063 tritsInput[ISE_2] = numbers[EP0_G_INDEX];
1064 tritsInput[ISE_3] = numbers[EP1_G_INDEX];
1065 tritsInput[ISE_4] = numbers[EP0_B_INDEX];
1066 EncodeTrits(bits, tritsInput, outputs, bitPos);
1067 tritsInput[ISE_0] = numbers[EP1_B_INDEX];
1068 tritsInput[ISE_1] = numbers[EP0_A_INDEX];
1069 tritsInput[ISE_2] = numbers[EP1_A_INDEX];
1070 tritsInput[ISE_3] = UINT_ZERO;
1071 tritsInput[ISE_4] = UINT_ZERO;
1072 EncodeTrits(bits, tritsInput, outputs, bitPos);
1073 *bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * count + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
1074 } else if (quints == UINT_ONE) {
1075 uint quintsInput[QUINT_BLOCK_SIZE];
1076 quintsInput[ISE_0] = numbers[EP0_R_INDEX];
1077 quintsInput[ISE_1] = numbers[EP1_R_INDEX];
1078 quintsInput[ISE_2] = numbers[EP0_G_INDEX];
1079 EncodeQuints(bits, quintsInput, outputs, bitPos);
1080 quintsInput[ISE_0] = numbers[EP1_G_INDEX];
1081 quintsInput[ISE_1] = numbers[EP0_B_INDEX];
1082 quintsInput[ISE_2] = numbers[EP1_B_INDEX];
1083 EncodeQuints(bits, quintsInput, outputs, bitPos);
1084 quintsInput[ISE_0] = numbers[EP0_A_INDEX];
1085 quintsInput[ISE_1] = numbers[EP1_A_INDEX];
1086 quintsInput[ISE_2] = UINT_ZERO;
1087 EncodeQuints(bits, quintsInput, outputs, bitPos);
1088 *bitPos = ((QUINT_MSB_SIZE + QUINT_BLOCK_SIZE * bits) * count + QUINT_ROUND_NUM) / QUINT_BLOCK_SIZE;
1089 } else {
1090 for (uint i = UINT_ZERO; i < count; ++i) {
1091 Orbits8Ptr(outputs, bitPos, numbers[i], bits);
1092 }
1093 }
1094 }
1095
1096 void BiseWeights(short numbers[BLOCK_SIZE], int range, uint4* outputs)
1097 {
1098 uint bitPos = UINT_ZERO;
1099 uint bits = (uint)(g_bitsTritsQuintsTable[range * 3 + 0]); // Quints 3 offset 0
1100 uint trits = (uint)(g_bitsTritsQuintsTable[range * 3 + 1]); // Quints 3 offset 1
1101 if (trits == UINT_ONE) {
1102 uint tritsInput[TRIT_BLOCK_SIZE];
1103 tritsInput[ISE_0] = numbers[WEIGHT_0];
1104 tritsInput[ISE_1] = numbers[WEIGHT_1];
1105 tritsInput[ISE_2] = numbers[WEIGHT_2];
1106 tritsInput[ISE_3] = numbers[WEIGHT_3];
1107 tritsInput[ISE_4] = numbers[WEIGHT_4];
1108 EncodeTrits(bits, tritsInput, outputs, &bitPos);
1109 tritsInput[ISE_0] = numbers[WEIGHT_5];
1110 tritsInput[ISE_1] = numbers[WEIGHT_6];
1111 tritsInput[ISE_2] = numbers[WEIGHT_7];
1112 tritsInput[ISE_3] = numbers[WEIGHT_8];
1113 tritsInput[ISE_4] = numbers[WEIGHT_9];
1114 EncodeTrits(bits, tritsInput, outputs, &bitPos);
1115 tritsInput[ISE_0] = numbers[WEIGHT_10];
1116 tritsInput[ISE_1] = numbers[WEIGHT_11];
1117 tritsInput[ISE_2] = numbers[WEIGHT_12];
1118 tritsInput[ISE_3] = numbers[WEIGHT_13];
1119 tritsInput[ISE_4] = numbers[WEIGHT_14];
1120 EncodeTrits(bits, tritsInput, outputs, &bitPos);
1121 tritsInput[ISE_0] = numbers[WEIGHT_15];
1122 tritsInput[ISE_1] = UINT_ZERO;
1123 tritsInput[ISE_2] = UINT_ZERO;
1124 tritsInput[ISE_3] = UINT_ZERO;
1125 tritsInput[ISE_4] = UINT_ZERO;
1126 EncodeTrits(bits, tritsInput, outputs, &bitPos);
1127 bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * BLOCK_SIZE + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
1128 } else {
1129 for (int i = START_INDEX; i < BLOCK_SIZE; ++i) {
1130 Orbits8Ptr(outputs, &bitPos, numbers[i], bits);
1131 }
1132 }
1133 }
1134
1135 uint4 AssembleBlock(uint blockMode, uint colorEndpointMode, uint4 epIse, uint4 wtIse)
1136 {
1137 uint4 phyBlk = (uint4)(0, 0, 0, 0); // initialize to (0, 0, 0, 0)
1138 phyBlk.w |= ReverseByte(wtIse.x & BYTE_MASK) << BYTE_3_POS;
1139 phyBlk.w |= ReverseByte((wtIse.x >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1140 phyBlk.w |= ReverseByte((wtIse.x >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1141 phyBlk.w |= ReverseByte((wtIse.x >> BYTE_3_POS) & BYTE_MASK);
1142 phyBlk.z |= ReverseByte(wtIse.y & BYTE_MASK) << BYTE_3_POS;
1143 phyBlk.z |= ReverseByte((wtIse.y >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1144 phyBlk.z |= ReverseByte((wtIse.y >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1145 phyBlk.z |= ReverseByte((wtIse.y >> BYTE_3_POS) & BYTE_MASK);
1146 phyBlk.y |= ReverseByte(wtIse.z & BYTE_MASK) << BYTE_3_POS;
1147 phyBlk.y |= ReverseByte((wtIse.z >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS;
1148 phyBlk.y |= ReverseByte((wtIse.z >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS;
1149 phyBlk.y |= ReverseByte((wtIse.z >> BYTE_3_POS) & BYTE_MASK);
1150 phyBlk.x = blockMode;
1151
1152 phyBlk.x |= (colorEndpointMode & MASK_FOR_4BITS) << CEM_POS;
1153 phyBlk.x |= (epIse.x & MASK_FOR_15BITS) << COLOR_EP_POS;
1154 phyBlk.y |= ((epIse.x >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);
1155 phyBlk.y |= (epIse.y & MASK_FOR_15BITS) << COLOR_EP_POS;
1156 phyBlk.z |= ((epIse.y >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);
1157
1158 return phyBlk;
1159 }
1160
1161 uint AssembleBlockmode(uint weightQuantmethod, bool isDualPlane)
1162 {
1163 uint a = (uint)((Y_GRIDS - HEIGHT_BITS_OFFSET) & MASK_FOR_2BITS);
1164 uint b = (uint)((X_GRIDS - WIDTH_BITS_OFFSET) & MASK_FOR_2BITS);
1165 uint d = isDualPlane ? UINT_ONE : UINT_ZERO;
1166 uint h = (weightQuantmethod < 6u) ? UINT_ZERO : UINT_ONE; // low/high-precision limit is 6u
1167 uint r = (weightQuantmethod % 6u) + WEIGHT_METHOD_OFFSET; // low/high-precision limit is 6u
1168 uint blockMode = (r >> WEIGHT_METHOD_RIGHT_BIT) & MASK_FOR_2BITS;
1169 blockMode |= (r & MASK_FOR_1BITS) << WEIGHT_METHOD_POS;
1170 blockMode |= (a & MASK_FOR_2BITS) << BLOCK_WIDTH_POS;
1171 blockMode |= (b & MASK_FOR_2BITS) << BLOCK_HEIGHT_POS;
1172 blockMode |= h << WEIGHT_PRECISION_POS;
1173 blockMode |= d << IS_DUALPLANE_POS;
1174 return blockMode;
1175 }
1176
1177 uint4 EndpointIse(float4* ep0, float4* ep1, short endpointQuantmethod, bool hasAlpha)
1178 {
1179 short epQuantized[COLOR_COMPONENT_NUM];
1180 EncodeColorNormal(endpointQuantmethod, *ep0, *ep1, epQuantized);
1181 DecodeColor(endpointQuantmethod, epQuantized, ep0, ep1);
1182 if (!hasAlpha) {
1183 epQuantized[EP0_A_INDEX] = SHORT_ZERO;
1184 epQuantized[EP1_A_INDEX] = SHORT_ZERO;
1185 }
1186 uint4 epIse = (uint4)(UINT_ZERO);
1187 uint bitPos = UINT_ZERO;
1188 BiseEndpoints(epQuantized, endpointQuantmethod, &epIse, hasAlpha, &bitPos);
1189 return epIse;
1190 }
1191
1192 float4 CalTexel(short weight, float4 ep0, float4 ep1)
1193 {
1194 short weight0 = BLOCK_MAX_WEIGHTS_SHORT - weight;
1195 return (ep0 * weight0 + ep1 * weight) / BLOCK_MAX_WEIGHTS_FLOAT;
1196 }
1197
1198 uint4 WeightIse(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM],
1199 short weightQuantmethod, float* errval)
1200 {
1201 int i = START_INDEX;
1202 short wtQuantized[X_GRIDS * Y_GRIDS];
1203 CalculateQuantizedWeights(texels, weightRange, endPoint, wtQuantized);
1204 float sumErr = FLOAT_ZERO;
1205 for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
1206 short w = weightQuantmethod * WEIGHT_QUANTIZE_NUM + wtQuantized[i];
1207 wtQuantized[i] = g_scrambleTable[w];
1208 w = weightQuantmethod * WEIGHT_QUANTIZE_NUM + wtQuantized[i];
1209 short wt = g_weightUnquant[w];
1210 float4 new_texel = CalTexel(wt, endPoint[EP0_INDEX], endPoint[EP1_INDEX]);
1211 float4 diff = new_texel - texels[i];
1212 sumErr += dot(diff, diff);
1213 }
1214 *errval = sumErr;
1215 uint4 wtIse = (uint4)(UINT_ZERO);
1216 BiseWeights(wtQuantized, (int)(weightQuantmethod), &wtIse);
1217 return wtIse;
1218 }
1219
1220 float TryEncode(float4* texels, float4 texelsMean, uint4* epIse, uint4* wtIse, short3* bestBlockmode)
1221 {
1222 float errval;
1223 bool hasAlpha = true;
1224 *bestBlockmode = (short3)(QUANT_6, QUANT_256, WEIGHT_RANGE_6);
1225 short weightQuantmethod = (*bestBlockmode).x;
1226 short endpointQuantmethod = (*bestBlockmode).y;
1227 short weightRange = (*bestBlockmode).z;
1228 float4 ep0;
1229 float4 ep1;
1230 float4 endPoint[END_POINT_NUM];
1231 MaxAccumulationPixelDirection(texels, texelsMean, &ep0, &ep1, hasAlpha);
1232 *epIse = EndpointIse(&ep0, &ep1, endpointQuantmethod, hasAlpha);
1233 endPoint[EP0_INDEX] = ep0;
1234 endPoint[EP1_INDEX] = ep1;
1235 *wtIse = WeightIse(texels, weightRange - UINT_ONE, endPoint, weightQuantmethod, &errval);
1236 return errval;
1237 }
1238
1239 uint4 EncodeBlock(float4* texels, float4 texelsMean, int blockID, __global uint* errs)
1240 {
1241 bool hasAlpha = true;
1242 bool isDualPlane = false;
1243 float errval = 10000000.0f; // the errval is initialized to 10000000.0f
1244
1245 uint4 epIse, wtIse;
1246 short3 bestBlockmode, tmpBestBlockMode;
1247 errval = TryEncode(texels, texelsMean, &epIse, &wtIse, &bestBlockmode);
1248
1249 uint blockMode = AssembleBlockmode(bestBlockmode.x, isDualPlane);
1250 uint ColorEndpointMode;
1251 if (hasAlpha) {
1252 ColorEndpointMode = CEM_LDR_RGBA_DIRECT;
1253 } else {
1254 ColorEndpointMode = CEM_LDR_RGB_DIRECT;
1255 }
1256 errs[blockID] = (uint)(errval);
1257 return AssembleBlock(blockMode, ColorEndpointMode, epIse, wtIse);
1258 }
1259
1260 void GotTexelFromImage(read_only image2d_t inputImage, float4 texels[BLOCK_SIZE],
1261 int width, int height, float4 *texelMean)
1262 {
1263 int2 pos = (int2)(get_global_id(0), get_global_id(1));
1264 pos.x *= DIM;
1265 pos.y *= DIM;
1266 for (int i = 0; i < DIM; ++i) {
1267 for (int j = 0; j < DIM; ++j) {
1268 int2 pixelPos = pos + (int2)(j, i);
1269 if (pixelPos.x >= width) {
1270 pixelPos.x = width - 1;
1271 }
1272 if (pixelPos.y >= height) {
1273 pixelPos.y = height - 1;
1274 }
1275 float4 texel = read_imagef(inputImage, pixelPos);
1276 texels[i * DIM + j] = texel * PIXEL_MAX_VALUE;
1277 *texelMean += texel * PIXEL_MAX_VALUE;
1278 }
1279 }
1280 }
1281
1282 kernel void AstcCl(read_only image2d_t inputImage, __global uint4* astcArr, __global uint* errs,
1283 int width, int height)
1284 {
1285 const int2 globalSize = (int2)(get_global_size(0), get_global_size(1));
1286 const int2 globalId = (int2)(get_global_id(0), get_global_id(1));
1287 int blockID = globalId.y * globalSize.x + globalId.x;
1288 float4 texels[BLOCK_SIZE];
1289 float4 texelMean = 0;
1290 GotTexelFromImage(inputImage, texels, width, height, &texelMean);
1291 texelMean = texelMean / ((float)(BLOCK_SIZE));
1292 astcArr[blockID] = EncodeBlock(texels, texelMean, blockID, errs);
1293 }
1294 )";
1295
/**
 * Tracks the OpenCL shared library used by the encoder.
 * LoadOpenCLSo() initializes it on demand through InitOpenCLExtern and remembers the
 * outcome; the destructor hands the stored handle to UnLoadCLExtern.
 */
class OpenCLSoManager {
public:
    OpenCLSoManager();
    ~OpenCLSoManager();

    /** Loads the library unless a previous call already succeeded; returns the load state. */
    bool LoadOpenCLSo();

private:
    void *clSoHandle{nullptr}; // handle filled in by InitOpenCLExtern
    bool loadSuccess{false};   // true once InitOpenCLExtern has reported success
};

// File-local manager instance shared by the functions in this file.
static OpenCLSoManager g_clSoManager;
1307
OpenCLSoManager()1308 OpenCLSoManager::OpenCLSoManager()
1309 {
1310 clSoHandle = nullptr;
1311 loadSuccess = false;
1312 }
1313
~OpenCLSoManager()1314 OpenCLSoManager::~OpenCLSoManager()
1315 {
1316 if (!UnLoadCLExtern(clSoHandle)) {
1317 IMAGE_LOGE("astcenc OpenCLSoManager UnLoad failed!");
1318 } else {
1319 IMAGE_LOGD("astcenc OpenCLSoManager UnLoad success!");
1320 loadSuccess = false;
1321 }
1322 }
1323
LoadOpenCLSo()1324 bool OpenCLSoManager::LoadOpenCLSo()
1325 {
1326 if (!loadSuccess) {
1327 loadSuccess = InitOpenCLExtern(&clSoHandle);
1328 }
1329 return loadSuccess;
1330 }
1331
AstcClClose(ClAstcHandle * clAstcHandle)1332 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClClose(ClAstcHandle *clAstcHandle)
1333 {
1334 if (clAstcHandle == nullptr) {
1335 IMAGE_LOGE("astc AstcClClose clAstcHandle is nullptr!");
1336 return CL_ASTC_ENC_FAILED;
1337 }
1338 cl_int clRet;
1339 if (clAstcHandle->kernel != nullptr) {
1340 clRet = clReleaseKernel(clAstcHandle->kernel);
1341 if (clRet != CL_SUCCESS) {
1342 IMAGE_LOGE("astc clReleaseKernel failed ret %{public}d!", clRet);
1343 return CL_ASTC_ENC_FAILED;
1344 }
1345 clAstcHandle->kernel = nullptr;
1346 }
1347 if (clAstcHandle->queue != nullptr) {
1348 clRet = clReleaseCommandQueue(clAstcHandle->queue);
1349 if (clRet != CL_SUCCESS) {
1350 IMAGE_LOGE("astc clReleaseCommandQueue failed ret %{public}d!", clRet);
1351 return CL_ASTC_ENC_FAILED;
1352 }
1353 clAstcHandle->queue = nullptr;
1354 }
1355 if (clAstcHandle->context != nullptr) {
1356 clRet = clReleaseContext(clAstcHandle->context);
1357 if (clRet != CL_SUCCESS) {
1358 IMAGE_LOGE("astc clReleaseContext failed ret %{public}d!", clRet);
1359 return CL_ASTC_ENC_FAILED;
1360 }
1361 clAstcHandle->context = nullptr;
1362 }
1363 if (clAstcHandle->encObj.blockErrs_ != nullptr) {
1364 free(clAstcHandle->encObj.blockErrs_);
1365 clAstcHandle->encObj.blockErrs_ = nullptr;
1366 }
1367 if (clAstcHandle != nullptr) {
1368 free(clAstcHandle);
1369 }
1370 return CL_ASTC_ENC_SUCCESS;
1371 }
1372
/**
 * Reports whether a file system entry exists at the given path.
 * @param name path to probe with access(F_OK).
 * @return true unless access() reports failure.
 */
static bool CheckClBinIsExist(const std::string &name)
{
    constexpr int ACCESS_FAILED = -1; // access() returns -1 when the path cannot be found
    const int accessRet = access(name.c_str(), F_OK);
    return accessRet != ACCESS_FAILED;
}
1377
SaveClBin(cl_program program,const std::string & clBinPath)1378 static CL_ASTC_STATUS SaveClBin(cl_program program, const std::string &clBinPath)
1379 {
1380 size_t programBinarySizes;
1381 cl_int clRet = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &programBinarySizes, NULL);
1382 if (clRet != CL_SUCCESS) {
1383 IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARY_SIZES failed ret %{public}d!", clRet);
1384 return CL_ASTC_ENC_FAILED;
1385 }
1386 bool genBinFail = (programBinarySizes == 0) || (programBinarySizes > MAX_MALLOC_BYTES);
1387 if (genBinFail) {
1388 IMAGE_LOGE("astc clGetProgramInfo programBinarySizes %{public}zu too big!", programBinarySizes);
1389 return CL_ASTC_ENC_FAILED;
1390 }
1391 uint8_t *programBinaries = static_cast<uint8_t *>(malloc(programBinarySizes));
1392 if (programBinaries == nullptr) {
1393 IMAGE_LOGE("astc programBinaries malloc failed!");
1394 return CL_ASTC_ENC_FAILED;
1395 }
1396 clRet = clGetProgramInfo(program, CL_PROGRAM_BINARIES, programBinarySizes, &programBinaries, NULL);
1397 if (clRet != CL_SUCCESS) {
1398 IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARIES failed ret %{public}d!", clRet);
1399 free(programBinaries);
1400 return CL_ASTC_ENC_FAILED;
1401 }
1402 FILE *fp = fopen(clBinPath.c_str(), "wb");
1403 if (fp == nullptr) {
1404 IMAGE_LOGE("astc create file: %{public}s failed!", clBinPath.c_str());
1405 free(programBinaries);
1406 return CL_ASTC_ENC_FAILED;
1407 }
1408 CL_ASTC_STATUS ret = CL_ASTC_ENC_SUCCESS;
1409 if (fwrite(programBinaries, 1, programBinarySizes, fp) != programBinarySizes) {
1410 IMAGE_LOGE("astc fwrite programBinaries file failed!");
1411 ret = CL_ASTC_ENC_FAILED;
1412 }
1413 if (fclose(fp) != 0) {
1414 IMAGE_LOGE("astc SaveClBin close file failed!");
1415 ret = CL_ASTC_ENC_FAILED;
1416 }
1417 fp = nullptr;
1418 free(programBinaries);
1419 return ret;
1420 }
1421
BuildProgramAndCreateKernel(cl_program program,ClAstcHandle * clAstcHandle)1422 static CL_ASTC_STATUS BuildProgramAndCreateKernel(cl_program program, ClAstcHandle *clAstcHandle)
1423 {
1424 cl_int clRet = clBuildProgram(program, 1, &clAstcHandle->deviceID, "-cl-std=CL3.0", nullptr, nullptr);
1425 if (clRet != CL_SUCCESS) {
1426 IMAGE_LOGE("astc clBuildProgram failed ret %{public}d!", clRet);
1427 return CL_ASTC_ENC_FAILED;
1428 }
1429 clAstcHandle->kernel = clCreateKernel(program, "AstcCl", &clRet);
1430 if (clRet != CL_SUCCESS) {
1431 IMAGE_LOGE("astc clCreateKernel failed ret %{public}d!", clRet);
1432 return CL_ASTC_ENC_FAILED;
1433 }
1434 return CL_ASTC_ENC_SUCCESS;
1435 }
1436
AstcClBuildProgram(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1437 static CL_ASTC_STATUS AstcClBuildProgram(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1438 {
1439 cl_int clRet;
1440 cl_program program = nullptr;
1441 if (!CheckClBinIsExist(clBinPath)) {
1442 size_t sourceSize = strlen(g_programSource) + 1; // '\0' occupies 1 bytes
1443 program = clCreateProgramWithSource(clAstcHandle->context, 1, &g_programSource, &sourceSize, &clRet);
1444 if (clRet != CL_SUCCESS) {
1445 IMAGE_LOGE("astc clCreateProgramWithSource failed ret %{public}d!", clRet);
1446 return CL_ASTC_ENC_FAILED;
1447 }
1448 if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1449 IMAGE_LOGE("astc BuildProgramAndCreateKernel failed ret %{public}d!", clRet);
1450 clReleaseProgram(program);
1451 return CL_ASTC_ENC_FAILED;
1452 }
1453 if (SaveClBin(program, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1454 IMAGE_LOGI("astc SaveClBin failed!");
1455 }
1456 } else {
1457 std::ifstream contents{clBinPath};
1458 std::string binaryContent{std::istreambuf_iterator<char>{contents}, {}};
1459 size_t binSize = binaryContent.length();
1460 bool invaildSize = (binSize == 0) || (binSize > MAX_MALLOC_BYTES);
1461 if (invaildSize) {
1462 IMAGE_LOGE("astc AstcClBuildProgram read CLbin file lenth error %{public}zu!", binSize);
1463 return CL_ASTC_ENC_FAILED;
1464 }
1465 const char *binary = static_cast<const char *>(binaryContent.c_str());
1466 program = clCreateProgramWithBinary(clAstcHandle->context, 1, &clAstcHandle->deviceID, &binSize,
1467 (const unsigned char **)&binary, nullptr, &clRet);
1468 if (clRet != CL_SUCCESS) {
1469 IMAGE_LOGE("astc clCreateProgramWithBinary failed ret %{public}d!", clRet);
1470 return CL_ASTC_ENC_FAILED;
1471 }
1472 if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1473 IMAGE_LOGE("astc BuildProgramAndCreateKernel with bin failed!");
1474 clReleaseProgram(program);
1475 return CL_ASTC_ENC_FAILED;
1476 }
1477 }
1478 clRet = clReleaseProgram(program);
1479 if (clRet != CL_SUCCESS) {
1480 IMAGE_LOGE("astc clReleaseProgram failed ret %{public}d!", clRet);
1481 return CL_ASTC_ENC_FAILED;
1482 }
1483 return CL_ASTC_ENC_SUCCESS;
1484 }
1485
AstcCreateClKernel(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1486 static CL_ASTC_STATUS AstcCreateClKernel(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1487 {
1488 if (!g_clSoManager.LoadOpenCLSo()) {
1489 IMAGE_LOGE("astc InitOpenCL error!");
1490 return CL_ASTC_ENC_FAILED;
1491 }
1492 cl_int clRet;
1493 cl_platform_id platformID;
1494 clRet = clGetPlatformIDs(1, &platformID, NULL);
1495 if (clRet != CL_SUCCESS) {
1496 IMAGE_LOGE("astc clGetPlatformIDs failed ret %{public}d!", clRet);
1497 return CL_ASTC_ENC_FAILED;
1498 }
1499 clRet = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &clAstcHandle->deviceID, NULL);
1500 if (clRet != CL_SUCCESS) {
1501 IMAGE_LOGE("astc clGetDeviceIDs failed ret %{public}d!", clRet);
1502 return CL_ASTC_ENC_FAILED;
1503 }
1504 clAstcHandle->context = clCreateContext(0, 1, &clAstcHandle->deviceID, NULL, NULL, &clRet);
1505 if (clRet != CL_SUCCESS) {
1506 IMAGE_LOGE("astc clCreateContext failed ret %{public}d!", clRet);
1507 return CL_ASTC_ENC_FAILED;
1508 }
1509 cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
1510 clAstcHandle->queue = clCreateCommandQueueWithProperties(clAstcHandle->context,
1511 clAstcHandle->deviceID, props, &clRet);
1512 if (clRet != CL_SUCCESS) {
1513 IMAGE_LOGE("astc clCreateCommandQueueWithProperties failed ret %{public}d!", clRet);
1514 return CL_ASTC_ENC_FAILED;
1515 }
1516 if (AstcClBuildProgram(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1517 IMAGE_LOGE("astc AstcClBuildProgram failed!");
1518 return CL_ASTC_ENC_FAILED;
1519 }
1520 return CL_ASTC_ENC_SUCCESS;
1521 }
1522
AstcClCreate(ClAstcHandle ** handle,const std::string & clBinPath)1523 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClCreate(ClAstcHandle **handle, const std::string &clBinPath)
1524 {
1525 ClAstcHandle *clAstcHandle = static_cast<ClAstcHandle *>(calloc(1, sizeof(ClAstcHandle)));
1526 if (clAstcHandle == nullptr) {
1527 IMAGE_LOGE("astc AstcClCreate handle calloc failed!");
1528 return CL_ASTC_ENC_FAILED;
1529 }
1530 *handle = clAstcHandle;
1531 size_t numMaxBlocks = static_cast<size_t>(((MAX_WIDTH + DIM - 1) / DIM) * ((MAX_HEIGHT + DIM - 1) / DIM));
1532 clAstcHandle->encObj.blockErrs_ =
1533 static_cast<uint32_t *>(malloc((numMaxBlocks * sizeof(uint32_t)))); // 8MB mem Max
1534 if (clAstcHandle->encObj.blockErrs_ == nullptr) {
1535 IMAGE_LOGE("astc blockErrs_ malloc failed!");
1536 AstcClClose(*handle);
1537 return CL_ASTC_ENC_FAILED;
1538 }
1539 if (AstcCreateClKernel(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1540 IMAGE_LOGE("astc AstcCreateClKernel failed!");
1541 AstcClClose(*handle);
1542 return CL_ASTC_ENC_FAILED;
1543 }
1544 return CL_ASTC_ENC_SUCCESS;
1545 }
1546
AstcClEncImageCheckImageOption(const ClAstcImageOption * imageIn)1547 static CL_ASTC_STATUS AstcClEncImageCheckImageOption(const ClAstcImageOption *imageIn)
1548 {
1549 if ((imageIn->width <= 0) || (imageIn->height <= 0) || (imageIn->stride < imageIn->width)) {
1550 IMAGE_LOGE("astc AstcClEncImage width <= 0 or height <= 0 or stride < width!");
1551 return CL_ASTC_ENC_FAILED;
1552 }
1553 if ((imageIn->width > MAX_WIDTH) || (imageIn->height > MAX_HEIGHT)) {
1554 IMAGE_LOGE("astc AstcClEncImage width[%{public}d] \
1555 need be [1, %{public}d] and height[%{public}d] need be [1, %{public}d]", \
1556 imageIn->width, MAX_WIDTH, imageIn->height, MAX_HEIGHT);
1557 return CL_ASTC_ENC_FAILED;
1558 }
1559 return CL_ASTC_ENC_SUCCESS;
1560 }
1561
AstcClFillImage(ClAstcImageOption * imageIn,uint8_t * data,int32_t stride,int32_t width,int32_t height)1562 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClFillImage(ClAstcImageOption *imageIn, uint8_t *data, int32_t stride,
1563 int32_t width, int32_t height)
1564 {
1565 if (imageIn == nullptr) {
1566 IMAGE_LOGE("astc AstcClFillImage imageIn is nullptr!");
1567 return CL_ASTC_ENC_FAILED;
1568 }
1569 imageIn->data = data;
1570 imageIn->stride = stride;
1571 imageIn->width = width;
1572 imageIn->height = height;
1573 if (AstcClEncImageCheckImageOption(imageIn) != CL_ASTC_ENC_SUCCESS) {
1574 IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1575 return CL_ASTC_ENC_FAILED;
1576 }
1577 return CL_ASTC_ENC_SUCCESS;
1578 }
1579
GenAstcHeader(uint8_t * buffer,uint8_t blockX,uint8_t blockY,uint32_t dimX,uint32_t dimY)1580 static void GenAstcHeader(uint8_t *buffer, uint8_t blockX, uint8_t blockY, uint32_t dimX, uint32_t dimY)
1581 {
1582 uint8_t *headInfo = buffer;
1583 *headInfo++ = MAGIC_FILE_CONSTANT & BYTES_MASK;
1584 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_8BITS) & BYTES_MASK;
1585 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_16BITS) & BYTES_MASK;
1586 *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_24BITS) & BYTES_MASK;
1587 *headInfo++ = static_cast<uint8_t>(blockX);
1588 *headInfo++ = static_cast<uint8_t>(blockY);
1589 *headInfo++ = 1;
1590 *headInfo++ = dimX & BYTES_MASK;
1591 *headInfo++ = (dimX >> BIT_SHIFT_8BITS) & BYTES_MASK;
1592 *headInfo++ = (dimX >> BIT_SHIFT_16BITS) & BYTES_MASK;
1593 *headInfo++ = dimY & BYTES_MASK;
1594 *headInfo++ = (dimY >> BIT_SHIFT_8BITS) & BYTES_MASK;
1595 *headInfo++ = (dimY >> BIT_SHIFT_16BITS) & BYTES_MASK;
1596 *headInfo++ = 1;
1597 *headInfo++ = 0;
1598 *headInfo++ = 0;
1599 }
1600
ReleaseClAstcObj(ClAstcObjEnc * obj)1601 static void ReleaseClAstcObj(ClAstcObjEnc *obj)
1602 {
1603 cl_int clRet;
1604 if (obj != nullptr) {
1605 if (obj->inputImage != nullptr) {
1606 clRet = clReleaseMemObject(obj->inputImage);
1607 if (clRet != CL_SUCCESS) {
1608 IMAGE_LOGE("astc inputImage release failed ret %{public}d!", clRet);
1609 }
1610 obj->inputImage = nullptr;
1611 }
1612 if (obj->astcResult != nullptr) {
1613 clRet = clReleaseMemObject(obj->astcResult);
1614 if (clRet != CL_SUCCESS) {
1615 IMAGE_LOGE("astc astcResult release failed ret %{public}d!", clRet);
1616 }
1617 obj->astcResult = nullptr;
1618 }
1619 if (obj->errBuffer != nullptr) {
1620 clRet = clReleaseMemObject(obj->errBuffer);
1621 if (clRet != CL_SUCCESS) {
1622 IMAGE_LOGE("astc errBuffer release failed ret %{public}d!", clRet);
1623 }
1624 obj->errBuffer = nullptr;
1625 }
1626 }
1627 }
1628
// Scans the first numBlocks entries of blockErrs.
// sumVal: reset to 0, then receives the sum of the entries (unsigned 32-bit, wraps on overflow).
// maxVal: raised to the largest entry seen; it is NOT reset, so the caller's initial value acts
//         as a lower bound on the result.
// blockErrs is not dereferenced when numBlocks is 0.
static void GetMaxAndSumVal(size_t numBlocks, uint32_t *blockErrs, uint32_t &maxVal, uint32_t &sumVal)
{
    sumVal = 0;
    for (size_t i = 0; i < numBlocks; i++) {
        const uint32_t blockErr = blockErrs[i];
        sumVal += blockErr;
        // Plain integer comparison: avoids the uint32_t -> double -> uint32_t round trip of fmax.
        if (blockErr > maxVal) {
            maxVal = blockErr;
        }
    }
}
1637
ClCreateBufferAndImage(const ClAstcImageOption * imageIn,ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj)1638 static CL_ASTC_STATUS ClCreateBufferAndImage(const ClAstcImageOption *imageIn,
1639 ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj)
1640 {
1641 uint8_t *data = imageIn->data;
1642 int32_t stride = imageIn->stride;
1643 int32_t width = imageIn->width;
1644 int32_t height = imageIn->height;
1645 size_t numBlocks = static_cast<size_t>(((width + DIM - 1) / DIM) * ((height + DIM - 1) / DIM));
1646 uint32_t *blockErrs = encObj->blockErrs_;
1647 size_t blockErrBytes = sizeof(uint32_t) * numBlocks;
1648 encObj->astcSize = numBlocks * TEXTURE_BLOCK_BYTES;
1649 if ((blockErrs == nullptr) || (memset_s(blockErrs, blockErrBytes, 0, blockErrBytes))) {
1650 IMAGE_LOGE("astc blockErrs is nullptr or memset failed!");
1651 return CL_ASTC_ENC_FAILED;
1652 }
1653 cl_image_format imageFormat = { CL_RGBA, CL_UNORM_INT8 };
1654 cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, stride, height };
1655 cl_int clRet;
1656 encObj->inputImage = clCreateImage(clAstcHandle->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat,
1657 &desc, data, &clRet);
1658 if (clRet != CL_SUCCESS) {
1659 IMAGE_LOGE("astc clCreateImage failed ret %{public}d!", clRet);
1660 return CL_ASTC_ENC_FAILED;
1661 }
1662 encObj->astcResult = clCreateBuffer(clAstcHandle->context,
1663 CL_MEM_ALLOC_HOST_PTR, encObj->astcSize, NULL, &clRet);
1664 if (clRet != CL_SUCCESS) {
1665 IMAGE_LOGE("astc clCreateBuffer astcResult failed ret %{public}d!", clRet);
1666 return CL_ASTC_ENC_FAILED;
1667 }
1668 encObj->errBuffer = clCreateBuffer(clAstcHandle->context, CL_MEM_USE_HOST_PTR, blockErrBytes, blockErrs, &clRet);
1669 if (clRet != CL_SUCCESS) {
1670 IMAGE_LOGE("astc clCreateBuffer errBuffer failed ret %{public}d!", clRet);
1671 return CL_ASTC_ENC_FAILED;
1672 }
1673 return CL_ASTC_ENC_SUCCESS;
1674 }
1675
ClKernelArgSet(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1676 static CL_ASTC_STATUS ClKernelArgSet(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1677 {
1678 int32_t kernelId = 0;
1679 cl_int clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->inputImage);
1680 if (clRet != CL_SUCCESS) {
1681 IMAGE_LOGE("astc clSetKernelArg inputImage failed ret %{public}d!", clRet);
1682 return CL_ASTC_ENC_FAILED;
1683 }
1684 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->astcResult);
1685 if (clRet != CL_SUCCESS) {
1686 IMAGE_LOGE("astc clSetKernelArg astcResult failed ret %{public}d!", clRet);
1687 return CL_ASTC_ENC_FAILED;
1688 }
1689 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->errBuffer);
1690 if (clRet != CL_SUCCESS) {
1691 IMAGE_LOGE("astc clSetKernelArg errBuffer failed ret %{public}d!", clRet);
1692 return CL_ASTC_ENC_FAILED;
1693 }
1694 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &width);
1695 if (clRet != CL_SUCCESS) {
1696 IMAGE_LOGE("astc clSetKernelArg width failed ret %{public}d!", clRet);
1697 return CL_ASTC_ENC_FAILED;
1698 }
1699 clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &height);
1700 if (clRet != CL_SUCCESS) {
1701 IMAGE_LOGE("astc clSetKernelArg height failed ret %{public}d!", clRet);
1702 return CL_ASTC_ENC_FAILED;
1703 }
1704 return CL_ASTC_ENC_SUCCESS;
1705 }
1706
ClKernelArgSetAndRun(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1707 static CL_ASTC_STATUS ClKernelArgSetAndRun(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1708 {
1709 if (ClKernelArgSet(clAstcHandle, encObj, width, height) != CL_ASTC_ENC_SUCCESS) {
1710 IMAGE_LOGE("astc ClKernelArgSet failed!");
1711 return CL_ASTC_ENC_FAILED;
1712 }
1713 size_t local[] = {WORK_GROUP_SIZE, WORK_GROUP_SIZE};
1714 size_t global[GLOBAL_WH_NUM_CL];
1715 global[0] = static_cast<size_t>((width + DIM - 1) / DIM);
1716 global[1] = static_cast<size_t>((height + DIM - 1) / DIM);
1717 size_t localMax;
1718 cl_int clRet = clGetKernelWorkGroupInfo(clAstcHandle->kernel, clAstcHandle->deviceID, CL_KERNEL_WORK_GROUP_SIZE,
1719 sizeof(size_t), &localMax, nullptr);
1720 if (clRet != CL_SUCCESS) {
1721 IMAGE_LOGE("astc clGetKernelWorkGroupInfo failed ret %{public}d!", clRet);
1722 return CL_ASTC_ENC_FAILED;
1723 }
1724 while (local[0] * local[1] > localMax) {
1725 local[0]--;
1726 local[1]--;
1727 }
1728 if ((local[0] < 1) || (local[1] < 1)) {
1729 IMAGE_LOGE("astc ClKernelArgSetAndRun local set failed!");
1730 return CL_ASTC_ENC_FAILED;
1731 }
1732 clRet = clEnqueueNDRangeKernel(clAstcHandle->queue, clAstcHandle->kernel, GLOBAL_WH_NUM_CL, nullptr, global, local,
1733 0, nullptr, nullptr);
1734 if (clRet != CL_SUCCESS) {
1735 IMAGE_LOGE("astc clEnqueueNDRangeKernel failed ret %{public}d!", clRet);
1736 return CL_ASTC_ENC_FAILED;
1737 }
1738 clRet = clFinish(clAstcHandle->queue);
1739 if (clRet != CL_SUCCESS) {
1740 IMAGE_LOGE("astc clFinish failed ret %{public}d!", clRet);
1741 return CL_ASTC_ENC_FAILED;
1742 }
1743 return CL_ASTC_ENC_SUCCESS;
1744 }
1745
ClReadAstcBufAndBlockError(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,const ClAstcImageOption * imageIn,uint8_t * buffer)1746 static CL_ASTC_STATUS ClReadAstcBufAndBlockError(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj,
1747 const ClAstcImageOption *imageIn, uint8_t *buffer)
1748 {
1749 cl_int clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->astcResult, CL_TRUE,
1750 0, encObj->astcSize, buffer + TEXTURE_HEAD_BYTES, 0, NULL, NULL);
1751 if (clRet != CL_SUCCESS) {
1752 IMAGE_LOGE("astc clEnqueueReadBuffer astcResult failed ret %{public}d!", clRet);
1753 return CL_ASTC_ENC_FAILED;
1754 }
1755 uint32_t maxVal = 0;
1756 uint32_t sumVal = 0;
1757 size_t numBlocks = ((imageIn->width + DIM - 1) / DIM) * ((imageIn->height + DIM - 1) / DIM);
1758 clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->errBuffer, CL_TRUE,
1759 0, sizeof(uint32_t) * numBlocks, encObj->blockErrs_, 0, NULL, NULL);
1760 if (clRet != CL_SUCCESS) {
1761 IMAGE_LOGE("astc clEnqueueReadBuffer blockErrs failed ret %{public}d!", clRet);
1762 return CL_ASTC_ENC_FAILED;
1763 }
1764 GetMaxAndSumVal(numBlocks, encObj->blockErrs_, maxVal, sumVal);
1765 return CL_ASTC_ENC_SUCCESS;
1766 }
1767
AstcClEncImage(ClAstcHandle * clAstcHandle,const ClAstcImageOption * imageIn,uint8_t * buffer)1768 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClEncImage(ClAstcHandle *clAstcHandle,
1769 const ClAstcImageOption *imageIn, uint8_t *buffer)
1770 {
1771 if ((clAstcHandle == nullptr) || (imageIn == nullptr) || (buffer == nullptr)) {
1772 IMAGE_LOGE("astc AstcClEncImage clAstcHandle or imageIn or buffer is nullptr!");
1773 return CL_ASTC_ENC_FAILED;
1774 }
1775 if (AstcClEncImageCheckImageOption(imageIn) != CL_ASTC_ENC_SUCCESS) {
1776 IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1777 return CL_ASTC_ENC_FAILED;
1778 }
1779 GenAstcHeader(buffer, DIM, DIM, imageIn->width, imageIn->height);
1780 ClAstcObjEnc *encObj = &clAstcHandle->encObj;
1781 if (encObj == nullptr) {
1782 IMAGE_LOGE("astc AstcClEncImage clAstcHandle encObj is nullptr!");
1783 return CL_ASTC_ENC_FAILED;
1784 }
1785 if (ClCreateBufferAndImage(imageIn, clAstcHandle, encObj) != CL_ASTC_ENC_SUCCESS) {
1786 ReleaseClAstcObj(encObj);
1787 IMAGE_LOGE("astc ClCreateBufferAndImage failed!");
1788 return CL_ASTC_ENC_FAILED;
1789 }
1790 if (ClKernelArgSetAndRun(clAstcHandle, encObj, imageIn->width, imageIn->height) != CL_ASTC_ENC_SUCCESS) {
1791 ReleaseClAstcObj(encObj);
1792 IMAGE_LOGE("astc ClKernelArgSetAndRun failed!");
1793 return CL_ASTC_ENC_FAILED;
1794 }
1795 if (ClReadAstcBufAndBlockError(clAstcHandle, encObj, imageIn, buffer) != CL_ASTC_ENC_SUCCESS) {
1796 ReleaseClAstcObj(encObj);
1797 IMAGE_LOGE("astc ClReadAstcBufAndBlockError failed!");
1798 return CL_ASTC_ENC_FAILED;
1799 }
1800 ReleaseClAstcObj(encObj);
1801 return CL_ASTC_ENC_SUCCESS;
1802 }
1803 }
1804 }
1805 }