1 /*
2  * Copyright (C) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "image_compressor.h"
17 
18 #include <unistd.h>
19 #include <fstream>
20 
21 #include "securec.h"
22 #include "media_errors.h"
23 #include "image_log.h"
24 
25 #undef LOG_DOMAIN
26 #define LOG_DOMAIN LOG_TAG_DOMAIN_ID_PLUGIN
27 
28 #undef LOG_TAG
29 #define LOG_TAG "ClAstcEnc"
30 
31 namespace OHOS {
32 namespace ImagePlugin {
33 namespace AstcEncBasedCl {
// Host-side (C++) constants for the OpenCL ASTC encoder.
// Their uses are outside this excerpt, so the notes below are inferred from names and values - TODO confirm.
// NOTE(review): 0x5CA1AB13 matches the magic number of the .astc file format, and 16 bytes matches both the
// .astc file header size and the size of one ASTC block - confirm against the code that writes the output.
// NOTE(review): DIM here has the same value as the DIM macro defined inside the OpenCL source string below;
// the two are independent definitions and presumably must be kept in sync.
34 constexpr int MAX_WIDTH = 8192;
35 constexpr int MAX_HEIGHT = 8192;
36 constexpr int TEXTURE_HEAD_BYTES = 16;
37 constexpr int TEXTURE_BLOCK_BYTES = 16;
38 constexpr int MAGIC_FILE_CONSTANT = 0x5CA1AB13;
39 constexpr int DIM = 4;
// Bit shift amounts and a low-byte mask (presumably for splitting integers into bytes).
40 constexpr uint8_t BIT_SHIFT_8BITS = 8;
41 constexpr uint8_t BIT_SHIFT_16BITS = 16;
42 constexpr uint8_t BIT_SHIFT_24BITS = 24;
43 constexpr uint8_t BYTES_MASK = 0xFF;
// presumably the number of dimensions (width, height) of the OpenCL global work size - verify at the enqueue call.
44 constexpr uint8_t GLOBAL_WH_NUM_CL = 2;
45 constexpr size_t MAX_MALLOC_BYTES = 10000000; // upper bound of 10 MB (decimal: 10,000,000 bytes)
46 constexpr size_t WORK_GROUP_SIZE = 8;
47 
48 const char *g_programSource = R"(
49 // Notice: the code from line 42 to line 1266 is OpenCL C source.
50 // OpenCL C only supports C-style code; constexpr and static_cast are not available on some platforms,
// which is why the constants below are plain #define macros and casts in this source are C-style.
//
// Block geometry, weight-count limits and color endpoint mode (CEM) ids.
// NOTE(review): BLOCK_SIZE equals DIM * DIM, presumably the number of texels in a 4x4 block - confirm at use sites.
// NOTE(review): the CEM_* values 8/9/12/13 match the ASTC color endpoint mode numbers for LDR RGB direct,
// LDR RGB base+offset, LDR RGBA direct and LDR RGBA base+offset - confirm against the ASTC specification.
51 #define DIM (4)
52 #define BLOCK_SIZE (16)
53 #define X_GRIDS (4)
54 #define Y_GRIDS (4)
55 #define SMALL_VALUE (0.00001f) // small epsilon used to avoid division by zero
56 #define BLOCK_MAX_WEIGHTS (64)
57 #define BLOCK_MAX_WEIGHTS_SHORT (64)
58 #define BLOCK_MAX_WEIGHTS_FLOAT (64.0f)
59 #define BLOCK_MAX_WEIGHTS_2PLANE (32)
60 #define WEIGHTS_PLANE2_OFFSET (32)
61 #define CEM_LDR_RGB_DIRECT (8)
62 #define CEM_LDR_RGB_BASE_OFFSET (9)
63 #define CEM_LDR_RGBA_DIRECT (12)
64 #define CEM_LDR_RGBA_BASE_OFFSET (13)
65 #define PIXEL_MAX_VALUE (255.0f)
66 
// Quantization level ids, numbered consecutively from 0. QUANT_<n> is the id of the level with n steps.
// QUANT_MAX is the number of levels; it sizes g_bitsTritsQuintsTable (QUANT_MAX * 3) and
// g_colorQuantTables (QUANT_MAX * COLOR_NUM), whose rows appear in this same order.
67 #define QUANT_2 (0)
68 #define QUANT_3 (1)
69 #define QUANT_4 (2)
70 #define QUANT_5 (3)
71 #define QUANT_6 (4)
72 #define QUANT_8 (5)
73 #define QUANT_10 (6)
74 #define QUANT_12 (7)
75 #define QUANT_16 (8)
76 #define QUANT_20 (9)
77 #define QUANT_24 (10)
78 #define QUANT_32 (11)
79 #define QUANT_40 (12)
80 #define QUANT_48 (13)
81 #define QUANT_64 (14)
82 #define QUANT_80 (15)
83 #define QUANT_96 (16)
84 #define QUANT_128 (17)
85 #define QUANT_160 (18)
86 #define QUANT_192 (19)
87 #define QUANT_256 (20)
88 #define QUANT_MAX (21)
89 
// Table dimensions and partition limits.
//  - WEIGHT_QUANTIZE_GROUP x WEIGHT_QUANTIZE_NUM is the size of g_scrambleTable and g_weightUnquant.
//  - COLOR_NUM is the row length of g_colorQuantTables.
//  - PARTITION_COUNT is the length of PartInfo.bitmaps.
//  - MAX_BLOCK_SIZE is the exclusive upper bound GetPart applies to its index.
//  - SECOND_PARTITION_INDEX is the PartInfo.bitmaps slot that GetPart reads.
// WEIGHT_RANGE_6 and MAX_PARTITION_COUNT are used outside this excerpt.
90 #define WEIGHT_RANGE_6 (6)
91 #define WEIGHT_QUANTIZE_NUM (32)
92 #define COLOR_NUM (256)
93 #define MAX_PARTITION_COUNT (4)
94 #define PARTITION_COUNT (2)
95 #define MAX_BLOCK_SIZE (32)
96 #define WEIGHT_QUANTIZE_GROUP (12)
97 #define SECOND_PARTITION_INDEX (1)
98 
// Named literals (zero / one in several types) followed by endpoint indices.
// NOTE(review): EP0_*_INDEX / EP1_*_INDEX interleave the two endpoints per channel in R, G, B, A order,
// presumably indexing an array of COLOR_COMPONENT_NUM (8) values - confirm at the use sites, which are
// outside this excerpt.
99 #define START_INDEX (0)
100 #define FLOAT_ZERO (0.0f)
101 #define FLOAT_ONE (1.0f)
102 #define INT_ZERO (0)
103 #define INT_ONE (1)
104 #define SHORT_ZERO (0)
105 #define UINT_ZERO (0)
106 #define UINT_ONE (1u)
107 #define EP0_INDEX (0)
108 #define EP1_INDEX (1)
109 #define END_POINT_NUM (2)
110 #define EP0_R_INDEX (0)
111 #define EP1_R_INDEX (1)
112 #define EP0_G_INDEX (2)
113 #define EP1_G_INDEX (3)
114 #define EP0_B_INDEX (4)
115 #define EP1_B_INDEX (5)
116 #define EP0_A_INDEX (6)
117 #define EP1_A_INDEX (7)
118 #define COLOR_COMPONENT_NUM (8)
119 #define QUANTIZE_WEIGHT_MIN (0)
120 
// Constants for packing trits and quints.
// 3^TRIT_BLOCK_SIZE = 243 and 5^QUINT_BLOCK_SIZE = 125 are the sizes of g_integerFromTrits and
// g_integerFromQuints respectively.
// NOTE(review): TRIT_MSB_SIZE (8) and QUINT_MSB_SIZE (7) are presumably the bit widths of one packed
// trit / quint group (the table values fit in 8 and 7 bits) - confirm at the use sites.
// The *_ROUND_NUM and ISE_* macros are used outside this excerpt.
121 #define TRIT_MSB_SIZE (8)
122 #define TRIT_BLOCK_SIZE (5)
123 #define TRIT_ROUND_NUM (4)
124 #define QUINT_MSB_SIZE (7)
125 #define QUINT_BLOCK_SIZE (3)
126 #define QUINT_ROUND_NUM (2)
127 #define ISE_0 (0)
128 #define ISE_1 (1)
129 #define ISE_2 (2)
130 #define ISE_3 (3)
131 #define ISE_4 (4)
132 
// Named indices 0..15: WEIGHT_<n> expands to n.
// presumably the positions of the 16 weights of a block - the uses are outside this excerpt.
133 #define WEIGHT_0 (0)
134 #define WEIGHT_1 (1)
135 #define WEIGHT_2 (2)
136 #define WEIGHT_3 (3)
137 #define WEIGHT_4 (4)
138 #define WEIGHT_5 (5)
139 #define WEIGHT_6 (6)
140 #define WEIGHT_7 (7)
141 #define WEIGHT_8 (8)
142 #define WEIGHT_9 (9)
143 #define WEIGHT_10 (10)
144 #define WEIGHT_11 (11)
145 #define WEIGHT_12 (12)
146 #define WEIGHT_13 (13)
147 #define WEIGHT_14 (14)
148 #define WEIGHT_15 (15)
149 
// Bit positions and masks.
// BYTE_<n>_POS is 8 * n, i.e. the bit offset of byte n inside a wider integer; BYTE_MASK selects one byte.
// MASK_FOR_<k>BITS has the k lowest bits set.
// NOTE(review): CEM_POS, COLOR_EP_POS and COLOR_EP_HIGH_BIT are presumably bit offsets of fields in the
// encoded block - confirm at the use sites, which are outside this excerpt.
150 #define BYTE_1_POS (8)
151 #define BYTE_2_POS (16)
152 #define BYTE_3_POS (24)
153 #define BYTE_MASK (0xFFu)
154 #define CEM_POS (13)
155 #define COLOR_EP_POS (17)
156 #define COLOR_EP_HIGH_BIT (15)
157 #define MASK_FOR_4BITS (0xFu)
158 #define MASK_FOR_15BITS (0x7FFFu)
159 #define MASK_FOR_17BITS (0x1FFFFu)
160 
// Bit offsets and small masks.
// MASK_FOR_1BITS is the mask GetPart applies to extract a single bit.
// NOTE(review): the *_POS / *_OFFSET names suggest positions of the weight-grid fields (width, height,
// precision, dual-plane flag) inside a packed block-mode value - the code that uses them is outside
// this excerpt, confirm there.
161 #define HEIGHT_BITS_OFFSET (2)
162 #define WIDTH_BITS_OFFSET (4)
163 #define MASK_FOR_2BITS (0x3u)
164 #define MASK_FOR_1BITS (0x1u)
165 #define WEIGHT_METHOD_OFFSET (2u)
166 #define WEIGHT_METHOD_RIGHT_BIT (1)
167 #define WEIGHT_METHOD_POS (4u)
168 #define BLOCK_WIDTH_POS (5u)
169 #define BLOCK_HEIGHT_POS (5u)
170 #define WEIGHT_PRECISION_POS (9u)
171 #define IS_DUALPLANE_POS (10u)
172 
// Partition description for one block.
//  - partid:  partition identifier (presumably the partition seed / pattern id - confirm where it is set).
//  - bitmaps: PARTITION_COUNT bit masks. GetPart reads bit i of bitmaps[SECOND_PARTITION_INDEX] to decide
//             whether element i belongs to the second partition; bitmaps[0] presumably mirrors this for
//             the first partition - verify against the code that fills the struct.
173 typedef struct {
174     int partid;
175     uint bitmaps[PARTITION_COUNT];
176 } PartInfo;
177 
// Returns the partition index (0 or 1) of element i, taken from bit i of
// partInfo->bitmaps[SECOND_PARTITION_INDEX].
//
// partInfo: partition description to query; must point to a valid PartInfo.
// i:        element index; the valid range is [0, MAX_BLOCK_SIZE).
// return:   1 when bit i of the second-partition bitmap is set, otherwise 0.
//           Any index outside the valid range yields 0.
//
// The lower bound is checked as well as the upper one: OpenCL C uses only the low bits of a
// shift count, so a negative i would otherwise silently select an unrelated bit.
int GetPart(PartInfo* partInfo, int i)
{
    if (i < 0 || i >= MAX_BLOCK_SIZE) {
        return 0;
    }
    return (int)((partInfo->bitmaps[SECOND_PARTITION_INDEX] >> i) & MASK_FOR_1BITS);
}
185 
// Weight scramble table: WEIGHT_QUANTIZE_GROUP (12) rows of WEIGHT_QUANTIZE_NUM (32) entries, stored flattened.
// Row k starts with the entries for the weight quantization level with 2, 3, 4, 5, 6, 8, 10, 12, 16, 20,
// 24 or 32 steps (in that order) and is padded with zeros up to 32 entries.
// presumably indexed as row * WEIGHT_QUANTIZE_NUM + quantized weight - the lookups are outside this excerpt.
186 __constant short g_scrambleTable[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
187     0, 1,
188     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189     0,
190     0, 1, 2,
191     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
192     0, 1, 2, 3,
193     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194     0, 1, 2, 3, 4,
195     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
196     0, 2, 4, 5, 3, 1,
197     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
198     0, 1, 2, 3, 4, 5, 6, 7,
199     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
200     0, 2, 4, 6, 8, 9, 7, 5, 3, 1,
201     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
202     0, 4, 8, 2, 6, 10, 11, 7, 3, 9, 5, 1,
203     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
204     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
205     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
206     0, 4, 8, 12, 16, 2, 6, 10, 14, 18, 19, 15, 11, 7, 3, 17, 13, 9, 5, 1,
207     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
208     0, 8, 16, 2, 10, 18, 4, 12, 20, 6, 14, 22, 23, 15, 7, 21, 13, 5, 19,
209     11, 3, 17, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0,
210     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
211     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
212 };
213 
// Weight unquantization table: same flattened layout as g_scrambleTable, i.e. WEIGHT_QUANTIZE_GROUP (12)
// rows of WEIGHT_QUANTIZE_NUM (32) entries, one row per weight quantization level with 2, 3, 4, 5, 6, 8,
// 10, 12, 16, 20, 24 or 32 steps, zero-padded to 32 entries. The non-padding values lie in 0..64.
// presumably maps a stored weight code to its unquantized weight - the lookups are outside this excerpt.
214 __constant short g_weightUnquant[WEIGHT_QUANTIZE_GROUP * WEIGHT_QUANTIZE_NUM] = {
215     0, 64,
216     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
217     0, 32, 64,
218     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219     0, 21, 43, 64,
220     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221     0, 16, 32, 48, 64,
222     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223     0, 64, 12, 52, 25, 39,
224     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225     0, 9, 18, 27, 37, 46, 55, 64,
226     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
227     0, 64, 7, 57, 14, 50, 21, 43, 28, 36,
228     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
229     0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36,
230     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
231     0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64,
232     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
233     0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35,
234     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
235     0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34,
236     0, 0, 0, 0, 0, 0, 0, 0,
237     0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38,
238     40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
239 };
// Lookup table with 243 (= 3^5) entries; every value fits in 8 bits.
// presumably maps a group of TRIT_BLOCK_SIZE (5) trits to its packed integer encoding - the index
// computation is outside this excerpt, so the trit order within the index needs to be confirmed there.
240 __constant short g_integerFromTrits[243] = { // 243 = 3^5 entries, one per combination of 5 trits
241     0, 1, 2, 4, 5, 6, 8, 9, 10,
242     16, 17, 18, 20, 21, 22, 24, 25, 26,
243     3, 7, 15, 19, 23, 27, 12, 13, 14,
244     32, 33, 34, 36, 37, 38, 40, 41, 42,
245     48, 49, 50, 52, 53, 54, 56, 57, 58,
246     35, 39, 47, 51, 55, 59, 44, 45, 46,
247     64, 65, 66, 68, 69, 70, 72, 73, 74,
248     80, 81, 82, 84, 85, 86, 88, 89, 90,
249     67, 71, 79, 83, 87, 91, 76, 77, 78,
250 
251     128, 129, 130, 132, 133, 134, 136, 137, 138,
252     144, 145, 146, 148, 149, 150, 152, 153, 154,
253     131, 135, 143, 147, 151, 155, 140, 141, 142,
254     160, 161, 162, 164, 165, 166, 168, 169, 170,
255     176, 177, 178, 180, 181, 182, 184, 185, 186,
256     163, 167, 175, 179, 183, 187, 172, 173, 174,
257     192, 193, 194, 196, 197, 198, 200, 201, 202,
258     208, 209, 210, 212, 213, 214, 216, 217, 218,
259     195, 199, 207, 211, 215, 219, 204, 205, 206,
260 
261     96, 97, 98, 100, 101, 102, 104, 105, 106,
262     112, 113, 114, 116, 117, 118, 120, 121, 122,
263     99, 103, 111, 115, 119, 123, 108, 109, 110,
264     224, 225, 226, 228, 229, 230, 232, 233, 234,
265     240, 241, 242, 244, 245, 246, 248, 249, 250,
266     227, 231, 239, 243, 247, 251, 236, 237, 238,
267     28, 29, 30, 60, 61, 62, 92, 93, 94,
268     156, 157, 158, 188, 189, 190, 220, 221, 222,
269     31, 63, 127, 159, 191, 255, 252, 253, 254
270 };
271 
// Per-quantization-level encoding description, stored flattened as QUANT_MAX rows of 3 ints.
// Each row is (number of plain bits, number of trits, number of quints) for one level, in QUANT_* id order;
// e.g. the RANGE_6 row is 1 bit plus 1 trit (2 * 3 = 6 steps) and the RANGE_10 row is 1 bit plus 1 quint.
272 __constant int g_bitsTritsQuintsTable[QUANT_MAX * 3] = { // 3 entries per level: bits, trits, quints
273     1, 0, 0, // RANGE_2
274     0, 1, 0, // RANGE_3
275     2, 0, 0, // RANGE_4
276     0, 0, 1, // RANGE_5
277     1, 1, 0, // RANGE_6
278     3, 0, 0, // RANGE_8
279     1, 0, 1, // RANGE_10
280     2, 1, 0, // RANGE_12
281     4, 0, 0, // RANGE_16
282     2, 0, 1, // RANGE_20
283     3, 1, 0, // RANGE_24
284     5, 0, 0, // RANGE_32
285     3, 0, 1, // RANGE_40
286     4, 1, 0, // RANGE_48
287     6, 0, 0, // RANGE_64
288     4, 0, 1, // RANGE_80
289     5, 1, 0, // RANGE_96
290     7, 0, 0, // RANGE_128
291     5, 0, 1, // RANGE_160
292     6, 1, 0, // RANGE_192
293     8, 0, 0 // RANGE_256
294 };
295 
// Lookup table with 125 (= 5^3) entries; every value fits in 7 bits.
// presumably maps a group of QUINT_BLOCK_SIZE (3) quints to its packed integer encoding - the index
// computation is outside this excerpt, so the quint order within the index needs to be confirmed there.
296 __constant short g_integerFromQuints[125] = { // 125 = 5^3 entries, one per combination of 3 quints
297     0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6,
298     32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, 58, 59, 60, 37, 45, 53,
299     61, 14,
300     64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85,
301     93, 22,
302     96, 97, 98, 99, 100, 104, 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123,
303     124, 101, 109, 117, 125, 30,
304     102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, 126, 127, 94, 95, 62, 39,
305     47, 55, 63, 31
306 };
307 
// Color quantization lookup, stored flattened: QUANT_MAX rows of COLOR_NUM (256) entries, one row per
// QUANT_* level in ascending id order (see the row comments).
// Within a row, each of the 256 positions holds the code stored for that position at the given level.
// Rows for levels that are powers of two are monotonic; the other rows (e.g. QUANT_6) hold their codes
// in a non-monotonic order.
// presumably indexed as quantLevel * COLOR_NUM + 8-bit color value - the lookups are outside this excerpt.
308 __constant short g_colorQuantTables[QUANT_MAX * COLOR_NUM] = {
309     // QUANT_2
310     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
312     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
313     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
314     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
315     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
317     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
318     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
319     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
320     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
321     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
322     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
323     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
324     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
325     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326     // QUANT_3
327     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
328     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
329     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
332     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
337     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
338     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
339     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
340     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
341     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
342     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
343     // QUANT_4
344     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
345     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
346     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
347     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
349     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
351     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
352     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
353     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
354     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
355     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
356     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
357     2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
358     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
359     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
360     // QUANT_5
361     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
363     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
364     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
366     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
367     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
368     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
369     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
370     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
371     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
372     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
373     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
374     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
375     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
376     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
377     // QUANT_6
378     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
380     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
381     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
382     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4,
383     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
384     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
385     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
386     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
387     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
388     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
389     5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
390     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
391     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
392     3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
393     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
394     // QUANT_8
395     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
397     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
398     1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
399     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
400     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
401     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
402     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
403     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
404     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
405     4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
406     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
407     5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
408     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
409     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
410     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
411     // QUANT_10
412     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
413     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
414     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
415     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
416     4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6,
417     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
418     6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
419     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
420     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
421     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7,
422     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
423     7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 5,
424     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
425     5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
426     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
427     3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
428     // QUANT_12
429     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4,
430     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
431     4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
432     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
433     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
434     2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
435     6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10,
436     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
437     11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
438     11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7, 7,
439     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3,
440     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
441     3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
442     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5,
443     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
444     5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
445     // QUANT_16
446     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
447     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
448     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
449     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
450     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
451     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
452     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
453     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
454     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
455     8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
456     9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
457     10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
458     11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
459     12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
460     13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
461     14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
462     // QUANT_20
463     0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
464     4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
465     8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
466     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2,
467     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
468     6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10,
469     10, 10, 10, 10, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
470     14, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
471     19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 15, 15, 15,
472     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11,
473     11, 11, 11, 11, 11, 11, 11, 11, 7, 7, 7, 7, 7, 7, 7, 7,
474     7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
475     3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
476     13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 9,
477     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5,
478     5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1,
479     // QUANT_24
480     0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
481     8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 2, 2, 2,
482     2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10,
483     10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 4, 4, 4,
484     4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12,
485     12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 6, 6,
486     6, 6, 6, 6, 6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14,
487     14, 14, 14, 14, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
488     23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 15, 15, 15, 15,
489     15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7,
490     7, 7, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 13, 13, 13,
491     13, 13, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5, 5, 5, 5,
492     5, 5, 5, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 11, 11,
493     11, 11, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, 3,
494     3, 3, 3, 3, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9,
495     9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
496     // QUANT_32
497     0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
498     2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
499     4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
500     6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8,
501     8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10,
502     10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
503     12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13,
504     14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
505     16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
506     18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
507     19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
508     21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
509     23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25,
510     25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
511     27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
512     29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31,
513     // QUANT_40
514     0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16,
515     16, 24, 24, 24, 24, 24, 24, 32, 32, 32, 32, 32, 32, 32, 2, 2,
516     2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18,
517     18, 26, 26, 26, 26, 26, 26, 26, 34, 34, 34, 34, 34, 34, 4, 4,
518     4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20,
519     20, 20, 28, 28, 28, 28, 28, 28, 36, 36, 36, 36, 36, 36, 36, 6,
520     6, 6, 6, 6, 6, 14, 14, 14, 14, 14, 14, 14, 22, 22, 22, 22,
521     22, 22, 30, 30, 30, 30, 30, 30, 30, 38, 38, 38, 38, 38, 38, 38,
522     39, 39, 39, 39, 39, 39, 39, 31, 31, 31, 31, 31, 31, 31, 23, 23,
523     23, 23, 23, 23, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7,
524     7, 37, 37, 37, 37, 37, 37, 37, 29, 29, 29, 29, 29, 29, 21, 21,
525     21, 21, 21, 21, 21, 13, 13, 13, 13, 13, 13, 5, 5, 5, 5, 5,
526     5, 5, 35, 35, 35, 35, 35, 35, 27, 27, 27, 27, 27, 27, 27, 19,
527     19, 19, 19, 19, 19, 11, 11, 11, 11, 11, 11, 11, 3, 3, 3, 3,
528     3, 3, 33, 33, 33, 33, 33, 33, 33, 25, 25, 25, 25, 25, 25, 17,
529     17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1,
530     // QUANT_48
531     0, 0, 0, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 2, 2,
532     2, 2, 2, 18, 18, 18, 18, 18, 18, 34, 34, 34, 34, 34, 4, 4,
533     4, 4, 4, 4, 20, 20, 20, 20, 20, 36, 36, 36, 36, 36, 6, 6,
534     6, 6, 6, 6, 22, 22, 22, 22, 22, 38, 38, 38, 38, 38, 38, 8,
535     8, 8, 8, 8, 24, 24, 24, 24, 24, 24, 40, 40, 40, 40, 40, 10,
536     10, 10, 10, 10, 26, 26, 26, 26, 26, 26, 42, 42, 42, 42, 42, 12,
537     12, 12, 12, 12, 12, 28, 28, 28, 28, 28, 44, 44, 44, 44, 44, 14,
538     14, 14, 14, 14, 14, 30, 30, 30, 30, 30, 46, 46, 46, 46, 46, 46,
539     47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 15, 15, 15, 15, 15,
540     15, 45, 45, 45, 45, 45, 29, 29, 29, 29, 29, 13, 13, 13, 13, 13,
541     13, 43, 43, 43, 43, 43, 27, 27, 27, 27, 27, 27, 11, 11, 11, 11,
542     11, 41, 41, 41, 41, 41, 25, 25, 25, 25, 25, 25, 9, 9, 9, 9,
543     9, 39, 39, 39, 39, 39, 39, 23, 23, 23, 23, 23, 7, 7, 7, 7,
544     7, 7, 37, 37, 37, 37, 37, 21, 21, 21, 21, 21, 5, 5, 5, 5,
545     5, 5, 35, 35, 35, 35, 35, 19, 19, 19, 19, 19, 19, 3, 3, 3,
546     3, 3, 33, 33, 33, 33, 33, 17, 17, 17, 17, 17, 17, 1, 1, 1,
547     // QUANT_64
548     0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
549     4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8,
550     8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
551     12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16,
552     16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
553     20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
554     24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
555     28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
556     32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
557     36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
558     40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
559     44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
560     47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51,
561     51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55,
562     55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59,
563     59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63,
564     // QUANT_80
565     0, 0, 16, 16, 16, 32, 32, 32, 48, 48, 48, 48, 64, 64, 64, 2,
566     2, 2, 18, 18, 18, 34, 34, 34, 50, 50, 50, 50, 66, 66, 66, 4,
567     4, 4, 20, 20, 20, 36, 36, 36, 36, 52, 52, 52, 68, 68, 68, 6,
568     6, 6, 22, 22, 22, 38, 38, 38, 38, 54, 54, 54, 70, 70, 70, 8,
569     8, 8, 24, 24, 24, 24, 40, 40, 40, 56, 56, 56, 72, 72, 72, 10,
570     10, 10, 26, 26, 26, 26, 42, 42, 42, 58, 58, 58, 74, 74, 74, 12,
571     12, 12, 12, 28, 28, 28, 44, 44, 44, 60, 60, 60, 76, 76, 76, 14,
572     14, 14, 14, 30, 30, 30, 46, 46, 46, 62, 62, 62, 78, 78, 78, 78,
573     79, 79, 79, 79, 63, 63, 63, 47, 47, 47, 31, 31, 31, 15, 15, 15,
574     15, 77, 77, 77, 61, 61, 61, 45, 45, 45, 29, 29, 29, 13, 13, 13,
575     13, 75, 75, 75, 59, 59, 59, 43, 43, 43, 27, 27, 27, 27, 11, 11,
576     11, 73, 73, 73, 57, 57, 57, 41, 41, 41, 25, 25, 25, 25, 9, 9,
577     9, 71, 71, 71, 55, 55, 55, 39, 39, 39, 39, 23, 23, 23, 7, 7,
578     7, 69, 69, 69, 53, 53, 53, 37, 37, 37, 37, 21, 21, 21, 5, 5,
579     5, 67, 67, 67, 51, 51, 51, 51, 35, 35, 35, 19, 19, 19, 3, 3,
580     3, 65, 65, 65, 49, 49, 49, 49, 33, 33, 33, 17, 17, 17, 1, 1,
581     // QUANT_96
582     0, 0, 32, 32, 64, 64, 64, 2, 2, 2, 34, 34, 66, 66, 66, 4,
583     4, 4, 36, 36, 68, 68, 68, 6, 6, 6, 38, 38, 70, 70, 70, 8,
584     8, 8, 40, 40, 40, 72, 72, 10, 10, 10, 42, 42, 42, 74, 74, 12,
585     12, 12, 44, 44, 44, 76, 76, 14, 14, 14, 46, 46, 46, 78, 78, 16,
586     16, 16, 48, 48, 48, 80, 80, 80, 18, 18, 50, 50, 50, 82, 82, 82,
587     20, 20, 52, 52, 52, 84, 84, 84, 22, 22, 54, 54, 54, 86, 86, 86,
588     24, 24, 56, 56, 56, 88, 88, 88, 26, 26, 58, 58, 58, 90, 90, 90,
589     28, 28, 60, 60, 60, 92, 92, 92, 30, 30, 62, 62, 62, 94, 94, 94,
590     95, 95, 95, 63, 63, 63, 31, 31, 93, 93, 93, 61, 61, 61, 29, 29,
591     91, 91, 91, 59, 59, 59, 27, 27, 89, 89, 89, 57, 57, 57, 25, 25,
592     87, 87, 87, 55, 55, 55, 23, 23, 85, 85, 85, 53, 53, 53, 21, 21,
593     83, 83, 83, 51, 51, 51, 19, 19, 81, 81, 81, 49, 49, 49, 17, 17,
594     17, 79, 79, 47, 47, 47, 15, 15, 15, 77, 77, 45, 45, 45, 13, 13,
595     13, 75, 75, 43, 43, 43, 11, 11, 11, 73, 73, 41, 41, 41, 9, 9,
596     9, 71, 71, 71, 39, 39, 7, 7, 7, 69, 69, 69, 37, 37, 5, 5,
597     5, 67, 67, 67, 35, 35, 3, 3, 3, 65, 65, 65, 33, 33, 1, 1,
598     // QUANT_128
599     0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
600     8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
601     16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23,
602     24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31,
603     32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39,
604     40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46, 46, 47, 47,
605     48, 48, 49, 49, 50, 50, 51, 51, 52, 52, 53, 53, 54, 54, 55, 55,
606     56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63,
607     64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
608     72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
609     80, 80, 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87,
610     88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, 95,
611     96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 102, 102, 103, 103,
612     104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 110, 110, 111, 111,
613     112, 112, 113, 113, 114, 114, 115, 115, 116, 116, 117, 117, 118, 118, 119, 119,
614     120, 120, 121, 121, 122, 122, 123, 123, 124, 124, 125, 125, 126, 126, 127, 127,
615     // QUANT_160
616     0, 32, 32, 64, 96, 96, 128, 128, 2, 34, 34, 66, 98, 98, 130, 130,
617     4, 36, 36, 68, 100, 100, 132, 132, 6, 38, 38, 70, 102, 102, 134, 134,
618     8, 40, 40, 72, 104, 104, 136, 136, 10, 42, 42, 74, 106, 106, 138, 138,
619     12, 44, 44, 76, 108, 108, 140, 140, 14, 46, 46, 78, 110, 110, 142, 142,
620     16, 48, 48, 80, 112, 112, 144, 144, 18, 50, 50, 82, 114, 114, 146, 146,
621     20, 52, 52, 84, 116, 116, 148, 148, 22, 54, 54, 86, 118, 118, 150, 150,
622     24, 56, 56, 88, 120, 120, 152, 152, 26, 58, 58, 90, 122, 122, 154, 154,
623     28, 60, 60, 92, 124, 124, 156, 156, 30, 62, 62, 94, 126, 126, 158, 158,
624     159, 159, 127, 127, 95, 63, 63, 31, 157, 157, 125, 125, 93, 61, 61, 29,
625     155, 155, 123, 123, 91, 59, 59, 27, 153, 153, 121, 121, 89, 57, 57, 25,
626     151, 151, 119, 119, 87, 55, 55, 23, 149, 149, 117, 117, 85, 53, 53, 21,
627     147, 147, 115, 115, 83, 51, 51, 19, 145, 145, 113, 113, 81, 49, 49, 17,
628     143, 143, 111, 111, 79, 47, 47, 15, 141, 141, 109, 109, 77, 45, 45, 13,
629     139, 139, 107, 107, 75, 43, 43, 11, 137, 137, 105, 105, 73, 41, 41, 9,
630     135, 135, 103, 103, 71, 39, 39, 7, 133, 133, 101, 101, 69, 37, 37, 5,
631     131, 131, 99, 99, 67, 35, 35, 3, 129, 129, 97, 97, 65, 33, 33, 1,
632     // QUANT_192
633     0, 64, 128, 128, 2, 66, 130, 130, 4, 68, 132, 132, 6, 70, 134, 134,
634     8, 72, 136, 136, 10, 74, 138, 138, 12, 76, 140, 140, 14, 78, 142, 142,
635     16, 80, 144, 144, 18, 82, 146, 146, 20, 84, 148, 148, 22, 86, 150, 150,
636     24, 88, 152, 152, 26, 90, 154, 154, 28, 92, 156, 156, 30, 94, 158, 158,
637     32, 96, 160, 160, 34, 98, 162, 162, 36, 100, 164, 164, 38, 102, 166, 166,
638     40, 104, 168, 168, 42, 106, 170, 170, 44, 108, 172, 172, 46, 110, 174, 174,
639     48, 112, 176, 176, 50, 114, 178, 178, 52, 116, 180, 180, 54, 118, 182, 182,
640     56, 120, 184, 184, 58, 122, 186, 186, 60, 124, 188, 188, 62, 126, 190, 190,
641     191, 191, 127, 63, 189, 189, 125, 61, 187, 187, 123, 59, 185, 185, 121, 57,
642     183, 183, 119, 55, 181, 181, 117, 53, 179, 179, 115, 51, 177, 177, 113, 49,
643     175, 175, 111, 47, 173, 173, 109, 45, 171, 171, 107, 43, 169, 169, 105, 41,
644     167, 167, 103, 39, 165, 165, 101, 37, 163, 163, 99, 35, 161, 161, 97, 33,
645     159, 159, 95, 31, 157, 157, 93, 29, 155, 155, 91, 27, 153, 153, 89, 25,
646     151, 151, 87, 23, 149, 149, 85, 21, 147, 147, 83, 19, 145, 145, 81, 17,
647     143, 143, 79, 15, 141, 141, 77, 13, 139, 139, 75, 11, 137, 137, 73, 9,
648     135, 135, 71, 7, 133, 133, 69, 5, 131, 131, 67, 3, 129, 129, 65, 1,
649     // QUANT_256
650     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
651     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
652     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
653     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
654     64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
655     80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
656     96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
657     112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
658     128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
659     144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
660     160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
661     176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
662     192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
663     208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
664     224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
665     240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
666 };
667 
// Endpoint color unquantization tables, one row per quantization level.
// DecodeColor reads them as color_unquant_tables[quantLevel][code] to turn a
// quantized endpoint code back into an 8-bit channel value (every entry is 0..255).
// The comment on each row gives the number of codes that row holds; entries past
// that count are not listed and are left to the initializer default.
__constant short color_unquant_tables[QUANT_MAX][COLOR_NUM] = {
    { // 2
        0, 255
    },
    { // 3
        0, 128, 255
    },
    { // 4
        0, 85, 170, 255
    },
    { // 5
        0, 64, 128, 192, 255
    },
    { // 6
        0, 255, 51, 204, 102, 153
    },
    { // 8
        0, 36, 73, 109, 146, 182, 219, 255
    },
    { // 10
        0, 255, 28, 227, 56, 199, 84, 171, 113, 142
    },
    { // 12
        0, 255, 69, 186, 23, 232, 92, 163, 46, 209, 116, 139
    },
    { // 16
        0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255
    },
    { // 20
        0, 255, 67, 188, 13, 242, 80, 175, 27, 228, 94, 161, 40, 215, 107, 148,
        54, 201, 121, 134
    },
    { // 24
        0, 255, 33, 222, 66, 189, 99, 156, 11, 244, 44, 211, 77, 178, 110, 145,
        22, 233, 55, 200, 88, 167, 121, 134
    },
    { // 32
        0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123,
        132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255
    },
    { // 40
        0, 255, 32, 223, 65, 190, 97, 158, 6, 249, 39, 216, 71, 184, 104, 151,
        13, 242, 45, 210, 78, 177, 110, 145, 19, 236, 52, 203, 84, 171, 117, 138,
        26, 229, 58, 197, 91, 164, 123, 132
    },
    { // 48
        0, 255, 16, 239, 32, 223, 48, 207, 65, 190, 81, 174, 97, 158, 113, 142,
        5, 250, 21, 234, 38, 217, 54, 201, 70, 185, 86, 169, 103, 152, 119, 136,
        11, 244, 27, 228, 43, 212, 59, 196, 76, 179, 92, 163, 108, 147, 124, 131
    },
    { // 64
        0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
        65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125,
        130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
        195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255
    },
    { // 80
        0, 255, 16, 239, 32, 223, 48, 207, 64, 191, 80, 175, 96, 159, 112, 143,
        3, 252, 19, 236, 35, 220, 51, 204, 67, 188, 83, 172, 100, 155, 116, 139,
        6, 249, 22, 233, 38, 217, 54, 201, 71, 184, 87, 168, 103, 152, 119, 136,
        9, 246, 25, 230, 42, 213, 58, 197, 74, 181, 90, 165, 106, 149, 122, 133,
        13, 242, 29, 226, 45, 210, 61, 194, 77, 178, 93, 162, 109, 146, 125, 130
    },
    { // 96
        0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
        64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
        2, 253, 10, 245, 18, 237, 26, 229, 35, 220, 43, 212, 51, 204, 59, 196,
        67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
        5, 250, 13, 242, 21, 234, 29, 226, 37, 218, 45, 210, 53, 202, 61, 194,
        70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
    },
    { // 128
        0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
        32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62,
        64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94,
        96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
        129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159,
        161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191,
        193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223,
        225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255
    },
    { // 160
        0, 255, 8, 247, 16, 239, 24, 231, 32, 223, 40, 215, 48, 207, 56, 199,
        64, 191, 72, 183, 80, 175, 88, 167, 96, 159, 104, 151, 112, 143, 120, 135,
        1, 254, 9, 246, 17, 238, 25, 230, 33, 222, 41, 214, 49, 206, 57, 198,
        65, 190, 73, 182, 81, 174, 89, 166, 97, 158, 105, 150, 113, 142, 121, 134,
        3, 252, 11, 244, 19, 236, 27, 228, 35, 220, 43, 212, 51, 204, 59, 196,
        67, 188, 75, 180, 83, 172, 91, 164, 99, 156, 107, 148, 115, 140, 123, 132,
        4, 251, 12, 243, 20, 235, 28, 227, 36, 219, 44, 211, 52, 203, 60, 195,
        68, 187, 76, 179, 84, 171, 92, 163, 100, 155, 108, 147, 116, 139, 124, 131,
        6, 249, 14, 241, 22, 233, 30, 225, 38, 217, 46, 209, 54, 201, 62, 193,
        70, 185, 78, 177, 86, 169, 94, 161, 102, 153, 110, 145, 118, 137, 126, 129
    },
    { // 192
        0, 255, 4, 251, 8, 247, 12, 243, 16, 239, 20, 235, 24, 231, 28, 227,
        32, 223, 36, 219, 40, 215, 44, 211, 48, 207, 52, 203, 56, 199, 60, 195,
        64, 191, 68, 187, 72, 183, 76, 179, 80, 175, 84, 171, 88, 167, 92, 163,
        96, 159, 100, 155, 104, 151, 108, 147, 112, 143, 116, 139, 120, 135, 124, 131,
        1, 254, 5, 250, 9, 246, 13, 242, 17, 238, 21, 234, 25, 230, 29, 226,
        33, 222, 37, 218, 41, 214, 45, 210, 49, 206, 53, 202, 57, 198, 61, 194,
        65, 190, 69, 186, 73, 182, 77, 178, 81, 174, 85, 170, 89, 166, 93, 162,
        97, 158, 101, 154, 105, 150, 109, 146, 113, 142, 117, 138, 121, 134, 125, 130,
        2, 253, 6, 249, 10, 245, 14, 241, 18, 237, 22, 233, 26, 229, 30, 225,
        34, 221, 38, 217, 42, 213, 46, 209, 50, 205, 54, 201, 58, 197, 62, 193,
        66, 189, 70, 185, 74, 181, 78, 177, 82, 173, 86, 169, 90, 165, 94, 161,
        98, 157, 102, 153, 106, 149, 110, 145, 114, 141, 118, 137, 122, 133, 126, 129
    },
    { // 256
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
        96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
        128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
        144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
        160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
        224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
        240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
    }
};
794 
// Exchanges the two float4 values that lhs and rhs point to.
// Leaves both untouched when either pointer is NULL.
void Swap(float4* lhs, float4* rhs)
{
    if ((lhs != NULL) && (rhs != NULL)) {
        const float4 saved = *lhs;
        *lhs = *rhs;
        *rhs = saved;
    }
}
804 
// Projects every texel of the block onto the line through ptMean with direction vecK
// and writes the two extreme points, clamped to 0..255, to e0 and e1.
// On return the rounded R+G+B sum of e0 is not larger than that of e1.
// Returns without writing anything when texels, e0 or e1 is NULL.
void FindMinMax(float4* texels, float4 ptMean, float4 vecK, float4* e0, float4* e1)
{
    if ((texels == NULL) || (e0 == NULL) || (e1 == NULL)) {
        return;
    }
    float lowProj = 1e31f; // running minimum, seeded with the clipped float maximum
    float highProj = -1e31f; // running maximum, seeded with the clipped float minimum
    int idx = START_INDEX;
    while (idx < BLOCK_SIZE) {
        const float proj = dot(texels[idx] - ptMean, vecK);
        lowProj = min(lowProj, proj);
        highProj = max(highProj, proj);
        ++idx;
    }
    // 255.0f is the largest 8-bit channel value
    *e0 = clamp(vecK * lowProj + ptMean, 0.0f, 255.0f);
    *e1 = clamp(vecK * highProj + ptMean, 0.0f, 255.0f);
    // When the axis runs from light to dark the endpoints are exchanged,
    // so that e0 always ends up as the darker one.
    const float4 rounded0 = round(*e0);
    const float4 rounded1 = round(*e1);
    const float brightness0 = rounded0.x + rounded0.y + rounded0.z;
    const float brightness1 = rounded1.x + rounded1.y + rounded1.z;
    if (brightness0 > brightness1) {
        Swap(e0, e1);
    }
}
827 
// Estimates the main color axis of the block and derives the two endpoints from it.
// For each channel, the offsets (texel - ptMean) of all texels whose offset in that
// channel is positive are summed; the sum with the largest squared length becomes the
// axis. The alpha sum only takes part when hasAlpha is true. The axis is normalized
// unless it is shorter than SMALL_VALUE, then FindMinMax fills e0 and e1.
// Returns without writing anything when texels, e0 or e1 is NULL.
void MaxAccumulationPixelDirection(float4* texels, float4 ptMean, float4* e0, float4* e1, bool hasAlpha)
{
    if ((texels == NULL) || (e0 == NULL) || (e1 == NULL)) {
        return;
    }
    const float4 zero4 = (float4)(FLOAT_ZERO);
    float4 accR = zero4;
    float4 accG = zero4;
    float4 accB = zero4;
    float4 accA = zero4;
    for (int idx = START_INDEX; idx < BLOCK_SIZE; ++idx) {
        const float4 delta = texels[idx] - ptMean;
        accR += (delta.x > FLOAT_ZERO) ? delta : zero4;
        accG += (delta.y > FLOAT_ZERO) ? delta : zero4;
        accB += (delta.z > FLOAT_ZERO) ? delta : zero4;
        accA += (delta.w > FLOAT_ZERO) ? delta : zero4;
    }
    // start with the red accumulation and let a strictly longer candidate replace it
    float4 axis = accR;
    float bestDot = dot(accR, accR);
    const float greenDot = dot(accG, accG);
    const float blueDot = dot(accB, accB);
    const float alphaDot = dot(accA, accA);
    if (greenDot > bestDot) {
        axis = accG;
        bestDot = greenDot;
    }
    if (blueDot > bestDot) {
        axis = accB;
        bestDot = blueDot;
    }
    if (hasAlpha && alphaDot > bestDot) {
        axis = accA;
        bestDot = alphaDot;
    }
    // safe normalize: a near-zero axis is passed on unchanged
    const float axisLength = length(axis);
    if (!(axisLength < SMALL_VALUE)) {
        axis = normalize(axis);
    }
    FindMinMax(texels, ptMean, axis, e0, e1);
}
867 
// Quantizes both endpoints channel by channel.
// Each channel is rounded to an integer and looked up in g_colorQuantTables at
// quantLevel * COLOR_NUM + value; the result goes to the matching EP*_INDEX slot of
// endpointQuantized. Returns without writing when endpointQuantized is NULL.
// NOTE(review): the rounded channel is used as an index without a range check, so
// callers are assumed to pass channels already limited to the table width - confirm.
void EncodeColorNormal(short quantLevel, float4 e0, float4 e1, short* endpointQuantized)
{
    if (endpointQuantized == NULL) {
        return;
    }
    const int tableBase = quantLevel * COLOR_NUM; // first entry of this quant level
    const int4 first = (int4)((int)(round(e0.x)), (int)(round(e0.y)),
        (int)(round(e0.z)), (int)(round(e0.w)));
    const int4 second = (int4)((int)(round(e1.x)), (int)(round(e1.y)),
        (int)(round(e1.z)), (int)(round(e1.w)));
    // endpoint 0
    endpointQuantized[EP0_R_INDEX] = g_colorQuantTables[tableBase + first.x];
    endpointQuantized[EP0_G_INDEX] = g_colorQuantTables[tableBase + first.y];
    endpointQuantized[EP0_B_INDEX] = g_colorQuantTables[tableBase + first.z];
    endpointQuantized[EP0_A_INDEX] = g_colorQuantTables[tableBase + first.w];
    // endpoint 1
    endpointQuantized[EP1_R_INDEX] = g_colorQuantTables[tableBase + second.x];
    endpointQuantized[EP1_G_INDEX] = g_colorQuantTables[tableBase + second.y];
    endpointQuantized[EP1_B_INDEX] = g_colorQuantTables[tableBase + second.z];
    endpointQuantized[EP1_A_INDEX] = g_colorQuantTables[tableBase + second.w];
}
886 
// Turns the quantized endpoint codes back into channel values.
// Every code in endpointQuantized is looked up in color_unquant_tables[quantLevel]
// and all four channels of *e0 and *e1 are overwritten with the results.
// Returns without writing when endpointQuantized, e0 or e1 is NULL.
void DecodeColor(short quantLevel, short endpointQuantized[COLOR_COMPONENT_NUM], float4* e0, float4* e1)
{
    if ((endpointQuantized == NULL) || (e0 == NULL) || (e1 == NULL)) {
        return;
    }
    *e0 = (float4)(
        (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_R_INDEX]]),
        (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_G_INDEX]]),
        (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_B_INDEX]]),
        (float)(color_unquant_tables[quantLevel][endpointQuantized[EP0_A_INDEX]]));
    *e1 = (float4)(
        (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_R_INDEX]]),
        (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_G_INDEX]]),
        (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_B_INDEX]]),
        (float)(color_unquant_tables[quantLevel][endpointQuantized[EP1_A_INDEX]]));
}
901 
// Maps one weight to its quantized level: the weight is scaled by weightRange,
// rounded, and limited to QUANTIZE_WEIGHT_MIN..weightRange.
short QuantizeWeight(uint weightRange, float weight)
{
    const short lowest = (short)(QUANTIZE_WEIGHT_MIN);
    const short highest = (short)(weightRange);
    const short level = (short)(round(weight * ((float)(weightRange))));
    return clamp(level, lowest, highest);
}
908 
// Computes a normalized weight for the texels of one partition by projecting them onto
// the axis that runs from endPoint[EP0_INDEX] to endPoint[EP1_INDEX].
//   part      partition id to process; only consulted when partInfo is not NULL
//   partInfo  partition assignment, or NULL when the block is not partitioned, in which
//             case every texel is processed
//   texels    the texels of the block
//   endPoint  the two endpoints of the axis
//   projw     output, one weight per texel; processed texels end up in 0..1 because the
//             projections are shifted by their minimum and divided by their spread
// Returns without writing anything when texels, endPoint or projw is NULL.
void CalculateNormalWeights(int part, PartInfo* partInfo, float4* texels,
    float4 endPoint[END_POINT_NUM], float* projw)
{
    // partInfo is deliberately absent from this check: NULL is a valid argument that
    // selects the unpartitioned path below, and rejecting it would leave projw unwritten
    // for callers that pass NULL.
    if ((texels == NULL) || (endPoint == NULL) || (projw == NULL)) {
        return;
    }
    int i = START_INDEX;
    float4 vecK = endPoint[EP1_INDEX] - endPoint[EP0_INDEX];
    if (length(vecK) < SMALL_VALUE && !partInfo) {
        // both endpoints coincide: every texel gets weight zero
        for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
            projw[i] = FLOAT_ZERO;
        }
    } else {
        vecK = normalize(vecK);
        float minw = 1e31f; // max float is clipped to 1e31f
        float maxw = -1e31f; // min float is clipped to -1e31f
        for (i = START_INDEX; i < BLOCK_SIZE; ++i) {
            if ((!partInfo) || (GetPart(partInfo, i) == part)) {
                float w = dot(vecK, texels[i] - endPoint[EP0_INDEX]);
                minw = min(w, minw);
                maxw = max(w, maxw);
                projw[i] = w;
            }
        }
        float invlen = maxw - minw;
        invlen = max(SMALL_VALUE, invlen);
        invlen = FLOAT_ONE / invlen; // invlen min is SMALL_VALUE, not zero
        for (i = START_INDEX; i < X_GRIDS * Y_GRIDS; ++i) {
            if ((!partInfo) || (GetPart(partInfo, i) == part)) {
                projw[i] = (projw[i] - minw) * invlen;
            }
        }
    }
}
943 
// Quantizes all X_GRIDS * Y_GRIDS weights of projw with QuantizeWeight and stores
// the levels in weights at the same index.
void QuantizeWeights(float projw[X_GRIDS * Y_GRIDS], uint weightRange, short* weights)
{
    int idx = START_INDEX;
    while (idx < X_GRIDS * Y_GRIDS) {
        weights[idx] = QuantizeWeight(weightRange, projw[idx]);
        ++idx;
    }
}
950 
// Produces the quantized weights of an unpartitioned block: the texels are projected
// onto the endpoint axis by CalculateNormalWeights (called with a NULL partition) and
// the result is quantized to weightRange by QuantizeWeights.
// Does nothing when texels, endPoint or weights is NULL.
void CalculateQuantizedWeights(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM], short* weights)
{
    if ((texels != NULL) && (endPoint != NULL) && (weights != NULL)) {
        float normalized[X_GRIDS * Y_GRIDS];
        CalculateNormalWeights(INT_ZERO, NULL, texels, endPoint, normalized);
        QuantizeWeights(normalized, weightRange, weights);
    }
}
960 
// ORs number into the 128-bit buffer *outputs at bit position *bitoffset and advances
// *bitoffset by bitcount. The buffer is treated as four 32-bit words x, y, z, w; bits
// that do not fit into the starting word spill into the next one.
// number is not masked, so callers must pass a value that fits in bitcount bits.
// A write that starts in the fourth word (w) is dropped, but the offset still advances.
// Does nothing when outputs or bitoffset is NULL.
void Orbits8Ptr(uint4* outputs, uint* bitoffset, uint number, uint bitcount)
{
    if ((outputs == NULL) || (bitoffset == NULL)) {
        return;
    }
    const uint startPos = *bitoffset;
    const uint endPos = startPos + bitcount;
    const uint startWord = startPos >> 5; // 5 low bits address a bit inside a 32-bit word
    const uint endWord = endPos >> 5;
    const uint shift = startPos & 31u; // bit index inside the starting word
    const uint lowPart = number << shift;
    // part that crosses into the next word; zero when the write stays inside one word
    const uint spill = (endWord > startWord) ? (number >> (32u - shift)) : UINT_ZERO;
    switch (startWord) {
        case 0: // starts in x
            (*outputs).x |= lowPart;
            (*outputs).y |= spill;
            break;
        case 1: // starts in y
            (*outputs).y |= lowPart;
            (*outputs).z |= spill;
            break;
        case 2: // starts in z
            (*outputs).z |= lowPart;
            (*outputs).w |= spill;
            break;
        default:
            break;
    }
    *bitoffset = endPos;
}
982 
// Splits n at bit i: *low receives the i low bits of n, *high receives the bits
// above them, limited to 8 bits.
void SplitHighLow(uint n, uint i, int* high, uint* low)
{
    const uint upperPart = n >> i;
    const uint lowerMask = (UINT_ONE << i) - UINT_ONE;
    *low = n & lowerMask;
    *high = ((int)(upperPart)) & 0xFF; // keep the low 8 bits only
}
989 
// Reverses the bit order of the low byte of p (bit 0 becomes bit 7 and so on).
// Done in three steps: swap the two nibbles, swap the bit pairs inside each nibble,
// then swap the two bits inside each pair. Bits above the low byte do not survive.
uint ReverseByte(uint p)
{
    const uint nibblesSwapped = ((p >> 4) & 0xFu) | ((p & 0xFu) << 4);
    const uint pairsSwapped = ((nibblesSwapped >> 2) & 0x33u) | ((nibblesSwapped & 0x33u) << 2);
    return ((pairsSwapped >> 1) & 0x55u) | ((pairsSwapped & 0x55u) << 1);
}
997 
// Writes one group of five values to the bit stream.
// Each input is split by SplitHighLow into bitcount low bits and a high part; the five
// high parts are combined as a base-3 number, translated through g_integerFromTrits
// into a packed 8-bit code, and that code is emitted in slices of 2, 2, 1, 2 and 1
// bits, each slice following the low bits of the corresponding value.
void EncodeTrits(uint bitcount, uint tritInput[TRIT_BLOCK_SIZE], uint4* outputs, uint* outpos)
{
    int trit0;
    int trit1;
    int trit2;
    int trit3;
    int trit4;
    uint low0;
    uint low1;
    uint low2;
    uint low3;
    uint low4;
    SplitHighLow(tritInput[ISE_0], bitcount, &trit0, &low0);
    SplitHighLow(tritInput[ISE_1], bitcount, &trit1, &low1);
    SplitHighLow(tritInput[ISE_2], bitcount, &trit2, &low2);
    SplitHighLow(tritInput[ISE_3], bitcount, &trit3, &low3);
    SplitHighLow(tritInput[ISE_4], bitcount, &trit4, &low4);
    // base-3 digits weighted 1, 3, 9, 27 and 81
    const int tritCode = trit0 + trit1 * 3 + trit2 * 9 + trit3 * 27 + trit4 * 81;
    // the packed code is consumed from its low end, slice by slice
    uint packed = (ushort)(g_integerFromTrits[tritCode]);

    Orbits8Ptr(outputs, outpos, low0, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 3u, 2u); // code bits 0..1
    packed >>= 2;

    Orbits8Ptr(outputs, outpos, low1, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 3u, 2u); // code bits 2..3
    packed >>= 2;

    Orbits8Ptr(outputs, outpos, low2, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 1u, 1u); // code bit 4
    packed >>= 1;

    Orbits8Ptr(outputs, outpos, low3, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 3u, 2u); // code bits 5..6
    packed >>= 2;

    Orbits8Ptr(outputs, outpos, low4, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 1u, 1u); // code bit 7
}
1032 
// Writes one group of three values to the bit stream.
// Each input is split by SplitHighLow into bitcount low bits and a high part; the three
// high parts are combined as a base-5 number, translated through g_integerFromQuints
// into a packed code, and that code is emitted in slices of 3, 2 and 2 bits, each slice
// following the low bits of the corresponding value.
void EncodeQuints(uint bitcount, uint quintInput[QUINT_BLOCK_SIZE], uint4* outputs, uint* outpos)
{
    int quint0;
    int quint1;
    int quint2;
    uint low0;
    uint low1;
    uint low2;
    SplitHighLow(quintInput[ISE_0], bitcount, &quint0, &low0);
    SplitHighLow(quintInput[ISE_1], bitcount, &quint1, &low1);
    SplitHighLow(quintInput[ISE_2], bitcount, &quint2, &low2);
    // base-5 digits weighted 1, 5 and 25
    const int quintCode = quint0 + quint1 * 5 + quint2 * 25;
    // the packed code is consumed from its low end, slice by slice
    uint packed = (ushort)(g_integerFromQuints[quintCode]);

    Orbits8Ptr(outputs, outpos, low0, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 7u, 3u); // code bits 0..2
    packed >>= 3;

    Orbits8Ptr(outputs, outpos, low1, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 3u, 2u); // code bits 3..4
    packed >>= 2;

    Orbits8Ptr(outputs, outpos, low2, bitcount);
    Orbits8Ptr(outputs, outpos, packed & 3u, 2u); // code bits 5..6
}
1052 
// Serializes the quantized endpoint values into *outputs, starting at *bitPos.
// g_bitsTritsQuintsTable holds three entries per range: the number of plain bits, a
// trit flag and a quint flag. With the trit flag set the values are written in groups
// of five by EncodeTrits, with the quint flag set in groups of three by EncodeQuints
// (unused group slots are zero); otherwise each value is written with the plain bit
// count. In the trit and quint cases *bitPos is finally set to the size computed for
// count values, where count is 8 with alpha and 6 without.
void BiseEndpoints(short numbers[COLOR_COMPONENT_NUM], int range, uint4* outputs, bool hasAlpha, uint* bitPos)
{
    const int tableRow = range * 3; // three table entries per range
    const uint bits = (uint)(g_bitsTritsQuintsTable[tableRow + 0]);
    const uint trits = (uint)(g_bitsTritsQuintsTable[tableRow + 1]);
    const uint quints = (uint)(g_bitsTritsQuintsTable[tableRow + 2]);
    const uint count = hasAlpha ? 8u : 6u; // RGBA 4x2 = 8 or RGB 3x2 = 6

    if ((trits != UINT_ONE) && (quints != UINT_ONE)) {
        // plain bits only: the first count values are written one after another
        uint idx = UINT_ZERO;
        while (idx < count) {
            Orbits8Ptr(outputs, bitPos, numbers[idx], bits);
            ++idx;
        }
        return;
    }

    if (trits == UINT_ONE) {
        uint group[TRIT_BLOCK_SIZE];
        // first group: R0 R1 G0 G1 B0
        group[ISE_0] = numbers[EP0_R_INDEX];
        group[ISE_1] = numbers[EP1_R_INDEX];
        group[ISE_2] = numbers[EP0_G_INDEX];
        group[ISE_3] = numbers[EP1_G_INDEX];
        group[ISE_4] = numbers[EP0_B_INDEX];
        EncodeTrits(bits, group, outputs, bitPos);
        // second group: B1 A0 A1 and two zero fillers
        group[ISE_0] = numbers[EP1_B_INDEX];
        group[ISE_1] = numbers[EP0_A_INDEX];
        group[ISE_2] = numbers[EP1_A_INDEX];
        group[ISE_3] = UINT_ZERO;
        group[ISE_4] = UINT_ZERO;
        EncodeTrits(bits, group, outputs, bitPos);
        *bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * count + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
        return;
    }

    uint triple[QUINT_BLOCK_SIZE];
    // first group: R0 R1 G0
    triple[ISE_0] = numbers[EP0_R_INDEX];
    triple[ISE_1] = numbers[EP1_R_INDEX];
    triple[ISE_2] = numbers[EP0_G_INDEX];
    EncodeQuints(bits, triple, outputs, bitPos);
    // second group: G1 B0 B1
    triple[ISE_0] = numbers[EP1_G_INDEX];
    triple[ISE_1] = numbers[EP0_B_INDEX];
    triple[ISE_2] = numbers[EP1_B_INDEX];
    EncodeQuints(bits, triple, outputs, bitPos);
    // third group: A0 A1 and one zero filler
    triple[ISE_0] = numbers[EP0_A_INDEX];
    triple[ISE_1] = numbers[EP1_A_INDEX];
    triple[ISE_2] = UINT_ZERO;
    EncodeQuints(bits, triple, outputs, bitPos);
    *bitPos = ((QUINT_MSB_SIZE + QUINT_BLOCK_SIZE * bits) * count + QUINT_ROUND_NUM) / QUINT_BLOCK_SIZE;
}
1095 
// Serializes the 16 quantized weights into *outputs, starting at bit 0.
// g_bitsTritsQuintsTable supplies the plain bit count and the trit flag for range.
// With the trit flag set the weights are written in groups of five by EncodeTrits
// (the last group is padded with zeros); otherwise each weight is written with the
// plain bit count.
// NOTE(review): the quint flag is not consulted here, so a range that needs quints
// takes the plain-bit path - confirm that only trit or plain ranges reach this function.
void BiseWeights(short numbers[BLOCK_SIZE], int range, uint4* outputs)
{
    const int tableRow = range * 3; // three table entries per range
    const uint bits = (uint)(g_bitsTritsQuintsTable[tableRow + 0]);
    const uint trits = (uint)(g_bitsTritsQuintsTable[tableRow + 1]);
    uint bitPos = UINT_ZERO;

    if (trits != UINT_ONE) {
        int idx = START_INDEX;
        while (idx < BLOCK_SIZE) {
            Orbits8Ptr(outputs, &bitPos, numbers[idx], bits);
            ++idx;
        }
        return;
    }

    uint group[TRIT_BLOCK_SIZE];
    // weights 0..4
    group[ISE_0] = numbers[WEIGHT_0];
    group[ISE_1] = numbers[WEIGHT_1];
    group[ISE_2] = numbers[WEIGHT_2];
    group[ISE_3] = numbers[WEIGHT_3];
    group[ISE_4] = numbers[WEIGHT_4];
    EncodeTrits(bits, group, outputs, &bitPos);
    // weights 5..9
    group[ISE_0] = numbers[WEIGHT_5];
    group[ISE_1] = numbers[WEIGHT_6];
    group[ISE_2] = numbers[WEIGHT_7];
    group[ISE_3] = numbers[WEIGHT_8];
    group[ISE_4] = numbers[WEIGHT_9];
    EncodeTrits(bits, group, outputs, &bitPos);
    // weights 10..14
    group[ISE_0] = numbers[WEIGHT_10];
    group[ISE_1] = numbers[WEIGHT_11];
    group[ISE_2] = numbers[WEIGHT_12];
    group[ISE_3] = numbers[WEIGHT_13];
    group[ISE_4] = numbers[WEIGHT_14];
    EncodeTrits(bits, group, outputs, &bitPos);
    // weight 15 followed by four zero fillers
    group[ISE_0] = numbers[WEIGHT_15];
    group[ISE_1] = UINT_ZERO;
    group[ISE_2] = UINT_ZERO;
    group[ISE_3] = UINT_ZERO;
    group[ISE_4] = UINT_ZERO;
    EncodeTrits(bits, group, outputs, &bitPos);
    bitPos = ((TRIT_MSB_SIZE + TRIT_BLOCK_SIZE * bits) * BLOCK_SIZE + TRIT_ROUND_NUM) / TRIT_BLOCK_SIZE;
}
1134 
// Builds the final 128-bit block from its parts.
// The weight stream wtIse is stored from the top of the block downwards: wtIse.x goes
// to word w, wtIse.y to z and wtIse.z to y, each with its four bytes placed in the
// opposite BYTE_*_POS order and every byte bit-reversed by ReverseByte.
// Word x starts with blockMode, followed by the color endpoint mode at CEM_POS and the
// low part of the endpoint stream at COLOR_EP_POS; the rest of epIse.x and epIse.y
// continues in words y and z, where it is ORed with the weight bits.
uint4 AssembleBlock(uint blockMode, uint colorEndpointMode, uint4 epIse, uint4 wtIse)
{
    const uint weightsInW = (ReverseByte(wtIse.x & BYTE_MASK) << BYTE_3_POS) |
        (ReverseByte((wtIse.x >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
        (ReverseByte((wtIse.x >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
        ReverseByte((wtIse.x >> BYTE_3_POS) & BYTE_MASK);
    const uint weightsInZ = (ReverseByte(wtIse.y & BYTE_MASK) << BYTE_3_POS) |
        (ReverseByte((wtIse.y >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
        (ReverseByte((wtIse.y >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
        ReverseByte((wtIse.y >> BYTE_3_POS) & BYTE_MASK);
    const uint weightsInY = (ReverseByte(wtIse.z & BYTE_MASK) << BYTE_3_POS) |
        (ReverseByte((wtIse.z >> BYTE_1_POS) & BYTE_MASK) << BYTE_2_POS) |
        (ReverseByte((wtIse.z >> BYTE_2_POS) & BYTE_MASK) << BYTE_1_POS) |
        ReverseByte((wtIse.z >> BYTE_3_POS) & BYTE_MASK);

    const uint wordX = blockMode |
        ((colorEndpointMode & MASK_FOR_4BITS) << CEM_POS) |
        ((epIse.x & MASK_FOR_15BITS) << COLOR_EP_POS);
    const uint wordY = weightsInY |
        ((epIse.x >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS) |
        ((epIse.y & MASK_FOR_15BITS) << COLOR_EP_POS);
    const uint wordZ = weightsInZ |
        ((epIse.y >> COLOR_EP_HIGH_BIT) & MASK_FOR_17BITS);

    return (uint4)(wordX, wordY, wordZ, weightsInW);
}
1160 
// Builds the block mode field from the weight quantization method, the fixed weight
// grid size (X_GRIDS x Y_GRIDS) and the dual-plane flag.
// Methods below 6 are low precision, methods from 6 upwards are high precision; the
// position inside either half, offset by WEIGHT_METHOD_OFFSET, is spread over two
// places of the field.
uint AssembleBlockmode(uint weightQuantmethod, bool isDualPlane)
{
    const uint heightField = (uint)((Y_GRIDS - HEIGHT_BITS_OFFSET) & MASK_FOR_2BITS);
    const uint widthField = (uint)((X_GRIDS - WIDTH_BITS_OFFSET) & MASK_FOR_2BITS);
    uint dualPlaneBit = UINT_ZERO;
    if (isDualPlane) {
        dualPlaneBit = UINT_ONE;
    }
    uint precisionBit = UINT_ONE;
    if (weightQuantmethod < 6u) { // low/high-precision limit is 6u
        precisionBit = UINT_ZERO;
    }
    const uint rangeField = (weightQuantmethod % 6u) + WEIGHT_METHOD_OFFSET;

    uint mode = (rangeField >> WEIGHT_METHOD_RIGHT_BIT) & MASK_FOR_2BITS;
    mode |= (rangeField & MASK_FOR_1BITS) << WEIGHT_METHOD_POS;
    mode |= (heightField & MASK_FOR_2BITS) << BLOCK_WIDTH_POS;
    mode |= (widthField & MASK_FOR_2BITS) << BLOCK_HEIGHT_POS;
    mode |= precisionBit << WEIGHT_PRECISION_POS;
    mode |= dualPlaneBit << IS_DUALPLANE_POS;
    return mode;
}
1176 
// Quantizes the two endpoints and returns their serialized bit stream.
// *ep0 and *ep1 are replaced by the values a decoder would reconstruct (quantize then
// unquantize), so later steps work with the endpoints that are actually stored.
// Without alpha the two alpha codes are zeroed before serialization.
uint4 EndpointIse(float4* ep0, float4* ep1, short endpointQuantmethod, bool hasAlpha)
{
    short codes[COLOR_COMPONENT_NUM];
    EncodeColorNormal(endpointQuantmethod, *ep0, *ep1, codes);
    DecodeColor(endpointQuantmethod, codes, ep0, ep1);
    if (!hasAlpha) {
        codes[EP0_A_INDEX] = SHORT_ZERO;
        codes[EP1_A_INDEX] = SHORT_ZERO;
    }
    uint writePos = UINT_ZERO;
    uint4 stream = (uint4)(UINT_ZERO);
    BiseEndpoints(codes, endpointQuantmethod, &stream, hasAlpha, &writePos);
    return stream;
}
1191 
// Interpolates between the two endpoints: ep1 is weighted by weight, ep0 by the
// remainder up to BLOCK_MAX_WEIGHTS_SHORT, and the sum is divided by
// BLOCK_MAX_WEIGHTS_FLOAT.
float4 CalTexel(short weight, float4 ep0, float4 ep1)
{
    const short complement = BLOCK_MAX_WEIGHTS_SHORT - weight;
    return (ep0 * complement + ep1 * weight) / BLOCK_MAX_WEIGHTS_FLOAT;
}
1197 
// Quantizes the texel weights, measures the resulting error and returns the
// serialized weight stream.
// Each quantized weight is replaced by its entry in g_scrambleTable; the texel is then
// reconstructed from the matching g_weightUnquant entry and the squared differences to
// the source texels are summed into *errval.
uint4 WeightIse(float4* texels, uint weightRange, float4 endPoint[END_POINT_NUM],
    short weightQuantmethod, float* errval)
{
    short levels[X_GRIDS * Y_GRIDS];
    CalculateQuantizedWeights(texels, weightRange, endPoint, levels);

    float squaredError = FLOAT_ZERO;
    int texelIdx = START_INDEX;
    while (texelIdx < X_GRIDS * Y_GRIDS) {
        // both tables are laid out with WEIGHT_QUANTIZE_NUM entries per quant method
        short tableIdx = weightQuantmethod * WEIGHT_QUANTIZE_NUM + levels[texelIdx];
        levels[texelIdx] = g_scrambleTable[tableIdx];
        tableIdx = weightQuantmethod * WEIGHT_QUANTIZE_NUM + levels[texelIdx];
        const short unquantized = g_weightUnquant[tableIdx];
        const float4 rebuilt = CalTexel(unquantized, endPoint[EP0_INDEX], endPoint[EP1_INDEX]);
        const float4 delta = rebuilt - texels[texelIdx];
        squaredError += dot(delta, delta);
        ++texelIdx;
    }
    *errval = squaredError;

    uint4 stream = (uint4)(UINT_ZERO);
    BiseWeights(levels, (int)(weightQuantmethod), &stream);
    return stream;
}
1219 
// Encodes the block with one fixed configuration and returns the squared error.
// *bestBlockmode is set to (QUANT_6, QUANT_256, WEIGHT_RANGE_6): x is the weight
// quantization method, y the endpoint quantization method and z the weight range.
// *epIse and *wtIse receive the serialized endpoints and weights. Alpha is always
// encoded.
float TryEncode(float4* texels, float4 texelsMean, uint4* epIse, uint4* wtIse, short3* bestBlockmode)
{
    const bool hasAlpha = true;
    const short3 fixedMode = (short3)(QUANT_6, QUANT_256, WEIGHT_RANGE_6);
    *bestBlockmode = fixedMode;
    const short weightQuantmethod = fixedMode.x;
    const short endpointQuantmethod = fixedMode.y;
    const short weightRange = fixedMode.z;

    float4 first;
    float4 second;
    MaxAccumulationPixelDirection(texels, texelsMean, &first, &second, hasAlpha);
    // EndpointIse replaces both endpoints with their quantized reconstruction
    *epIse = EndpointIse(&first, &second, endpointQuantmethod, hasAlpha);

    float4 endPoint[END_POINT_NUM];
    endPoint[EP0_INDEX] = first;
    endPoint[EP1_INDEX] = second;
    float blockError;
    *wtIse = WeightIse(texels, weightRange - UINT_ONE, endPoint, weightQuantmethod, &blockError);
    return blockError;
}
1238 
// Encodes one block of texels and returns the assembled 128-bit block.
//   texels      the texels of the block
//   texelsMean  their mean value
//   blockID     index of the block; the error is stored at errs[blockID]
//   errs        per-block error output, the float error truncated to uint
// The block is always encoded as single-plane RGBA.
uint4 EncodeBlock(float4* texels, float4 texelsMean, int blockID, __global uint* errs)
{
    bool hasAlpha = true;
    bool isDualPlane = false;

    uint4 epIse, wtIse;
    short3 bestBlockmode;
    // TryEncode fills the endpoint stream, the weight stream and the chosen mode
    float errval = TryEncode(texels, texelsMean, &epIse, &wtIse, &bestBlockmode);

    uint blockMode = AssembleBlockmode(bestBlockmode.x, isDualPlane);
    uint ColorEndpointMode;
    if (hasAlpha) {
        ColorEndpointMode = CEM_LDR_RGBA_DIRECT;
    } else {
        ColorEndpointMode = CEM_LDR_RGB_DIRECT;
    }
    errs[blockID] = (uint)(errval);
    return AssembleBlock(blockMode, ColorEndpointMode, epIse, wtIse);
}
1259 
// Loads the DIM x DIM block that belongs to the current work-item.
// The block origin is the global id multiplied by DIM. Coordinates at or beyond width
// or height are replaced by the last column or row, so edge texels are repeated.
// Every texel is scaled by PIXEL_MAX_VALUE, stored row by row in texels and added to
// *texelMean, which therefore holds a sum that the caller still has to divide.
void GotTexelFromImage(read_only image2d_t inputImage, float4 texels[BLOCK_SIZE],
    int width, int height, float4 *texelMean)
{
    int2 origin = (int2)(get_global_id(0), get_global_id(1));
    origin.x *= DIM;
    origin.y *= DIM;
    for (int row = 0; row < DIM; ++row) {
        for (int col = 0; col < DIM; ++col) {
            int2 samplePos = origin + (int2)(col, row);
            samplePos.x = (samplePos.x >= width) ? (width - 1) : samplePos.x;
            samplePos.y = (samplePos.y >= height) ? (height - 1) : samplePos.y;
            const float4 scaled = read_imagef(inputImage, samplePos) * PIXEL_MAX_VALUE;
            texels[row * DIM + col] = scaled;
            *texelMean += scaled;
        }
    }
}
1281 
// Kernel entry point: each work-item encodes one block of the input image.
// The block index is global_id(1) * global_size(0) + global_id(0); the encoded block
// is written to astcArr at that index and its error to errs at the same index.
kernel void AstcCl(read_only image2d_t inputImage, __global uint4* astcArr, __global uint* errs,
    int width, int height)
{
    const int blocksPerRow = (int)(get_global_size(0));
    const int blockX = (int)(get_global_id(0));
    const int blockY = (int)(get_global_id(1));
    const int blockID = blockY * blocksPerRow + blockX;

    float4 texels[BLOCK_SIZE];
    float4 texelSum = (float4)(0.0f);
    GotTexelFromImage(inputImage, texels, width, height, &texelSum);
    const float4 texelMean = texelSum / ((float)(BLOCK_SIZE));
    astcArr[blockID] = EncodeBlock(texels, texelMean, blockID, errs);
}
1294 )";
1295 
// Keeps track of the OpenCL library handle used by this file.
// LoadOpenCLSo() obtains the handle through InitOpenCLExtern on first use and the
// destructor passes it to UnLoadCLExtern.
class OpenCLSoManager {
public:
    OpenCLSoManager();
    ~OpenCLSoManager();

    // Returns true when the library is loaded, loading it first if necessary.
    bool LoadOpenCLSo();

private:
    void *clSoHandle { nullptr }; // handle filled in by InitOpenCLExtern
    bool loadSuccess { false };   // result of the last InitOpenCLExtern call
};
1305 
1306 static OpenCLSoManager g_clSoManager;
1307 
OpenCLSoManager()1308 OpenCLSoManager::OpenCLSoManager()
1309 {
1310     clSoHandle = nullptr;
1311     loadSuccess = false;
1312 }
1313 
~OpenCLSoManager()1314 OpenCLSoManager::~OpenCLSoManager()
1315 {
1316     if (!UnLoadCLExtern(clSoHandle)) {
1317         IMAGE_LOGE("astcenc OpenCLSoManager UnLoad failed!");
1318     } else {
1319         IMAGE_LOGD("astcenc OpenCLSoManager UnLoad success!");
1320         loadSuccess = false;
1321     }
1322 }
1323 
LoadOpenCLSo()1324 bool OpenCLSoManager::LoadOpenCLSo()
1325 {
1326     if (!loadSuccess) {
1327         loadSuccess = InitOpenCLExtern(&clSoHandle);
1328     }
1329     return loadSuccess;
1330 }
1331 
AstcClClose(ClAstcHandle * clAstcHandle)1332 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClClose(ClAstcHandle *clAstcHandle)
1333 {
1334     if (clAstcHandle == nullptr) {
1335         IMAGE_LOGE("astc AstcClClose clAstcHandle is nullptr!");
1336         return CL_ASTC_ENC_FAILED;
1337     }
1338     cl_int clRet;
1339     if (clAstcHandle->kernel != nullptr) {
1340         clRet = clReleaseKernel(clAstcHandle->kernel);
1341         if (clRet != CL_SUCCESS) {
1342             IMAGE_LOGE("astc clReleaseKernel failed ret %{public}d!", clRet);
1343             return CL_ASTC_ENC_FAILED;
1344         }
1345         clAstcHandle->kernel = nullptr;
1346     }
1347     if (clAstcHandle->queue != nullptr) {
1348         clRet = clReleaseCommandQueue(clAstcHandle->queue);
1349         if (clRet != CL_SUCCESS) {
1350             IMAGE_LOGE("astc clReleaseCommandQueue failed ret %{public}d!", clRet);
1351             return CL_ASTC_ENC_FAILED;
1352         }
1353         clAstcHandle->queue = nullptr;
1354     }
1355     if (clAstcHandle->context != nullptr) {
1356         clRet = clReleaseContext(clAstcHandle->context);
1357         if (clRet != CL_SUCCESS) {
1358             IMAGE_LOGE("astc clReleaseContext failed ret %{public}d!", clRet);
1359             return CL_ASTC_ENC_FAILED;
1360         }
1361         clAstcHandle->context = nullptr;
1362     }
1363     if (clAstcHandle->encObj.blockErrs_ != nullptr) {
1364         free(clAstcHandle->encObj.blockErrs_);
1365         clAstcHandle->encObj.blockErrs_ = nullptr;
1366     }
1367     if (clAstcHandle != nullptr) {
1368         free(clAstcHandle);
1369     }
1370     return CL_ASTC_ENC_SUCCESS;
1371 }
1372 
// Returns true when a file system entry exists at the given path.
static bool CheckClBinIsExist(const std::string &name)
{
    // access() yields 0 when the entry exists and -1 otherwise.
    const int accessRet = access(name.c_str(), F_OK);
    return accessRet != -1;
}
1377 
SaveClBin(cl_program program,const std::string & clBinPath)1378 static CL_ASTC_STATUS SaveClBin(cl_program program, const std::string &clBinPath)
1379 {
1380     size_t programBinarySizes;
1381     cl_int clRet = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &programBinarySizes, NULL);
1382     if (clRet != CL_SUCCESS) {
1383         IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARY_SIZES failed ret %{public}d!", clRet);
1384         return CL_ASTC_ENC_FAILED;
1385     }
1386     bool genBinFail = (programBinarySizes == 0) || (programBinarySizes > MAX_MALLOC_BYTES);
1387     if (genBinFail) {
1388         IMAGE_LOGE("astc clGetProgramInfo programBinarySizes %{public}zu too big!", programBinarySizes);
1389         return CL_ASTC_ENC_FAILED;
1390     }
1391     uint8_t *programBinaries = static_cast<uint8_t *>(malloc(programBinarySizes));
1392     if (programBinaries == nullptr) {
1393         IMAGE_LOGE("astc programBinaries malloc failed!");
1394         return CL_ASTC_ENC_FAILED;
1395     }
1396     clRet = clGetProgramInfo(program, CL_PROGRAM_BINARIES, programBinarySizes, &programBinaries, NULL);
1397     if (clRet != CL_SUCCESS) {
1398         IMAGE_LOGE("astc clGetProgramInfo CL_PROGRAM_BINARIES failed ret %{public}d!", clRet);
1399         free(programBinaries);
1400         return CL_ASTC_ENC_FAILED;
1401     }
1402     FILE *fp = fopen(clBinPath.c_str(), "wb");
1403     if (fp == nullptr) {
1404         IMAGE_LOGE("astc create file: %{public}s failed!", clBinPath.c_str());
1405         free(programBinaries);
1406         return CL_ASTC_ENC_FAILED;
1407     }
1408     CL_ASTC_STATUS ret = CL_ASTC_ENC_SUCCESS;
1409     if (fwrite(programBinaries, 1, programBinarySizes, fp) != programBinarySizes) {
1410         IMAGE_LOGE("astc fwrite programBinaries file failed!");
1411         ret = CL_ASTC_ENC_FAILED;
1412     }
1413     if (fclose(fp) != 0) {
1414         IMAGE_LOGE("astc SaveClBin close file failed!");
1415         ret = CL_ASTC_ENC_FAILED;
1416     }
1417     fp = nullptr;
1418     free(programBinaries);
1419     return ret;
1420 }
1421 
BuildProgramAndCreateKernel(cl_program program,ClAstcHandle * clAstcHandle)1422 static CL_ASTC_STATUS BuildProgramAndCreateKernel(cl_program program, ClAstcHandle *clAstcHandle)
1423 {
1424     cl_int clRet = clBuildProgram(program, 1, &clAstcHandle->deviceID, "-cl-std=CL3.0", nullptr, nullptr);
1425     if (clRet != CL_SUCCESS) {
1426         IMAGE_LOGE("astc clBuildProgram failed ret %{public}d!", clRet);
1427         return CL_ASTC_ENC_FAILED;
1428     }
1429     clAstcHandle->kernel = clCreateKernel(program, "AstcCl", &clRet);
1430     if (clRet != CL_SUCCESS) {
1431         IMAGE_LOGE("astc clCreateKernel failed ret %{public}d!", clRet);
1432         return CL_ASTC_ENC_FAILED;
1433     }
1434     return CL_ASTC_ENC_SUCCESS;
1435 }
1436 
AstcClBuildProgram(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1437 static CL_ASTC_STATUS AstcClBuildProgram(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1438 {
1439     cl_int clRet;
1440     cl_program program = nullptr;
1441     if (!CheckClBinIsExist(clBinPath)) {
1442         size_t sourceSize = strlen(g_programSource) + 1; // '\0' occupies 1 bytes
1443         program = clCreateProgramWithSource(clAstcHandle->context, 1, &g_programSource, &sourceSize, &clRet);
1444         if (clRet != CL_SUCCESS) {
1445             IMAGE_LOGE("astc clCreateProgramWithSource failed ret %{public}d!", clRet);
1446             return CL_ASTC_ENC_FAILED;
1447         }
1448         if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1449             IMAGE_LOGE("astc BuildProgramAndCreateKernel failed ret %{public}d!", clRet);
1450             clReleaseProgram(program);
1451             return CL_ASTC_ENC_FAILED;
1452         }
1453         if (SaveClBin(program, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1454             IMAGE_LOGI("astc SaveClBin failed!");
1455         }
1456     } else {
1457         std::ifstream contents{clBinPath};
1458         std::string binaryContent{std::istreambuf_iterator<char>{contents}, {}};
1459         size_t binSize = binaryContent.length();
1460         bool invaildSize = (binSize == 0) || (binSize > MAX_MALLOC_BYTES);
1461         if (invaildSize) {
1462             IMAGE_LOGE("astc AstcClBuildProgram read CLbin file lenth error %{public}zu!", binSize);
1463             return CL_ASTC_ENC_FAILED;
1464         }
1465         const char *binary = static_cast<const char *>(binaryContent.c_str());
1466         program = clCreateProgramWithBinary(clAstcHandle->context, 1, &clAstcHandle->deviceID, &binSize,
1467             (const unsigned char **)&binary, nullptr, &clRet);
1468         if (clRet != CL_SUCCESS) {
1469             IMAGE_LOGE("astc clCreateProgramWithBinary failed ret %{public}d!", clRet);
1470             return CL_ASTC_ENC_FAILED;
1471         }
1472         if (BuildProgramAndCreateKernel(program, clAstcHandle) != CL_ASTC_ENC_SUCCESS) {
1473             IMAGE_LOGE("astc BuildProgramAndCreateKernel with bin failed!");
1474             clReleaseProgram(program);
1475             return CL_ASTC_ENC_FAILED;
1476         }
1477     }
1478     clRet = clReleaseProgram(program);
1479     if (clRet != CL_SUCCESS) {
1480         IMAGE_LOGE("astc clReleaseProgram failed ret %{public}d!", clRet);
1481         return CL_ASTC_ENC_FAILED;
1482     }
1483     return CL_ASTC_ENC_SUCCESS;
1484 }
1485 
AstcCreateClKernel(ClAstcHandle * clAstcHandle,const std::string & clBinPath)1486 static CL_ASTC_STATUS AstcCreateClKernel(ClAstcHandle *clAstcHandle, const std::string &clBinPath)
1487 {
1488     if (!g_clSoManager.LoadOpenCLSo()) {
1489         IMAGE_LOGE("astc InitOpenCL error!");
1490         return CL_ASTC_ENC_FAILED;
1491     }
1492     cl_int clRet;
1493     cl_platform_id platformID;
1494     clRet = clGetPlatformIDs(1, &platformID, NULL);
1495     if (clRet != CL_SUCCESS) {
1496         IMAGE_LOGE("astc clGetPlatformIDs failed ret %{public}d!", clRet);
1497         return CL_ASTC_ENC_FAILED;
1498     }
1499     clRet = clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &clAstcHandle->deviceID, NULL);
1500     if (clRet != CL_SUCCESS) {
1501         IMAGE_LOGE("astc clGetDeviceIDs failed ret %{public}d!", clRet);
1502         return CL_ASTC_ENC_FAILED;
1503     }
1504     clAstcHandle->context = clCreateContext(0, 1, &clAstcHandle->deviceID, NULL, NULL, &clRet);
1505     if (clRet != CL_SUCCESS) {
1506         IMAGE_LOGE("astc clCreateContext failed ret %{public}d!", clRet);
1507         return CL_ASTC_ENC_FAILED;
1508     }
1509     cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
1510     clAstcHandle->queue = clCreateCommandQueueWithProperties(clAstcHandle->context,
1511         clAstcHandle->deviceID, props, &clRet);
1512     if (clRet != CL_SUCCESS) {
1513         IMAGE_LOGE("astc clCreateCommandQueueWithProperties failed ret %{public}d!", clRet);
1514         return CL_ASTC_ENC_FAILED;
1515     }
1516     if (AstcClBuildProgram(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1517         IMAGE_LOGE("astc AstcClBuildProgram failed!");
1518         return CL_ASTC_ENC_FAILED;
1519     }
1520     return CL_ASTC_ENC_SUCCESS;
1521 }
1522 
AstcClCreate(ClAstcHandle ** handle,const std::string & clBinPath)1523 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClCreate(ClAstcHandle **handle, const std::string &clBinPath)
1524 {
1525     ClAstcHandle *clAstcHandle = static_cast<ClAstcHandle *>(calloc(1, sizeof(ClAstcHandle)));
1526     if (clAstcHandle == nullptr) {
1527         IMAGE_LOGE("astc AstcClCreate handle calloc failed!");
1528         return CL_ASTC_ENC_FAILED;
1529     }
1530     *handle = clAstcHandle;
1531     size_t numMaxBlocks = static_cast<size_t>(((MAX_WIDTH + DIM - 1) / DIM) * ((MAX_HEIGHT + DIM - 1) / DIM));
1532     clAstcHandle->encObj.blockErrs_ =
1533         static_cast<uint32_t *>(malloc((numMaxBlocks * sizeof(uint32_t)))); // 8MB mem Max
1534     if (clAstcHandle->encObj.blockErrs_ == nullptr) {
1535         IMAGE_LOGE("astc blockErrs_ malloc failed!");
1536         AstcClClose(*handle);
1537         return CL_ASTC_ENC_FAILED;
1538     }
1539     if (AstcCreateClKernel(clAstcHandle, clBinPath) != CL_ASTC_ENC_SUCCESS) {
1540         IMAGE_LOGE("astc AstcCreateClKernel failed!");
1541         AstcClClose(*handle);
1542         return CL_ASTC_ENC_FAILED;
1543     }
1544     return CL_ASTC_ENC_SUCCESS;
1545 }
1546 
AstcClEncImageCheckImageOption(const ClAstcImageOption * imageIn)1547 static CL_ASTC_STATUS AstcClEncImageCheckImageOption(const ClAstcImageOption *imageIn)
1548 {
1549     if ((imageIn->width <= 0) || (imageIn->height <= 0) || (imageIn->stride < imageIn->width)) {
1550         IMAGE_LOGE("astc AstcClEncImage width <= 0 or height <= 0 or stride < width!");
1551         return CL_ASTC_ENC_FAILED;
1552     }
1553     if ((imageIn->width > MAX_WIDTH) || (imageIn->height > MAX_HEIGHT)) {
1554         IMAGE_LOGE("astc AstcClEncImage width[%{public}d] \
1555             need be [1, %{public}d] and height[%{public}d] need be [1, %{public}d]", \
1556             imageIn->width, MAX_WIDTH, imageIn->height, MAX_HEIGHT);
1557         return CL_ASTC_ENC_FAILED;
1558     }
1559     return CL_ASTC_ENC_SUCCESS;
1560 }
1561 
AstcClFillImage(ClAstcImageOption * imageIn,uint8_t * data,int32_t stride,int32_t width,int32_t height)1562 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClFillImage(ClAstcImageOption *imageIn, uint8_t *data, int32_t stride,
1563     int32_t width, int32_t height)
1564 {
1565     if (imageIn == nullptr) {
1566         IMAGE_LOGE("astc AstcClFillImage imageIn is  nullptr!");
1567         return CL_ASTC_ENC_FAILED;
1568     }
1569     imageIn->data = data;
1570     imageIn->stride = stride;
1571     imageIn->width = width;
1572     imageIn->height = height;
1573     if (AstcClEncImageCheckImageOption(imageIn) != CL_ASTC_ENC_SUCCESS) {
1574         IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1575         return CL_ASTC_ENC_FAILED;
1576     }
1577     return CL_ASTC_ENC_SUCCESS;
1578 }
1579 
GenAstcHeader(uint8_t * buffer,uint8_t blockX,uint8_t blockY,uint32_t dimX,uint32_t dimY)1580 static void GenAstcHeader(uint8_t *buffer, uint8_t blockX, uint8_t blockY, uint32_t dimX, uint32_t dimY)
1581 {
1582     uint8_t *headInfo = buffer;
1583     *headInfo++ = MAGIC_FILE_CONSTANT & BYTES_MASK;
1584     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_8BITS) & BYTES_MASK;
1585     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_16BITS) & BYTES_MASK;
1586     *headInfo++ = (MAGIC_FILE_CONSTANT >> BIT_SHIFT_24BITS) & BYTES_MASK;
1587     *headInfo++ = static_cast<uint8_t>(blockX);
1588     *headInfo++ = static_cast<uint8_t>(blockY);
1589     *headInfo++ = 1;
1590     *headInfo++ = dimX & BYTES_MASK;
1591     *headInfo++ = (dimX >> BIT_SHIFT_8BITS) & BYTES_MASK;
1592     *headInfo++ = (dimX >> BIT_SHIFT_16BITS) & BYTES_MASK;
1593     *headInfo++ = dimY & BYTES_MASK;
1594     *headInfo++ = (dimY >> BIT_SHIFT_8BITS) & BYTES_MASK;
1595     *headInfo++ = (dimY >> BIT_SHIFT_16BITS) & BYTES_MASK;
1596     *headInfo++ = 1;
1597     *headInfo++ = 0;
1598     *headInfo++ = 0;
1599 }
1600 
ReleaseClAstcObj(ClAstcObjEnc * obj)1601 static void ReleaseClAstcObj(ClAstcObjEnc *obj)
1602 {
1603     cl_int clRet;
1604     if (obj != nullptr) {
1605         if (obj->inputImage != nullptr) {
1606             clRet = clReleaseMemObject(obj->inputImage);
1607             if (clRet != CL_SUCCESS) {
1608                 IMAGE_LOGE("astc inputImage release failed ret %{public}d!", clRet);
1609             }
1610             obj->inputImage = nullptr;
1611         }
1612         if (obj->astcResult != nullptr) {
1613             clRet = clReleaseMemObject(obj->astcResult);
1614             if (clRet != CL_SUCCESS) {
1615                 IMAGE_LOGE("astc astcResult release failed ret %{public}d!", clRet);
1616             }
1617             obj->astcResult = nullptr;
1618         }
1619         if (obj->errBuffer != nullptr) {
1620             clRet = clReleaseMemObject(obj->errBuffer);
1621             if (clRet != CL_SUCCESS) {
1622                 IMAGE_LOGE("astc errBuffer release failed ret %{public}d!", clRet);
1623             }
1624             obj->errBuffer = nullptr;
1625         }
1626     }
1627 }
1628 
// Computes the sum and the maximum of the first numBlocks entries of blockErrs.
//
// numBlocks - number of entries to read.
// blockErrs - per-block error values; must hold at least numBlocks entries.
// maxVal    - in/out: raised to the largest entry seen; it is NOT reset here, so the caller
//             chooses the starting value (normally 0).
// sumVal    - out: reset to 0 and then accumulated; wraps modulo 2^32 on overflow.
static void GetMaxAndSumVal(size_t numBlocks, uint32_t *blockErrs, uint32_t &maxVal, uint32_t &sumVal)
{
    sumVal = 0;
    for (size_t i = 0; i < numBlocks; i++) {
        const uint32_t blockErr = blockErrs[i];
        sumVal += blockErr;
        // Plain integer comparison: no round trip through double as with fmax().
        if (blockErr > maxVal) {
            maxVal = blockErr;
        }
    }
}
1637 
ClCreateBufferAndImage(const ClAstcImageOption * imageIn,ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj)1638 static CL_ASTC_STATUS ClCreateBufferAndImage(const ClAstcImageOption *imageIn,
1639     ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj)
1640 {
1641     uint8_t *data = imageIn->data;
1642     int32_t stride = imageIn->stride;
1643     int32_t width = imageIn->width;
1644     int32_t height = imageIn->height;
1645     size_t numBlocks = static_cast<size_t>(((width + DIM - 1) / DIM) * ((height + DIM - 1) / DIM));
1646     uint32_t *blockErrs = encObj->blockErrs_;
1647     size_t blockErrBytes = sizeof(uint32_t) * numBlocks;
1648     encObj->astcSize = numBlocks * TEXTURE_BLOCK_BYTES;
1649     if ((blockErrs == nullptr) || (memset_s(blockErrs, blockErrBytes, 0, blockErrBytes))) {
1650         IMAGE_LOGE("astc blockErrs is nullptr or memset failed!");
1651         return CL_ASTC_ENC_FAILED;
1652     }
1653     cl_image_format imageFormat = { CL_RGBA, CL_UNORM_INT8 };
1654     cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, stride, height };
1655     cl_int clRet;
1656     encObj->inputImage = clCreateImage(clAstcHandle->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat,
1657         &desc, data, &clRet);
1658     if (clRet != CL_SUCCESS) {
1659         IMAGE_LOGE("astc clCreateImage failed ret %{public}d!", clRet);
1660         return CL_ASTC_ENC_FAILED;
1661     }
1662     encObj->astcResult = clCreateBuffer(clAstcHandle->context,
1663         CL_MEM_ALLOC_HOST_PTR, encObj->astcSize, NULL, &clRet);
1664     if (clRet != CL_SUCCESS) {
1665         IMAGE_LOGE("astc clCreateBuffer astcResult failed ret %{public}d!", clRet);
1666         return CL_ASTC_ENC_FAILED;
1667     }
1668     encObj->errBuffer = clCreateBuffer(clAstcHandle->context, CL_MEM_USE_HOST_PTR, blockErrBytes, blockErrs, &clRet);
1669     if (clRet != CL_SUCCESS) {
1670         IMAGE_LOGE("astc clCreateBuffer errBuffer failed ret %{public}d!", clRet);
1671         return CL_ASTC_ENC_FAILED;
1672     }
1673     return CL_ASTC_ENC_SUCCESS;
1674 }
1675 
ClKernelArgSet(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1676 static CL_ASTC_STATUS ClKernelArgSet(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1677 {
1678     int32_t kernelId = 0;
1679     cl_int clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->inputImage);
1680     if (clRet != CL_SUCCESS) {
1681         IMAGE_LOGE("astc clSetKernelArg inputImage failed ret %{public}d!", clRet);
1682         return CL_ASTC_ENC_FAILED;
1683     }
1684     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->astcResult);
1685     if (clRet != CL_SUCCESS) {
1686         IMAGE_LOGE("astc clSetKernelArg astcResult failed ret %{public}d!", clRet);
1687         return CL_ASTC_ENC_FAILED;
1688     }
1689     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(cl_mem), &encObj->errBuffer);
1690     if (clRet != CL_SUCCESS) {
1691         IMAGE_LOGE("astc clSetKernelArg errBuffer failed ret %{public}d!", clRet);
1692         return CL_ASTC_ENC_FAILED;
1693     }
1694     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &width);
1695     if (clRet != CL_SUCCESS) {
1696         IMAGE_LOGE("astc clSetKernelArg width failed ret %{public}d!", clRet);
1697         return CL_ASTC_ENC_FAILED;
1698     }
1699     clRet = clSetKernelArg(clAstcHandle->kernel, kernelId++, sizeof(int), &height);
1700     if (clRet != CL_SUCCESS) {
1701         IMAGE_LOGE("astc clSetKernelArg height failed ret %{public}d!", clRet);
1702         return CL_ASTC_ENC_FAILED;
1703     }
1704     return CL_ASTC_ENC_SUCCESS;
1705 }
1706 
ClKernelArgSetAndRun(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,int width,int height)1707 static CL_ASTC_STATUS ClKernelArgSetAndRun(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj, int width, int height)
1708 {
1709     if (ClKernelArgSet(clAstcHandle, encObj, width, height) != CL_ASTC_ENC_SUCCESS) {
1710         IMAGE_LOGE("astc ClKernelArgSet failed!");
1711         return CL_ASTC_ENC_FAILED;
1712     }
1713     size_t local[] = {WORK_GROUP_SIZE, WORK_GROUP_SIZE};
1714     size_t global[GLOBAL_WH_NUM_CL];
1715     global[0] = static_cast<size_t>((width + DIM - 1) / DIM);
1716     global[1] = static_cast<size_t>((height + DIM - 1) / DIM);
1717     size_t localMax;
1718     cl_int clRet = clGetKernelWorkGroupInfo(clAstcHandle->kernel, clAstcHandle->deviceID, CL_KERNEL_WORK_GROUP_SIZE,
1719         sizeof(size_t), &localMax, nullptr);
1720     if (clRet != CL_SUCCESS) {
1721         IMAGE_LOGE("astc clGetKernelWorkGroupInfo failed ret %{public}d!", clRet);
1722         return CL_ASTC_ENC_FAILED;
1723     }
1724     while (local[0] * local[1] > localMax) {
1725         local[0]--;
1726         local[1]--;
1727     }
1728     if ((local[0] < 1) || (local[1] < 1)) {
1729         IMAGE_LOGE("astc ClKernelArgSetAndRun local set failed!");
1730         return CL_ASTC_ENC_FAILED;
1731     }
1732     clRet = clEnqueueNDRangeKernel(clAstcHandle->queue, clAstcHandle->kernel, GLOBAL_WH_NUM_CL, nullptr, global, local,
1733         0, nullptr, nullptr);
1734     if (clRet != CL_SUCCESS) {
1735         IMAGE_LOGE("astc clEnqueueNDRangeKernel failed ret %{public}d!", clRet);
1736         return CL_ASTC_ENC_FAILED;
1737     }
1738     clRet = clFinish(clAstcHandle->queue);
1739     if (clRet != CL_SUCCESS) {
1740         IMAGE_LOGE("astc clFinish failed ret %{public}d!", clRet);
1741         return CL_ASTC_ENC_FAILED;
1742     }
1743     return CL_ASTC_ENC_SUCCESS;
1744 }
1745 
ClReadAstcBufAndBlockError(ClAstcHandle * clAstcHandle,ClAstcObjEnc * encObj,const ClAstcImageOption * imageIn,uint8_t * buffer)1746 static CL_ASTC_STATUS ClReadAstcBufAndBlockError(ClAstcHandle *clAstcHandle, ClAstcObjEnc *encObj,
1747     const ClAstcImageOption *imageIn, uint8_t *buffer)
1748 {
1749     cl_int clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->astcResult, CL_TRUE,
1750         0, encObj->astcSize, buffer + TEXTURE_HEAD_BYTES, 0, NULL, NULL);
1751     if (clRet != CL_SUCCESS) {
1752         IMAGE_LOGE("astc clEnqueueReadBuffer astcResult failed ret %{public}d!", clRet);
1753         return CL_ASTC_ENC_FAILED;
1754     }
1755     uint32_t maxVal = 0;
1756     uint32_t sumVal = 0;
1757     size_t numBlocks = ((imageIn->width + DIM - 1) / DIM) * ((imageIn->height + DIM - 1) / DIM);
1758     clRet = clEnqueueReadBuffer(clAstcHandle->queue, encObj->errBuffer, CL_TRUE,
1759         0, sizeof(uint32_t) * numBlocks, encObj->blockErrs_, 0, NULL, NULL);
1760     if (clRet != CL_SUCCESS) {
1761         IMAGE_LOGE("astc clEnqueueReadBuffer blockErrs failed ret %{public}d!", clRet);
1762         return CL_ASTC_ENC_FAILED;
1763     }
1764     GetMaxAndSumVal(numBlocks, encObj->blockErrs_, maxVal, sumVal);
1765     return CL_ASTC_ENC_SUCCESS;
1766 }
1767 
AstcClEncImage(ClAstcHandle * clAstcHandle,const ClAstcImageOption * imageIn,uint8_t * buffer)1768 CL_ASTC_SHARE_LIB_API CL_ASTC_STATUS AstcClEncImage(ClAstcHandle *clAstcHandle,
1769     const ClAstcImageOption *imageIn, uint8_t *buffer)
1770 {
1771     if ((clAstcHandle == nullptr) || (imageIn == nullptr) || (buffer == nullptr)) {
1772         IMAGE_LOGE("astc AstcClEncImage clAstcHandle or imageIn or buffer is nullptr!");
1773         return CL_ASTC_ENC_FAILED;
1774     }
1775     if (AstcClEncImageCheckImageOption(imageIn) != CL_ASTC_ENC_SUCCESS) {
1776         IMAGE_LOGE("astc AstcClEncImageCheckImageOption failed!");
1777         return CL_ASTC_ENC_FAILED;
1778     }
1779     GenAstcHeader(buffer, DIM, DIM, imageIn->width, imageIn->height);
1780     ClAstcObjEnc *encObj = &clAstcHandle->encObj;
1781     if (encObj == nullptr) {
1782         IMAGE_LOGE("astc AstcClEncImage clAstcHandle encObj is nullptr!");
1783         return CL_ASTC_ENC_FAILED;
1784     }
1785     if (ClCreateBufferAndImage(imageIn, clAstcHandle, encObj) != CL_ASTC_ENC_SUCCESS) {
1786         ReleaseClAstcObj(encObj);
1787         IMAGE_LOGE("astc ClCreateBufferAndImage failed!");
1788         return CL_ASTC_ENC_FAILED;
1789     }
1790     if (ClKernelArgSetAndRun(clAstcHandle, encObj, imageIn->width, imageIn->height) != CL_ASTC_ENC_SUCCESS) {
1791         ReleaseClAstcObj(encObj);
1792         IMAGE_LOGE("astc ClKernelArgSetAndRun failed!");
1793         return CL_ASTC_ENC_FAILED;
1794     }
1795     if (ClReadAstcBufAndBlockError(clAstcHandle, encObj, imageIn, buffer) != CL_ASTC_ENC_SUCCESS) {
1796         ReleaseClAstcObj(encObj);
1797         IMAGE_LOGE("astc ClReadAstcBufAndBlockError failed!");
1798         return CL_ASTC_ENC_FAILED;
1799     }
1800     ReleaseClAstcObj(encObj);
1801     return CL_ASTC_ENC_SUCCESS;
1802 }
1803 }
1804 }
1805 }