1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef API_BASE_MATH_FLOAT_PACKER_H
17 #define API_BASE_MATH_FLOAT_PACKER_H
18
19 #include <cstdint>
20
21 #include <base/math/mathf.h>
22 #include <base/math/vector.h>
23
BASE_BEGIN_NAMESPACE()24 BASE_BEGIN_NAMESPACE()
25 namespace Math {
// Bit layout of a 32 bit float: exponent field mask (unshifted), position of the
// exponent field, position of the sign bit, and the bit pattern of +infinity
// (all exponent bits set, mantissa zero).
constexpr uint32_t F32_EXPONENT_BITS = 0xFF;
constexpr uint32_t F32_EXPONENT_SHIFT = 23;
constexpr uint32_t F32_SIGN_BIT = 31;
constexpr uint32_t F32_INFINITY = (F32_EXPONENT_BITS << F32_EXPONENT_SHIFT);

// Same layout description for a 16 bit float.
constexpr uint32_t F16_EXPONENT_BITS = 0x1F;
constexpr uint32_t F16_EXPONENT_SHIFT = 10;
constexpr uint32_t F16_SIGN_BIT = 15;
// Distance between the 32 bit and 16 bit sign bit positions.
constexpr uint32_t F16_SIGN_SHIFT = (F32_SIGN_BIT - F16_SIGN_BIT);
// Distance between the 32 bit and 16 bit exponent field positions.
constexpr uint32_t F16_MANTISSA_SHIFT = (F32_EXPONENT_SHIFT - F16_EXPONENT_SHIFT);
constexpr uint32_t F16_INFINITY = (F16_EXPONENT_BITS << F16_EXPONENT_SHIFT);
37
38 /** \addtogroup group_math_floatpacker
39 * @{
40 */
41 /** Converts 32 bit floating point number to 16 bit float value
42 */
43 inline uint16_t F32ToF16(float val)
44 {
45 union {
46 float f;
47 uint32_t ui;
48 } f32 { val };
49
50 uint32_t noSign = f32.ui & 0x7fffffff; // Non-sign bits
51 uint32_t sign = f32.ui & 0x80000000; // Sign bit
52 uint32_t exponent = f32.ui & F32_INFINITY; // Exponent
53
54 noSign >>= F16_MANTISSA_SHIFT; // Align mantissa on MSB
55 sign >>= F16_SIGN_SHIFT; // Shift sign bit into position
56
57 // 16bit bias = 15, 32bit bias = 127
58 // (-127 + 15) << 10 = (-112) << 10 = -0x1c000
59 noSign -= 0x1c000; // Adjust bias
60
61 // 16bit min exponent = -14, 32bit bias 127
62 // (-14 + 127) << 23 = 0x38800000
63 noSign = (exponent < 0x38800000) ? 0 : noSign; // Flush-to-zero
64 // 16bit max exponent = 15, 32bit bias 127
65 // (15 + 127) << 23 = 0x47000000
66 noSign = (exponent > 0x47000000) ? F16_INFINITY : noSign; // Clamp-to-inf
67
68 // Re-insert sign bit
69 return static_cast<uint16_t>(noSign | sign);
70 }
71
72 /** Converts 16 bit floating point number to 32 bit float
73 */
74 inline float F16ToF32(uint16_t val)
75 {
76 union {
77 float f = 0.f;
78 uint32_t ui;
79 } f32;
80
81 uint32_t noSign = val & 0x7fffU; // Non-sign bits
82 uint32_t sign = val & 0x8000U; // Sign bit
83 uint32_t exponent = val & F16_INFINITY; // Exponent
84
85 noSign <<= F16_MANTISSA_SHIFT; // Align mantissa on MSB
86 sign <<= F16_SIGN_SHIFT; // Shift sign bit into position
87
88 // 16bit bias = 15, 32bit bias = 127
89 // (-15 + 127) << 23 = 0x38000000
90 noSign += 0x38000000; // Adjust bias
91
92 noSign = (exponent == 0 ? 0 : noSign); // Denormals-as-zero
93 noSign = (exponent == F16_INFINITY ? F32_INFINITY : noSign); // Clamp-to-inf
94
95 f32.ui = noSign | sign; // Re-insert sign bit
96
97 return f32.f;
98 }
99
100 /** Pack single vector2(32bit x 2) to 32 bit integer (unsigned packed values) */
101 inline uint32_t PackUnorm2X16(const Vec2& v)
102 {
103 union {
104 uint16_t in[2];
105 uint32_t out;
106 } u;
107
108 u.in[0] = uint16_t(round(clamp(v[0], 0.f, +1.f) * 65535.0f));
109 u.in[1] = uint16_t(round(clamp(v[1], 0.f, +1.f) * 65535.0f));
110
111 return u.out;
112 }
113
114 /** Unpack 32 bit integer to default lume vector2
115 */
116 inline Vec2 UnpackUnorm2X16(uint32_t p)
117 {
118 const union {
119 uint32_t in;
120 uint16_t out[2];
121 } u { p };
122
123 return Vec2(u.out[0] * 1.5259021896696421759365224689097e-5f, u.out[1] * 1.5259021896696421759365224689097e-5f);
124 }
125
126 /** Pack single vector2(32bit x 2) to 32 bit integer (signed packed values)
127 */
128 inline uint32_t PackSnorm2X16(const Vec2& v)
129 {
130 union {
131 int16_t in[2];
132 uint32_t out;
133 } u;
134
135 u.in[0] = (int16_t)(round(clamp(v.x, -1.0f, +1.0f) * 32767.0f));
136 u.in[1] = (int16_t)(round(clamp(v.y, -1.0f, +1.0f) * 32767.0f));
137
138 return u.out;
139 }
140
141 /** Unpack 32 bit integer to default lume vector2
142 */
143 inline Vec2 UnpackSnorm2X16(uint32_t p)
144 {
145 const union {
146 uint32_t in;
147 int16_t out[2];
148 } u { p };
149
150 return Vec2(clamp(u.out[0] * 3.0518509475997192297128208258309e-5f, -1.0f, 1.0f),
151 clamp(u.out[1] * 3.0518509475997192297128208258309e-5f, -1.0f, 1.0f));
152 }
153
154 /** Pack vector2 to 32 bit integer with half precision
155 */
156 inline uint32_t PackHalf2X16(const Vec2& v)
157 {
158 const union {
159 uint16_t in[2];
160 uint32_t out;
161 } u { { F32ToF16(v.x), F32ToF16(v.y) } };
162
163 return u.out;
164 }
165
166 /** Unpack 32 bit integer to normal lume vector2 and rise precision from 16 bit to 32 bits
167 */
168 inline Vec2 UnpackHalf2X16(uint32_t v)
169 {
170 const union {
171 uint32_t in;
172 uint16_t out[2];
173 } u { v };
174
175 return Vec2(F16ToF32(u.out[0]), F16ToF32(u.out[1]));
176 }
177 /** @} */
178 } // namespace Math
179 BASE_END_NAMESPACE()
180
181 #endif // API_BASE_MATH_FLOAT_PACKER_H
182