1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef API_BASE_MATH_FLOAT_PACKER_H
17 #define API_BASE_MATH_FLOAT_PACKER_H
18 
19 #include <cstdint>
20 
21 #include <base/math/mathf.h>
22 #include <base/math/vector.h>
23 
BASE_BEGIN_NAMESPACE()24 BASE_BEGIN_NAMESPACE()
25 namespace Math {
// Bit layout of a 32 bit float: 1 sign bit (bit 31), 8 exponent bits (bits 23..30), 23 mantissa bits.
constexpr uint32_t F32_EXPONENT_BITS = 0xFFU; // Exponent field mask, before shifting into place
constexpr uint32_t F32_EXPONENT_SHIFT = 23U;  // Position of the lowest exponent bit
constexpr uint32_t F32_SIGN_BIT = 31U;        // Position of the sign bit
// All exponent bits set and mantissa zero, i.e. the bit pattern of +infinity (0x7f800000).
constexpr uint32_t F32_INFINITY = F32_EXPONENT_BITS << F32_EXPONENT_SHIFT;

// Bit layout of a 16 bit float: 1 sign bit (bit 15), 5 exponent bits (bits 10..14), 10 mantissa bits.
constexpr uint32_t F16_EXPONENT_BITS = 0x1FU; // Exponent field mask, before shifting into place
constexpr uint32_t F16_EXPONENT_SHIFT = 10U;  // Position of the lowest exponent bit
constexpr uint32_t F16_SIGN_BIT = 15U;        // Position of the sign bit
// Distance between the 32 bit and the 16 bit sign bit positions (16).
constexpr uint32_t F16_SIGN_SHIFT = F32_SIGN_BIT - F16_SIGN_BIT;
// Distance between the 32 bit and the 16 bit exponent/mantissa boundary (13).
constexpr uint32_t F16_MANTISSA_SHIFT = F32_EXPONENT_SHIFT - F16_EXPONENT_SHIFT;
// All exponent bits set and mantissa zero, i.e. the bit pattern of +infinity (0x7c00).
constexpr uint32_t F16_INFINITY = F16_EXPONENT_BITS << F16_EXPONENT_SHIFT;
37 
38 /** \addtogroup group_math_floatpacker
39  *  @{
40  */
41 /** Converts 32 bit floating point number to 16 bit float value
42  */
43 inline uint16_t F32ToF16(float val)
44 {
45     union {
46         float f;
47         uint32_t ui;
48     } f32 { val };
49 
50     uint32_t noSign = f32.ui & 0x7fffffff;     // Non-sign bits
51     uint32_t sign = f32.ui & 0x80000000;       // Sign bit
52     uint32_t exponent = f32.ui & F32_INFINITY; // Exponent
53 
54     noSign >>= F16_MANTISSA_SHIFT; // Align mantissa on MSB
55     sign >>= F16_SIGN_SHIFT;       // Shift sign bit into position
56 
57     // 16bit bias = 15, 32bit bias = 127
58     // (-127 + 15) << 10 = (-112) << 10 = -0x1c000
59     noSign -= 0x1c000; // Adjust bias
60 
61     // 16bit min exponent = -14, 32bit bias 127
62     // (-14 + 127) << 23 = 0x38800000
63     noSign = (exponent < 0x38800000) ? 0 : noSign; // Flush-to-zero
64     // 16bit max exponent = 15, 32bit bias 127
65     // (15 + 127) << 23 = 0x47000000
66     noSign = (exponent > 0x47000000) ? F16_INFINITY : noSign; // Clamp-to-inf
67 
68     // Re-insert sign bit
69     return static_cast<uint16_t>(noSign | sign);
70 }
71 
72 /** Converts 16 bit floating point number to 32 bit float
73  */
74 inline float F16ToF32(uint16_t val)
75 {
76     union {
77         float f = 0.f;
78         uint32_t ui;
79     } f32;
80 
81     uint32_t noSign = val & 0x7fffU;        // Non-sign bits
82     uint32_t sign = val & 0x8000U;          // Sign bit
83     uint32_t exponent = val & F16_INFINITY; // Exponent
84 
85     noSign <<= F16_MANTISSA_SHIFT; // Align mantissa on MSB
86     sign <<= F16_SIGN_SHIFT;       // Shift sign bit into position
87 
88     // 16bit bias = 15, 32bit bias = 127
89     // (-15 + 127) << 23 = 0x38000000
90     noSign += 0x38000000; // Adjust bias
91 
92     noSign = (exponent == 0 ? 0 : noSign);                       // Denormals-as-zero
93     noSign = (exponent == F16_INFINITY ? F32_INFINITY : noSign); // Clamp-to-inf
94 
95     f32.ui = noSign | sign; // Re-insert sign bit
96 
97     return f32.f;
98 }
99 
100 /** Pack single vector2(32bit x 2) to 32 bit integer (unsigned packed values) */
101 inline uint32_t PackUnorm2X16(const Vec2& v)
102 {
103     union {
104         uint16_t in[2];
105         uint32_t out;
106     } u;
107 
108     u.in[0] = uint16_t(round(clamp(v[0], 0.f, +1.f) * 65535.0f));
109     u.in[1] = uint16_t(round(clamp(v[1], 0.f, +1.f) * 65535.0f));
110 
111     return u.out;
112 }
113 
114 /** Unpack 32 bit integer to default lume vector2
115  */
116 inline Vec2 UnpackUnorm2X16(uint32_t p)
117 {
118     const union {
119         uint32_t in;
120         uint16_t out[2];
121     } u { p };
122 
123     return Vec2(u.out[0] * 1.5259021896696421759365224689097e-5f, u.out[1] * 1.5259021896696421759365224689097e-5f);
124 }
125 
126 /** Pack single vector2(32bit x 2) to 32 bit integer (signed packed values)
127  */
128 inline uint32_t PackSnorm2X16(const Vec2& v)
129 {
130     union {
131         int16_t in[2];
132         uint32_t out;
133     } u;
134 
135     u.in[0] = (int16_t)(round(clamp(v.x, -1.0f, +1.0f) * 32767.0f));
136     u.in[1] = (int16_t)(round(clamp(v.y, -1.0f, +1.0f) * 32767.0f));
137 
138     return u.out;
139 }
140 
141 /** Unpack 32 bit integer to default lume vector2
142  */
143 inline Vec2 UnpackSnorm2X16(uint32_t p)
144 {
145     const union {
146         uint32_t in;
147         int16_t out[2];
148     } u { p };
149 
150     return Vec2(clamp(u.out[0] * 3.0518509475997192297128208258309e-5f, -1.0f, 1.0f),
151         clamp(u.out[1] * 3.0518509475997192297128208258309e-5f, -1.0f, 1.0f));
152 }
153 
154 /** Pack vector2 to 32 bit integer with half precision
155  */
156 inline uint32_t PackHalf2X16(const Vec2& v)
157 {
158     const union {
159         uint16_t in[2];
160         uint32_t out;
161     } u { { F32ToF16(v.x), F32ToF16(v.y) } };
162 
163     return u.out;
164 }
165 
166 /** Unpack 32 bit integer to normal lume vector2 and rise precision from 16 bit to 32 bits
167  */
168 inline Vec2 UnpackHalf2X16(uint32_t v)
169 {
170     const union {
171         uint32_t in;
172         uint16_t out[2];
173     } u { v };
174 
175     return Vec2(F16ToF32(u.out[0]), F16ToF32(u.out[1]));
176 }
177 /** @} */
178 } // namespace Math
179 BASE_END_NAMESPACE()
180 
181 #endif // API_BASE_MATH_FLOAT_PACKER_H
182