/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gpu_program_util.h"

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <numeric>

#include <render/device/pipeline_layout_desc.h>
#include <render/namespace.h>

#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace GpuProgramUtil {
namespace {
struct VertexAttributeInfo {
    uint32_t byteSize { 0 };
    VertexInputDeclaration::VertexInputAttributeDescription description;
};
} // namespace

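// Merges the bindings of inDescriptorSetLayout into outDescriptorSetLayout: shader stage flags of
// bindings with the same binding index are combined, bindings not yet present are appended, and
// false is returned if a binding index is declared with a different descriptor type or count.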
bool AddBindings(const DescriptorSetLayout& inDescriptorSetLayout, DescriptorSetLayout& outDescriptorSetLayout)
{
    const auto& inBindings = inDescriptorSetLayout.bindings;
    auto& outBindings = outDescriptorSetLayout.bindings;
    if (outBindings.size() < inBindings.size()) {
        outBindings.reserve(inBindings.size());
    }
    bool validCombination = true;
    for (size_t idx = 0; idx < inBindings.size(); ++idx) {
        bool bindingAlreadyFound = false;
        const auto& inBinding = inBindings[idx];
        const uint32_t currBindingIndex = inBinding.binding;
        for (auto& outRef : outBindings) {
            if (currBindingIndex == outRef.binding) {
                bindingAlreadyFound = true;
                outRef.shaderStageFlags |= inBinding.shaderStageFlags;
                if ((inBinding.descriptorType != outRef.descriptorType) ||
                    (inBinding.descriptorCount != outRef.descriptorCount)) {
                    validCombination = false;
                    PLUGIN_LOG_E(
                        "Invalid descriptor set combination with binding %u. Descriptor type %u = %u. Descriptor count "
                        "%u = %u",
                        currBindingIndex, inBinding.descriptorType, outRef.descriptorType, inBinding.descriptorCount,
                        outRef.descriptorCount);
                    // a more detailed error is logged at a higher level with more context
                }
            }
        }
        if (!bindingAlreadyFound) {
            outBindings.push_back(inBinding);
        }
    }
    return validCombination;
}

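// Combines pipeline layouts (typically one per shader module) into outPl: descriptor set bindings
// are merged per set with AddBindings, push constant stage flags are OR'd together, the push
// constant byte size is the maximum of the inputs, descriptorSetCount is updated, and the bindings
// of every valid set are sorted by binding index.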
void CombinePipelineLayouts(const array_view<const PipelineLayout> inPl, PipelineLayout& outPl)
{
    auto& descriptorSetLayouts = outPl.descriptorSetLayouts;
    for (const auto& plRef : inPl) {
        for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
            if (plRef.descriptorSetLayouts[idx].set != PipelineLayoutConstants::INVALID_INDEX) {
                descriptorSetLayouts[idx].set = plRef.descriptorSetLayouts[idx].set;
                const bool validComb =
                    GpuProgramUtil::AddBindings(plRef.descriptorSetLayouts[idx], descriptorSetLayouts[idx]);
                if (!validComb) {
                    PLUGIN_LOG_E(
                        "Invalid shader module descriptor set combination for shader program. Descriptor set %u.", idx);
                }
            }
        }
        outPl.pushConstant.shaderStageFlags |= plRef.pushConstant.shaderStageFlags;
        outPl.pushConstant.byteSize = Math::max(outPl.pushConstant.byteSize, plRef.pushConstant.byteSize);
    }

    uint32_t descriptorSetCount = 0;
    for (const DescriptorSetLayout& currLayout : outPl.descriptorSetLayouts) {
        if (currLayout.set != PipelineLayoutConstants::INVALID_INDEX) {
            descriptorSetCount++;
        }
    }
    outPl.descriptorSetCount = descriptorSetCount;

    // sort bindings inside sets
    for (DescriptorSetLayout& currSet : outPl.descriptorSetLayouts) {
        if (currSet.set != PipelineLayoutConstants::INVALID_INDEX) {
            std::sort(currSet.bindings.begin(), currSet.bindings.end(),
                [](auto const& lhs, auto const& rhs) { return (lhs.binding < rhs.binding); });
        }
    }
}

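// Byte size of a single specialization constant. All currently supported types
// (bool, uint32, int32, float) are 4 bytes.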
uint32_t SpecializationByteSize(ShaderSpecialization::Constant::Type type)
{
    switch (type) {
        case RENDER_NS::ShaderSpecialization::Constant::Type::BOOL:
            [[fallthrough]];
        case RENDER_NS::ShaderSpecialization::Constant::Type::UINT32:
            [[fallthrough]];
        case RENDER_NS::ShaderSpecialization::Constant::Type::INT32:
            [[fallthrough]];
        case RENDER_NS::ShaderSpecialization::Constant::Type::FLOAT:
            return 4;
        default:
            break;
    }
    return 4;
}

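// Appends the given specialization constants to outSpecializationConstants, assigning each appended
// constant a byte offset that continues from the end of the existing list.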
void AddSpecializationConstants(const array_view<const ShaderSpecialization::Constant> inSpecializationConstants,
    vector<ShaderSpecialization::Constant>& outSpecializationConstants)
{
    uint32_t offset = 0;
    if (!outSpecializationConstants.empty()) {
        offset =
            outSpecializationConstants.back().offset + SpecializationByteSize(outSpecializationConstants.back().type);
    }
    for (auto const& constant : inSpecializationConstants) {
        outSpecializationConstants.push_back(
            ShaderSpecialization::Constant { constant.shaderStage, constant.id, constant.type, offset });
        offset += SpecializationByteSize(constant.type);
    }
}

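// Merges specialization constants from a shader module into outSc and keeps the combined list
// sorted by offset.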
void CombineSpecializationConstants(const BASE_NS::array_view<const ShaderSpecialization::Constant> inSc,
    BASE_NS::vector<ShaderSpecialization::Constant>& outSc)
{
    if (!inSc.empty()) {
        GpuProgramUtil::AddSpecializationConstants(inSc, outSc);
    }
    // sorted based on offset due to offset mapping with shader combinations
    // NOTE: id and name indexing
    std::sort(outSc.begin(), outSc.end(), [](const auto& lhs, const auto& rhs) { return (lhs.offset < rhs.offset); });
}

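// Returns the byte size of one element of the given format (e.g. BASE_FORMAT_R32G32B32_SFLOAT is
// 12 bytes). Returns 0 for BASE_FORMAT_UNDEFINED and unhandled formats.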
uint32_t FormatByteSize(Format format)
{
    switch (format) {
        case BASE_FORMAT_UNDEFINED:
            return 0;

        case BASE_FORMAT_R4G4_UNORM_PACK8:
            return 1;

        case BASE_FORMAT_R4G4B4A4_UNORM_PACK16:
        case BASE_FORMAT_B4G4R4A4_UNORM_PACK16:
        case BASE_FORMAT_R5G6B5_UNORM_PACK16:
        case BASE_FORMAT_B5G6R5_UNORM_PACK16:
        case BASE_FORMAT_R5G5B5A1_UNORM_PACK16:
        case BASE_FORMAT_B5G5R5A1_UNORM_PACK16:
        case BASE_FORMAT_A1R5G5B5_UNORM_PACK16:
            return 2;

        case BASE_FORMAT_R8_UNORM:
        case BASE_FORMAT_R8_SNORM:
        case BASE_FORMAT_R8_USCALED:
        case BASE_FORMAT_R8_SSCALED:
        case BASE_FORMAT_R8_UINT:
        case BASE_FORMAT_R8_SINT:
        case BASE_FORMAT_R8_SRGB:
            return 1;

        case BASE_FORMAT_R8G8_UNORM:
        case BASE_FORMAT_R8G8_SNORM:
        case BASE_FORMAT_R8G8_USCALED:
        case BASE_FORMAT_R8G8_SSCALED:
        case BASE_FORMAT_R8G8_UINT:
        case BASE_FORMAT_R8G8_SINT:
        case BASE_FORMAT_R8G8_SRGB:
            return 2;

        case BASE_FORMAT_R8G8B8_UNORM:
        case BASE_FORMAT_R8G8B8_SNORM:
        case BASE_FORMAT_R8G8B8_USCALED:
        case BASE_FORMAT_R8G8B8_SSCALED:
        case BASE_FORMAT_R8G8B8_UINT:
        case BASE_FORMAT_R8G8B8_SINT:
        case BASE_FORMAT_R8G8B8_SRGB:
        case BASE_FORMAT_B8G8R8_UNORM:
        case BASE_FORMAT_B8G8R8_SNORM:
        case BASE_FORMAT_B8G8R8_UINT:
        case BASE_FORMAT_B8G8R8_SINT:
        case BASE_FORMAT_B8G8R8_SRGB:
            return 3;

        case BASE_FORMAT_R8G8B8A8_UNORM:
        case BASE_FORMAT_R8G8B8A8_SNORM:
        case BASE_FORMAT_R8G8B8A8_USCALED:
        case BASE_FORMAT_R8G8B8A8_SSCALED:
        case BASE_FORMAT_R8G8B8A8_UINT:
        case BASE_FORMAT_R8G8B8A8_SINT:
        case BASE_FORMAT_R8G8B8A8_SRGB:
        case BASE_FORMAT_B8G8R8A8_UNORM:
        case BASE_FORMAT_B8G8R8A8_SNORM:
        case BASE_FORMAT_B8G8R8A8_UINT:
        case BASE_FORMAT_B8G8R8A8_SINT:
        case BASE_FORMAT_B8G8R8A8_SRGB:
        case BASE_FORMAT_A8B8G8R8_UNORM_PACK32:
        case BASE_FORMAT_A8B8G8R8_SNORM_PACK32:
        case BASE_FORMAT_A8B8G8R8_USCALED_PACK32:
        case BASE_FORMAT_A8B8G8R8_SSCALED_PACK32:
        case BASE_FORMAT_A8B8G8R8_UINT_PACK32:
        case BASE_FORMAT_A8B8G8R8_SINT_PACK32:
        case BASE_FORMAT_A8B8G8R8_SRGB_PACK32:
        case BASE_FORMAT_A2R10G10B10_UNORM_PACK32:
        case BASE_FORMAT_A2R10G10B10_UINT_PACK32:
        case BASE_FORMAT_A2R10G10B10_SINT_PACK32:
        case BASE_FORMAT_A2B10G10R10_UNORM_PACK32:
        case BASE_FORMAT_A2B10G10R10_SNORM_PACK32:
        case BASE_FORMAT_A2B10G10R10_USCALED_PACK32:
        case BASE_FORMAT_A2B10G10R10_SSCALED_PACK32:
        case BASE_FORMAT_A2B10G10R10_UINT_PACK32:
        case BASE_FORMAT_A2B10G10R10_SINT_PACK32:
            return 4;

        case BASE_FORMAT_R16_UNORM:
        case BASE_FORMAT_R16_SNORM:
        case BASE_FORMAT_R16_USCALED:
        case BASE_FORMAT_R16_SSCALED:
        case BASE_FORMAT_R16_UINT:
        case BASE_FORMAT_R16_SINT:
        case BASE_FORMAT_R16_SFLOAT:
            return 2;

        case BASE_FORMAT_R16G16_UNORM:
        case BASE_FORMAT_R16G16_SNORM:
        case BASE_FORMAT_R16G16_USCALED:
        case BASE_FORMAT_R16G16_SSCALED:
        case BASE_FORMAT_R16G16_UINT:
        case BASE_FORMAT_R16G16_SINT:
        case BASE_FORMAT_R16G16_SFLOAT:
            return 4;

        case BASE_FORMAT_R16G16B16_UNORM:
        case BASE_FORMAT_R16G16B16_SNORM:
        case BASE_FORMAT_R16G16B16_USCALED:
        case BASE_FORMAT_R16G16B16_SSCALED:
        case BASE_FORMAT_R16G16B16_UINT:
        case BASE_FORMAT_R16G16B16_SINT:
        case BASE_FORMAT_R16G16B16_SFLOAT:
            return 6;

        case BASE_FORMAT_R16G16B16A16_UNORM:
        case BASE_FORMAT_R16G16B16A16_SNORM:
        case BASE_FORMAT_R16G16B16A16_USCALED:
        case BASE_FORMAT_R16G16B16A16_SSCALED:
        case BASE_FORMAT_R16G16B16A16_UINT:
        case BASE_FORMAT_R16G16B16A16_SINT:
        case BASE_FORMAT_R16G16B16A16_SFLOAT:
            return 8;

        case BASE_FORMAT_R32_UINT:
        case BASE_FORMAT_R32_SINT:
        case BASE_FORMAT_R32_SFLOAT:
            return 4;

        case BASE_FORMAT_R32G32_UINT:
        case BASE_FORMAT_R32G32_SINT:
        case BASE_FORMAT_R32G32_SFLOAT:
            return 8;

        case BASE_FORMAT_R32G32B32_UINT:
        case BASE_FORMAT_R32G32B32_SINT:
        case BASE_FORMAT_R32G32B32_SFLOAT:
            return 12;

        case BASE_FORMAT_R32G32B32A32_UINT:
        case BASE_FORMAT_R32G32B32A32_SINT:
        case BASE_FORMAT_R32G32B32A32_SFLOAT:
            return 16;

        case BASE_FORMAT_B10G11R11_UFLOAT_PACK32:
        case BASE_FORMAT_E5B9G9R9_UFLOAT_PACK32:
            return 4;

        case BASE_FORMAT_D16_UNORM:
            return 2;

        case BASE_FORMAT_X8_D24_UNORM_PACK32:
        case BASE_FORMAT_D32_SFLOAT:
            return 4;

        case BASE_FORMAT_S8_UINT:
            return 1;

        case BASE_FORMAT_D24_UNORM_S8_UINT:
            return 4;

        default:
            return 0;
    }
}
} // namespace GpuProgramUtil
RENDER_END_NAMESPACE()