1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "node_context_descriptor_set_manager_vk.h"
16 
17 #include <algorithm>
18 #include <cinttypes>
19 #include <cstdint>
20 #include <vulkan/vulkan_core.h>
21 
22 #include <base/math/mathf.h>
23 #include <render/device/pipeline_state_desc.h>
24 #include <render/namespace.h>
25 
26 #include "device/device.h"
27 #include "device/gpu_resource_handle_util.h"
28 #include "device/gpu_resource_manager.h"
29 #include "nodecontext/node_context_descriptor_set_manager.h"
30 #include "util/log.h"
31 #include "vulkan/device_vk.h"
32 #include "vulkan/gpu_image_vk.h"
33 #include "vulkan/gpu_sampler_vk.h"
34 #include "vulkan/validate_vk.h"
35 
36 using namespace BASE_NS;
37 
38 RENDER_BEGIN_NAMESPACE()
39 namespace {
GetSampler(const GpuResourceManager & gpuResourceMgr,const RenderHandle handle)40 const VkSampler* GetSampler(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle)
41 {
42     if (const auto* gpuSampler = static_cast<GpuSamplerVk*>(gpuResourceMgr.GetSampler(handle)); gpuSampler) {
43         return &(gpuSampler->GetPlatformData().sampler);
44     } else {
45         return nullptr;
46     }
47 }
48 } // namespace
49 
NodeContextDescriptorSetManagerVk(Device & device)50 NodeContextDescriptorSetManagerVk::NodeContextDescriptorSetManagerVk(Device& device)
51     : NodeContextDescriptorSetManager(), device_ { device },
52       bufferingCount_(
53           Math::min(LowLevelContextDescriptorPoolVk::MAX_BUFFERING_COUNT, device_.GetCommandBufferingCount()))
54 {
55 #if (RENDER_VALIDATION_ENABLED == 1)
56     if (device_.GetCommandBufferingCount() > LowLevelContextDescriptorPoolVk::MAX_BUFFERING_COUNT) {
57         PLUGIN_LOG_ONCE_W("device_command_buffering_count_desc_set_vk_buffering",
58             "RENDER_VALIDATION: device command buffering count (%u) is larger than supported vulkan descriptor set "
59             "buffering count (%u)",
60             device_.GetCommandBufferingCount(), LowLevelContextDescriptorPoolVk::MAX_BUFFERING_COUNT);
61     }
62 #endif
63 }
64 
~NodeContextDescriptorSetManagerVk()65 NodeContextDescriptorSetManagerVk::~NodeContextDescriptorSetManagerVk()
66 {
67     DestroyPool(descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_STATIC]);
68     DestroyPool(descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME]);
69     for (auto& ref : pendingDeallocations_) {
70         DestroyPool(ref.descriptorPool);
71     }
72 }
73 
DestroyPool(LowLevelContextDescriptorPoolVk & descriptorPool)74 void NodeContextDescriptorSetManagerVk::DestroyPool(LowLevelContextDescriptorPoolVk& descriptorPool)
75 {
76     const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
77 
78     for (auto& ref : descriptorPool.descriptorSets) {
79         for (uint32_t bufferingIdx = 0; bufferingIdx < bufferingCount_; ++bufferingIdx) {
80             auto& bufferingSetRef = ref.bufferingSet[bufferingIdx];
81             if (bufferingSetRef.descriptorSetLayout) {
82                 vkDestroyDescriptorSetLayout(device,     // device
83                     bufferingSetRef.descriptorSetLayout, // descriptorSetLayout
84                     nullptr);                            // pAllocator
85                 bufferingSetRef.descriptorSetLayout = VK_NULL_HANDLE;
86             }
87         }
88         if (ref.additionalPlatformSet.descriptorSetLayout) {
89             vkDestroyDescriptorSetLayout(device,               // device
90                 ref.additionalPlatformSet.descriptorSetLayout, // descriptorSetLayout
91                 nullptr);                                      // pAllocator
92             ref.additionalPlatformSet.descriptorSetLayout = VK_NULL_HANDLE;
93         }
94     }
95     descriptorPool.descriptorSets.clear();
96     if (descriptorPool.descriptorPool) {
97         vkDestroyDescriptorPool(device,    // device
98             descriptorPool.descriptorPool, // descriptorPool
99             nullptr);                      // pAllocator
100         descriptorPool.descriptorPool = VK_NULL_HANDLE;
101     }
102     if (descriptorPool.additionalPlatformDescriptorPool) {
103         vkDestroyDescriptorPool(device,                      // device
104             descriptorPool.additionalPlatformDescriptorPool, // descriptorPool
105             nullptr);                                        // pAllocator
106         descriptorPool.additionalPlatformDescriptorPool = VK_NULL_HANDLE;
107     }
108 }
109 
ResetAndReserve(const DescriptorCounts & descriptorCounts)110 void NodeContextDescriptorSetManagerVk::ResetAndReserve(const DescriptorCounts& descriptorCounts)
111 {
112     NodeContextDescriptorSetManager::ResetAndReserve(descriptorCounts);
113     if (maxSets_ > 0) {
114         auto& descriptorPool = descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_STATIC];
115         // usually there are less descriptor sets than max sets count
116         // due to maxsets count has been calculated for single descriptors
117         // (one descriptor set has multiple descriptors)
118         const uint32_t reserveCount = maxSets_ / 2u; // questimate for possible max vector size;
119 
120         constexpr VkDescriptorPoolCreateFlags descriptorPoolCreateFlags { 0 };
121         if (descriptorPool.descriptorPool) { // push for dealloation vec
122             PendingDeallocations pd;
123             pd.descriptorPool.descriptorPool = move(descriptorPool.descriptorPool);
124             pd.descriptorPool.descriptorSets = move(descriptorPool.descriptorSets);
125             pd.frameIndex = device_.GetFrameCount();
126             pendingDeallocations_.push_back(move(pd));
127 
128             descriptorPool.descriptorSets.clear();
129             descriptorPool.descriptorPool = VK_NULL_HANDLE;
130         }
131 
132         descriptorPoolSizes_.clear();
133         descriptorPoolSizes_.reserve(descriptorCounts.counts.size()); // max count reserve
134         for (const auto& ref : descriptorCounts.counts) {
135             if (ref.count > 0) {
136                 descriptorPoolSizes_.push_back(
137                     VkDescriptorPoolSize { (VkDescriptorType)ref.type, ref.count * bufferingCount_ });
138             }
139         }
140 
141         if (!descriptorPoolSizes_.empty()) {
142             const VkDescriptorPoolCreateInfo descriptorPoolCreateInfo {
143                 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, // sType
144                 nullptr,                                       // pNext
145                 descriptorPoolCreateFlags,                     // flags
146                 maxSets_ * bufferingCount_,                    // maxSets
147                 static_cast<uint32_t>(descriptorPoolSizes_.size()),         // poolSizeCount
148                 descriptorPoolSizes_.data(),                   // pPoolSizes
149             };
150 
151             const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
152             VALIDATE_VK_RESULT(vkCreateDescriptorPool(device, // device
153                 &descriptorPoolCreateInfo,                    // pCreateInfo
154                 nullptr,                                      // pAllocator
155                 &descriptorPool.descriptorPool));             // pDescriptorPool
156 
157             descriptorPool.descriptorSets.reserve(reserveCount);
158         }
159     }
160 }
161 
BeginFrame()162 void NodeContextDescriptorSetManagerVk::BeginFrame()
163 {
164     NodeContextDescriptorSetManager::BeginFrame();
165 
166     ClearDescriptorSetWriteData();
167 
168     oneFrameDescriptorNeed_ = {};
169     auto& oneFrameDescriptorPool = descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME];
170     if (oneFrameDescriptorPool.descriptorPool || oneFrameDescriptorPool.additionalPlatformDescriptorPool) {
171         const uint32_t descriptorSetCount = static_cast<uint32_t>(oneFrameDescriptorPool.descriptorSets.size());
172         PendingDeallocations pd;
173         pd.descriptorPool.descriptorPool = exchange(oneFrameDescriptorPool.descriptorPool, VK_NULL_HANDLE);
174         pd.descriptorPool.additionalPlatformDescriptorPool =
175             exchange(oneFrameDescriptorPool.additionalPlatformDescriptorPool, VK_NULL_HANDLE);
176         pd.descriptorPool.descriptorSets = move(oneFrameDescriptorPool.descriptorSets);
177         pd.frameIndex = device_.GetFrameCount();
178         pendingDeallocations_.push_back(move(pd));
179 
180         oneFrameDescriptorPool.descriptorSets.reserve(descriptorSetCount);
181     }
182     oneFrameDescriptorPool.descriptorSets.clear();
183 
184     // we need to check through platform special format desriptor sets/pool
185     auto& descriptorPool = descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_STATIC];
186     if (descriptorPool.additionalPlatformDescriptorPool) {
187         PendingDeallocations pd;
188         pd.descriptorPool.additionalPlatformDescriptorPool = move(descriptorPool.additionalPlatformDescriptorPool);
189         // no buffering set
190         pd.frameIndex = device_.GetFrameCount();
191         pendingDeallocations_.push_back(move(pd));
192         descriptorPool.additionalPlatformDescriptorPool = VK_NULL_HANDLE;
193         // immediate desctruction of descriptor set layouts
194         const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
195         for (auto& descriptorSetRef : descriptorPool.descriptorSets) {
196             if (descriptorSetRef.additionalPlatformSet.descriptorSetLayout) {
197                 vkDestroyDescriptorSetLayout(device,                            // device
198                     descriptorSetRef.additionalPlatformSet.descriptorSetLayout, // descriptorSetLayout
199                     nullptr);                                                   // pAllocator
200                 descriptorSetRef.additionalPlatformSet.descriptorSetLayout = VK_NULL_HANDLE;
201             }
202         }
203         auto& cpuDescriptorSet = cpuDescriptorSets_[DESCRIPTOR_SET_INDEX_TYPE_STATIC];
204         for (auto& descriptorSetRef : cpuDescriptorSet) {
205             descriptorSetRef.hasPlatformConversionBindings = false;
206         }
207     }
208 
209     // clear aged descriptor pools
210     if (!pendingDeallocations_.empty()) {
211         // this is normally empty or only has single item
212         const auto minAge = device_.GetCommandBufferingCount() + 1;
213         const auto ageLimit = (device_.GetFrameCount() < minAge) ? 0 : (device_.GetFrameCount() - minAge);
214 
215         auto oldRes = std::partition(pendingDeallocations_.begin(), pendingDeallocations_.end(),
216             [ageLimit](auto const& pd) { return pd.frameIndex >= ageLimit; });
217 
218         std::for_each(oldRes, pendingDeallocations_.end(), [this](auto& res) { DestroyPool(res.descriptorPool); });
219         pendingDeallocations_.erase(oldRes, pendingDeallocations_.end());
220     }
221 
222 #if (RENDER_VALIDATION_ENABLED == 1)
223     oneFrameDescSetGeneration_ = (oneFrameDescSetGeneration_ + 1) % MAX_ONE_FRAME_GENERATION_IDX;
224 #endif
225 }
226 
BeginBackendFrame()227 void NodeContextDescriptorSetManagerVk::BeginBackendFrame()
228 {
229     // resize vector data
230     ResizeDescriptorSetWriteData();
231 
232     auto CreateDescriptorPool = [](const VkDevice device, const uint32_t descriptorSetCount,
233                                     VkDescriptorPool& descriptorPool,
234                                     vector<VkDescriptorPoolSize>& descriptorPoolSizes) {
235         constexpr VkDescriptorPoolCreateFlags descriptorPoolCreateFlags { 0 };
236         const VkDescriptorPoolCreateInfo descriptorPoolCreateInfo {
237             VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, // sType
238             nullptr,                                       // pNext
239             descriptorPoolCreateFlags,                     // flags
240             descriptorSetCount,                            // maxSets
241             static_cast<uint32_t>(descriptorPoolSizes.size()),          // poolSizeCount
242             descriptorPoolSizes.data(),                    // pPoolSizes
243         };
244 
245         VALIDATE_VK_RESULT(vkCreateDescriptorPool(device, // device
246             &descriptorPoolCreateInfo,                    // pCreateInfo
247             nullptr,                                      // pAllocator
248             &descriptorPool));                            // pDescriptorPool
249     };
250 
251     const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
252     // reserve descriptors for descriptors sets that need platform special formats for one frame
253     if (hasPlatformConversionBindings_) {
254         const auto& cpuDescriptorSets = cpuDescriptorSets_[DESCRIPTOR_SET_INDEX_TYPE_STATIC];
255         uint32_t descriptorSetCount = 0u;
256         uint8_t descriptorCounts[OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE] { 0 };
257         for (const auto& cpuDescriptorSetRef : cpuDescriptorSets) {
258             if (cpuDescriptorSetRef.hasPlatformConversionBindings) {
259                 descriptorSetCount++;
260                 for (const auto& bindingRef : cpuDescriptorSetRef.bindings) {
261                     uint32_t descriptorCount = bindingRef.binding.descriptorCount;
262                     const uint32_t descTypeIndex = static_cast<uint32_t>(bindingRef.binding.descriptorType);
263                     if (descTypeIndex < OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE) {
264                         if ((bindingRef.binding.descriptorType ==
265                                 DescriptorType::CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
266                             RenderHandleUtil::IsPlatformConversionResource(
267                                 cpuDescriptorSetRef.images[bindingRef.resourceIndex].resource.handle)) {
268                             // expecting planar formats and making sure that there is enough descriptors
269                             constexpr uint32_t descriptorCountMultiplier = 3u;
270                             descriptorCount *= descriptorCountMultiplier;
271                         }
272                         descriptorCounts[descTypeIndex] += static_cast<uint8_t>(descriptorCount);
273                     }
274                 }
275             }
276         }
277         if (descriptorSetCount > 0) {
278             auto& descriptorPool = descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_STATIC];
279             PLUGIN_ASSERT(descriptorPool.additionalPlatformDescriptorPool == VK_NULL_HANDLE);
280             descriptorPoolSizes_.clear();
281             descriptorPoolSizes_.reserve(OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE);
282             // no buffering, only descriptors for one frame
283             for (uint32_t idx = 0; idx < OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE; ++idx) {
284                 const uint8_t count = descriptorCounts[idx];
285                 if (count > 0) {
286                     descriptorPoolSizes_.push_back(VkDescriptorPoolSize { (VkDescriptorType)idx, count });
287                 }
288             }
289             if (!descriptorPoolSizes_.empty()) {
290                 CreateDescriptorPool(
291                     device, descriptorSetCount, descriptorPool.additionalPlatformDescriptorPool, descriptorPoolSizes_);
292             }
293         }
294     }
295     // create one frame descriptor pool
296     {
297         auto& descriptorPool = descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME];
298         auto& cpuDescriptorSets = cpuDescriptorSets_[DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME];
299 
300         PLUGIN_ASSERT(descriptorPool.descriptorPool == VK_NULL_HANDLE);
301         const uint32_t descriptorSetCount = static_cast<uint32_t>(cpuDescriptorSets.size());
302         if (descriptorSetCount > 0) {
303             descriptorPoolSizes_.clear();
304             descriptorPoolSizes_.reserve(OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE);
305             for (uint32_t idx = 0; idx < OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE; ++idx) {
306                 const uint8_t count = oneFrameDescriptorNeed_.descriptorCount[idx];
307                 if (count > 0) {
308                     descriptorPoolSizes_.push_back(VkDescriptorPoolSize { (VkDescriptorType)idx, count });
309                 }
310             }
311 
312             if (!descriptorPoolSizes_.empty()) {
313                 CreateDescriptorPool(device, descriptorSetCount, descriptorPool.descriptorPool, descriptorPoolSizes_);
314             }
315         }
316         // check the need for additional platform conversion bindings
317         if (hasPlatformConversionBindings_) {
318             uint32_t platConvDescriptorSetCount = 0u;
319             uint8_t platConvDescriptorCounts[OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE] { 0 };
320             for (const auto& cpuDescriptorSetRef : cpuDescriptorSets) {
321                 if (cpuDescriptorSetRef.hasPlatformConversionBindings) {
322                     platConvDescriptorSetCount++;
323                     for (const auto& bindingRef : cpuDescriptorSetRef.bindings) {
324                         uint32_t descriptorCount = bindingRef.binding.descriptorCount;
325                         const uint32_t descTypeIndex = static_cast<uint32_t>(bindingRef.binding.descriptorType);
326                         if (descTypeIndex < OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE) {
327                             if ((bindingRef.binding.descriptorType ==
328                                     DescriptorType::CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
329                                 RenderHandleUtil::IsPlatformConversionResource(
330                                     cpuDescriptorSetRef.images[bindingRef.resourceIndex].resource.handle)) {
331                                 // expecting planar formats and making sure that there is enough descriptors
332                                 constexpr uint32_t descriptorCountMultiplier = 3u;
333                                 descriptorCount *= descriptorCountMultiplier;
334                             }
335                             platConvDescriptorCounts[descTypeIndex] += static_cast<uint8_t>(descriptorCount);
336                         }
337                     }
338                 }
339             }
340             if (descriptorSetCount > 0) {
341                 PLUGIN_ASSERT(descriptorPool.additionalPlatformDescriptorPool == VK_NULL_HANDLE);
342                 descriptorPoolSizes_.clear();
343                 descriptorPoolSizes_.reserve(OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE);
344                 // no buffering, only descriptors for one frame
345                 for (uint32_t idx = 0; idx < OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE; ++idx) {
346                     const uint8_t count = platConvDescriptorCounts[idx];
347                     if (count > 0) {
348                         descriptorPoolSizes_.push_back(VkDescriptorPoolSize { (VkDescriptorType)idx, count });
349                     }
350                 }
351                 if (!descriptorPoolSizes_.empty()) {
352                     CreateDescriptorPool(device, descriptorSetCount, descriptorPool.additionalPlatformDescriptorPool,
353                         descriptorPoolSizes_);
354                 }
355             }
356         }
357     }
358 }
359 
360 namespace {
IncreaseDescriptorSetCounts(const DescriptorSetLayoutBinding & refBinding,LowLevelDescriptorCountsVk & descSetCounts,uint32_t & dynamicOffsetCount)361 void IncreaseDescriptorSetCounts(const DescriptorSetLayoutBinding& refBinding,
362     LowLevelDescriptorCountsVk& descSetCounts, uint32_t& dynamicOffsetCount)
363 {
364     if (NodeContextDescriptorSetManager::IsDynamicDescriptor(refBinding.descriptorType)) {
365         dynamicOffsetCount++;
366     }
367     const uint32_t descriptorCount = refBinding.descriptorCount;
368     if (refBinding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
369         descSetCounts.samplerCount += descriptorCount;
370     } else if (((refBinding.descriptorType >= CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
371                         (refBinding.descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE)) ||
372                     (refBinding.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
373         descSetCounts.imageCount += descriptorCount;
374     } else if (((refBinding.descriptorType >= CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) &&
375                         (refBinding.descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) ||
376                     (refBinding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE)) {
377         descSetCounts.bufferCount += descriptorCount;
378     }
379 #if (RENDER_VALIDATION_ENABLED == 1)
380     if (!((refBinding.descriptorType <= CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) ||
381             (refBinding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE))) {
382         PLUGIN_LOG_W("RENDER_VALIDATION: descriptor type not found");
383     }
384 #endif
385 }
386 } // namespace
387 
CreateDescriptorSet(const array_view<const DescriptorSetLayoutBinding> descriptorSetLayoutBindings)388 RenderHandle NodeContextDescriptorSetManagerVk::CreateDescriptorSet(
389     const array_view<const DescriptorSetLayoutBinding> descriptorSetLayoutBindings)
390 {
391     RenderHandle clientHandle;
392     auto& cpuDescriptorSets = cpuDescriptorSets_[DESCRIPTOR_SET_INDEX_TYPE_STATIC];
393     auto& descriptorPool = descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_STATIC];
394 #if (RENDER_VALIDATION_ENABLED == 1)
395     if (cpuDescriptorSets.size() >= maxSets_) {
396         PLUGIN_LOG_E("RENDER_VALIDATION: No more descriptor sets available");
397     }
398 #endif
399     if (cpuDescriptorSets.size() < maxSets_) {
400         uint32_t dynamicOffsetCount = 0;
401         CpuDescriptorSet newSet;
402         LowLevelContextDescriptorPoolVk::DescriptorSetData descSetData;
403 
404         newSet.bindings.reserve(descriptorSetLayoutBindings.size());
405         descSetData.descriptorCounts.writeDescriptorCount = static_cast<uint32_t>(descriptorSetLayoutBindings.size());
406         for (const auto& refBinding : descriptorSetLayoutBindings) {
407             // NOTE: sort from 0 to n
408             newSet.bindings.push_back({ refBinding, {} });
409             IncreaseDescriptorSetCounts(refBinding, descSetData.descriptorCounts, dynamicOffsetCount);
410         }
411         newSet.buffers.resize(descSetData.descriptorCounts.bufferCount);
412         newSet.images.resize(descSetData.descriptorCounts.imageCount);
413         newSet.samplers.resize(descSetData.descriptorCounts.samplerCount);
414 
415         const uint32_t arrayIndex = static_cast<uint32_t>(cpuDescriptorSets.size());
416         cpuDescriptorSets.push_back(move(newSet));
417 
418         auto& currCpuDescriptorSet = cpuDescriptorSets[arrayIndex];
419         currCpuDescriptorSet.dynamicOffsetDescriptors.resize(dynamicOffsetCount);
420 
421         // allocate storage from vector to gpu descriptor sets
422         // don't create the actual gpu descriptor sets yet
423         descriptorPool.descriptorSets.push_back(descSetData);
424 
425         // NOTE: can be used directly to index
426         clientHandle = RenderHandleUtil::CreateHandle(RenderHandleType::DESCRIPTOR_SET, arrayIndex, 0);
427     }
428 
429     return clientHandle;
430 }
431 
CreateOneFrameDescriptorSet(const array_view<const DescriptorSetLayoutBinding> descriptorSetLayoutBindings)432 RenderHandle NodeContextDescriptorSetManagerVk::CreateOneFrameDescriptorSet(
433     const array_view<const DescriptorSetLayoutBinding> descriptorSetLayoutBindings)
434 {
435     RenderHandle clientHandle;
436     auto& cpuDescriptorSets = cpuDescriptorSets_[DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME];
437     auto& descriptorPool = descriptorPool_[DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME];
438     uint32_t dynamicOffsetCount = 0;
439     CpuDescriptorSet newSet;
440     LowLevelContextDescriptorPoolVk::DescriptorSetData descSetData;
441 
442     newSet.bindings.reserve(descriptorSetLayoutBindings.size());
443     descSetData.descriptorCounts.writeDescriptorCount = static_cast<uint32_t>(descriptorSetLayoutBindings.size());
444     for (const auto& refBinding : descriptorSetLayoutBindings) {
445         // NOTE: sort from 0 to n
446         newSet.bindings.push_back({ refBinding, {} });
447         IncreaseDescriptorSetCounts(refBinding, descSetData.descriptorCounts, dynamicOffsetCount);
448 
449         if (static_cast<uint32_t>(refBinding.descriptorType) < OneFrameDescriptorNeed::DESCRIPTOR_ARRAY_SIZE) {
450             oneFrameDescriptorNeed_.descriptorCount[refBinding.descriptorType] +=
451                 static_cast<uint8_t>(refBinding.descriptorCount);
452         }
453     }
454 
455     newSet.buffers.resize(descSetData.descriptorCounts.bufferCount);
456     newSet.images.resize(descSetData.descriptorCounts.imageCount);
457     newSet.samplers.resize(descSetData.descriptorCounts.samplerCount);
458 
459     const uint32_t arrayIndex = static_cast<uint32_t>(cpuDescriptorSets.size());
460     cpuDescriptorSets.push_back(move(newSet));
461 
462     auto& currCpuDescriptorSet = cpuDescriptorSets[arrayIndex];
463     currCpuDescriptorSet.dynamicOffsetDescriptors.resize(dynamicOffsetCount);
464 
465     // allocate storage from vector to gpu descriptor sets
466     // don't create the actual gpu descriptor sets yet
467     descriptorPool.descriptorSets.push_back(descSetData);
468 
469     // NOTE: can be used directly to index
470     clientHandle = RenderHandleUtil::CreateHandle(
471         RenderHandleType::DESCRIPTOR_SET, arrayIndex, oneFrameDescSetGeneration_, ONE_FRAME_DESC_SET_BIT);
472 
473     return clientHandle;
474 }
475 
CreateGpuDescriptorSet(const uint32_t bufferCount,const RenderHandle clientHandle,const CpuDescriptorSet & cpuDescriptorSet,LowLevelContextDescriptorPoolVk & descriptorPool)476 void NodeContextDescriptorSetManagerVk::CreateGpuDescriptorSet(const uint32_t bufferCount,
477     const RenderHandle clientHandle, const CpuDescriptorSet& cpuDescriptorSet,
478     LowLevelContextDescriptorPoolVk& descriptorPool)
479 {
480 #if (RENDER_VALIDATION_ENABLED == 1)
481     if (cpuDescriptorSet.bindings.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
482         PLUGIN_LOG_W("RENDER_VALIDATION: descriptor set binding count exceeds (max:%u, current:%u)",
483             PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT,
484             static_cast<uint32_t>(cpuDescriptorSet.bindings.size()));
485     }
486 #endif
487     const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(clientHandle);
488     VkDescriptorBindingFlags descriptorBindingFlags[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
489     VkDescriptorSetLayoutBinding descriptorSetLayoutBindings[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT];
490     const uint32_t bindingCount = Math::min(static_cast<uint32_t>(cpuDescriptorSet.bindings.size()),
491         PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT);
492     const bool hasPlatformBindings = cpuDescriptorSet.hasPlatformConversionBindings;
493     const bool hasBindImmutableSamplers = cpuDescriptorSet.hasImmutableSamplers;
494     uint16_t immutableSamplerBitmask = 0;
495     const auto& gpuResourceMgr = static_cast<const GpuResourceManager&>(device_.GetGpuResourceManager());
496     // NOTE: if we cannot provide explicit flags that custom immutable sampler with conversion is needed
497     // we should first loop through the bindings and check
498     // normal hw buffers do not need any rebindings or immutable samplers
499     for (uint32_t idx = 0; idx < bindingCount; ++idx) {
500         const DescriptorSetLayoutBindingResource& cpuBinding = cpuDescriptorSet.bindings[idx];
501         const VkDescriptorType descriptorType = (VkDescriptorType)cpuBinding.binding.descriptorType;
502         const VkShaderStageFlags stageFlags = (VkShaderStageFlags)cpuBinding.binding.shaderStageFlags;
503         const uint32_t bindingIdx = cpuBinding.binding.binding;
504         const VkSampler* immutableSampler = nullptr;
505         if (hasBindImmutableSamplers) {
506             if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
507                 const auto& imgRef = cpuDescriptorSet.images[cpuBinding.resourceIndex];
508                 if (imgRef.additionalFlags & CORE_ADDITIONAL_DESCRIPTOR_IMMUTABLE_SAMPLER_BIT) {
509                     immutableSampler = GetSampler(gpuResourceMgr, imgRef.resource.samplerHandle);
510                     immutableSamplerBitmask |= (1 << bindingIdx);
511                 }
512             } else if (descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) {
513                 const auto& samRef = cpuDescriptorSet.samplers[cpuBinding.resourceIndex];
514                 if (samRef.additionalFlags & CORE_ADDITIONAL_DESCRIPTOR_IMMUTABLE_SAMPLER_BIT) {
515                     immutableSampler = GetSampler(gpuResourceMgr, samRef.resource.handle);
516                     immutableSamplerBitmask |= (1 << bindingIdx);
517                 }
518             }
519         } else if (hasPlatformBindings && (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)) {
520             const RenderHandle handle = cpuDescriptorSet.images[cpuBinding.resourceIndex].resource.handle;
521             if (RenderHandleUtil::IsPlatformConversionResource(handle)) {
522                 if (const auto* gpuImage = static_cast<GpuImageVk*>(gpuResourceMgr.GetImage(handle)); gpuImage) {
523                     const GpuImage::AdditionalFlags additionalFlags = gpuImage->GetAdditionalFlags();
524                     immutableSampler = &(gpuImage->GetPlaformDataConversion().sampler);
525                     if ((additionalFlags & GpuImage::AdditionalFlagBits::ADDITIONAL_PLATFORM_CONVERSION_BIT) &&
526                         immutableSampler) {
527                         immutableSamplerBitmask |= (1 << bindingIdx);
528                     }
529 #if (RENDER_VALIDATION_ENABLED == 1)
530                     if (!immutableSampler) {
531                         PLUGIN_LOG_W("RENDER_VALIDATION: immutable sampler for platform conversion resource not found");
532                     }
533 #endif
534                 }
535             }
536         }
537         descriptorSetLayoutBindings[idx] = {
538             bindingIdx,                         // binding
539             descriptorType,                     // descriptorType
540             cpuBinding.binding.descriptorCount, // descriptorCount
541             stageFlags,                         // stageFlags
542             immutableSampler,                   // pImmutableSamplers
543         };
544         // NOTE: partially bound is not used at the moment
545         descriptorBindingFlags[idx] = 0U;
546     }
547 
548     const DeviceVk& deviceVk = (const DeviceVk&)device_;
549 
550     const VkDescriptorSetLayoutBindingFlagsCreateInfo descriptorSetLayoutBindingFlagsCreateInfo {
551         VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, // sType
552         nullptr,                                                           // pNext
553         bindingCount,                                                      // bindingCount
554         descriptorBindingFlags,                                            // pBindingFlags
555     };
556     const bool dsiEnabled = deviceVk.GetCommonDeviceExtensions().descriptorIndexing;
557     const void* pNextPtr = dsiEnabled ? (&descriptorSetLayoutBindingFlagsCreateInfo) : nullptr;
558     // NOTE: update after bind etc. are not currently in use
559     // descriptor set indexing is used with normal binding model
560     constexpr VkDescriptorSetLayoutCreateFlags descriptorSetLayoutCreateFlags { 0U };
561     const VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo {
562         VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // sType
563         pNextPtr,                                            // pNext
564         descriptorSetLayoutCreateFlags,                      // flags
565         bindingCount,                                        // bindingCount
566         descriptorSetLayoutBindings,                         // pBindings
567     };
568 
569     const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
570     for (uint32_t idx = 0; idx < bufferCount; ++idx) {
571         LowLevelDescriptorSetVk newDescriptorSet;
572         newDescriptorSet.flags |=
573             (immutableSamplerBitmask != 0) ? LowLevelDescriptorSetVk::DESCRIPTOR_SET_LAYOUT_IMMUTABLE_SAMPLER_BIT : 0u;
574         newDescriptorSet.immutableSamplerBitmask = immutableSamplerBitmask;
575 
576         VALIDATE_VK_RESULT(vkCreateDescriptorSetLayout(device, // device
577             &descriptorSetLayoutCreateInfo,                    // pCreateInfo
578             nullptr,                                           // pAllocator
579             &newDescriptorSet.descriptorSetLayout));           // pSetLayout
580 
581         // for platform immutable set we use created additional descriptor pool (currently only used with ycbcr)
582         const bool platImmutable = (hasPlatformBindings && (immutableSamplerBitmask != 0));
583         const VkDescriptorPool descriptorPoolVk =
584             platImmutable ? descriptorPool.additionalPlatformDescriptorPool : descriptorPool.descriptorPool;
585         PLUGIN_ASSERT(descriptorPoolVk);
586         const VkDescriptorSetAllocateInfo descriptorSetAllocateInfo {
587             VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
588             nullptr,                                        // pNext
589             descriptorPoolVk,                               // descriptorPool
590             1u,                                             // descriptorSetCount
591             &newDescriptorSet.descriptorSetLayout,          // pSetLayouts
592         };
593 
594         VALIDATE_VK_RESULT(vkAllocateDescriptorSets(device, // device
595             &descriptorSetAllocateInfo,                     // pAllocateInfo
596             &newDescriptorSet.descriptorSet));              // pDescriptorSets
597 
598         if (platImmutable) {
599             descriptorPool.descriptorSets[arrayIndex].additionalPlatformSet = newDescriptorSet;
600         } else {
601             PLUGIN_ASSERT(descriptorPool.descriptorSets[arrayIndex].bufferingSet[idx].descriptorSet == VK_NULL_HANDLE);
602             descriptorPool.descriptorSets[arrayIndex].bufferingSet[idx] = newDescriptorSet;
603         }
604         // NOTE: descriptor sets could be tagged with debug name
605         // might be a bit overkill to do it always
606 #if (RENDER_VALIDATION_ENABLED == 1)
607         if (newDescriptorSet.descriptorSet == VK_NULL_HANDLE) {
608             PLUGIN_LOG_E("RENDER_VALIDATION: gpu descriptor set creation failed, ds node: %s, ds binding count: %u",
609                 debugName_.c_str(), bindingCount);
610         }
611 #endif
612     }
613 }
614 
GetLowLevelDescriptorCounts(const RenderHandle handle)615 const LowLevelDescriptorCountsVk& NodeContextDescriptorSetManagerVk::GetLowLevelDescriptorCounts(
616     const RenderHandle handle)
617 {
618     const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
619     const uint32_t oneFrameDescBit = RenderHandleUtil::GetAdditionalData(handle);
620     const uint32_t descSetIdx = (oneFrameDescBit == ONE_FRAME_DESC_SET_BIT) ? DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME
621                                                                             : DESCRIPTOR_SET_INDEX_TYPE_STATIC;
622     const auto& descriptorPool = descriptorPool_[descSetIdx];
623     if (arrayIndex < static_cast<uint32_t>(descriptorPool.descriptorSets.size())) {
624         return descriptorPool.descriptorSets[arrayIndex].descriptorCounts;
625     } else {
626         PLUGIN_LOG_E("invalid handle in descriptor set management");
627         return defaultLowLevelDescriptorSetMemoryStoreVk_;
628     }
629 }
630 
GetDescriptorSet(const RenderHandle handle) const631 const LowLevelDescriptorSetVk* NodeContextDescriptorSetManagerVk::GetDescriptorSet(const RenderHandle handle) const
632 {
633     const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
634     const uint32_t oneFrameDescBit = RenderHandleUtil::GetAdditionalData(handle);
635     const uint32_t descSetIdx = (oneFrameDescBit == ONE_FRAME_DESC_SET_BIT) ? DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME
636                                                                             : DESCRIPTOR_SET_INDEX_TYPE_STATIC;
637     const auto& cpuDescriptorSets = cpuDescriptorSets_[descSetIdx];
638     const auto& descriptorPool = descriptorPool_[descSetIdx];
639     const LowLevelDescriptorSetVk* set = nullptr;
640     if (arrayIndex < static_cast<uint32_t>(cpuDescriptorSets.size())) {
641         if (arrayIndex < descriptorPool.descriptorSets.size()) {
642             // additional set is only used there are platform buffer bindings and additional set created
643             const bool useAdditionalSet =
644                 (cpuDescriptorSets[arrayIndex].hasPlatformConversionBindings &&
645                     descriptorPool.descriptorSets[arrayIndex].additionalPlatformSet.descriptorSet);
646             if (useAdditionalSet) {
647 #if (RENDER_VALIDATION_ENABLED == 1)
648                 if (!descriptorPool.descriptorSets[arrayIndex].additionalPlatformSet.descriptorSet) {
649                     PLUGIN_LOG_ONCE_E(debugName_.c_str() + to_string(handle.id) + "_dsnu0",
650                         "RENDER_VALIDATION: descriptor set not updated (handle:%" PRIx64 ")", handle.id);
651                 }
652 #endif
653                 set = &descriptorPool.descriptorSets[arrayIndex].additionalPlatformSet;
654             } else {
655                 const uint32_t bufferingIndex = cpuDescriptorSets[arrayIndex].currentGpuBufferingIndex;
656 #if (RENDER_VALIDATION_ENABLED == 1)
657                 if (!descriptorPool.descriptorSets[arrayIndex].bufferingSet[bufferingIndex].descriptorSet) {
658                     PLUGIN_LOG_ONCE_E(debugName_.c_str() + to_string(handle.id) + "_dsn1",
659                         "RENDER_VALIDATION: descriptor set not updated (handle:%" PRIx64 ")", handle.id);
660                 }
661 #endif
662                 set = &descriptorPool.descriptorSets[arrayIndex].bufferingSet[bufferingIndex];
663             }
664         }
665 
666 #if (RENDER_VALIDATION_ENABLED == 1)
667         if (set) {
668             if (set->descriptorSet == VK_NULL_HANDLE) {
669                 PLUGIN_LOG_ONCE_E(debugName_.c_str() + to_string(handle.id) + "_dsnu2",
670                     "RENDER_VALIDATION: descriptor set has not been updated prior to binding");
671                 PLUGIN_LOG_ONCE_E(debugName_.c_str() + to_string(handle.id) + "_dsnu3",
672                     "RENDER_VALIDATION: gpu descriptor set created? %u,
673                     descriptor set node: %s, set: %u, "
674                     "buffer count: %u, "
675                     "image count: %u, sampler count: %u",
676                     static_cast<uint32_t>(cpuDescriptorSets[arrayIndex].gpuDescriptorSetCreated),
677                     debugName_.c_str(), descSetIdx,
678                     descriptorPool.descriptorSets[arrayIndex].descriptorCounts.bufferCount,
679                     descriptorPool.descriptorSets[arrayIndex].descriptorCounts.imageCount,
680                     descriptorPool.descriptorSets[arrayIndex].descriptorCounts.samplerCount);
681             }
682         }
683 #endif
684     }
685 
686     return set;
687 }
688 
// Returns the reusable low level descriptor write data storage owned by this manager.
LowLevelContextDescriptorWriteDataVk& NodeContextDescriptorSetManagerVk::GetLowLevelDescriptorWriteData()
{
    return lowLevelDescriptorWriteData_;
}
693 
UpdateDescriptorSetGpuHandle(const RenderHandle handle)694 void NodeContextDescriptorSetManagerVk::UpdateDescriptorSetGpuHandle(const RenderHandle handle)
695 {
696     const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
697     const uint32_t oneFrameDescBit = RenderHandleUtil::GetAdditionalData(handle);
698     const uint32_t descSetIdx = (oneFrameDescBit == ONE_FRAME_DESC_SET_BIT) ? DESCRIPTOR_SET_INDEX_TYPE_ONE_FRAME
699                                                                             : DESCRIPTOR_SET_INDEX_TYPE_STATIC;
700     auto& cpuDescriptorSets = cpuDescriptorSets_[descSetIdx];
701     auto& descriptorPool = descriptorPool_[descSetIdx];
702     const uint32_t bufferingCount = (oneFrameDescBit == ONE_FRAME_DESC_SET_BIT) ? 1u : bufferingCount_;
703     if (arrayIndex < static_cast<uint32_t>(cpuDescriptorSets.size())) {
704         CpuDescriptorSet& refCpuSet = cpuDescriptorSets[arrayIndex];
705 
706         // with platform buffer bindings descriptor set creation needs to be checked
707         if (refCpuSet.hasPlatformConversionBindings) {
708             // no buffering
709             CreateGpuDescriptorSet(1u, handle, refCpuSet, descriptorPool);
710         } else if (!refCpuSet.gpuDescriptorSetCreated) { // deferred creation
711             CreateGpuDescriptorSet(bufferingCount, handle, refCpuSet, descriptorPool);
712             refCpuSet.gpuDescriptorSetCreated = true;
713         }
714 
715         if (refCpuSet.isDirty) {
716             refCpuSet.isDirty = false;
717             // advance to next gpu descriptor set
718             if (oneFrameDescBit != ONE_FRAME_DESC_SET_BIT) {
719                 refCpuSet.currentGpuBufferingIndex = (refCpuSet.currentGpuBufferingIndex + 1) % bufferingCount_;
720             }
721         }
722     } else {
723 #if (RENDER_VALIDATION_ENABLED == 1)
724         PLUGIN_LOG_E("invalid handle in descriptor set management");
725 #endif
726     }
727 #if (RENDER_VALIDATION_ENABLED == 1)
728     if (oneFrameDescBit == ONE_FRAME_DESC_SET_BIT) {
729         const uint32_t generationIndex = RenderHandleUtil::GetGenerationIndexPart(handle);
730         if (generationIndex != oneFrameDescSetGeneration_) {
731             PLUGIN_LOG_E(
732                 "RENDER_VALIDATION: invalid one frame descriptor set handle generation. One frame descriptor sets "
733                 "can only be used once.");
734         }
735     }
736 #endif
737 }
738 
UpdateCpuDescriptorSetPlatform(const DescriptorSetLayoutBindingResources & bindingResources)739 void NodeContextDescriptorSetManagerVk::UpdateCpuDescriptorSetPlatform(
740     const DescriptorSetLayoutBindingResources& bindingResources)
741 {
742     lowLevelDescriptorWriteData_.writeBindingCount += static_cast<uint32_t>(bindingResources.bindings.size());
743 
744     lowLevelDescriptorWriteData_.bufferBindingCount += static_cast<uint32_t>(bindingResources.buffers.size());
745     lowLevelDescriptorWriteData_.imageBindingCount += static_cast<uint32_t>(bindingResources.images.size());
746     lowLevelDescriptorWriteData_.samplerBindingCount += static_cast<uint32_t>(bindingResources.samplers.size());
747 }
748 
// Resets the accumulated low level descriptor write data (delegates to its Clear()).
void NodeContextDescriptorSetManagerVk::ClearDescriptorSetWriteData()
{
    lowLevelDescriptorWriteData_.Clear();
}
753 
ResizeDescriptorSetWriteData()754 void NodeContextDescriptorSetManagerVk::ResizeDescriptorSetWriteData()
755 {
756     auto& descWd = lowLevelDescriptorWriteData_;
757     if (descWd.writeBindingCount > 0U) {
758         lowLevelDescriptorWriteData_.writeDescriptorSets.resize(descWd.writeBindingCount);
759         lowLevelDescriptorWriteData_.descriptorBufferInfos.resize(descWd.bufferBindingCount);
760         lowLevelDescriptorWriteData_.descriptorImageInfos.resize(descWd.imageBindingCount);
761         lowLevelDescriptorWriteData_.descriptorSamplerInfos.resize(descWd.samplerBindingCount);
762 #if (RENDER_VULKAN_RT_ENABLED == 1)
763         lowLevelDescriptorWriteData_.descriptorAccelInfos.resize(descWd.bufferBindingCount);
764 #endif
765     }
766 }
767 
768 RENDER_END_NAMESPACE()
769