/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_command_list.h"

#include <cinttypes>
#include <cstdint>

#include <base/containers/array_view.h>
#include <render/device/pipeline_layout_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_command_list.h>
#include <render/render_data_structures.h>

#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_node_context_manager.h"
#include "util/linear_allocator.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
PLUGIN_STATIC_ASSERT(PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT == 4);
PLUGIN_STATIC_ASSERT(PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT == 8u);
namespace {
#if (RENDER_VALIDATION_ENABLED == 1)
void ValidateImageUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const ImageUsageFlags imageUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetImageDescriptor(handle).usageFlags & imageUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateImageUsageFlags_",
            "RENDER_VALIDATION: gpu image (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateBufferUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const BufferUsageFlags bufferUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetBufferDescriptor(handle).usageFlags & bufferUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateBufferUsageFlags_",
            "RENDER_VALIDATION: gpu buffer (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}
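// Checks that each resource bound to a descriptor set was created with the
// usage flags required by its descriptor type (e.g. a storage image needs
// CORE_IMAGE_USAGE_STORAGE_BIT).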
void ValidateDescriptorTypeBinding(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const DescriptorSetLayoutBindingResources& bindingRes)
{
    for (const auto& ref : bindingRes.buffers) {
        if (!RenderHandleUtil::IsGpuBuffer(ref.resource.handle)) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid GPU buffer");
        }
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
            // no usage flag validation needed for acceleration structures
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported buffer descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.images) {
        if ((ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
            (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_SAMPLED_BIT,
                "CORE_IMAGE_USAGE_SAMPLED_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_STORAGE_BIT,
                "CORE_IMAGE_USAGE_STORAGE_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported image descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.samplers) {
        if (ref.binding.descriptorType != CORE_DESCRIPTOR_TYPE_SAMPLER) {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported sampler descriptor type: %u", ref.binding.descriptorType);
        }
    }
}
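// Validates that every attachment referenced by the given subpasses matches
// the first attachment's extent, that the render area is non-zero, and that
// the render area offset stays within the base attachment.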
void ValidateRenderPassAttachment(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
    const GpuImageDesc baseDesc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[0]);
    const uint32_t baseWidth = baseDesc.width;
    const uint32_t baseHeight = baseDesc.height;
    // NOTE: we do not check fragment shading rate attachment size
    for (uint32_t attachmentIdx = 1; attachmentIdx < renderPassDesc.attachmentCount; ++attachmentIdx) {
        const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[attachmentIdx]);
        if (desc.width != baseWidth || desc.height != baseHeight) {
            for (const auto& subpassRef : subpassDescs) {
                auto CheckAttachments = [](const auto& indices, const uint32_t count, const uint32_t attachmentIndex) {
                    for (uint32_t idx = 0; idx < count; ++idx) {
                        if (indices[idx] == attachmentIndex) {
                            return false;
                        }
                    }
                    return true;
                };
                bool valid = true;
                valid &=
                    CheckAttachments(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount, attachmentIdx);
                valid &=
                    CheckAttachments(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount, attachmentIdx);
                valid &= CheckAttachments(
                    subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount, attachmentIdx);
                if ((subpassRef.depthAttachmentIndex == attachmentIdx) ||
                    (subpassRef.depthResolveAttachmentIndex == attachmentIdx)) {
                    valid = false;
                }
                if (!valid) {
                    if (RenderHandleUtil::IsSwapchain(renderPassDesc.attachmentHandles[attachmentIdx]) &&
                        RenderHandleUtil::IsDepthImage(renderPassDesc.attachmentHandles[0])) {
                        PLUGIN_LOG_ONCE_W(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: Depth and swapchain input mismatch: baseWidth:%u baseHeight:%u "
                            "currWidth:%u currHeight:%u",
                            baseWidth, baseHeight, desc.width, desc.height);
                    } else {
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: render pass attachment size does not match with attachment index: %u",
                            attachmentIdx);
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize2_",
                            "RENDER_VALIDATION: baseWidth:%u baseHeight:%u currWidth:%u currHeight:%u", baseWidth,
                            baseHeight, desc.width, desc.height);
                    }
                }
            }
        }
    }
    if ((renderPassDesc.renderArea.extentWidth == 0) || (renderPassDesc.renderArea.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaExtent_",
            "RENDER_VALIDATION: render area cannot be zero (width: %u, height: %u) (node: %s)",
            renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight, nodeName.data());
    }
    if ((renderPassDesc.renderArea.offsetX >= static_cast<int32_t>(baseWidth)) ||
        (renderPassDesc.renderArea.offsetY >= static_cast<int32_t>(baseHeight))) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaOffset_",
            "RENDER_VALIDATION: render area offset cannot go out of screen (offsetX: %i, offsetY: %i) (baseWidth: "
            "%u, baseHeight: %u) (node: %s)",
            renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY, baseWidth, baseHeight,
            nodeName.data());
    }
}

void ValidateImageSubresourceRange(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
    const ImageSubresourceRange& imageSubresourceRange)
{
    const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(handle);
    if (imageSubresourceRange.baseMipLevel >= desc.mipCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange mipLevel: %u, is greater than or equal to mipCount: %u",
            imageSubresourceRange.baseMipLevel, desc.mipCount);
    }
    if (imageSubresourceRange.baseArrayLayer >= desc.layerCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange layer: %u, is greater than or equal to layerCount: %u",
            imageSubresourceRange.baseArrayLayer, desc.layerCount);
    }
}

void ValidateViewport(const string_view nodeName, const ViewportDesc& vd)
{
    if ((vd.width < 1.0f) || (vd.height < 1.0f)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateViewport_",
            "RENDER_VALIDATION: viewport width (%f) and height (%f) must be one or larger (node: %s)", vd.width,
            vd.height, nodeName.data());
    }
}

void ValidateScissor(const string_view nodeName, const ScissorDesc& sd)
{
    if ((sd.extentWidth == 0) || (sd.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateScissor_",
            "RENDER_VALIDATION: scissor extentWidth (%u) and scissor extentHeight (%u) cannot be zero (node: %s)",
            sd.extentWidth, sd.extentHeight, nodeName.data());
    }
}
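// Valid fragment shading rate sizes are power-of-two values from 1 to 4 per
// dimension, so 0, 3, and anything above 4 are rejected below.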
void ValidateFragmentShadingRate(const Size2D& size)
{
    bool valid = true;
    if ((size.width == 0) || (size.height == 0)) {
        valid = false;
    } else if ((size.width == 3u) || (size.height == 3u)) {
        valid = false;
    } else if ((size.width > 4u) || (size.height > 4u)) {
        valid = false;
    }
    if (!valid) {
        PLUGIN_LOG_W("RENDER_VALIDATION: fragmentSize must be less than or equal to 4 and the value must be a "
                     "power of two (width = %u, height = %u)",
            size.width, size.height);
    }
}
#endif // RENDER_VALIDATION_ENABLED

constexpr uint32_t INVALID_CL_IDX { ~0u };

constexpr size_t BYTE_SIZE_ALIGNMENT { 64 };
constexpr size_t FRAME_RESERVE_EXTRA_DIVIDE { 8 };
constexpr size_t MIN_ALLOCATION_SIZE { 1024 * 2 };

// automatic acquire and release barriers
constexpr uint32_t INITIAL_MULTI_QUEUE_BARRIER_COUNT { 2u };
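// Rounds byteSize up to the next multiple of alignment; alignment must be a
// power of two (e.g. GetAlignedBytesize(100, 64) == 128).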
size_t GetAlignedBytesize(const size_t byteSize, const size_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}
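// Allocates from the newest linear allocator; if it is exhausted, a new
// allocator is appended (at least MIN_ALLOCATION_SIZE and at least twice the
// previous allocator's size) and the allocation is retried once.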
void* AllocateRenderData(
    RenderCommandList::LinearAllocatorStruct& allocator, const size_t alignment, const size_t byteSz)
{
    PLUGIN_ASSERT(byteSz > 0);
    void* rc = nullptr;
    if (!allocator.allocators.empty()) {
        const size_t currentIndex = allocator.allocators.size() - 1;
        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
    }

    if (rc == nullptr) { // current allocator is out of memory
        size_t allocatorByteSize = Math::max(MIN_ALLOCATION_SIZE, GetAlignedBytesize(byteSz, BYTE_SIZE_ALIGNMENT));
        const size_t currentIndex = allocator.allocators.size();
        if (currentIndex > 0) {
            allocatorByteSize =
                Math::max(allocatorByteSize, allocator.allocators[currentIndex - 1]->GetCurrentByteSize() * 2u);
        }
        allocator.allocators.push_back(make_unique<LinearAllocator>(allocatorByteSize));

        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
        if (rc == nullptr) {
            PLUGIN_LOG_E("RenderCommandList: render command list allocation: out of memory");
            PLUGIN_ASSERT(false);
        }
    }
    return rc;
}

template<typename T>
T* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, uint32_t count)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T) * count));
}

template<typename T>
T* AllocateRenderCommand(RenderCommandList::LinearAllocatorStruct& allocator)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T)));
}
} // namespace

RenderCommandList::RenderCommandList(const BASE_NS::string_view nodeName,
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr, const GpuResourceManager& gpuResourceMgr,
    const NodeContextPsoManager& nodeContextPsoMgr, const GpuQueue& queue, const bool enableMultiQueue)
    : IRenderCommandList(), nodeName_(nodeName),
#if (RENDER_VALIDATION_ENABLED == 1)
      gpuResourceMgr_(gpuResourceMgr), psoMgr_(nodeContextPsoMgr),
#endif
      nodeContextDescriptorSetManager_(nodeContextDescriptorSetMgr), gpuQueue_(queue),
      enableMultiQueue_(enableMultiQueue)
{}
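// Consolidates the per-frame linear allocators: a single allocator is simply
// reset, while multiple allocators from the previous frame are replaced with
// one combined allocation sized from their total usage plus ~1/8 headroom.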
void RenderCommandList::BeginFrame()
{
    if (allocator_.allocators.size() == 1) { // size is good for this frame
        allocator_.allocators[0]->Reset();
    } else if (allocator_.allocators.size() > 1) {
        size_t fullByteSize = 0;
        size_t alignment = 0;
        for (auto& ref : allocator_.allocators) {
            fullByteSize += ref->GetCurrentByteSize();
            alignment = Math::max(alignment, static_cast<size_t>(ref->GetAlignment()));
            ref.reset();
        }
        allocator_.allocators.clear();

        // add some room for current frame allocation for new render commands
        const size_t extraBytes = Math::max(fullByteSize / FRAME_RESERVE_EXTRA_DIVIDE, BYTE_SIZE_ALIGNMENT);
        fullByteSize += extraBytes;

        // create new single allocation for combined previous size and some extra bytes
        const size_t memAllocationByteSize = GetAlignedBytesize(fullByteSize, BYTE_SIZE_ALIGNMENT);
        allocator_.allocators.push_back(make_unique<LinearAllocator>(memAllocationByteSize, alignment));
    }

    ResetStateData();

    const auto clearAndReserve = [](auto& vec) {
        const size_t count = vec.size();
        vec.clear();
        vec.reserve(count);
    };

    clearAndReserve(renderCommands_);
    clearAndReserve(customBarriers_);
    clearAndReserve(rpVertexInputBufferBarriers_);
    clearAndReserve(rpIndirectBufferBarriers_);
    clearAndReserve(descriptorSetHandlesForBarriers_);
    clearAndReserve(descriptorSetHandlesForUpdates_);

    validReleaseAcquire_ = false;
    hasMultiRpCommandListSubpasses_ = false;
    multiRpCommandListData_ = {};
}

void RenderCommandList::SetValidGpuQueueReleaseAcquireBarriers()
{
    if (enableMultiQueue_) {
        validReleaseAcquire_ = true;
    }
}

void RenderCommandList::BeforeRenderNodeExecuteFrame()
{
    // add possible barrier point for gpu queue transfer acquire
    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
    }
}

void RenderCommandList::AfterRenderNodeExecuteFrame()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndRenderPass() not called?");
    }
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarriers() not called?");
    }
#endif

    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        if (stateData_.currentCustomBarrierIndices.dirtyCustomBarriers) {
            AddBarrierPoint(RenderCommandType::BARRIER_POINT);
        }

        // add possible barrier point for gpu queue transfer release
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
    }
}

array_view<const RenderCommandWithType> RenderCommandList::GetRenderCommands() const
{
    if ((!stateData_.validCommandList) || stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_GetRenderCommands_",
            "RenderCommandList: invalid state data in render command list (node: %s)", nodeName_.c_str());
#endif
        return {};
    } else {
        return array_view<const RenderCommandWithType>(renderCommands_.data(), renderCommands_.size());
    }
}
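// With multi-queue enabled the list always contains the two automatic
// acquire/release barrier commands, so it counts as valid only when it holds
// additional commands or patched explicit release/acquire barriers.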
bool RenderCommandList::HasValidRenderCommands() const
{
    const uint32_t renderCommandCount = GetRenderCommandCount();
    bool valid = false;
    if (enableMultiQueue_) {
        if (renderCommandCount == INITIAL_MULTI_QUEUE_BARRIER_COUNT) { // only acquire and release barrier commands
            // if there are patched explicit resource barriers, we need to execute this cmdlist in the backend
            valid = validReleaseAcquire_;
        } else if (renderCommandCount > INITIAL_MULTI_QUEUE_BARRIER_COUNT) {
            valid = true;
        }
    } else {
        valid = (renderCommandCount > 0);
    }
    valid = valid && stateData_.validCommandList;

    return valid;
}

uint32_t RenderCommandList::GetRenderCommandCount() const
{
    return static_cast<uint32_t>(renderCommands_.size());
}

GpuQueue RenderCommandList::GetGpuQueue() const
{
    return gpuQueue_;
}

bool RenderCommandList::HasMultiRenderCommandListSubpasses() const
{
    return hasMultiRpCommandListSubpasses_;
}

MultiRenderPassCommandListData RenderCommandList::GetMultiRenderCommandListData() const
{
    return multiRpCommandListData_;
}

array_view<const CommandBarrier> RenderCommandList::GetCustomBarriers() const
{
    return array_view<const CommandBarrier>(customBarriers_.data(), customBarriers_.size());
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassVertexInputBufferBarriers() const
{
    return array_view<const VertexBuffer>(rpVertexInputBufferBarriers_.data(), rpVertexInputBufferBarriers_.size());
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassIndirectBufferBarriers() const
{
    return array_view<const VertexBuffer>(rpIndirectBufferBarriers_.data(), rpIndirectBufferBarriers_.size());
}

array_view<const RenderHandle> RenderCommandList::GetDescriptorSetHandles() const
{
    return { descriptorSetHandlesForBarriers_.data(), descriptorSetHandlesForBarriers_.size() };
}

array_view<const RenderHandle> RenderCommandList::GetUpdateDescriptorSetHandles() const
{
    return { descriptorSetHandlesForUpdates_.data(), descriptorSetHandlesForUpdates_.size() };
}
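// Records a barrier point command; the barriers themselves are resolved later
// (by the render graph) from the dynamic resources referenced since the
// previous barrier point: descriptor sets, vertex/index buffers, and indirect
// argument buffers.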
void RenderCommandList::AddBarrierPoint(const RenderCommandType renderCommandType)
{
    if (!stateData_.automaticBarriersEnabled) {
        return; // no barrier point added
    }

    RenderCommandBarrierPoint* data = AllocateRenderCommand<RenderCommandBarrierPoint>(allocator_);
    if (data) {
        *data = {}; // zero initialize

        data->renderCommandType = renderCommandType;
        data->barrierPointIndex = stateData_.currentBarrierPointIndex++;

        // update new index (within render pass there might not be any dirty descriptor sets at this stage)
        const uint32_t descriptorSetBeginIndex = static_cast<uint32_t>(descriptorSetHandlesForBarriers_.size());
        data->descriptorSetHandleIndexBegin = descriptorSetBeginIndex;
        data->descriptorSetHandleCount = 0U;
        // update new index (only valid with render pass)
        data->vertexIndexBarrierIndexBegin = static_cast<uint32_t>(rpVertexInputBufferBarriers_.size());
        data->vertexIndexBarrierCount = 0U;
        // update new index (only valid with render pass)
        data->indirectBufferBarrierIndexBegin = static_cast<uint32_t>(rpIndirectBufferBarriers_.size());
        data->indirectBufferBarrierCount = 0U;

        // barriers are always needed e.g. when dynamic resource is bound for writing in multiple dispatches
        const bool handleDescriptorSets = stateData_.dirtyDescriptorSetsForBarriers ||
                                          renderCommandType == RenderCommandType::DISPATCH ||
                                          renderCommandType == RenderCommandType::DISPATCH_INDIRECT;
        if (handleDescriptorSets) {
            stateData_.dirtyDescriptorSetsForBarriers = false;
            for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
                // only add descriptor set handles for barriers if there are dynamic barrier resources
                if (stateData_.currentBoundSets[idx].hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.push_back(stateData_.currentBoundSets[idx].descriptorSetHandle);
                }
            }
            data->descriptorSetHandleCount =
                static_cast<uint32_t>(descriptorSetHandlesForBarriers_.size()) - descriptorSetBeginIndex;
        }

        const bool handleCustomBarriers =
            ((!customBarriers_.empty()) && stateData_.currentCustomBarrierIndices.dirtyCustomBarriers);
        if (handleCustomBarriers) {
            const int32_t newCount = static_cast<int32_t>(customBarriers_.size()) -
                stateData_.currentCustomBarrierIndices.prevSize;
            if (newCount > 0) {
                data->customBarrierIndexBegin = static_cast<uint32_t>(stateData_.currentCustomBarrierIndices.prevSize);
                data->customBarrierCount = static_cast<uint32_t>(newCount);

                stateData_.currentCustomBarrierIndices.prevSize = static_cast<int32_t>(customBarriers_.size());
                stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = false;
            }
        }

        // store current barrier point for render command list
        // * binding descriptor sets (with dynamic barrier resources)
        // * binding vertex and index buffers (with dynamic barrier resources)
        // * indirect args buffer (with dynamic barrier resources)
        // inside a render pass adds barriers directly to the RenderCommandBarrierPoint behind this pointer
        stateData_.currentBarrierPoint = data;

        renderCommands_.push_back({ RenderCommandType::BARRIER_POINT, data });
    }
}

void RenderCommandList::Draw(
    const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_Draw_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw)");
    }
#endif

    if (vertexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDraw* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW;
            data->vertexCount = vertexCount;
            data->instanceCount = instanceCount;
            data->firstVertex = firstVertex;
            data->firstInstance = firstInstance;
            data->indexCount = 0;
            data->firstIndex = 0;
            data->vertexOffset = 0;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndexed(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex,
    const int32_t vertexOffset, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DrawIndexed_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw).");
    }
#endif

    if (indexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDraw* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED;
            data->vertexCount = 0;
            data->instanceCount = instanceCount;
            data->firstVertex = 0;
            data->firstInstance = firstInstance;
            data->indexCount = indexCount;
            data->firstIndex = firstIndex;
            data->vertexOffset = vertexOffset;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DI_buffer_", "RENDER_VALIDATION: DrawIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDrawIndirect* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
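                // one indirect draw record in the args buffer (4 x uint32_t,
                // i.e. a Vulkan-style vertexCount, instanceCount, firstVertex,
                // firstInstance layout)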
                constexpr uint32_t drawIndirectCommandSize { 4U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::DrawIndexedIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DII_buffer_",
            "RENDER_VALIDATION: DrawIndexedIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDrawIndirect* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
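                // one indexed indirect draw record in the args buffer (5 x uint32_t,
                // i.e. a Vulkan-style indexCount, instanceCount, firstIndex,
                // vertexOffset, firstInstance layout)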
                constexpr uint32_t drawIndirectCommandSize { 5U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::Dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ)
{
    if (groupCountX > 0 && groupCountY > 0 && groupCountZ > 0) { // prevent zero dispatches
        ValidatePipeline();
        ValidatePipelineLayout();

        AddBarrierPoint(RenderCommandType::DISPATCH);

        RenderCommandDispatch* data = AllocateRenderCommand<RenderCommandDispatch>(allocator_);
        if (data) {
            data->groupCountX = groupCountX;
            data->groupCountY = groupCountY;
            data->groupCountZ = groupCountZ;

            renderCommands_.push_back({ RenderCommandType::DISPATCH, data });
        }
    }
}

void RenderCommandList::DispatchIndirect(const RenderHandle bufferHandle, const uint32_t offset)
{
    ValidatePipeline();
    ValidatePipelineLayout();

    AddBarrierPoint(RenderCommandType::DISPATCH_INDIRECT);

    RenderCommandDispatchIndirect* data = AllocateRenderCommand<RenderCommandDispatchIndirect>(allocator_);
    if (data) {
        data->argsHandle = bufferHandle;
        data->offset = offset;

        renderCommands_.push_back({ RenderCommandType::DISPATCH_INDIRECT, data });
    }
}

void RenderCommandList::BindPipeline(const RenderHandle psoHandle)
{
    // NOTE: we cannot early out with the same pso handle
    // the render pass and its hashes might have been changed
    // the final pso needs to be hashed with the final render pass
    // the backends try to check the re-binding of the same pipeline
    // another approach would be to check when render pass changes to re-bind psos if needed

    bool valid = RenderHandleUtil::IsValid(psoHandle);

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(psoHandle);
    PipelineBindPoint pipelineBindPoint {};
    if (handleType == RenderHandleType::COMPUTE_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE;
    } else if (handleType == RenderHandleType::GRAPHICS_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS;
    } else {
        valid = false;
    }

    stateData_.checkBindPipelineLayout = true;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        if (!stateData_.renderPassHasBegun) {
            valid = false;
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_BindPipeline_",
                "RENDER_VALIDATION: RenderCommandList: bind pipeline after render pass begin.");
        }
    }
#endif

    stateData_.validPso = valid;
    ValidatePipeline();

    stateData_.currentPsoHandle = psoHandle;
    stateData_.currentPsoBindPoint = pipelineBindPoint;

    RenderCommandBindPipeline* data = AllocateRenderCommand<RenderCommandBindPipeline>(allocator_);
    if (data) {
        data->psoHandle = psoHandle;
        data->pipelineBindPoint = pipelineBindPoint;

        renderCommands_.push_back({ RenderCommandType::BIND_PIPELINE, data });
    }
}

void RenderCommandList::PushConstantData(
    const RENDER_NS::PushConstant& pushConstant, const BASE_NS::array_view<const uint8_t> data)
{
    ValidatePipeline();

    // push constant is not used/allocated if byte size is bigger than supported max
    if ((pushConstant.byteSize > 0) &&
        (pushConstant.byteSize <= PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE) && (!data.empty())) {
        RenderCommandPushConstant* rc = AllocateRenderCommand<RenderCommandPushConstant>(allocator_);
        // use alignment of uint32 as currently the push constants are uint32s
        // the data is allocated by shader/pipeline needs
        uint8_t* pushData =
            static_cast<uint8_t*>(AllocateRenderData(allocator_, std::alignment_of<uint32_t>(), pushConstant.byteSize));
        if (rc && pushData) {
            rc->psoHandle = stateData_.currentPsoHandle;
            rc->pushConstant = pushConstant;
            rc->data = pushData;
            // the max amount of visible data is copied
            const size_t minData = Math::min(static_cast<size_t>(pushConstant.byteSize), data.size_bytes());
            const bool res = CloneData(rc->data, pushConstant.byteSize, data.data(), minData);
            PLUGIN_UNUSED(res);
            PLUGIN_ASSERT(res);

            renderCommands_.push_back(RenderCommandWithType { RenderCommandType::PUSH_CONSTANT, rc });
        }
    } else if (pushConstant.byteSize > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_E("RENDER_VALIDATION: push constant byte size must be smaller than or equal to %u bytes.",
            PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE);
#endif
    }
}

void RenderCommandList::PushConstant(const RENDER_NS::PushConstant& pushConstant, const uint8_t* data)
{
    if ((pushConstant.byteSize > 0) && data) {
        PushConstantData(pushConstant, { data, pushConstant.byteSize });
    }
}

void RenderCommandList::BindVertexBuffers(const array_view<const VertexBuffer> vertexBuffers)
{
    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (vertexBuffers.size() > PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT) {
        PLUGIN_LOG_W("RENDER_VALIDATION: max vertex buffer count exceeded, binding only max vertex buffer count");
    }
#endif

    if (!vertexBuffers.empty()) {
        RenderCommandBindVertexBuffers* data = AllocateRenderCommand<RenderCommandBindVertexBuffers>(allocator_);
        if (data) {
            VertexBuffer dynamicBarrierVertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
            uint32_t dynamicBarrierVertexBufferCount = 0;
            const uint32_t vertexBufferCount =
                Math::min(PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT, static_cast<uint32_t>(vertexBuffers.size()));
            data->vertexBufferCount = vertexBufferCount;
            RenderHandle previousVbHandle; // often all vertex buffers are within the same buffer with offsets
            for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
                data->vertexBuffers[idx] = vertexBuffers[idx];
                const RenderHandle currVbHandle = vertexBuffers[idx].bufferHandle;
                if ((previousVbHandle.id != currVbHandle.id) && RenderHandleUtil::IsDynamicResource(currVbHandle) &&
                    (vertexBuffers[idx].byteSize > 0)) {
                    // NOTE: we do not try to create perfect barriers with vertex inputs (just barrier the whole rc)
                    dynamicBarrierVertexBuffers[dynamicBarrierVertexBufferCount++] = { currVbHandle, 0,
                        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
                    previousVbHandle = currVbHandle;
                }
            }

            // add possible vertex/index buffer barriers before render pass
            if (stateData_.renderPassHasBegun && (dynamicBarrierVertexBufferCount > 0)) {
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->vertexIndexBarrierCount += dynamicBarrierVertexBufferCount;
                const size_t currCount = rpVertexInputBufferBarriers_.size();
                rpVertexInputBufferBarriers_.resize(currCount + static_cast<size_t>(dynamicBarrierVertexBufferCount));
                for (uint32_t dynIdx = 0; dynIdx < dynamicBarrierVertexBufferCount; ++dynIdx) {
                    rpVertexInputBufferBarriers_[currCount + dynIdx] = dynamicBarrierVertexBuffers[dynIdx];
                }
            }

            renderCommands_.push_back({ RenderCommandType::BIND_VERTEX_BUFFERS, data });
        }
    }
}

void RenderCommandList::BindIndexBuffer(const IndexBuffer& indexBuffer)
{
    ValidatePipeline();

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(indexBuffer.bufferHandle);
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((indexBuffer.indexType > IndexType::CORE_INDEX_TYPE_UINT32) || (handleType != RenderHandleType::GPU_BUFFER)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid index buffer binding");
    }
#endif

    RenderCommandBindIndexBuffer* data = AllocateRenderCommand<RenderCommandBindIndexBuffer>(allocator_);
    if (data && (handleType == RenderHandleType::GPU_BUFFER)) {
        data->indexBuffer = indexBuffer;
        if (RenderHandleUtil::IsDynamicResource(indexBuffer.bufferHandle)) {
            stateData_.currentBarrierPoint->vertexIndexBarrierCount++;
            rpVertexInputBufferBarriers_.push_back(
                { indexBuffer.bufferHandle, indexBuffer.bufferOffset, indexBuffer.byteSize });
        }
        renderCommands_.push_back({ RenderCommandType::BIND_INDEX_BUFFER, data });
    }
}

void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    // TODO: multi-render-pass related handling is missing here

    if (renderPassDesc.subpassCount != static_cast<uint32_t>(subpassDescs.size())) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_subpass_",
            "RENDER_VALIDATION: BeginRenderPass renderPassDesc.subpassCount (%u) must match subpassDescs size (%u)",
            renderPassDesc.subpassCount, static_cast<uint32_t>(subpassDescs.size()));
#endif
        stateData_.validCommandList = false;
    }
    ValidateRenderPass(renderPassDesc);
    if (!stateData_.validCommandList) {
        return;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, subpassDescs);
#endif
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            PLUGIN_ASSERT(renderPassDesc.subpassCount == static_cast<uint32_t>(subpassDescs.size()));

            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->renderPassDesc.renderArea.extentWidth = Math::max(1u, data->renderPassDesc.renderArea.extentWidth);
            data->renderPassDesc.renderArea.extentHeight = Math::max(1u, data->renderPassDesc.renderArea.extentHeight);
            data->subpassStartIndex = 0;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
                return;
            }

            CloneData(
                data->subpasses.data(), data->subpasses.size_bytes(), subpassDescs.data(), subpassDescs.size_bytes());

            bool valid = true;
            for (size_t subpassIdx = 0; subpassIdx < subpassDescs.size(); ++subpassIdx) {
                const auto& subpassRef = subpassDescs[subpassIdx];

                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};

                valid = valid && ProcessInputAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                valid = valid && ProcessColorAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                valid = valid && ProcessResolveAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                valid = valid && ProcessDepthAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                valid =
                    valid && ProcessFragmentShadingRateAttachments(renderPassDesc, subpassRef, subpassResourceStates);
#if (RENDER_VULKAN_FSR_ENABLED != 1)
                data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
#endif
            }
            if (!valid) {
                stateData_.validCommandList = false;
            }

            // render pass layouts will be updated by render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}

void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const uint32_t subpassStartIdx, const RenderPassSubpassDesc& subpassDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    if (subpassStartIdx >= renderPassDesc.subpassCount) {
        PLUGIN_LOG_E("RCL:BeginRenderPass: subpassStartIdx(%u) must be smaller than renderPassDesc.subpassCount (%u)",
            subpassStartIdx, renderPassDesc.subpassCount);
        stateData_.validCommandList = false;
    }

    ValidateRenderPass(renderPassDesc);
    if (!stateData_.validCommandList) {
        return;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = subpassStartIdx;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, { &subpassDesc, 1u });
#endif
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (hasMultiRpCommandListSubpasses_) {
            PLUGIN_LOG_E("RenderCommandList: BeginRenderPass: creating multiple render node subpasses not supported");
            stateData_.validCommandList = false;
        } else if (renderPassDesc.subpassCount > 1) {
            hasMultiRpCommandListSubpasses_ = true;
            multiRpCommandListData_.secondaryCmdLists =
                (renderPassDesc.subpassContents == CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS);
            if ((!renderCommands_.empty()) && (renderCommands_.back().type == RenderCommandType::BARRIER_POINT)) {
                multiRpCommandListData_.rpBarrierCmdIndex = static_cast<uint32_t>(renderCommands_.size()) - 1u;
            }
        }
        multiRpCommandListData_.subpassCount = renderPassDesc.subpassCount;
        multiRpCommandListData_.rpBeginCmdIndex = static_cast<uint32_t>(renderCommands_.size());

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->subpassStartIndex = subpassStartIdx;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
                return;
            }

            bool valid = true;
            for (size_t subpassIdx = 0; subpassIdx < data->subpasses.size(); ++subpassIdx) {
                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};
                data->subpasses[subpassIdx] = {};

                if (subpassIdx == subpassStartIdx) {
                    data->subpasses[subpassIdx] = subpassDesc;
                    valid = valid && ProcessInputAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessColorAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessResolveAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessDepthAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid &&
                            ProcessFragmentShadingRateAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
#if (RENDER_VULKAN_FSR_ENABLED != 1)
                    data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
#endif
                }
            }
            if (!valid) {
                stateData_.validCommandList = false;
            }

            // render pass layouts will be updated by render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}
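// The ProcessXxxAttachments helpers below accumulate each attachment's
// per-subpass resource state (stage, access, layout, queue), which the render
// graph later uses to generate barriers and layout transitions.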
ProcessInputAttachments(const RenderPassDesc & renderPassDsc,const RenderPassSubpassDesc & subpassRef,RenderPassAttachmentResourceStates & subpassResourceStates)1018 bool RenderCommandList::ProcessInputAttachments(const RenderPassDesc& renderPassDsc,
1019     const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
1020 {
1021     bool valid = true;
1022     for (uint32_t idx = 0; idx < subpassRef.inputAttachmentCount; ++idx) {
1023         const uint32_t attachmentIndex = subpassRef.inputAttachmentIndices[idx];
1024         const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
1025         if (!RenderHandleUtil::IsGpuImage(handle)) {
1026             valid = false;
1027         }
1028 
1029         // NOTE: mipLevel and layers are not updated to GpuResourceState
1030         // NOTE: validation needed for invalid handles
1031         GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
1032         refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
1033         refState.accessFlags |= CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT;
1034         refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
1035         refState.gpuQueue = gpuQueue_;
1036         // if used e.g. as input and color attachment use general layout
1037         if (subpassResourceStates.layouts[attachmentIndex] != CORE_IMAGE_LAYOUT_UNDEFINED) {
1038             subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_GENERAL;
1039         } else {
1040             subpassResourceStates.layouts[attachmentIndex] = (RenderHandleUtil::IsDepthImage(handle))
1041                                                                  ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
1042                                                                  : CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1043         }
1044 #if (RENDER_VALIDATION_ENABLED == 1)
1045         ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
1046             ImageUsageFlagBits::CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
1047 #endif
1048     }
1049     return valid;
1050 }
1051 
bool RenderCommandList::ProcessColorAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.colorAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.colorAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= (CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        // if the attachment is used e.g. as both input and color attachment, use the general layout
        subpassResourceStates.layouts[attachmentIndex] =
            (subpassResourceStates.layouts[attachmentIndex] != ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED)
                ? CORE_IMAGE_LAYOUT_GENERAL
                : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessResolveAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.resolveAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.resolveAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessDepthAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.depthAttachmentCount == 1) {
        const uint32_t attachmentIndex = subpassRef.depthAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |=
            (CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |=
            (CORE_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    if ((subpassRef.depthAttachmentCount == 1) && (subpassRef.depthResolveAttachmentCount == 1)) {
        const uint32_t attachmentIndex = subpassRef.depthResolveAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessFragmentShadingRateAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.fragmentShadingRateAttachmentCount == 1) {
#if (RENDER_VULKAN_FSR_ENABLED == 1)
        const uint32_t attachmentIndex = subpassRef.fragmentShadingRateAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL;
#else
        PLUGIN_LOG_ONCE_I("vk_fsr_disabled_flag",
            "RENDER_VALIDATION: Fragment shading rate disabled and all related attachments ignored.");
#endif
    }
    return valid;
}

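// The Process* helpers above derive each attachment's GpuResourceState and
// ImageLayout from the subpass description. The sketch below is illustrative
// only: it shows how a caller could describe a two-subpass pass whose color
// target is later read as an input attachment, using only the RenderPassDesc /
// RenderPassSubpassDesc fields referenced above. The handle variables
// (gBufferColor, depthBuffer) are hypothetical.
#if 0
    RenderPassDesc rpDesc;
    rpDesc.attachmentCount = 2u;
    rpDesc.attachmentHandles[0u] = gBufferColor; // color in subpass 0, input in subpass 1
    rpDesc.attachmentHandles[1u] = depthBuffer;  // depth in subpass 0

    RenderPassSubpassDesc subpasses[2u];
    subpasses[0u].colorAttachmentCount = 1u;
    subpasses[0u].colorAttachmentIndices[0u] = 0u;
    subpasses[0u].depthAttachmentCount = 1u;
    subpasses[0u].depthAttachmentIndex = 1u;

    subpasses[1u].inputAttachmentCount = 1u;
    subpasses[1u].inputAttachmentIndices[0u] = 0u; // read-only here -> SHADER_READ_ONLY_OPTIMAL,
                                                   // GENERAL if also written in the same subpass
#endif
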
void RenderCommandList::NextSubpass(const SubpassContents& subpassContents)
{
    RenderCommandNextSubpass* data = AllocateRenderCommand<RenderCommandNextSubpass>(allocator_);
    if (data) {
        data->subpassContents = subpassContents;
        data->renderCommandListIndex = 0; // will be updated in the render graph

        renderCommands_.push_back({ RenderCommandType::NEXT_SUBPASS, data });
    }
}

void RenderCommandList::EndRenderPass()
{
    if (!stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_EndRenderPass_",
            "RenderCommandList: a render pass must be active before calling EndRenderPass");
#endif
        stateData_.validCommandList = false;
        return;
    }

    if (hasMultiRpCommandListSubpasses_ && (multiRpCommandListData_.rpBeginCmdIndex != INVALID_CL_IDX)) {
        multiRpCommandListData_.rpEndCmdIndex = static_cast<uint32_t>(renderCommands_.size());
    }

    RenderCommandEndRenderPass* data = AllocateRenderCommand<RenderCommandEndRenderPass>(allocator_);
    if (data) {
        // will be updated in render graph if multi render command list render pass
        data->endType = RenderPassEndType::END_RENDER_PASS;
        data->subpassStartIndex = stateData_.renderPassStartIndex;
        data->subpassCount = stateData_.renderPassSubpassCount;

        renderCommands_.push_back({ RenderCommandType::END_RENDER_PASS, data });
    }

    stateData_.renderPassHasBegun = false;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = 0;
}

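// EndRenderPass() is the required counterpart of BeginRenderPass(); draws are
// only valid in between. A minimal usage sketch, assuming an IRenderCommandList&
// named cmdList inside a render node and hypothetical rpDesc/subpasses/psoHandle
// (not part of this translation unit):
#if 0
    cmdList.BeginRenderPass(rpDesc, { subpasses, BASE_NS::countof(subpasses) });
    cmdList.BindPipeline(psoHandle);
    cmdList.Draw(3u, 1u, 0u, 0u); // fullscreen triangle
    cmdList.EndRenderPass();
#endif
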
void RenderCommandList::BeginDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(stateData_.automaticBarriersEnabled);

    // barrier point for pending barriers
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = false;
}

void RenderCommandList::EndDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: BeginDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(!stateData_.automaticBarriersEnabled);

    stateData_.automaticBarriersEnabled = true;
}

void RenderCommandList::AddCustomBarrierPoint()
{
    const bool barrierState = stateData_.automaticBarriersEnabled;
    stateData_.automaticBarriersEnabled = true; // flag checked in AddBarrierPoint
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = barrierState;
}

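// Begin/EndDisableAutomaticBarrierPoints bracket a region where the render
// graph must not inject its own barriers, typically combined with the custom
// barrier calls below. A sketch of the intended call pattern (illustrative
// only; cmdList is a hypothetical IRenderCommandList&):
#if 0
    cmdList.BeginDisableAutomaticBarrierPoints();
    // ... record custom barriers and work that manages its own synchronization ...
    cmdList.AddCustomBarrierPoint(); // flushes the explicitly recorded custom barriers
    cmdList.EndDisableAutomaticBarrierPoints();
#endif
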
void RenderCommandList::CustomMemoryBarrier(const GeneralBarrier& source, const GeneralBarrier& destination)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
#endif

    CommandBarrier cb {
        RenderHandleUtil::CreateGpuResourceHandle(RenderHandleType::UNDEFINED, 0, 0, 0, 0),
        {
            source.accessFlags,
            source.pipelineStageFlags,
        },
        {},
        {
            destination.accessFlags,
            destination.pipelineStageFlags,
        },
        {},
    };

    customBarriers_.push_back(move(cb));

    stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
}

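// CustomMemoryBarrier records a global memory dependency that is not tied to a
// resource handle. A sketch: make compute writes visible to a later transfer.
// The flag values are assumed to follow the CORE_* naming seen elsewhere in
// this file, and the GeneralBarrier initializer order (accessFlags, then
// pipelineStageFlags) mirrors the member usage above; treat both as assumptions.
#if 0
    const GeneralBarrier src { CORE_ACCESS_SHADER_WRITE_BIT, CORE_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
    const GeneralBarrier dst { CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT };
    cmdList.CustomMemoryBarrier(src, dst);
    cmdList.AddCustomBarrierPoint(); // custom barriers take effect at a barrier point
#endif
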
void RenderCommandList::CustomBufferBarrier(const RenderHandle handle, const BufferResourceBarrier& source,
    const BufferResourceBarrier& destination, const uint32_t byteOffset, const uint32_t byteSize)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (byteSize == 0) {
        PLUGIN_LOG_ONCE_W("RENDER_VALIDATION_custom_buffer_barrier",
            "RENDER_VALIDATION: do not create zero-size custom buffer barriers");
    }
    if (handleType != RenderHandleType::GPU_BUFFER) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomBufferBarrier");
    }
#endif

    if ((byteSize > 0) && (handleType == RenderHandleType::GPU_BUFFER)) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalByteOffset = byteOffset;
        src.optionalByteSize = byteSize;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalByteOffset = byteOffset;
        dst.optionalByteSize = byteSize;

        CommandBarrier cb {
            handle,
            std::move(src),
            {},
            std::move(dst),
            {},
        };

        customBarriers_.push_back(move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

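// A sketch of a buffer barrier over an explicit byte range: guard vertex
// reads against a preceding transfer write. Handles and the byte size are
// hypothetical; the CORE_* flag names are assumed from the engine's Vulkan-style
// naming convention.
#if 0
    const BufferResourceBarrier src { CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT };
    const BufferResourceBarrier dst { CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT };
    cmdList.CustomBufferBarrier(vertexBufferHandle, src, dst, 0u, vertexDataByteSize);
#endif
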
void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& destination,
    const ImageSubresourceRange& imageSubresourceRange)
{
    // the special layout MAX_ENUM signals that the correct source state is fetched from state tracking
    ImageResourceBarrier source { 0, 0, ImageLayout::CORE_IMAGE_LAYOUT_MAX_ENUM };
    CustomImageBarrier(handle, source, destination, imageSubresourceRange);
}

void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& source,
    const ImageResourceBarrier& destination, const ImageSubresourceRange& imageSubresourceRange)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (handleType != RenderHandleType::GPU_IMAGE) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomImageBarrier");
    }
    ValidateImageSubresourceRange(gpuResourceMgr_, handle, imageSubresourceRange);
#endif

    if (handleType == RenderHandleType::GPU_IMAGE) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalImageLayout = source.imageLayout;
        src.optionalImageSubresourceRange = imageSubresourceRange;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalImageLayout = destination.imageLayout;
        dst.optionalImageSubresourceRange = imageSubresourceRange;

        CommandBarrier cb {
            handle,
            std::move(src),
            {},
            std::move(dst),
            {},
        };

        customBarriers_.push_back(std::move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}

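// A sketch of the source-less overload: transition an image to a sampled-read
// state and let the tracked state supply the source side (via the MAX_ENUM
// layout above). The ImageSubresourceRange field order is assumed to mirror
// Vulkan's VkImageSubresourceRange; imageHandle is hypothetical.
#if 0
    const ImageResourceBarrier dst { CORE_ACCESS_SHADER_READ_BIT, CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
    const ImageSubresourceRange range { CORE_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u }; // mip 0, layer 0
    cmdList.CustomImageBarrier(imageHandle, dst, range);
#endif
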
void RenderCommandList::CopyBufferToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferCopy& bufferCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER);
        }

        RenderCommandCopyBuffer* data = AllocateRenderCommand<RenderCommandCopyBuffer>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferCopy = bufferCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToBuffer");
    }
}

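// A staging-upload sketch for the copy family above; CopyBufferToImage,
// CopyImageToBuffer, and CopyImageToImage follow the same record-and-barrier
// pattern. The BufferCopy member layout (srcOffset, dstOffset, size) is an
// assumption based on the engine's Vulkan-style descriptors; handles are
// hypothetical.
#if 0
    const BufferCopy region { 0u, 0u, dataByteSize }; // srcOffset, dstOffset, size (assumed layout)
    cmdList.CopyBufferToBuffer(stagingBufferHandle, deviceBufferHandle, region);
#endif
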
void RenderCommandList::CopyBufferToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        RenderCommandCopyBufferImage* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToImage");
    }
}

void RenderCommandList::CopyImageToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        RenderCommandCopyBufferImage* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToBuffer");
    }
}

void RenderCommandList::CopyImageToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const ImageCopy& imageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_IMAGE);
        }

        RenderCommandCopyImage* data = AllocateRenderCommand<RenderCommandCopyImage>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->imageCopy = imageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToImage");
    }
}

void RenderCommandList::BlitImage(const RenderHandle sourceHandle, const RenderHandle destinationHandle,
    const ImageBlit& imageBlit, const Filter filter)
{
    if (!stateData_.renderPassHasBegun) {
        if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
            if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
                RenderHandleUtil::IsDynamicResource(destinationHandle)) {
                AddBarrierPoint(RenderCommandType::BLIT_IMAGE);
            }

            RenderCommandBlitImage* data = AllocateRenderCommand<RenderCommandBlitImage>(allocator_);
            if (data) {
                data->srcHandle = sourceHandle;
                data->dstHandle = destinationHandle;
                data->imageBlit = imageBlit;
                data->filter = filter;
                // NOTE: desired layouts (barrier point needs to respect these)
                data->srcImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
                data->dstImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

                renderCommands_.push_back({ RenderCommandType::BLIT_IMAGE, data });
            }
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: BlitImage can only be called outside of a render pass");
    }
}

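// BlitImage is the scaled, filtering copy; the Copy* commands above are 1:1.
// Sketch of a downscale blit. The region setup is omitted because it depends
// on the engine's ImageBlit declaration; the handles and the CORE_FILTER_LINEAR
// enum value name are assumptions.
#if 0
    ImageBlit blit {}; // fill src/dst subresource and offsets per ImageBlit's declaration
    cmdList.BlitImage(fullResImage, halfResImage, blit, CORE_FILTER_LINEAR);
#endif
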
void RenderCommandList::UpdateDescriptorSets(const BASE_NS::array_view<const RenderHandle> handles,
    const BASE_NS::array_view<const DescriptorSetLayoutBindingResources> bindingResources)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (handles.size() != bindingResources.size()) {
        PLUGIN_LOG_W("RENDER_VALIDATION: UpdateDescriptorSets handles and bindingResources sizes do not match");
    }
#endif
    const uint32_t count = static_cast<uint32_t>(Math::min(handles.size(), bindingResources.size()));
    for (uint32_t idx = 0; idx < count; ++idx) {
        const auto& handleRef = handles[idx];
        const auto& bindingResRef = bindingResources[idx];
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateDescriptorTypeBinding(nodeName_, gpuResourceMgr_, bindingResRef);
        if (bindingResRef.bindingMask != bindingResRef.descriptorSetBindingMask) {
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSets_bm_",
                "RENDER_VALIDATION: invalid bindings in descriptor set update (node:%s)", nodeName_.c_str());
        }
#endif
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handleRef);
        if (handleType == RenderHandleType::DESCRIPTOR_SET) {
            const bool valid =
                nodeContextDescriptorSetManager_.UpdateCpuDescriptorSet(handleRef, bindingResRef, gpuQueue_);
            if (valid) {
                descriptorSetHandlesForUpdates_.push_back(handleRef);
            } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSet_invalid_",
                    "RenderCommandList: invalid descriptor set bindings with update (node:%s)", nodeName_.c_str());
#endif
            }
        } else {
            PLUGIN_LOG_E("RenderCommandList: invalid handle for UpdateDescriptorSet");
        }
    }
}

void RenderCommandList::UpdateDescriptorSet(
    const RenderHandle handle, const DescriptorSetLayoutBindingResources& bindingResources)
{
    UpdateDescriptorSets({ &handle, 1U }, { &bindingResources, 1U });
}

void RenderCommandList::BindDescriptorSets(
    const uint32_t firstSet, const BASE_NS::array_view<const BindDescriptorSetData> descriptorSetData)
{
    if (descriptorSetData.empty()) {
        return;
    }
    const uint32_t maxSetNumber = firstSet + static_cast<uint32_t>(descriptorSetData.size());
    if (maxSetNumber > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E(
            "RenderCommandList::BindDescriptorSets: firstSet + descriptorSetData.size() (%u) exceeds max count (%u)",
            maxSetNumber, PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        return;
    }

    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (descriptorSetData.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
    }
    for (const auto& ref : descriptorSetData) {
        if (ref.dynamicOffsets.size() > PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
        }
    }
#endif

    if (auto* data = AllocateRenderCommand<RenderCommandBindDescriptorSets>(allocator_); data) {
        *data = {}; // default

        data->psoHandle = stateData_.currentPsoHandle;
        data->firstSet = firstSet;
        data->setCount = static_cast<uint32_t>(descriptorSetData.size());

        uint32_t descriptorSetCounterForBarriers = 0;
        uint32_t currSet = firstSet;
        for (const auto& ref : descriptorSetData) {
            if (currSet < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
                // allocate offsets for this set
                if (!ref.dynamicOffsets.empty()) {
                    const uint32_t dynCount = static_cast<uint32_t>(ref.dynamicOffsets.size());
                    if (auto* doData = AllocateRenderData<uint32_t>(allocator_, dynCount); doData) {
                        auto& dynRef = data->descriptorSetDynamicOffsets[currSet];
                        dynRef.dynamicOffsets = doData;
                        dynRef.dynamicOffsetCount = dynCount;
                        CloneData(dynRef.dynamicOffsets, dynCount * sizeof(uint32_t), ref.dynamicOffsets.data(),
                            ref.dynamicOffsets.size_bytes());
                    }
                }

                data->descriptorSetHandles[currSet] = ref.handle;

                const bool hasDynamicBarrierResources =
                    nodeContextDescriptorSetManager_.HasDynamicBarrierResources(ref.handle);
                if (stateData_.renderPassHasBegun && hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.push_back(ref.handle);
                    descriptorSetCounterForBarriers++;
                }
                stateData_.currentBoundSets[currSet].hasDynamicBarrierResources = hasDynamicBarrierResources;
                stateData_.currentBoundSets[currSet].descriptorSetHandle = ref.handle;
                stateData_.currentBoundSetsMask |= (1 << currSet);
                ++currSet;
            }
        }

        renderCommands_.push_back({ RenderCommandType::BIND_DESCRIPTOR_SETS, data });

        // if the currentBarrierPoint is null there have been invalid bindings earlier
        if (stateData_.renderPassHasBegun && stateData_.currentBarrierPoint) {
            // add possible barriers before render pass
            stateData_.currentBarrierPoint->descriptorSetHandleCount += descriptorSetCounterForBarriers;
        } else if (stateData_.automaticBarriersEnabled) {
            stateData_.dirtyDescriptorSetsForBarriers = true;
        }
    }
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const BindDescriptorSetData& descriptorSetData)
{
    BindDescriptorSets(set, { &descriptorSetData, 1U });
}

void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles)
{
    BindDescriptorSetData bdsd[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
    const uint32_t count =
        Math::min(static_cast<uint32_t>(handles.size()), PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
    for (uint32_t idx = 0U; idx < count; ++idx) {
        bdsd[idx].handle = handles[idx];
    }
    BindDescriptorSets(firstSet, { bdsd, count });
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const RenderHandle handle)
{
    BindDescriptorSetData bdsd = { handle, {} };
    BindDescriptorSets(set, { &bdsd, 1U });
}

void RenderCommandList::BindDescriptorSet(
    const uint32_t set, const RenderHandle handle, const array_view<const uint32_t> dynamicOffsets)
{
    BindDescriptorSetData bdsd = { handle, dynamicOffsets };
    BindDescriptorSets(set, { &bdsd, 1U });
}

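// The typical update-then-bind flow for the functions above, as a sketch.
// cmdList, setHandle, bindingResources, psoHandle, frameIndex, and
// alignedUniformSize are hypothetical; the dynamic offset selects a per-frame
// slice of a dynamic uniform buffer.
#if 0
    cmdList.UpdateDescriptorSet(setHandle, bindingResources); // CPU-side update, applied later
    cmdList.BindPipeline(psoHandle);
    const uint32_t dynamicOffset = frameIndex * alignedUniformSize;
    cmdList.BindDescriptorSet(0u, setHandle, { &dynamicOffset, 1u });
    cmdList.Draw(vertexCount, 1u, 0u, 0u);
#endif
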
void RenderCommandList::BuildAccelerationStructures(const AccelerationStructureBuildGeometryData& geometry,
    const BASE_NS::array_view<const AccelerationStructureGeometryTrianglesData> triangles,
    const BASE_NS::array_view<const AccelerationStructureGeometryAabbsData> aabbs,
    const BASE_NS::array_view<const AccelerationStructureGeometryInstancesData> instances)
{
    if (!(triangles.empty() && aabbs.empty() && instances.empty())) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
        RenderCommandBuildAccelerationStructure* data =
            AllocateRenderCommand<RenderCommandBuildAccelerationStructure>(allocator_);
        if (data) {
            data->type = geometry.info.type;
            data->flags = geometry.info.flags;
            data->mode = geometry.info.mode;
            data->srcAccelerationStructure = geometry.srcAccelerationStructure;
            data->dstAccelerationStructure = geometry.dstAccelerationStructure;
            data->scratchBuffer = geometry.scratchBuffer.handle;
            data->scratchOffset = geometry.scratchBuffer.offset;

            if (!triangles.empty()) {
                AccelerationStructureGeometryTrianglesData* trianglesData =
                    static_cast<AccelerationStructureGeometryTrianglesData*>(
                        AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryTrianglesData>(),
                            sizeof(AccelerationStructureGeometryTrianglesData) * triangles.size()));
                data->trianglesData = trianglesData;
                data->trianglesView = { data->trianglesData, triangles.size() };
                for (size_t idx = 0; idx < triangles.size(); ++idx) {
                    data->trianglesView[idx] = triangles[idx];
                }
            }
            if (!aabbs.empty()) {
                AccelerationStructureGeometryAabbsData* aabbsData =
                    static_cast<AccelerationStructureGeometryAabbsData*>(
                        AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryAabbsData>(),
                            sizeof(AccelerationStructureGeometryAabbsData) * aabbs.size()));
                data->aabbsData = aabbsData;
                data->aabbsView = { data->aabbsData, aabbs.size() };
                for (size_t idx = 0; idx < aabbs.size(); ++idx) {
                    data->aabbsView[idx] = aabbs[idx];
                }
            }
            if (!instances.empty()) {
                AccelerationStructureGeometryInstancesData* instancesData =
                    static_cast<AccelerationStructureGeometryInstancesData*>(
                        AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryInstancesData>(),
                            sizeof(AccelerationStructureGeometryInstancesData) * instances.size()));
                data->instancesData = instancesData;
                data->instancesView = { data->instancesData, instances.size() };
                for (size_t idx = 0; idx < instances.size(); ++idx) {
                    data->instancesView[idx] = instances[idx];
                }
            }
            renderCommands_.push_back({ RenderCommandType::BUILD_ACCELERATION_STRUCTURE, data });
        }
#endif
    }
}

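// A heavily hedged sketch of driving the build above for a bottom-level
// structure. Only the geometry fields referenced in this function are used;
// the enum value name, the empty triangle-data setup, and all handles are
// assumptions for illustration only.
#if 0
    AccelerationStructureBuildGeometryData geometry {};
    geometry.info.type = CORE_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; // enum value name assumed
    geometry.dstAccelerationStructure = blasHandle;      // hypothetical handle
    geometry.scratchBuffer.handle = scratchBufferHandle; // hypothetical handle
    geometry.scratchBuffer.offset = 0u;

    AccelerationStructureGeometryTrianglesData triangles {}; // vertex/index setup per its declaration
    cmdList.BuildAccelerationStructures(geometry, { &triangles, 1u }, {}, {});
#endif
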
void RenderCommandList::ClearColorImage(
    const RenderHandle handle, const ClearColorValue color, const array_view<const ImageSubresourceRange> ranges)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    {
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            PLUGIN_LOG_W("RENDER_VALIDATION: Invalid image handle given to ClearColorImage");
        }
        if (ranges.empty()) {
            PLUGIN_LOG_W("RENDER_VALIDATION: Invalid ranges given to ClearColorImage");
        }
        {
            const GpuImageDesc desc = gpuResourceMgr_.GetImageDescriptor(handle);
            if ((desc.usageFlags & CORE_IMAGE_USAGE_TRANSFER_DST_BIT) == 0) {
                PLUGIN_LOG_E("RENDER_VALIDATION: Image missing usage flag TRANSFER_DST for ClearColorImage command");
            }
        }
    }
#endif
    if (RenderHandleUtil::IsGpuImage(handle) && (!ranges.empty())) {
        AddBarrierPoint(RenderCommandType::CLEAR_COLOR_IMAGE);

        RenderCommandClearColorImage* data = AllocateRenderCommand<RenderCommandClearColorImage>(allocator_);
        if (data) {
            data->handle = handle;
            data->imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            data->color = color;
            data->ranges = { AllocateRenderData<ImageSubresourceRange>(
                                 allocator_, static_cast<uint32_t>(ranges.size())),
                ranges.size() };
            if (!data->ranges.data()) {
                return;
            }
            CloneData(data->ranges.data(), data->ranges.size_bytes(), ranges.data(), ranges.size_bytes());

            renderCommands_.push_back({ RenderCommandType::CLEAR_COLOR_IMAGE, data });
        }
    }
}

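// Sketch of clearing the first mip/layer of a transfer-destination image.
// The ClearColorValue initializer form and the ImageSubresourceRange field
// order (mirroring Vulkan) are assumptions; imageHandle is hypothetical.
#if 0
    const ClearColorValue clearValue { 0.0f, 0.0f, 0.0f, 1.0f }; // initializer form assumed
    const ImageSubresourceRange range { CORE_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u };
    cmdList.ClearColorImage(imageHandle, clearValue, { &range, 1u });
#endif
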
void RenderCommandList::SetDynamicStateViewport(const ViewportDesc& viewportDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateViewport(nodeName_, viewportDesc);
#endif
    RenderCommandDynamicStateViewport* data = AllocateRenderCommand<RenderCommandDynamicStateViewport>(allocator_);
    if (data) {
        data->viewportDesc = viewportDesc;
        data->viewportDesc.width = Math::max(1.0f, data->viewportDesc.width);
        data->viewportDesc.height = Math::max(1.0f, data->viewportDesc.height);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_VIEWPORT, data });
    }
}

void RenderCommandList::SetDynamicStateScissor(const ScissorDesc& scissorDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateScissor(nodeName_, scissorDesc);
#endif
    RenderCommandDynamicStateScissor* data = AllocateRenderCommand<RenderCommandDynamicStateScissor>(allocator_);
    if (data) {
        data->scissorDesc = scissorDesc;
        data->scissorDesc.extentWidth = Math::max(1u, data->scissorDesc.extentWidth);
        data->scissorDesc.extentHeight = Math::max(1u, data->scissorDesc.extentHeight);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_SCISSOR, data });
    }
}

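// Viewport and scissor are usually set together right after beginning a render
// pass. In this sketch the ViewportDesc field order (x, y, width, height,
// minDepth, maxDepth) and the ScissorDesc offsets are assumptions; only
// width/height and extentWidth/extentHeight are confirmed by the code above.
#if 0
    const ViewportDesc viewport { 0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f };
    const ScissorDesc scissor { 0, 0, 1280u, 720u }; // offsetX, offsetY, extentWidth, extentHeight
    cmdList.SetDynamicStateViewport(viewport);
    cmdList.SetDynamicStateScissor(scissor);
#endif
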
void RenderCommandList::SetDynamicStateLineWidth(const float lineWidth)
{
    RenderCommandDynamicStateLineWidth* data = AllocateRenderCommand<RenderCommandDynamicStateLineWidth>(allocator_);
    if (data) {
        data->lineWidth = lineWidth;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_LINE_WIDTH, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBias(
    const float depthBiasConstantFactor, const float depthBiasClamp, const float depthBiasSlopeFactor)
{
    RenderCommandDynamicStateDepthBias* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBias>(allocator_);
    if (data) {
        data->depthBiasConstantFactor = depthBiasConstantFactor;
        data->depthBiasClamp = depthBiasClamp;
        data->depthBiasSlopeFactor = depthBiasSlopeFactor;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS, data });
    }
}

void RenderCommandList::SetDynamicStateBlendConstants(const array_view<const float> blendConstants)
{
    constexpr uint32_t THRESHOLD = 4;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (blendConstants.size() > THRESHOLD) {
        PLUGIN_LOG_E("RenderCommandList: blend constant count (%zu) exceeds supported max (%u)", blendConstants.size(),
            THRESHOLD);
    }
#endif
    RenderCommandDynamicStateBlendConstants* data =
        AllocateRenderCommand<RenderCommandDynamicStateBlendConstants>(allocator_);
    if (data) {
        *data = {};
        const uint32_t bcCount = Math::min(static_cast<uint32_t>(blendConstants.size()), THRESHOLD);
        for (uint32_t idx = 0; idx < bcCount; ++idx) {
            data->blendConstants[idx] = blendConstants[idx];
        }
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBounds(const float minDepthBounds, const float maxDepthBounds)
{
    RenderCommandDynamicStateDepthBounds* data =
        AllocateRenderCommand<RenderCommandDynamicStateDepthBounds>(allocator_);
    if (data) {
        data->minDepthBounds = minDepthBounds;
        data->maxDepthBounds = maxDepthBounds;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS, data });
    }
}

void RenderCommandList::SetDynamicStateStencilCompareMask(const StencilFaceFlags faceMask, const uint32_t compareMask)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::COMPARE_MASK;
        data->faceMask = faceMask;
        data->mask = compareMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilWriteMask(const StencilFaceFlags faceMask, const uint32_t writeMask)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::WRITE_MASK;
        data->faceMask = faceMask;
        data->mask = writeMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilReference(const StencilFaceFlags faceMask, const uint32_t reference)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::REFERENCE;
        data->faceMask = faceMask;
        data->mask = reference;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateFragmentShadingRate(
    const Size2D& fragmentSize, const FragmentShadingRateCombinerOps& combinerOps)
{
    RenderCommandDynamicStateFragmentShadingRate* data =
        AllocateRenderCommand<RenderCommandDynamicStateFragmentShadingRate>(allocator_);
    if (data) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateFragmentShadingRate(fragmentSize);
#endif
        // map requested sizes (valid inputs 0-4) to the supported shading rate sizes 1, 2, and 4
        constexpr uint32_t maxValue { 4u };
        constexpr uint32_t valueMapper[maxValue + 1u] = { 1u, 1u, 2u, 2u, 4u };
        Size2D fs = fragmentSize;
        fs.width = (fs.width <= maxValue) ? valueMapper[fs.width] : maxValue;
        fs.height = (fs.height <= maxValue) ? valueMapper[fs.height] : maxValue;

        data->fragmentSize = fs;
        data->combinerOps = combinerOps;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE, data });
    }
}

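// Worked example of the clamping above: a requested fragmentSize of { 3u, 5u }
// maps width 3 -> 2 (valueMapper[3]) and clamps height 5 -> maxValue (4), so
// the recorded size is { 2u, 4u }; only sizes 1, 2, and 4 ever reach the backend.
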
void RenderCommandList::SetExecuteBackendFramePosition()
{
    if (!stateData_.executeBackendFrameSet) {
        AddBarrierPoint(RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION);

        RenderCommandExecuteBackendFramePosition* data =
            AllocateRenderCommand<RenderCommandExecuteBackendFramePosition>(allocator_);
        if (data) {
            data->id = 0;
            renderCommands_.push_back({ RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION, data });
            stateData_.executeBackendFrameSet = true;
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: there can be only one SetExecuteBackendFramePosition() call per frame");
    }
}

void RenderCommandList::ValidateRenderPass(const RenderPassDesc& renderPassDesc)
{
    if (stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_hasbegun_",
            "RenderCommandList: render pass is active, it must be ended before starting a new one (node: %s)",
            nodeName_.c_str());
#endif
        stateData_.validCommandList = false;
    }
    // validate render pass attachments
    for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
        if (!RenderHandleUtil::IsValid(renderPassDesc.attachmentHandles[idx])) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_attachments_",
                "RenderCommandList: Invalid render pass attachment handle in index: %u (node:%s)", idx,
                nodeName_.c_str());
#endif
            stateData_.validCommandList = false;
        }
    }
}

void RenderCommandList::ValidatePipeline()
{
    if (!stateData_.validPso) {
        stateData_.validCommandList = false;
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidatePipeline_", "RenderCommandList: PSO not bound.");
#endif
    }
}

void RenderCommandList::ValidatePipelineLayout()
{
    if (stateData_.checkBindPipelineLayout) {
        stateData_.checkBindPipelineLayout = false;
        // fast check without validation
        const uint32_t pipelineLayoutSetsMask =
            RenderHandleUtil::GetPipelineLayoutDescriptorSetMask(stateData_.currentPsoHandle);
        if ((stateData_.currentBoundSetsMask & pipelineLayoutSetsMask) != pipelineLayoutSetsMask) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(
                "RenderCommandList::ValidatePipelineLayout", "RenderCommandList: not all needed descriptor sets bound");
#endif
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandleType rhType = RenderHandleUtil::GetHandleType(stateData_.currentPsoHandle);
        const PipelineLayout& pl = (rhType == RenderHandleType::COMPUTE_PSO)
                                       ? psoMgr_.GetComputePsoPipelineLayout(stateData_.currentPsoHandle)
                                       : psoMgr_.GetGraphicsPsoPipelineLayout(stateData_.currentPsoHandle);
        const uint32_t plDescriptorSetCount = pl.descriptorSetCount;
        uint32_t bindCount = 0;
        uint32_t bindSetIndices[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT] { ~0u, ~0u, ~0u, ~0u };
        for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
            const DescriptorSetBind& currSet = stateData_.currentBoundSets[idx];
            if (RenderHandleUtil::IsValid(currSet.descriptorSetHandle)) {
                bindCount++;
                bindSetIndices[idx] = idx;
            }
        }
        if (bindCount < plDescriptorSetCount) {
            PLUGIN_LOG_E("RENDER_VALIDATION: not all descriptor sets required by the pipeline layout are bound");
        }
#endif
    }
}

const CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid) const
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid)
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

void RenderCommandList::Ref() {}

void RenderCommandList::Unref() {}
RENDER_END_NAMESPACE()