/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_command_list.h"

#include <cinttypes>
#include <cstdint>

#include <base/containers/array_view.h>
#include <render/device/pipeline_layout_desc.h>
#include <render/namespace.h>
#include <render/nodecontext/intf_render_command_list.h>
#include <render/render_data_structures.h>

#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/node_context_descriptor_set_manager.h"
#include "nodecontext/node_context_pso_manager.h"
#include "nodecontext/render_node_context_manager.h"
#include "util/linear_allocator.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
PLUGIN_STATIC_ASSERT(PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT == 4);
PLUGIN_STATIC_ASSERT(PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT == 8u);
namespace {
#if (RENDER_VALIDATION_ENABLED == 1)
void ValidateImageUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const ImageUsageFlags imageUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetImageDescriptor(handle).usageFlags & imageUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateImageUsageFlags_",
            "RENDER_VALIDATION: gpu image (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateBufferUsageFlags(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderHandle handle, const BufferUsageFlags bufferUsageFlags, const string_view str)
{
    if ((gpuResourceMgr.GetBufferDescriptor(handle).usageFlags & bufferUsageFlags) == 0) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateBufferUsageFlags_",
            "RENDER_VALIDATION: gpu buffer (handle: %" PRIu64
            ") (name: %s), not created with needed flags: %s, (node: %s)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), str.data(), nodeName.data());
    }
}

void ValidateDescriptorTypeBinding(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const DescriptorSetLayoutBindingResources& bindingRes)
{
    for (const auto& ref : bindingRes.buffers) {
        if (!RenderHandleUtil::IsGpuBuffer(ref.resource.handle)) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid GPU buffer");
        }
        if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT, "CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            ValidateBufferUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT, "CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
            // no buffer usage flag requirements for acceleration structures
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported buffer descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.images) {
        if ((ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
            (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_SAMPLED_BIT,
                "CORE_IMAGE_USAGE_SAMPLED_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle, CORE_IMAGE_USAGE_STORAGE_BIT,
                "CORE_IMAGE_USAGE_STORAGE_BIT");
        } else if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
            ValidateImageUsageFlags(nodeName, gpuResourceMgr, ref.resource.handle,
                CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
        } else {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported image descriptor type: %u", ref.binding.descriptorType);
        }
    }
    for (const auto& ref : bindingRes.samplers) {
        if (ref.binding.descriptorType != CORE_DESCRIPTOR_TYPE_SAMPLER) {
            PLUGIN_LOG_E("RENDER_VALIDATION: unsupported sampler descriptor type: %u", ref.binding.descriptorType);
        }
    }
}

void ValidateRenderPassAttachment(const string_view nodeName, const GpuResourceManager& gpuResourceMgr,
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
    const GpuImageDesc baseDesc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[0]);
    const uint32_t baseWidth = baseDesc.width;
    const uint32_t baseHeight = baseDesc.height;
    // NOTE: we do not check fragment shading rate attachment size
    for (uint32_t attachmentIdx = 1; attachmentIdx < renderPassDesc.attachmentCount; ++attachmentIdx) {
        const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(renderPassDesc.attachmentHandles[attachmentIdx]);
        if (desc.width != baseWidth || desc.height != baseHeight) {
            for (const auto& subpassRef : subpassDescs) {
                // returns false if the attachment index is referenced by the given index list
                auto CheckAttachments = [](const auto& indices, const uint32_t count, const uint32_t attachmentIndex) {
                    for (uint32_t idx = 0; idx < count; ++idx) {
                        if (indices[idx] == attachmentIndex) {
                            return false;
                        }
                    }
                    return true;
                };
                bool valid = true;
                valid &=
                    CheckAttachments(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount, attachmentIdx);
                valid &=
                    CheckAttachments(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount, attachmentIdx);
                valid &= CheckAttachments(
                    subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount, attachmentIdx);
                if ((subpassRef.depthAttachmentIndex == attachmentIdx) ||
                    (subpassRef.depthResolveAttachmentIndex == attachmentIdx)) {
                    valid = false;
                }
                if (!valid) {
                    if (RenderHandleUtil::IsSwapchain(renderPassDesc.attachmentHandles[attachmentIdx]) &&
                        RenderHandleUtil::IsDepthImage(renderPassDesc.attachmentHandles[0])) {
                        PLUGIN_LOG_ONCE_W(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: Depth and swapchain input mismatch: baseWidth:%u baseHeight:%u "
                            "currWidth:%u currHeight:%u",
                            baseWidth, baseHeight, desc.width, desc.height);
                    } else {
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize1_",
                            "RENDER_VALIDATION: render pass attachment size does not match with attachment index: %u",
                            attachmentIdx);
                        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateSize2_",
                            "RENDER_VALIDATION: baseWidth:%u baseHeight:%u currWidth:%u currHeight:%u", baseWidth,
                            baseHeight, desc.width, desc.height);
                    }
                }
            }
        }
    }
    if ((renderPassDesc.renderArea.extentWidth == 0) || (renderPassDesc.renderArea.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaExtent_",
            "RENDER_VALIDATION: render area cannot be zero (width: %u, height: %u) (node: %s)",
            renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight, nodeName.data());
    }
    if ((renderPassDesc.renderArea.offsetX >= static_cast<int32_t>(baseWidth)) ||
        (renderPassDesc.renderArea.offsetY >= static_cast<int32_t>(baseHeight))) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateRaOffset_",
            "RENDER_VALIDATION: render area offset cannot go out of screen (offsetX: %i, offsetY: %i) "
            "(baseWidth: %u, baseHeight: %u) (node: %s)",
            renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY, baseWidth, baseHeight,
            nodeName.data());
    }
}

void ValidateImageSubresourceRange(const GpuResourceManager& gpuResourceMgr, const RenderHandle handle,
    const ImageSubresourceRange& imageSubresourceRange)
{
    const GpuImageDesc desc = gpuResourceMgr.GetImageDescriptor(handle);
    if (imageSubresourceRange.baseMipLevel >= desc.mipCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseMipLevel: %u, is greater than or equal to "
                     "mipCount: %u",
            imageSubresourceRange.baseMipLevel, desc.mipCount);
    }
    if (imageSubresourceRange.baseArrayLayer >= desc.layerCount) {
        PLUGIN_LOG_E("RENDER_VALIDATION: ImageSubresourceRange baseArrayLayer: %u, is greater than or equal to "
                     "layerCount: %u",
            imageSubresourceRange.baseArrayLayer, desc.layerCount);
    }
}

void ValidateViewport(const string_view nodeName, const ViewportDesc& vd)
{
    if ((vd.width < 1.0f) || (vd.height < 1.0f)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateViewport_",
            "RENDER_VALIDATION: viewport width (%f) and height (%f) must be one or larger (node: %s)", vd.width,
            vd.height, nodeName.data());
    }
}

void ValidateScissor(const string_view nodeName, const ScissorDesc& sd)
{
    if ((sd.extentWidth == 0) || (sd.extentHeight == 0)) {
        PLUGIN_LOG_ONCE_E(nodeName + "_RCL_ValidateScissor_",
            "RENDER_VALIDATION: scissor extentWidth (%u) and scissor extentHeight (%u) cannot be zero (node: %s)",
            sd.extentWidth, sd.extentHeight, nodeName.data());
    }
}

void ValidateFragmentShadingRate(const Size2D& size)
{
    // valid fragment sizes are powers of two up to four, i.e. 1, 2, or 4 per dimension
    bool valid = true;
    if ((size.width == 0) || (size.height == 0)) {
        valid = false;
    } else if ((size.width == 3u) || (size.height == 3u)) {
        valid = false;
    } else if ((size.width > 4u) || (size.height > 4u)) {
        valid = false;
    }
    if (!valid) {
        PLUGIN_LOG_W("RENDER_VALIDATION: fragmentSize must be less than or equal to 4 and the value must be a "
                     "power of two (width = %u, height = %u)",
            size.width, size.height);
    }
}
#endif // RENDER_VALIDATION_ENABLED

constexpr uint32_t INVALID_CL_IDX { ~0u };

constexpr size_t BYTE_SIZE_ALIGNMENT { 64 };
constexpr size_t FRAME_RESERVE_EXTRA_DIVIDE { 8 };
constexpr size_t MIN_ALLOCATION_SIZE { 1024 * 2 };

// automatic acquire and release barriers
constexpr uint32_t INITIAL_MULTI_QUEUE_BARRIER_COUNT { 2u };

size_t GetAlignedBytesize(const size_t byteSize, const size_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}
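
// NOTE: a worked example, assuming a power-of-two alignment (all call sites in this file use one):
// GetAlignedBytesize(100, 64) == 128 and GetAlignedBytesize(64, 64) == 64.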

void* AllocateRenderData(
    RenderCommandList::LinearAllocatorStruct& allocator, const size_t alignment, const size_t byteSz)
{
    PLUGIN_ASSERT(byteSz > 0);
    void* rc = nullptr;
    if (!allocator.allocators.empty()) {
        const size_t currentIndex = allocator.allocators.size() - 1;
        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
    }

    if (rc == nullptr) { // current allocator is out of memory
        size_t allocatorByteSize = Math::max(MIN_ALLOCATION_SIZE, GetAlignedBytesize(byteSz, BYTE_SIZE_ALIGNMENT));
        const size_t currentIndex = allocator.allocators.size();
        if (currentIndex > 0) {
            allocatorByteSize =
                Math::max(allocatorByteSize, allocator.allocators[currentIndex - 1]->GetCurrentByteSize() * 2u);
        }
        allocator.allocators.push_back(make_unique<LinearAllocator>(allocatorByteSize));

        rc = allocator.allocators[currentIndex]->Allocate(byteSz, alignment);
        if (rc == nullptr) {
            PLUGIN_LOG_E("RenderCommandList: render command list allocation: out of memory");
            PLUGIN_ASSERT(false);
        }
    }
    return rc;
}
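
// NOTE: a worked example of the growth policy above: the first small request creates a
// MIN_ALLOCATION_SIZE (2 KiB) allocator; when a request no longer fits, the next allocator is
// sized to at least twice the previous allocator's byte size (e.g. max(aligned 5120, 2 * 2048)
// = 5120 bytes for a 5120 byte request).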

template<typename T>
T* AllocateRenderData(RenderCommandList::LinearAllocatorStruct& allocator, uint32_t count)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T) * count));
}

template<typename T>
T* AllocateRenderCommand(RenderCommandList::LinearAllocatorStruct& allocator)
{
    return static_cast<T*>(AllocateRenderData(allocator, std::alignment_of<T>::value, sizeof(T)));
}
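
// NOTE: a minimal usage sketch of the helpers above, following the pattern used throughout this
// file (RenderCommandDraw stands in for any render command struct):
//   if (RenderCommandDraw* rc = AllocateRenderCommand<RenderCommandDraw>(allocator_); rc) {
//       *rc = {}; // zero initialize, then fill the command fields
//       renderCommands_.push_back({ RenderCommandType::DRAW, rc });
//   }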
} // namespace

RenderCommandList::RenderCommandList(const BASE_NS::string_view nodeName,
    NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr, const GpuResourceManager& gpuResourceMgr,
    const NodeContextPsoManager& nodeContextPsoMgr, const GpuQueue& queue, const bool enableMultiQueue)
    : IRenderCommandList(), nodeName_(nodeName),
#if (RENDER_VALIDATION_ENABLED == 1)
      gpuResourceMgr_(gpuResourceMgr), psoMgr_(nodeContextPsoMgr),
#endif
      nodeContextDescriptorSetManager_(nodeContextDescriptorSetMgr), gpuQueue_(queue),
      enableMultiQueue_(enableMultiQueue)
{}

void RenderCommandList::BeginFrame()
{
    if (allocator_.allocators.size() == 1) { // size is good for this frame
        allocator_.allocators[0]->Reset();
    } else if (allocator_.allocators.size() > 1) {
        size_t fullByteSize = 0;
        size_t alignment = 0;
        for (auto& ref : allocator_.allocators) {
            fullByteSize += ref->GetCurrentByteSize();
            alignment = Math::max(alignment, static_cast<size_t>(ref->GetAlignment()));
            ref.reset();
        }
        allocator_.allocators.clear();

        // add some room for current frame allocation for new render commands
        const size_t extraBytes = Math::max(fullByteSize / FRAME_RESERVE_EXTRA_DIVIDE, BYTE_SIZE_ALIGNMENT);
        fullByteSize += extraBytes;

        // create a new single allocation for the combined previous size plus some extra bytes
        const size_t memAllocationByteSize = GetAlignedBytesize(fullByteSize, BYTE_SIZE_ALIGNMENT);
        allocator_.allocators.push_back(make_unique<LinearAllocator>(memAllocationByteSize, alignment));
    }

    ResetStateData();

    const auto clearAndReserve = [](auto& vec) {
        const size_t count = vec.size();
        vec.clear();
        vec.reserve(count);
    };

    clearAndReserve(renderCommands_);
    clearAndReserve(customBarriers_);
    clearAndReserve(rpVertexInputBufferBarriers_);
    clearAndReserve(rpIndirectBufferBarriers_);
    clearAndReserve(descriptorSetHandlesForBarriers_);
    clearAndReserve(descriptorSetHandlesForUpdates_);

    validReleaseAcquire_ = false;
    hasMultiRpCommandListSubpasses_ = false;
    multiRpCommandListData_ = {};
}

void RenderCommandList::SetValidGpuQueueReleaseAcquireBarriers()
{
    if (enableMultiQueue_) {
        validReleaseAcquire_ = true;
    }
}

void RenderCommandList::BeforeRenderNodeExecuteFrame()
{
    // add possible barrier point for gpu queue transfer acquire
    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);
    }
}

void RenderCommandList::AfterRenderNodeExecuteFrame()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndRenderPass() not called?");
    }
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints() not called?");
    }
#endif

    if ((gpuQueue_.type != GpuQueue::QueueType::UNDEFINED) && enableMultiQueue_) {
        if (stateData_.currentCustomBarrierIndices.dirtyCustomBarriers) {
            AddBarrierPoint(RenderCommandType::BARRIER_POINT);
        }

        // add possible barrier point for gpu queue transfer release
        AddBarrierPoint(RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);
    }
}

array_view<const RenderCommandWithType> RenderCommandList::GetRenderCommands() const
{
    if ((!stateData_.validCommandList) || stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_GetRenderCommands_",
            "RenderCommandList: invalid state data in render command list (node: %s)", nodeName_.c_str());
#endif
        return {};
    } else {
        return array_view<const RenderCommandWithType>(renderCommands_.data(), renderCommands_.size());
    }
}

bool RenderCommandList::HasValidRenderCommands() const
{
    const uint32_t renderCommandCount = GetRenderCommandCount();
    bool valid = false;
    if (enableMultiQueue_) {
        if (renderCommandCount == INITIAL_MULTI_QUEUE_BARRIER_COUNT) { // only acquire and release barrier commands
            // if there are patched explicit resource barriers, we need to execute this cmdlist in the backend
            valid = validReleaseAcquire_;
        } else if (renderCommandCount > INITIAL_MULTI_QUEUE_BARRIER_COUNT) {
            valid = true;
        }
    } else {
        valid = (renderCommandCount > 0);
    }
    valid = valid && stateData_.validCommandList;

    return valid;
}

uint32_t RenderCommandList::GetRenderCommandCount() const
{
    return static_cast<uint32_t>(renderCommands_.size());
}

GpuQueue RenderCommandList::GetGpuQueue() const
{
    return gpuQueue_;
}

bool RenderCommandList::HasMultiRenderCommandListSubpasses() const
{
    return hasMultiRpCommandListSubpasses_;
}

MultiRenderPassCommandListData RenderCommandList::GetMultiRenderCommandListData() const
{
    return multiRpCommandListData_;
}

array_view<const CommandBarrier> RenderCommandList::GetCustomBarriers() const
{
    return array_view<const CommandBarrier>(customBarriers_.data(), customBarriers_.size());
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassVertexInputBufferBarriers() const
{
    return array_view<const VertexBuffer>(rpVertexInputBufferBarriers_.data(), rpVertexInputBufferBarriers_.size());
}

array_view<const VertexBuffer> RenderCommandList::GetRenderpassIndirectBufferBarriers() const
{
    return array_view<const VertexBuffer>(rpIndirectBufferBarriers_.data(), rpIndirectBufferBarriers_.size());
}

array_view<const RenderHandle> RenderCommandList::GetDescriptorSetHandles() const
{
    return { descriptorSetHandlesForBarriers_.data(), descriptorSetHandlesForBarriers_.size() };
}

array_view<const RenderHandle> RenderCommandList::GetUpdateDescriptorSetHandles() const
{
    return { descriptorSetHandlesForUpdates_.data(), descriptorSetHandlesForUpdates_.size() };
}

void RenderCommandList::AddBarrierPoint(const RenderCommandType renderCommandType)
{
    if (!stateData_.automaticBarriersEnabled) {
        return; // no barrier point added
    }

    RenderCommandBarrierPoint* data = AllocateRenderCommand<RenderCommandBarrierPoint>(allocator_);
    if (data) {
        *data = {}; // zero initialize

        data->renderCommandType = renderCommandType;
        data->barrierPointIndex = stateData_.currentBarrierPointIndex++;

        // update new index (within a render pass there might not be any dirty descriptor sets at this stage)
        const uint32_t descriptorSetBeginIndex = static_cast<uint32_t>(descriptorSetHandlesForBarriers_.size());
        data->descriptorSetHandleIndexBegin = descriptorSetBeginIndex;
        data->descriptorSetHandleCount = 0U;
        // update new index (only valid with render pass)
        data->vertexIndexBarrierIndexBegin = static_cast<uint32_t>(rpVertexInputBufferBarriers_.size());
        data->vertexIndexBarrierCount = 0U;
        // update new index (only valid with render pass)
        data->indirectBufferBarrierIndexBegin = static_cast<uint32_t>(rpIndirectBufferBarriers_.size());
        data->indirectBufferBarrierCount = 0U;

        // barriers are always needed e.g. when a dynamic resource is bound for writing in multiple dispatches
        const bool handleDescriptorSets = stateData_.dirtyDescriptorSetsForBarriers ||
                                          renderCommandType == RenderCommandType::DISPATCH ||
                                          renderCommandType == RenderCommandType::DISPATCH_INDIRECT;
        if (handleDescriptorSets) {
            stateData_.dirtyDescriptorSetsForBarriers = false;
            for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
                // only add descriptor set handles for barriers if there are dynamic barrier resources
                if (stateData_.currentBoundSets[idx].hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.push_back(stateData_.currentBoundSets[idx].descriptorSetHandle);
                }
            }
            data->descriptorSetHandleCount =
                static_cast<uint32_t>(descriptorSetHandlesForBarriers_.size()) - descriptorSetBeginIndex;
        }

        const bool handleCustomBarriers =
            ((!customBarriers_.empty()) && stateData_.currentCustomBarrierIndices.dirtyCustomBarriers);
        if (handleCustomBarriers) {
            const int32_t newCount =
                static_cast<int32_t>(customBarriers_.size()) - stateData_.currentCustomBarrierIndices.prevSize;
            if (newCount > 0) {
                data->customBarrierIndexBegin = static_cast<uint32_t>(stateData_.currentCustomBarrierIndices.prevSize);
                data->customBarrierCount = static_cast<uint32_t>(newCount);

                stateData_.currentCustomBarrierIndices.prevSize = static_cast<int32_t>(customBarriers_.size());
                stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = false;
            }
        }

        // store the current barrier point for the render command list:
        // * binding descriptor sets (with dynamic barrier resources)
        // * binding vertex and index buffers (with dynamic barrier resources)
        // * indirect args buffer (with dynamic barrier resources)
        // inside a render pass adds barriers directly to the RenderCommandBarrierPoint behind this pointer
        stateData_.currentBarrierPoint = data;

        renderCommands_.push_back({ RenderCommandType::BARRIER_POINT, data });
    }
}

void RenderCommandList::Draw(
    const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_Draw_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw)");
    }
#endif

    if (vertexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDraw* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW;
            data->vertexCount = vertexCount;
            data->instanceCount = instanceCount;
            data->firstVertex = firstVertex;
            data->firstInstance = firstInstance;
            data->indexCount = 0;
            data->firstIndex = 0;
            data->vertexOffset = 0;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}
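
// NOTE: a minimal recording sketch for the draw path above, assuming a render pass has begun and
// that `psoHandle` and `vertexBuffers` are valid, hypothetical resources created elsewhere:
//   cmdList.BindPipeline(psoHandle);
//   cmdList.BindVertexBuffers(vertexBuffers);
//   cmdList.Draw(3u, 1u, 0u, 0u); // one triangle, one instance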

void RenderCommandList::DrawIndexed(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex,
    const int32_t vertexOffset, const uint32_t firstInstance)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DrawIndexed_",
            "RENDER_VALIDATION: RenderCommandList: render pass not active (begin before draw).");
    }
#endif

    if (indexCount > 0 && stateData_.renderPassHasBegun) { // prevent zero draws
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDraw* data = AllocateRenderCommand<RenderCommandDraw>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED;
            data->vertexCount = 0;
            data->instanceCount = instanceCount;
            data->firstVertex = 0;
            data->firstInstance = firstInstance;
            data->indexCount = indexCount;
            data->firstIndex = firstIndex;
            data->vertexOffset = vertexOffset;

            renderCommands_.push_back({ RenderCommandType::DRAW, data });
        }
    }
}

void RenderCommandList::DrawIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_DI_buffer_", "RENDER_VALIDATION: DrawIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDrawIndirect* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
                // four uint32 args (matches e.g. Vulkan's VkDrawIndirectCommand)
                constexpr uint32_t drawIndirectCommandSize { 4U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::DrawIndexedIndirect(
    const RenderHandle bufferHandle, const uint32_t offset, const uint32_t drawCount, const uint32_t stride)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: render pass not active (begin before draw)");
    }
    if (!RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        PLUGIN_LOG_ONCE_E(
            nodeName_ + "_RCL_DII_buffer_", "RENDER_VALIDATION: DrawIndexedIndirect buffer handle invalid.");
    }
#endif

    if (stateData_.renderPassHasBegun && RenderHandleUtil::IsGpuBuffer(bufferHandle)) {
        ValidatePipeline();
        ValidatePipelineLayout();

        RenderCommandDrawIndirect* data = AllocateRenderCommand<RenderCommandDrawIndirect>(allocator_);
        if (data) {
            data->drawType = DrawType::DRAW_INDEXED_INDIRECT;
            data->argsHandle = bufferHandle;
            data->offset = offset;
            data->drawCount = drawCount;
            data->stride = stride;

            // add possible indirect buffer barrier before render pass
            if (RenderHandleUtil::IsDynamicResource(bufferHandle)) {
                // five uint32 args (matches e.g. Vulkan's VkDrawIndexedIndirectCommand)
                constexpr uint32_t drawIndirectCommandSize { 5U * sizeof(uint32_t) };
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->indirectBufferBarrierCount++;
                rpIndirectBufferBarriers_.push_back({ bufferHandle, offset, drawIndirectCommandSize });
            }

            renderCommands_.push_back({ RenderCommandType::DRAW_INDIRECT, data });
        }
    }
}

void RenderCommandList::Dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ)
{
    if (groupCountX > 0 && groupCountY > 0 && groupCountZ > 0) { // prevent zero dispatches
        ValidatePipeline();
        ValidatePipelineLayout();

        AddBarrierPoint(RenderCommandType::DISPATCH);

        RenderCommandDispatch* data = AllocateRenderCommand<RenderCommandDispatch>(allocator_);
        if (data) {
            data->groupCountX = groupCountX;
            data->groupCountY = groupCountY;
            data->groupCountZ = groupCountZ;

            renderCommands_.push_back({ RenderCommandType::DISPATCH, data });
        }
    }
}

void RenderCommandList::DispatchIndirect(const RenderHandle bufferHandle, const uint32_t offset)
{
    ValidatePipeline();
    ValidatePipelineLayout();

    AddBarrierPoint(RenderCommandType::DISPATCH_INDIRECT);

    RenderCommandDispatchIndirect* data = AllocateRenderCommand<RenderCommandDispatchIndirect>(allocator_);
    if (data) {
        data->argsHandle = bufferHandle;
        data->offset = offset;

        renderCommands_.push_back({ RenderCommandType::DISPATCH_INDIRECT, data });
    }
}

void RenderCommandList::BindPipeline(const RenderHandle psoHandle)
{
    // NOTE: we cannot early out with the same pso handle:
    // the render pass and its hashes might have changed, and
    // the final pso needs to be hashed with the final render pass;
    // the backends try to check for re-binding of the same pipeline.
    // another approach would be to re-bind psos if needed when the render pass changes.

    bool valid = RenderHandleUtil::IsValid(psoHandle);

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(psoHandle);
    PipelineBindPoint pipelineBindPoint {};
    if (handleType == RenderHandleType::COMPUTE_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE;
    } else if (handleType == RenderHandleType::GRAPHICS_PSO) {
        pipelineBindPoint = PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS;
    } else {
        valid = false;
    }

    stateData_.checkBindPipelineLayout = true;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        if (!stateData_.renderPassHasBegun) {
            valid = false;
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_BindPipeline_",
                "RENDER_VALIDATION: RenderCommandList: bind graphics pipeline only after render pass begin.");
        }
    }
#endif

    stateData_.validPso = valid;
    ValidatePipeline();

    stateData_.currentPsoHandle = psoHandle;
    stateData_.currentPsoBindPoint = pipelineBindPoint;

    RenderCommandBindPipeline* data = AllocateRenderCommand<RenderCommandBindPipeline>(allocator_);
    if (data) {
        data->psoHandle = psoHandle;
        data->pipelineBindPoint = pipelineBindPoint;

        renderCommands_.push_back({ RenderCommandType::BIND_PIPELINE, data });
    }
}

void RenderCommandList::PushConstantData(
    const RENDER_NS::PushConstant& pushConstant, const BASE_NS::array_view<const uint8_t> data)
{
    ValidatePipeline();

    // push constant is not used/allocated if the byte size is bigger than the supported max
    if ((pushConstant.byteSize > 0) &&
        (pushConstant.byteSize <= PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE) && (!data.empty())) {
        RenderCommandPushConstant* rc = AllocateRenderCommand<RenderCommandPushConstant>(allocator_);
        // use the alignment of uint32 as the push constants are currently uint32s;
        // the data is allocated by shader/pipeline needs
        uint8_t* pushData =
            static_cast<uint8_t*>(AllocateRenderData(allocator_, std::alignment_of<uint32_t>(), pushConstant.byteSize));
        if (rc && pushData) {
            rc->psoHandle = stateData_.currentPsoHandle;
            rc->pushConstant = pushConstant;
            rc->data = pushData;
            // the max amount of visible data is copied
            const size_t minData = Math::min(static_cast<size_t>(pushConstant.byteSize), data.size_bytes());
            const bool res = CloneData(rc->data, pushConstant.byteSize, data.data(), minData);
            PLUGIN_UNUSED(res);
            PLUGIN_ASSERT(res);

            renderCommands_.push_back(RenderCommandWithType { RenderCommandType::PUSH_CONSTANT, rc });
        }
    } else if (pushConstant.byteSize > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_E("RENDER_VALIDATION: push constant byte size must be smaller than or equal to %u bytes.",
            PipelineLayoutConstants::MAX_PUSH_CONSTANT_BYTE_SIZE);
#endif
    }
}

void RenderCommandList::PushConstant(const RENDER_NS::PushConstant& pushConstant, const uint8_t* data)
{
    if ((pushConstant.byteSize > 0) && data) {
        PushConstantData(pushConstant, { data, pushConstant.byteSize });
    }
}

void RenderCommandList::BindVertexBuffers(const array_view<const VertexBuffer> vertexBuffers)
{
    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (vertexBuffers.size() > PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT) {
        PLUGIN_LOG_W("RENDER_VALIDATION: max vertex buffer count exceeded, binding only max vertex buffer count");
    }
#endif

    if (!vertexBuffers.empty()) {
        RenderCommandBindVertexBuffers* data = AllocateRenderCommand<RenderCommandBindVertexBuffers>(allocator_);
        if (data) {
            VertexBuffer dynamicBarrierVertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
            uint32_t dynamicBarrierVertexBufferCount = 0;
            const uint32_t vertexBufferCount =
                Math::min(PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT, static_cast<uint32_t>(vertexBuffers.size()));
            data->vertexBufferCount = vertexBufferCount;
            RenderHandle previousVbHandle; // often all vertex buffers are within the same buffer with offsets
            for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
                data->vertexBuffers[idx] = vertexBuffers[idx];
                const RenderHandle currVbHandle = vertexBuffers[idx].bufferHandle;
                if ((previousVbHandle.id != currVbHandle.id) && RenderHandleUtil::IsDynamicResource(currVbHandle) &&
                    (vertexBuffers[idx].byteSize > 0)) {
                    // NOTE: we do not try to create perfect barriers with vertex inputs (just barrier the whole rc)
                    dynamicBarrierVertexBuffers[dynamicBarrierVertexBufferCount++] = { currVbHandle, 0,
                        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
                    previousVbHandle = currVbHandle;
                }
            }

            // add possible vertex/index buffer barriers before render pass
            if (stateData_.renderPassHasBegun && (dynamicBarrierVertexBufferCount > 0)) {
                PLUGIN_ASSERT(stateData_.currentBarrierPoint);
                stateData_.currentBarrierPoint->vertexIndexBarrierCount += dynamicBarrierVertexBufferCount;
                const size_t currCount = rpVertexInputBufferBarriers_.size();
                rpVertexInputBufferBarriers_.resize(currCount + static_cast<size_t>(dynamicBarrierVertexBufferCount));
                for (uint32_t dynIdx = 0; dynIdx < dynamicBarrierVertexBufferCount; ++dynIdx) {
                    rpVertexInputBufferBarriers_[currCount + dynIdx] = dynamicBarrierVertexBuffers[dynIdx];
                }
            }

            renderCommands_.push_back({ RenderCommandType::BIND_VERTEX_BUFFERS, data });
        }
    }
}

void RenderCommandList::BindIndexBuffer(const IndexBuffer& indexBuffer)
{
    ValidatePipeline();

    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(indexBuffer.bufferHandle);
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((indexBuffer.indexType > IndexType::CORE_INDEX_TYPE_UINT32) || (handleType != RenderHandleType::GPU_BUFFER)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid index buffer binding");
    }
#endif

    RenderCommandBindIndexBuffer* data = AllocateRenderCommand<RenderCommandBindIndexBuffer>(allocator_);
    if (data && (handleType == RenderHandleType::GPU_BUFFER)) {
        data->indexBuffer = indexBuffer;
        if (RenderHandleUtil::IsDynamicResource(indexBuffer.bufferHandle)) {
            stateData_.currentBarrierPoint->vertexIndexBarrierCount++;
            rpVertexInputBufferBarriers_.push_back(
                { indexBuffer.bufferHandle, indexBuffer.bufferOffset, indexBuffer.byteSize });
        }
        renderCommands_.push_back({ RenderCommandType::BIND_INDEX_BUFFER, data });
    }
}

void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const array_view<const RenderPassSubpassDesc> subpassDescs)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    // TODO: possibly missing multi render pass related handling here

    if (renderPassDesc.subpassCount != static_cast<uint32_t>(subpassDescs.size())) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_subpass_",
            "RENDER_VALIDATION: BeginRenderPass renderPassDesc.subpassCount (%u) must match subpassDescs size (%u)",
            renderPassDesc.subpassCount, static_cast<uint32_t>(subpassDescs.size()));
#endif
        stateData_.validCommandList = false;
    }
    ValidateRenderPass(renderPassDesc);
    if (!stateData_.validCommandList) {
        return;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, subpassDescs);
#endif
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            PLUGIN_ASSERT(renderPassDesc.subpassCount == static_cast<uint32_t>(subpassDescs.size()));

            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->renderPassDesc.renderArea.extentWidth = Math::max(1u, data->renderPassDesc.renderArea.extentWidth);
            data->renderPassDesc.renderArea.extentHeight = Math::max(1u, data->renderPassDesc.renderArea.extentHeight);
            data->subpassStartIndex = 0;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
                return;
            }

            CloneData(
                data->subpasses.data(), data->subpasses.size_bytes(), subpassDescs.data(), subpassDescs.size_bytes());

            bool valid = true;
            for (size_t subpassIdx = 0; subpassIdx < subpassDescs.size(); ++subpassIdx) {
                const auto& subpassRef = subpassDescs[subpassIdx];

                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};

                valid = valid && ProcessInputAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                valid = valid && ProcessColorAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                valid = valid && ProcessResolveAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                valid = valid && ProcessDepthAttachments(renderPassDesc, subpassRef, subpassResourceStates);
                valid =
                    valid && ProcessFragmentShadingRateAttachments(renderPassDesc, subpassRef, subpassResourceStates);
#if (RENDER_VULKAN_FSR_ENABLED != 1)
                data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
#endif
            }
            if (!valid) {
                stateData_.validCommandList = false;
            }

            // render pass layouts will be updated by the render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}
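
// NOTE: a minimal setup sketch for the overload above; values are illustrative and
// `colorTargetHandle` is a hypothetical attachment handle created elsewhere:
//   RenderPassDesc rp {};
//   rp.attachmentCount = 1u;
//   rp.attachmentHandles[0u] = colorTargetHandle;
//   rp.renderArea = { 0, 0, 1280u, 720u }; // offsetX, offsetY, extentWidth, extentHeight
//   rp.subpassCount = 1u;
//   RenderPassSubpassDesc sp {};
//   sp.colorAttachmentCount = 1u;
//   sp.colorAttachmentIndices[0u] = 0u;
//   cmdList.BeginRenderPass(rp, { &sp, 1u });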

void RenderCommandList::BeginRenderPass(
    const RenderPassDesc& renderPassDesc, const uint32_t subpassStartIdx, const RenderPassSubpassDesc& subpassDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if ((renderPassDesc.attachmentCount == 0) || (renderPassDesc.subpassCount == 0)) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid RenderPassDesc in BeginRenderPass");
    }
#endif

    if (subpassStartIdx >= renderPassDesc.subpassCount) {
        PLUGIN_LOG_E("RCL:BeginRenderPass: subpassStartIdx (%u) must be smaller than renderPassDesc.subpassCount (%u)",
            subpassStartIdx, renderPassDesc.subpassCount);
        stateData_.validCommandList = false;
    }

    ValidateRenderPass(renderPassDesc);
    if (!stateData_.validCommandList) {
        return;
    }

    stateData_.renderPassHasBegun = true;
    stateData_.renderPassStartIndex = subpassStartIdx;
    stateData_.renderPassSubpassCount = renderPassDesc.subpassCount;

    if (renderPassDesc.attachmentCount > 0) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateRenderPassAttachment(nodeName_, gpuResourceMgr_, renderPassDesc, { &subpassDesc, 1u });
#endif
        AddBarrierPoint(RenderCommandType::BEGIN_RENDER_PASS);

        if (hasMultiRpCommandListSubpasses_) {
            PLUGIN_LOG_E("RenderCommandList: BeginRenderPass: creating multiple render node subpasses not supported");
            stateData_.validCommandList = false;
        } else if (renderPassDesc.subpassCount > 1) {
            hasMultiRpCommandListSubpasses_ = true;
            multiRpCommandListData_.secondaryCmdLists =
                (renderPassDesc.subpassContents == CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS);
            if ((!renderCommands_.empty()) && (renderCommands_.back().type == RenderCommandType::BARRIER_POINT)) {
                multiRpCommandListData_.rpBarrierCmdIndex = static_cast<uint32_t>(renderCommands_.size()) - 1u;
            }
        }
        multiRpCommandListData_.subpassCount = renderPassDesc.subpassCount;
        multiRpCommandListData_.rpBeginCmdIndex = static_cast<uint32_t>(renderCommands_.size());

        if (auto* data = AllocateRenderCommand<RenderCommandBeginRenderPass>(allocator_); data) {
            // NOTE: hashed in the backend
            data->beginType = RenderPassBeginType::RENDER_PASS_BEGIN;
            data->renderPassDesc = renderPassDesc;
            data->subpassStartIndex = subpassStartIdx;
            // if false -> initial layout is undefined
            data->enableAutomaticLayoutChanges = stateData_.automaticBarriersEnabled;

            data->subpasses = { AllocateRenderData<RenderPassSubpassDesc>(allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            data->subpassResourceStates = { AllocateRenderData<RenderPassAttachmentResourceStates>(
                                                allocator_, renderPassDesc.subpassCount),
                renderPassDesc.subpassCount };
            if ((!data->subpasses.data()) || (!data->subpassResourceStates.data())) {
                return;
            }

            bool valid = true;
            for (size_t subpassIdx = 0; subpassIdx < data->subpasses.size(); ++subpassIdx) {
                RenderPassAttachmentResourceStates& subpassResourceStates = data->subpassResourceStates[subpassIdx];
                subpassResourceStates = {};
                data->subpasses[subpassIdx] = {};

                if (subpassIdx == subpassStartIdx) {
                    data->subpasses[subpassIdx] = subpassDesc;
                    valid = valid && ProcessInputAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessColorAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessResolveAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid && ProcessDepthAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
                    valid = valid &&
                            ProcessFragmentShadingRateAttachments(renderPassDesc, subpassDesc, subpassResourceStates);
#if (RENDER_VULKAN_FSR_ENABLED != 1)
                    data->subpasses[subpassIdx].fragmentShadingRateAttachmentCount = 0u;
#endif
                }
            }
            if (!valid) {
                stateData_.validCommandList = false;
            }

            // render pass layouts will be updated by the render graph
            renderCommands_.push_back({ RenderCommandType::BEGIN_RENDER_PASS, data });
        }
    }
}

bool RenderCommandList::ProcessInputAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.inputAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.inputAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        // NOTE: validation needed for invalid handles
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        if (subpassResourceStates.layouts[attachmentIndex] != CORE_IMAGE_LAYOUT_UNDEFINED) {
            subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_GENERAL;
        } else {
            subpassResourceStates.layouts[attachmentIndex] = (RenderHandleUtil::IsDepthImage(handle))
                                                                 ? CORE_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
                                                                 : CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT");
#endif
    }
    return valid;
}

bool RenderCommandList::ProcessColorAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.colorAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.colorAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= (CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        // if used e.g. as input and color attachment use general layout
        subpassResourceStates.layouts[attachmentIndex] =
            (subpassResourceStates.layouts[attachmentIndex] != ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED)
                ? CORE_IMAGE_LAYOUT_GENERAL
                : CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessResolveAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    for (uint32_t idx = 0; idx < subpassRef.resolveAttachmentCount; ++idx) {
        const uint32_t attachmentIndex = subpassRef.resolveAttachmentIndices[idx];
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT");
#endif

        // NOTE: mipLevel and layers are not updated to GpuResourceState
        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessDepthAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.depthAttachmentCount == 1) {
        const uint32_t attachmentIndex = subpassRef.depthAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |=
            (CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
        refState.pipelineStageFlags |=
            (CORE_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    if ((subpassRef.depthAttachmentCount == 1) && (subpassRef.depthResolveAttachmentCount == 1)) {
        const uint32_t attachmentIndex = subpassRef.depthResolveAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsDepthImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    }
    return valid;
}

bool RenderCommandList::ProcessFragmentShadingRateAttachments(const RenderPassDesc& renderPassDsc,
    const RenderPassSubpassDesc& subpassRef, RenderPassAttachmentResourceStates& subpassResourceStates)
{
    bool valid = true;
    if (subpassRef.fragmentShadingRateAttachmentCount == 1) {
#if (RENDER_VULKAN_FSR_ENABLED == 1)
        const uint32_t attachmentIndex = subpassRef.fragmentShadingRateAttachmentIndex;
        const RenderHandle handle = renderPassDsc.attachmentHandles[attachmentIndex];
        if (!RenderHandleUtil::IsGpuImage(handle)) {
            valid = false;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateImageUsageFlags(nodeName_, gpuResourceMgr_, handle,
            ImageUsageFlagBits::CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT,
            "CORE_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT");
#endif

        GpuResourceState& refState = subpassResourceStates.states[attachmentIndex];
        refState.shaderStageFlags |= CORE_SHADER_STAGE_FRAGMENT_BIT;
        refState.accessFlags |= CORE_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT;
        refState.pipelineStageFlags |= CORE_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT;
        refState.gpuQueue = gpuQueue_;
        subpassResourceStates.layouts[attachmentIndex] = CORE_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL;
#else
        PLUGIN_LOG_ONCE_I("vk_fsr_disabled_flag",
            "RENDER_VALIDATION: Fragment shading rate disabled and all related attachments ignored.");
#endif
    }
    return valid;
}

void RenderCommandList::NextSubpass(const SubpassContents& subpassContents)
{
    RenderCommandNextSubpass* data = AllocateRenderCommand<RenderCommandNextSubpass>(allocator_);
    if (data) {
        data->subpassContents = subpassContents;
        data->renderCommandListIndex = 0; // will be updated in the render graph

        renderCommands_.push_back({ RenderCommandType::NEXT_SUBPASS, data });
    }
}
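
// Illustrative usage sketch (not part of the original file; 'cmdList' and the
// render pass setup are hypothetical): recording a render pass with two
// subpasses, advancing between them with NextSubpass().
//   cmdList.BeginRenderPass(renderPassDesc, subpassDescs); // two subpasses
//   // ... record draws for subpass 0 ...
//   cmdList.NextSubpass(SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
//   // ... record draws for subpass 1 ...
//   cmdList.EndRenderPass();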

void RenderCommandList::EndRenderPass()
{
    if (!stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_EndRenderPass_",
            "RenderCommandList: render pass must begin before calling EndRenderPass");
#endif
        stateData_.validCommandList = false;
        return;
    }

    if (hasMultiRpCommandListSubpasses_ && (multiRpCommandListData_.rpBeginCmdIndex != INVALID_CL_IDX)) {
        multiRpCommandListData_.rpEndCmdIndex = static_cast<uint32_t>(renderCommands_.size());
    }

    RenderCommandEndRenderPass* data = AllocateRenderCommand<RenderCommandEndRenderPass>(allocator_);
    if (data) {
        // will be updated in the render graph if this is a multi render command list render pass
        data->endType = RenderPassEndType::END_RENDER_PASS;
        data->subpassStartIndex = stateData_.renderPassStartIndex;
        data->subpassCount = stateData_.renderPassSubpassCount;

        renderCommands_.push_back({ RenderCommandType::END_RENDER_PASS, data });
    }

    stateData_.renderPassHasBegun = false;
    stateData_.renderPassStartIndex = 0;
    stateData_.renderPassSubpassCount = 0;
}

void RenderCommandList::BeginDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: EndDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(stateData_.automaticBarriersEnabled);

    // barrier point for pending barriers
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = false;
}

void RenderCommandList::EndDisableAutomaticBarrierPoints()
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.automaticBarriersEnabled) {
        PLUGIN_LOG_E("RENDER_VALIDATION: BeginDisableAutomaticBarrierPoints not called?");
    }
#endif
    PLUGIN_ASSERT(!stateData_.automaticBarriersEnabled);

    stateData_.automaticBarriersEnabled = true;
}

void RenderCommandList::AddCustomBarrierPoint()
{
    const bool barrierState = stateData_.automaticBarriersEnabled;
    stateData_.automaticBarriersEnabled = true; // flag checked in AddBarrierPoint
    AddBarrierPoint(RenderCommandType::BARRIER_POINT);
    stateData_.automaticBarriersEnabled = barrierState;
}
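
// Illustrative usage sketch (hypothetical handles and barrier values):
// suppressing automatic barrier points around a batch of work and issuing a
// single custom barrier point instead.
//   cmdList.BeginDisableAutomaticBarrierPoints();
//   cmdList.CustomBufferBarrier(bufferHandle, srcBarrier, dstBarrier, 0u, byteSize);
//   cmdList.AddCustomBarrierPoint(); // queued custom barriers execute here
//   cmdList.EndDisableAutomaticBarrierPoints();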

void RenderCommandList::CustomMemoryBarrier(const GeneralBarrier& source, const GeneralBarrier& destination)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
#endif

    CommandBarrier cb {
        RenderHandleUtil::CreateGpuResourceHandle(RenderHandleType::UNDEFINED, 0, 0, 0, 0),
        {
            source.accessFlags,
            source.pipelineStageFlags,
        },
        {},
        {
            destination.accessFlags,
            destination.pipelineStageFlags,
        },
        {},
    };

    customBarriers_.push_back(std::move(cb));

    stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
}
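
// Illustrative sketch: a global memory dependency from compute shader writes
// to transfer reads ('cmdList' is hypothetical; flag values mirror the
// Vulkan-style constants used elsewhere in this file).
//   constexpr GeneralBarrier src { CORE_ACCESS_SHADER_WRITE_BIT, CORE_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
//   constexpr GeneralBarrier dst { CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT };
//   cmdList.CustomMemoryBarrier(src, dst);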

void RenderCommandList::CustomBufferBarrier(const RenderHandle handle, const BufferResourceBarrier& source,
    const BufferResourceBarrier& destination, const uint32_t byteOffset, const uint32_t byteSize)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (byteSize == 0) {
        PLUGIN_LOG_ONCE_W("RENDER_VALIDATION_custom_buffer_barrier",
            "RENDER_VALIDATION: do not create zero size custom buffer barriers");
    }
    if (handleType != RenderHandleType::GPU_BUFFER) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomBufferBarrier");
    }
#endif

    if ((byteSize > 0) && (handleType == RenderHandleType::GPU_BUFFER)) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalByteOffset = byteOffset;
        src.optionalByteSize = byteSize;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalByteOffset = byteOffset;
        dst.optionalByteSize = byteSize;

        CommandBarrier cb {
            handle,
            std::move(src),
            {},
            std::move(dst),
            {},
        };

        customBarriers_.push_back(std::move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}
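
// Illustrative sketch (hypothetical handle and size): protecting a buffer
// region written by the transfer stage before it is consumed as vertex input.
//   const BufferResourceBarrier src { CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT };
//   const BufferResourceBarrier dst { CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
//       CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT };
//   cmdList.CustomBufferBarrier(bufferHandle, src, dst, 0u, bufferByteSize);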

void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& destination,
    const ImageSubresourceRange& imageSubresourceRange)
{
    // CORE_IMAGE_LAYOUT_MAX_ENUM in the source barrier signals that the current layout/state is fetched
    // from the tracked resource state
    ImageResourceBarrier source { 0, 0, ImageLayout::CORE_IMAGE_LAYOUT_MAX_ENUM };
    CustomImageBarrier(handle, source, destination, imageSubresourceRange);
}

void RenderCommandList::CustomImageBarrier(const RenderHandle handle, const ImageResourceBarrier& source,
    const ImageResourceBarrier& destination, const ImageSubresourceRange& imageSubresourceRange)
{
    const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);

#if (RENDER_VALIDATION_ENABLED == 1)
    if (stateData_.renderPassHasBegun) {
        PLUGIN_LOG_E("RENDER_VALIDATION: barriers are not allowed inside render passes");
    }
    if (handleType != RenderHandleType::GPU_IMAGE) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid handle type to CustomImageBarrier");
    }
    ValidateImageSubresourceRange(gpuResourceMgr_, handle, imageSubresourceRange);
#endif

    if (handleType == RenderHandleType::GPU_IMAGE) {
        ResourceBarrier src;
        src.accessFlags = source.accessFlags;
        src.pipelineStageFlags = source.pipelineStageFlags;
        src.optionalImageLayout = source.imageLayout;
        src.optionalImageSubresourceRange = imageSubresourceRange;

        ResourceBarrier dst;
        dst.accessFlags = destination.accessFlags;
        dst.pipelineStageFlags = destination.pipelineStageFlags;
        dst.optionalImageLayout = destination.imageLayout;
        dst.optionalImageSubresourceRange = imageSubresourceRange;

        CommandBarrier cb {
            handle,
            std::move(src),
            {},
            std::move(dst),
            {},
        };

        customBarriers_.push_back(std::move(cb));

        stateData_.currentCustomBarrierIndices.dirtyCustomBarriers = true;
    }
}
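
// Illustrative sketch (hypothetical handle; the subresource range constants
// are assumptions): transitioning an image for fragment shader sampling, with
// the source state fetched automatically via the two-argument overload above.
//   const ImageResourceBarrier dst { CORE_ACCESS_SHADER_READ_BIT,
//       CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, CORE_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
//   const ImageSubresourceRange range { CORE_IMAGE_ASPECT_COLOR_BIT, 0u,
//       PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
//   cmdList.CustomImageBarrier(imageHandle, dst, range);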

void RenderCommandList::CopyBufferToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferCopy& bufferCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER);
        }

        RenderCommandCopyBuffer* data = AllocateRenderCommand<RenderCommandCopyBuffer>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferCopy = bufferCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToBuffer");
    }
}
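
// Illustrative sketch (hypothetical handles and size): a staging upload; the
// barrier point is added automatically above when either resource is dynamic.
//   const BufferCopy copy { 0u, 0u, dataByteSize }; // srcOffset, dstOffset, size
//   cmdList.CopyBufferToBuffer(stagingBufferHandle, gpuBufferHandle, copy);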

void RenderCommandList::CopyBufferToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuBuffer(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        RenderCommandCopyBufferImage* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyBufferToImage");
    }
}

void RenderCommandList::CopyImageToBuffer(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const BufferImageCopy& bufferImageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuBuffer(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_BUFFER_IMAGE);
        }

        RenderCommandCopyBufferImage* data = AllocateRenderCommand<RenderCommandCopyBufferImage>(allocator_);
        if (data) {
            data->copyType = RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER;
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->bufferImageCopy = bufferImageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_BUFFER_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToBuffer");
    }
}

void RenderCommandList::CopyImageToImage(
    const RenderHandle sourceHandle, const RenderHandle destinationHandle, const ImageCopy& imageCopy)
{
    if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
        // NOTE: combine copies, and only single combined barrier?
        if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
            RenderHandleUtil::IsDynamicResource(destinationHandle)) {
            AddBarrierPoint(RenderCommandType::COPY_IMAGE);
        }

        RenderCommandCopyImage* data = AllocateRenderCommand<RenderCommandCopyImage>(allocator_);
        if (data) {
            data->srcHandle = sourceHandle;
            data->dstHandle = destinationHandle;
            data->imageCopy = imageCopy;

            renderCommands_.push_back({ RenderCommandType::COPY_IMAGE, data });
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: invalid CopyImageToImage");
    }
}

void RenderCommandList::BlitImage(const RenderHandle sourceHandle, const RenderHandle destinationHandle,
    const ImageBlit& imageBlit, const Filter filter)
{
    if (!stateData_.renderPassHasBegun) {
        if (RenderHandleUtil::IsGpuImage(sourceHandle) && RenderHandleUtil::IsGpuImage(destinationHandle)) {
            if (RenderHandleUtil::IsDynamicResource(sourceHandle) ||
                RenderHandleUtil::IsDynamicResource(destinationHandle)) {
                AddBarrierPoint(RenderCommandType::BLIT_IMAGE);
            }

            RenderCommandBlitImage* data = AllocateRenderCommand<RenderCommandBlitImage>(allocator_);
            if (data) {
                data->srcHandle = sourceHandle;
                data->dstHandle = destinationHandle;
                data->imageBlit = imageBlit;
                data->filter = filter;
                // NOTE: desired layouts (barrier point needs to respect these)
                data->srcImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
                data->dstImageLayout = ImageLayout::CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

                renderCommands_.push_back({ RenderCommandType::BLIT_IMAGE, data });
            }
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: BlitImage can only be called outside of a render pass");
    }
}
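
// Illustrative sketch (hypothetical handles and extents; the ImageBlit member
// layout is an assumption): downscaling mip 0 into mip 1 outside of a render
// pass.
//   ImageBlit blit;
//   blit.srcSubresource = { CORE_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u }; // aspect, mip, layer, layerCount
//   blit.srcOffsets[1] = { width, height, 1u };
//   blit.dstSubresource = { CORE_IMAGE_ASPECT_COLOR_BIT, 1u, 0u, 1u };
//   blit.dstOffsets[1] = { width / 2u, height / 2u, 1u };
//   cmdList.BlitImage(srcImageHandle, dstImageHandle, blit, Filter::CORE_FILTER_LINEAR);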

void RenderCommandList::UpdateDescriptorSets(const BASE_NS::array_view<const RenderHandle> handles,
    const BASE_NS::array_view<const DescriptorSetLayoutBindingResources> bindingResources)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (handles.size() != bindingResources.size()) {
        PLUGIN_LOG_W("RENDER_VALIDATION: UpdateDescriptorSets handles and bindingResources sizes do not match");
    }
#endif
    const uint32_t count = static_cast<uint32_t>(Math::min(handles.size(), bindingResources.size()));
    if (count > 0U) {
        for (uint32_t idx = 0; idx < count; ++idx) {
            const auto& handleRef = handles[idx];
            const auto& bindingResRef = bindingResources[idx];
#if (RENDER_VALIDATION_ENABLED == 1)
            ValidateDescriptorTypeBinding(nodeName_, gpuResourceMgr_, bindingResRef);
            if (bindingResRef.bindingMask != bindingResRef.descriptorSetBindingMask) {
                PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSets_bm_",
                    "RENDER_VALIDATION: invalid bindings in descriptor set update (node:%s)", nodeName_.c_str());
            }
#endif
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handleRef);
            if (handleType == RenderHandleType::DESCRIPTOR_SET) {
                const bool valid =
                    nodeContextDescriptorSetManager_.UpdateCpuDescriptorSet(handleRef, bindingResRef, gpuQueue_);
                if (valid) {
                    descriptorSetHandlesForUpdates_.push_back(handleRef);
                } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_UpdateDescriptorSet_invalid_",
                        "RenderCommandList: invalid descriptor set bindings with update (node:%s)", nodeName_.c_str());
#endif
                }
            } else {
                PLUGIN_LOG_E("RenderCommandList: invalid handle for UpdateDescriptorSet");
            }
        }
    }
}

void RenderCommandList::UpdateDescriptorSet(
    const RenderHandle handle, const DescriptorSetLayoutBindingResources& bindingResources)
{
    UpdateDescriptorSets({ &handle, 1U }, { &bindingResources, 1U });
}
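
// Illustrative sketch (hypothetical resources): update a set's CPU-side
// bindings and bind it for drawing; the handles queued in
// descriptorSetHandlesForUpdates_ are written to the GPU later.
//   DescriptorSetLayoutBindingResources res = /* buffers/images/samplers filled by the caller */;
//   cmdList.UpdateDescriptorSet(descriptorSetHandle, res);
//   cmdList.BindDescriptorSet(0u, descriptorSetHandle);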

void RenderCommandList::BindDescriptorSets(
    const uint32_t firstSet, const BASE_NS::array_view<const BindDescriptorSetData> descriptorSetData)
{
    if (descriptorSetData.empty()) {
        return;
    }
    const uint32_t maxSetNumber = firstSet + static_cast<uint32_t>(descriptorSetData.size());
    if (maxSetNumber > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E(
            "RenderCommandList::BindDescriptorSets: firstSet + descriptorSetData.size() (%u) exceeds max count (%u)",
            maxSetNumber, PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        return;
    }

    ValidatePipeline();

#if (RENDER_VALIDATION_ENABLED == 1)
    if (descriptorSetData.size() > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
        PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
    }
    for (const auto& ref : descriptorSetData) {
        if (ref.dynamicOffsets.size() > PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT) {
            PLUGIN_LOG_E("RENDER_VALIDATION: invalid inputs to BindDescriptorSets");
        }
    }
#endif

    if (auto* data = AllocateRenderCommand<RenderCommandBindDescriptorSets>(allocator_); data) {
        *data = {}; // default

        data->psoHandle = stateData_.currentPsoHandle;
        data->firstSet = firstSet;
        data->setCount = static_cast<uint32_t>(descriptorSetData.size());

        uint32_t descriptorSetCounterForBarriers = 0;
        uint32_t currSet = firstSet;
        for (const auto& ref : descriptorSetData) {
            if (currSet < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) {
                // allocate dynamic offsets for this set
                if (!ref.dynamicOffsets.empty()) {
                    const uint32_t dynCount = static_cast<uint32_t>(ref.dynamicOffsets.size());
                    if (auto* doData = AllocateRenderData<uint32_t>(allocator_, dynCount); doData) {
                        auto& dynRef = data->descriptorSetDynamicOffsets[currSet];
                        dynRef.dynamicOffsets = doData;
                        dynRef.dynamicOffsetCount = dynCount;
                        CloneData(dynRef.dynamicOffsets, dynCount * sizeof(uint32_t), ref.dynamicOffsets.data(),
                            ref.dynamicOffsets.size_bytes());
                    }
                }

                data->descriptorSetHandles[currSet] = ref.handle;

                const bool hasDynamicBarrierResources =
                    nodeContextDescriptorSetManager_.HasDynamicBarrierResources(ref.handle);
                if (stateData_.renderPassHasBegun && hasDynamicBarrierResources) {
                    descriptorSetHandlesForBarriers_.push_back(ref.handle);
                    descriptorSetCounterForBarriers++;
                }
                stateData_.currentBoundSets[currSet].hasDynamicBarrierResources = hasDynamicBarrierResources;
                stateData_.currentBoundSets[currSet].descriptorSetHandle = ref.handle;
                stateData_.currentBoundSetsMask |= (1U << currSet);
                ++currSet;
            }
        }

        renderCommands_.push_back({ RenderCommandType::BIND_DESCRIPTOR_SETS, data });

        // if currentBarrierPoint is null, there have been invalid bindings earlier
        if (stateData_.renderPassHasBegun && stateData_.currentBarrierPoint) {
            // add possible barriers before render pass
            stateData_.currentBarrierPoint->descriptorSetHandleCount += descriptorSetCounterForBarriers;
        } else if (stateData_.automaticBarriersEnabled) {
            stateData_.dirtyDescriptorSetsForBarriers = true;
        }
    }
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const BindDescriptorSetData& descriptorSetData)
{
    BindDescriptorSets(set, { &descriptorSetData, 1U });
}

void RenderCommandList::BindDescriptorSets(const uint32_t firstSet, const array_view<const RenderHandle> handles)
{
    BindDescriptorSetData bdsd[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
    const uint32_t count =
        Math::min(static_cast<uint32_t>(handles.size()), PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
    for (uint32_t idx = 0U; idx < count; ++idx) {
        bdsd[idx].handle = handles[idx];
    }
    BindDescriptorSets(firstSet, { bdsd, count });
}

void RenderCommandList::BindDescriptorSet(const uint32_t set, const RenderHandle handle)
{
    BindDescriptorSetData bdsd = { handle, {} };
    BindDescriptorSets(set, { &bdsd, 1U });
}

void RenderCommandList::BindDescriptorSet(
    const uint32_t set, const RenderHandle handle, const array_view<const uint32_t> dynamicOffsets)
{
    BindDescriptorSetData bdsd = { handle, dynamicOffsets };
    BindDescriptorSets(set, { &bdsd, 1U });
}
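
// Illustrative sketch (hypothetical values): binding set 0 with one dynamic
// offset, e.g. a dynamic uniform buffer sliced per frame.
//   const uint32_t dynamicOffset = frameIndex * alignedSliceByteSize;
//   cmdList.BindDescriptorSet(0u, descriptorSetHandle, { &dynamicOffset, 1u });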

void RenderCommandList::BuildAccelerationStructures(const AccelerationStructureBuildGeometryData& geometry,
    const BASE_NS::array_view<const AccelerationStructureGeometryTrianglesData> triangles,
    const BASE_NS::array_view<const AccelerationStructureGeometryAabbsData> aabbs,
    const BASE_NS::array_view<const AccelerationStructureGeometryInstancesData> instances)
{
    if (!(triangles.empty() && aabbs.empty() && instances.empty())) {
#if (RENDER_VULKAN_RT_ENABLED == 1)
        RenderCommandBuildAccelerationStructure* data =
            AllocateRenderCommand<RenderCommandBuildAccelerationStructure>(allocator_);
        if (data) {
            data->type = geometry.info.type;
            data->flags = geometry.info.flags;
            data->mode = geometry.info.mode;
            data->srcAccelerationStructure = geometry.srcAccelerationStructure;
            data->dstAccelerationStructure = geometry.dstAccelerationStructure;
            data->scratchBuffer = geometry.scratchBuffer.handle;
            data->scratchOffset = geometry.scratchBuffer.offset;

            if (!triangles.empty()) {
                AccelerationStructureGeometryTrianglesData* trianglesData =
                    static_cast<AccelerationStructureGeometryTrianglesData*>(
                        AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryTrianglesData>(),
                            sizeof(AccelerationStructureGeometryTrianglesData) * triangles.size()));
                data->trianglesData = trianglesData;
                data->trianglesView = { data->trianglesData, triangles.size() };
                for (size_t idx = 0; idx < triangles.size(); ++idx) {
                    data->trianglesView[idx] = triangles[idx];
                }
            }
            if (!aabbs.empty()) {
                AccelerationStructureGeometryAabbsData* aabbsData =
                    static_cast<AccelerationStructureGeometryAabbsData*>(
                        AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryAabbsData>(),
                            sizeof(AccelerationStructureGeometryAabbsData) * aabbs.size()));
                data->aabbsData = aabbsData;
                data->aabbsView = { data->aabbsData, aabbs.size() };
                for (size_t idx = 0; idx < aabbs.size(); ++idx) {
                    data->aabbsView[idx] = aabbs[idx];
                }
            }
            if (!instances.empty()) {
                AccelerationStructureGeometryInstancesData* instancesData =
                    static_cast<AccelerationStructureGeometryInstancesData*>(
                        AllocateRenderData(allocator_, std::alignment_of<AccelerationStructureGeometryInstancesData>(),
                            sizeof(AccelerationStructureGeometryInstancesData) * instances.size()));
                data->instancesData = instancesData;
                data->instancesView = { data->instancesData, instances.size() };
                for (size_t idx = 0; idx < instances.size(); ++idx) {
                    data->instancesView[idx] = instances[idx];
                }
            }
            renderCommands_.push_back({ RenderCommandType::BUILD_ACCELERATION_STRUCTURE, data });
        }
#endif
    }
}

void RenderCommandList::ClearColorImage(
    const RenderHandle handle, const ClearColorValue color, const array_view<const ImageSubresourceRange> ranges)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    if (!RenderHandleUtil::IsGpuImage(handle)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: Invalid image handle given to ClearColorImage");
    }
    if (ranges.empty()) {
        PLUGIN_LOG_W("RENDER_VALIDATION: Invalid ranges given to ClearColorImage");
    }
    const GpuImageDesc desc = gpuResourceMgr_.GetImageDescriptor(handle);
    if ((desc.usageFlags & CORE_IMAGE_USAGE_TRANSFER_DST_BIT) == 0) {
        PLUGIN_LOG_E("RENDER_VALIDATION: Image missing usage flag TRANSFER_DST for ClearColorImage command");
    }
#endif
    if (RenderHandleUtil::IsGpuImage(handle) && (!ranges.empty())) {
        AddBarrierPoint(RenderCommandType::CLEAR_COLOR_IMAGE);

        RenderCommandClearColorImage* data = AllocateRenderCommand<RenderCommandClearColorImage>(allocator_);
        if (data) {
            data->handle = handle;
            data->imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            data->color = color;
            data->ranges = { AllocateRenderData<ImageSubresourceRange>(
                                 allocator_, static_cast<uint32_t>(ranges.size())),
                ranges.size() };
            if (!data->ranges.data()) {
                return;
            }
            CloneData(data->ranges.data(), data->ranges.size_bytes(), ranges.data(), ranges.size_bytes());

            renderCommands_.push_back({ RenderCommandType::CLEAR_COLOR_IMAGE, data });
        }
    }
}
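
// Illustrative sketch (hypothetical handle; ClearColorValue construction is an
// assumption): clearing the first mip/layer of a color image created with
// TRANSFER_DST usage.
//   const ClearColorValue black { 0.0f, 0.0f, 0.0f, 1.0f };
//   const ImageSubresourceRange range { CORE_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u };
//   cmdList.ClearColorImage(imageHandle, black, { &range, 1u });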

void RenderCommandList::SetDynamicStateViewport(const ViewportDesc& viewportDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateViewport(nodeName_, viewportDesc);
#endif
    RenderCommandDynamicStateViewport* data = AllocateRenderCommand<RenderCommandDynamicStateViewport>(allocator_);
    if (data) {
        data->viewportDesc = viewportDesc;
        data->viewportDesc.width = Math::max(1.0f, data->viewportDesc.width);
        data->viewportDesc.height = Math::max(1.0f, data->viewportDesc.height);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_VIEWPORT, data });
    }
}

void RenderCommandList::SetDynamicStateScissor(const ScissorDesc& scissorDesc)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    ValidateScissor(nodeName_, scissorDesc);
#endif
    RenderCommandDynamicStateScissor* data = AllocateRenderCommand<RenderCommandDynamicStateScissor>(allocator_);
    if (data) {
        data->scissorDesc = scissorDesc;
        data->scissorDesc.extentWidth = Math::max(1u, data->scissorDesc.extentWidth);
        data->scissorDesc.extentHeight = Math::max(1u, data->scissorDesc.extentHeight);
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_SCISSOR, data });
    }
}
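
// Illustrative sketch: deriving the dynamic viewport and scissor from a render
// area ('renderArea' is hypothetical; ViewportDesc field order is assumed to
// be x, y, width, height, minDepth, maxDepth).
//   cmdList.SetDynamicStateViewport(ViewportDesc { 0.0f, 0.0f,
//       static_cast<float>(renderArea.extentWidth), static_cast<float>(renderArea.extentHeight), 0.0f, 1.0f });
//   cmdList.SetDynamicStateScissor(ScissorDesc { renderArea.offsetX, renderArea.offsetY,
//       renderArea.extentWidth, renderArea.extentHeight });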

void RenderCommandList::SetDynamicStateLineWidth(const float lineWidth)
{
    RenderCommandDynamicStateLineWidth* data = AllocateRenderCommand<RenderCommandDynamicStateLineWidth>(allocator_);
    if (data) {
        data->lineWidth = lineWidth;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_LINE_WIDTH, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBias(
    const float depthBiasConstantFactor, const float depthBiasClamp, const float depthBiasSlopeFactor)
{
    RenderCommandDynamicStateDepthBias* data = AllocateRenderCommand<RenderCommandDynamicStateDepthBias>(allocator_);
    if (data) {
        data->depthBiasConstantFactor = depthBiasConstantFactor;
        data->depthBiasClamp = depthBiasClamp;
        data->depthBiasSlopeFactor = depthBiasSlopeFactor;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS, data });
    }
}

void RenderCommandList::SetDynamicStateBlendConstants(const array_view<const float> blendConstants)
{
    constexpr uint32_t THRESHOLD = 4;
#if (RENDER_VALIDATION_ENABLED == 1)
    if (blendConstants.size() > THRESHOLD) {
        PLUGIN_LOG_E("RenderCommandList: blend constant count (%zu) exceeds supported max (%u)", blendConstants.size(),
            THRESHOLD);
    }
#endif
    RenderCommandDynamicStateBlendConstants* data =
        AllocateRenderCommand<RenderCommandDynamicStateBlendConstants>(allocator_);
    if (data) {
        *data = {};
        const uint32_t bcCount = Math::min(static_cast<uint32_t>(blendConstants.size()), THRESHOLD);
        for (uint32_t idx = 0; idx < bcCount; ++idx) {
            data->blendConstants[idx] = blendConstants[idx];
        }
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS, data });
    }
}

void RenderCommandList::SetDynamicStateDepthBounds(const float minDepthBounds, const float maxDepthBounds)
{
    RenderCommandDynamicStateDepthBounds* data =
        AllocateRenderCommand<RenderCommandDynamicStateDepthBounds>(allocator_);
    if (data) {
        data->minDepthBounds = minDepthBounds;
        data->maxDepthBounds = maxDepthBounds;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS, data });
    }
}

void RenderCommandList::SetDynamicStateStencilCompareMask(const StencilFaceFlags faceMask, const uint32_t compareMask)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::COMPARE_MASK;
        data->faceMask = faceMask;
        data->mask = compareMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilWriteMask(const StencilFaceFlags faceMask, const uint32_t writeMask)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::WRITE_MASK;
        data->faceMask = faceMask;
        data->mask = writeMask;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateStencilReference(const StencilFaceFlags faceMask, const uint32_t reference)
{
    RenderCommandDynamicStateStencil* data = AllocateRenderCommand<RenderCommandDynamicStateStencil>(allocator_);
    if (data) {
        data->dynamicState = StencilDynamicState::REFERENCE;
        data->faceMask = faceMask;
        data->mask = reference;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_STENCIL, data });
    }
}

void RenderCommandList::SetDynamicStateFragmentShadingRate(
    const Size2D& fragmentSize, const FragmentShadingRateCombinerOps& combinerOps)
{
    RenderCommandDynamicStateFragmentShadingRate* data =
        AllocateRenderCommand<RenderCommandDynamicStateFragmentShadingRate>(allocator_);
    if (data) {
#if (RENDER_VALIDATION_ENABLED == 1)
        ValidateFragmentShadingRate(fragmentSize);
#endif
        // map requested sizes (valid input range 0-4) to supported fragment sizes (1, 2, or 4)
        constexpr uint32_t maxValue { 4u };
        constexpr uint32_t valueMapper[maxValue + 1u] = { 1u, 1u, 2u, 2u, 4u };
        Size2D fs = fragmentSize;
        fs.width = (fs.width <= maxValue) ? valueMapper[fs.width] : maxValue;
        fs.height = (fs.height <= maxValue) ? valueMapper[fs.height] : maxValue;

        data->fragmentSize = fs;
        data->combinerOps = combinerOps;
        renderCommands_.push_back({ RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE, data });
    }
}
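
// Illustrative sketch (the combiner op enum names are assumptions mirroring
// Vulkan's KEEP semantics): requesting a 2x2 fragment shading rate.
//   FragmentShadingRateCombinerOps ops;
//   ops.op1 = CORE_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP;
//   ops.op2 = CORE_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP;
//   cmdList.SetDynamicStateFragmentShadingRate(Size2D { 2u, 2u }, ops);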

void RenderCommandList::SetExecuteBackendFramePosition()
{
    if (!stateData_.executeBackendFrameSet) {
        AddBarrierPoint(RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION);

        RenderCommandExecuteBackendFramePosition* data =
            AllocateRenderCommand<RenderCommandExecuteBackendFramePosition>(allocator_);
        if (data) {
            data->id = 0;
            renderCommands_.push_back({ RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION, data });
            stateData_.executeBackendFrameSet = true;
        }
    } else {
        PLUGIN_LOG_E("RenderCommandList: only one SetExecuteBackendFramePosition() call allowed per frame");
    }
}

void RenderCommandList::ValidateRenderPass(const RenderPassDesc& renderPassDesc)
{
    if (stateData_.renderPassHasBegun) {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_hasbegun_",
            "RenderCommandList: render pass is active and must be ended before starting a new one (node: %s)",
            nodeName_.c_str());
#endif
        stateData_.validCommandList = false;
    }
    // validate render pass attachments
    for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
        if (!RenderHandleUtil::IsValid(renderPassDesc.attachmentHandles[idx])) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidateRenderPass_attachments_",
                "RenderCommandList: invalid render pass attachment handle in index: %u (node:%s)", idx,
                nodeName_.c_str());
#endif
            stateData_.validCommandList = false;
        }
    }
}

void RenderCommandList::ValidatePipeline()
{
    if (!stateData_.validPso) {
        stateData_.validCommandList = false;
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(nodeName_ + "_RCL_ValidatePipeline_", "RenderCommandList: PSO not bound.");
#endif
    }
}

void RenderCommandList::ValidatePipelineLayout()
{
    if (stateData_.checkBindPipelineLayout) {
        stateData_.checkBindPipelineLayout = false;
        // fast check without validation
        const uint32_t pipelineLayoutSetsMask =
            RenderHandleUtil::GetPipelineLayoutDescriptorSetMask(stateData_.currentPsoHandle);
        if ((stateData_.currentBoundSetsMask & pipelineLayoutSetsMask) != pipelineLayoutSetsMask) {
#if (RENDER_VALIDATION_ENABLED == 1)
            PLUGIN_LOG_ONCE_E(
                "RenderCommandList::ValidatePipelineLayout", "RenderCommandList: not all needed descriptor sets bound");
#endif
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        const RenderHandleType rhType = RenderHandleUtil::GetHandleType(stateData_.currentPsoHandle);
        const PipelineLayout& pl = (rhType == RenderHandleType::COMPUTE_PSO)
                                       ? psoMgr_.GetComputePsoPipelineLayout(stateData_.currentPsoHandle)
                                       : psoMgr_.GetGraphicsPsoPipelineLayout(stateData_.currentPsoHandle);
        const uint32_t plDescriptorSetCount = pl.descriptorSetCount;
        uint32_t bindCount = 0;
        uint32_t bindSetIndices[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT] { ~0u, ~0u, ~0u, ~0u };
        for (uint32_t idx = 0; idx < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++idx) {
            const DescriptorSetBind& currSet = stateData_.currentBoundSets[idx];
            if (RenderHandleUtil::IsValid(currSet.descriptorSetHandle)) {
                bindCount++;
                bindSetIndices[idx] = idx;
            }
        }
        if (bindCount < plDescriptorSetCount) {
            PLUGIN_LOG_E("RENDER_VALIDATION: not all pipeline layout required descriptor sets bound");
        }
#endif
    }
}

const CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid) const
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

CORE_NS::IInterface* RenderCommandList::GetInterface(const BASE_NS::Uid& uid)
{
    if ((uid == IRenderCommandList::UID) || (uid == IInterface::UID)) {
        return this;
    }
    return nullptr;
}

void RenderCommandList::Ref() {}

void RenderCommandList::Unref() {}
RENDER_END_NAMESPACE()