1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_backend_vk.h"
17
18 #include <algorithm>
19 #include <cstdint>
20 #include <functional>
21 #include <vulkan/vulkan_core.h>
22
23 #include <base/containers/array_view.h>
24 #include <base/containers/fixed_string.h>
25 #include <base/containers/string_view.h>
26 #include <core/implementation_uids.h>
27 #include <core/perf/intf_performance_data_manager.h>
28 #include <core/plugin/intf_class_register.h>
29 #include <render/datastore/render_data_store_render_pods.h>
30 #include <render/device/pipeline_state_desc.h>
31 #include <render/namespace.h>
32 #include <render/nodecontext/intf_render_backend_node.h>
33 #include <render/vulkan/intf_device_vk.h>
34
35 #if (RENDER_PERF_ENABLED == 1)
36 #include "perf/gpu_query.h"
37 #include "perf/gpu_query_manager.h"
38 #include "vulkan/gpu_query_vk.h"
39 #endif
40
41 #include "device/gpu_buffer.h"
42 #include "device/gpu_image.h"
43 #include "device/gpu_resource_handle_util.h"
44 #include "device/gpu_resource_manager.h"
45 #include "device/gpu_sampler.h"
46 #include "device/pipeline_state_object.h"
47 #include "device/render_frame_sync.h"
48 #include "nodecontext/node_context_descriptor_set_manager.h"
49 #include "nodecontext/node_context_pool_manager.h"
50 #include "nodecontext/node_context_pso_manager.h"
51 #include "nodecontext/render_barrier_list.h"
52 #include "nodecontext/render_command_list.h"
53 #include "nodecontext/render_node_graph_node_store.h"
54 #include "render_backend.h"
55 #include "render_graph.h"
56 #include "util/log.h"
57 #include "util/render_frame_util.h"
58 #include "vulkan/gpu_buffer_vk.h"
59 #include "vulkan/gpu_image_vk.h"
60 #include "vulkan/gpu_sampler_vk.h"
61 #include "vulkan/gpu_semaphore_vk.h"
62 #include "vulkan/node_context_descriptor_set_manager_vk.h"
63 #include "vulkan/node_context_pool_manager_vk.h"
64 #include "vulkan/pipeline_state_object_vk.h"
65 #include "vulkan/render_frame_sync_vk.h"
66 #include "vulkan/swapchain_vk.h"
67 #include "vulkan/validate_vk.h"
68
69 using namespace BASE_NS;
70
71 using CORE_NS::GetInstance;
72 using CORE_NS::IParallelTaskQueue;
73 using CORE_NS::IPerformanceDataManager;
74 using CORE_NS::IPerformanceDataManagerFactory;
75 using CORE_NS::ITaskQueueFactory;
76 using CORE_NS::IThreadPool;
77 using CORE_NS::UID_TASK_QUEUE_FACTORY;
78
79 RENDER_BEGIN_NAMESPACE()
80 namespace {
81 #if (RENDER_VULKAN_RT_ENABLED == 1)
82 inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
83 {
84 const VkBufferDeviceAddressInfo addressInfo {
85 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
86 nullptr, // pNext
87 buffer, // buffer
88 };
89 return vkGetBufferDeviceAddress(device, &addressInfo);
90 }
91 #endif
92 #if (RENDER_PERF_ENABLED == 1)
93 void CopyPerfCounters(const PerfCounters& src, PerfCounters& dst)
94 {
95 dst.drawCount += src.drawCount;
96 dst.drawIndirectCount += src.drawIndirectCount;
97 dst.dispatchCount += src.dispatchCount;
98 dst.dispatchIndirectCount += src.dispatchIndirectCount;
99 dst.bindPipelineCount += src.bindPipelineCount;
100 dst.renderPassCount += src.renderPassCount;
101 dst.updateDescriptorSetCount += src.updateDescriptorSetCount;
102 dst.bindDescriptorSetCount += src.bindDescriptorSetCount;
103 dst.triangleCount += src.triangleCount;
104 dst.instanceCount += src.instanceCount;
105 }
106 #endif
107 } // namespace
108
109 // Helper class for running std::function as a ThreadPool task.
110 class FunctionTask final : public IThreadPool::ITask {
111 public:
112 static Ptr Create(std::function<void()> func)
113 {
114 return Ptr { new FunctionTask(BASE_NS::move(func)) };
115 }
116
117 explicit FunctionTask(std::function<void()> func) : func_(BASE_NS::move(func)) {};
118
119 void operator()() override
120 {
121 func_();
122 }
123
124 protected:
125 void Destroy() override
126 {
127 delete this;
128 }
129
130 private:
131 std::function<void()> func_;
132 };
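// Minimal usage sketch (mirrors how RenderProcessCommandLists() below submits work to the task queue):
//   queue_->Submit(taskId, FunctionTask::Create([this]() { /* record commands */ }));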
133
134 #if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
135 namespace {
136 static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
137 }
138 #endif
139
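// Constructs the Vulkan backend. With RENDER_PERF_ENABLED and GPU timestamp queries enabled, the
// constructor also allocates a host-visible, host-coherent, zero-initialized readback buffer sized
// for two timestamps per query object for every buffered frame (frameByteSize * command buffering count).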
140 RenderBackendVk::RenderBackendVk(
141 Device& dev, GpuResourceManager& gpuResourceManager, const CORE_NS::IParallelTaskQueue::Ptr& queue)
142 : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
143 queue_(queue.get())
144 {
145 #if (RENDER_PERF_ENABLED == 1)
146 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
147 gpuQueryMgr_ = make_unique<GpuQueryManager>();
148
149 constexpr uint32_t maxQueryObjectCount { 512u };
150 constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
151 const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
152 const GpuBufferDesc desc {
153 BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT, // usageFlags
154 CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
155 0, // engineCreationFlags
156 fullByteSize, // byteSize
157 };
158 perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
159 perfGpuTimerData_.currentOffset = 0;
160 perfGpuTimerData_.frameByteSize = byteSize;
161 perfGpuTimerData_.fullByteSize = fullByteSize;
162 { // zero initialize
163 uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
164 memset_s(bufferData, fullByteSize, 0, fullByteSize);
165 perfGpuTimerData_.gpuBuffer->Unmap();
166 }
167 #endif
168 #endif
169 }
170
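// Acquires the next image for every swapchain in the back buffer configuration, remaps the engine's
// remappable swapchain image handle to the acquired image, and records whether the image still needs
// a layout transition to PRESENT_SRC before presentation.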
171 void RenderBackendVk::AcquirePresentationInfo(
172 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
173 {
174 if (device_.HasSwapchain()) {
175 presentationData_.present = true;
176 // resized to the same size for convenience
177 presentationData_.infos.resize(backBufferConfig.swapchainData.size());
178 for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
179 const auto& swapData = backBufferConfig.swapchainData[swapIdx];
180 PresentationInfo pi;
181 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
182
183 if (const SwapchainVk* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain(swapData.handle));
184 swapchain) {
185 const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
186 const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
187 const uint32_t semaphoreIdx = swapchain->GetNextAcquireSwapchainSemaphoreIndex();
188 PLUGIN_ASSERT(semaphoreIdx < platSwapchain.swapchainImages.semaphores.size());
189 pi.swapchainSemaphore = platSwapchain.swapchainImages.semaphores[semaphoreIdx];
190 pi.swapchain = platSwapchain.swapchain;
191 pi.useSwapchain = true;
192 // NOTE: for legacy default backbuffer reasons the same swapchain might appear multiple times ATM
193 for (const auto& piRef : presentationData_.infos) {
194 if (piRef.swapchain == pi.swapchain) {
195 pi.useSwapchain = false;
196 }
197 }
198 // NOTE: do not re-acquire the default backbuffer swapchain if it's already in use with a different handle
199 if (pi.useSwapchain) {
200 const VkResult result = vkAcquireNextImageKHR(device, // device
201 vkSwapchain, // swapchain
202 UINT64_MAX, // timeout
203 pi.swapchainSemaphore, // semaphore
204 (VkFence) nullptr, // fence
205 &pi.swapchainImageIndex); // pImageIndex
206
207 switch (result) {
208 // Success
209 case VK_SUCCESS:
210 case VK_TIMEOUT:
211 case VK_NOT_READY:
212 case VK_SUBOPTIMAL_KHR:
213 pi.validAcquire = true;
214 break;
215
216 // Failure
217 case VK_ERROR_OUT_OF_HOST_MEMORY:
218 case VK_ERROR_OUT_OF_DEVICE_MEMORY:
219 PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
220 return;
221 case VK_ERROR_DEVICE_LOST:
222 PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
223 return;
224 case VK_ERROR_OUT_OF_DATE_KHR:
225 PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
226 return;
227 case VK_ERROR_SURFACE_LOST_KHR:
228 PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
229 return;
230
231 case VK_EVENT_SET:
232 case VK_EVENT_RESET:
233 case VK_INCOMPLETE:
234 case VK_ERROR_INITIALIZATION_FAILED:
235 case VK_ERROR_MEMORY_MAP_FAILED:
236 case VK_ERROR_LAYER_NOT_PRESENT:
237 case VK_ERROR_EXTENSION_NOT_PRESENT:
238 case VK_ERROR_FEATURE_NOT_PRESENT:
239 case VK_ERROR_INCOMPATIBLE_DRIVER:
240 case VK_ERROR_TOO_MANY_OBJECTS:
241 case VK_ERROR_FORMAT_NOT_SUPPORTED:
242 case VK_ERROR_FRAGMENTED_POOL:
243 case VK_ERROR_OUT_OF_POOL_MEMORY:
244 case VK_ERROR_INVALID_EXTERNAL_HANDLE:
245 case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
246 case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
247 case VK_ERROR_VALIDATION_FAILED_EXT:
248 case VK_ERROR_INVALID_SHADER_NV:
249 // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
250 case VK_ERROR_FRAGMENTATION_EXT:
251 case VK_ERROR_NOT_PERMITTED_EXT:
252 // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
253 case VK_RESULT_MAX_ENUM:
254 default:
255 PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost. Device invalidated");
256 PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
257 device_.SetDeviceStatus(false);
258 break;
259 }
260
261 if (pi.swapchainImageIndex >= static_cast<uint32_t>(platSwapchain.swapchainImages.images.size())) {
262 PLUGIN_LOG_E("swapchain image index (%u) should be smaller than (%u)", pi.swapchainImageIndex,
263 static_cast<uint32_t>(platSwapchain.swapchainImages.images.size()));
264 }
265
266 const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
267 const RenderHandle handle = swapchainData.remappableSwapchainImage;
268 if (pi.swapchainImageIndex < swapchainData.imageViewCount) {
269 // remap image to backbuffer
270 const RenderHandle currentSwapchainHandle = swapchainData.imageViews[pi.swapchainImageIndex];
271 // special swapchain remapping
272 gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(handle, currentSwapchainHandle);
273 }
274 pi.renderGraphProcessedState = swapData.backBufferState;
275 pi.imageLayout = swapData.layout;
276 if (pi.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC) {
277 pi.presentationLayoutChangeNeeded = true;
278 pi.renderNodeCommandListIndex =
279 static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);
280
281 const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
282 PLUGIN_ASSERT(swapImage);
283 pi.swapchainImage = swapImage->GetPlatformData().image;
284 }
285 }
286 }
287 presentationData_.infos[swapIdx] = pi;
288 }
289 }
290 }
291
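// Presents all successfully acquired swapchain images with a single vkQueuePresentKHR call, waiting
// on the presentation semaphore signaled by the last submitted command buffer.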
292 void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
293 {
294 if (!backBufferConfig.swapchainData.empty()) {
295 if (device_.HasSwapchain() && presentationData_.present) {
296 PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8u);
297 uint32_t swapchainCount = 0U;
298 VkSwapchainKHR vkSwapchains[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { VK_NULL_HANDLE, VK_NULL_HANDLE,
299 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
300 uint32_t vkSwapImageIndices[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { 0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
301 for (const auto& presRef : presentationData_.infos) {
302 // NOTE: the default backbuffer might appear multiple times;
303 // the flag useSwapchain should be false in those cases
304 if (presRef.useSwapchain && presRef.swapchain && presRef.validAcquire) {
305 PLUGIN_ASSERT(presRef.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
306 vkSwapImageIndices[swapchainCount] = presRef.swapchainImageIndex;
307 vkSwapchains[swapchainCount++] = presRef.swapchain;
308 }
309 }
310 #if (RENDER_PERF_ENABLED == 1)
311 commonCpuTimers_.present.Begin();
312 #endif
313
314 // NOTE: currently waits for the last valid submission semaphore (backtracks here to find a
315 // valid semaphore)
316 if (swapchainCount > 0U) {
317 VkSemaphore waitSemaphore = VK_NULL_HANDLE;
318 uint32_t waitSemaphoreCount = 0;
319 if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
320 waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
321 waitSemaphoreCount = 1;
322 }
323
324 const VkPresentInfoKHR presentInfo {
325 VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
326 nullptr, // pNext
327 waitSemaphoreCount, // waitSemaphoreCount
328 &waitSemaphore, // pWaitSemaphores
329 swapchainCount, // swapchainCount
330 vkSwapchains, // pSwapchains
331 vkSwapImageIndices, // pImageIndices
332 nullptr // pResults
333 };
334
335 const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
336 const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
337 &presentInfo); // pPresentInfo
338
339 switch (result) {
340 // Success
341 case VK_SUCCESS:
342 break;
343 case VK_SUBOPTIMAL_KHR:
344 #if (RENDER_VALIDATION_ENABLED == 1)
345 PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
346 #endif
347 break;
348
349 // Failure
350 case VK_ERROR_OUT_OF_HOST_MEMORY:
351 case VK_ERROR_OUT_OF_DEVICE_MEMORY:
352 PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
353 return;
354 case VK_ERROR_DEVICE_LOST:
355 PLUGIN_LOG_E("vkQueuePresentKHR device lost");
356 return;
357 case VK_ERROR_OUT_OF_DATE_KHR:
358 PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
359 return;
360 case VK_ERROR_SURFACE_LOST_KHR:
361 PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
362 return;
363
364 case VK_NOT_READY:
365 case VK_TIMEOUT:
366 case VK_EVENT_SET:
367 case VK_EVENT_RESET:
368 case VK_INCOMPLETE:
369 case VK_ERROR_INITIALIZATION_FAILED:
370 case VK_ERROR_MEMORY_MAP_FAILED:
371 case VK_ERROR_LAYER_NOT_PRESENT:
372 case VK_ERROR_EXTENSION_NOT_PRESENT:
373 case VK_ERROR_FEATURE_NOT_PRESENT:
374 case VK_ERROR_INCOMPATIBLE_DRIVER:
375 case VK_ERROR_TOO_MANY_OBJECTS:
376 case VK_ERROR_FORMAT_NOT_SUPPORTED:
377 case VK_ERROR_FRAGMENTED_POOL:
378 case VK_ERROR_OUT_OF_POOL_MEMORY:
379 case VK_ERROR_INVALID_EXTERNAL_HANDLE:
380 case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
381 case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
382 case VK_ERROR_VALIDATION_FAILED_EXT:
383 case VK_ERROR_INVALID_SHADER_NV:
384 case VK_ERROR_FRAGMENTATION_EXT:
385 case VK_ERROR_NOT_PERMITTED_EXT:
386 case VK_RESULT_MAX_ENUM:
387 default:
388 PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
389 PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
390 break;
391 }
392 }
393 #if (RENDER_PERF_ENABLED == 1)
394 commonCpuTimers_.present.End();
395 #endif
396 } else {
397 #if (RENDER_VALIDATION_ENABLED == 1)
398 PLUGIN_LOG_ONCE_E(
399 "RenderBackendVk::Present_layout", "Presentation layout has not been updated, cannot present.");
400 #endif
401 }
402 }
403 }
404
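// Renders one frame: resets the command buffer submitter and presentation state, records all render
// command lists (RenderProcessCommandLists, which also acquires swapchain images), and then submits
// the resulting command buffers (RenderProcessSubmitCommandLists). Presentation happens separately
// via Present().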
405 void RenderBackendVk::Render(
406 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
407 {
408 // NOTE: all command lists are validated before entering here
409 #if (RENDER_PERF_ENABLED == 1)
410 commonCpuTimers_.full.Begin();
411 commonCpuTimers_.acquire.Begin();
412 #endif
413
414 commandBufferSubmitter_ = {};
415 commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());
416
417 presentationData_.present = false;
418 presentationData_.infos.clear();
419
420 #if (RENDER_PERF_ENABLED == 1)
421 commonCpuTimers_.acquire.End();
422
423 StartFrameTimers(renderCommandFrameData);
424 commonCpuTimers_.execute.Begin();
425 #endif
426
427 // command list process loop/execute
428 // first tries to acquire swapchain if needed in a task
429 RenderProcessCommandLists(renderCommandFrameData, backBufferConfig);
430
431 #if (RENDER_PERF_ENABLED == 1)
432 commonCpuTimers_.execute.End();
433 commonCpuTimers_.submit.Begin();
434 #endif
435
436 PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
437 // submit vulkan command buffers
438 // checks that presentation info has valid acquire
439 RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);
440
441 #if (RENDER_PERF_ENABLED == 1)
442 commonCpuTimers_.submit.End();
443 commonCpuTimers_.full.End();
444 EndFrameTimers();
445 #endif
446 }
447
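// Submits every recorded command buffer to its GPU queue with the required wait/signal semaphores.
// The last valid submission also carries the frame fence and signals any external GPU semaphores as
// well as the semaphore that Present() later waits on.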
448 void RenderBackendVk::RenderProcessSubmitCommandLists(
449 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
450 {
451 // NOTE: currently backtracks to the final valid command buffer semaphore
452 uint32_t finalCommandBufferSubmissionIndex = ~0u;
453 commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
454 bool swapchainSemaphoreWaited = false;
455 for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
456 --cmdBufferIdx) {
457 if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
458 (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
459 VK_NULL_HANDLE)) {
460 finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
461 break;
462 }
463 }
464
465 for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
466 const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
467 if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
468 continue;
469 }
470
471 const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
472
473 uint32_t waitSemaphoreCount = 0u;
474 constexpr const uint32_t maxWaitSemaphoreCount =
475 PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + DeviceConstants::MAX_SWAPCHAIN_COUNT;
476 VkSemaphore waitSemaphores[maxWaitSemaphoreCount];
477 VkPipelineStageFlags waitSemaphorePipelineStageFlags[maxWaitSemaphoreCount];
478 for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
479 const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
480 PLUGIN_ASSERT(waitCmdBufferIdx < static_cast<uint32_t>(commandBufferSubmitter_.commandBuffers.size()));
481
482 VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
483 if (waitSemaphore != VK_NULL_HANDLE) {
484 waitSemaphores[waitSemaphoreCount] = waitSemaphore;
485 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
486 waitSemaphoreCount++;
487 }
488 }
489
490 if ((!swapchainSemaphoreWaited) && (renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
491 (!presentationData_.infos.empty())) {
492 swapchainSemaphoreWaited = true;
493 // go through all swapchain semaphores
494 for (const auto& presRef : presentationData_.infos) {
495 if (presRef.swapchainSemaphore) {
496 waitSemaphores[waitSemaphoreCount] = presRef.swapchainSemaphore;
497 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
498 waitSemaphoreCount++;
499 }
500 }
501 }
502
503 uint32_t signalSemaphoreCount = 0u;
504 PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8U);
505 constexpr uint32_t maxSignalSemaphoreCount { 1U + DeviceConstants::MAX_SWAPCHAIN_COUNT };
506 VkSemaphore semaphores[maxSignalSemaphoreCount] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE,
507 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
508 VkFence fence = VK_NULL_HANDLE;
509 if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
510 // add fence signaling to last submission for frame sync
511 if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
512 fence = frameSync->GetFrameFence().fence;
513 frameSync->FrameFenceIsSignalled();
514 }
515 // signal external semaphores
516 if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
517 auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
518 const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
519 PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
520 if (externalSignals.size() == externalSemaphores.size()) {
521 for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
522 // only signal semaphores that have not been signaled yet
523 if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
524 if (const GpuSemaphoreVk* gs = (const GpuSemaphoreVk*)externalSemaphores[sigIdx].get();
525 gs) {
526 semaphores[signalSemaphoreCount++] = gs->GetPlatformData().semaphore;
527 externalSignals[sigIdx].signaled = true;
528 }
529 }
530 }
531 }
532 }
533
534 if (presentationData_.present) {
535 commandBufferSubmitter_.presentationWaitSemaphore =
536 commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
537 semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
538 }
539 // add additional semaphores
540 for (const auto& swapRef : backBufferConfig.swapchainData) {
541 // should have been checked in render graph already
542 if ((signalSemaphoreCount < maxSignalSemaphoreCount) && swapRef.config.gpuSemaphoreHandle) {
543 semaphores[signalSemaphoreCount++] =
544 VulkanHandleCast<VkSemaphore>(swapRef.config.gpuSemaphoreHandle);
545 }
546 }
547 } else if (renderContextRef.submitDepencies.signalSemaphore) {
548 semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
549 }
550 PLUGIN_ASSERT(signalSemaphoreCount <= maxSignalSemaphoreCount);
551
552 const VkSubmitInfo submitInfo {
553 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
554 nullptr, // pNext
555 waitSemaphoreCount, // waitSemaphoreCount
556 (waitSemaphoreCount == 0) ? nullptr : waitSemaphores, // pWaitSemaphores
557 waitSemaphorePipelineStageFlags, // pWaitDstStageMask
558 1, // commandBufferCount
559 &cmdSubmitterRef.commandBuffer, // pCommandBuffers
560 signalSemaphoreCount, // signalSemaphoreCount
561 (signalSemaphoreCount == 0) ? nullptr : semaphores, // pSignalSemaphores
562 };
563
564 const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
565 if (queue) {
566 VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
567 1, // submitCount
568 &submitInfo, // pSubmits
569 fence)); // fence
570 }
571 }
572 }
573
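// Records the command buffers either through the parallel task queue (swapchain acquisition runs as
// its own task that swapchain-dependent nodes wait on) or serially on the calling thread when no
// task queue is available.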
574 void RenderBackendVk::RenderProcessCommandLists(
575 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
576 {
577 const uint32_t cmdBufferCount = static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());
578 if (queue_) {
579 constexpr uint64_t acquireTaskIdentifier { ~0U };
580 vector<uint64_t> afterIdentifiers;
581 afterIdentifiers.reserve(1u); // need for swapchain acquire wait
582 // submit acquire task if needed
583 if ((!backBufferConfig.swapchainData.empty()) && device_.HasSwapchain()) {
584 queue_->Submit(
585 acquireTaskIdentifier, FunctionTask::Create([this, &renderCommandFrameData, &backBufferConfig]() {
586 AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
587 }));
588 }
589 uint64_t secondaryIdx = cmdBufferCount;
590 for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < cmdBufferCount;) {
591 afterIdentifiers.clear();
592 // add wait for acquire if needed
593 if (cmdBufferIdx >= renderCommandFrameData.firstSwapchainNodeIdx) {
594 afterIdentifiers.push_back(acquireTaskIdentifier);
595 }
596 // NOTE: idx increase
597 const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
598 const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
599 PLUGIN_ASSERT(mrpData.subpassCount > 0);
600 const uint32_t rcCount = mrpData.subpassCount;
601 if (mrpData.secondaryCmdLists) {
602 afterIdentifiers.reserve(afterIdentifiers.size() + rcCount);
603 for (uint32_t secondIdx = 0; secondIdx < rcCount; ++secondIdx) {
604 const uint64_t submitId = secondaryIdx++;
605 afterIdentifiers.push_back(submitId);
606 PLUGIN_ASSERT((cmdBufferIdx + secondIdx) < cmdBufferCount);
607 queue_->SubmitAfter(afterIdentifiers, submitId,
608 FunctionTask::Create([this, cmdBufferIdx, secondIdx, &renderCommandFrameData]() {
609 const uint32_t currCmdBufferIdx = cmdBufferIdx + secondIdx;
610 MultiRenderCommandListDesc mrcDesc;
611 mrcDesc.multiRenderCommandListCount = 1u;
612 mrcDesc.baseContext = nullptr;
613 mrcDesc.secondaryCommandBuffer = true;
614 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currCmdBufferIdx];
615 const DebugNames debugNames { ref2.debugName,
616 renderCommandFrameData.renderCommandContexts[currCmdBufferIdx].debugName };
617 RenderSingleCommandList(ref2, currCmdBufferIdx, mrcDesc, debugNames);
618 }));
619 }
620 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
621 cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
622 MultiRenderCommandListDesc mrcDesc;
623 mrcDesc.multiRenderCommandListCount = rcCount;
624 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
625 const DebugNames debugNames { ref2.debugName,
626 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
627 RenderPrimaryRenderPass(renderCommandFrameData, ref2, cmdBufferIdx, mrcDesc, debugNames);
628 }));
629 } else {
630 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
631 cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
632 MultiRenderCommandListDesc mrcDesc;
633 mrcDesc.multiRenderCommandListCount = rcCount;
634 if (rcCount > 1) {
635 mrcDesc.multiRenderNodeCmdList = true;
636 mrcDesc.baseContext = &renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
637 }
638 for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
639 const uint32_t currIdx = cmdBufferIdx + rcIdx;
640 mrcDesc.multiRenderCommandListIndex = rcIdx;
641 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
642 const DebugNames debugNames { ref2.debugName,
643 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
644 RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
645 }
646 }));
647 }
648 // idx increase
649 cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
650 }
651
652 // execute and wait for completion.
653 queue_->Execute();
654 queue_->Clear();
655 } else {
656 AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
657 for (uint32_t cmdBufferIdx = 0;
658 cmdBufferIdx < static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());) {
659 // NOTE: idx increase
660 const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
661 const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
662 PLUGIN_ASSERT(mrpData.subpassCount > 0);
663 const uint32_t rcCount = mrpData.subpassCount;
664
665 MultiRenderCommandListDesc mrcDesc;
666 mrcDesc.multiRenderCommandListCount = rcCount;
667 mrcDesc.baseContext = (rcCount > 1) ? &renderCommandFrameData.renderCommandContexts[cmdBufferIdx] : nullptr;
668
669 for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
670 const uint32_t currIdx = cmdBufferIdx + rcIdx;
671 mrcDesc.multiRenderCommandListIndex = rcIdx;
672 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
673 const DebugNames debugNames { ref2.debugName,
674 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
675 RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
676 }
677 cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
678 }
679 }
680 }
681
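// Records the primary command buffer for a render pass whose subpass contents were recorded into
// secondary command buffers: handles the barrier point, begins the render pass, executes the
// secondary command buffers per subpass, and ends the render pass.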
682 void RenderBackendVk::RenderPrimaryRenderPass(const RenderCommandFrameData& renderCommandFrameData,
683 RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
684 const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
685 {
686 const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
687 NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
688 NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
689
690 const ContextCommandPoolVk& ptrCmdPool =
691 (static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
692 const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool.commandBuffer;
693
694 // begin cmd buffer
695 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
696 constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
697 const bool valid = ptrCmdPool.commandPool && cmdBuffer.commandBuffer;
698 if (valid) {
699 VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
700 ptrCmdPool.commandPool, // commandPool
701 commandPoolResetFlags)); // flags
702 }
703
704 constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
705 VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
706 };
707 const VkCommandBufferBeginInfo commandBufferBeginInfo {
708 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
709 nullptr, // pNext
710 commandBufferUsageFlags, // flags
711 nullptr, // pInheritanceInfo
712 };
713 if (valid) {
714 VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
715 &commandBufferBeginInfo)); // pBeginInfo
716 }
717
718 StateCache stateCache;
719
720 const MultiRenderPassCommandListData mrpcld = renderCommandList.GetMultiRenderCommandListData();
721 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
722 const uint32_t commandCount = static_cast<uint32_t>(rcRef.size());
723 const RenderCommandBeginRenderPass* rcBeginRenderPass =
724 (mrpcld.rpBeginCmdIndex < commandCount)
725 ? static_cast<const RenderCommandBeginRenderPass*>(rcRef[mrpcld.rpBeginCmdIndex].rc)
726 : nullptr;
727 const RenderCommandEndRenderPass* rcEndRenderPass =
728 (mrpcld.rpEndCmdIndex < commandCount)
729 ? static_cast<const RenderCommandEndRenderPass*>(rcRef[mrpcld.rpEndCmdIndex].rc)
730 : nullptr;
731
732 if (rcBeginRenderPass && rcEndRenderPass) {
733 if (mrpcld.rpBarrierCmdIndex < commandCount) {
734 const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
735 PLUGIN_ASSERT(rcRef[mrpcld.rpBarrierCmdIndex].type == RenderCommandType::BARRIER_POINT);
736 const RenderCommandBarrierPoint& barrierPoint =
737 *static_cast<RenderCommandBarrierPoint*>(rcRef[mrpcld.rpBarrierCmdIndex].rc);
738 // handle all barriers before render command that needs resource syncing
739 RenderCommand(barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
740 }
741
742 // begin render pass
743 stateCache.primaryRenderPass = true;
744 RenderCommand(*rcBeginRenderPass, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
745 stateCache.primaryRenderPass = false;
746
747 // get secondary command buffers from correct indices and execute
748 for (uint32_t idx = 0; idx < multiRenderCommandListDesc.multiRenderCommandListCount; ++idx) {
749 const uint32_t currCmdBufIdx = cmdBufIdx + idx;
750 PLUGIN_ASSERT(currCmdBufIdx < renderCommandFrameData.renderCommandContexts.size());
751 const RenderCommandContext& currContext = renderCommandFrameData.renderCommandContexts[currCmdBufIdx];
752 NodeContextPoolManagerVk& contextPoolVk =
753 *static_cast<NodeContextPoolManagerVk*>(currContext.nodeContextPoolMgr);
754
755 const array_view<const RenderCommandWithType> mlaRcRef = currContext.renderCommandList->GetRenderCommands();
756 const auto& mla = currContext.renderCommandList->GetMultiRenderCommandListData();
757 const uint32_t mlaCommandCount = static_cast<uint32_t>(mlaRcRef.size());
758 // the next subpass is only started from the second render command list onwards
759 if ((idx > 0) && (mla.rpBeginCmdIndex < mlaCommandCount)) {
760 RenderCommandBeginRenderPass renderPass =
761 *static_cast<RenderCommandBeginRenderPass*>(mlaRcRef[mla.rpBeginCmdIndex].rc);
762 renderPass.renderPassDesc.subpassContents =
763 SubpassContents::CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS;
764 stateCache.renderCommandBeginRenderPass = nullptr; // reset
765 RenderCommand(
766 renderPass, cmdBuffer, *currContext.nodeContextPsoMgr, *currContext.nodeContextPoolMgr, stateCache);
767 }
768 RenderExecuteSecondaryCommandLists(cmdBuffer, contextPoolVk.GetContextSecondaryCommandPool().commandBuffer);
769 }
770
771 // end render pass (replace the primary render pass)
772 stateCache.renderCommandBeginRenderPass = rcBeginRenderPass;
773 // NOTE: render graph has batched the subpasses to have END_SUBPASS, we need END_RENDER_PASS
774 constexpr RenderCommandEndRenderPass rcerp = {};
775 RenderCommand(rcerp, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
776 }
777
778 // end cmd buffer
779 if (valid) {
780 VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
781 }
782
783 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
784 }
785
786 void RenderBackendVk::RenderExecuteSecondaryCommandLists(
787 const LowLevelCommandBufferVk& cmdBuffer, const LowLevelCommandBufferVk& executeCmdBuffer)
788 {
789 if (cmdBuffer.commandBuffer && executeCmdBuffer.commandBuffer) {
790 vkCmdExecuteCommands(cmdBuffer.commandBuffer, // commandBuffer
791 1u, // commandBufferCount
792 &executeCmdBuffer.commandBuffer); // pCommandBuffers
793 }
794 }
795
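// Builds the VkCommandBufferInheritanceInfo needed to begin a secondary command buffer, using the
// render pass and subpass index from the command list's BEGIN_RENDER_PASS command.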
796 VkCommandBufferInheritanceInfo RenderBackendVk::RenderGetCommandBufferInheritanceInfo(
797 const RenderCommandList& renderCommandList, NodeContextPoolManager& poolMgr)
798 {
799 NodeContextPoolManagerVk& poolMgrVk = static_cast<NodeContextPoolManagerVk&>(poolMgr);
800
801 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
802 const uint32_t cmdCount = static_cast<uint32_t>(rcRef.size());
803
804 const MultiRenderPassCommandListData mrpCmdData = renderCommandList.GetMultiRenderCommandListData();
805 PLUGIN_ASSERT(mrpCmdData.rpBeginCmdIndex < cmdCount);
806 PLUGIN_ASSERT(mrpCmdData.rpEndCmdIndex < cmdCount);
807 if (mrpCmdData.rpBeginCmdIndex < cmdCount) {
808 const auto& ref = rcRef[mrpCmdData.rpBeginCmdIndex];
809 PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
810 const RenderCommandBeginRenderPass& renderCmd = *static_cast<const RenderCommandBeginRenderPass*>(ref.rc);
811 LowLevelRenderPassDataVk lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
812
813 const uint32_t subpass = renderCmd.subpassStartIndex;
814 return VkCommandBufferInheritanceInfo {
815 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // sType
816 nullptr, // pNext
817 lowLevelRenderPassData.renderPass, // renderPass
818 subpass, // subpass
819 VK_NULL_HANDLE, // framebuffer
820 VK_FALSE, // occlusionQueryEnable
821 0, // queryFlags
822 0, // pipelineStatistics
823 };
824 } else {
825 return VkCommandBufferInheritanceInfo {};
826 }
827 }
828
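// Translates a single render command list into Vulkan commands. Depending on the multi-command-list
// description, the commands are recorded into the base context's primary command buffer, this
// context's secondary command buffer, or this context's own primary command buffer.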
829 void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
830 const MultiRenderCommandListDesc& mrclDesc, const DebugNames& debugNames)
831 {
832 // these are validated in render graph
833 const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
834 const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
835 NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
836 NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
837 NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
838
839 contextPoolMgr.BeginBackendFrame();
840 ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();
841 nodeContextPsoMgr.BeginBackendFrame();
842
843 const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
844
845 StateCache stateCache = {}; // state cache for this render command list
846 stateCache.backendNode = renderCommandCtx.renderBackendNode;
847 stateCache.secondaryCommandBuffer = mrclDesc.secondaryCommandBuffer;
848
849 // the command buffer has been waited on with a single frame fence
850 const bool multiCmdList = (mrclDesc.multiRenderNodeCmdList);
851 const bool beginCommandBuffer = (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == 0));
852 const bool endCommandBuffer =
853 (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == mrclDesc.multiRenderCommandListCount - 1));
854 const ContextCommandPoolVk* ptrCmdPool = nullptr;
855 if (mrclDesc.multiRenderNodeCmdList) {
856 PLUGIN_ASSERT(mrclDesc.baseContext);
857 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(mrclDesc.baseContext->nodeContextPoolMgr))
858 ->GetContextCommandPool();
859 } else if (mrclDesc.secondaryCommandBuffer) {
860 PLUGIN_ASSERT(stateCache.secondaryCommandBuffer);
861 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextSecondaryCommandPool();
862 } else {
863 ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
864 }
865
866 // update cmd list context descriptor sets
867 UpdateCommandListDescriptorSets(renderCommandList, stateCache, nodeContextDescriptorSetMgr);
868
869 PLUGIN_ASSERT(ptrCmdPool);
870 const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;
871
872 #if (RENDER_PERF_ENABLED == 1)
873 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
874 const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
875 const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
876 #endif
877 PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
878 PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
879 #endif
880
881 if (beginCommandBuffer) {
882 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
883 constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
884 VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
885 ptrCmdPool->commandPool, // commandPool
886 commandPoolResetFlags)); // flags
887
888 VkCommandBufferUsageFlags commandBufferUsageFlags { VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
889 VkCommandBufferInheritanceInfo inheritanceInfo {};
890 if (stateCache.secondaryCommandBuffer) {
891 commandBufferUsageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
892 inheritanceInfo = RenderGetCommandBufferInheritanceInfo(renderCommandList, contextPoolMgr);
893 }
894 const VkCommandBufferBeginInfo commandBufferBeginInfo {
895 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
896 nullptr, // pNext
897 commandBufferUsageFlags, // flags
898 mrclDesc.secondaryCommandBuffer ? (&inheritanceInfo) : nullptr, // pInheritanceInfo
899 };
900
901 VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
902 &commandBufferBeginInfo)); // pBeginInfo
903
904 #if (RENDER_PERF_ENABLED == 1)
905 if (perfDataSet) {
906 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
907 if (validGpuQueries) {
908 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
909 PLUGIN_ASSERT(gpuQuery);
910
911 gpuQuery->NextQueryIndex();
912
913 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
914 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stateCache);
915 }
916 #endif
917 perfDataSet->cpuTimer.Begin();
918 }
919 #endif
920 }
921
922 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
923 if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
924 const VkDebugUtilsLabelEXT label {
925 VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
926 nullptr, // pNext
927 debugNames.renderCommandListName.data(), // pLabelName
928 { 1.f, 1.f, 1.f, 1.f } // color[4]
929 };
930 deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
931 }
932 #endif
933
934 for (const auto& ref : rcRef) {
935 if (!stateCache.validCommandList) {
936 #if (RENDER_VALIDATION_ENABLED == 1)
937 PLUGIN_LOG_ONCE_E("invalidated_be_cmd_list_" + debugNames.renderCommandListName,
938 "RENDER_VALIDATION: (RN:%s) backend render commands are invalidated",
939 debugNames.renderCommandListName.data());
940 #endif
941 break;
942 }
943
944 PLUGIN_ASSERT(ref.rc);
945 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
946 if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
947 const uint32_t index = static_cast<uint32_t>(ref.type) < countof(COMMAND_NAMES) ?
948 static_cast<uint32_t>(ref.type) : 0;
949 const VkDebugUtilsLabelEXT label {
950 VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
951 nullptr, // pNext
952 COMMAND_NAMES[index], // pLabelName
953 { 0.87f, 0.83f, 0.29f, 1.f } // color[4]
954 };
955 deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
956 }
957 #endif
958
959 switch (ref.type) {
960 case RenderCommandType::BARRIER_POINT: {
961 if (!stateCache.secondaryCommandBuffer) {
962 const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);
963 // handle all barriers before render command that needs resource syncing
964 RenderCommand(
965 barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
966 }
967 break;
968 }
969 case RenderCommandType::DRAW: {
970 RenderCommand(
971 *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
972 break;
973 }
974 case RenderCommandType::DRAW_INDIRECT: {
975 RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
976 contextPoolMgr, stateCache);
977 break;
978 }
979 case RenderCommandType::DISPATCH: {
980 RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
981 contextPoolMgr, stateCache);
982 break;
983 }
984 case RenderCommandType::DISPATCH_INDIRECT: {
985 RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
986 contextPoolMgr, stateCache);
987 break;
988 }
989 case RenderCommandType::BIND_PIPELINE: {
990 RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
991 contextPoolMgr, stateCache);
992 break;
993 }
994 case RenderCommandType::BEGIN_RENDER_PASS: {
995 RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
996 contextPoolMgr, stateCache);
997 break;
998 }
999 case RenderCommandType::NEXT_SUBPASS: {
1000 RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1001 contextPoolMgr, stateCache);
1002 break;
1003 }
1004 case RenderCommandType::END_RENDER_PASS: {
1005 RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1006 contextPoolMgr, stateCache);
1007 break;
1008 }
1009 case RenderCommandType::BIND_VERTEX_BUFFERS: {
1010 RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1011 contextPoolMgr, stateCache);
1012 break;
1013 }
1014 case RenderCommandType::BIND_INDEX_BUFFER: {
1015 RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1016 contextPoolMgr, stateCache);
1017 break;
1018 }
1019 case RenderCommandType::COPY_BUFFER: {
1020 RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1021 contextPoolMgr, stateCache);
1022 break;
1023 }
1024 case RenderCommandType::COPY_BUFFER_IMAGE: {
1025 RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1026 contextPoolMgr, stateCache);
1027 break;
1028 }
1029 case RenderCommandType::COPY_IMAGE: {
1030 RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1031 contextPoolMgr, stateCache);
1032 break;
1033 }
1034 case RenderCommandType::BIND_DESCRIPTOR_SETS: {
1035 RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1036 contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
1037 break;
1038 }
1039 case RenderCommandType::PUSH_CONSTANT: {
1040 RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1041 contextPoolMgr, stateCache);
1042 break;
1043 }
1044 case RenderCommandType::BLIT_IMAGE: {
1045 RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1046 contextPoolMgr, stateCache);
1047 break;
1048 }
1049 case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
1050 RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
1051 nodeContextPsoMgr, contextPoolMgr, stateCache);
1052 break;
1053 }
1054 case RenderCommandType::CLEAR_COLOR_IMAGE: {
1055 RenderCommand(*static_cast<RenderCommandClearColorImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1056 contextPoolMgr, stateCache);
1057 break;
1058 }
1059 // dynamic states
1060 case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
1061 RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1062 contextPoolMgr, stateCache);
1063 break;
1064 }
1065 case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
1066 RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1067 contextPoolMgr, stateCache);
1068 break;
1069 }
1070 case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
1071 RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1072 contextPoolMgr, stateCache);
1073 break;
1074 }
1075 case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
1076 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1077 contextPoolMgr, stateCache);
1078 break;
1079 }
1080 case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
1081 RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
1082 nodeContextPsoMgr, contextPoolMgr, stateCache);
1083 break;
1084 }
1085 case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
1086 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1087 contextPoolMgr, stateCache);
1088 break;
1089 }
1090 case RenderCommandType::DYNAMIC_STATE_STENCIL: {
1091 RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1092 contextPoolMgr, stateCache);
1093 break;
1094 }
1095 case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
1096 RenderCommand(*static_cast<RenderCommandDynamicStateFragmentShadingRate*>(ref.rc), cmdBuffer,
1097 nodeContextPsoMgr, contextPoolMgr, stateCache);
1098 break;
1099 }
1100 case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
1101 RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
1102 nodeContextPsoMgr, contextPoolMgr, stateCache);
1103 break;
1104 }
1105 //
1106 case RenderCommandType::WRITE_TIMESTAMP: {
1107 RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1108 contextPoolMgr, stateCache);
1109 break;
1110 }
1111 case RenderCommandType::UNDEFINED:
1112 case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
1113 case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
1114 default: {
1115 PLUGIN_ASSERT(false && "non-valid render command");
1116 break;
1117 }
1118 }
1119 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
1120 if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1121 deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1122 }
1123 #endif
1124 }
1125
1126 if ((!presentationData_.infos.empty())) {
1127 RenderPresentationLayout(cmdBuffer, cmdBufIdx);
1128 }
1129
1130 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1131 if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1132 deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1133 }
1134 #endif
1135
1136 #if (RENDER_PERF_ENABLED == 1)
1137 // copy counters
1138 if (perfDataSet) {
1139 CopyPerfCounters(stateCache.perfCounters, perfDataSet->perfCounters);
1140 }
1141 #endif
1142
1143 if (endCommandBuffer) {
1144 #if (RENDER_PERF_ENABLED == 1)
1145 if (perfDataSet) {
1146 perfDataSet->cpuTimer.End();
1147 }
1148 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
1149 if (validGpuQueries) {
1150 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
1151 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, stateCache);
1152 }
1153 #endif
1154 CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
1155 #endif
1156
1157 VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
1158
1159 if (mrclDesc.secondaryCommandBuffer) {
1160 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = {};
1161 } else {
1162 commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
1163 }
1164 }
1165 }
1166
1167 void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1168 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1169 {
1170 const RenderHandle psoHandle = renderCmd.psoHandle;
1171 const VkPipelineBindPoint pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;
1172
1173 stateCache.psoHandle = psoHandle;
1174
1175 VkPipeline pipeline { VK_NULL_HANDLE };
1176 VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
1177 if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
1178 const ComputePipelineStateObjectVk* pso = static_cast<const ComputePipelineStateObjectVk*>(
1179 psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
1180 if (pso) {
1181 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1182 pipeline = plat.pipeline;
1183 pipelineLayout = plat.pipelineLayout;
1184 }
1185 } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
1186 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1187 if (stateCache.renderCommandBeginRenderPass) {
1188 uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
1189 if (stateCache.pipelineDescSetHash != 0) {
1190 HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
1191 }
1192 const GraphicsPipelineStateObjectVk* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
1193 psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
1194 stateCache.renderCommandBeginRenderPass->subpasses,
1195 stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
1196 &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
1197 if (pso) {
1198 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1199 pipeline = plat.pipeline;
1200 pipelineLayout = plat.pipelineLayout;
1201 }
1202 }
1203 }
1204
1205 // NOTE: render front-end expects pso binding after begin render pass
1206 // in some situations the render pass might change and therefore the pipeline changes
1207 // in some situations the render pass is the same and the rebinding is not needed
1208 const bool newPipeline = (pipeline != stateCache.pipeline);
1209 const bool valid = (pipeline != VK_NULL_HANDLE);
1210 if (valid && newPipeline) {
1211 stateCache.pipeline = pipeline;
1212 stateCache.pipelineLayout = pipelineLayout;
1213 stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
1214 vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
1215 pipelineBindPoint, // pipelineBindPoint
1216 pipeline); // pipeline
1217 #if (RENDER_PERF_ENABLED == 1)
1218 stateCache.perfCounters.bindPipelineCount++;
1219 #endif
1220 }
1221 }
1222
1223 void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1224 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1225 {
1226 if (stateCache.validBindings) {
1227 if (renderCmd.indexCount) {
1228 vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
1229 renderCmd.indexCount, // indexCount
1230 renderCmd.instanceCount, // instanceCount
1231 renderCmd.firstIndex, // firstIndex
1232 renderCmd.vertexOffset, // vertexOffset
1233 renderCmd.firstInstance); // firstInstance
1234 #if (RENDER_PERF_ENABLED == 1)
1235 stateCache.perfCounters.drawCount++;
1236 stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1237 stateCache.perfCounters.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1238 #endif
1239 } else {
1240 vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
1241 renderCmd.vertexCount, // vertexCount
1242 renderCmd.instanceCount, // instanceCount
1243 renderCmd.firstVertex, // firstVertex
1244 renderCmd.firstInstance); // firstInstance
1245 #if (RENDER_PERF_ENABLED == 1)
1246 stateCache.perfCounters.drawCount++;
1247 stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1248 stateCache.perfCounters.triangleCount += (renderCmd.vertexCount * 3) // 3: vertex dimension
1249 * renderCmd.instanceCount;
1250 #endif
1251 }
1252 }
1253 }
1254
1255 void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1256 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1257 {
1258 if (stateCache.validBindings) {
1259 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1260 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1261 const VkBuffer buffer = plat.buffer;
1262 const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1263 if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1264 vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
1265 buffer, // buffer
1266 offset, // offset
1267 renderCmd.drawCount, // drawCount
1268 renderCmd.stride); // stride
1269 } else {
1270 vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
1271 buffer, // buffer
1272 offset, // offset (incl. plat.currentByteOffset)
1273 renderCmd.drawCount, // drawCount
1274 renderCmd.stride); // stride
1275 }
1276 #if (RENDER_PERF_ENABLED == 1)
1277 stateCache.perfCounters.drawIndirectCount++;
1278 #endif
1279 }
1280 }
1281 }
1282
1283 void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1284 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1285 {
1286 if (stateCache.validBindings) {
1287 vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
1288 renderCmd.groupCountX, // groupCountX
1289 renderCmd.groupCountY, // groupCountY
1290 renderCmd.groupCountZ); // groupCountZ
1291 #if (RENDER_PERF_ENABLED == 1)
1292 stateCache.perfCounters.dispatchCount++;
1293 #endif
1294 }
1295 }
1296
1297 void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
1298 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1299 const StateCache& stateCache)
1300 {
1301 if (stateCache.validBindings) {
1302 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1303 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1304 const VkBuffer buffer = plat.buffer;
1305 const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1306 vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
1307 buffer, // buffer
1308 offset); // offset
1309 #if (RENDER_PERF_ENABLED == 1)
1310 stateCache.perfCounters.dispatchIndirectCount++;
1311 #endif
1312 }
1313 }
1314 }
1315
1316 void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
1317 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1318 StateCache& stateCache)
1319 {
1320 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
1321 stateCache.renderCommandBeginRenderPass = &renderCmd;
1322
1323 NodeContextPoolManagerVk& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
1324 // NOTE: the state cache could be optimized to store lowLevelRenderPassData in the multi-render-command-list case
1325 stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
1326
1327 // early out for multi render command list render pass
1328 if (stateCache.secondaryCommandBuffer) {
1329 return; // early out
1330 }
1331 const bool validRpFbo = (stateCache.lowLevelRenderPassData.renderPass != VK_NULL_HANDLE) &&
1332 (stateCache.lowLevelRenderPassData.framebuffer != VK_NULL_HANDLE);
1333 // invalidate the whole command list
1334 if (!validRpFbo) {
1335 stateCache.validCommandList = false;
1336 return; // early out
1337 }
1338
1339 if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
1340 #if (RENDER_VULKAN_COMBINE_MULTI_COMMAND_LIST_MSAA_SUBPASSES_ENABLED == 1)
1341 // fix for e.g. MoltenVK MSAA resolve not working on Mac (we do not execute the subpasses)
1342 if ((!stateCache.renderCommandBeginRenderPass->subpasses.empty()) &&
1343 stateCache.renderCommandBeginRenderPass->subpasses[0].resolveAttachmentCount == 0) {
1344 const VkSubpassContents subpassContents =
1345 static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1346 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1347 subpassContents); // contents
1348 }
1349 #else
1350 const VkSubpassContents subpassContents =
1351 static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1352 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1353 subpassContents); // contents
1354 #endif
1355 return; // early out
1356 }
1357
1358 const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;
1359
1360 VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1361 bool hasClearValues = false;
1362 for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
1363 const auto& ref = renderPassDesc.attachments[idx];
1364 if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
1365 ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1366 const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
1367 VkClearValue clearValue;
1368 if (RenderHandleUtil::IsDepthImage(handle)) {
1369 PLUGIN_STATIC_ASSERT(sizeof(clearValue.depthStencil) == sizeof(ref.clearValue.depthStencil));
1370 clearValue.depthStencil.depth = ref.clearValue.depthStencil.depth;
1371 clearValue.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
1372 } else {
1373 PLUGIN_STATIC_ASSERT(sizeof(clearValue.color) == sizeof(ref.clearValue.color));
1374 if (!CloneData(&clearValue.color, sizeof(clearValue.color), &ref.clearValue.color,
1375 sizeof(ref.clearValue.color))) {
1376 PLUGIN_LOG_E("Copying of clearValue.color failed.");
1377 }
1378 }
1379 clearValues[idx] = clearValue;
1380 hasClearValues = true;
1381 }
1382 }
1383
1384 // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
1385 // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
1386 const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;
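// e.g. if only attachment 3 has a clear loadOp, clearValueCount must still be at least 4;
// attachmentCount is a safe upper bound as entries for non-cleared attachments are ignored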
1387
1388 VkRect2D renderArea {
1389 { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
1390 { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
1391 };
1392 // render area needs to be inside frame buffer
1393 const auto& lowLevelData = stateCache.lowLevelRenderPassData;
1394 renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
1395 renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
1396 renderArea.extent.width = Math::min(renderArea.extent.width,
1397 static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
1398 renderArea.extent.height = Math::min(renderArea.extent.height,
1399 static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));
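// e.g. with a 1280x720 framebuffer, a requested render area of offset (1000, 0) and extent 512x720
// is clamped to extent 280x720 so that offset + extent stays within the framebuffer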
1400
1401 const VkRenderPassBeginInfo renderPassBeginInfo {
1402 VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, // sType
1403 nullptr, // pNext
1404 stateCache.lowLevelRenderPassData.renderPass, // renderPass
1405 stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
1406 renderArea, // renderArea
1407 clearValueCount, // clearValueCount
1408 clearValues, // pClearValues
1409 };
1410
1411 // NOTE: could be patched in render graph
1412 const VkSubpassContents subpassContents =
1413 stateCache.primaryRenderPass ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE;
1414 vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
1415 &renderPassBeginInfo, // pRenderPassBegin
1416 subpassContents); // contents
1417 #if (RENDER_PERF_ENABLED == 1)
1418 stateCache.perfCounters.renderPassCount++;
1419 #endif
1420 }
1421
1422 void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1423 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1424 {
1425 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1426
1427 const VkSubpassContents subpassContents = (VkSubpassContents)renderCmd.subpassContents;
1428 vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1429 subpassContents); // contents
1430 }
1431
1432 void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1433 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1434 {
1435 PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1436
1437 // early out for multi render command list render pass
1438 if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
1439 return; // NOTE
1440 }
1441
1442 stateCache.renderCommandBeginRenderPass = nullptr;
1443 stateCache.lowLevelRenderPassData = {};
1444
1445 if (!stateCache.secondaryCommandBuffer) {
1446 vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
1447 }
1448 }
1449
1450 void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
1451 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1452 const StateCache& stateCache)
1453 {
1454 PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1455 PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1456
1457 const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;
1458
1459 VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1460 VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1461 const GpuBufferVk* gpuBuffer = nullptr;
1462 RenderHandle currBufferHandle;
1463 for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
1464 const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
1465 // our importer usually uses the same GPU buffer for all vertex buffers in a single primitive
1466 // do not re-fetch the buffer if it is not needed
1467 if (currBufferHandle.id != currVb.bufferHandle.id) {
1468 currBufferHandle = currVb.bufferHandle;
1469 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
1470 }
1471 if (gpuBuffer) {
1472 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1473 const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
1474 vertexBuffers[idx] = plat.buffer;
1475 offsets[idx] = offset;
1476 }
1477 }
1478
1479 vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
1480 0, // firstBinding
1481 vertexBufferCount, // bindingCount
1482 vertexBuffers, // pBuffers
1483 offsets); // pOffsets
1484 }
1485
1486 void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
1487 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1488 const StateCache& stateCache)
1489 {
1490 const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);
1491
1492 PLUGIN_ASSERT(gpuBuffer);
1493 if (gpuBuffer) {
1494 const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1495 const VkBuffer buffer = plat.buffer;
1496 const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
1497 const VkIndexType indexType = (VkIndexType)renderCmd.indexBuffer.indexType;
1498
1499 vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
1500 buffer, // buffer
1501 offset, // offset
1502 indexType); // indexType
1503 }
1504 }
1505
1506 void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1507 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1508 {
1509 const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1510 const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1511 if (srcImagePtr && dstImagePtr) {
1512 const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
1513 const GpuImagePlatformDataVk& dstPlatImage = (const GpuImagePlatformDataVk&)dstImagePtr->GetPlatformData();
1514
1515 const ImageBlit& ib = renderCmd.imageBlit;
1516 const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1517 ? srcPlatImage.arrayLayers
1518 : ib.srcSubresource.layerCount;
1519 const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1520 ? dstPlatImage.arrayLayers
1521 : ib.dstSubresource.layerCount;
1522
1523 const VkImageSubresourceLayers srcSubresourceLayers {
1524 (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
1525 ib.srcSubresource.mipLevel, // mipLevel
1526 ib.srcSubresource.baseArrayLayer, // baseArrayLayer
1527 srcLayerCount, // layerCount
1528 };
1529 const VkImageSubresourceLayers dstSubresourceLayers {
1530 (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
1531 ib.dstSubresource.mipLevel, // mipLevel
1532 ib.dstSubresource.baseArrayLayer, // baseArrayLayer
1533 dstLayerCount, // layerCount
1534 };
1535
1536 const VkImageBlit imageBlit {
1537 srcSubresourceLayers, // srcSubresource
1538 { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
1539 { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
1540 (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]
1541 dstSubresourceLayers, // dstSubresource
1542 { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
1543 { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
1544 (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
1545 };
1546
1547 vkCmdBlitImage(cmdBuf.commandBuffer, // commandBuffer
1548 srcPlatImage.image, // srcImage
1549 (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout,
1550 dstPlatImage.image, // dstImage
1551 (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
1552 1, // regionCount
1553 &imageBlit, // pRegions
1554 (VkFilter)renderCmd.filter); // filter
1555 }
1556 }
1557
1558 void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1559 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1560 {
1561 const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1562 const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1563
1564 PLUGIN_ASSERT(srcGpuBuffer);
1565 PLUGIN_ASSERT(dstGpuBuffer);
1566
1567 if (srcGpuBuffer && dstGpuBuffer) {
1568 const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
1569 const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
1570 const VkBufferCopy bufferCopy {
1571 renderCmd.bufferCopy.srcOffset,
1572 renderCmd.bufferCopy.dstOffset,
1573 renderCmd.bufferCopy.size,
1574 };
1575
1576 if (bufferCopy.size > 0) {
1577 vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
1578 srcBuffer, // srcBuffer
1579 dstBuffer, // dstBuffer
1580 1, // regionCount
1581 &bufferCopy); // pRegions
1582 }
1583 }
1584 }
1585
1586 void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
1587 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1588 const StateCache& stateCache)
1589 {
1590 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
1591 PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1592 return;
1593 }
1594
1595 const GpuBufferVk* gpuBuffer = nullptr;
1596 const GpuImageVk* gpuImage = nullptr;
1597 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1598 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1599 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1600 } else {
1601 gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1602 gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1603 }
1604
1605 if (gpuBuffer && gpuImage) {
1606 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1607 const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
1608 const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
1609 const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1610 ? platImage.arrayLayers
1611 : subresourceLayer.layerCount;
1612 const VkImageSubresourceLayers imageSubresourceLayer {
1613 (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
1614 subresourceLayer.mipLevel,
1615 subresourceLayer.baseArrayLayer,
1616 layerCount,
1617 };
1618 const GpuImageDesc& imageDesc = gpuImage->GetDesc();
1619 // Math::min to force staying inside image
1620 const uint32_t mip = subresourceLayer.mipLevel;
1621 const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
1622 const Size3D& imageOffset = bufferImageCopy.imageOffset;
1623 const VkExtent3D imageExtent = {
1624 Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
1625 Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
1626 Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
1627 };
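// e.g. for mip level 2 of a 1024x1024 image the level size is 256x256, so a requested extent of 300
// at offset 0 is clamped to 256 to keep the copy inside the mip level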
1628 const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
1629 (imageOffset.depth < imageSize.depth);
1630 const VkBufferImageCopy bufferImageCopyVk {
1631 bufferImageCopy.bufferOffset,
1632 bufferImageCopy.bufferRowLength,
1633 bufferImageCopy.bufferImageHeight,
1634 imageSubresourceLayer,
1635 { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
1636 static_cast<int32_t>(imageOffset.depth) },
1637 imageExtent,
1638 };
1639
1640 const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
1641 const VkImage image = (gpuImage->GetPlatformData()).image;
1642
1643 if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1644 vkCmdCopyBufferToImage(cmdBuf.commandBuffer, // commandBuffer
1645 buffer, // srcBuffer
1646 image, // dstImage
1647 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1648 1, // regionCount
1649 &bufferImageCopyVk); // pRegions
1650 } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1651 vkCmdCopyImageToBuffer(cmdBuf.commandBuffer, // commandBuffer
1652 image, // srcImage
1653 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1654 buffer, // dstBuffer
1655 1, // regionCount
1656 &bufferImageCopyVk); // pRegions
1657 }
1658 }
1659 }
1660
1661 void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1662 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1663 {
1664 const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1665 const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1666 if (srcGpuImage && dstGpuImage) {
1667 const ImageCopy& copy = renderCmd.imageCopy;
1668 const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
1669 const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;
1670
1671 const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
1672 const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
1673 const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1674 ? srcPlatImage.arrayLayers
1675 : srcSubresourceLayer.layerCount;
1676 const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1677 ? dstPlatImage.arrayLayers
1678 : dstSubresourceLayer.layerCount;
1679
1680 const VkImageSubresourceLayers srcImageSubresourceLayer {
1681 (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
1682 srcSubresourceLayer.mipLevel,
1683 srcSubresourceLayer.baseArrayLayer,
1684 srcLayerCount,
1685 };
1686 const VkImageSubresourceLayers dstImageSubresourceLayer {
1687 (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
1688 dstSubresourceLayer.mipLevel,
1689 dstSubresourceLayer.baseArrayLayer,
1690 dstLayerCount,
1691 };
1692
1693 const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
1694 const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();
1695
1696 VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
1697 ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
1698 ext.height =
1699 Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
1700 ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
1701
1702 const VkImageCopy imageCopyVk {
1703 srcImageSubresourceLayer, // srcSubresource
1704 { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
1705 dstImageSubresourceLayer, // dstSubresource
1706 { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
1707 ext, // extent
1708 };
1709 vkCmdCopyImage(cmdBuf.commandBuffer, // commandBuffer
1710 srcPlatImage.image, // srcImage
1711 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1712 dstPlatImage.image, // dstImage
1713 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1714 1, // regionCount
1715 &imageCopyVk); // pRegions
1716 }
1717 }
1718
1719 void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1720 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
1721 const RenderBarrierList& rbl)
1722 {
1723 if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
1724 return;
1725 }
1726
1727 const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1728 rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1729 PLUGIN_ASSERT(barrierPointBarriers);
1730 if (!barrierPointBarriers) {
1731 return;
1732 }
1733 constexpr uint32_t maxBarrierCount { 8 };
1734 VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
1735 VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
1736 VkMemoryBarrier memoryBarriers[maxBarrierCount];
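// barriers are gathered into these fixed-size arrays and flushed with vkCmdPipelineBarrier
// whenever one of the arrays fills up or the last barrier of the list has been processed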
1737
1738 // generally there is only a single barrier list per barrier point
1739 // in situations with batched render passes there can be many
1740 // NOTE: all barrier lists could be patched into a single vk command if needed
1741 // NOTE: memory and pipeline barriers should be allowed on the front-end side
1742 const uint32_t barrierListCount = static_cast<uint32_t>(barrierPointBarriers->barrierListCount);
1743 const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
1744 #if (RENDER_VALIDATION_ENABLED == 1)
1745 uint32_t fullBarrierCount = 0u;
1746 #endif
1747 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1748 if (nextBarrierList == nullptr) { // cannot be null, just a safety
1749 PLUGIN_ASSERT(false);
1750 return;
1751 }
1752 const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
1753 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1754 const uint32_t barrierCount = static_cast<uint32_t>(barrierListRef.count);
1755
1756 uint32_t bufferBarrierIdx = 0;
1757 uint32_t imageBarrierIdx = 0;
1758 uint32_t memoryBarrierIdx = 0;
1759
1760 VkPipelineStageFlags srcPipelineStageMask { 0 };
1761 VkPipelineStageFlags dstPipelineStageMask { 0 };
1762 constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
1763
1764 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1765 const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];
1766
1767 uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1768 uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
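// differing queue types imply a queue family ownership transfer; otherwise VK_QUEUE_FAMILY_IGNORED
// is kept and no ownership transfer is performed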
1769 if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
1770 srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
1771 dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
1772 }
1773
1774 const RenderHandle resourceHandle = ref.resourceHandle;
1775 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1776
1777 PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
1778 (handleType == RenderHandleType::GPU_IMAGE));
1779
1780 const VkAccessFlags srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
1781 const VkAccessFlags dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);
1782
1783 srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
1784 dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);
1785
1786 // NOTE: zero size buffer barriers allowed ATM
1787 if (handleType == RenderHandleType::GPU_BUFFER) {
1788 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle); gpuBuffer) {
1789 const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
1790 // mapped currentByteOffset (dynamic ring buffer offset) taken into account
1791 const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
1792 const VkDeviceSize size =
1793 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
1794 (VkDeviceSize)ref.dst.optionalByteSize);
1795 if (platBuffer.buffer) {
1796 bufferMemoryBarriers[bufferBarrierIdx++] = {
1797 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1798 nullptr, // pNext
1799 srcAccessMask, // srcAccessMask
1800 dstAccessMask, // dstAccessMask
1801 srcQueueFamilyIndex, // srcQueueFamilyIndex
1802 dstQueueFamilyIndex, // dstQueueFamilyIndex
1803 platBuffer.buffer, // buffer
1804 offset, // offset
1805 size, // size
1806 };
1807 }
1808 }
1809 } else if (handleType == RenderHandleType::GPU_IMAGE) {
1810 if (const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle); gpuImage) {
1811 const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1812
1813 const VkImageLayout srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
1814 const VkImageLayout dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);
1815
1816 const VkImageAspectFlags imageAspectFlags =
1817 (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
1818 ? platImage.aspectFlags
1819 : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;
1820
1821 const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
1822 PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
1823 ? VK_REMAINING_MIP_LEVELS
1824 : ref.src.optionalImageSubresourceRange.levelCount;
1825
1826 const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
1827 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1828 ? VK_REMAINING_ARRAY_LAYERS
1829 : ref.src.optionalImageSubresourceRange.layerCount;
1830
1831 const VkImageSubresourceRange imageSubresourceRange {
1832 imageAspectFlags, // aspectMask
1833 ref.src.optionalImageSubresourceRange.baseMipLevel, // baseMipLevel
1834 levelCount, // levelCount
1835 ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
1836 layerCount, // layerCount
1837 };
1838
1839 if (platImage.image) {
1840 imageMemoryBarriers[imageBarrierIdx++] = {
1841 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1842 nullptr, // pNext
1843 srcAccessMask, // srcAccessMask
1844 dstAccessMask, // dstAccessMask
1845 srcImageLayout, // oldLayout
1846 dstImageLayout, // newLayout
1847 srcQueueFamilyIndex, // srcQueueFamilyIndex
1848 dstQueueFamilyIndex, // dstQueueFamilyIndex
1849 platImage.image, // image
1850 imageSubresourceRange, // subresourceRange
1851 };
1852 }
1853 }
1854 } else {
1855 memoryBarriers[memoryBarrierIdx++] = {
1856 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1857 nullptr, // pNext
1858 srcAccessMask, // srcAccessMask
1859 dstAccessMask, // dstAccessMask
1860 };
1861 }
1862
1863 const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
1864 const bool resetBarriers = (bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
1865 (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1));
1868
1869 if (hasBarriers && resetBarriers) {
1870 #if (RENDER_VALIDATION_ENABLED == 1)
1871 fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
1872 #endif
1873 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
1874 srcPipelineStageMask, // srcStageMask
1875 dstPipelineStageMask, // dstStageMask
1876 dependencyFlags, // dependencyFlags
1877 memoryBarrierIdx, // memoryBarrierCount
1878 memoryBarriers, // pMemoryBarriers
1879 bufferBarrierIdx, // bufferMemoryBarrierCount
1880 bufferMemoryBarriers, // pBufferMemoryBarriers
1881 imageBarrierIdx, // imageMemoryBarrierCount
1882 imageMemoryBarriers); // pImageMemoryBarriers
1883
1884 bufferBarrierIdx = 0;
1885 imageBarrierIdx = 0;
1886 memoryBarrierIdx = 0;
1887 }
1888 }
1889 }
1890 #if (RENDER_VALIDATION_ENABLED == 1)
1891 if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
1892 PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
1893 "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
1894 barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
1895 }
1896 #endif
1897 }
1898
1899 namespace {
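// running indices into the pre-allocated low level descriptor write data arrays; they allow all
// descriptor sets of the command list to be updated with a single vkUpdateDescriptorSets call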
1900 struct DescriptorSetUpdateDataStruct {
1901 uint32_t accelIndex { 0U };
1902 uint32_t bufferIndex { 0U };
1903 uint32_t imageIndex { 0U };
1904 uint32_t samplerIndex { 0U };
1905 uint32_t writeBindIdx { 0U };
1906 };
1907 } // namespace
1908
1909 void RenderBackendVk::UpdateCommandListDescriptorSets(
1910 const RenderCommandList& renderCommandList, StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
1911 {
1912 NodeContextDescriptorSetManagerVk& ctxDescMgr = (NodeContextDescriptorSetManagerVk&)ncdsm;
1913
1914 const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
1915 const uint32_t upDescriptorSetCount = static_cast<uint32_t>(allDescSets.size());
1916 LowLevelContextDescriptorWriteDataVk& wd = ctxDescMgr.GetLowLevelDescriptorWriteData();
1917 DescriptorSetUpdateDataStruct dsud;
1918 for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
1919 if ((descIdx >= static_cast<uint32_t>(wd.writeDescriptorSets.size())) ||
1920 (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET)) {
1921 continue;
1922 }
1923 const RenderHandle descHandle = allDescSets[descIdx];
1924 // first update gpu descriptor indices
1925 ncdsm.UpdateDescriptorSetGpuHandle(descHandle);
1926
1927 // actual vulkan descriptor set update
1928 const LowLevelDescriptorSetVk* descriptorSet = ctxDescMgr.GetDescriptorSet(descHandle);
1929 if (descriptorSet && descriptorSet->descriptorSet) {
1930 const DescriptorSetLayoutBindingResources bindingResources = ncdsm.GetCpuDescriptorSetData(descHandle);
1931 #if (RENDER_VALIDATION_ENABLED == 1)
1932 // get descriptor counts
1933 const LowLevelDescriptorCountsVk& descriptorCounts = ctxDescMgr.GetLowLevelDescriptorCounts(descHandle);
1934 if (static_cast<uint32_t>(bindingResources.bindings.size()) > descriptorCounts.writeDescriptorCount) {
1935 PLUGIN_LOG_E("RENDER_VALIDATION: update descriptor set bindings exceed descriptor set bindings");
1936 }
1937 #endif
1938 if (static_cast<uint32_t>(bindingResources.bindings.size()) >
1939 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
1940 PLUGIN_ASSERT(false);
1941 continue;
1942 }
1943 const auto& buffers = bindingResources.buffers;
1944 const auto& images = bindingResources.images;
1945 const auto& samplers = bindingResources.samplers;
1946 for (const auto& ref : buffers) {
1947 const uint32_t descriptorCount = ref.binding.descriptorCount;
1948 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
1949 if (descriptorCount == 0) {
1950 continue;
1951 }
1952 const uint32_t arrayOffset = ref.arrayOffset;
1953 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1954 if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
1955 #if (RENDER_VULKAN_RT_ENABLED == 1)
1956 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1957 // first is the ref, starting from 1 we use array offsets
1958 const BindableBuffer& bRes =
1959 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1960 if (const GpuBufferVk* resPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1961 const GpuAccelerationStructurePlatformDataVk& platAccel =
1962 resPtr->GetPlatformDataAccelerationStructure();
1963 wd.descriptorAccelInfos[dsud.accelIndex + idx] = {
1964 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
1965 nullptr, // pNext
1966 descriptorCount, // accelerationStructureCount
1967 &platAccel.accelerationStructure, // pAccelerationStructures
1968 };
1969 }
1970 }
1971 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
1972 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
1973 &wd.descriptorAccelInfos[dsud.accelIndex], // pNext
1974 descriptorSet->descriptorSet, // dstSet
1975 ref.binding.binding, // dstBinding
1976 0, // dstArrayElement
1977 descriptorCount, // descriptorCount
1978 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1979 nullptr, // pImageInfo
1980 nullptr, // pBufferInfo
1981 nullptr, // pTexelBufferView
1982 };
1983 dsud.accelIndex += descriptorCount;
1984 #endif
1985 } else {
1986 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1987 // first is the ref, starting from 1 we use array offsets
1988 const BindableBuffer& bRes =
1989 (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1990 const VkDeviceSize optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
1991 if (const GpuBufferVk* resPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1992 const GpuBufferPlatformDataVk& platBuffer = resPtr->GetPlatformData();
1993 // takes into account dynamic ring buffers with mapping
1994 const VkDeviceSize bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
1995 const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
1996 const VkDeviceSize bufferRange =
1997 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
1998 (VkDeviceSize)bRes.byteSize);
1999 wd.descriptorBufferInfos[dsud.bufferIndex + idx] = {
2000 platBuffer.buffer, // buffer
2001 byteOffset, // offset
2002 bufferRange, // range
2003 };
2004 }
2005 }
2006 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2007 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2008 nullptr, // pNext
2009 descriptorSet->descriptorSet, // dstSet
2010 ref.binding.binding, // dstBinding
2011 0, // dstArrayElement
2012 descriptorCount, // descriptorCount
2013 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2014 nullptr, // pImageInfo
2015 &wd.descriptorBufferInfos[dsud.bufferIndex], // pBufferInfo
2016 nullptr, // pTexelBufferView
2017 };
2018 dsud.bufferIndex += descriptorCount;
2019 }
2020 }
2021 for (const auto& ref : images) {
2022 const uint32_t descriptorCount = ref.binding.descriptorCount;
2023 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
2024 if (descriptorCount == 0) {
2025 continue;
2026 }
2027 const VkDescriptorType descriptorType = (VkDescriptorType)ref.binding.descriptorType;
2028 const uint32_t arrayOffset = ref.arrayOffset;
2029 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
2030 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2031 // first is the ref, starting from 1 we use array offsets
2032 const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].resource;
2033 if (const GpuImageVk* resPtr = gpuResourceMgr_.GetImage<GpuImageVk>(bRes.handle); resPtr) {
2034 VkSampler sampler = VK_NULL_HANDLE;
2035 if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2036 const GpuSamplerVk* samplerPtr =
2037 gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
2038 if (samplerPtr) {
2039 sampler = samplerPtr->GetPlatformData().sampler;
2040 }
2041 }
2042 const GpuImagePlatformDataVk& platImage = resPtr->GetPlatformData();
2043 const GpuImagePlatformDataViewsVk& platImageViews = resPtr->GetPlatformDataViews();
2044 VkImageView imageView = platImage.imageView;
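// select the most specific image view: a single layer view if a specific layer is requested,
// otherwise a mip level view (all layers or single layer) if a specific mip is requested,
// and the default view as a fallback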
2045 if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2046 (bRes.layer < platImageViews.layerImageViews.size())) {
2047 imageView = platImageViews.layerImageViews[bRes.layer];
2048 } else if (bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) {
2049 if ((bRes.layer == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2050 (bRes.mip < platImageViews.mipImageAllLayerViews.size())) {
2051 imageView = platImageViews.mipImageAllLayerViews[bRes.mip];
2052 } else if (bRes.mip < platImageViews.mipImageViews.size()) {
2053 imageView = platImageViews.mipImageViews[bRes.mip];
2054 }
2055 }
2056 wd.descriptorImageInfos[dsud.imageIndex + idx] = {
2057 sampler, // sampler
2058 imageView, // imageView
2059 (VkImageLayout)bRes.imageLayout, // imageLayout
2060 };
2061 }
2062 }
2063 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2064 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2065 nullptr, // pNext
2066 descriptorSet->descriptorSet, // dstSet
2067 ref.binding.binding, // dstBinding
2068 0, // dstArrayElement
2069 descriptorCount, // descriptorCount
2070 descriptorType, // descriptorType
2071 &wd.descriptorImageInfos[dsud.imageIndex], // pImageInfo
2072 nullptr, // pBufferInfo
2073 nullptr, // pTexelBufferView
2074 };
2075 dsud.imageIndex += descriptorCount;
2076 }
2077 for (const auto& ref : samplers) {
2078 const uint32_t descriptorCount = ref.binding.descriptorCount;
2079 // skip array bindings which are bound starting from the first index; they also have descriptorCount 0
2080 if (descriptorCount == 0) {
2081 continue;
2082 }
2083 const uint32_t arrayOffset = ref.arrayOffset;
2084 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
2085 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2086 // first is the ref, starting from 1 we use array offsets
2087 const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].resource;
2088 if (const GpuSamplerVk* resPtr = gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.handle); resPtr) {
2089 const GpuSamplerPlatformDataVk& platSampler = resPtr->GetPlatformData();
2090 wd.descriptorSamplerInfos[dsud.samplerIndex + idx] = {
2091 platSampler.sampler, // sampler
2092 VK_NULL_HANDLE, // imageView
2093 VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
2094 };
2095 }
2096 }
2097 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2098 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
2099 nullptr, // pNext
2100 descriptorSet->descriptorSet, // dstSet
2101 ref.binding.binding, // dstBinding
2102 0, // dstArrayElement
2103 descriptorCount, // descriptorCount
2104 (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2105 &wd.descriptorSamplerInfos[dsud.samplerIndex], // pImageInfo
2106 nullptr, // pBufferInfo
2107 nullptr, // pTexelBufferView
2108 };
2109 dsud.samplerIndex += descriptorCount;
2110 }
2111
2112 #if (RENDER_PERF_ENABLED == 1)
2113 // count the actually updated descriptor sets, not the API calls
2114 stateCache.perfCounters.updateDescriptorSetCount++;
2115 #endif
2116 }
2117 }
2118 // flush all gathered descriptor writes with a single call (if any)
2119 if (dsud.writeBindIdx > 0U) {
2120 const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2121 vkUpdateDescriptorSets(device, // device
2122 dsud.writeBindIdx, // descriptorWriteCount
2123 wd.writeDescriptorSets.data(), // pDescriptorWrites
2124 0, // descriptorCopyCount
2125 nullptr); // pDescriptorCopies
2126 }
2127 }
2128
2129 void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
2130 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2131 StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
2132 {
2133 const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;
2134
2135 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2136 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
2137 const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
2138 ? VK_PIPELINE_BIND_POINT_COMPUTE
2139 : VK_PIPELINE_BIND_POINT_GRAPHICS;
2140 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2141
2142 bool valid = (pipelineLayout != VK_NULL_HANDLE);
2143 const uint32_t firstSet = renderCmd.firstSet;
2144 const uint32_t setCount = renderCmd.setCount;
2145 if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
2146 uint32_t combinedDynamicOffsetCount = 0;
2147 uint32_t dynamicOffsetDescriptorSetIndices = 0;
2148 uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;
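// the prior hash is stored to detect changes in the immutable sampler bits of the bound sets
// (16 bits of the hash are reserved per set); on a mismatch the pso is re-created and the sets
// are re-bound below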
2149
2150 VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2151 const uint32_t firstPlusCount = firstSet + setCount;
2152 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2153 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2154 if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
2155 const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
2156 dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
2157 combinedDynamicOffsetCount += dynamicDescriptorCount;
2158
2159 const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
2160 if (descriptorSet && descriptorSet->descriptorSet) {
2161 descriptorSets[idx] = descriptorSet->descriptorSet;
2162 // update, copy to state cache
2163 PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
2164 stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
2165 const uint32_t currShift = (idx * 16u);
2166 const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
2167 uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
2168 stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
2169 } else {
2170 valid = false;
2171 }
2172 }
2173 }
2174
2175 uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT *
2176 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2177 uint32_t dynamicOffsetIdx = 0;
2178 // NOTE: optimize
2179 // this code has safety checks so that offsets are not updated for non-dynamic sets
2180 // they could be enabled only for validation
2181 for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2182 if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
2183 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2184 const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
2185 const uint32_t dodResCount = static_cast<uint32_t>(dod.resources.size());
2186 const auto& descriptorSetDynamicOffsets = renderCmd.descriptorSetDynamicOffsets[idx];
2187 for (uint32_t dodIdx = 0U; dodIdx < dodResCount; ++dodIdx) {
2188 uint32_t byteOffset = 0U;
2189 if (dodIdx < descriptorSetDynamicOffsets.dynamicOffsetCount) {
2190 byteOffset = descriptorSetDynamicOffsets.dynamicOffsets[dynamicOffsetIdx];
2191 }
2192 dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
2193 }
2194 }
2195 }
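// dynamic offsets are gathered in set and binding order; dynamicOffsetCount must match the total
// number of dynamic descriptors in the sets bound below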
2196
2197 stateCache.validBindings = valid;
2198 if (stateCache.validBindings) {
2199 if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
2200 vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
2201 pipelineBindPoint, // pipelineBindPoint
2202 pipelineLayout, // layout
2203 firstSet, // firstSet
2204 setCount, // descriptorSetCount
2205 &descriptorSets[firstSet], // pDescriptorSets
2206 dynamicOffsetIdx, // dynamicOffsetCount
2207 dynamicOffsets); // pDynamicOffsets
2208 #if (RENDER_PERF_ENABLED == 1)
2209 stateCache.perfCounters.bindDescriptorSetCount++;
2210 #endif
2211 } else {
2212 // possible pso re-creation and bind of these sets to the new pso
2213 const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
2214 (PipelineBindPoint)pipelineBindPoint };
2215 RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
2216 RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
2217 }
2218 }
2219 }
2220 }
2221
2222 void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2223 NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2224 {
2225 PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
2226 PLUGIN_ASSERT(renderCmd.data);
2227
2228 PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2229 const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2230
2231 const bool valid = (pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0);
2232 PLUGIN_ASSERT(valid);
2233
2234 if (valid) {
2235 const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
2236 vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
2237 pipelineLayout, // layout
2238 shaderStageFlags, // stageFlags
2239 0, // offset
2240 renderCmd.pushConstant.byteSize, // size
2241 static_cast<void*>(renderCmd.data)); // pValues
2242 }
2243 }
2244
2245 void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
2246 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2247 const StateCache& stateCache)
2248 {
2249 #if (RENDER_VULKAN_RT_ENABLED == 1)
2250 // NOTE: missing
2251 const GpuBufferVk* dst = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.dstAccelerationStructure);
2252 const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.scratchBuffer);
2253 if (dst && scratchBuffer) {
2254 const DevicePlatformDataVk& devicePlat = deviceVk_.GetPlatformDataVk();
2255 const VkDevice device = devicePlat.device;
2256
2257 const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformDataAccelerationStructure();
2258 const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;
2259
2260 // scratch data with user offset
2261 const VkDeviceAddress scratchData { GetBufferDeviceAddress(device, scratchBuffer->GetPlatformData().buffer) +
2262 VkDeviceSize(renderCmd.scratchOffset) };
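// NOTE: vkGetBufferDeviceAddress requires the bufferDeviceAddress feature (VK_KHR_buffer_device_address)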
2263
2264 const size_t arraySize =
2265 renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
2266 vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
2267 vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);
2268
2269 size_t arrayIndex = 0;
2270 for (const auto& trianglesRef : renderCmd.trianglesView) {
2271 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2272 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2273 nullptr, // pNext
2274 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR, // geometryType
2275 {}, // geometry;
2276 0, // flags
2277 };
2278 uint32_t primitiveCount = 0;
2279 const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.vertexData.handle);
2280 const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.indexData.handle);
2281 if (vb && ib) {
2282 const VkDeviceOrHostAddressConstKHR vertexData { GetBufferDeviceAddress(
2283 device, vb->GetPlatformData().buffer) };
2284 const VkDeviceOrHostAddressConstKHR indexData { GetBufferDeviceAddress(
2285 device, ib->GetPlatformData().buffer) };
2286 VkDeviceOrHostAddressConstKHR transformData {};
2287 if (RenderHandleUtil::IsValid(trianglesRef.transformData.handle)) {
2288 if (const GpuBufferVk* tr =
2289 gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.transformData.handle);
2290 tr) {
2291 transformData.deviceAddress = { GetBufferDeviceAddress(device, tr->GetPlatformData().buffer) };
2292 }
2293 }
2294 primitiveCount = trianglesRef.info.indexCount / 3u; // triangles
2295
2296 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2297 geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
2298 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
2299 nullptr, // pNext
2300 VkFormat(trianglesRef.info.vertexFormat), // vertexFormat
2301 vertexData, // vertexData
2302 VkDeviceSize(trianglesRef.info.vertexStride), // vertexStride
2303 trianglesRef.info.maxVertex, // maxVertex
2304 VkIndexType(trianglesRef.info.indexType), // indexType
2305 indexData, // indexData
2306 transformData, // transformData
2307 };
2308 }
2309 buildRangeInfos[arrayIndex] = {
2310 primitiveCount, // primitiveCount
2311 0u, // primitiveOffset
2312 0u, // firstVertex
2313 0u, // transformOffset
2314 };
2315 arrayIndex++;
2316 }
2317 for (const auto& aabbsRef : renderCmd.aabbsView) {
2318 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2319 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2320 nullptr, // pNext
2321 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR, // geometryType
2322 {}, // geometry;
2323 0, // flags
2324 };
2325 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2326 if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(aabbsRef.data.handle); iPtr) {
2327 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2328 }
2329 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2330 geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
2331 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
2332 nullptr, // pNext
2333 deviceAddress, // data
2334 aabbsRef.info.stride, // stride
2335 };
2336 buildRangeInfos[arrayIndex] = {
2337 1u, // primitiveCount
2338 0u, // primitiveOffset
2339 0u, // firstVertex
2340 0u, // transformOffset
2341 };
2342 arrayIndex++;
2343 }
2344 for (const auto& instancesRef : renderCmd.instancesView) {
2345 geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2346 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2347 nullptr, // pNext
2348 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR, // geometryType
2349 {}, // geometry;
2350 0, // flags
2351 };
2352 VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2353 if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(instancesRef.data.handle);
2354 iPtr) {
2355 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2356 }
2357 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2358 geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
2359 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
2360 nullptr, // pNext
2361 instancesRef.info.arrayOfPointers, // arrayOfPointers
2362 deviceAddress, // data
2363 };
2364 buildRangeInfos[arrayIndex] = {
2365 1u, // primitiveCount
2366 0u, // primitiveOffset
2367 0u, // firstVertex
2368 0u, // transformOffset
2369 };
2370 arrayIndex++;
2371 }
2372
2373 const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
2374 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
2375 nullptr, // pNext
2376 VkAccelerationStructureTypeKHR(renderCmd.type), // type
2377 VkBuildAccelerationStructureFlagsKHR(renderCmd.flags), // flags
2378 VkBuildAccelerationStructureModeKHR(renderCmd.mode), // mode
2379 VK_NULL_HANDLE, // srcAccelerationStructure
2380 dstAs, // dstAccelerationStructure
2381 uint32_t(arrayIndex), // geometryCount
2382 geometryData.data(), // pGeometries
2383 nullptr, // ppGeometries
2384 scratchData, // scratchData
2385 };
2386
2387 vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
2388 for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
2389 buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
2390 }
2391 const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2392 if (extFunctions.vkCmdBuildAccelerationStructuresKHR) {
2393 extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
2394 1u, // infoCount
2395 &buildGeometryInfo, // pInfos
2396 buildRangeInfosPtr.data()); // ppBuildRangeInfos
2397 }
2398 }
2399 #endif
2400 }
2401
2402 void RenderBackendVk::RenderCommand(const RenderCommandClearColorImage& renderCmd,
2403 const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2404 const StateCache& stateCache)
2405 {
2406 const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.handle);
2407 // the layout could be VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR but we don't support it at the moment
2408 const VkImageLayout imageLayout = (VkImageLayout)renderCmd.imageLayout;
2409 PLUGIN_ASSERT((imageLayout == VK_IMAGE_LAYOUT_GENERAL) || (imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
2410 if (imagePtr) {
2411 const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
2412 if (platImage.image) {
2413 VkClearColorValue clearColor;
2414 PLUGIN_STATIC_ASSERT(sizeof(clearColor) == sizeof(renderCmd.color));
2415 CloneData(&clearColor, sizeof(clearColor), &renderCmd.color, sizeof(renderCmd.color));
2416
2417 // NOTE: temporary vector allocated due to not having max limit
2418 vector<VkImageSubresourceRange> ranges(renderCmd.ranges.size());
2419 for (size_t idx = 0; idx < ranges.size(); ++idx) {
2420 const auto& inputRef = renderCmd.ranges[idx];
2421 ranges[idx] = {
2422 (VkImageAspectFlags)inputRef.imageAspectFlags, // aspectMask
2423 inputRef.baseMipLevel, // baseMipLevel
2424 inputRef.levelCount, // levelCount
2425 inputRef.baseArrayLayer, // baseArrayLayer
2426 inputRef.layerCount, // layerCount
2427 };
2428 }
2429
2430 vkCmdClearColorImage(cmdBuf.commandBuffer, // commandBuffer
2431 platImage.image, // image
2432 imageLayout, // imageLayout
2433 &clearColor, // pColor
2434 static_cast<uint32_t>(ranges.size()), // rangeCount
2435 ranges.data()); // pRanges
2436 }
2437 }
2438 }
2439
void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const ViewportDesc& vd = renderCmd.viewportDesc;

    const VkViewport viewport {
        vd.x, // x
        vd.y, // y
        vd.width, // width
        vd.height, // height
        vd.minDepth, // minDepth
        vd.maxDepth, // maxDepth
    };

    vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
        0, // firstViewport
        1, // viewportCount
        &viewport); // pViewports
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const ScissorDesc& sd = renderCmd.scissorDesc;

    const VkRect2D scissor {
        { sd.offsetX, sd.offsetY }, // offset
        { sd.extentWidth, sd.extentHeight }, // extent
    };

    vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
        0, // firstScissor
        1, // scissorCount
        &scissor); // pScissors
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.lineWidth); // lineWidth
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.depthBiasConstantFactor, // depthBiasConstantFactor
        renderCmd.depthBiasClamp, // depthBiasClamp
        renderCmd.depthBiasSlopeFactor); // depthBiasSlopeFactor
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.blendConstants); // blendConstants[4]
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
        renderCmd.minDepthBounds, // minDepthBounds
        renderCmd.maxDepthBounds); // maxDepthBounds
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    const VkStencilFaceFlags stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;

    if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
        vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask, // faceMask
            renderCmd.mask); // compareMask
    } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
        vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask, // faceMask
            renderCmd.mask); // writeMask
    } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
        vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
            stencilFaceMask, // faceMask
            renderCmd.mask); // reference
    }
}

void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateFragmentShadingRate& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
    if (extFunctions.vkCmdSetFragmentShadingRateKHR) {
        const VkExtent2D fragmentSize = { renderCmd.fragmentSize.width, renderCmd.fragmentSize.height };
        const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op1,
            (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op2,
        };

        extFunctions.vkCmdSetFragmentShadingRateKHR(cmdBuf.commandBuffer, // commandBuffer
            &fragmentSize, // pFragmentSize
            combinerOps); // combinerOps
    }
#endif
}

void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
    const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
    const StateCache& stateCache)
{
    if (stateCache.backendNode) {
        const RenderBackendRecordingStateVk recordingState = {
            {},
            cmdBuf.commandBuffer, // commandBuffer
            stateCache.lowLevelRenderPassData.renderPass, // renderPass
            stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
            stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
            stateCache.lowLevelRenderPassData.subpassIndex, // subpassIndex
            stateCache.pipelineLayout, // pipelineLayout
        };
        const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
        stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
    }
}

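// NOTE: timestamp write commands are not yet wired up (see the assert below); queryPool remains VK_NULL_HANDLE.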
void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
    NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
{
    PLUGIN_ASSERT_MSG(false, "not implemented");

    const VkPipelineStageFlagBits pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
    const uint32_t queryIndex = renderCmd.queryIndex;
    VkQueryPool queryPool = VK_NULL_HANDLE;

    vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
        queryPool, // queryPool
        queryIndex, // firstQuery
        1); // queryCount

    vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
        pipelineStageFlagBits, // pipelineStage
        queryPool, // queryPool
        queryIndex); // query
}

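// Transitions the swapchain images recorded by this command buffer to PRESENT_SRC_KHR so they can be handed to
// the presentation engine.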
void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf, const uint32_t cmdBufferIdx)
{
    for (auto& presRef : presentationData_.infos) {
        if (presRef.renderNodeCommandListIndex != cmdBufferIdx) {
            continue;
        }

        PLUGIN_ASSERT(presRef.presentationLayoutChangeNeeded);
        PLUGIN_ASSERT(presRef.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);

        const GpuResourceState& state = presRef.renderGraphProcessedState;
        const VkAccessFlags srcAccessMask = (VkAccessFlags)state.accessFlags;
        const VkAccessFlags dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
        const VkPipelineStageFlags srcStageMask = ((VkPipelineStageFlags)state.pipelineStageFlags) |
            (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
        const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
        const VkImageLayout oldLayout = (VkImageLayout)presRef.imageLayout;
        const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
        // NOTE: the queue is not currently checked (the image should be in the same queue where it was last used)
        constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
        constexpr VkImageSubresourceRange imageSubresourceRange {
            VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
            0, // baseMipLevel
            1, // levelCount
            0, // baseArrayLayer
            1, // layerCount
        };

        const VkImageMemoryBarrier imageMemoryBarrier {
            VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
            nullptr, // pNext
            srcAccessMask, // srcAccessMask
            dstAccessMask, // dstAccessMask
            oldLayout, // oldLayout
            newLayout, // newLayout
            srcQueueFamilyIndex, // srcQueueFamilyIndex
            dstQueueFamilyIndex, // dstQueueFamilyIndex
            presRef.swapchainImage, // image
            imageSubresourceRange, // subresourceRange
        };

        vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
            srcStageMask, // srcStageMask
            dstStageMask, // dstStageMask
            dependencyFlags, // dependencyFlags
            0, // memoryBarrierCount
            nullptr, // pMemoryBarriers
            0, // bufferMemoryBarrierCount
            nullptr, // pBufferMemoryBarriers
            1, // imageMemoryBarrierCount
            &imageMemoryBarrier); // pImageMemoryBarriers

        presRef.presentationLayoutChangeNeeded = false;
        presRef.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC;
    }
}

#if (RENDER_PERF_ENABLED == 1)

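// Lazily creates a CPU timer entry (and, when GPU timestamp queries are enabled, a GPU query set) for each
// render command context in the frame.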
void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
{
    for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
        const string_view& debugName = renderCommandContext.debugName;
        if (timers_.count(debugName) == 0) { // new timers
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            PerfDataSet& perfDataSet = timers_[debugName];
            constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
            perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
            constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
            perfDataSet.gpuBufferOffset = static_cast<uint32_t>(timers_.size()) * singleQueryByteSize;
#else
            timers_.insert({ debugName, {} });
#endif
        }
    }

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
    perfGpuTimerData_.currentOffset =
        (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
#endif
}

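// Publishes the accumulated CPU timers and the full GPU time of the frame to the performance data manager.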
void RenderBackendVk::EndFrameTimers()
{
    int64_t fullGpuTime = 0;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // already in microseconds
    fullGpuTime = perfGpuTimerData_.fullGpuCounter;
    perfGpuTimerData_.fullGpuCounter = 0;

    perfGpuTimerData_.gpuBuffer->Unmap();
#endif
    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
        perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
    }
}

void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
    const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits, const StateCache& stateCache)
{
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (stateCache.secondaryCommandBuffer) {
        return; // cannot be called inside render pass (e.g. with secondary command buffers)
    }
    PLUGIN_ASSERT(timers_.count(name) == 1);
    const PerfDataSet* perfDataSet = &timers_[name];
    if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
        const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
        if (platData.queryPool) {
            vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
                platData.queryPool, // queryPool
                queryIndex, // firstQuery
                1); // queryCount

            vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
                stageFlagBits, // pipelineStage
                platData.queryPool, // queryPool
                queryIndex); // query
        }
    }
#endif
}

namespace {
void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
{
    perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount);
    perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount);
    perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount);
    perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount);
    perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount);
    perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount);
    perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount);
    perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount);
    perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount);
    perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount);
}
} // namespace

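// Reads back the timestamps written on an earlier frame from the mapped readback buffer, updates the per-node
// CPU/GPU timings and counters, and records a copy of the fresh query results into the GPU buffer.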
void RenderBackendVk::CopyPerfTimeStamp(
    const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& stateCache)
{
    PLUGIN_ASSERT(timers_.count(name) == 1);
    PerfDataSet* const perfDataSet = &timers_[name];

#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // read back the results of earlier queries to the CPU
    // and record a copy from the query pool to the GPU buffer
    const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
    int64_t gpuMicroSeconds = 0;
    {
        auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
        auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);

        const uint64_t startStamp = *currentData;
        const uint64_t endStamp = *(currentData + 1);

        const double timestampPeriod =
            static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
        constexpr int64_t nanosToMicrosDivisor { 1000 };
        gpuMicroSeconds = static_cast<int64_t>((endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
        constexpr int64_t maxValidMicroSecondValue { 4294967295 };
        if (gpuMicroSeconds > maxValidMicroSecondValue) {
            gpuMicroSeconds = 0;
        }
        perfGpuTimerData_.fullGpuCounter += gpuMicroSeconds;
    }
#endif
    const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();

    if (IPerformanceDataManagerFactory* globalPerfData =
            GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");

        perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);

        // cannot be called inside render pass (e.g. with secondary command buffers)
        if (!stateCache.secondaryCommandBuffer) {
            if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
                const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());

                const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
                PLUGIN_ASSERT(gpuBuffer);
                const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();

                constexpr uint32_t queryCount = 2;
                constexpr VkDeviceSize queryStride = sizeof(uint64_t);
                constexpr VkQueryResultFlags queryResultFlags =
                    VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;

                if (platData.queryPool) {
                    vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
                        platData.queryPool, // queryPool
                        0, // firstQuery
                        queryCount, // queryCount
                        platBuffer.buffer, // dstBuffer
                        currentFrameByteOffset, // dstOffset
                        queryStride, // stride
                        queryResultFlags); // flags
                }
            }
        }
#endif
        UpdatePerfCounters(*perfData, name, perfDataSet->perfCounters);
        perfDataSet->perfCounters = {}; // reset perf counters
    }
}

#endif
RENDER_END_NAMESPACE()