1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "render_backend_vk.h"
17 
18 #include <algorithm>
19 #include <cstdint>
20 #include <functional>
21 #include <vulkan/vulkan_core.h>
22 
23 #include <base/containers/array_view.h>
24 #include <base/containers/fixed_string.h>
25 #include <base/containers/string_view.h>
26 #include <core/implementation_uids.h>
27 #include <core/perf/intf_performance_data_manager.h>
28 #include <core/plugin/intf_class_register.h>
29 #include <render/datastore/render_data_store_render_pods.h>
30 #include <render/device/pipeline_state_desc.h>
31 #include <render/namespace.h>
32 #include <render/nodecontext/intf_render_backend_node.h>
33 #include <render/vulkan/intf_device_vk.h>
34 
35 #if (RENDER_PERF_ENABLED == 1)
36 #include "perf/gpu_query.h"
37 #include "perf/gpu_query_manager.h"
38 #include "vulkan/gpu_query_vk.h"
39 #endif
40 
41 #include "device/gpu_buffer.h"
42 #include "device/gpu_image.h"
43 #include "device/gpu_resource_handle_util.h"
44 #include "device/gpu_resource_manager.h"
45 #include "device/gpu_sampler.h"
46 #include "device/pipeline_state_object.h"
47 #include "device/render_frame_sync.h"
48 #include "nodecontext/node_context_descriptor_set_manager.h"
49 #include "nodecontext/node_context_pool_manager.h"
50 #include "nodecontext/node_context_pso_manager.h"
51 #include "nodecontext/render_barrier_list.h"
52 #include "nodecontext/render_command_list.h"
53 #include "nodecontext/render_node_graph_node_store.h"
54 #include "render_backend.h"
55 #include "render_graph.h"
56 #include "util/log.h"
57 #include "util/render_frame_util.h"
58 #include "vulkan/gpu_buffer_vk.h"
59 #include "vulkan/gpu_image_vk.h"
60 #include "vulkan/gpu_sampler_vk.h"
61 #include "vulkan/gpu_semaphore_vk.h"
62 #include "vulkan/node_context_descriptor_set_manager_vk.h"
63 #include "vulkan/node_context_pool_manager_vk.h"
64 #include "vulkan/pipeline_state_object_vk.h"
65 #include "vulkan/render_frame_sync_vk.h"
66 #include "vulkan/swapchain_vk.h"
67 #include "vulkan/validate_vk.h"
68 
69 using namespace BASE_NS;
70 
71 using CORE_NS::GetInstance;
72 using CORE_NS::IParallelTaskQueue;
73 using CORE_NS::IPerformanceDataManager;
74 using CORE_NS::IPerformanceDataManagerFactory;
75 using CORE_NS::ITaskQueueFactory;
76 using CORE_NS::IThreadPool;
77 using CORE_NS::UID_TASK_QUEUE_FACTORY;
78 
79 RENDER_BEGIN_NAMESPACE()
80 namespace {
81 #if (RENDER_VULKAN_RT_ENABLED == 1)
82 inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
83 {
84     const VkBufferDeviceAddressInfo addressInfo {
85         VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
86         nullptr,                                      // pNext
87         buffer,                                       // buffer
88     };
89     return vkGetBufferDeviceAddress(device, &addressInfo);
90 }
91 #endif
92 #if (RENDER_PERF_ENABLED == 1)
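// Accumulates the per-command-list performance counters from src into dst (values are added, not overwritten).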
93 void CopyPerfCounters(const PerfCounters& src, PerfCounters& dst)
94 {
95     dst.drawCount += src.drawCount;
96     dst.drawIndirectCount += src.drawIndirectCount;
97     dst.dispatchCount += src.dispatchCount;
98     dst.dispatchIndirectCount += src.dispatchIndirectCount;
99     dst.bindPipelineCount += src.bindPipelineCount;
100     dst.renderPassCount += src.renderPassCount;
101     dst.updateDescriptorSetCount += src.updateDescriptorSetCount;
102     dst.bindDescriptorSetCount += src.bindDescriptorSetCount;
103     dst.triangleCount += src.triangleCount;
104     dst.instanceCount += src.instanceCount;
105 }
106 #endif
107 } // namespace
108 
109 // Helper class for running std::function as a ThreadPool task.
110 class FunctionTask final : public IThreadPool::ITask {
111 public:
112     static Ptr Create(std::function<void()> func)
113     {
114         return Ptr { new FunctionTask(BASE_NS::move(func)) };
115     }
116 
117     explicit FunctionTask(std::function<void()> func) : func_(BASE_NS::move(func)) {}
118 
119     void operator()() override
120     {
121         func_();
122     }
123 
124 protected:
125     void Destroy() override
126     {
127         delete this;
128     }
129 
130 private:
131     std::function<void()> func_;
132 };
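// Usage sketch (illustrative): tasks are created with FunctionTask::Create() and handed to an
// IParallelTaskQueue, e.g. queue_->Submit(taskId, FunctionTask::Create([this]() { /* work */ }));
// as done below in RenderBackendVk::RenderProcessCommandLists().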
133 
134 #if (RENDER_PERF_ENABLED == 1) && (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
135 namespace {
136 static constexpr uint32_t TIME_STAMP_PER_GPU_QUERY { 2u };
137 }
138 #endif
139 
140 RenderBackendVk::RenderBackendVk(
141     Device& dev, GpuResourceManager& gpuResourceManager, const CORE_NS::IParallelTaskQueue::Ptr& queue)
142     : RenderBackend(), device_(dev), deviceVk_(static_cast<DeviceVk&>(device_)), gpuResourceMgr_(gpuResourceManager),
143       queue_(queue.get())
144 {
145 #if (RENDER_PERF_ENABLED == 1)
146 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
147     gpuQueryMgr_ = make_unique<GpuQueryManager>();
148 
149     constexpr uint32_t maxQueryObjectCount { 512u };
150     constexpr uint32_t byteSize = maxQueryObjectCount * sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
151     const uint32_t fullByteSize = byteSize * device_.GetCommandBufferingCount();
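    // Sizing: each GPU query stores TIME_STAMP_PER_GPU_QUERY 64-bit timestamps, so one frame needs
    // maxQueryObjectCount * 2 * sizeof(uint64_t) bytes; the buffer is replicated per buffered frame
    // (GetCommandBufferingCount()), presumably so frames in flight do not overwrite each other's results.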
152     const GpuBufferDesc desc {
153         BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_DST_BIT,                        // usageFlags
154         CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memoryPropertyFlags
155         0,                                                                              // engineCreationFlags
156         fullByteSize,                                                                   // byteSize
157     };
158     perfGpuTimerData_.gpuBuffer = device_.CreateGpuBuffer(desc);
159     perfGpuTimerData_.currentOffset = 0;
160     perfGpuTimerData_.frameByteSize = byteSize;
161     perfGpuTimerData_.fullByteSize = fullByteSize;
162     { // zero initialize
163         uint8_t* bufferData = static_cast<uint8_t*>(perfGpuTimerData_.gpuBuffer->Map());
164         memset_s(bufferData, fullByteSize, 0, fullByteSize);
165         perfGpuTimerData_.gpuBuffer->Unmap();
166     }
167 #endif
168 #endif
169 }
170 
171 void RenderBackendVk::AcquirePresentationInfo(
172     RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
173 {
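    // For each swapchain in the back buffer config: pick the next acquire semaphore, call
    // vkAcquireNextImageKHR, remap the engine-side swapchain image handle to the acquired image, and
    // record whether a layout transition to PRESENT_SRC is still needed before presentation.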
174     if (device_.HasSwapchain()) {
175         presentationData_.present = true;
176         // resized to the same size as swapchainData for convenience
177         presentationData_.infos.resize(backBufferConfig.swapchainData.size());
178         for (size_t swapIdx = 0; swapIdx < backBufferConfig.swapchainData.size(); ++swapIdx) {
179             const auto& swapData = backBufferConfig.swapchainData[swapIdx];
180             PresentationInfo pi;
181             const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
182 
183             if (const SwapchainVk* swapchain = static_cast<const SwapchainVk*>(device_.GetSwapchain(swapData.handle));
184                 swapchain) {
185                 const SwapchainPlatformDataVk& platSwapchain = swapchain->GetPlatformData();
186                 const VkSwapchainKHR vkSwapchain = platSwapchain.swapchain;
187                 const uint32_t semaphoreIdx = swapchain->GetNextAcquireSwapchainSemaphoreIndex();
188                 PLUGIN_ASSERT(semaphoreIdx < platSwapchain.swapchainImages.semaphores.size());
189                 pi.swapchainSemaphore = platSwapchain.swapchainImages.semaphores[semaphoreIdx];
190                 pi.swapchain = platSwapchain.swapchain;
191                 pi.useSwapchain = true;
192                 // NOTE: for legacy default backbuffer reasons the same swapchain might appear multiple times ATM
193                 for (const auto& piRef : presentationData_.infos) {
194                     if (piRef.swapchain == pi.swapchain) {
195                         pi.useSwapchain = false;
196                     }
197                 }
198                 // NOTE: do not re-acquire the default backbuffer swapchain if it's in use with a different handle
199                 if (pi.useSwapchain) {
200                     const VkResult result = vkAcquireNextImageKHR(device, // device
201                         vkSwapchain,                                      // swapchain
202                         UINT64_MAX,                                       // timeout
203                         pi.swapchainSemaphore,                            // semaphore
204                         (VkFence) nullptr,                                // fence
205                         &pi.swapchainImageIndex);                         // pImageIndex
206 
207                     switch (result) {
208                         // Success
209                         case VK_SUCCESS:
210                         case VK_TIMEOUT:
211                         case VK_NOT_READY:
212                         case VK_SUBOPTIMAL_KHR:
213                             pi.validAcquire = true;
214                             break;
215 
216                         // Failure
217                         case VK_ERROR_OUT_OF_HOST_MEMORY:
218                         case VK_ERROR_OUT_OF_DEVICE_MEMORY:
219                             PLUGIN_LOG_E("vkAcquireNextImageKHR out of memory");
220                             return;
221                         case VK_ERROR_DEVICE_LOST:
222                             PLUGIN_LOG_E("vkAcquireNextImageKHR device lost");
223                             return;
224                         case VK_ERROR_OUT_OF_DATE_KHR:
225                             PLUGIN_LOG_E("vkAcquireNextImageKHR surface out of date");
226                             return;
227                         case VK_ERROR_SURFACE_LOST_KHR:
228                             PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost");
229                             return;
230 
231                         case VK_EVENT_SET:
232                         case VK_EVENT_RESET:
233                         case VK_INCOMPLETE:
234                         case VK_ERROR_INITIALIZATION_FAILED:
235                         case VK_ERROR_MEMORY_MAP_FAILED:
236                         case VK_ERROR_LAYER_NOT_PRESENT:
237                         case VK_ERROR_EXTENSION_NOT_PRESENT:
238                         case VK_ERROR_FEATURE_NOT_PRESENT:
239                         case VK_ERROR_INCOMPATIBLE_DRIVER:
240                         case VK_ERROR_TOO_MANY_OBJECTS:
241                         case VK_ERROR_FORMAT_NOT_SUPPORTED:
242                         case VK_ERROR_FRAGMENTED_POOL:
243                         case VK_ERROR_OUT_OF_POOL_MEMORY:
244                         case VK_ERROR_INVALID_EXTERNAL_HANDLE:
245                         case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
246                         case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
247                         case VK_ERROR_VALIDATION_FAILED_EXT:
248                         case VK_ERROR_INVALID_SHADER_NV:
249                         // case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
250                         case VK_ERROR_FRAGMENTATION_EXT:
251                         case VK_ERROR_NOT_PERMITTED_EXT:
252                         // case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
253                         case VK_RESULT_MAX_ENUM:
254                         default:
255                             PLUGIN_LOG_E("vkAcquireNextImageKHR surface lost. Device invalidated");
256                             PLUGIN_ASSERT(false && "unknown result from vkAcquireNextImageKHR");
257                             device_.SetDeviceStatus(false);
258                             break;
259                     }
260 
261                     if (pi.swapchainImageIndex >= static_cast<uint32_t>(platSwapchain.swapchainImages.images.size())) {
262                         PLUGIN_LOG_E("swapchain image index (%u) should be smaller than (%u)", pi.swapchainImageIndex,
263                             static_cast<uint32_t>(platSwapchain.swapchainImages.images.size()));
264                     }
265 
266                     const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
267                     const RenderHandle handle = swapchainData.remappableSwapchainImage;
268                     if (pi.swapchainImageIndex < swapchainData.imageViewCount) {
269                         // remap image to backbuffer
270                         const RenderHandle currentSwapchainHandle = swapchainData.imageViews[pi.swapchainImageIndex];
271                         // special swapchain remapping
272                         gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(handle, currentSwapchainHandle);
273                     }
274                     pi.renderGraphProcessedState = swapData.backBufferState;
275                     pi.imageLayout = swapData.layout;
276                     if (pi.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC) {
277                         pi.presentationLayoutChangeNeeded = true;
278                         pi.renderNodeCommandListIndex =
279                             static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size() - 1);
280 
281                         const GpuImageVk* swapImage = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
282                         PLUGIN_ASSERT(swapImage);
283                         pi.swapchainImage = swapImage->GetPlatformData().image;
284                     }
285                 }
286             }
287             presentationData_.infos[swapIdx] = pi;
288         }
289     }
290 }
291 
292 void RenderBackendVk::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
293 {
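    // Gathers every swapchain with a valid acquire into a single vkQueuePresentKHR call, which waits
    // on the semaphore signalled by the last submitted command buffer
    // (commandBufferSubmitter_.presentationWaitSemaphore).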
294     if (!backBufferConfig.swapchainData.empty()) {
295         if (device_.HasSwapchain() && presentationData_.present) {
296             PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8u);
297             uint32_t swapchainCount = 0U;
298             VkSwapchainKHR vkSwapchains[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { VK_NULL_HANDLE, VK_NULL_HANDLE,
299                 VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
300             uint32_t vkSwapImageIndices[DeviceConstants::MAX_SWAPCHAIN_COUNT] = { 0U, 0U, 0U, 0U, 0U, 0U, 0U, 0U };
301             for (const auto& presRef : presentationData_.infos) {
302                 // NOTE: the default backbuffer might appear in the list multiple times
303                 // the flag useSwapchain should be false in these cases
304                 if (presRef.useSwapchain && presRef.swapchain && presRef.validAcquire) {
305                     PLUGIN_ASSERT(presRef.imageLayout == ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
306                     vkSwapImageIndices[swapchainCount] = presRef.swapchainImageIndex;
307                     vkSwapchains[swapchainCount++] = presRef.swapchain;
308                 }
309             }
310 #if (RENDER_PERF_ENABLED == 1)
311             commonCpuTimers_.present.Begin();
312 #endif
313 
314             // NOTE: currently waits for the last valid submission semaphore
315             // (backtracked to a valid semaphore during command buffer submission)
316             if (swapchainCount > 0U) {
317                 VkSemaphore waitSemaphore = VK_NULL_HANDLE;
318                 uint32_t waitSemaphoreCount = 0;
319                 if (commandBufferSubmitter_.presentationWaitSemaphore != VK_NULL_HANDLE) {
320                     waitSemaphore = commandBufferSubmitter_.presentationWaitSemaphore;
321                     waitSemaphoreCount = 1;
322                 }
323 
324                 const VkPresentInfoKHR presentInfo {
325                     VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, // sType
326                     nullptr,                            // pNext
327                     waitSemaphoreCount,                 // waitSemaphoreCount
328                     &waitSemaphore,                     // pWaitSemaphores
329                     swapchainCount,                     // swapchainCount
330                     vkSwapchains,                       // pSwapchains
331                     vkSwapImageIndices,                 // pImageIndices
332                     nullptr                             // pResults
333                 };
334 
335                 const LowLevelGpuQueueVk lowLevelQueue = deviceVk_.GetPresentationGpuQueue();
336                 const VkResult result = vkQueuePresentKHR(lowLevelQueue.queue, // queue
337                     &presentInfo);                                             // pPresentInfo
338 
339                 switch (result) {
340                         // Success
341                     case VK_SUCCESS:
342                         break;
343                     case VK_SUBOPTIMAL_KHR:
344 #if (RENDER_VALIDATION_ENABLED == 1)
345                         PLUGIN_LOG_ONCE_W("VkQueuePresentKHR_suboptimal", "VkQueuePresentKHR suboptimal khr");
346 #endif
347                         break;
348 
349                         // Failure
350                     case VK_ERROR_OUT_OF_HOST_MEMORY:
351                     case VK_ERROR_OUT_OF_DEVICE_MEMORY:
352                         PLUGIN_LOG_E("vkQueuePresentKHR out of memory");
353                         return;
354                     case VK_ERROR_DEVICE_LOST:
355                         PLUGIN_LOG_E("vkQueuePresentKHR device lost");
356                         return;
357                     case VK_ERROR_OUT_OF_DATE_KHR:
358                         PLUGIN_LOG_E("vkQueuePresentKHR surface out of date");
359                         return;
360                     case VK_ERROR_SURFACE_LOST_KHR:
361                         PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
362                         return;
363 
364                     case VK_NOT_READY:
365                     case VK_TIMEOUT:
366                     case VK_EVENT_SET:
367                     case VK_EVENT_RESET:
368                     case VK_INCOMPLETE:
369                     case VK_ERROR_INITIALIZATION_FAILED:
370                     case VK_ERROR_MEMORY_MAP_FAILED:
371                     case VK_ERROR_LAYER_NOT_PRESENT:
372                     case VK_ERROR_EXTENSION_NOT_PRESENT:
373                     case VK_ERROR_FEATURE_NOT_PRESENT:
374                     case VK_ERROR_INCOMPATIBLE_DRIVER:
375                     case VK_ERROR_TOO_MANY_OBJECTS:
376                     case VK_ERROR_FORMAT_NOT_SUPPORTED:
377                     case VK_ERROR_FRAGMENTED_POOL:
378                     case VK_ERROR_OUT_OF_POOL_MEMORY:
379                     case VK_ERROR_INVALID_EXTERNAL_HANDLE:
380                     case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
381                     case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
382                     case VK_ERROR_VALIDATION_FAILED_EXT:
383                     case VK_ERROR_INVALID_SHADER_NV:
384                     case VK_ERROR_FRAGMENTATION_EXT:
385                     case VK_ERROR_NOT_PERMITTED_EXT:
386                     case VK_RESULT_MAX_ENUM:
387                     default:
388                         PLUGIN_LOG_E("vkQueuePresentKHR surface lost");
389                         PLUGIN_ASSERT(false && "unknown result from vkQueuePresentKHR");
390                         break;
391                 }
392             }
393 #if (RENDER_PERF_ENABLED == 1)
394             commonCpuTimers_.present.End();
395 #endif
396         } else {
397 #if (RENDER_VALIDATION_ENABLED == 1)
398             PLUGIN_LOG_ONCE_E(
399                 "RenderBackendVk::Present_layout", "Presentation layout has not been updated, cannot present.");
400 #endif
401         }
402     }
403 }
404 
405 void RenderBackendVk::Render(
406     RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
407 {
408     // NOTE: all command lists are validated before entering here
409 #if (RENDER_PERF_ENABLED == 1)
410     commonCpuTimers_.full.Begin();
411     commonCpuTimers_.acquire.Begin();
412 #endif
413 
414     commandBufferSubmitter_ = {};
415     commandBufferSubmitter_.commandBuffers.resize(renderCommandFrameData.renderCommandContexts.size());
416 
417     presentationData_.present = false;
418     presentationData_.infos.clear();
419 
420 #if (RENDER_PERF_ENABLED == 1)
421     commonCpuTimers_.acquire.End();
422 
423     StartFrameTimers(renderCommandFrameData);
424     commonCpuTimers_.execute.Begin();
425 #endif
426 
427     // command list process loop/execute
428     // first tries to acquire swapchain if needed in a task
429     RenderProcessCommandLists(renderCommandFrameData, backBufferConfig);
430 
431 #if (RENDER_PERF_ENABLED == 1)
432     commonCpuTimers_.execute.End();
433     commonCpuTimers_.submit.Begin();
434 #endif
435 
436     PLUGIN_ASSERT(renderCommandFrameData.renderCommandContexts.size() == commandBufferSubmitter_.commandBuffers.size());
437     // submit vulkan command buffers
438     // checks that presentation info has valid acquire
439     RenderProcessSubmitCommandLists(renderCommandFrameData, backBufferConfig);
440 
441 #if (RENDER_PERF_ENABLED == 1)
442     commonCpuTimers_.submit.End();
443     commonCpuTimers_.full.End();
444     EndFrameTimers();
445 #endif
446 }
447 
448 void RenderBackendVk::RenderProcessSubmitCommandLists(
449     RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
450 {
451     // NOTE: currently backtraces to final valid command buffer semaphore
452     uint32_t finalCommandBufferSubmissionIndex = ~0u;
453     commandBufferSubmitter_.presentationWaitSemaphore = VK_NULL_HANDLE;
454     bool swapchainSemaphoreWaited = false;
455     for (int32_t cmdBufferIdx = (int32_t)commandBufferSubmitter_.commandBuffers.size() - 1; cmdBufferIdx >= 0;
456          --cmdBufferIdx) {
457         if ((commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].semaphore != VK_NULL_HANDLE) &&
458             (commandBufferSubmitter_.commandBuffers[static_cast<size_t>(cmdBufferIdx)].commandBuffer !=
459                 VK_NULL_HANDLE)) {
460             finalCommandBufferSubmissionIndex = static_cast<uint32_t>(cmdBufferIdx);
461             break;
462         }
463     }
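    // The submission found above signals the frame fence, any external GPU signal semaphores, and
    // the presentation wait semaphore (handled in the finalCommandBufferSubmissionIndex branch below).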
464 
465     for (size_t cmdBufferIdx = 0; cmdBufferIdx < commandBufferSubmitter_.commandBuffers.size(); ++cmdBufferIdx) {
466         const auto& cmdSubmitterRef = commandBufferSubmitter_.commandBuffers[cmdBufferIdx];
467         if (cmdSubmitterRef.commandBuffer == VK_NULL_HANDLE) {
468             continue;
469         }
470 
471         const auto& renderContextRef = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
472 
473         uint32_t waitSemaphoreCount = 0u;
474         constexpr const uint32_t maxWaitSemaphoreCount =
475             PipelineStateConstants::MAX_RENDER_NODE_GPU_WAIT_SIGNALS + DeviceConstants::MAX_SWAPCHAIN_COUNT;
476         VkSemaphore waitSemaphores[maxWaitSemaphoreCount];
477         VkPipelineStageFlags waitSemaphorePipelineStageFlags[maxWaitSemaphoreCount];
478         for (uint32_t waitIdx = 0; waitIdx < renderContextRef.submitDepencies.waitSemaphoreCount; ++waitIdx) {
479             const uint32_t waitCmdBufferIdx = renderContextRef.submitDepencies.waitSemaphoreNodeIndices[waitIdx];
480             PLUGIN_ASSERT(waitCmdBufferIdx < static_cast<uint32_t>(commandBufferSubmitter_.commandBuffers.size()));
481 
482             VkSemaphore waitSemaphore = commandBufferSubmitter_.commandBuffers[waitCmdBufferIdx].semaphore;
483             if (waitSemaphore != VK_NULL_HANDLE) {
484                 waitSemaphores[waitSemaphoreCount] = waitSemaphore;
485                 waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
486                 waitSemaphoreCount++;
487             }
488         }
489 
490         if ((!swapchainSemaphoreWaited) && (renderContextRef.submitDepencies.waitForSwapchainAcquireSignal) &&
491             (!presentationData_.infos.empty())) {
492             swapchainSemaphoreWaited = true;
493             // go through all swapchain semaphores
494             for (const auto& presRef : presentationData_.infos) {
495                 if (presRef.swapchainSemaphore) {
496                     waitSemaphores[waitSemaphoreCount] = presRef.swapchainSemaphore;
497                     waitSemaphorePipelineStageFlags[waitSemaphoreCount] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
498                     waitSemaphoreCount++;
499                 }
500             }
501         }
502 
503         uint32_t signalSemaphoreCount = 0u;
504         PLUGIN_STATIC_ASSERT(DeviceConstants::MAX_SWAPCHAIN_COUNT == 8U);
505         constexpr uint32_t maxSignalSemaphoreCount { 1U + DeviceConstants::MAX_SWAPCHAIN_COUNT };
506         VkSemaphore semaphores[maxSignalSemaphoreCount] = { VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE,
507             VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE };
508         VkFence fence = VK_NULL_HANDLE;
509         if (finalCommandBufferSubmissionIndex == cmdBufferIdx) { // final presentation
510             // add fence signaling to last submission for frame sync
511             if (auto frameSync = static_cast<RenderFrameSyncVk*>(renderCommandFrameData.renderFrameSync); frameSync) {
512                 fence = frameSync->GetFrameFence().fence;
513                 frameSync->FrameFenceIsSignalled();
514             }
515             // signal external semaphores
516             if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
517                 auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
518                 const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
519                 PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
520                 if (externalSignals.size() == externalSemaphores.size()) {
521                     for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
522                         // only signal semaphores that have not been signaled yet
523                         if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
524                             if (const GpuSemaphoreVk* gs = (const GpuSemaphoreVk*)externalSemaphores[sigIdx].get();
525                                 gs) {
526                                 semaphores[signalSemaphoreCount++] = gs->GetPlatformData().semaphore;
527                                 externalSignals[sigIdx].signaled = true;
528                             }
529                         }
530                     }
531                 }
532             }
533 
534             if (presentationData_.present) {
535                 commandBufferSubmitter_.presentationWaitSemaphore =
536                     commandBufferSubmitter_.commandBuffers[cmdBufferIdx].semaphore;
537                 semaphores[signalSemaphoreCount++] = commandBufferSubmitter_.presentationWaitSemaphore;
538             }
539             // add additional semaphores
540             for (const auto& swapRef : backBufferConfig.swapchainData) {
541                 // should have been checked in render graph already
542                 if ((signalSemaphoreCount < maxSignalSemaphoreCount) && swapRef.config.gpuSemaphoreHandle) {
543                     semaphores[signalSemaphoreCount++] =
544                         VulkanHandleCast<VkSemaphore>(swapRef.config.gpuSemaphoreHandle);
545                 }
546             }
547         } else if (renderContextRef.submitDepencies.signalSemaphore) {
548             semaphores[signalSemaphoreCount++] = cmdSubmitterRef.semaphore;
549         }
550         PLUGIN_ASSERT(signalSemaphoreCount <= maxSignalSemaphoreCount);
551 
552         const VkSubmitInfo submitInfo {
553             VK_STRUCTURE_TYPE_SUBMIT_INFO,                        // sType
554             nullptr,                                              // pNext
555             waitSemaphoreCount,                                   // waitSemaphoreCount
556             (waitSemaphoreCount == 0) ? nullptr : waitSemaphores, // pWaitSemaphores
557             waitSemaphorePipelineStageFlags,                      // pWaitDstStageMask
558             1,                                                    // commandBufferCount
559             &cmdSubmitterRef.commandBuffer,                       // pCommandBuffers
560             signalSemaphoreCount,                                 // signalSemaphoreCount
561             (signalSemaphoreCount == 0) ? nullptr : semaphores,   // pSignalSemaphores
562         };
563 
564         const VkQueue queue = deviceVk_.GetGpuQueue(renderContextRef.renderCommandList->GetGpuQueue()).queue;
565         if (queue) {
566             VALIDATE_VK_RESULT(vkQueueSubmit(queue, // queue
567                 1,                                  // submitCount
568                 &submitInfo,                        // pSubmits
569                 fence));                            // fence
570         }
571     }
572 }
573 
574 void RenderBackendVk::RenderProcessCommandLists(
575     RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
576 {
577     const uint32_t cmdBufferCount = static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());
578     if (queue_) {
579         constexpr uint64_t acquireTaskIdendifier { ~0U };
580         vector<uint64_t> afterIdentifiers;
581         afterIdentifiers.reserve(1u); // needed for the swapchain acquire wait
582         // submit acquire task if needed
583         if ((!backBufferConfig.swapchainData.empty()) && device_.HasSwapchain()) {
584             queue_->Submit(
585                 acquireTaskIdendifier, FunctionTask::Create([this, &renderCommandFrameData, &backBufferConfig]() {
586                     AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
587                 }));
588         }
589         uint64_t secondaryIdx = cmdBufferCount;
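        // Task identifiers: primary command lists are submitted with their cmdBufferIdx, secondary
        // command lists get identifiers starting from cmdBufferCount, and the swapchain acquire task
        // uses acquireTaskIdendifier; afterIdentifiers collects the identifiers passed to SubmitAfter,
        // i.e. the tasks a submission is ordered after.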
590         for (uint32_t cmdBufferIdx = 0; cmdBufferIdx < cmdBufferCount;) {
591             afterIdentifiers.clear();
592             // add wait for acquire if needed
593             if (cmdBufferIdx >= renderCommandFrameData.firstSwapchainNodeIdx) {
594                 afterIdentifiers.push_back(acquireTaskIdendifier);
595             }
596             // NOTE: idx increase
597             const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
598             const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
599             PLUGIN_ASSERT(mrpData.subpassCount > 0);
600             const uint32_t rcCount = mrpData.subpassCount;
601             if (mrpData.secondaryCmdLists) {
602                 afterIdentifiers.reserve(afterIdentifiers.size() + rcCount);
603                 for (uint32_t secondIdx = 0; secondIdx < rcCount; ++secondIdx) {
604                     const uint64_t submitId = secondaryIdx++;
605                     afterIdentifiers.push_back(submitId);
606                     PLUGIN_ASSERT((cmdBufferIdx + secondIdx) < cmdBufferCount);
607                     queue_->SubmitAfter(afterIdentifiers, submitId,
608                         FunctionTask::Create([this, cmdBufferIdx, secondIdx, &renderCommandFrameData]() {
609                             const uint32_t currCmdBufferIdx = cmdBufferIdx + secondIdx;
610                             MultiRenderCommandListDesc mrcDesc;
611                             mrcDesc.multiRenderCommandListCount = 1u;
612                             mrcDesc.baseContext = nullptr;
613                             mrcDesc.secondaryCommandBuffer = true;
614                             RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currCmdBufferIdx];
615                             const DebugNames debugNames { ref2.debugName,
616                                 renderCommandFrameData.renderCommandContexts[currCmdBufferIdx].debugName };
617                             RenderSingleCommandList(ref2, currCmdBufferIdx, mrcDesc, debugNames);
618                         }));
619                 }
620                 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
621                     cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
622                         MultiRenderCommandListDesc mrcDesc;
623                         mrcDesc.multiRenderCommandListCount = rcCount;
624                         RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
625                         const DebugNames debugNames { ref2.debugName,
626                             renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
627                         RenderPrimaryRenderPass(renderCommandFrameData, ref2, cmdBufferIdx, mrcDesc, debugNames);
628                     }));
629             } else {
630                 queue_->SubmitAfter(array_view<const uint64_t>(afterIdentifiers.data(), afterIdentifiers.size()),
631                     cmdBufferIdx, FunctionTask::Create([this, cmdBufferIdx, rcCount, &renderCommandFrameData]() {
632                         MultiRenderCommandListDesc mrcDesc;
633                         mrcDesc.multiRenderCommandListCount = rcCount;
634                         if (rcCount > 1) {
635                             mrcDesc.multiRenderNodeCmdList = true;
636                             mrcDesc.baseContext = &renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
637                         }
638                         for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
639                             const uint32_t currIdx = cmdBufferIdx + rcIdx;
640                             mrcDesc.multiRenderCommandListIndex = rcIdx;
641                             RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
642                             const DebugNames debugNames { ref2.debugName,
643                                 renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
644                             RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
645                         }
646                     }));
647             }
648             // idx increase
649             cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
650         }
651 
652         // execute and wait for completion.
653         queue_->Execute();
654         queue_->Clear();
655     } else {
656         AcquirePresentationInfo(renderCommandFrameData, backBufferConfig);
657         for (uint32_t cmdBufferIdx = 0;
658             cmdBufferIdx < static_cast<uint32_t>(renderCommandFrameData.renderCommandContexts.size());) {
659             // NOTE: idx increase
660             const RenderCommandContext& ref = renderCommandFrameData.renderCommandContexts[cmdBufferIdx];
661             const MultiRenderPassCommandListData& mrpData = ref.renderCommandList->GetMultiRenderCommandListData();
662             PLUGIN_ASSERT(mrpData.subpassCount > 0);
663             const uint32_t rcCount = mrpData.subpassCount;
664 
665             MultiRenderCommandListDesc mrcDesc;
666             mrcDesc.multiRenderCommandListCount = rcCount;
667             mrcDesc.baseContext = (rcCount > 1) ? &renderCommandFrameData.renderCommandContexts[cmdBufferIdx] : nullptr;
668 
669             for (uint32_t rcIdx = 0; rcIdx < rcCount; ++rcIdx) {
670                 const uint32_t currIdx = cmdBufferIdx + rcIdx;
671                 mrcDesc.multiRenderCommandListIndex = rcIdx;
672                 RenderCommandContext& ref2 = renderCommandFrameData.renderCommandContexts[currIdx];
673                 const DebugNames debugNames { ref2.debugName,
674                     renderCommandFrameData.renderCommandContexts[cmdBufferIdx].debugName };
675                 RenderSingleCommandList(ref2, cmdBufferIdx, mrcDesc, debugNames);
676             }
677             cmdBufferIdx += (rcCount > 1) ? rcCount : 1;
678         }
679     }
680 }
681 
682 void RenderBackendVk::RenderPrimaryRenderPass(const RenderCommandFrameData& renderCommandFrameData,
683     RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
684     const MultiRenderCommandListDesc& multiRenderCommandListDesc, const DebugNames& debugNames)
685 {
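    // Records the primary command buffer for a render pass whose subpass contents live in secondary
    // command buffers (recorded by RenderSingleCommandList): barriers and the render pass begin are
    // recorded here, each subpass executes its secondary command buffer, and the render pass is then
    // ended with a full END_RENDER_PASS.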
686     const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
687     NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
688     NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
689 
690     const ContextCommandPoolVk& ptrCmdPool =
691         (static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
692     const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool.commandBuffer;
693 
694     // begin cmd buffer
695     const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
696     constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
697     const bool valid = ptrCmdPool.commandPool && cmdBuffer.commandBuffer;
698     if (valid) {
699         VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
700             ptrCmdPool.commandPool,                   // commandPool
701             commandPoolResetFlags));                  // flags
702     }
703 
704     constexpr VkCommandBufferUsageFlags commandBufferUsageFlags {
705         VkCommandBufferUsageFlagBits::VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
706     };
707     const VkCommandBufferBeginInfo commandBufferBeginInfo {
708         VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType
709         nullptr,                                     // pNext
710         commandBufferUsageFlags,                     // flags
711         nullptr,                                     // pInheritanceInfo
712     };
713     if (valid) {
714         VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
715             &commandBufferBeginInfo));                                   // pBeginInfo
716     }
717 
718     StateCache stateCache;
719 
720     const MultiRenderPassCommandListData mrpcld = renderCommandList.GetMultiRenderCommandListData();
721     const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
722     const uint32_t commandCount = static_cast<uint32_t>(rcRef.size());
723     const RenderCommandBeginRenderPass* rcBeginRenderPass =
724         (mrpcld.rpBeginCmdIndex < commandCount)
725             ? static_cast<const RenderCommandBeginRenderPass*>(rcRef[mrpcld.rpBeginCmdIndex].rc)
726             : nullptr;
727     const RenderCommandEndRenderPass* rcEndRenderPass =
728         (mrpcld.rpEndCmdIndex < commandCount)
729             ? static_cast<const RenderCommandEndRenderPass*>(rcRef[mrpcld.rpEndCmdIndex].rc)
730             : nullptr;
731 
732     if (rcBeginRenderPass && rcEndRenderPass) {
733         if (mrpcld.rpBarrierCmdIndex < commandCount) {
734             const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
735             PLUGIN_ASSERT(rcRef[mrpcld.rpBarrierCmdIndex].type == RenderCommandType::BARRIER_POINT);
736             const RenderCommandBarrierPoint& barrierPoint =
737                 *static_cast<RenderCommandBarrierPoint*>(rcRef[mrpcld.rpBarrierCmdIndex].rc);
738             // handle all barriers before render command that needs resource syncing
739             RenderCommand(barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
740         }
741 
742         // begin render pass
743         stateCache.primaryRenderPass = true;
744         RenderCommand(*rcBeginRenderPass, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
745         stateCache.primaryRenderPass = false;
746 
747         // get secondary command buffers from correct indices and execute
748         for (uint32_t idx = 0; idx < multiRenderCommandListDesc.multiRenderCommandListCount; ++idx) {
749             const uint32_t currCmdBufIdx = cmdBufIdx + idx;
750             PLUGIN_ASSERT(currCmdBufIdx < renderCommandFrameData.renderCommandContexts.size());
751             const RenderCommandContext& currContext = renderCommandFrameData.renderCommandContexts[currCmdBufIdx];
752             NodeContextPoolManagerVk& contextPoolVk =
753                 *static_cast<NodeContextPoolManagerVk*>(currContext.nodeContextPoolMgr);
754 
755             const array_view<const RenderCommandWithType> mlaRcRef = currContext.renderCommandList->GetRenderCommands();
756             const auto& mla = currContext.renderCommandList->GetMultiRenderCommandListData();
757             const uint32_t mlaCommandCount = static_cast<uint32_t>(mlaRcRef.size());
758             // next subpass is only recorded from the second command list onwards
759             if ((idx > 0) && (mla.rpBeginCmdIndex < mlaCommandCount)) {
760                 RenderCommandBeginRenderPass renderPass =
761                     *static_cast<RenderCommandBeginRenderPass*>(mlaRcRef[mla.rpBeginCmdIndex].rc);
762                 renderPass.renderPassDesc.subpassContents =
763                     SubpassContents::CORE_SUBPASS_CONTENTS_SECONDARY_COMMAND_LISTS;
764                 stateCache.renderCommandBeginRenderPass = nullptr; // reset
765                 RenderCommand(
766                     renderPass, cmdBuffer, *currContext.nodeContextPsoMgr, *currContext.nodeContextPoolMgr, stateCache);
767             }
768             RenderExecuteSecondaryCommandLists(cmdBuffer, contextPoolVk.GetContextSecondaryCommandPool().commandBuffer);
769         }
770 
771         // end render pass (replace the primary render pass)
772         stateCache.renderCommandBeginRenderPass = rcBeginRenderPass;
773         // NOTE: render graph has batched the subpasses to have END_SUBPASS, we need END_RENDER_PASS
774         constexpr RenderCommandEndRenderPass rcerp = {};
775         RenderCommand(rcerp, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
776     }
777 
778     // end cmd buffer
779     if (valid) {
780         VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
781     }
782 
783     commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
784 }
785 
786 void RenderBackendVk::RenderExecuteSecondaryCommandLists(
787     const LowLevelCommandBufferVk& cmdBuffer, const LowLevelCommandBufferVk& executeCmdBuffer)
788 {
789     if (cmdBuffer.commandBuffer && executeCmdBuffer.commandBuffer) {
790         vkCmdExecuteCommands(cmdBuffer.commandBuffer, // commandBuffer
791             1u,                                       // commandBufferCount
792             &executeCmdBuffer.commandBuffer);         // pCommandBuffers
793     }
794 }
795 
796 VkCommandBufferInheritanceInfo RenderBackendVk::RenderGetCommandBufferInheritanceInfo(
797     const RenderCommandList& renderCommandList, NodeContextPoolManager& poolMgr)
798 {
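    // Secondary command buffers recorded with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must
    // declare the render pass and subpass they execute in; the framebuffer may be left as
    // VK_NULL_HANDLE, which the Vulkan specification allows when it is not known at record time.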
799     NodeContextPoolManagerVk& poolMgrVk = static_cast<NodeContextPoolManagerVk&>(poolMgr);
800 
801     const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
802     const uint32_t cmdCount = static_cast<uint32_t>(rcRef.size());
803 
804     const MultiRenderPassCommandListData mrpCmdData = renderCommandList.GetMultiRenderCommandListData();
805     PLUGIN_ASSERT(mrpCmdData.rpBeginCmdIndex < cmdCount);
806     PLUGIN_ASSERT(mrpCmdData.rpEndCmdIndex < cmdCount);
807     if (mrpCmdData.rpBeginCmdIndex < cmdCount) {
808         const auto& ref = rcRef[mrpCmdData.rpBeginCmdIndex];
809         PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
810         const RenderCommandBeginRenderPass& renderCmd = *static_cast<const RenderCommandBeginRenderPass*>(ref.rc);
811         LowLevelRenderPassDataVk lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
812 
813         const uint32_t subpass = renderCmd.subpassStartIndex;
814         return VkCommandBufferInheritanceInfo {
815             VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // sType
816             nullptr,                                           // pNext
817             lowLevelRenderPassData.renderPass,                 // renderPass
818             subpass,                                           // subpass
819             VK_NULL_HANDLE,                                    // framebuffer
820             VK_FALSE,                                          // occlusionQueryEnable
821             0,                                                 // queryFlags
822             0,                                                 // pipelineStatistics
823         };
824     } else {
825         return VkCommandBufferInheritanceInfo {};
826     }
827 }
828 
829 void RenderBackendVk::RenderSingleCommandList(RenderCommandContext& renderCommandCtx, const uint32_t cmdBufIdx,
830     const MultiRenderCommandListDesc& mrclDesc, const DebugNames& debugNames)
831 {
832     // these are validated in render graph
833     const RenderCommandList& renderCommandList = *renderCommandCtx.renderCommandList;
834     const RenderBarrierList& renderBarrierList = *renderCommandCtx.renderBarrierList;
835     NodeContextPsoManager& nodeContextPsoMgr = *renderCommandCtx.nodeContextPsoMgr;
836     NodeContextDescriptorSetManager& nodeContextDescriptorSetMgr = *renderCommandCtx.nodeContextDescriptorSetMgr;
837     NodeContextPoolManager& contextPoolMgr = *renderCommandCtx.nodeContextPoolMgr;
838 
839     contextPoolMgr.BeginBackendFrame();
840     ((NodeContextDescriptorSetManagerVk&)(nodeContextDescriptorSetMgr)).BeginBackendFrame();
841     nodeContextPsoMgr.BeginBackendFrame();
842 
843     const array_view<const RenderCommandWithType> rcRef = renderCommandList.GetRenderCommands();
844 
845     StateCache stateCache = {}; // state cache for this render command list
846     stateCache.backendNode = renderCommandCtx.renderBackendNode;
847     stateCache.secondaryCommandBuffer = mrclDesc.secondaryCommandBuffer;
848 
849     // the command buffer has been waited on with a single frame fence
850     const bool multiCmdList = (mrclDesc.multiRenderNodeCmdList);
851     const bool beginCommandBuffer = (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == 0));
852     const bool endCommandBuffer =
853         (!multiCmdList || (mrclDesc.multiRenderCommandListIndex == mrclDesc.multiRenderCommandListCount - 1));
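    // With a multi render node command list several lists share the primary command buffer of the
    // base context: only the first list in the chain begins it and only the last one ends it.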
854     const ContextCommandPoolVk* ptrCmdPool = nullptr;
855     if (mrclDesc.multiRenderNodeCmdList) {
856         PLUGIN_ASSERT(mrclDesc.baseContext);
857         ptrCmdPool = &(static_cast<NodeContextPoolManagerVk*>(mrclDesc.baseContext->nodeContextPoolMgr))
858                           ->GetContextCommandPool();
859     } else if (mrclDesc.secondaryCommandBuffer) {
860         PLUGIN_ASSERT(stateCache.secondaryCommandBuffer);
861         ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextSecondaryCommandPool();
862     } else {
863         ptrCmdPool = &(static_cast<NodeContextPoolManagerVk&>(contextPoolMgr)).GetContextCommandPool();
864     }
865 
866     // update cmd list context descriptor sets
867     UpdateCommandListDescriptorSets(renderCommandList, stateCache, nodeContextDescriptorSetMgr);
868 
869     PLUGIN_ASSERT(ptrCmdPool);
870     const LowLevelCommandBufferVk& cmdBuffer = ptrCmdPool->commandBuffer;
871 
872 #if (RENDER_PERF_ENABLED == 1)
873 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
874     const VkQueueFlags queueFlags = deviceVk_.GetGpuQueue(renderCommandList.GetGpuQueue()).queueInfo.queueFlags;
875     const bool validGpuQueries = (queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) > 0;
876 #endif
877     PLUGIN_ASSERT(timers_.count(debugNames.renderCommandBufferName) == 1);
878     PerfDataSet* perfDataSet = &timers_[debugNames.renderCommandBufferName];
879 #endif
880 
881     if (beginCommandBuffer) {
882         const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
883         constexpr VkCommandPoolResetFlags commandPoolResetFlags { 0 };
884         VALIDATE_VK_RESULT(vkResetCommandPool(device, // device
885             ptrCmdPool->commandPool,                  // commandPool
886             commandPoolResetFlags));                  // flags
887 
888         VkCommandBufferUsageFlags commandBufferUsageFlags { VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT };
889         VkCommandBufferInheritanceInfo inheritanceInfo {};
890         if (stateCache.secondaryCommandBuffer) {
891             commandBufferUsageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
892             inheritanceInfo = RenderGetCommandBufferInheritanceInfo(renderCommandList, contextPoolMgr);
893         }
894         const VkCommandBufferBeginInfo commandBufferBeginInfo {
895             VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,                    // sType
896             nullptr,                                                        // pNext
897             commandBufferUsageFlags,                                        // flags
898             mrclDesc.secondaryCommandBuffer ? (&inheritanceInfo) : nullptr, // pInheritanceInfo
899         };
900 
901         VALIDATE_VK_RESULT(vkBeginCommandBuffer(cmdBuffer.commandBuffer, // commandBuffer
902             &commandBufferBeginInfo));                                   // pBeginInfo
903 
904 #if (RENDER_PERF_ENABLED == 1)
905         if (perfDataSet) {
906 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
907             if (validGpuQueries) {
908                 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle);
909                 PLUGIN_ASSERT(gpuQuery);
910 
911                 gpuQuery->NextQueryIndex();
912 
913                 WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 0,
914                     VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stateCache);
915             }
916 #endif
917             perfDataSet->cpuTimer.Begin();
918         }
919 #endif
920     }
921 
922 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
923     if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
924         const VkDebugUtilsLabelEXT label {
925             VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
926             nullptr,                                 // pNext
927             debugNames.renderCommandListName.data(), // pLabelName
928             { 1.f, 1.f, 1.f, 1.f }                   // color[4]
929         };
930         deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
931     }
932 #endif
933 
934     for (const auto& ref : rcRef) {
935         if (!stateCache.validCommandList) {
936 #if (RENDER_VALIDATION_ENABLED == 1)
937             PLUGIN_LOG_ONCE_E("invalidated_be_cmd_list_" + debugNames.renderCommandListName,
938                 "RENDER_VALIDATION: (RN:%s) backend render commands are invalidated",
939                 debugNames.renderCommandListName.data());
940 #endif
941             break;
942         }
943 
944         PLUGIN_ASSERT(ref.rc);
945 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
946         if (deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT) {
947             const uint32_t index = static_cast<uint32_t>(ref.type) < countof(COMMAND_NAMES) ?
948                 static_cast<uint32_t>(ref.type) : 0;
949             const VkDebugUtilsLabelEXT label {
950                 VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, // sType
951                 nullptr,                                 // pNext
952                 COMMAND_NAMES[index],                    // pLabelName
953                 { 0.87f, 0.83f, 0.29f, 1.f }             // color[4]
954             };
955             deviceVk_.GetDebugFunctionUtilities().vkCmdBeginDebugUtilsLabelEXT(cmdBuffer.commandBuffer, &label);
956         }
957 #endif
958 
959         switch (ref.type) {
960             case RenderCommandType::BARRIER_POINT: {
961                 if (!stateCache.secondaryCommandBuffer) {
962                     const RenderCommandBarrierPoint& barrierPoint = *static_cast<RenderCommandBarrierPoint*>(ref.rc);
963                     // handle all barriers before render command that needs resource syncing
964                     RenderCommand(
965                         barrierPoint, cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache, renderBarrierList);
966                 }
967                 break;
968             }
969             case RenderCommandType::DRAW: {
970                 RenderCommand(
971                     *static_cast<RenderCommandDraw*>(ref.rc), cmdBuffer, nodeContextPsoMgr, contextPoolMgr, stateCache);
972                 break;
973             }
974             case RenderCommandType::DRAW_INDIRECT: {
975                 RenderCommand(*static_cast<RenderCommandDrawIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
976                     contextPoolMgr, stateCache);
977                 break;
978             }
979             case RenderCommandType::DISPATCH: {
980                 RenderCommand(*static_cast<RenderCommandDispatch*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
981                     contextPoolMgr, stateCache);
982                 break;
983             }
984             case RenderCommandType::DISPATCH_INDIRECT: {
985                 RenderCommand(*static_cast<RenderCommandDispatchIndirect*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
986                     contextPoolMgr, stateCache);
987                 break;
988             }
989             case RenderCommandType::BIND_PIPELINE: {
990                 RenderCommand(*static_cast<RenderCommandBindPipeline*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
991                     contextPoolMgr, stateCache);
992                 break;
993             }
994             case RenderCommandType::BEGIN_RENDER_PASS: {
995                 RenderCommand(*static_cast<RenderCommandBeginRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
996                     contextPoolMgr, stateCache);
997                 break;
998             }
999             case RenderCommandType::NEXT_SUBPASS: {
1000                 RenderCommand(*static_cast<RenderCommandNextSubpass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1001                     contextPoolMgr, stateCache);
1002                 break;
1003             }
1004             case RenderCommandType::END_RENDER_PASS: {
1005                 RenderCommand(*static_cast<RenderCommandEndRenderPass*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1006                     contextPoolMgr, stateCache);
1007                 break;
1008             }
1009             case RenderCommandType::BIND_VERTEX_BUFFERS: {
1010                 RenderCommand(*static_cast<RenderCommandBindVertexBuffers*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1011                     contextPoolMgr, stateCache);
1012                 break;
1013             }
1014             case RenderCommandType::BIND_INDEX_BUFFER: {
1015                 RenderCommand(*static_cast<RenderCommandBindIndexBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1016                     contextPoolMgr, stateCache);
1017                 break;
1018             }
1019             case RenderCommandType::COPY_BUFFER: {
1020                 RenderCommand(*static_cast<RenderCommandCopyBuffer*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1021                     contextPoolMgr, stateCache);
1022                 break;
1023             }
1024             case RenderCommandType::COPY_BUFFER_IMAGE: {
1025                 RenderCommand(*static_cast<RenderCommandCopyBufferImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1026                     contextPoolMgr, stateCache);
1027                 break;
1028             }
1029             case RenderCommandType::COPY_IMAGE: {
1030                 RenderCommand(*static_cast<RenderCommandCopyImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1031                     contextPoolMgr, stateCache);
1032                 break;
1033             }
1034             case RenderCommandType::BIND_DESCRIPTOR_SETS: {
1035                 RenderCommand(*static_cast<RenderCommandBindDescriptorSets*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1036                     contextPoolMgr, stateCache, nodeContextDescriptorSetMgr);
1037                 break;
1038             }
1039             case RenderCommandType::PUSH_CONSTANT: {
1040                 RenderCommand(*static_cast<RenderCommandPushConstant*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1041                     contextPoolMgr, stateCache);
1042                 break;
1043             }
1044             case RenderCommandType::BLIT_IMAGE: {
1045                 RenderCommand(*static_cast<RenderCommandBlitImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1046                     contextPoolMgr, stateCache);
1047                 break;
1048             }
1049             case RenderCommandType::BUILD_ACCELERATION_STRUCTURE: {
1050                 RenderCommand(*static_cast<RenderCommandBuildAccelerationStructure*>(ref.rc), cmdBuffer,
1051                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1052                 break;
1053             }
1054             case RenderCommandType::CLEAR_COLOR_IMAGE: {
1055                 RenderCommand(*static_cast<RenderCommandClearColorImage*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1056                     contextPoolMgr, stateCache);
1057                 break;
1058             }
1059             // dynamic states
1060             case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
1061                 RenderCommand(*static_cast<RenderCommandDynamicStateViewport*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1062                     contextPoolMgr, stateCache);
1063                 break;
1064             }
1065             case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
1066                 RenderCommand(*static_cast<RenderCommandDynamicStateScissor*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1067                     contextPoolMgr, stateCache);
1068                 break;
1069             }
1070             case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
1071                 RenderCommand(*static_cast<RenderCommandDynamicStateLineWidth*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1072                     contextPoolMgr, stateCache);
1073                 break;
1074             }
1075             case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
1076                 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBias*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1077                     contextPoolMgr, stateCache);
1078                 break;
1079             }
1080             case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
1081                 RenderCommand(*static_cast<RenderCommandDynamicStateBlendConstants*>(ref.rc), cmdBuffer,
1082                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1083                 break;
1084             }
1085             case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
1086                 RenderCommand(*static_cast<RenderCommandDynamicStateDepthBounds*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1087                     contextPoolMgr, stateCache);
1088                 break;
1089             }
1090             case RenderCommandType::DYNAMIC_STATE_STENCIL: {
1091                 RenderCommand(*static_cast<RenderCommandDynamicStateStencil*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1092                     contextPoolMgr, stateCache);
1093                 break;
1094             }
1095             case RenderCommandType::DYNAMIC_STATE_FRAGMENT_SHADING_RATE: {
1096                 RenderCommand(*static_cast<RenderCommandDynamicStateFragmentShadingRate*>(ref.rc), cmdBuffer,
1097                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1098                 break;
1099             }
1100             case RenderCommandType::EXECUTE_BACKEND_FRAME_POSITION: {
1101                 RenderCommand(*static_cast<RenderCommandExecuteBackendFramePosition*>(ref.rc), cmdBuffer,
1102                     nodeContextPsoMgr, contextPoolMgr, stateCache);
1103                 break;
1104             }
1105             //
1106             case RenderCommandType::WRITE_TIMESTAMP: {
1107                 RenderCommand(*static_cast<RenderCommandWriteTimestamp*>(ref.rc), cmdBuffer, nodeContextPsoMgr,
1108                     contextPoolMgr, stateCache);
1109                 break;
1110             }
1111             case RenderCommandType::UNDEFINED:
1112             case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
1113             case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
1114             default: {
1115                 PLUGIN_ASSERT(false && "invalid render command");
1116                 break;
1117             }
1118         }
1119 #if (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
1120         if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1121             deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1122         }
1123 #endif
1124     }
1125 
1126     if (!presentationData_.infos.empty()) {
1127         RenderPresentationLayout(cmdBuffer, cmdBufIdx);
1128     }
1129 
1130 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
1131     if (deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT) {
1132         deviceVk_.GetDebugFunctionUtilities().vkCmdEndDebugUtilsLabelEXT(cmdBuffer.commandBuffer);
1133     }
1134 #endif
1135 
1136 #if (RENDER_PERF_ENABLED == 1)
1137     // copy counters
1138     if (perfDataSet) {
1139         CopyPerfCounters(stateCache.perfCounters, perfDataSet->perfCounters);
1140     }
1141 #endif
1142 
1143     if (endCommandBuffer) {
1144 #if (RENDER_PERF_ENABLED == 1)
1145         if (perfDataSet) {
1146             perfDataSet->cpuTimer.End();
1147         }
1148 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
1149         if (validGpuQueries) {
1150             WritePerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, 1,
1151                 VkPipelineStageFlagBits::VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, stateCache);
1152         }
1153 #endif
1154         CopyPerfTimeStamp(cmdBuffer, debugNames.renderCommandBufferName, stateCache);
1155 #endif
1156 
1157         VALIDATE_VK_RESULT(vkEndCommandBuffer(cmdBuffer.commandBuffer)); // commandBuffer
1158 
1159         if (mrclDesc.secondaryCommandBuffer) {
1160             commandBufferSubmitter_.commandBuffers[cmdBufIdx] = {};
1161         } else {
1162             commandBufferSubmitter_.commandBuffers[cmdBufIdx] = { cmdBuffer.commandBuffer, cmdBuffer.semaphore };
1163         }
1164     }
1165 }
1166 
1167 void RenderBackendVk::RenderCommand(const RenderCommandBindPipeline& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1168     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1169 {
1170     const RenderHandle psoHandle = renderCmd.psoHandle;
1171     const VkPipelineBindPoint pipelineBindPoint = (VkPipelineBindPoint)renderCmd.pipelineBindPoint;
1172 
1173     stateCache.psoHandle = psoHandle;
1174 
1175     VkPipeline pipeline { VK_NULL_HANDLE };
1176     VkPipelineLayout pipelineLayout { VK_NULL_HANDLE };
1177     if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_COMPUTE) {
1178         const ComputePipelineStateObjectVk* pso = static_cast<const ComputePipelineStateObjectVk*>(
1179             psoMgr.GetComputePso(psoHandle, &stateCache.lowLevelPipelineLayoutData));
1180         if (pso) {
1181             const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1182             pipeline = plat.pipeline;
1183             pipelineLayout = plat.pipelineLayout;
1184         }
1185     } else if (pipelineBindPoint == VkPipelineBindPoint::VK_PIPELINE_BIND_POINT_GRAPHICS) {
1186         PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1187         if (stateCache.renderCommandBeginRenderPass) {
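            // graphics PSOs are specialized per render pass: combine the render pass compatibility hash
            // with the descriptor set hash (when present) so incompatible variants map to distinct pipelines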
1188             uint64_t psoStateHash = stateCache.lowLevelRenderPassData.renderPassCompatibilityHash;
1189             if (stateCache.pipelineDescSetHash != 0) {
1190                 HashCombine(psoStateHash, stateCache.pipelineDescSetHash);
1191             }
1192             const GraphicsPipelineStateObjectVk* pso = static_cast<const GraphicsPipelineStateObjectVk*>(
1193                 psoMgr.GetGraphicsPso(psoHandle, stateCache.renderCommandBeginRenderPass->renderPassDesc,
1194                     stateCache.renderCommandBeginRenderPass->subpasses,
1195                     stateCache.renderCommandBeginRenderPass->subpassStartIndex, psoStateHash,
1196                     &stateCache.lowLevelRenderPassData, &stateCache.lowLevelPipelineLayoutData));
1197             if (pso) {
1198                 const PipelineStateObjectPlatformDataVk& plat = pso->GetPlatformData();
1199                 pipeline = plat.pipeline;
1200                 pipelineLayout = plat.pipelineLayout;
1201             }
1202         }
1203     }
1204 
1205     // NOTE: render front-end expects pso binding after begin render pass
1206     // in some situations the render pass might change and therefore the pipeline changes
1207     // in some situations the render pass is the same and the rebinding is not needed
1208     const bool newPipeline = (pipeline != stateCache.pipeline);
1209     const bool valid = (pipeline != VK_NULL_HANDLE);
1210     if (valid && newPipeline) {
1211         stateCache.pipeline = pipeline;
1212         stateCache.pipelineLayout = pipelineLayout;
1213         stateCache.lowLevelPipelineLayoutData.pipelineLayout = pipelineLayout;
1214         vkCmdBindPipeline(cmdBuf.commandBuffer, // commandBuffer
1215             pipelineBindPoint,                  // pipelineBindPoint
1216             pipeline);                          // pipeline
1217 #if (RENDER_PERF_ENABLED == 1)
1218         stateCache.perfCounters.bindPipelineCount++;
1219 #endif
1220     }
1221 }
1222 
1223 void RenderBackendVk::RenderCommand(const RenderCommandDraw& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1224     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1225 {
1226     if (stateCache.validBindings) {
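        // a non-zero index count selects the indexed draw path; otherwise a plain vertex draw is issued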
1227         if (renderCmd.indexCount) {
1228             vkCmdDrawIndexed(cmdBuf.commandBuffer, // commandBuffer
1229                 renderCmd.indexCount,              // indexCount
1230                 renderCmd.instanceCount,           // instanceCount
1231                 renderCmd.firstIndex,              // firstIndex
1232                 renderCmd.vertexOffset,            // vertexOffset
1233                 renderCmd.firstInstance);          // firstInstance
1234 #if (RENDER_PERF_ENABLED == 1)
1235             stateCache.perfCounters.drawCount++;
1236             stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1237             stateCache.perfCounters.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1238 #endif
1239         } else {
1240             vkCmdDraw(cmdBuf.commandBuffer, // commandBuffer
1241                 renderCmd.vertexCount,      // vertexCount
1242                 renderCmd.instanceCount,    // instanceCount
1243                 renderCmd.firstVertex,      // firstVertex
1244                 renderCmd.firstInstance);   // firstInstance
1245 #if (RENDER_PERF_ENABLED == 1)
1246             stateCache.perfCounters.drawCount++;
1247             stateCache.perfCounters.instanceCount += renderCmd.instanceCount;
1248             stateCache.perfCounters.triangleCount += (renderCmd.vertexCount * 3) // 3: vertex dimension
1249                                                      * renderCmd.instanceCount;
1250 #endif
1251         }
1252     }
1253 }
1254 
1255 void RenderBackendVk::RenderCommand(const RenderCommandDrawIndirect& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1256     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1257 {
1258     if (stateCache.validBindings) {
1259         if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1260             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1261             const VkBuffer buffer = plat.buffer;
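            // the indirect args offset includes the buffer's dynamic ring buffer offset (currentByteOffset)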
1262             const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1263             if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1264                 vkCmdDrawIndexedIndirect(cmdBuf.commandBuffer, // commandBuffer
1265                     buffer,                                    // buffer
1266                     offset,                                    // offset
1267                     renderCmd.drawCount,                       // drawCount
1268                     renderCmd.stride);                         // stride
1269             } else {
1270                 vkCmdDrawIndirect(cmdBuf.commandBuffer, // commandBuffer
1271                     buffer,                             // buffer
1272                     offset,                             // offset
1273                     renderCmd.drawCount,                // drawCount
1274                     renderCmd.stride);                  // stride
1275             }
1276 #if (RENDER_PERF_ENABLED == 1)
1277             stateCache.perfCounters.drawIndirectCount++;
1278 #endif
1279         }
1280     }
1281 }
1282 
1283 void RenderBackendVk::RenderCommand(const RenderCommandDispatch& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1284     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1285 {
1286     if (stateCache.validBindings) {
1287         vkCmdDispatch(cmdBuf.commandBuffer, // commandBuffer
1288             renderCmd.groupCountX,          // groupCountX
1289             renderCmd.groupCountY,          // groupCountY
1290             renderCmd.groupCountZ);         // groupCountZ
1291 #if (RENDER_PERF_ENABLED == 1)
1292         stateCache.perfCounters.dispatchCount++;
1293 #endif
1294     }
1295 }
1296 
1297 void RenderBackendVk::RenderCommand(const RenderCommandDispatchIndirect& renderCmd,
1298     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1299     const StateCache& stateCache)
1300 {
1301     if (stateCache.validBindings) {
1302         if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.argsHandle); gpuBuffer) {
1303             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1304             const VkBuffer buffer = plat.buffer;
1305             const VkDeviceSize offset = (VkDeviceSize)renderCmd.offset + plat.currentByteOffset;
1306             vkCmdDispatchIndirect(cmdBuf.commandBuffer, // commandBuffer
1307                 buffer,                                 // buffer
1308                 offset);                                // offset
1309 #if (RENDER_PERF_ENABLED == 1)
1310             stateCache.perfCounters.dispatchIndirectCount++;
1311 #endif
1312         }
1313     }
1314 }
1315 
1316 void RenderBackendVk::RenderCommand(const RenderCommandBeginRenderPass& renderCmd,
1317     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1318     StateCache& stateCache)
1319 {
1320     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass == nullptr);
1321     stateCache.renderCommandBeginRenderPass = &renderCmd;
1322 
1323     NodeContextPoolManagerVk& poolMgrVk = (NodeContextPoolManagerVk&)poolMgr;
1324     // NOTE: the state cache could be optimized to store lowLevelRenderPassData in the multi-render-command-list case
1325     stateCache.lowLevelRenderPassData = poolMgrVk.GetRenderPassData(renderCmd);
1326 
1327     // early out for multi render command list render pass
1328     if (stateCache.secondaryCommandBuffer) {
1329         return; // early out
1330     }
1331     const bool validRpFbo = (stateCache.lowLevelRenderPassData.renderPass != VK_NULL_HANDLE) &&
1332                             (stateCache.lowLevelRenderPassData.framebuffer != VK_NULL_HANDLE);
1333     // invalidate the whole command list
1334     if (!validRpFbo) {
1335         stateCache.validCommandList = false;
1336         return; // early out
1337     }
1338 
1339     if (renderCmd.beginType == RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN) {
1340 #if (RENDER_VULKAN_COMBINE_MULTI_COMMAND_LIST_MSAA_SUBPASSES_ENABLED == 1)
1341         // fix for e.g. moltenvk msaa resolve not working with mac (we do not execute subpasses)
1342         if ((!stateCache.renderCommandBeginRenderPass->subpasses.empty()) &&
1343             stateCache.renderCommandBeginRenderPass->subpasses[0].resolveAttachmentCount == 0) {
1344             const VkSubpassContents subpassContents =
1345                 static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1346             vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1347                 subpassContents);                  // contents
1348         }
1349 #else
1350         const VkSubpassContents subpassContents =
1351             static_cast<VkSubpassContents>(renderCmd.renderPassDesc.subpassContents);
1352         vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1353             subpassContents);                  // contents
1354 #endif
1355         return; // early out
1356     }
1357 
1358     const RenderPassDesc& renderPassDesc = renderCmd.renderPassDesc;
1359 
1360     VkClearValue clearValues[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1361     bool hasClearValues = false;
1362     for (uint32_t idx = 0; idx < renderPassDesc.attachmentCount; ++idx) {
1363         const auto& ref = renderPassDesc.attachments[idx];
1364         if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR ||
1365             ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1366             const RenderHandle handle = renderPassDesc.attachmentHandles[idx];
1367             VkClearValue clearValue;
1368             if (RenderHandleUtil::IsDepthImage(handle)) {
1369                 PLUGIN_STATIC_ASSERT(sizeof(clearValue.depthStencil) == sizeof(ref.clearValue.depthStencil));
1370                 clearValue.depthStencil.depth = ref.clearValue.depthStencil.depth;
1371                 clearValue.depthStencil.stencil = ref.clearValue.depthStencil.stencil;
1372             } else {
1373                 PLUGIN_STATIC_ASSERT(sizeof(clearValue.color) == sizeof(ref.clearValue.color));
1374                 if (!CloneData(&clearValue.color, sizeof(clearValue.color), &ref.clearValue.color,
1375                         sizeof(ref.clearValue.color))) {
1376                     PLUGIN_LOG_E("Copying of clearValue.color failed.");
1377                 }
1378             }
1379             clearValues[idx] = clearValue;
1380             hasClearValues = true;
1381         }
1382     }
1383 
1384     // clearValueCount must be greater than the largest attachment index in renderPass that specifies a loadOp
1385     // (or stencilLoadOp, if the attachment has a depth/stencil format) of VK_ATTACHMENT_LOAD_OP_CLEAR
1386     const uint32_t clearValueCount = hasClearValues ? renderPassDesc.attachmentCount : 0;
1387 
1388     VkRect2D renderArea {
1389         { renderPassDesc.renderArea.offsetX, renderPassDesc.renderArea.offsetY },
1390         { renderPassDesc.renderArea.extentWidth, renderPassDesc.renderArea.extentHeight },
1391     };
1392     // render area needs to be inside frame buffer
1393     const auto& lowLevelData = stateCache.lowLevelRenderPassData;
1394     renderArea.offset.x = Math::min(renderArea.offset.x, static_cast<int32_t>(lowLevelData.framebufferSize.width));
1395     renderArea.offset.y = Math::min(renderArea.offset.y, static_cast<int32_t>(lowLevelData.framebufferSize.height));
1396     renderArea.extent.width = Math::min(renderArea.extent.width,
1397         static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.width) - renderArea.offset.x));
1398     renderArea.extent.height = Math::min(renderArea.extent.height,
1399         static_cast<uint32_t>(static_cast<int32_t>(lowLevelData.framebufferSize.height) - renderArea.offset.y));
1400 
1401     const VkRenderPassBeginInfo renderPassBeginInfo {
1402         VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,      // sType
1403         nullptr,                                       // pNext
1404         stateCache.lowLevelRenderPassData.renderPass,  // renderPass
1405         stateCache.lowLevelRenderPassData.framebuffer, // framebuffer
1406         renderArea,                                    // renderArea
1407         clearValueCount,                               // clearValueCount
1408         clearValues,                                   // pClearValues
1409     };
1410 
1411     // NOTE: could be patched in render graph
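    // a primary render pass of a multi-command-list pass gets its contents from secondary command buffers,
    // otherwise the commands of this pass are recorded inline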
1412     const VkSubpassContents subpassContents =
1413         stateCache.primaryRenderPass ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE;
1414     vkCmdBeginRenderPass(cmdBuf.commandBuffer, // commandBuffer
1415         &renderPassBeginInfo,                  // pRenderPassBegin
1416         subpassContents);                      // contents
1417 #if (RENDER_PERF_ENABLED == 1)
1418     stateCache.perfCounters.renderPassCount++;
1419 #endif
1420 }
1421 
1422 void RenderBackendVk::RenderCommand(const RenderCommandNextSubpass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1423     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1424 {
1425     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1426 
1427     const VkSubpassContents subpassContents = (VkSubpassContents)renderCmd.subpassContents;
1428     vkCmdNextSubpass(cmdBuf.commandBuffer, // commandBuffer
1429         subpassContents);                  // contents
1430 }
1431 
1432 void RenderBackendVk::RenderCommand(const RenderCommandEndRenderPass& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1433     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, StateCache& stateCache)
1434 {
1435     PLUGIN_ASSERT(stateCache.renderCommandBeginRenderPass != nullptr);
1436 
1437     // early out for multi render command list render pass
1438     if (renderCmd.endType == RenderPassEndType::END_SUBPASS) {
1439         return; // NOTE
1440     }
1441 
1442     stateCache.renderCommandBeginRenderPass = nullptr;
1443     stateCache.lowLevelRenderPassData = {};
1444 
1445     if (!stateCache.secondaryCommandBuffer) {
1446         vkCmdEndRenderPass(cmdBuf.commandBuffer); // commandBuffer
1447     }
1448 }
1449 
1450 void RenderBackendVk::RenderCommand(const RenderCommandBindVertexBuffers& renderCmd,
1451     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1452     const StateCache& stateCache)
1453 {
1454     PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1455     PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1456 
1457     const uint32_t vertexBufferCount = renderCmd.vertexBufferCount;
1458 
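    // gather low-level buffer handles and byte offsets (including dynamic ring buffer offsets) for each binding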
1459     VkBuffer vertexBuffers[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1460     VkDeviceSize offsets[PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT];
1461     const GpuBufferVk* gpuBuffer = nullptr;
1462     RenderHandle currBufferHandle;
1463     for (size_t idx = 0; idx < vertexBufferCount; ++idx) {
1464         const VertexBuffer& currVb = renderCmd.vertexBuffers[idx];
1465         // the importer usually uses the same GPU buffer for all vertex buffers of a single primitive,
1466         // so the buffer is only re-fetched when the handle changes
1467         if (currBufferHandle.id != currVb.bufferHandle.id) {
1468             currBufferHandle = currVb.bufferHandle;
1469             gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(currBufferHandle);
1470         }
1471         if (gpuBuffer) {
1472             const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1473             const VkDeviceSize offset = (VkDeviceSize)currVb.bufferOffset + plat.currentByteOffset;
1474             vertexBuffers[idx] = plat.buffer;
1475             offsets[idx] = offset;
1476         }
1477     }
1478 
1479     vkCmdBindVertexBuffers(cmdBuf.commandBuffer, // commandBuffer
1480         0,                                       // firstBinding
1481         vertexBufferCount,                       // bindingCount
1482         vertexBuffers,                           // pBuffers
1483         offsets);                                // pOffsets
1484 }
1485 
1486 void RenderBackendVk::RenderCommand(const RenderCommandBindIndexBuffer& renderCmd,
1487     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1488     const StateCache& stateCache)
1489 {
1490     const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.indexBuffer.bufferHandle);
1491 
1492     PLUGIN_ASSERT(gpuBuffer);
1493     if (gpuBuffer) {
1494         const GpuBufferPlatformDataVk& plat = gpuBuffer->GetPlatformData();
1495         const VkBuffer buffer = plat.buffer;
1496         const VkDeviceSize offset = (VkDeviceSize)renderCmd.indexBuffer.bufferOffset + plat.currentByteOffset;
1497         const VkIndexType indexType = (VkIndexType)renderCmd.indexBuffer.indexType;
1498 
1499         vkCmdBindIndexBuffer(cmdBuf.commandBuffer, // commandBuffer
1500             buffer,                                // buffer
1501             offset,                                // offset
1502             indexType);                            // indexType
1503     }
1504 }
1505 
1506 void RenderBackendVk::RenderCommand(const RenderCommandBlitImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1507     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1508 {
1509     const GpuImageVk* srcImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1510     const GpuImageVk* dstImagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1511     if (srcImagePtr && dstImagePtr) {
1512         const GpuImagePlatformDataVk& srcPlatImage = srcImagePtr->GetPlatformData();
1513         const GpuImagePlatformDataVk& dstPlatImage = (const GpuImagePlatformDataVk&)dstImagePtr->GetPlatformData();
1514 
1515         const ImageBlit& ib = renderCmd.imageBlit;
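        // the GPU_IMAGE_ALL_LAYERS sentinel expands to the full array layer count of the image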
1516         const uint32_t srcLayerCount = (ib.srcSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1517                                            ? srcPlatImage.arrayLayers
1518                                            : ib.srcSubresource.layerCount;
1519         const uint32_t dstLayerCount = (ib.dstSubresource.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1520                                            ? dstPlatImage.arrayLayers
1521                                            : ib.dstSubresource.layerCount;
1522 
1523         const VkImageSubresourceLayers srcSubresourceLayers {
1524             (VkImageAspectFlags)ib.srcSubresource.imageAspectFlags, // aspectMask
1525             ib.srcSubresource.mipLevel,                             // mipLevel
1526             ib.srcSubresource.baseArrayLayer,                       // baseArrayLayer
1527             srcLayerCount,                                          // layerCount
1528         };
1529         const VkImageSubresourceLayers dstSubresourceLayers {
1530             (VkImageAspectFlags)ib.dstSubresource.imageAspectFlags, // aspectMask
1531             ib.dstSubresource.mipLevel,                             // mipLevel
1532             ib.dstSubresource.baseArrayLayer,                       // baseArrayLayer
1533             dstLayerCount,                                          // layerCount
1534         };
1535 
1536         const VkImageBlit imageBlit {
1537             srcSubresourceLayers, // srcSubresource
1538             { { (int32_t)ib.srcOffsets[0].width, (int32_t)ib.srcOffsets[0].height, (int32_t)ib.srcOffsets[0].depth },
1539                 { (int32_t)ib.srcOffsets[1].width, (int32_t)ib.srcOffsets[1].height,
1540                     (int32_t)ib.srcOffsets[1].depth } }, // srcOffsets[2]
1541             dstSubresourceLayers,                        // dstSubresource
1542             { { (int32_t)ib.dstOffsets[0].width, (int32_t)ib.dstOffsets[0].height, (int32_t)ib.dstOffsets[0].depth },
1543                 { (int32_t)ib.dstOffsets[1].width, (int32_t)ib.dstOffsets[1].height,
1544                     (int32_t)ib.dstOffsets[1].depth } }, // dstOffsets[2]
1545         };
1546 
1547         vkCmdBlitImage(cmdBuf.commandBuffer,         // commandBuffer
1548             srcPlatImage.image,                      // srcImage
1549             (VkImageLayout)renderCmd.srcImageLayout, // srcImageLayout,
1550             dstPlatImage.image,                      // dstImage
1551             (VkImageLayout)renderCmd.dstImageLayout, // dstImageLayout
1552             1,                                       // regionCount
1553             &imageBlit,                              // pRegions
1554             (VkFilter)renderCmd.filter);             // filter
1555     }
1556 }
1557 
1558 void RenderBackendVk::RenderCommand(const RenderCommandCopyBuffer& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1559     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1560 {
1561     const GpuBufferVk* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1562     const GpuBufferVk* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1563 
1564     PLUGIN_ASSERT(srcGpuBuffer);
1565     PLUGIN_ASSERT(dstGpuBuffer);
1566 
1567     if (srcGpuBuffer && dstGpuBuffer) {
1568         const VkBuffer srcBuffer = (srcGpuBuffer->GetPlatformData()).buffer;
1569         const VkBuffer dstBuffer = (dstGpuBuffer->GetPlatformData()).buffer;
1570         const VkBufferCopy bufferCopy {
1571             renderCmd.bufferCopy.srcOffset,
1572             renderCmd.bufferCopy.dstOffset,
1573             renderCmd.bufferCopy.size,
1574         };
1575 
1576         if (bufferCopy.size > 0) {
1577             vkCmdCopyBuffer(cmdBuf.commandBuffer, // commandBuffer
1578                 srcBuffer,                        // srcBuffer
1579                 dstBuffer,                        // dstBuffer
1580                 1,                                // regionCount
1581                 &bufferCopy);                     // pRegions
1582         }
1583     }
1584 }
1585 
1586 void RenderBackendVk::RenderCommand(const RenderCommandCopyBufferImage& renderCmd,
1587     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
1588     const StateCache& stateCache)
1589 {
1590     if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::UNDEFINED) {
1591         PLUGIN_ASSERT(renderCmd.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1592         return;
1593     }
1594 
1595     const GpuBufferVk* gpuBuffer = nullptr;
1596     const GpuImageVk* gpuImage = nullptr;
1597     if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1598         gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.srcHandle);
1599         gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1600     } else {
1601         gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1602         gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(renderCmd.dstHandle);
1603     }
1604 
1605     if (gpuBuffer && gpuImage) {
1606         const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1607         const BufferImageCopy& bufferImageCopy = renderCmd.bufferImageCopy;
1608         const ImageSubresourceLayers& subresourceLayer = bufferImageCopy.imageSubresource;
1609         const uint32_t layerCount = (subresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1610                                         ? platImage.arrayLayers
1611                                         : subresourceLayer.layerCount;
1612         const VkImageSubresourceLayers imageSubresourceLayer {
1613             (VkImageAspectFlags)subresourceLayer.imageAspectFlags,
1614             subresourceLayer.mipLevel,
1615             subresourceLayer.baseArrayLayer,
1616             layerCount,
1617         };
1618         const GpuImageDesc& imageDesc = gpuImage->GetDesc();
1619         // clamp with Math::min so the copy stays inside the mip-level image extent
1620         const uint32_t mip = subresourceLayer.mipLevel;
1621         const VkExtent3D imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
1622         const Size3D& imageOffset = bufferImageCopy.imageOffset;
1623         const VkExtent3D imageExtent = {
1624             Math::min(imageSize.width - imageOffset.width, bufferImageCopy.imageExtent.width),
1625             Math::min(imageSize.height - imageOffset.height, bufferImageCopy.imageExtent.height),
1626             Math::min(imageSize.depth - imageOffset.depth, bufferImageCopy.imageExtent.depth),
1627         };
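        // the copy is skipped entirely if the offset starts outside the mip-level extent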
1628         const bool valid = (imageOffset.width < imageSize.width) && (imageOffset.height < imageSize.height) &&
1629                            (imageOffset.depth < imageSize.depth);
1630         const VkBufferImageCopy bufferImageCopyVk {
1631             bufferImageCopy.bufferOffset,
1632             bufferImageCopy.bufferRowLength,
1633             bufferImageCopy.bufferImageHeight,
1634             imageSubresourceLayer,
1635             { static_cast<int32_t>(imageOffset.width), static_cast<int32_t>(imageOffset.height),
1636                 static_cast<int32_t>(imageOffset.depth) },
1637             imageExtent,
1638         };
1639 
1640         const VkBuffer buffer = (gpuBuffer->GetPlatformData()).buffer;
1641         const VkImage image = (gpuImage->GetPlatformData()).image;
1642 
1643         if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1644             vkCmdCopyBufferToImage(cmdBuf.commandBuffer,             // commandBuffer
1645                 buffer,                                              // srcBuffer
1646                 image,                                               // dstImage
1647                 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1648                 1,                                                   // regionCount
1649                 &bufferImageCopyVk);                                 // pRegions
1650         } else if (valid && renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1651             vkCmdCopyImageToBuffer(cmdBuf.commandBuffer,             // commandBuffer
1652                 image,                                               // srcImage
1653                 VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1654                 buffer,                                              // dstBuffer
1655                 1,                                                   // regionCount
1656                 &bufferImageCopyVk);                                 // pRegions
1657         }
1658     }
1659 }
1660 
1661 void RenderBackendVk::RenderCommand(const RenderCommandCopyImage& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1662     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
1663 {
1664     const GpuImageVk* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.srcHandle);
1665     const GpuImageVk* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.dstHandle);
1666     if (srcGpuImage && dstGpuImage) {
1667         const ImageCopy& copy = renderCmd.imageCopy;
1668         const ImageSubresourceLayers& srcSubresourceLayer = copy.srcSubresource;
1669         const ImageSubresourceLayers& dstSubresourceLayer = copy.dstSubresource;
1670 
1671         const GpuImagePlatformDataVk& srcPlatImage = srcGpuImage->GetPlatformData();
1672         const GpuImagePlatformDataVk& dstPlatImage = dstGpuImage->GetPlatformData();
1673         const uint32_t srcLayerCount = (srcSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1674                                            ? srcPlatImage.arrayLayers
1675                                            : srcSubresourceLayer.layerCount;
1676         const uint32_t dstLayerCount = (dstSubresourceLayer.layerCount == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1677                                            ? dstPlatImage.arrayLayers
1678                                            : dstSubresourceLayer.layerCount;
1679 
1680         const VkImageSubresourceLayers srcImageSubresourceLayer {
1681             (VkImageAspectFlags)srcSubresourceLayer.imageAspectFlags,
1682             srcSubresourceLayer.mipLevel,
1683             srcSubresourceLayer.baseArrayLayer,
1684             srcLayerCount,
1685         };
1686         const VkImageSubresourceLayers dstImageSubresourceLayer {
1687             (VkImageAspectFlags)dstSubresourceLayer.imageAspectFlags,
1688             dstSubresourceLayer.mipLevel,
1689             dstSubresourceLayer.baseArrayLayer,
1690             dstLayerCount,
1691         };
1692 
1693         const GpuImageDesc& srcDesc = srcGpuImage->GetDesc();
1694         const GpuImageDesc& dstDesc = dstGpuImage->GetDesc();
1695 
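        // clamp the copy extent so it stays inside both images: extent = min(requested, srcSize - srcOffset, dstSize - dstOffset)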
1696         VkExtent3D ext = { copy.extent.width, copy.extent.height, copy.extent.depth };
1697         ext.width = Math::min(ext.width, Math::min(srcDesc.width - copy.srcOffset.x, dstDesc.width - copy.dstOffset.x));
1698         ext.height =
1699             Math::min(ext.height, Math::min(srcDesc.height - copy.srcOffset.y, dstDesc.height - copy.dstOffset.y));
1700         ext.depth = Math::min(ext.depth, Math::min(srcDesc.depth - copy.srcOffset.z, dstDesc.depth - copy.dstOffset.z));
1701 
1702         const VkImageCopy imageCopyVk {
1703             srcImageSubresourceLayer,                                 // srcSubresource
1704             { copy.srcOffset.x, copy.srcOffset.y, copy.srcOffset.z }, // srcOffset
1705             dstImageSubresourceLayer,                                 // dstSubresource
1706             { copy.dstOffset.x, copy.dstOffset.y, copy.dstOffset.z }, // dstOffset
1707             ext,                                                      // extent
1708         };
1709         vkCmdCopyImage(cmdBuf.commandBuffer,                     // commandBuffer
1710             srcPlatImage.image,                                  // srcImage
1711             VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // srcImageLayout
1712             dstPlatImage.image,                                  // dstImage
1713             VkImageLayout::VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // dstImageLayout
1714             1,                                                   // regionCount
1715             &imageCopyVk);                                       // pRegions
1716     }
1717 }
1718 
1719 void RenderBackendVk::RenderCommand(const RenderCommandBarrierPoint& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
1720     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache,
1721     const RenderBarrierList& rbl)
1722 {
1723     if (!rbl.HasBarriers(renderCmd.barrierPointIndex)) {
1724         return;
1725     }
1726 
1727     const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1728         rbl.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1729     PLUGIN_ASSERT(barrierPointBarriers);
1730     if (!barrierPointBarriers) {
1731         return;
1732     }
1733     constexpr uint32_t maxBarrierCount { 8 };
1734     VkBufferMemoryBarrier bufferMemoryBarriers[maxBarrierCount];
1735     VkImageMemoryBarrier imageMemoryBarriers[maxBarrierCount];
1736     VkMemoryBarrier memoryBarriers[maxBarrierCount];
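    // barriers are accumulated into these fixed-size stack arrays and flushed with a single
    // vkCmdPipelineBarrier whenever any array fills up or the barrier list ends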
1737 
1738     // generally there is only a single barrier list per barrier point
1739     // with batched render passes there can be many
1740     // NOTE: all barrier lists could be combined into a single vk command if needed
1741     // NOTE: memory and pipeline barriers should be allowed on the front-end side
1742     const uint32_t barrierListCount = static_cast<uint32_t>(barrierPointBarriers->barrierListCount);
1743     const RenderBarrierList::BarrierPointBarrierList* nextBarrierList = barrierPointBarriers->firstBarrierList;
1744 #if (RENDER_VALIDATION_ENABLED == 1)
1745     uint32_t fullBarrierCount = 0u;
1746 #endif
1747     for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1748         if (nextBarrierList == nullptr) { // should never be null; purely a safety check
1749             PLUGIN_ASSERT(false);
1750             return;
1751         }
1752         const RenderBarrierList::BarrierPointBarrierList& barrierListRef = *nextBarrierList;
1753         nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1754         const uint32_t barrierCount = static_cast<uint32_t>(barrierListRef.count);
1755 
1756         uint32_t bufferBarrierIdx = 0;
1757         uint32_t imageBarrierIdx = 0;
1758         uint32_t memoryBarrierIdx = 0;
1759 
1760         VkPipelineStageFlags srcPipelineStageMask { 0 };
1761         VkPipelineStageFlags dstPipelineStageMask { 0 };
1762         constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
1763 
1764         for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1765             const CommandBarrier& ref = barrierListRef.commandBarriers[barrierIdx];
1766 
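            // queue family ownership transfer indices are only needed when the barrier crosses GPU queue types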
1767             uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1768             uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1769             if (ref.srcGpuQueue.type != ref.dstGpuQueue.type) {
1770                 srcQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.srcGpuQueue).queueInfo.queueFamilyIndex;
1771                 dstQueueFamilyIndex = deviceVk_.GetGpuQueue(ref.dstGpuQueue).queueInfo.queueFamilyIndex;
1772             }
1773 
1774             const RenderHandle resourceHandle = ref.resourceHandle;
1775             const RenderHandleType handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1776 
1777             PLUGIN_ASSERT((handleType == RenderHandleType::UNDEFINED) || (handleType == RenderHandleType::GPU_BUFFER) ||
1778                           (handleType == RenderHandleType::GPU_IMAGE));
1779 
1780             const VkAccessFlags srcAccessMask = (VkAccessFlags)(ref.src.accessFlags);
1781             const VkAccessFlags dstAccessMask = (VkAccessFlags)(ref.dst.accessFlags);
1782 
1783             srcPipelineStageMask |= (VkPipelineStageFlags)(ref.src.pipelineStageFlags);
1784             dstPipelineStageMask |= (VkPipelineStageFlags)(ref.dst.pipelineStageFlags);
1785 
1786             // NOTE: zero size buffer barriers allowed ATM
1787             if (handleType == RenderHandleType::GPU_BUFFER) {
1788                 if (const GpuBufferVk* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(resourceHandle); gpuBuffer) {
1789                     const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
1790                     // mapped currentByteOffset (dynamic ring buffer offset) taken into account
1791                     const VkDeviceSize offset = (VkDeviceSize)ref.dst.optionalByteOffset + platBuffer.currentByteOffset;
1792                     const VkDeviceSize size =
1793                         Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - ref.dst.optionalByteOffset,
1794                             (VkDeviceSize)ref.dst.optionalByteSize);
1795                     if (platBuffer.buffer) {
1796                         bufferMemoryBarriers[bufferBarrierIdx++] = {
1797                             VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
1798                             nullptr,                                 // pNext
1799                             srcAccessMask,                           // srcAccessMask
1800                             dstAccessMask,                           // dstAccessMask
1801                             srcQueueFamilyIndex,                     // srcQueueFamilyIndex
1802                             dstQueueFamilyIndex,                     // dstQueueFamilyIndex
1803                             platBuffer.buffer,                       // buffer
1804                             offset,                                  // offset
1805                             size,                                    // size
1806                         };
1807                     }
1808                 }
1809             } else if (handleType == RenderHandleType::GPU_IMAGE) {
1810                 if (const GpuImageVk* gpuImage = gpuResourceMgr_.GetImage<GpuImageVk>(resourceHandle); gpuImage) {
1811                     const GpuImagePlatformDataVk& platImage = gpuImage->GetPlatformData();
1812 
1813                     const VkImageLayout srcImageLayout = (VkImageLayout)(ref.src.optionalImageLayout);
1814                     const VkImageLayout dstImageLayout = (VkImageLayout)(ref.dst.optionalImageLayout);
1815 
1816                     const VkImageAspectFlags imageAspectFlags =
1817                         (ref.dst.optionalImageSubresourceRange.imageAspectFlags == 0)
1818                             ? platImage.aspectFlags
1819                             : (VkImageAspectFlags)ref.dst.optionalImageSubresourceRange.imageAspectFlags;
1820 
1821                     const uint32_t levelCount = (ref.src.optionalImageSubresourceRange.levelCount ==
1822                                                     PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
1823                                                     ? VK_REMAINING_MIP_LEVELS
1824                                                     : ref.src.optionalImageSubresourceRange.levelCount;
1825 
1826                     const uint32_t layerCount = (ref.src.optionalImageSubresourceRange.layerCount ==
1827                                                     PipelineStateConstants::GPU_IMAGE_ALL_LAYERS)
1828                                                     ? VK_REMAINING_ARRAY_LAYERS
1829                                                     : ref.src.optionalImageSubresourceRange.layerCount;
1830 
1831                     const VkImageSubresourceRange imageSubresourceRange {
1832                         imageAspectFlags,                                     // aspectMask
1833                         ref.src.optionalImageSubresourceRange.baseMipLevel,   // baseMipLevel
1834                         levelCount,                                           // levelCount
1835                         ref.src.optionalImageSubresourceRange.baseArrayLayer, // baseArrayLayer
1836                         layerCount,                                           // layerCount
1837                     };
1838 
1839                     if (platImage.image) {
1840                         imageMemoryBarriers[imageBarrierIdx++] = {
1841                             VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1842                             nullptr,                                // pNext
1843                             srcAccessMask,                          // srcAccessMask
1844                             dstAccessMask,                          // dstAccessMask
1845                             srcImageLayout,                         // oldLayout
1846                             dstImageLayout,                         // newLayout
1847                             srcQueueFamilyIndex,                    // srcQueueFamilyIndex
1848                             dstQueueFamilyIndex,                    // dstQueueFamilyIndex
1849                             platImage.image,                        // image
1850                             imageSubresourceRange,                  // subresourceRange
1851                         };
1852                     }
1853                 }
1854             } else {
1855                 memoryBarriers[memoryBarrierIdx++] = {
1856                     VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1857                     nullptr,                          // pNext
1858                     srcAccessMask,                    // srcAccessMask
1859                     dstAccessMask,                    // dstAccessMask
1860                 };
1861             }
1862 
1863             const bool hasBarriers = ((bufferBarrierIdx > 0) || (imageBarrierIdx > 0) || (memoryBarrierIdx > 0));
1864             const bool resetBarriers = (bufferBarrierIdx >= maxBarrierCount) || (imageBarrierIdx >= maxBarrierCount) ||
1865                                        (memoryBarrierIdx >= maxBarrierCount) || (barrierIdx >= (barrierCount - 1));
1868 
1869             if (hasBarriers && resetBarriers) {
1870 #if (RENDER_VALIDATION_ENABLED == 1)
1871                 fullBarrierCount += bufferBarrierIdx + imageBarrierIdx + memoryBarrierIdx;
1872 #endif
1873                 vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
1874                     srcPipelineStageMask,                  // srcStageMask
1875                     dstPipelineStageMask,                  // dstStageMask
1876                     dependencyFlags,                       // dependencyFlags
1877                     memoryBarrierIdx,                      // memoryBarrierCount
1878                     memoryBarriers,                        // pMemoryBarriers
1879                     bufferBarrierIdx,                      // bufferMemoryBarrierCount
1880                     bufferMemoryBarriers,                  // pBufferMemoryBarriers
1881                     imageBarrierIdx,                       // imageMemoryBarrierCount
1882                     imageMemoryBarriers);                  // pImageMemoryBarriers
1883 
1884                 bufferBarrierIdx = 0;
1885                 imageBarrierIdx = 0;
1886                 memoryBarrierIdx = 0;
1887             }
1888         }
1889     }
1890 #if (RENDER_VALIDATION_ENABLED == 1)
1891     if (fullBarrierCount != barrierPointBarriers->fullCommandBarrierCount) {
1892         PLUGIN_LOG_ONCE_W("RenderBackendVk_RenderCommand_RenderCommandBarrierPoint",
1893             "RENDER_VALIDATION: barrier count does not match (front-end-count: %u, back-end-count: %u)",
1894             barrierPointBarriers->fullCommandBarrierCount, fullBarrierCount);
1895     }
1896 #endif
1897 }
1898 
1899 namespace {
1900 struct DescriptorSetUpdateDataStruct {
1901     uint32_t accelIndex { 0U };
1902     uint32_t bufferIndex { 0U };
1903     uint32_t imageIndex { 0U };
1904     uint32_t samplerIndex { 0U };
1905     uint32_t writeBindIdx { 0U };
1906 };
1907 } // namespace
1908 
1909 void RenderBackendVk::UpdateCommandListDescriptorSets(
1910     const RenderCommandList& renderCommandList, StateCache& stateCache, NodeContextDescriptorSetManager& ncdsm)
1911 {
1912     NodeContextDescriptorSetManagerVk& ctxDescMgr = (NodeContextDescriptorSetManagerVk&)ncdsm;
1913 
1914     const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
1915     const uint32_t upDescriptorSetCount = static_cast<uint32_t>(allDescSets.size());
1916     LowLevelContextDescriptorWriteDataVk& wd = ctxDescMgr.GetLowLevelDescriptorWriteData();
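    // wd holds pre-sized low-level write structures; dsud tracks the next free slot per resource type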
1917     DescriptorSetUpdateDataStruct dsud;
1918     for (uint32_t descIdx = 0U; descIdx < upDescriptorSetCount; ++descIdx) {
1919         if ((descIdx >= static_cast<uint32_t>(wd.writeDescriptorSets.size())) ||
1920             (RenderHandleUtil::GetHandleType(allDescSets[descIdx]) != RenderHandleType::DESCRIPTOR_SET)) {
1921             continue;
1922         }
1923         const RenderHandle descHandle = allDescSets[descIdx];
1924         // first update gpu descriptor indices
1925         ncdsm.UpdateDescriptorSetGpuHandle(descHandle);
1926 
1927         // actual vulkan descriptor set update
1928         const LowLevelDescriptorSetVk* descriptorSet = ctxDescMgr.GetDescriptorSet(descHandle);
1929         if (descriptorSet && descriptorSet->descriptorSet) {
1930             const DescriptorSetLayoutBindingResources bindingResources = ncdsm.GetCpuDescriptorSetData(descHandle);
1931 #if (RENDER_VALIDATION_ENABLED == 1)
1932             // get descriptor counts
1933             const LowLevelDescriptorCountsVk& descriptorCounts = ctxDescMgr.GetLowLevelDescriptorCounts(descHandle);
1934             if (static_cast<uint32_t>(bindingResources.bindings.size()) > descriptorCounts.writeDescriptorCount) {
1935                 PLUGIN_LOG_E("RENDER_VALIDATION: update descriptor set bindings exceed descriptor set bindings");
1936             }
1937 #endif
1938             if (static_cast<uint32_t>(bindingResources.bindings.size()) >
1939                 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_BINDING_COUNT) {
1940                 PLUGIN_ASSERT(false);
1941                 continue;
1942             }
1943             const auto& buffers = bindingResources.buffers;
1944             const auto& images = bindingResources.images;
1945             const auto& samplers = bindingResources.samplers;
1946             for (const auto& ref : buffers) {
1947                 const uint32_t descriptorCount = ref.binding.descriptorCount;
1948                 // skip array resources bound through the first binding; their descriptorCount is 0
1949                 if (descriptorCount == 0) {
1950                     continue;
1951                 }
1952                 const uint32_t arrayOffset = ref.arrayOffset;
1953                 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1954                 if (ref.binding.descriptorType == CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE) {
1955 #if (RENDER_VULKAN_RT_ENABLED == 1)
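                    // Acceleration structure descriptors are written through a
                    // VkWriteDescriptorSetAccelerationStructureKHR chained to the write via pNext.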
1956                     for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1957                         // index 0 uses the binding's own resource; from index 1 onwards the array offset region is used
1958                         const BindableBuffer& bRes =
1959                             (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1960                         if (const GpuBufferVk* resPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1961                             const GpuAccelerationStructurePlatformDataVk& platAccel =
1962                                 resPtr->GetPlatformDataAccelerationStructure();
1963                             wd.descriptorAccelInfos[dsud.accelIndex + idx] = {
1964                                 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // sType
1965                                 nullptr,                                                           // pNext
1966                                 descriptorCount,                  // accelerationStructureCount
1967                                 &platAccel.accelerationStructure, // pAccelerationStructures
1968                             };
1969                         }
1970                     }
1971                     wd.writeDescriptorSets[dsud.writeBindIdx++] = {
1972                         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
1973                         &wd.descriptorAccelInfos[dsud.accelIndex],    // pNext
1974                         descriptorSet->descriptorSet,                 // dstSet
1975                         ref.binding.binding,                          // dstBinding
1976                         0,                                            // dstArrayElement
1977                         descriptorCount,                              // descriptorCount
1978                         (VkDescriptorType)ref.binding.descriptorType, // descriptorType
1979                         nullptr,                                      // pImageInfo
1980                         nullptr,                                      // pBufferInfo
1981                         nullptr,                                      // pTexelBufferView
1982                     };
1983                     dsud.accelIndex += descriptorCount;
1984 #endif
1985                 } else {
1986                     for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1987                         // index 0 uses the binding's own resource; from index 1 onwards the array offset region is used
1988                         const BindableBuffer& bRes =
1989                             (idx == 0) ? ref.resource : buffers[arrayOffset + idx - 1].resource;
1990                         const VkDeviceSize optionalByteOffset = (VkDeviceSize)bRes.byteOffset;
1991                         if (const GpuBufferVk* resPtr = gpuResourceMgr_.GetBuffer<GpuBufferVk>(bRes.handle); resPtr) {
1992                             const GpuBufferPlatformDataVk& platBuffer = resPtr->GetPlatformData();
1993                             // takes into account dynamic ring buffers with mapping
1994                             const VkDeviceSize bufferMapByteOffset = (VkDeviceSize)platBuffer.currentByteOffset;
1995                             const VkDeviceSize byteOffset = bufferMapByteOffset + optionalByteOffset;
1996                             const VkDeviceSize bufferRange =
1997                                 Math::min((VkDeviceSize)platBuffer.bindMemoryByteSize - optionalByteOffset,
1998                                     (VkDeviceSize)bRes.byteSize);
1999                             wd.descriptorBufferInfos[dsud.bufferIndex + idx] = {
2000                                 platBuffer.buffer, // buffer
2001                                 byteOffset,        // offset
2002                                 bufferRange,       // range
2003                             };
2004                         }
2005                     }
2006                     wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2007                         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,       // sType
2008                         nullptr,                                      // pNext
2009                         descriptorSet->descriptorSet,                 // dstSet
2010                         ref.binding.binding,                          // dstBinding
2011                         0,                                            // dstArrayElement
2012                         descriptorCount,                              // descriptorCount
2013                         (VkDescriptorType)ref.binding.descriptorType, // descriptorType
2014                         nullptr,                                      // pImageInfo
2015                         &wd.descriptorBufferInfos[dsud.bufferIndex],  // pBufferInfo
2016                         nullptr,                                      // pTexelBufferView
2017                     };
2018                     dsud.bufferIndex += descriptorCount;
2019                 }
2020             }
2021             for (const auto& ref : images) {
2022                 const uint32_t descriptorCount = ref.binding.descriptorCount;
2023                 // skip array resources bound through the first binding; their descriptorCount is 0
2024                 if (descriptorCount == 0) {
2025                     continue;
2026                 }
2027                 const VkDescriptorType descriptorType = (VkDescriptorType)ref.binding.descriptorType;
2028                 const uint32_t arrayOffset = ref.arrayOffset;
2029                 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
2030                 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2031                     // index 0 uses the binding's own resource; from index 1 onwards the array offset region is used
2032                     const BindableImage& bRes = (idx == 0) ? ref.resource : images[arrayOffset + idx - 1].resource;
2033                     if (const GpuImageVk* resPtr = gpuResourceMgr_.GetImage<GpuImageVk>(bRes.handle); resPtr) {
2034                         VkSampler sampler = VK_NULL_HANDLE;
2035                         if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2036                             const GpuSamplerVk* samplerPtr =
2037                                 gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.samplerHandle);
2038                             if (samplerPtr) {
2039                                 sampler = samplerPtr->GetPlatformData().sampler;
2040                             }
2041                         }
2042                         const GpuImagePlatformDataVk& platImage = resPtr->GetPlatformData();
2043                         const GpuImagePlatformDataViewsVk& platImageViews = resPtr->GetPlatformDataViews();
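                        // Default to the full image view; a per-layer or per-mip view is selected
                        // below when only a subset of the image is bound.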
2044                         VkImageView imageView = platImage.imageView;
2045                         if ((bRes.layer != PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2046                             (bRes.layer < platImageViews.layerImageViews.size())) {
2047                             imageView = platImageViews.layerImageViews[bRes.layer];
2048                         } else if (bRes.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) {
2049                             if ((bRes.layer == PipelineStateConstants::GPU_IMAGE_ALL_LAYERS) &&
2050                                 (bRes.mip < platImageViews.mipImageAllLayerViews.size())) {
2051                                 imageView = platImageViews.mipImageAllLayerViews[bRes.mip];
2052                             } else if (bRes.mip < platImageViews.mipImageViews.size()) {
2053                                 imageView = platImageViews.mipImageViews[bRes.mip];
2054                             }
2055                         }
2056                         wd.descriptorImageInfos[dsud.imageIndex + idx] = {
2057                             sampler,                         // sampler
2058                             imageView,                       // imageView
2059                             (VkImageLayout)bRes.imageLayout, // imageLayout
2060                         };
2061                     }
2062                 }
2063                 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2064                     VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,    // sType
2065                     nullptr,                                   // pNext
2066                     descriptorSet->descriptorSet,              // dstSet
2067                     ref.binding.binding,                       // dstBinding
2068                     0,                                         // dstArrayElement
2069                     descriptorCount,                           // descriptorCount
2070                     descriptorType,                            // descriptorType
2071                     &wd.descriptorImageInfos[dsud.imageIndex], // pImageInfo
2072                     nullptr,                                   // pBufferInfo
2073                     nullptr,                                   // pTexelBufferView
2074                 };
2075                 dsud.imageIndex += descriptorCount;
2076             }
2077             for (const auto& ref : samplers) {
2078                 const uint32_t descriptorCount = ref.binding.descriptorCount;
2079                 // skip array resources bound through the first binding; their descriptorCount is 0
2080                 if (descriptorCount == 0) {
2081                     continue;
2082                 }
2083                 const uint32_t arrayOffset = ref.arrayOffset;
2084                 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= samplers.size());
2085                 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
2086                     // index 0 uses the binding's own resource; from index 1 onwards the array offset region is used
2087                     const BindableSampler& bRes = (idx == 0) ? ref.resource : samplers[arrayOffset + idx - 1].resource;
2088                     if (const GpuSamplerVk* resPtr = gpuResourceMgr_.GetSampler<GpuSamplerVk>(bRes.handle); resPtr) {
2089                         const GpuSamplerPlatformDataVk& platSampler = resPtr->GetPlatformData();
2090                         wd.descriptorSamplerInfos[dsud.samplerIndex + idx] = {
2091                             platSampler.sampler,      // sampler
2092                             VK_NULL_HANDLE,           // imageView
2093                             VK_IMAGE_LAYOUT_UNDEFINED // imageLayout
2094                         };
2095                     }
2096                 }
2097                 wd.writeDescriptorSets[dsud.writeBindIdx++] = {
2098                     VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,        // sType
2099                     nullptr,                                       // pNext
2100                     descriptorSet->descriptorSet,                  // dstSet
2101                     ref.binding.binding,                           // dstBinding
2102                     0,                                             // dstArrayElement
2103                     descriptorCount,                               // descriptorCount
2104                     (VkDescriptorType)ref.binding.descriptorType,  // descriptorType
2105                     &wd.descriptorSamplerInfos[dsud.samplerIndex], // pImageInfo
2106                     nullptr,                                       // pBufferInfo
2107                     nullptr,                                       // pTexelBufferView
2108                 };
2109                 dsud.samplerIndex += descriptorCount;
2110             }
2111 
2112 #if (RENDER_PERF_ENABLED == 1)
2113             // count the actually updated descriptor sets, not the API calls
2114             stateCache.perfCounters.updateDescriptorSetCount++;
2115 #endif
2116         }
2117     }
2118     // flush all gathered descriptor writes for this command list with a single call
2119     if (dsud.writeBindIdx > 0U) {
2120         const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
2121         vkUpdateDescriptorSets(device,     // device
2122             dsud.writeBindIdx,             // descriptorWriteCount
2123             wd.writeDescriptorSets.data(), // pDescriptorWrites
2124             0,                             // descriptorCopyCount
2125             nullptr);                      // pDescriptorCopies
2126     }
2127 }
2128 
2129 void RenderBackendVk::RenderCommand(const RenderCommandBindDescriptorSets& renderCmd,
2130     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2131     StateCache& stateCache, NodeContextDescriptorSetManager& aNcdsm)
2132 {
2133     const NodeContextDescriptorSetManagerVk& aNcdsmVk = (NodeContextDescriptorSetManagerVk&)aNcdsm;
2134 
2135     PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2136     const RenderHandleType handleType = RenderHandleUtil::GetHandleType(stateCache.psoHandle);
2137     const VkPipelineBindPoint pipelineBindPoint = (handleType == RenderHandleType::COMPUTE_PSO)
2138                                                       ? VK_PIPELINE_BIND_POINT_COMPUTE
2139                                                       : VK_PIPELINE_BIND_POINT_GRAPHICS;
2140     const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2141 
2142     bool valid = (pipelineLayout != VK_NULL_HANDLE);
2143     const uint32_t firstSet = renderCmd.firstSet;
2144     const uint32_t setCount = renderCmd.setCount;
2145     if (valid && (firstSet + setCount <= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) && (setCount > 0)) {
2146         uint32_t combinedDynamicOffsetCount = 0;
2147         uint32_t dynamicOffsetDescriptorSetIndices = 0;
2148         uint64_t priorStatePipelineDescSetHash = stateCache.pipelineDescSetHash;
2149 
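        // Collect the low-level descriptor sets and track their immutable sampler bitmasks;
        // if the combined hash changes, the pipeline may be re-created below so that the
        // descriptor set layouts stay compatible.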
2150         VkDescriptorSet descriptorSets[PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2151         const uint32_t firstPlusCount = firstSet + setCount;
2152         for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2153             const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2154             if (RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET) {
2155                 const uint32_t dynamicDescriptorCount = aNcdsm.GetDynamicOffsetDescriptorCount(descriptorSetHandle);
2156                 dynamicOffsetDescriptorSetIndices |= (dynamicDescriptorCount > 0) ? (1 << idx) : 0;
2157                 combinedDynamicOffsetCount += dynamicDescriptorCount;
2158 
2159                 const LowLevelDescriptorSetVk* descriptorSet = aNcdsmVk.GetDescriptorSet(descriptorSetHandle);
2160                 if (descriptorSet && descriptorSet->descriptorSet) {
2161                     descriptorSets[idx] = descriptorSet->descriptorSet;
2162                     // update, copy to state cache
2163                     PLUGIN_ASSERT(descriptorSet->descriptorSetLayout);
2164                     stateCache.lowLevelPipelineLayoutData.descriptorSetLayouts[idx] = *descriptorSet;
2165                     const uint32_t currShift = (idx * 16u);
2166                     const uint64_t oldOutMask = (~(static_cast<uint64_t>(0xffff) << currShift));
2167                     uint64_t currHash = stateCache.pipelineDescSetHash & oldOutMask;
2168                     stateCache.pipelineDescSetHash = currHash | (descriptorSet->immutableSamplerBitmask);
2169                 } else {
2170                     valid = false;
2171                 }
2172             }
2173         }
2174 
2175         uint32_t dynamicOffsets[PipelineLayoutConstants::MAX_DYNAMIC_DESCRIPTOR_OFFSET_COUNT *
2176                                 PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT];
2177         uint32_t dynamicOffsetIdx = 0;
2178         // NOTE: optimization opportunity:
2179         // the safety checks that keep offsets from being written for non-dynamic sets
2180         // could be compiled in only for validation builds
2181         for (uint32_t idx = firstSet; idx < firstPlusCount; ++idx) {
2182             if ((1 << idx) & dynamicOffsetDescriptorSetIndices) {
2183                 const RenderHandle descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2184                 const DynamicOffsetDescriptors dod = aNcdsm.GetDynamicOffsetDescriptors(descriptorSetHandle);
2185                 const uint32_t dodResCount = static_cast<uint32_t>(dod.resources.size());
2186                 const auto& descriptorSetDynamicOffsets = renderCmd.descriptorSetDynamicOffsets[idx];
2187                 for (uint32_t dodIdx = 0U; dodIdx < dodResCount; ++dodIdx) {
2188                     uint32_t byteOffset = 0U;
2189                     if (dodIdx < descriptorSetDynamicOffsets.dynamicOffsetCount) {
2190                         byteOffset = descriptorSetDynamicOffsets.dynamicOffsets[dodIdx];
2191                     }
2192                     dynamicOffsets[dynamicOffsetIdx++] = byteOffset;
2193                 }
2194             }
2195         }
2196 
2197         stateCache.validBindings = valid;
2198         if (stateCache.validBindings) {
2199             if (priorStatePipelineDescSetHash == stateCache.pipelineDescSetHash) {
2200                 vkCmdBindDescriptorSets(cmdBuf.commandBuffer, // commandBuffer
2201                     pipelineBindPoint,                        // pipelineBindPoint
2202                     pipelineLayout,                           // layout
2203                     firstSet,                                 // firstSet
2204                     setCount,                                 // descriptorSetCount
2205                     &descriptorSets[firstSet],                // pDescriptorSets
2206                     dynamicOffsetIdx,                         // dynamicOffsetCount
2207                     dynamicOffsets);                          // pDynamicOffsets
2208 #if (RENDER_PERF_ENABLED == 1)
2209                 stateCache.perfCounters.bindDescriptorSetCount++;
2210 #endif
2211             } else {
2212                 // possible pso re-creation and bind of these sets to the new pso
2213                 const RenderCommandBindPipeline renderCmdBindPipeline { stateCache.psoHandle,
2214                     (PipelineBindPoint)pipelineBindPoint };
2215                 RenderCommand(renderCmdBindPipeline, cmdBuf, psoMgr, poolMgr, stateCache);
2216                 RenderCommand(renderCmd, cmdBuf, psoMgr, poolMgr, stateCache, aNcdsm);
2217             }
2218         }
2219     }
2220 }
2221 
2222 void RenderBackendVk::RenderCommand(const RenderCommandPushConstant& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2223     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2224 {
2225     PLUGIN_ASSERT(renderCmd.pushConstant.byteSize > 0);
2226     PLUGIN_ASSERT(renderCmd.data);
2227 
2228     PLUGIN_ASSERT(stateCache.psoHandle == renderCmd.psoHandle);
2229     const VkPipelineLayout pipelineLayout = stateCache.pipelineLayout;
2230 
2231     const bool valid = (pipelineLayout != VK_NULL_HANDLE) && (renderCmd.pushConstant.byteSize > 0);
2232     PLUGIN_ASSERT(valid);
2233 
2234     if (valid) {
2235         const auto shaderStageFlags = static_cast<VkShaderStageFlags>(renderCmd.pushConstant.shaderStageFlags);
2236         vkCmdPushConstants(cmdBuf.commandBuffer, // commandBuffer
2237             pipelineLayout,                      // layout
2238             shaderStageFlags,                    // stageFlags
2239             0,                                   // offset
2240             renderCmd.pushConstant.byteSize,     // size
2241             static_cast<void*>(renderCmd.data)); // pValues
2242     }
2243 }
2244 
2245 void RenderBackendVk::RenderCommand(const RenderCommandBuildAccelerationStructure& renderCmd,
2246     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2247     const StateCache& stateCache)
2248 {
2249 #if (RENDER_VULKAN_RT_ENABLED == 1)
2250     // NOTE: missing
2251     const GpuBufferVk* dst = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.dstAccelerationStructure);
2252     const GpuBufferVk* scratchBuffer = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(renderCmd.scratchBuffer);
2253     if (dst && scratchBuffer) {
2254         const DevicePlatformDataVk& devicePlat = deviceVk_.GetPlatformDataVk();
2255         const VkDevice device = devicePlat.device;
2256 
2257         const GpuAccelerationStructurePlatformDataVk& dstPlat = dst->GetPlatformDataAccelerationStructure();
2258         const VkAccelerationStructureKHR dstAs = dstPlat.accelerationStructure;
2259 
2260         // scratch data with user offset
2261         const VkDeviceAddress scratchData { GetBufferDeviceAddress(device, scratchBuffer->GetPlatformData().buffer) +
2262                                             VkDeviceSize(renderCmd.scratchOffset) };
2263 
2264         const size_t arraySize =
2265             renderCmd.trianglesView.size() + renderCmd.aabbsView.size() + renderCmd.instancesView.size();
2266         vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
2267         vector<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos(arraySize);
2268 
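        // Translate the engine-side geometry descriptions (triangles, AABBs, instances)
        // into Vulkan geometry and build-range entries.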
2269         size_t arrayIndex = 0;
2270         for (const auto& trianglesRef : renderCmd.trianglesView) {
2271             geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2272                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2273                 nullptr,                                               // pNext
2274                 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR,     // geometryType
2275                 {},                                                    // geometry;
2276                 0,                                                     // flags
2277             };
2278             uint32_t primitiveCount = 0;
2279             const GpuBufferVk* vb = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.vertexData.handle);
2280             const GpuBufferVk* ib = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.indexData.handle);
2281             if (vb && ib) {
2282                 const VkDeviceOrHostAddressConstKHR vertexData { GetBufferDeviceAddress(
2283                     device, vb->GetPlatformData().buffer) };
2284                 const VkDeviceOrHostAddressConstKHR indexData { GetBufferDeviceAddress(
2285                     device, ib->GetPlatformData().buffer) };
2286                 VkDeviceOrHostAddressConstKHR transformData {};
2287                 if (RenderHandleUtil::IsValid(trianglesRef.transformData.handle)) {
2288                     if (const GpuBufferVk* tr =
2289                             gpuResourceMgr_.GetBuffer<const GpuBufferVk>(trianglesRef.transformData.handle);
2290                         tr) {
2291                         transformData.deviceAddress = { GetBufferDeviceAddress(device, tr->GetPlatformData().buffer) };
2292                     }
2293                 }
2294                 primitiveCount = trianglesRef.info.indexCount / 3u; // triangles
2295 
2296                 geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2297                 geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
2298                     VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
2299                     nullptr,                                                              // pNext
2300                     VkFormat(trianglesRef.info.vertexFormat),                             // vertexFormat
2301                     vertexData,                                                           // vertexData
2302                     VkDeviceSize(trianglesRef.info.vertexStride),                         // vertexStride
2303                     trianglesRef.info.maxVertex,                                          // maxVertex
2304                     VkIndexType(trianglesRef.info.indexType),                             // indexType
2305                     indexData,                                                            // indexData
2306                     transformData,                                                        // transformData
2307                 };
2308             }
2309             buildRangeInfos[arrayIndex] = {
2310                 primitiveCount, // primitiveCount
2311                 0u,             // primitiveOffset
2312                 0u,             // firstVertex
2313                 0u,             // transformOffset
2314             };
2315             arrayIndex++;
2316         }
2317         for (const auto& aabbsRef : renderCmd.aabbsView) {
2318             geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2319                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2320                 nullptr,                                               // pNext
2321                 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR,         // geometryType
2322                 {},                                                    // geometry;
2323                 0,                                                     // flags
2324             };
2325             VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2326             if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(aabbsRef.data.handle); iPtr) {
2327                 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2328             }
2329             geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2330             geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
2331                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
2332                 nullptr,                                                          // pNext
2333                 deviceAddress,                                                    // data
2334                 aabbsRef.info.stride,                                             // stride
2335             };
2336             buildRangeInfos[arrayIndex] = {
2337                 1u, // primitiveCount
2338                 0u, // primitiveOffset
2339                 0u, // firstVertex
2340                 0u, // transformOffset
2341             };
2342             arrayIndex++;
2343         }
2344         for (const auto& instancesRef : renderCmd.instancesView) {
2345             geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
2346                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
2347                 nullptr,                                               // pNext
2348                 VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR,     // geometryType
2349                 {},                                                    // geometry;
2350                 0,                                                     // flags
2351             };
2352             VkDeviceOrHostAddressConstKHR deviceAddress { 0 };
2353             if (const GpuBufferVk* iPtr = gpuResourceMgr_.GetBuffer<const GpuBufferVk>(instancesRef.data.handle);
2354                 iPtr) {
2355                 deviceAddress.deviceAddress = GetBufferDeviceAddress(device, iPtr->GetPlatformData().buffer);
2356             }
2357             geometryData[arrayIndex].flags = VkGeometryFlagsKHR(renderCmd.flags);
2358             geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
2359                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
2360                 nullptr,                                                              // pNext
2361                 instancesRef.info.arrayOfPointers,                                    // arrayOfPointers
2362                 deviceAddress,                                                        // data
2363             };
2364             buildRangeInfos[arrayIndex] = {
2365                 1u, // primitiveCount
2366                 0u, // primitiveOffset
2367                 0u, // firstVertex
2368                 0u, // transformOffset
2369             };
2370             arrayIndex++;
2371         }
2372 
2373         const VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo {
2374             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
2375             nullptr,                                                          // pNext
2376             VkAccelerationStructureTypeKHR(renderCmd.type),                   // type
2377             VkBuildAccelerationStructureFlagsKHR(renderCmd.flags),            // flags
2378             VkBuildAccelerationStructureModeKHR(renderCmd.mode),              // mode
2379             VK_NULL_HANDLE,                                                   // srcAccelerationStructure
2380             dstAs,                                                            // dstAccelerationStructure
2381             uint32_t(arrayIndex),                                             // geometryCount
2382             geometryData.data(),                                              // pGeometries
2383             nullptr,                                                          // ppGeometries
2384             scratchData,                                                      // scratchData
2385         };
2386 
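        // vkCmdBuildAccelerationStructuresKHR expects an array of pointers to build-range arrays,
        // one per build geometry info.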
2387         vector<const VkAccelerationStructureBuildRangeInfoKHR*> buildRangeInfosPtr(arrayIndex);
2388         for (size_t idx = 0; idx < buildRangeInfosPtr.size(); ++idx) {
2389             buildRangeInfosPtr[idx] = &buildRangeInfos[idx];
2390         }
2391         const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2392         if (extFunctions.vkCmdBuildAccelerationStructuresKHR) {
2393             extFunctions.vkCmdBuildAccelerationStructuresKHR(cmdBuf.commandBuffer, // commandBuffer
2394                 1u,                                                                // infoCount
2395                 &buildGeometryInfo,                                                // pInfos
2396                 buildRangeInfosPtr.data());                                        // ppBuildRangeInfos
2397         }
2398     }
2399 #endif
2400 }
2401 
2402 void RenderBackendVk::RenderCommand(const RenderCommandClearColorImage& renderCmd,
2403     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2404     const StateCache& stateCache)
2405 {
2406     const GpuImageVk* imagePtr = gpuResourceMgr_.GetImage<GpuImageVk>(renderCmd.handle);
2407     // the layout could be VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR but we don't support it at the moment
2408     const VkImageLayout imageLayout = (VkImageLayout)renderCmd.imageLayout;
2409     PLUGIN_ASSERT((imageLayout == VK_IMAGE_LAYOUT_GENERAL) || (imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL));
2410     if (imagePtr) {
2411         const GpuImagePlatformDataVk& platImage = imagePtr->GetPlatformData();
2412         if (platImage.image) {
2413             VkClearColorValue clearColor;
2414             PLUGIN_STATIC_ASSERT(sizeof(clearColor) == sizeof(renderCmd.color));
2415             CloneData(&clearColor, sizeof(clearColor), &renderCmd.color, sizeof(renderCmd.color));
2416 
2417             // NOTE: a temporary vector is allocated because the range count has no fixed upper limit
2418             vector<VkImageSubresourceRange> ranges(renderCmd.ranges.size());
2419             for (size_t idx = 0; idx < ranges.size(); ++idx) {
2420                 const auto& inputRef = renderCmd.ranges[idx];
2421                 ranges[idx] = {
2422                     (VkImageAspectFlags)inputRef.imageAspectFlags, // aspectMask
2423                     inputRef.baseMipLevel,                         // baseMipLevel
2424                     inputRef.levelCount,                           // levelCount
2425                     inputRef.baseArrayLayer,                       // baseArrayLayer
2426                     inputRef.layerCount,                           // layerCount
2427                 };
2428             }
2429 
2430             vkCmdClearColorImage(cmdBuf.commandBuffer, // commandBuffer
2431                 platImage.image,                       // image
2432                 imageLayout,                           // imageLayout
2433                 &clearColor,                           // pColor
2434                 static_cast<uint32_t>(ranges.size()),  // rangeCount
2435                 ranges.data());                        // pRanges
2436         }
2437     }
2438 }
2439 
2440 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateViewport& renderCmd,
2441     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2442     const StateCache& stateCache)
2443 {
2444     const ViewportDesc& vd = renderCmd.viewportDesc;
2445 
2446     const VkViewport viewport {
2447         vd.x,        // x
2448         vd.y,        // y
2449         vd.width,    // width
2450         vd.height,   // height
2451         vd.minDepth, // minDepth
2452         vd.maxDepth, // maxDepth
2453     };
2454 
2455     vkCmdSetViewport(cmdBuf.commandBuffer, // commandBuffer
2456         0,                                 // firstViewport
2457         1,                                 // viewportCount
2458         &viewport);                        // pViewports
2459 }
2460 
2461 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateScissor& renderCmd,
2462     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2463     const StateCache& stateCache)
2464 {
2465     const ScissorDesc& sd = renderCmd.scissorDesc;
2466 
2467     const VkRect2D scissor {
2468         { sd.offsetX, sd.offsetY },          // offset
2469         { sd.extentWidth, sd.extentHeight }, // extent
2470     };
2471 
2472     vkCmdSetScissor(cmdBuf.commandBuffer, // commandBuffer
2473         0,                                // firstScissor
2474         1,                                // scissorCount
2475         &scissor);                        // pScissors
2476 }
2477 
2478 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateLineWidth& renderCmd,
2479     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2480     const StateCache& stateCache)
2481 {
2482     vkCmdSetLineWidth(cmdBuf.commandBuffer, // commandBuffer
2483         renderCmd.lineWidth);               // lineWidth
2484 }
2485 
2486 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBias& renderCmd,
2487     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2488     const StateCache& stateCache)
2489 {
2490     vkCmdSetDepthBias(cmdBuf.commandBuffer, // commandBuffer
2491         renderCmd.depthBiasConstantFactor,  // depthBiasConstantFactor
2492         renderCmd.depthBiasClamp,           // depthBiasClamp
2493         renderCmd.depthBiasSlopeFactor);    // depthBiasSlopeFactor
2494 }
2495 
2496 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateBlendConstants& renderCmd,
2497     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2498     const StateCache& stateCache)
2499 {
2500     vkCmdSetBlendConstants(cmdBuf.commandBuffer, // commandBuffer
2501         renderCmd.blendConstants);               // blendConstants[4]
2502 }
2503 
2504 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateDepthBounds& renderCmd,
2505     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2506     const StateCache& stateCache)
2507 {
2508     vkCmdSetDepthBounds(cmdBuf.commandBuffer, // commandBuffer
2509         renderCmd.minDepthBounds,             // minDepthBounds
2510         renderCmd.maxDepthBounds);            // maxDepthBounds
2511 }
2512 
2513 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateStencil& renderCmd,
2514     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2515     const StateCache& stateCache)
2516 {
2517     const VkStencilFaceFlags stencilFaceMask = (VkStencilFaceFlags)renderCmd.faceMask;
2518 
2519     if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2520         vkCmdSetStencilCompareMask(cmdBuf.commandBuffer, // commandBuffer
2521             stencilFaceMask,                             // faceMask
2522             renderCmd.mask);                             // compareMask
2523     } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2524         vkCmdSetStencilWriteMask(cmdBuf.commandBuffer, // commandBuffer
2525             stencilFaceMask,                           // faceMask
2526             renderCmd.mask);                           // writeMask
2527     } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2528         vkCmdSetStencilReference(cmdBuf.commandBuffer, // commandBuffer
2529             stencilFaceMask,                           // faceMask
2530             renderCmd.mask);                           // reference
2531     }
2532 }
2533 
2534 void RenderBackendVk::RenderCommand(const RenderCommandDynamicStateFragmentShadingRate& renderCmd,
2535     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2536     const StateCache& stateCache)
2537 {
2538 #if (RENDER_VULKAN_FSR_ENABLED == 1)
2539     const DeviceVk::ExtFunctions& extFunctions = deviceVk_.GetExtFunctions();
2540     if (extFunctions.vkCmdSetFragmentShadingRateKHR) {
2541         const VkExtent2D fragmentSize = { renderCmd.fragmentSize.width, renderCmd.fragmentSize.height };
2542         const VkFragmentShadingRateCombinerOpKHR combinerOps[2] = {
2543             (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op1,
2544             (VkFragmentShadingRateCombinerOpKHR)renderCmd.combinerOps.op2,
2545         };
2546 
2547         extFunctions.vkCmdSetFragmentShadingRateKHR(cmdBuf.commandBuffer, // commandBuffer
2548             &fragmentSize,                                                // pFragmentSize
2549             combinerOps);                                                 // combinerOps
2550     }
2551 #endif
2552 }
2553 
2554 void RenderBackendVk::RenderCommand(const RenderCommandExecuteBackendFramePosition& renderCmd,
2555     const LowLevelCommandBufferVk& cmdBuf, NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr,
2556     const StateCache& stateCache)
2557 {
2558     if (stateCache.backendNode) {
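        // Expose the low-level Vulkan objects of the current render pass state so that the
        // backend node can record its own commands directly into this command buffer.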
2559         const RenderBackendRecordingStateVk recordingState = {
2560             {},
2561             cmdBuf.commandBuffer,                              // commandBuffer
2562             stateCache.lowLevelRenderPassData.renderPass,      // renderPass
2563             stateCache.lowLevelRenderPassData.framebuffer,     // framebuffer
2564             stateCache.lowLevelRenderPassData.framebufferSize, // framebufferSize
2565             stateCache.lowLevelRenderPassData.subpassIndex,    // subpassIndex
2566             stateCache.pipelineLayout,                         // pipelineLayout
2567         };
2568         const ILowLevelDeviceVk& lowLevelDevice = static_cast<ILowLevelDeviceVk&>(deviceVk_.GetLowLevelDevice());
2569         stateCache.backendNode->ExecuteBackendFrame(lowLevelDevice, recordingState);
2570     }
2571 }
2572 
2573 void RenderBackendVk::RenderCommand(const RenderCommandWriteTimestamp& renderCmd, const LowLevelCommandBufferVk& cmdBuf,
2574     NodeContextPsoManager& psoMgr, const NodeContextPoolManager& poolMgr, const StateCache& stateCache)
2575 {
2576     PLUGIN_ASSERT_MSG(false, "not implemented");
2577 
2578     const VkPipelineStageFlagBits pipelineStageFlagBits = (VkPipelineStageFlagBits)renderCmd.pipelineStageFlagBits;
2579     const uint32_t queryIndex = renderCmd.queryIndex;
2580     VkQueryPool queryPool = VK_NULL_HANDLE;
2581 
2582     vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2583         queryPool,                            // queryPool
2584         queryIndex,                           // firstQuery
2585         1);                                   // queryCount
2586 
2587     vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer
2588         pipelineStageFlagBits,                // pipelineStage
2589         queryPool,                            // queryPool
2590         queryIndex);                          // query
2591 }
2592 
2593 void RenderBackendVk::RenderPresentationLayout(const LowLevelCommandBufferVk& cmdBuf, const uint32_t cmdBufferIdx)
2594 {
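    // Transition the swapchain images recorded with this command buffer to PRESENT_SRC layout
    // before presentation.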
2595     for (auto& presRef : presentationData_.infos) {
2596         if (presRef.renderNodeCommandListIndex != cmdBufferIdx) {
2597             continue;
2598         }
2599 
2600         PLUGIN_ASSERT(presRef.presentationLayoutChangeNeeded);
2601         PLUGIN_ASSERT(presRef.imageLayout != ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC);
2602 
2603         const GpuResourceState& state = presRef.renderGraphProcessedState;
2604         const VkAccessFlags srcAccessMask = (VkAccessFlags)state.accessFlags;
2605         const VkAccessFlags dstAccessMask = (VkAccessFlags)VkAccessFlagBits::VK_ACCESS_TRANSFER_READ_BIT;
2606         const VkPipelineStageFlags srcStageMask = ((VkPipelineStageFlags)state.pipelineStageFlags) |
2607                                                   (VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
2608         const VkPipelineStageFlags dstStageMask = VkPipelineStageFlagBits::VK_PIPELINE_STAGE_TRANSFER_BIT;
2609         const VkImageLayout oldLayout = (VkImageLayout)presRef.imageLayout;
2610         const VkImageLayout newLayout = VkImageLayout::VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
2611         // NOTE: queue is not currently checked (should be in the same queue as last time used)
2612         constexpr uint32_t srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2613         constexpr uint32_t dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2614         constexpr VkDependencyFlags dependencyFlags { VkDependencyFlagBits::VK_DEPENDENCY_BY_REGION_BIT };
2615         constexpr VkImageSubresourceRange imageSubresourceRange {
2616             VkImageAspectFlagBits::VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
2617             0,                                                // baseMipLevel
2618             1,                                                // levelCount
2619             0,                                                // baseArrayLayer
2620             1,                                                // layerCount
2621         };
2622 
2623         const VkImageMemoryBarrier imageMemoryBarrier {
2624             VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
2625             nullptr,                                // pNext
2626             srcAccessMask,                          // srcAccessMask
2627             dstAccessMask,                          // dstAccessMask
2628             oldLayout,                              // oldLayout
2629             newLayout,                              // newLayout
2630             srcQueueFamilyIndex,                    // srcQueueFamilyIndex
2631             dstQueueFamilyIndex,                    // dstQueueFamilyIndex
2632             presRef.swapchainImage,                 // image
2633             imageSubresourceRange,                  // subresourceRange
2634         };
2635 
2636         vkCmdPipelineBarrier(cmdBuf.commandBuffer, // commandBuffer
2637             srcStageMask,                          // srcStageMask
2638             dstStageMask,                          // dstStageMask
2639             dependencyFlags,                       // dependencyFlags
2640             0,                                     // memoryBarrierCount
2641             nullptr,                               // pMemoryBarriers
2642             0,                                     // bufferMemoryBarrierCount
2643             nullptr,                               // pBufferMemoryBarriers
2644             1,                                     // imageMemoryBarrierCount
2645             &imageMemoryBarrier);                  // pImageMemoryBarriers
2646 
2647         presRef.presentationLayoutChangeNeeded = false;
2648         presRef.imageLayout = ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC;
2649     }
2650 }
2651 
2652 #if (RENDER_PERF_ENABLED == 1)
2653 
2654 void RenderBackendVk::StartFrameTimers(RenderCommandFrameData& renderCommandFrameData)
2655 {
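    // Lazily create one timer set per render command context; with GPU timestamp queries enabled,
    // a query object and an offset into the timestamp readback buffer are reserved as well.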
2656     for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2657         const string_view& debugName = renderCommandContext.debugName;
2658         if (timers_.count(debugName) == 0) { // new timers
2659 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2660             PerfDataSet& perfDataSet = timers_[debugName];
2661             constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2662             perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryVk(device_, desc));
2663             constexpr uint32_t singleQueryByteSize = sizeof(uint64_t) * TIME_STAMP_PER_GPU_QUERY;
2664             perfDataSet.gpuBufferOffset = static_cast<uint32_t>(timers_.size()) * singleQueryByteSize;
2665 #else
2666             timers_.insert({ debugName, {} });
2667 #endif
2668         }
2669     }
2670 
2671 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2672     perfGpuTimerData_.mappedData = perfGpuTimerData_.gpuBuffer->Map();
2673     perfGpuTimerData_.currentOffset =
2674         (perfGpuTimerData_.currentOffset + perfGpuTimerData_.frameByteSize) % perfGpuTimerData_.fullByteSize;
2675 #endif
2676 }
2677 
2678 void RenderBackendVk::EndFrameTimers()
2679 {
2680     int64_t fullGpuTime = 0;
2681 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2682     // already in micros
2683     fullGpuTime = perfGpuTimerData_.fullGpuCounter;
2684     perfGpuTimerData_.fullGpuCounter = 0;
2685 
2686     perfGpuTimerData_.gpuBuffer->Unmap();
2687 #endif
2688     if (IPerformanceDataManagerFactory* globalPerfData =
2689             GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2690         globalPerfData) {
2691         IPerformanceDataManager* perfData = globalPerfData->Get("Renderer");
2692         perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2693         perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2694         perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2695         perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2696         perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2697         perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2698     }
2699 }
2700 
2701 void RenderBackendVk::WritePerfTimeStamp(const LowLevelCommandBufferVk& cmdBuf, const string_view name,
2702     const uint32_t queryIndex, const VkPipelineStageFlagBits stageFlagBits, const StateCache& stateCache)
2703 {
2704 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2705     if (stateCache.secondaryCommandBuffer) {
2706         return; // cannot be called inside render pass (e.g. with secondary command buffers)
2707     }
2708     PLUGIN_ASSERT(timers_.count(name) == 1);
2709     const PerfDataSet* perfDataSet = &timers_[name];
2710     if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
2711         const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
2712         if (platData.queryPool) {
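            // Reset the query slot and write the timestamp; the result is copied to the
            // readback buffer later in CopyPerfTimeStamp.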
2713             vkCmdResetQueryPool(cmdBuf.commandBuffer, // commandBuffer
2714                 platData.queryPool,                   // queryPool
2715                 queryIndex,                           // firstQuery
2716                 1);                                   // queryCount
2717 
2718             vkCmdWriteTimestamp(cmdBuf.commandBuffer, // commandBuffer,
2719                 stageFlagBits,                        // pipelineStage,
2720                 platData.queryPool,                   // queryPool,
2721                 queryIndex);                          // query
2722         }
2723     }
2724 #endif
2725 }
2726 
2727 namespace {
2728 void UpdatePerfCounters(IPerformanceDataManager& perfData, const string_view name, const PerfCounters& perfCounters)
2729 {
2730     perfData.UpdateData(name, "Backend_Count_Triangle", perfCounters.triangleCount);
2731     perfData.UpdateData(name, "Backend_Count_InstanceCount", perfCounters.instanceCount);
2732     perfData.UpdateData(name, "Backend_Count_Draw", perfCounters.drawCount);
2733     perfData.UpdateData(name, "Backend_Count_DrawIndirect", perfCounters.drawIndirectCount);
2734     perfData.UpdateData(name, "Backend_Count_Dispatch", perfCounters.dispatchCount);
2735     perfData.UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters.dispatchIndirectCount);
2736     perfData.UpdateData(name, "Backend_Count_BindPipeline", perfCounters.bindPipelineCount);
2737     perfData.UpdateData(name, "Backend_Count_RenderPass", perfCounters.renderPassCount);
2738     perfData.UpdateData(name, "Backend_Count_UpdateDescriptorSet", perfCounters.updateDescriptorSetCount);
2739     perfData.UpdateData(name, "Backend_Count_BindDescriptorSet", perfCounters.bindDescriptorSetCount);
2740 }
2741 } // namespace
2742 
2743 void RenderBackendVk::CopyPerfTimeStamp(
2744     const LowLevelCommandBufferVk& cmdBuf, const string_view name, const StateCache& stateCache)
2745 {
2746     PLUGIN_ASSERT(timers_.count(name) == 1);
2747     PerfDataSet* const perfDataSet = &timers_[name];
2748 
2749 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2750     // read back the timestamps a previous frame copied into the mapped buffer (CPU side)
2751     // and record a copy of this frame's query results into the GPU buffer
2752     const uint32_t currentFrameByteOffset = perfGpuTimerData_.currentOffset + perfDataSet->gpuBufferOffset;
2753     int64_t gpuMicroSeconds = 0;
2754     {
2755         auto data = static_cast<const uint8_t*>(perfGpuTimerData_.mappedData);
2756         auto currentData = reinterpret_cast<const uint64_t*>(data + currentFrameByteOffset);
2757 
2758         const uint64_t startStamp = *currentData;
2759         const uint64_t endStamp = *(currentData + 1);
2760 
2761         const double timestampPeriod =
2762             static_cast<double>(static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
2763                                     .physicalDeviceProperties.physicalDeviceProperties.limits.timestampPeriod);
2764         constexpr int64_t nanosToMicrosDivisor { 1000 };
2765         gpuMicroSeconds = static_cast<int64_t>((endStamp - startStamp) * timestampPeriod) / nanosToMicrosDivisor;
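        // Values above ~UINT32_MAX microseconds are discarded; presumably they indicate a wrapped
        // or not-yet-written timestamp pair.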
2766         constexpr int64_t maxValidMicroSecondValue { 4294967295 };
2767         if (gpuMicroSeconds > maxValidMicroSecondValue) {
2768             gpuMicroSeconds = 0;
2769         }
2770         perfGpuTimerData_.fullGpuCounter += gpuMicroSeconds;
2771     }
2772 #endif
2773     const int64_t cpuMicroSeconds = perfDataSet->cpuTimer.GetMicroseconds();
2774 
2775     if (IPerformanceDataManagerFactory* globalPerfData =
2776             GetInstance<IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2777         globalPerfData) {
2778         IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2779 
2780         perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2781 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2782         perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2783 
2784         // cannot be called inside render pass (e.g. with secondary command buffers)
2785         if (!stateCache.secondaryCommandBuffer) {
2786             if (const GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet->gpuHandle); gpuQuery) {
2787                 const auto& platData = static_cast<const GpuQueryPlatformDataVk&>(gpuQuery->GetPlatformData());
2788 
2789                 const GpuBufferVk* gpuBuffer = static_cast<GpuBufferVk*>(perfGpuTimerData_.gpuBuffer.get());
2790                 PLUGIN_ASSERT(gpuBuffer);
2791                 const GpuBufferPlatformDataVk& platBuffer = gpuBuffer->GetPlatformData();
2792 
2793                 constexpr uint32_t queryCount = 2;
2794                 constexpr VkDeviceSize queryStride = sizeof(uint64_t);
2795                 constexpr VkQueryResultFlags queryResultFlags =
2796                     VkQueryResultFlagBits::VK_QUERY_RESULT_64_BIT | VkQueryResultFlagBits::VK_QUERY_RESULT_WAIT_BIT;
2797 
2798                 if (platData.queryPool) {
2799                     vkCmdCopyQueryPoolResults(cmdBuf.commandBuffer, // commandBuffer
2800                         platData.queryPool,                         // queryPool
2801                         0,                                          // firstQuery
2802                         queryCount,                                 // queryCount
2803                         platBuffer.buffer,                          // dstBuffer
2804                         currentFrameByteOffset,                     // dstOffset
2805                         queryStride,                                // stride
2806                         queryResultFlags);                          // flags
2807                 }
2808             }
2809         }
2810 #endif
2811         UpdatePerfCounters(*perfData, name, perfDataSet->perfCounters);
2812         perfDataSet->perfCounters = {}; // reset perf counters
2813     }
2814 }
2815 
2816 #endif
2817 RENDER_END_NAMESPACE()
2818