/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_graph.h"

#include <cinttypes>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/math/mathf.h>
#include <render/namespace.h>

#include "device/gpu_resource_cache.h"
#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/render_barrier_list.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
constexpr uint32_t INVALID_TRACK_IDX { ~0u };

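// Development-build debug toggles: full per-command printing, render pass
// attachment dumps, and per-resource state logging. All default to false and
// the code below is compiled only when RENDER_DEV_ENABLED == 1.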
#if (RENDER_DEV_ENABLED == 1)
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_PRINT = false;
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS = false;
constexpr const bool CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES = false;

void DebugPrintCommandListCommand(const RenderCommandWithType& rc, GpuResourceManager& aMgr)
{
    switch (rc.type) {
        case RenderCommandType::BARRIER_POINT: {
            PLUGIN_LOG_I("rc: BarrierPoint");
            break;
        }
        case RenderCommandType::DRAW: {
            PLUGIN_LOG_I("rc: Draw");
            break;
        }
        case RenderCommandType::DRAW_INDIRECT: {
            PLUGIN_LOG_I("rc: DrawIndirect");
            break;
        }
        case RenderCommandType::DISPATCH: {
            PLUGIN_LOG_I("rc: Dispatch");
            break;
        }
        case RenderCommandType::DISPATCH_INDIRECT: {
            PLUGIN_LOG_I("rc: DispatchIndirect");
            break;
        }
        case RenderCommandType::BIND_PIPELINE: {
            PLUGIN_LOG_I("rc: BindPipeline");
            break;
        }
        case RenderCommandType::BEGIN_RENDER_PASS: {
            PLUGIN_LOG_I("rc: BeginRenderPass");
            if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
                const auto& beginRenderPass = *static_cast<RenderCommandBeginRenderPass*>(rc.rc);
                for (uint32_t idx = 0; idx < beginRenderPass.renderPassDesc.attachmentCount; ++idx) {
                    const RenderHandle handle = beginRenderPass.renderPassDesc.attachmentHandles[idx];
                    PLUGIN_LOG_I("    attachment idx: %u name: %s", idx, aMgr.GetName(handle).c_str());
                }
                PLUGIN_LOG_I("    subpass count: %u, subpass start idx: %u",
                    static_cast<uint32_t>(beginRenderPass.renderPassDesc.subpassCount),
                    beginRenderPass.subpassStartIndex);
            }
            break;
        }
        case RenderCommandType::NEXT_SUBPASS: {
            PLUGIN_LOG_I("rc: NextSubpass");
            break;
        }
        case RenderCommandType::END_RENDER_PASS: {
            PLUGIN_LOG_I("rc: EndRenderPass");
            break;
        }
        case RenderCommandType::BIND_VERTEX_BUFFERS: {
            PLUGIN_LOG_I("rc: BindVertexBuffers");
            break;
        }
        case RenderCommandType::BIND_INDEX_BUFFER: {
            PLUGIN_LOG_I("rc: BindIndexBuffer");
            break;
        }
        case RenderCommandType::COPY_BUFFER: {
            PLUGIN_LOG_I("rc: CopyBuffer");
            break;
        }
        case RenderCommandType::COPY_BUFFER_IMAGE: {
            PLUGIN_LOG_I("rc: CopyBufferImage");
            break;
        }
        case RenderCommandType::BIND_DESCRIPTOR_SETS: {
            PLUGIN_LOG_I("rc: BindDescriptorSets");
            break;
        }
        case RenderCommandType::PUSH_CONSTANT: {
            PLUGIN_LOG_I("rc: PushConstant");
            break;
        }
        case RenderCommandType::BLIT_IMAGE: {
            PLUGIN_LOG_I("rc: BlitImage");
            break;
        }
            // dynamic states
        case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
            PLUGIN_LOG_I("rc: DynamicStateViewport");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
            PLUGIN_LOG_I("rc: DynamicStateScissor");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
            PLUGIN_LOG_I("rc: DynamicStateLineWidth");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
            PLUGIN_LOG_I("rc: DynamicStateDepthBias");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
            PLUGIN_LOG_I("rc: DynamicStateBlendConstants");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
            PLUGIN_LOG_I("rc: DynamicStateDepthBounds");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_STENCIL: {
            PLUGIN_LOG_I("rc: DynamicStateStencil");
            break;
        }
        case RenderCommandType::WRITE_TIMESTAMP: {
            PLUGIN_LOG_I("rc: WriteTimestamp");
            break;
        }
        case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE: {
            PLUGIN_LOG_I("rc: GpuQueueTransferRelease");
            break;
        }
        case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE: {
            PLUGIN_LOG_I("rc: GpuQueueTransferAcquire");
            break;
        }
        case RenderCommandType::UNDEFINED:
        default: {
            PLUGIN_ASSERT(false && "invalid render command");
            break;
        }
    }
}

void DebugBarrierPrint(const GpuResourceManager& gpuResourceMgr, const vector<CommandBarrier>& combinedBarriers)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    for (const auto& ref : combinedBarriers) {
        const RenderHandleType type = RenderHandleUtil::GetHandleType(ref.resourceHandle);
        if (type == RenderHandleType::GPU_BUFFER) {
            PLUGIN_LOG_I("barrier buffer    :: handle:0x%" PRIx64 " name:%s, src_stage:%u dst_stage:%u",
                ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(), ref.src.pipelineStageFlags,
                ref.dst.pipelineStageFlags);
        } else {
            PLUGIN_ASSERT(type == RenderHandleType::GPU_IMAGE);
            PLUGIN_LOG_I("barrier image     :: handle:0x%" PRIx64
                         " name:%s, src_stage:%u dst_stage:%u, src_layout:%u dst_layout:%u",
                ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(), ref.src.pipelineStageFlags,
                ref.dst.pipelineStageFlags, ref.src.optionalImageLayout, ref.dst.optionalImageLayout);
        }
    }
}

void DebugRenderPassLayoutPrint(const GpuResourceManager& gpuResourceMgr, const RenderCommandBeginRenderPass& rc)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    for (uint32_t idx = 0; idx < rc.renderPassDesc.attachmentCount; ++idx) {
        const auto handle = rc.renderPassDesc.attachmentHandles[idx];
        const auto srcLayout = rc.imageLayouts.attachmentInitialLayouts[idx];
        const auto dstLayout = rc.imageLayouts.attachmentFinalLayouts[idx];
        PLUGIN_LOG_I("render_pass image :: handle:0x%" PRIx64 " name:%s, src_layout:%u dst_layout:%u (patched later)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), srcLayout, dstLayout);
    }
}

void DebugPrintImageState(const GpuResourceManager& gpuResourceMgr, const RenderGraph::RenderGraphImageState& resState)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    const EngineResourceHandle gpuHandle = gpuResourceMgr.GetGpuHandle(resState.resource.handle);
    PLUGIN_LOG_I("image_state   :: handle:0x%" PRIx64 " name:%s, layout:%u, index:%u, gen:%u, gpu_gen:%u",
        resState.resource.handle.id, gpuResourceMgr.GetName(resState.resource.handle).c_str(),
        resState.resource.imageLayout, RenderHandleUtil::GetIndexPart(resState.resource.handle),
        RenderHandleUtil::GetGenerationIndexPart(resState.resource.handle),
        RenderHandleUtil::GetGenerationIndexPart(gpuHandle));
    // one could fetch and print the vulkan handle here as well, e.g.
    // 1. const GpuImagePlatformDataVk& plat =
    // 2. (const GpuImagePlatformDataVk&)gpuResourceMgr.GetImage(ref.first)->GetBasePlatformData()
    // 3. PLUGIN_LOG_I("end_frame image   :: vk_handle:0x%" PRIx64, VulkanHandleCast<uint64_t>(plat.image))
}
#endif // RENDER_DEV_ENABLED

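// All write-type access bits combined; an access whose flags intersect this
// mask is treated as a write (and therefore as needing synchronization).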
static constexpr uint32_t WRITE_ACCESS_FLAGS = CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                               CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                               CORE_ACCESS_TRANSFER_WRITE_BIT | CORE_ACCESS_HOST_WRITE_BIT |
                                               CORE_ACCESS_MEMORY_WRITE_BIT;

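// Patches the final layout of every render pass attachment matching the given
// handle and mirrors the layout into the tracked image state, so subsequent
// commands (and the next frame) see the post-render-pass layout.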
void PatchRenderPassFinalLayout(const RenderHandle handle, const ImageLayout imageLayout,
    RenderCommandBeginRenderPass& beginRenderPass, RenderGraph::RenderGraphImageState& storeState)
{
    const uint32_t attachmentCount = beginRenderPass.renderPassDesc.attachmentCount;
    for (uint32_t attachmentIdx = 0; attachmentIdx < attachmentCount; ++attachmentIdx) {
        if (beginRenderPass.renderPassDesc.attachmentHandles[attachmentIdx].id == handle.id) {
            beginRenderPass.imageLayouts.attachmentFinalLayouts[attachmentIdx] = imageLayout;
            storeState.resource.imageLayout = imageLayout;
        }
    }
}

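// Stitches render passes recorded into separate render command lists into one
// logical render pass: load ops and initial layouts are taken from the first
// pass, store ops and final layouts from the last, and subpass descriptions
// are synchronized so every command list carries an identical description.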
void UpdateMultiRenderCommandListRenderPasses(RenderGraph::MultiRenderPassStore& store)
{
    const uint32_t renderPassCount = static_cast<uint32_t>(store.renderPasses.size());
    PLUGIN_ASSERT(renderPassCount > 1);

    RenderCommandBeginRenderPass* firstRenderPass = store.renderPasses[0];
    PLUGIN_ASSERT(firstRenderPass);
    PLUGIN_ASSERT(firstRenderPass->subpasses.size() >= renderPassCount);
    const RenderCommandBeginRenderPass* lastRenderPass = store.renderPasses[renderPassCount - 1];
    PLUGIN_ASSERT(lastRenderPass);

    const uint32_t attachmentCount = firstRenderPass->renderPassDesc.attachmentCount;

    // take attachment loads from the first one, and stores from the last one
    // take initial layouts from the first one, and final layouts from the last one (could take the next layout)
    // first, store the correct render pass description in the first render pass and then copy it to the others
    // resource states are copied from valid subpasses to the other render command lists' subpasses
    for (uint32_t fromRpIdx = 0; fromRpIdx < renderPassCount; ++fromRpIdx) {
        const auto& fromRenderPass = *(store.renderPasses[fromRpIdx]);
        const uint32_t fromRpSubpassStartIndex = fromRenderPass.subpassStartIndex;
        const auto& fromRpSubpassResourceStates = fromRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
        for (uint32_t toRpIdx = 0; toRpIdx < renderPassCount; ++toRpIdx) {
            if (fromRpIdx != toRpIdx) {
                auto& toRenderPass = *(store.renderPasses[toRpIdx]);
                auto& toRpSubpassResourceStates = toRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
                for (uint32_t idx = 0; idx < attachmentCount; ++idx) {
                    toRpSubpassResourceStates.states[idx] = fromRpSubpassResourceStates.states[idx];
                    toRpSubpassResourceStates.layouts[idx] = fromRpSubpassResourceStates.layouts[idx];
                }
            }
        }
    }

    for (uint32_t idx = 0; idx < firstRenderPass->renderPassDesc.attachmentCount; ++idx) {
        firstRenderPass->renderPassDesc.attachments[idx].storeOp =
            lastRenderPass->renderPassDesc.attachments[idx].storeOp;
        firstRenderPass->renderPassDesc.attachments[idx].stencilStoreOp =
            lastRenderPass->renderPassDesc.attachments[idx].stencilStoreOp;

        firstRenderPass->imageLayouts.attachmentFinalLayouts[idx] =
            lastRenderPass->imageLayouts.attachmentFinalLayouts[idx];
    }

    // copy subpasses to first
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        firstRenderPass->subpasses[idx] = store.renderPasses[idx]->subpasses[idx];
    }

    // copy from first to following render passes
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        // subpass start index is the only changing variable
        const uint32_t subpassStartIndex = store.renderPasses[idx]->subpassStartIndex;
        store.renderPasses[idx]->renderPassDesc = firstRenderPass->renderPassDesc;
        store.renderPasses[idx]->subpassStartIndex = subpassStartIndex;

        // image layouts need to match
        store.renderPasses[idx]->imageLayouts = firstRenderPass->imageLayouts;
        PLUGIN_ASSERT(store.renderPasses[idx]->subpasses.size() >= renderPassCount);
        // copy all subpasses
        if (!CloneData(store.renderPasses[idx]->subpasses.data(), sizeof(RenderPassSubpassDesc) * renderPassCount,
                firstRenderPass->subpasses.data(), sizeof(RenderPassSubpassDesc) * renderPassCount)) {
            PLUGIN_LOG_E("Copying of renderPasses failed.");
        }
        // copy input resource states
        if (!CloneData(store.renderPasses[idx]->inputResourceStates.states, sizeof(GpuResourceState) * attachmentCount,
                firstRenderPass->inputResourceStates.states, sizeof(GpuResourceState) * attachmentCount)) {
            PLUGIN_LOG_E("Copying of renderPasses failed.");
        }
        // NOTE: subpassResourceStates are not copied to different render passes
    }

#if (RENDER_VULKAN_COMBINE_MULTI_COMMAND_LIST_MSAA_SUBPASSES_ENABLED == 1)
    // copy the final layouts and resolves to the first render pass
    const uint32_t finalSubpassIdx = renderPassCount - 1U;
    if ((renderPassCount > 1U) && (firstRenderPass->subpasses[finalSubpassIdx].resolveAttachmentCount > 0U)) {
        firstRenderPass->renderPassDesc.subpassCount = 1U;
        firstRenderPass->subpasses = { firstRenderPass->subpasses.data(), 1U };
        firstRenderPass->subpassResourceStates = { firstRenderPass->subpassResourceStates.data(), 1U };
        // copy resolve attachments from the final subpass
        auto& firstSubpass = firstRenderPass->subpasses[0U];
        const auto& finalSubpass = store.renderPasses[finalSubpassIdx]->subpasses[finalSubpassIdx];
        firstSubpass.resolveAttachmentCount = finalSubpass.resolveAttachmentCount;
        firstSubpass.depthResolveAttachmentCount = finalSubpass.depthResolveAttachmentCount;
        firstSubpass.depthResolveAttachmentIndex = finalSubpass.depthResolveAttachmentIndex;
        firstSubpass.depthResolveModeFlagBit = finalSubpass.depthResolveModeFlagBit;
        CloneData(firstSubpass.resolveAttachmentIndices, sizeof(firstSubpass.resolveAttachmentIndices),
            finalSubpass.resolveAttachmentIndices, sizeof(uint32_t) * firstSubpass.resolveAttachmentCount);
        // layouts for resolve attachments
        const auto& finalSubpassResourceStates =
            store.renderPasses[finalSubpassIdx]->subpassResourceStates[finalSubpassIdx];
        const uint32_t resolveAttachmentCount = firstSubpass.resolveAttachmentCount;
        for (uint32_t resIdx = 0U; resIdx < resolveAttachmentCount; ++resIdx) {
            const uint32_t resAttIdx = firstSubpass.resolveAttachmentIndices[resIdx];
            firstRenderPass->subpassResourceStates[0U].layouts[resAttIdx] =
                finalSubpassResourceStates.layouts[resAttIdx];
            firstRenderPass->subpassResourceStates[0U].states[resAttIdx] = finalSubpassResourceStates.states[resAttIdx];
        }
        if ((firstSubpass.depthResolveAttachmentCount > 0U) &&
            (firstSubpass.depthResolveAttachmentIndex < PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT)) {
            const uint32_t resAttIdx = firstSubpass.resolveAttachmentIndices[firstSubpass.depthResolveAttachmentIndex];
            firstRenderPass->subpassResourceStates[0U].layouts[resAttIdx] =
                finalSubpassResourceStates.layouts[resAttIdx];
            firstRenderPass->subpassResourceStates[0U].states[resAttIdx] = finalSubpassResourceStates.states[resAttIdx];
        }

        // fix render command list indices
        for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
            store.renderPasses[idx]->renderPassDesc = firstRenderPass->renderPassDesc;
            store.renderPasses[idx]->subpassStartIndex = 0U;
            store.renderPasses[idx]->subpasses = firstRenderPass->subpasses;
            store.renderPasses[idx]->subpassResourceStates = firstRenderPass->subpassResourceStates;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_I("combine_multi_command_list_msaa_subpasses_enabled",
            "RENDER_VALIDATION: Combining multi-commandlist MSAA resolve subpasses");
#endif
    }
#endif
}

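// The helpers below build the source ("from") and destination ("to") halves of
// a barrier. CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT is OR'ed into source stages
// and CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT into destination stages so the
// stage masks are never zero. A minimal sketch of how the halves combine into
// a CommandBarrier (prevState/newState/bindableBuffer are hypothetical names
// for the tracked and upcoming states of the same buffer resource):
//   const CommandBarrier cb {
//       handle,
//       GetSrcBufferBarrier(prevState, bindableBuffer), prevState.gpuQueue,
//       GetDstBufferBarrier(newState, bindableBuffer), newState.gpuQueue,
//   };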
ResourceBarrier GetSrcBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetSrcImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

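// Mip-aware variant: when either side targets a specific mip level, the
// barrier is narrowed to that level and the source layout is read from the
// per-mip layout tracking instead of the single tracked image layout.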
ResourceBarrier GetSrcImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout srcImageLayout = src.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
        PLUGIN_ASSERT(additionalImageState.layouts);
        srcImageLayout = additionalImageState.layouts[mipLevel];
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        srcImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}

ResourceBarrier GetDstBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetDstImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

ResourceBarrier GetDstImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout dstImageLayout = dst.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        dstImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}

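// Updates the per-mip layout tracking of images created with additional state
// (CORE_RESOURCE_HANDLE_ADDITIONAL_STATE): a specific mip updates one slot,
// otherwise all MAX_MIP_STATE_COUNT slots receive the new layout.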
void ModifyAdditionalImageState(
    const BindableImage& res, RenderGraph::RenderGraphAdditionalImageState& additionalStateRef)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    // NOTE: should not be called for images without CORE_RESOURCE_HANDLE_ADDITIONAL_STATE
    PLUGIN_ASSERT(RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle));
#endif
    if (additionalStateRef.layouts) {
        if ((res.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
            (res.mip < RenderGraph::MAX_MIP_STATE_COUNT)) {
            additionalStateRef.layouts[res.mip] = res.imageLayout;
        } else {
            // set layout for all mips
            for (uint32_t idx = 0; idx < RenderGraph::MAX_MIP_STATE_COUNT; ++idx) {
                additionalStateRef.layouts[idx] = res.imageLayout;
            }
        }
    } else {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(to_hex(res.handle.id), "mip layouts missing");
#endif
    }
}

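// Builds the barrier used for queue family ownership transfers; the same
// CommandBarrier is later recorded on both the releasing and the acquiring
// queue with the matching source/destination layouts.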
CommandBarrier GetQueueOwnershipTransferBarrier(const RenderHandle handle, const GpuQueue& srcGpuQueue,
    const GpuQueue& dstGpuQueue, const ImageLayout srcImageLayout, const ImageLayout dstImageLayout)
{
    return {
        handle,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            srcImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        srcGpuQueue,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            dstImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        dstGpuQueue,
    };
}

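// For each pending transfer: appends the ownership barrier to the last barrier
// point of the releasing node (GPU_QUEUE_TRANSFER_RELEASE) and to the first
// barrier point of the acquiring node (GPU_QUEUE_TRANSFER_ACQUIRE). When the
// release node index is out of range, the resource is acquired on its own
// queue (no release is patched, e.g. for resources from a previous frame).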
void PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,
    array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)
{
    for (const auto& transferRef : currNodeGpuResourceTransfers) {
        PLUGIN_ASSERT(transferRef.acquireNodeIdx < static_cast<uint32_t>(frameRenderNodeContextData.size()));

        auto& acquireNodeRef = frameRenderNodeContextData[transferRef.acquireNodeIdx];
        const GpuQueue acquireGpuQueue = acquireNodeRef.renderCommandList->GetGpuQueue();
        GpuQueue releaseGpuQueue = acquireGpuQueue;

        if (transferRef.releaseNodeIdx < static_cast<uint32_t>(frameRenderNodeContextData.size())) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            releaseGpuQueue = releaseNodeRef.renderCommandList->GetGpuQueue();
        }

        const CommandBarrier transferBarrier = GetQueueOwnershipTransferBarrier(transferRef.handle, releaseGpuQueue,
            acquireGpuQueue, transferRef.optionalReleaseImageLayout, transferRef.optionalAcquireImageLayout);

        // release ownership (NOTE: not done for the previous frame)
        if (transferRef.releaseNodeIdx < static_cast<uint32_t>(frameRenderNodeContextData.size())) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            const uint32_t rcIndex = releaseNodeRef.renderCommandList->GetRenderCommandCount() - 1;
            const RenderCommandWithType& cmdRef = releaseNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            releaseNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            releaseNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
        // acquire ownership
        {
            const uint32_t rcIndex = 0;
            const RenderCommandWithType& cmdRef = acquireNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            acquireNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            acquireNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
    }
}

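// Only dynamic resources need barrier tracking here; a resource already
// covered by a user-provided custom barrier in this barrier point is skipped.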
bool CheckForBarrierNeed(const unordered_map<RenderHandle, uint32_t>& handledCustomBarriers,
    const uint32_t customBarrierCount, const RenderHandle handle)
{
    bool needsBarrier = RenderHandleUtil::IsDynamicResource(handle);
    if ((customBarrierCount > 0) && needsBarrier) {
        needsBarrier = (handledCustomBarriers.count(handle) == 0);
    }
    return needsBarrier;
}
} // namespace

RenderGraph::RenderGraph(GpuResourceManager& gpuResourceMgr) : gpuResourceMgr_(gpuResourceMgr) {}

void RenderGraph::BeginFrame()
{
    stateCache_.multiRenderPassStore.renderPasses.clear();
    stateCache_.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
    stateCache_.multiRenderPassStore.firstBarrierPointIndex = ~0u;
    stateCache_.multiRenderPassStore.supportOpen = false;
    stateCache_.nodeCounter = 0u;
    stateCache_.checkForBackbufferDependency = false;
    stateCache_.usesSwapchainImage = false;
}

void RenderGraph::ProcessRenderNodeGraph(
    const bool checkBackbufferDependency, const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
{
    stateCache_.checkForBackbufferDependency = checkBackbufferDependency;

    // NOTE: separate gpu buffers and gpu images due to larger structs, layers, mips in images
    // all levels of mips and layers are not currently tracked -> needs more fine grained modifications
    // handles:
    // gpu images in descriptor sets, render passes, blits, and custom barriers
    // gpu buffers in descriptor sets, and custom barriers

    {
        // remove resources that will not be tracked anymore and release available slots
        const GpuResourceManager::StateDestroyConsumeStruct stateResetData = gpuResourceMgr_.ConsumeStateDestroyData();
        for (const auto& handle : stateResetData.resources) {
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
            const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
            if ((handleType == RenderHandleType::GPU_IMAGE) &&
                (arrayIndex < static_cast<uint32_t>(gpuImageDataIndices_.size()))) {
                if (const uint32_t dataIdx = gpuImageDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuImageTracking_.size()));
                    gpuImageTracking_[dataIdx] = {}; // reset
                    gpuImageAvailableIndices_.push_back(dataIdx);
                }
                gpuImageDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            } else if (arrayIndex < static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
                if (const uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuBufferTracking_.size()));
                    gpuBufferTracking_[dataIdx] = {}; // reset
                    gpuBufferAvailableIndices_.push_back(dataIdx);
                }
                gpuBufferDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            }
        }
    }

    gpuBufferDataIndices_.resize(gpuResourceMgr_.GetBufferHandleCount(), INVALID_TRACK_IDX);
    gpuImageDataIndices_.resize(gpuResourceMgr_.GetImageHandleCount(), INVALID_TRACK_IDX);

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT || CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES ||
                  CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
        static uint64_t debugFrame = 0;
        debugFrame++;
        PLUGIN_LOG_I("START RENDER GRAPH, FRAME %" PRIu64, debugFrame);
    }
#endif

    // some resources need their frame state stored as undefined (i.e. reset on frame boundaries)
    ProcessRenderNodeGraphNodeStores(renderNodeGraphNodeStores, stateCache_);

    // store the final state for the next frame
    StoreFinalBufferState();
    StoreFinalImageState(); // processes gpuImageBackbufferState_ as well
}

RenderGraph::SwapchainStates RenderGraph::GetSwapchainResourceStates() const
{
    return swapchainStates_;
}

void RenderGraph::ProcessRenderNodeGraphNodeStores(
    const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores, StateCache& stateCache)
{
    for (RenderNodeGraphNodeStore* graphStore : renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(graphStore);
        if (!graphStore) {
            continue;
        }

        for (uint32_t nodeIdx = 0;
            nodeIdx < static_cast<uint32_t>(graphStore->renderNodeContextData.size());
            ++nodeIdx) {
            auto& ref = graphStore->renderNodeContextData[nodeIdx];
            ref.submitInfo.waitForSwapchainAcquireSignal = false; // reset
            stateCache.usesSwapchainImage = false;                // reset

#if (RENDER_DEV_ENABLED == 1)
            if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
                PLUGIN_LOG_I("FULL NODENAME %s", graphStore->renderNodeData[nodeIdx].fullName.data());
            }
#endif

            if (stateCache.multiRenderPassStore.supportOpen &&
                stateCache.multiRenderPassStore.renderPasses.empty()) {
                PLUGIN_LOG_E("invalid multi render node render pass subpass stitching");
                // NOTE: add more error handling and invalidate render command lists
            }
            stateCache.multiRenderPassStore.supportOpen =
                ref.renderCommandList->HasMultiRenderCommandListSubpasses();
            array_view<const RenderCommandWithType> cmdListRef = ref.renderCommandList->GetRenderCommands();
            // go through commands that affect or need transitions and barriers
            ProcessRenderNodeCommands(cmdListRef, nodeIdx, ref, stateCache);

            // needs backbuffer/swapchain wait
            if (stateCache.usesSwapchainImage) {
                ref.submitInfo.waitForSwapchainAcquireSignal = true;
            }

            // patch gpu resource queue transfers
            if (!currNodeGpuResourceTransfers_.empty()) {
                PatchGpuResourceQueueTransfers(graphStore->renderNodeContextData, currNodeGpuResourceTransfers_);
                // clear for next use
                currNodeGpuResourceTransfers_.clear();
            }

            stateCache_.nodeCounter++;
        }
    }
}

void RenderGraph::ProcessRenderNodeCommands(array_view<const RenderCommandWithType>& cmdListRef,
    const uint32_t& nodeIdx, RenderNodeContextData& ref, StateCache& stateCache)
{
    for (uint32_t listIdx = 0; listIdx < static_cast<uint32_t>(cmdListRef.size()); ++listIdx) {
        auto& cmdRef = cmdListRef[listIdx];

#if (RENDER_DEV_ENABLED == 1)
        if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
            DebugPrintCommandListCommand(cmdRef, gpuResourceMgr_);
        }
#endif

        // most of the commands are handled within BarrierPoint
        switch (cmdRef.type) {
            case RenderCommandType::BARRIER_POINT:
                RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::BEGIN_RENDER_PASS:
                RenderCommand(
                    nodeIdx, listIdx, ref, *static_cast<RenderCommandBeginRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::END_RENDER_PASS:
                RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandEndRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::NEXT_SUBPASS:
            case RenderCommandType::DRAW:
            case RenderCommandType::DRAW_INDIRECT:
            case RenderCommandType::DISPATCH:
            case RenderCommandType::DISPATCH_INDIRECT:
            case RenderCommandType::BIND_PIPELINE:
            case RenderCommandType::BIND_VERTEX_BUFFERS:
            case RenderCommandType::BIND_INDEX_BUFFER:
            case RenderCommandType::COPY_BUFFER:
            case RenderCommandType::COPY_BUFFER_IMAGE:
            case RenderCommandType::COPY_IMAGE:
            case RenderCommandType::BIND_DESCRIPTOR_SETS:
            case RenderCommandType::PUSH_CONSTANT:
            case RenderCommandType::BLIT_IMAGE:
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE:
            case RenderCommandType::CLEAR_COLOR_IMAGE:
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT:
            case RenderCommandType::DYNAMIC_STATE_SCISSOR:
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS:
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS:
            case RenderCommandType::DYNAMIC_STATE_STENCIL:
            case RenderCommandType::WRITE_TIMESTAMP:
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
            case RenderCommandType::UNDEFINED:
            default: {
                // nop
                break;
            }
        }
    } // end command for
}

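// Buffer states persist to the next frame unless the handle is flagged to
// reset on frame borders; per-frame patching data (prevRc, prevRenderNodeIndex)
// is always cleared.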
void RenderGraph::StoreFinalBufferState()
{
    for (auto& ref : gpuBufferTracking_) {
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            // reset, but we do not reset the handle, because the gpuBufferTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
        }
        // need to reset per frame variables for all buffers (so we do not try to patch or debug from previous
        // frames)
        ref.prevRc = {};
        ref.prevRenderNodeIndex = { ~0u };
    }
}

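// Image states persist similarly; swapchain images are additionally patched to
// PRESENT_SRC when their last use was a render pass, and their final states
// are collected into swapchainStates_ (see GetSwapchainResourceStates()).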
void RenderGraph::StoreFinalImageState()
{
    swapchainStates_ = {}; // reset

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        PLUGIN_LOG_I("end_frame image_state:");
    }
#endif
    for (auto& ref : gpuImageTracking_) {
        // if the resource is not dynamic, we do not track and care
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        // handle automatic presentation layout
        if (stateCache_.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(ref.resource.handle)) {
            if (ref.prevRc.type == RenderCommandType::BEGIN_RENDER_PASS) {
                RenderCommandBeginRenderPass& beginRenderPass =
                    *static_cast<RenderCommandBeginRenderPass*>(ref.prevRc.rc);
                PatchRenderPassFinalLayout(
                    ref.resource.handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);
            }
            // NOTE: currently automatic presentation layout is handled in the vulkan backend if not in a render pass
            // store the final state for the backbuffer
            // currently we only store swapchains if they are actually in use this frame
            const uint32_t flags = ref.state.accessFlags | ref.state.shaderStageFlags | ref.state.pipelineStageFlags;
            if (flags != 0) {
                swapchainStates_.swapchains.push_back({ ref.resource.handle, ref.state, ref.resource.imageLayout });
            }
        }
#if (RENDER_DEV_ENABLED == 1)
        // print before reset for next frame
        if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
            DebugPrintImageState(gpuResourceMgr_, ref);
        }
#endif
        // shallow resources are not tracked
        // they are always in undefined state at the beginning of the frame
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(ref.resource.handle);
            // reset, but we do not reset the handle, because the gpuImageTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
            if (addMips) {
                PLUGIN_ASSERT(!ref.additionalState.layouts);
                ref.additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }

        // need to reset per frame variables for all images (so we do not try to patch from previous frames)
        ref.prevRc = {};
        ref.prevRenderNodeIndex = { ~0u };
    }
}

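// BeginRenderPass handling: resolves attachment initial/final layouts from the
// tracked image states, updates the tracked state from every subpass
// attachment list, and, when the pass is stitched across command lists,
// converts non-first begins into subpass begins and finalizes the combined
// description at the last subpass.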
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBeginRenderPass& rc, StateCache& stateCache)
{
    // update layouts for attachments to gpu image state
    BeginRenderPassParameters params { rc, stateCache, { RenderCommandType::BEGIN_RENDER_PASS, &rc } };

    PLUGIN_ASSERT(rc.renderPassDesc.subpassCount > 0);

    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) { // stitch render pass subpasses
        BeginRenderPassHandleDependency(params, commandListCommandIndex, nodeData);
    }

    const GpuQueue gpuQueue = nodeData.renderCommandList->GetGpuQueue();

    auto finalImageLayouts =
        array_view(rc.imageLayouts.attachmentFinalLayouts, countof(rc.imageLayouts.attachmentFinalLayouts));

    BeginRenderPassUpdateImageStates(params, gpuQueue, finalImageLayouts, renderNodeIndex);

    for (uint32_t subpassIdx = 0; subpassIdx < rc.renderPassDesc.subpassCount; ++subpassIdx) {
        const auto& subpassRef = rc.subpasses[subpassIdx];
        const auto& subpassResourceStatesRef = rc.subpassResourceStates[subpassIdx];

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        if (subpassRef.depthAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(
                array_view(&subpassRef.depthAttachmentIndex, subpassRef.depthAttachmentCount), rc.renderPassDesc,
                subpassResourceStatesRef, finalImageLayouts, stateCache);
            if (subpassRef.depthResolveAttachmentCount == 1) {
                BeginRenderPassUpdateSubpassImageStates(
                    array_view(&subpassRef.depthResolveAttachmentIndex, subpassRef.depthResolveAttachmentCount),
                    rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts, stateCache);
            }
        }
        if (subpassRef.fragmentShadingRateAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(array_view(&subpassRef.fragmentShadingRateAttachmentIndex,
                                                        subpassRef.fragmentShadingRateAttachmentCount),
                rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts, stateCache);
        }
    }

    if (hasRenderPassDependency) { // stitch render pass subpasses
        if (rc.subpassStartIndex > 0) {
            // stitched to behave as a nextSubpass() and not beginRenderPass()
            rc.beginType = RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN;
        }
        const bool finalSubpass = (rc.subpassStartIndex == rc.renderPassDesc.subpassCount - 1);
        if (finalSubpass) {
            UpdateMultiRenderCommandListRenderPasses(stateCache.multiRenderPassStore);
            // multiRenderPassStore cleared in EndRenderPass
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugRenderPassLayoutPrint(gpuResourceMgr_, rc);
    }
#endif
}

void RenderGraph::BeginRenderPassHandleDependency(
    BeginRenderPassParameters& params, const uint32_t commandListCommandIndex, RenderNodeContextData& nodeData)
{
    params.stateCache.multiRenderPassStore.renderPasses.push_back(&params.rc);
    // store the first begin render pass
    params.rpForCmdRef = { RenderCommandType::BEGIN_RENDER_PASS,
        params.stateCache.multiRenderPassStore.renderPasses[0] };

    if (params.rc.subpassStartIndex == 0) { // store the first render pass barrier point
        // the barrier point must be the previous command
        PLUGIN_ASSERT(commandListCommandIndex >= 1);
        const uint32_t prevCommandIndex = commandListCommandIndex - 1;

        const RenderCommandWithType& barrierPointCmdRef =
            nodeData.renderCommandList->GetRenderCommands()[prevCommandIndex];
        PLUGIN_ASSERT(barrierPointCmdRef.type == RenderCommandType::BARRIER_POINT);
        PLUGIN_ASSERT(static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc));

        params.stateCache.multiRenderPassStore.firstRenderPassBarrierList = nodeData.renderBarrierList.get();
        params.stateCache.multiRenderPassStore.firstBarrierPointIndex =
            static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc)->barrierPointIndex;
    }
}

void RenderGraph::BeginRenderPassUpdateImageStates(BeginRenderPassParameters& params, const GpuQueue& gpuQueue,
    array_view<ImageLayout>& finalImageLayouts, const uint32_t renderNodeIndex)
{
    auto& initialImageLayouts = params.rc.imageLayouts.attachmentInitialLayouts;
    const auto& attachmentHandles = params.rc.renderPassDesc.attachmentHandles;
    auto& attachments = params.rc.renderPassDesc.attachments;
    auto& attachmentInputResourceStates = params.rc.inputResourceStates;

    for (uint32_t attachmentIdx = 0; attachmentIdx < params.rc.renderPassDesc.attachmentCount; ++attachmentIdx) {
        const RenderHandle handle = attachmentHandles[attachmentIdx];
        // NOTE: invalidate invalid handle commands already in render command list
        if (!RenderHandleUtil::IsGpuImage(handle)) {
#ifdef _DEBUG
            PLUGIN_LOG_E("invalid handle in render node graph");
#endif
            continue;
        }
        auto& stateRef = GetImageResourceStateRef(handle, gpuQueue);
        ImageLayout imgLayout = stateRef.resource.imageLayout;

        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
        // the image layout is undefined if automatic barriers have been disabled
        if (params.rc.enableAutomaticLayoutChanges) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = attachments[attachmentIdx];
            if (addMips && (attachmentDesc.mipLevel < RenderGraph::MAX_MIP_STATE_COUNT)) {
                if (stateRef.additionalState.layouts) {
                    imgLayout = stateRef.additionalState.layouts[attachmentDesc.mipLevel];
                } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_E(to_hex(handle.id), "mip layouts missing");
#endif
                }
            }

            initialImageLayouts[attachmentIdx] = imgLayout;
        }
        // undefined layout with load_op_load -> we modify it to dont_care (and remove the validation warning)
        if ((imgLayout == ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED) &&
            (attachments[attachmentIdx].loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_LOAD)) {
            // dont care (user needs to be sure what is wanted, i.e. in the first frame one should clear)
            attachments[attachmentIdx].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
        }
        finalImageLayouts[attachmentIdx] = imgLayout;
        attachmentInputResourceStates.states[attachmentIdx] = stateRef.state;
        attachmentInputResourceStates.layouts[attachmentIdx] = imgLayout;

        // store the render pass for final layout patching
        stateRef.prevRc = params.rpForCmdRef;
        stateRef.prevRenderNodeIndex = renderNodeIndex;

        // flag for backbuffer use
        if (params.stateCache.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(handle)) {
            params.stateCache.usesSwapchainImage = true;
        }
    }
}

void RenderGraph::BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attachmentIndices,
    const RenderPassDesc& renderPassDesc, const RenderPassAttachmentResourceStates& subpassResourceStatesRef,
    array_view<ImageLayout> finalImageLayouts, StateCache& stateCache)
{
    for (const uint32_t attachmentIndex : attachmentIndices) {
        // NOTE: handle invalid commands already in render command list and invalidate draws etc.
        PLUGIN_ASSERT(attachmentIndex < renderPassDesc.attachmentCount);
        const RenderHandle handle = renderPassDesc.attachmentHandles[attachmentIndex];
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
        const GpuResourceState& refState = subpassResourceStatesRef.states[attachmentIndex];
        const ImageLayout& refImgLayout = subpassResourceStatesRef.layouts[attachmentIndex];
        // NOTE: we should support non-dynamicity and GENERAL

        finalImageLayouts[attachmentIndex] = refImgLayout;
        auto& ref = GetImageResourceStateRef(handle, refState.gpuQueue);
        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);

        ref.state = refState;
        ref.resource.handle = handle;
        ref.resource.imageLayout = refImgLayout;
        if (addMips) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = renderPassDesc.attachments[attachmentIndex];
            const BindableImage image {
                handle,
                attachmentDesc.mipLevel,
                attachmentDesc.layer,
                refImgLayout,
                RenderHandle {},
            };
            ModifyAdditionalImageState(image, ref.additionalState);
        }
    }
}

void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    const RenderNodeContextData& nodeData, RenderCommandEndRenderPass& rc, StateCache& stateCache)
{
    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) {
        const bool finalSubpass =
            (rc.subpassCount == static_cast<uint32_t>(stateCache.multiRenderPassStore.renderPasses.size()));
        if (finalSubpass) {
            if (rc.subpassStartIndex != (rc.subpassCount - 1)) {
                PLUGIN_LOG_E("RenderGraph: error in multi render node render pass subpass ending");
                // NOTE: add more error handling and invalidate render command lists
            }
            rc.endType = RenderPassEndType::END_RENDER_PASS;
            stateCache.multiRenderPassStore.renderPasses.clear();
            stateCache.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
            stateCache.multiRenderPassStore.firstBarrierPointIndex = ~0u;
            stateCache.multiRenderPassStore.supportOpen = false;
        } else {
            rc.endType = RenderPassEndType::END_SUBPASS;
        }
    }
}

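// BarrierPoint handling: gathers custom barriers, then vertex/index and
// indirect buffer barriers, then barriers required by the upcoming command
// (clear/blit/copy or descriptor set usage), collecting everything into
// combinedBarriers for this barrier point.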
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBarrierPoint& rc, StateCache& stateCache)
{
    // go through required descriptors for current upcoming event
    const auto& customBarrierListRef = nodeData.renderCommandList->GetCustomBarriers();
    const auto& cmdListRef = nodeData.renderCommandList->GetRenderCommands();
    const auto& allDescriptorSetHandlesForBarriers = nodeData.renderCommandList->GetDescriptorSetHandles();
    const auto& nodeDescriptorSetMgrRef = *nodeData.nodeContextDescriptorSetMgr;

    parameterCachePools_.combinedBarriers.clear();
    parameterCachePools_.handledCustomBarriers.clear();
    ParameterCache parameters { parameterCachePools_.combinedBarriers, parameterCachePools_.handledCustomBarriers,
        rc.customBarrierCount, rc.vertexIndexBarrierCount, rc.indirectBufferBarrierCount, renderNodeIndex,
        nodeData.renderCommandList->GetGpuQueue(), { RenderCommandType::BARRIER_POINT, &rc }, stateCache };
    // first check custom barriers
    if (parameters.customBarrierCount > 0) {
        HandleCustomBarriers(parameters, rc.customBarrierIndexBegin, customBarrierListRef);
    }
    // then vertex / index buffer barriers in the barrier point before render pass
    if (parameters.vertexInputBarrierCount > 0) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleVertexInputBufferBarriers(parameters, rc.vertexIndexBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassVertexInputBufferBarriers());
    }
    if (parameters.indirectBufferBarrierCount > 0U) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleRenderpassIndirectBufferBarriers(parameters, rc.indirectBufferBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassIndirectBufferBarriers());
    }

    // at a barrier point, the next render command (the one the barriers are needed for) is known
    if (rc.renderCommandType == RenderCommandType::CLEAR_COLOR_IMAGE) {
        HandleClearImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::BLIT_IMAGE) {
        HandleBlitImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER) {
        HandleCopyBuffer(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef); // NOTE: handles image to image
    } else { // descriptor sets
        if (rc.renderCommandType == RenderCommandType::DISPATCH_INDIRECT) {
            HandleDispatchIndirect(parameters, commandListCommandIndex, cmdListRef);
        }
        const uint32_t descriptorSetHandleBeginIndex = rc.descriptorSetHandleIndexBegin;
        const uint32_t descriptorSetHandleEndIndex = descriptorSetHandleBeginIndex + rc.descriptorSetHandleCount;
        const uint32_t descriptorSetHandleMaxIndex =
            Math::min(descriptorSetHandleEndIndex, static_cast<uint32_t>(allDescriptorSetHandlesForBarriers.size()));
        const auto descriptorSetHandlesForBarriers =
            array_view(allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleBeginIndex,
                allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleMaxIndex);
        HandleDescriptorSets(parameters, descriptorSetHandlesForBarriers, nodeDescriptorSetMgrRef);
    }

    if (!parameters.combinedBarriers.empty()) {
        // with multiple render node subpasses, barriers are added to the first render pass's
        // barrier point; firstRenderPassBarrierList is null for the first subpass
        const bool renderPassHasDependency = stateCache.multiRenderPassStore.supportOpen;
        if (renderPassHasDependency && stateCache.multiRenderPassStore.firstRenderPassBarrierList) {
            PLUGIN_ASSERT(!stateCache.multiRenderPassStore.renderPasses.empty());
            stateCache.multiRenderPassStore.firstRenderPassBarrierList->AddBarriersToBarrierPoint(
                rc.barrierPointIndex, parameters.combinedBarriers);
        } else {
            nodeData.renderBarrierList->AddBarriersToBarrierPoint(rc.barrierPointIndex, parameters.combinedBarriers);
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugBarrierPrint(gpuResourceMgr_, parameters.combinedBarriers);
    }
#endif
}

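// Updates the cached state of a dynamically tracked buffer to the destination state of the barrier.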
inline void RenderGraph::UpdateBufferResourceState(
    RenderGraphBufferState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRc = params.rcWithType;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

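// Updates the cached state of a dynamically tracked image to the destination state of the barrier
// (the image layout itself is updated by the caller).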
inline void RenderGraph::UpdateImageResourceState(
    RenderGraphImageState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRc = params.rcWithType;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

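// Processes user-given custom barriers. Dynamically tracked resources get their cached state
// updated, and an image barrier with the CORE_IMAGE_LAYOUT_MAX_ENUM sentinel as source layout has
// its source access flags, pipeline stages, and layout filled in from the tracked state. All
// handled resources are recorded in handledCustomBarriers so automatic barrier handling can skip
// them.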
void RenderGraph::HandleCustomBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const CommandBarrier>& customBarrierListRef)
{
    params.handledCustomBarriers.reserve(params.customBarrierCount);
    PLUGIN_ASSERT(barrierIndexBegin + params.customBarrierCount <= customBarrierListRef.size());
    for (auto begin = (customBarrierListRef.begin() + barrierIndexBegin),
              end = Math::min(customBarrierListRef.end(), begin + params.customBarrierCount);
         begin != end; ++begin) {
        // add a copy and modify if needed
        auto& cb = params.combinedBarriers.emplace_back(*begin);

        // NOTE: undefined type is for non-resource memory/pipeline barriers
        const RenderHandleType type = RenderHandleUtil::GetHandleType(cb.resourceHandle);
        const bool isDynamicTrack = RenderHandleUtil::IsDynamicResource(cb.resourceHandle);
        PLUGIN_ASSERT((type == RenderHandleType::UNDEFINED) || (type == RenderHandleType::GPU_BUFFER) ||
                      (type == RenderHandleType::GPU_IMAGE));
        if (type == RenderHandleType::GPU_BUFFER) {
            if (isDynamicTrack) {
                auto& stateRef = GetBufferResourceStateRef(cb.resourceHandle, params.gpuQueue);
                UpdateBufferResourceState(stateRef, params, cb);
            }
            params.handledCustomBarriers[cb.resourceHandle] = 0;
        } else if (type == RenderHandleType::GPU_IMAGE) {
            if (isDynamicTrack) {
                const bool isAddMips = RenderHandleUtil::IsDynamicAdditionalStateResource(cb.resourceHandle);
                auto& stateRef = GetImageResourceStateRef(cb.resourceHandle, params.gpuQueue);
                if (cb.src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM) {
                    uint32_t mipLevel = 0U;
                    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
                    ImageLayout srcImageLayout = stateRef.resource.imageLayout;
                    if (isAddMips) {
                        const uint32_t srcMip = cb.src.optionalImageSubresourceRange.baseMipLevel;
                        const uint32_t dstMip = cb.dst.optionalImageSubresourceRange.baseMipLevel;
                        if ((srcMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
                            (dstMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
                            if (dstMip < RenderGraph::MAX_MIP_STATE_COUNT) {
                                mipLevel = dstMip;
                                mipCount = 1U;
                            } else {
                                mipLevel = srcMip;
                                // all mip levels
                            }
                            if (stateRef.additionalState.layouts) {
                                srcImageLayout = stateRef.additionalState.layouts[mipLevel];
                            } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                                PLUGIN_LOG_ONCE_E(to_hex(cb.resourceHandle.id), "mip layouts missing");
#endif
                            }
                        }
                    }
                    cb.src.accessFlags = stateRef.state.accessFlags;
                    cb.src.pipelineStageFlags =
                        stateRef.state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
                    cb.src.optionalImageLayout = srcImageLayout;
                    cb.src.optionalImageSubresourceRange = { 0, mipLevel, mipCount, 0u,
                        PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
                }
                UpdateImageResourceState(stateRef, params, cb);
                stateRef.resource.imageLayout = cb.dst.optionalImageLayout;
                if (isAddMips) {
                    const BindableImage image {
                        cb.resourceHandle,
                        cb.dst.optionalImageSubresourceRange.baseMipLevel,
                        cb.dst.optionalImageSubresourceRange.baseArrayLayer,
                        cb.dst.optionalImageLayout,
                        RenderHandle {},
                    };
                    ModifyAdditionalImageState(image, stateRef.additionalState);
                }
            }
            params.handledCustomBarriers[cb.resourceHandle] = 0;
        }
    }
}

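// Adds barriers for the vertex and index buffers read by the upcoming render pass.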
void RenderGraph::HandleVertexInputBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& vertexInputBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.vertexInputBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < static_cast<uint32_t>(vertexInputBufferBarrierListRef.size()));
        if (barrierIndex < static_cast<uint32_t>(vertexInputBufferBarrierListRef.size())) {
            const VertexBuffer& vbInput = vertexInputBufferBarrierListRef[barrierIndex];
            const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                CORE_ACCESS_INDEX_READ_BIT | CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT, params.gpuQueue };
            UpdateStateAndCreateBarriersGpuBuffer(
                resourceState, { vbInput.bufferHandle, vbInput.bufferOffset, vbInput.byteSize }, params);
        }
    }
}

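// Adds barriers for indirect argument buffers read by the upcoming render pass, skipping buffers
// already covered by custom barriers.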
void RenderGraph::HandleRenderpassIndirectBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
    const array_view<const VertexBuffer>& indirectBufferBarrierListRef)
{
    for (uint32_t idx = 0; idx < params.indirectBufferBarrierCount; ++idx) {
        const uint32_t barrierIndex = barrierIndexBegin + idx;
        PLUGIN_ASSERT(barrierIndex < static_cast<uint32_t>(indirectBufferBarrierListRef.size()));
        if (barrierIndex < static_cast<uint32_t>(indirectBufferBarrierListRef.size())) {
            const VertexBuffer& ib = indirectBufferBarrierListRef[barrierIndex];
            const bool needsArgsBarrier =
                CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ib.bufferHandle);
            if (needsArgsBarrier) {
                const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
                    CORE_ACCESS_INDIRECT_COMMAND_READ_BIT, CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue };
                UpdateStateAndCreateBarriersGpuBuffer(
                    resourceState, { ib.bufferHandle, ib.bufferOffset, ib.byteSize }, params);
            }
        }
    }
}

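// Peeks the ClearColorImage command that follows this barrier point and adds a transfer write
// barrier for the cleared image unless a custom barrier already covered it.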
void RenderGraph::HandleClearImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::CLEAR_COLOR_IMAGE);

    const RenderCommandClearColorImage& nextRc = *static_cast<RenderCommandClearColorImage*>(nextCmdRef.rc);

    const bool needsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.handle);
    if (needsBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.handle;
        bRes.imageLayout = nextRc.imageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

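// Peeks the following BlitImage command and adds transfer read/write barriers for the source and
// destination images unless custom barriers already covered them.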
void RenderGraph::HandleBlitImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BLIT_IMAGE);

    const RenderCommandBlitImage& nextRc = *static_cast<RenderCommandBlitImage*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.srcHandle;
        bRes.imageLayout = nextRc.srcImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        BindableImage bRes = {};
        bRes.handle = nextRc.dstHandle;
        bRes.imageLayout = nextRc.dstImageLayout;
        AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

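// Peeks the following CopyBuffer command and adds transfer read/write barriers for the source and
// destination buffers unless custom barriers already covered them.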
void RenderGraph::HandleCopyBuffer(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_BUFFER);

    const RenderCommandCopyBuffer& nextRc = *static_cast<RenderCommandCopyBuffer*>(nextCmdRef.rc);

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
    if (needsSrcBarrier) {
        const BindableBuffer bRes = { nextRc.srcHandle, nextRc.bufferCopy.srcOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
    if (needsDstBarrier) {
        const BindableBuffer bRes = { nextRc.dstHandle, nextRc.bufferCopy.dstOffset, nextRc.bufferCopy.size };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

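// Peeks the following CopyBufferImage or CopyImage command and adds transfer barriers for the
// source and destination; both command types are handled here because either endpoint may be a
// buffer or an image.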
void RenderGraph::HandleCopyBufferImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT((nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) ||
                  (nextCmdRef.type == RenderCommandType::COPY_IMAGE));

    // NOTE: two different command types supported
    RenderHandle srcHandle;
    RenderHandle dstHandle;
    ImageSubresourceLayers srcImgLayers;
    ImageSubresourceLayers dstImgLayers;
    if (nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) {
        const RenderCommandCopyBufferImage& nextRc = *static_cast<RenderCommandCopyBufferImage*>(nextCmdRef.rc);
        PLUGIN_ASSERT(nextRc.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.bufferImageCopy.imageSubresource;
        dstImgLayers = nextRc.bufferImageCopy.imageSubresource;
    } else if (nextCmdRef.type == RenderCommandType::COPY_IMAGE) {
        const RenderCommandCopyImage& nextRc = *static_cast<RenderCommandCopyImage*>(nextCmdRef.rc);
        srcHandle = nextRc.srcHandle;
        dstHandle = nextRc.dstHandle;
        srcImgLayers = nextRc.imageCopy.srcSubresource;
        dstImgLayers = nextRc.imageCopy.dstSubresource;
    }

    const bool needsSrcBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, srcHandle);
    if (needsSrcBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(srcHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = srcHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = srcHandle;
            bRes.mip = srcImgLayers.mipLevel;
            bRes.layer = srcImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }

    const bool needsDstBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, dstHandle);
    if (needsDstBarrier) {
        const RenderHandleType handleType = RenderHandleUtil::GetHandleType(dstHandle);
        PLUGIN_UNUSED(handleType);
        PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
        if (handleType == RenderHandleType::GPU_BUFFER) {
            BindableBuffer bRes;
            bRes.handle = dstHandle;
            AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        } else {
            BindableImage bRes;
            bRes.handle = dstHandle;
            bRes.mip = dstImgLayers.mipLevel;
            bRes.layer = dstImgLayers.baseArrayLayer;
            bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
                GpuResourceState {
                    0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
                bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
        }
    }
}

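// Peeks the following DispatchIndirect command and adds an indirect argument read barrier for the
// argument buffer unless a custom barrier already covered it.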
void RenderGraph::HandleDispatchIndirect(ParameterCache& params, const uint32_t& commandListCommandIndex,
    const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
{
    const uint32_t nextListIdx = commandListCommandIndex + 1;
    PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
    const auto& nextCmdRef = cmdListRef[nextListIdx];
    PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::DISPATCH_INDIRECT);

    const auto& nextRc = *static_cast<RenderCommandDispatchIndirect*>(nextCmdRef.rc);

    const bool needsArgsBarrier =
        CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.argsHandle);
    if (needsArgsBarrier) {
        const BindableBuffer bRes = { nextRc.argsHandle, nextRc.offset, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
        AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
            GpuResourceState { CORE_SHADER_STAGE_COMPUTE_BIT, CORE_ACCESS_INDIRECT_COMMAND_READ_BIT,
                CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue },
            bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
    }
}

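// Walks all descriptor sets bound for the upcoming command and creates barriers for their buffer
// and image bindings (including array bindings) that still need them.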
void RenderGraph::HandleDescriptorSets(ParameterCache& params,
    const array_view<const RenderHandle>& descriptorSetHandlesForBarriers,
    const NodeContextDescriptorSetManager& nodeDescriptorSetMgrRef)
{
    for (const RenderHandle descriptorSetHandle : descriptorSetHandlesForBarriers) {
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET);

        const auto bindingResources = nodeDescriptorSetMgrRef.GetCpuDescriptorSetData(descriptorSetHandle);
        const auto& buffers = bindingResources.buffers;
        const auto& images = bindingResources.images;
        for (const auto& ref : buffers) {
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array binding elements beyond the first; they are stored with descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // index 0 is the binding itself; from 1 onward the array offset entries are used
                const auto& bRes = (idx == 0) ? ref : buffers[arrayOffset + idx - 1];
                if (CheckForBarrierNeed(
                    params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuBuffer(bRes.state, bRes.resource, params);
                }
            }
        }
        for (const auto& ref : images) {
            const uint32_t descriptorCount = ref.binding.descriptorCount;
            // skip array binding elements beyond the first; they are stored with descriptorCount 0
            if (descriptorCount == 0) {
                continue;
            }
            const uint32_t arrayOffset = ref.arrayOffset;
            PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
            for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
                // index 0 is the binding itself; from 1 onward the array offset entries are used
                const auto& bRes = (idx == 0) ? ref : images[arrayOffset + idx - 1];
                if (CheckForBarrierNeed(
                    params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
                    UpdateStateAndCreateBarriersGpuImage(bRes.state, bRes.resource, params);
                }
            }
        }
    } // end for
}

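// Creates a barrier for a dynamically tracked image when its layout or access flags change, or
// when the previous access was a write. A change of queue type is recorded as a queue ownership
// transfer instead of a barrier. Input attachments are skipped; they are synchronized by the
// render pass itself.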
void RenderGraph::UpdateStateAndCreateBarriersGpuImage(
    const GpuResourceState& state, const BindableImage& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuImageDataIndices_.size())) {
        return;
    }

    auto& ref = GetImageResourceStateRef(res.handle, state.gpuQueue);
    // NOTE: we previously patched the final render pass layouts here
    // ATM: we only patch the swapchain image if needed

    const GpuResourceState& prevState = ref.state;
    const BindableImage& prevImage = ref.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle);
    const ResourceBarrier prevStateRb = addMips ? GetSrcImageBarrierMips(prevState, prevImage, res, ref.additionalState)
                                                : GetSrcImageBarrier(prevState, prevImage);

    const bool layoutChanged = (prevStateRb.optionalImageLayout != res.imageLayout);
    const bool accessFlagsChanged = (prevStateRb.accessFlags != state.accessFlags);
    const bool writeTarget = (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS);
    const bool inputAttachment = (state.accessFlags == CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT);
    // input attachments are handled with render passes and not with barriers
    if ((layoutChanged || accessFlagsChanged || writeTarget) && (!inputAttachment)) {
        if ((prevState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
            (prevState.gpuQueue.type != state.gpuQueue.type)) {
            PLUGIN_ASSERT(state.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);

            PLUGIN_ASSERT(ref.prevRenderNodeIndex != params.renderNodeIndex);
            currNodeGpuResourceTransfers_.push_back(RenderGraph::GpuQueueTransferState {
                res.handle, ref.prevRenderNodeIndex, params.renderNodeIndex, prevImage.imageLayout, res.imageLayout });
        } else {
            const ResourceBarrier dstImageBarrier =
                addMips ? GetDstImageBarrierMips(state, prevImage, res, ref.additionalState)
                        : GetDstImageBarrier(state, res);
            params.combinedBarriers.push_back(
                CommandBarrier { res.handle, prevStateRb, prevState.gpuQueue, dstImageBarrier, params.gpuQueue });
        }

        ref.state = state;
        ref.resource = res;
        ref.prevRc = params.rcWithType;
        ref.prevRenderNodeIndex = params.renderNodeIndex;
        if (addMips) {
            ModifyAdditionalImageState(res, ref.additionalState);
        }
    }
}

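// Creates a barrier for a dynamically tracked buffer when its access flags change or the previous
// access was a write, and updates the cached state to the state after the barrier.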
void RenderGraph::UpdateStateAndCreateBarriersGpuBuffer(
    const GpuResourceState& dstState, const BindableBuffer& res, RenderGraph::ParameterCache& params)
{
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
    if (arrayIndex >= static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
        return;
    }

    // get the current state of the buffer
    auto& srcStateRef = GetBufferResourceStateRef(res.handle, dstState.gpuQueue);
    const ResourceBarrier prevStateRb = GetSrcBufferBarrier(srcStateRef.state, res);
    if ((prevStateRb.accessFlags != dstState.accessFlags) || (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS)) {
        params.combinedBarriers.push_back(CommandBarrier {
            res.handle, prevStateRb, dstState.gpuQueue, GetDstBufferBarrier(dstState, res), params.gpuQueue });
    }

    // update the cached state to match the situation after the barrier
    srcStateRef.state = dstState;
    srcStateRef.resource = res;
    srcStateRef.prevRc = params.rcWithType;
    srcStateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

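// Adds a barrier for a buffer and updates its cached state. A change of queue type is recorded as
// a queue ownership transfer for the node instead of a direct barrier.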
void RenderGraph::AddCommandBarrierAndUpdateStateCacheBuffer(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableBuffer& newBuffer,
    const RenderCommandWithType& rcWithType, vector<CommandBarrier>& barriers,
    vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    auto& stateRef = GetBufferResourceStateRef(newBuffer.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableBuffer srcBuffer = stateRef.resource;

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newBuffer.handle) == RenderHandleType::GPU_BUFFER);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(
            RenderGraph::GpuQueueTransferState { newBuffer.handle, stateRef.prevRenderNodeIndex, renderNodeIndex,
                ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED, ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED });
    } else {
        const ResourceBarrier srcBarrier = GetSrcBufferBarrier(srcState, srcBuffer);
        const ResourceBarrier dstBarrier = GetDstBufferBarrier(newGpuResourceState, newBuffer);

        barriers.push_back(CommandBarrier {
            newBuffer.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newBuffer;
    stateRef.prevRc = rcWithType;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
}

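// Image counterpart of the buffer variant above: adds a barrier (or records a queue ownership
// transfer) for an image and updates its cached state, including the per-mip layouts of images
// with additional state tracking.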
void RenderGraph::AddCommandBarrierAndUpdateStateCacheImage(const uint32_t renderNodeIndex,
    const GpuResourceState& newGpuResourceState, const BindableImage& newImage, const RenderCommandWithType& rcWithType,
    vector<CommandBarrier>& barriers, vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
{
    // newGpuResourceState carries the queue transfer image layout in the old optionalImageLayout

    auto& stateRef = GetImageResourceStateRef(newImage.handle, newGpuResourceState.gpuQueue);
    const GpuResourceState srcState = stateRef.state;
    const BindableImage srcImage = stateRef.resource;
    const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(newImage.handle);

    if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
        (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
        PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newImage.handle) == RenderHandleType::GPU_IMAGE);
        PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
        currNodeGpuResourceTransfer.push_back(RenderGraph::GpuQueueTransferState { newImage.handle,
            stateRef.prevRenderNodeIndex, renderNodeIndex, srcImage.imageLayout, newImage.imageLayout });
    } else {
        const ResourceBarrier srcBarrier =
            addMips ? GetSrcImageBarrierMips(srcState, srcImage, newImage, stateRef.additionalState)
                    : GetSrcImageBarrier(srcState, srcImage);
        const ResourceBarrier dstBarrier =
            addMips ? GetDstImageBarrierMips(newGpuResourceState, srcImage, newImage, stateRef.additionalState)
                    : GetDstImageBarrier(newGpuResourceState, newImage);

        barriers.push_back(CommandBarrier {
            newImage.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
    }

    stateRef.state = newGpuResourceState;
    stateRef.resource = newImage;
    stateRef.prevRc = rcWithType;
    stateRef.prevRenderNodeIndex = renderNodeIndex;
    if (addMips) {
        ModifyAdditionalImageState(newImage, stateRef.additionalState);
    }
}

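// Returns the tracking slot of a dynamic buffer, lazily allocating (or reusing a freed) tracking
// entry on first use; falls back to defaultBufferState_ for out-of-range handles.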
RenderGraph::RenderGraphBufferState& RenderGraph::GetBufferResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call with a non-dynamic trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_BUFFER);
    if (arrayIndex < gpuBufferDataIndices_.size()) {
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuBufferAvailableIndices_.empty()) {
                dataIdx = gpuBufferAvailableIndices_.back();
                gpuBufferAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuBufferTracking_.size());
                gpuBufferTracking_.emplace_back();
            }
            gpuBufferDataIndices_[arrayIndex] = dataIdx;

            gpuBufferTracking_[dataIdx].resource.handle = handle;
            gpuBufferTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
        }
        return gpuBufferTracking_[dataIdx];
    }

    return defaultBufferState_;
}

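// Image counterpart of GetBufferResourceStateRef; additionally allocates per-mip layout storage
// for images that track additional (per-mip) state.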
RenderGraph::RenderGraphImageState& RenderGraph::GetImageResourceStateRef(
    const RenderHandle handle, const GpuQueue& queue)
{
    // NOTE: do not call with a non-dynamic trackable resource
    const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
    PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
    if (arrayIndex < gpuImageDataIndices_.size()) {
        // NOTE: render pass attachments are expected to always be dynamic resources
        PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
        uint32_t dataIdx = gpuImageDataIndices_[arrayIndex];
        if (dataIdx == INVALID_TRACK_IDX) {
            if (!gpuImageAvailableIndices_.empty()) {
                dataIdx = gpuImageAvailableIndices_.back();
                gpuImageAvailableIndices_.pop_back();
            } else {
                dataIdx = static_cast<uint32_t>(gpuImageTracking_.size());
                gpuImageTracking_.emplace_back();
            }
            gpuImageDataIndices_[arrayIndex] = dataIdx;

            gpuImageTracking_[dataIdx].resource.handle = handle;
            gpuImageTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
            if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
                (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
                gpuImageTracking_[dataIdx].additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }
        return gpuImageTracking_[dataIdx];
    }

    return defaultImageState_;
}
RENDER_END_NAMESPACE()