/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_graph.h"

#include <cinttypes>

#include <base/containers/array_view.h>
#include <base/containers/fixed_string.h>
#include <base/math/mathf.h>
#include <render/namespace.h>

#include "device/gpu_resource_cache.h"
#include "device/gpu_resource_handle_util.h"
#include "device/gpu_resource_manager.h"
#include "nodecontext/render_barrier_list.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h"
#include "util/log.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
constexpr uint32_t INVALID_TRACK_IDX { ~0u };

#if (RENDER_DEV_ENABLED == 1)
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_PRINT = false;
constexpr const bool CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS = false;
constexpr const bool CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES = false;

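// Debug helper: logs the type of a single render command; for BeginRenderPass it also prints
// attachment names and subpass info when CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS is enabled.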
void DebugPrintCommandListCommand(const RenderCommandWithType& rc, GpuResourceManager& aMgr)
{
    switch (rc.type) {
        case RenderCommandType::BARRIER_POINT: {
            PLUGIN_LOG_I("rc: BarrierPoint");
            break;
        }
        case RenderCommandType::DRAW: {
            PLUGIN_LOG_I("rc: Draw");
            break;
        }
        case RenderCommandType::DRAW_INDIRECT: {
            PLUGIN_LOG_I("rc: DrawIndirect");
            break;
        }
        case RenderCommandType::DISPATCH: {
            PLUGIN_LOG_I("rc: Dispatch");
            break;
        }
        case RenderCommandType::DISPATCH_INDIRECT: {
            PLUGIN_LOG_I("rc: DispatchIndirect");
            break;
        }
        case RenderCommandType::BIND_PIPELINE: {
            PLUGIN_LOG_I("rc: BindPipeline");
            break;
        }
        case RenderCommandType::BEGIN_RENDER_PASS: {
            PLUGIN_LOG_I("rc: BeginRenderPass");
            if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
                const auto& beginRenderPass = *static_cast<RenderCommandBeginRenderPass*>(rc.rc);
                for (uint32_t idx = 0; idx < beginRenderPass.renderPassDesc.attachmentCount; ++idx) {
                    const RenderHandle handle = beginRenderPass.renderPassDesc.attachmentHandles[idx];
                    PLUGIN_LOG_I(" attachment idx: %u name: %s", idx, aMgr.GetName(handle).c_str());
                }
                PLUGIN_LOG_I(" subpass count: %u, subpass start idx: %u",
                    static_cast<uint32_t>(beginRenderPass.renderPassDesc.subpassCount),
                    beginRenderPass.subpassStartIndex);
            }
            break;
        }
        case RenderCommandType::NEXT_SUBPASS: {
            PLUGIN_LOG_I("rc: NextSubpass");
            break;
        }
        case RenderCommandType::END_RENDER_PASS: {
            PLUGIN_LOG_I("rc: EndRenderPass");
            break;
        }
        case RenderCommandType::BIND_VERTEX_BUFFERS: {
            PLUGIN_LOG_I("rc: BindVertexBuffers");
            break;
        }
        case RenderCommandType::BIND_INDEX_BUFFER: {
            PLUGIN_LOG_I("rc: BindIndexBuffer");
            break;
        }
        case RenderCommandType::COPY_BUFFER: {
            PLUGIN_LOG_I("rc: CopyBuffer");
            break;
        }
        case RenderCommandType::COPY_BUFFER_IMAGE: {
            PLUGIN_LOG_I("rc: CopyBufferImage");
            break;
        }
        case RenderCommandType::BIND_DESCRIPTOR_SETS: {
            PLUGIN_LOG_I("rc: BindDescriptorSets");
            break;
        }
        case RenderCommandType::PUSH_CONSTANT: {
            PLUGIN_LOG_I("rc: PushConstant");
            break;
        }
        case RenderCommandType::BLIT_IMAGE: {
            PLUGIN_LOG_I("rc: BlitImage");
            break;
        }
        // dynamic states
        case RenderCommandType::DYNAMIC_STATE_VIEWPORT: {
            PLUGIN_LOG_I("rc: DynamicStateViewport");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_SCISSOR: {
            PLUGIN_LOG_I("rc: DynamicStateScissor");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH: {
            PLUGIN_LOG_I("rc: DynamicStateLineWidth");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS: {
            PLUGIN_LOG_I("rc: DynamicStateDepthBias");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS: {
            PLUGIN_LOG_I("rc: DynamicStateBlendConstants");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS: {
            PLUGIN_LOG_I("rc: DynamicStateDepthBounds");
            break;
        }
        case RenderCommandType::DYNAMIC_STATE_STENCIL: {
            PLUGIN_LOG_I("rc: DynamicStateStencil");
            break;
        }
        case RenderCommandType::WRITE_TIMESTAMP: {
            PLUGIN_LOG_I("rc: WriteTimestamp");
            break;
        }
        case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE: {
            PLUGIN_LOG_I("rc: GpuQueueTransferRelease");
            break;
        }
        case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE: {
            PLUGIN_LOG_I("rc: GpuQueueTransferAcquire");
            break;
        }
        case RenderCommandType::UNDEFINED:
        default: {
            PLUGIN_ASSERT(false && "non-valid render command");
            break;
        }
    }
}

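// Debug helper: logs every combined barrier (buffer or image) with its source/destination
// pipeline stages and, for images, the source/destination layouts.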
void DebugBarrierPrint(const GpuResourceManager& gpuResourceMgr, const vector<CommandBarrier>& combinedBarriers)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    for (const auto& ref : combinedBarriers) {
        const RenderHandleType type = RenderHandleUtil::GetHandleType(ref.resourceHandle);
        if (type == RenderHandleType::GPU_BUFFER) {
            PLUGIN_LOG_I("barrier buffer :: handle:0x%" PRIx64 " name:%s, src_stage:%u dst_stage:%u",
                ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(), ref.src.pipelineStageFlags,
                ref.dst.pipelineStageFlags);
        } else {
            PLUGIN_ASSERT(type == RenderHandleType::GPU_IMAGE);
            PLUGIN_LOG_I("barrier image :: handle:0x%" PRIx64
                         " name:%s, src_stage:%u dst_stage:%u, src_layout:%u dst_layout:%u",
                ref.resourceHandle.id, gpuResourceMgr.GetName(ref.resourceHandle).c_str(), ref.src.pipelineStageFlags,
                ref.dst.pipelineStageFlags, ref.src.optionalImageLayout, ref.dst.optionalImageLayout);
        }
    }
}

void DebugRenderPassLayoutPrint(const GpuResourceManager& gpuResourceMgr, const RenderCommandBeginRenderPass& rc)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    for (uint32_t idx = 0; idx < rc.renderPassDesc.attachmentCount; ++idx) {
        const auto handle = rc.renderPassDesc.attachmentHandles[idx];
        const auto srcLayout = rc.imageLayouts.attachmentInitialLayouts[idx];
        const auto dstLayout = rc.imageLayouts.attachmentFinalLayouts[idx];
        PLUGIN_LOG_I("render_pass image :: handle:0x%" PRIx64 " name:%s, src_layout:%u dst_layout:%u (patched later)",
            handle.id, gpuResourceMgr.GetName(handle).c_str(), srcLayout, dstLayout);
    }
}

void DebugPrintImageState(const GpuResourceManager& gpuResourceMgr, const RenderGraph::RenderGraphImageState& resState)
{
    PLUGIN_ASSERT(CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES); // do not call this function normally
    const EngineResourceHandle gpuHandle = gpuResourceMgr.GetGpuHandle(resState.resource.handle);
    PLUGIN_LOG_I("image_state :: handle:0x%" PRIx64 " name:%s, layout:%u, index:%u, gen:%u, gpu_gen:%u",
        resState.resource.handle.id, gpuResourceMgr.GetName(resState.resource.handle).c_str(),
        resState.resource.imageLayout, RenderHandleUtil::GetIndexPart(resState.resource.handle),
        RenderHandleUtil::GetGenerationIndexPart(resState.resource.handle),
        RenderHandleUtil::GetGenerationIndexPart(gpuHandle));
    // one could fetch and print vulkan handle here as well e.g.
    // 1. const GpuImagePlatformDataVk& plat =
    // 2.     (const GpuImagePlatformDataVk&)gpuResourceMgr.GetImage(ref.first)->GetBasePlatformData()
    // 3. PLUGIN_LOG_I("end_frame image :: vk_handle:0x%" PRIx64, VulkanHandleCast<uint64_t>(plat.image))
}
#endif // RENDER_DEV_ENABLED

static constexpr uint32_t WRITE_ACCESS_FLAGS = CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                               CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                               CORE_ACCESS_TRANSFER_WRITE_BIT | CORE_ACCESS_HOST_WRITE_BIT |
                                               CORE_ACCESS_MEMORY_WRITE_BIT;

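// Patches the final layout of the given attachment in an already recorded BeginRenderPass command
// and mirrors the layout into the tracked image state (used e.g. for PRESENT_SRC patching).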
void PatchRenderPassFinalLayout(const RenderHandle handle, const ImageLayout imageLayout,
    RenderCommandBeginRenderPass& beginRenderPass, RenderGraph::RenderGraphImageState& storeState)
{
    const uint32_t attachmentCount = beginRenderPass.renderPassDesc.attachmentCount;
    for (uint32_t attachmentIdx = 0; attachmentIdx < attachmentCount; ++attachmentIdx) {
        if (beginRenderPass.renderPassDesc.attachmentHandles[attachmentIdx].id == handle.id) {
            beginRenderPass.imageLayouts.attachmentFinalLayouts[attachmentIdx] = imageLayout;
            storeState.resource.imageLayout = imageLayout;
        }
    }
}

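// Combines render passes that are stitched over multiple render command lists into one logical
// render pass: load ops and initial layouts come from the first pass, store ops and final layouts
// from the last, and subpass descriptions and resource states are replicated so that every
// command list sees the same render pass description.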
void UpdateMultiRenderCommandListRenderPasses(RenderGraph::MultiRenderPassStore& store)
{
    const uint32_t renderPassCount = static_cast<uint32_t>(store.renderPasses.size());
    PLUGIN_ASSERT(renderPassCount > 1);

    RenderCommandBeginRenderPass* firstRenderPass = store.renderPasses[0];
    PLUGIN_ASSERT(firstRenderPass);
    PLUGIN_ASSERT(firstRenderPass->subpasses.size() >= renderPassCount);
    const RenderCommandBeginRenderPass* lastRenderPass = store.renderPasses[renderPassCount - 1];
    PLUGIN_ASSERT(lastRenderPass);

    const uint32_t attachmentCount = firstRenderPass->renderPassDesc.attachmentCount;

    // take attachment loads from the first one, and stores from the last one
    // take initial layouts from the first one, and final layouts from the last one (could take the next layout)
    // initially store the correct render pass description to the first render pass and then copy to the others
    // resource states are copied from valid subpasses to the other render command list subpasses
    for (uint32_t fromRpIdx = 0; fromRpIdx < renderPassCount; ++fromRpIdx) {
        const auto& fromRenderPass = *(store.renderPasses[fromRpIdx]);
        const uint32_t fromRpSubpassStartIndex = fromRenderPass.subpassStartIndex;
        const auto& fromRpSubpassResourceStates = fromRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
        for (uint32_t toRpIdx = 0; toRpIdx < renderPassCount; ++toRpIdx) {
            if (fromRpIdx != toRpIdx) {
                auto& toRenderPass = *(store.renderPasses[toRpIdx]);
                auto& toRpSubpassResourceStates = toRenderPass.subpassResourceStates[fromRpSubpassStartIndex];
                for (uint32_t idx = 0; idx < attachmentCount; ++idx) {
                    toRpSubpassResourceStates.states[idx] = fromRpSubpassResourceStates.states[idx];
                    toRpSubpassResourceStates.layouts[idx] = fromRpSubpassResourceStates.layouts[idx];
                }
            }
        }
    }

    for (uint32_t idx = 0; idx < firstRenderPass->renderPassDesc.attachmentCount; ++idx) {
        firstRenderPass->renderPassDesc.attachments[idx].storeOp =
            lastRenderPass->renderPassDesc.attachments[idx].storeOp;
        firstRenderPass->renderPassDesc.attachments[idx].stencilStoreOp =
            lastRenderPass->renderPassDesc.attachments[idx].stencilStoreOp;

        firstRenderPass->imageLayouts.attachmentFinalLayouts[idx] =
            lastRenderPass->imageLayouts.attachmentFinalLayouts[idx];
    }

    // copy subpasses to first
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        firstRenderPass->subpasses[idx] = store.renderPasses[idx]->subpasses[idx];
    }

    // copy from first to following render passes
    for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
        // subpass start index is the only changing variable
        const uint32_t subpassStartIndex = store.renderPasses[idx]->subpassStartIndex;
        store.renderPasses[idx]->renderPassDesc = firstRenderPass->renderPassDesc;
        store.renderPasses[idx]->subpassStartIndex = subpassStartIndex;

        // image layouts need to match
        store.renderPasses[idx]->imageLayouts = firstRenderPass->imageLayouts;
        PLUGIN_ASSERT(store.renderPasses[idx]->subpasses.size() >= renderPassCount);
        // copy all subpasses
        if (!CloneData(store.renderPasses[idx]->subpasses.data(), sizeof(RenderPassSubpassDesc) * renderPassCount,
                firstRenderPass->subpasses.data(), sizeof(RenderPassSubpassDesc) * renderPassCount)) {
            PLUGIN_LOG_E("Copying of renderPasses failed.");
        }
        // copy input resource state
        if (!CloneData(store.renderPasses[idx]->inputResourceStates.states, sizeof(GpuResourceState) * attachmentCount,
                firstRenderPass->inputResourceStates.states, sizeof(GpuResourceState) * attachmentCount)) {
            PLUGIN_LOG_E("Copying of renderPasses failed.");
        }
        // NOTE: subpassResourceStates are not copied to different render passes
    }

#if (RENDER_VULKAN_COMBINE_MULTI_COMMAND_LIST_MSAA_SUBPASSES_ENABLED == 1)
    // copy the final layouts and resolves to the first render pass
    const uint32_t finalSubpassIdx = renderPassCount - 1U;
    if ((renderPassCount > 1U) && (firstRenderPass->subpasses[finalSubpassIdx].resolveAttachmentCount > 0U)) {
        firstRenderPass->renderPassDesc.subpassCount = 1U;
        firstRenderPass->subpasses = { firstRenderPass->subpasses.data(), 1U };
        firstRenderPass->subpassResourceStates = { firstRenderPass->subpassResourceStates.data(), 1U };
        // copy resolve attachments from the final subpass
        auto& firstSubpass = firstRenderPass->subpasses[0U];
        const auto& finalSubpass = store.renderPasses[finalSubpassIdx]->subpasses[finalSubpassIdx];
        firstSubpass.resolveAttachmentCount = finalSubpass.resolveAttachmentCount;
        firstSubpass.depthResolveAttachmentCount = finalSubpass.depthResolveAttachmentCount;
        firstSubpass.depthResolveAttachmentIndex = finalSubpass.depthResolveAttachmentIndex;
        firstSubpass.depthResolveModeFlagBit = finalSubpass.depthResolveModeFlagBit;
        CloneData(firstSubpass.resolveAttachmentIndices, sizeof(firstSubpass.resolveAttachmentIndices),
            finalSubpass.resolveAttachmentIndices, sizeof(uint32_t) * firstSubpass.resolveAttachmentCount);
        // layouts for resolve attachments
        const auto& finalSubpassResourceStates =
            store.renderPasses[finalSubpassIdx]->subpassResourceStates[finalSubpassIdx];
        const uint32_t resolveAttachmentCount = firstSubpass.resolveAttachmentCount;
        for (uint32_t resIdx = 0U; resIdx < resolveAttachmentCount; ++resIdx) {
            const uint32_t resAttIdx = firstSubpass.resolveAttachmentIndices[resIdx];
            firstRenderPass->subpassResourceStates[0U].layouts[resAttIdx] =
                finalSubpassResourceStates.layouts[resAttIdx];
            firstRenderPass->subpassResourceStates[0U].states[resAttIdx] = finalSubpassResourceStates.states[resAttIdx];
        }
        if ((firstSubpass.depthResolveAttachmentCount > 0U) &&
            (firstSubpass.depthResolveAttachmentIndex < PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT)) {
            const uint32_t resAttIdx = firstSubpass.resolveAttachmentIndices[firstSubpass.depthResolveAttachmentIndex];
            firstRenderPass->subpassResourceStates[0U].layouts[resAttIdx] =
                finalSubpassResourceStates.layouts[resAttIdx];
            firstRenderPass->subpassResourceStates[0U].states[resAttIdx] = finalSubpassResourceStates.states[resAttIdx];
        }

        // fix render command list indices
        for (uint32_t idx = 1; idx < renderPassCount; ++idx) {
            store.renderPasses[idx]->renderPassDesc = firstRenderPass->renderPassDesc;
            store.renderPasses[idx]->subpassStartIndex = 0U;
            store.renderPasses[idx]->subpasses = firstRenderPass->subpasses;
            store.renderPasses[idx]->subpassResourceStates = firstRenderPass->subpassResourceStates;
        }
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_I("combine_multi_command_list_msaa_subpasses_enabled",
            "RENDER_VALIDATION: Combining multi-commandlist MSAA resolve subpasses");
#endif
    }
#endif
}

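// Helpers for building the source/destination halves of a ResourceBarrier from the tracked GPU
// resource state; "Src" variants describe the previous usage, "Dst" variants the upcoming one.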
ResourceBarrier GetSrcBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetSrcImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

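// As GetSrcImageBarrier, but narrows the barrier to a single mip level when either side of the
// transition targets a specific mip; the source layout is then taken from the per-mip layout
// tracking in additionalImageState.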
ResourceBarrier GetSrcImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout srcImageLayout = src.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
        PLUGIN_ASSERT(additionalImageState.layouts);
        srcImageLayout = additionalImageState.layouts[mipLevel];
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        srcImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}

ResourceBarrier GetDstBufferBarrier(const GpuResourceState& state, const BindableBuffer& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED,
        res.byteOffset,
        res.byteSize,
    };
}

ResourceBarrier GetDstImageBarrier(const GpuResourceState& state, const BindableImage& res)
{
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        res.imageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
    };
}

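// Destination-side counterpart of GetSrcImageBarrierMips: selects the affected mip range the same
// way but uses the destination image layout directly.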
ResourceBarrier GetDstImageBarrierMips(const GpuResourceState& state, const BindableImage& src,
    const BindableImage& dst, const RenderGraph::RenderGraphAdditionalImageState& additionalImageState)
{
    uint32_t mipLevel = 0U;
    uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
    ImageLayout dstImageLayout = dst.imageLayout;
    if ((src.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
        (dst.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
        if (dst.mip < RenderGraph::MAX_MIP_STATE_COUNT) {
            mipLevel = dst.mip;
            mipCount = 1U;
        } else {
            mipLevel = src.mip;
            // all mip levels
        }
    }
    return {
        state.accessFlags,
        state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        dstImageLayout,
        0,
        PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
        { 0, mipLevel, mipCount, 0u, PipelineStateConstants::GPU_IMAGE_ALL_LAYERS },
    };
}

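// Updates the per-mip layout tracking of an image that carries additional (per mip) state;
// a specific mip updates only its own slot, otherwise all tracked mips receive the new layout.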
void ModifyAdditionalImageState(
    const BindableImage& res, RenderGraph::RenderGraphAdditionalImageState& additionalStateRef)
{
#if (RENDER_VALIDATION_ENABLED == 1)
    // NOTE: should not be called for images without CORE_RESOURCE_HANDLE_ADDITIONAL_STATE
    PLUGIN_ASSERT(RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle));
#endif
    if (additionalStateRef.layouts) {
        if ((res.mip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) &&
            (res.mip < RenderGraph::MAX_MIP_STATE_COUNT)) {
            additionalStateRef.layouts[res.mip] = res.imageLayout;
        } else {
            // set layout for all mips
            for (uint32_t idx = 0; idx < RenderGraph::MAX_MIP_STATE_COUNT; ++idx) {
                additionalStateRef.layouts[idx] = res.imageLayout;
            }
        }
    } else {
#if (RENDER_VALIDATION_ENABLED == 1)
        PLUGIN_LOG_ONCE_E(to_hex(res.handle.id), "mip layouts missing");
#endif
    }
}

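// Builds the release/acquire barrier pair used for transferring resource ownership from one
// GPU queue to another, with the given source and destination image layouts.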
CommandBarrier GetQueueOwnershipTransferBarrier(const RenderHandle handle, const GpuQueue& srcGpuQueue,
    const GpuQueue& dstGpuQueue, const ImageLayout srcImageLayout, const ImageLayout dstImageLayout)
{
    return {
        handle,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            srcImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        srcGpuQueue,

        ResourceBarrier {
            0,
            PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            dstImageLayout,
            0,
            PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE,
            ImageSubresourceRange {},
        },
        dstGpuQueue,
    };
}

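// Patches queue ownership transfers recorded for the current node: the release barrier is appended
// to the releasing node's last barrier point and the acquire barrier to the acquiring node's first
// barrier point.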
void PatchGpuResourceQueueTransfers(array_view<const RenderNodeContextData> frameRenderNodeContextData,
    array_view<const RenderGraph::GpuQueueTransferState> currNodeGpuResourceTransfers)
{
    for (const auto& transferRef : currNodeGpuResourceTransfers) {
        PLUGIN_ASSERT(transferRef.acquireNodeIdx < static_cast<uint32_t>(frameRenderNodeContextData.size()));

        auto& acquireNodeRef = frameRenderNodeContextData[transferRef.acquireNodeIdx];
        const GpuQueue acquireGpuQueue = acquireNodeRef.renderCommandList->GetGpuQueue();
        GpuQueue releaseGpuQueue = acquireGpuQueue;

        if (transferRef.releaseNodeIdx < static_cast<uint32_t>(frameRenderNodeContextData.size())) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            releaseGpuQueue = releaseNodeRef.renderCommandList->GetGpuQueue();
        }

        const CommandBarrier transferBarrier = GetQueueOwnershipTransferBarrier(transferRef.handle, releaseGpuQueue,
            acquireGpuQueue, transferRef.optionalReleaseImageLayout, transferRef.optionalAcquireImageLayout);

        // release ownership (NOTE: not done for previous frame)
        if (transferRef.releaseNodeIdx < static_cast<uint32_t>(frameRenderNodeContextData.size())) {
            auto& releaseNodeRef = frameRenderNodeContextData[transferRef.releaseNodeIdx];
            const uint32_t rcIndex = releaseNodeRef.renderCommandList->GetRenderCommandCount() - 1;
            const RenderCommandWithType& cmdRef = releaseNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            releaseNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            releaseNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
        // acquire ownership
        {
            const uint32_t rcIndex = 0;
            const RenderCommandWithType& cmdRef = acquireNodeRef.renderCommandList->GetRenderCommands()[rcIndex];
            PLUGIN_ASSERT(cmdRef.type == RenderCommandType::BARRIER_POINT);
            const auto& rcbp = *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc);
            PLUGIN_ASSERT(rcbp.renderCommandType == RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE);

            const uint32_t barrierPointIndex = rcbp.barrierPointIndex;
            acquireNodeRef.renderBarrierList->AddBarriersToBarrierPoint(barrierPointIndex, { transferBarrier });

            // inform that we are patching valid barriers
            acquireNodeRef.renderCommandList->SetValidGpuQueueReleaseAcquireBarriers();
        }
    }
}

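// A dynamic resource needs an automatically generated barrier unless a custom barrier for the
// same handle was already provided by the render node.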
bool CheckForBarrierNeed(const unordered_map<RenderHandle, uint32_t>& handledCustomBarriers,
    const uint32_t customBarrierCount, const RenderHandle handle)
{
    bool needsBarrier = RenderHandleUtil::IsDynamicResource(handle);
    if ((customBarrierCount > 0) && needsBarrier) {
        needsBarrier = (handledCustomBarriers.count(handle) > 0) ? false : true;
    }
    return needsBarrier;
}
} // namespace

RenderGraph::RenderGraph(GpuResourceManager& gpuResourceMgr) : gpuResourceMgr_(gpuResourceMgr) {}

void RenderGraph::BeginFrame()
{
    stateCache_.multiRenderPassStore.renderPasses.clear();
    stateCache_.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
    stateCache_.multiRenderPassStore.firstBarrierPointIndex = ~0u;
    stateCache_.multiRenderPassStore.supportOpen = false;
    stateCache_.nodeCounter = 0u;
    stateCache_.checkForBackbufferDependency = false;
    stateCache_.usesSwapchainImage = false;
}

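// Per-frame processing entry point: releases tracking slots of destroyed resources, resizes the
// tracking index arrays, walks all render node graph node stores to generate barriers and layout
// transitions, and finally stores the end-of-frame buffer and image states for the next frame.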
void RenderGraph::ProcessRenderNodeGraph(
    const bool checkBackbufferDependancy, const array_view<RenderNodeGraphNodeStore*> renderNodeGraphNodeStores)
{
    stateCache_.checkForBackbufferDependency = checkBackbufferDependancy;

    // NOTE: separate gpu buffers and gpu images due to larger structs, layers, mips in images
    // all levels of mips and layers are not currently tracked -> needs more fine grained modifications
    // handles:
    // gpu images in descriptor sets, render passes, blits, and custom barriers
    // gpu buffers in descriptor sets, and custom barriers

    {
        // remove resources that will not be tracked anymore and release available slots
        const GpuResourceManager::StateDestroyConsumeStruct stateResetData = gpuResourceMgr_.ConsumeStateDestroyData();
        for (const auto& handle : stateResetData.resources) {
            const RenderHandleType handleType = RenderHandleUtil::GetHandleType(handle);
            const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
            if ((handleType == RenderHandleType::GPU_IMAGE) &&
                (arrayIndex < static_cast<uint32_t>(gpuImageDataIndices_.size()))) {
                if (const uint32_t dataIdx = gpuImageDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuImageTracking_.size()));
                    gpuImageTracking_[dataIdx] = {}; // reset
                    gpuImageAvailableIndices_.push_back(dataIdx);
                }
                gpuImageDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            } else if (arrayIndex < static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
                if (const uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex]; dataIdx != INVALID_TRACK_IDX) {
                    PLUGIN_ASSERT(dataIdx < static_cast<uint32_t>(gpuBufferTracking_.size()));
                    gpuBufferTracking_[dataIdx] = {}; // reset
                    gpuBufferAvailableIndices_.push_back(dataIdx);
                }
                gpuBufferDataIndices_[arrayIndex] = INVALID_TRACK_IDX;
            }
        }
    }

    gpuBufferDataIndices_.resize(gpuResourceMgr_.GetBufferHandleCount(), INVALID_TRACK_IDX);
    gpuImageDataIndices_.resize(gpuResourceMgr_.GetImageHandleCount(), INVALID_TRACK_IDX);

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT || CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES ||
                  CORE_RENDER_GRAPH_FULL_DEBUG_ATTACHMENTS) {
        static uint64_t debugFrame = 0;
        debugFrame++;
        PLUGIN_LOG_I("START RENDER GRAPH, FRAME %" PRIu64, debugFrame);
    }
#endif

    // need to store some of the resources for frame state in undefined state (i.e. reset on frame boundaries)
    ProcessRenderNodeGraphNodeStores(renderNodeGraphNodeStores, stateCache_);

    // store final state for next frame
    StoreFinalBufferState();
    StoreFinalImageState(); // processes gpuImageBackbufferState_ as well
}

RenderGraph::SwapchainStates RenderGraph::GetSwapchainResourceStates() const
{
    return swapchainStates_;
}

void RenderGraph::ProcessRenderNodeGraphNodeStores(
    const array_view<RenderNodeGraphNodeStore*>& renderNodeGraphNodeStores, StateCache& stateCache)
{
    for (RenderNodeGraphNodeStore* graphStore : renderNodeGraphNodeStores) {
        PLUGIN_ASSERT(graphStore);
        if (!graphStore) {
            continue;
        }

        for (uint32_t nodeIdx = 0;
             nodeIdx < static_cast<uint32_t>(graphStore->renderNodeContextData.size());
             ++nodeIdx) {
            auto& ref = graphStore->renderNodeContextData[nodeIdx];
            ref.submitInfo.waitForSwapchainAcquireSignal = false; // reset
            stateCache.usesSwapchainImage = false;                // reset

#if (RENDER_DEV_ENABLED == 1)
            if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
                PLUGIN_LOG_I("FULL NODENAME %s", graphStore->renderNodeData[nodeIdx].fullName.data());
            }
#endif

            if (stateCache.multiRenderPassStore.supportOpen &&
                (stateCache.multiRenderPassStore.renderPasses.size() == 0)) {
                PLUGIN_LOG_E("invalid multi render node render pass subpass stitching");
                // NOTE: add more error handling and invalidate render command lists
            }
            stateCache.multiRenderPassStore.supportOpen =
                ref.renderCommandList->HasMultiRenderCommandListSubpasses();
            array_view<const RenderCommandWithType> cmdListRef = ref.renderCommandList->GetRenderCommands();
            // go through commands that affect or need transitions and barriers
            ProcessRenderNodeCommands(cmdListRef, nodeIdx, ref, stateCache);

            // needs backbuffer/swapchain wait
            if (stateCache.usesSwapchainImage) {
                ref.submitInfo.waitForSwapchainAcquireSignal = true;
            }

            // patch gpu resource queue transfers
            if (!currNodeGpuResourceTransfers_.empty()) {
                PatchGpuResourceQueueTransfers(graphStore->renderNodeContextData, currNodeGpuResourceTransfers_);
                // clear for next use
                currNodeGpuResourceTransfers_.clear();
            }

            stateCache_.nodeCounter++;
        }
    }
}

void RenderGraph::ProcessRenderNodeCommands(array_view<const RenderCommandWithType>& cmdListRef,
    const uint32_t& nodeIdx, RenderNodeContextData& ref, StateCache& stateCache)
{
    for (uint32_t listIdx = 0; listIdx < static_cast<uint32_t>(cmdListRef.size()); ++listIdx) {
        auto& cmdRef = cmdListRef[listIdx];

#if (RENDER_DEV_ENABLED == 1)
        if constexpr (CORE_RENDER_GRAPH_FULL_DEBUG_PRINT) {
            DebugPrintCommandListCommand(cmdRef, gpuResourceMgr_);
        }
#endif

        // most of the commands are handled within BarrierPoint
        switch (cmdRef.type) {
            case RenderCommandType::BARRIER_POINT:
                RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandBarrierPoint*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::BEGIN_RENDER_PASS:
                RenderCommand(
                    nodeIdx, listIdx, ref, *static_cast<RenderCommandBeginRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::END_RENDER_PASS:
                RenderCommand(nodeIdx, listIdx, ref, *static_cast<RenderCommandEndRenderPass*>(cmdRef.rc), stateCache);
                break;

            case RenderCommandType::NEXT_SUBPASS:
            case RenderCommandType::DRAW:
            case RenderCommandType::DRAW_INDIRECT:
            case RenderCommandType::DISPATCH:
            case RenderCommandType::DISPATCH_INDIRECT:
            case RenderCommandType::BIND_PIPELINE:
            case RenderCommandType::BIND_VERTEX_BUFFERS:
            case RenderCommandType::BIND_INDEX_BUFFER:
            case RenderCommandType::COPY_BUFFER:
            case RenderCommandType::COPY_BUFFER_IMAGE:
            case RenderCommandType::COPY_IMAGE:
            case RenderCommandType::BIND_DESCRIPTOR_SETS:
            case RenderCommandType::PUSH_CONSTANT:
            case RenderCommandType::BLIT_IMAGE:
            case RenderCommandType::BUILD_ACCELERATION_STRUCTURE:
            case RenderCommandType::CLEAR_COLOR_IMAGE:
            case RenderCommandType::DYNAMIC_STATE_VIEWPORT:
            case RenderCommandType::DYNAMIC_STATE_SCISSOR:
            case RenderCommandType::DYNAMIC_STATE_LINE_WIDTH:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS:
            case RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS:
            case RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS:
            case RenderCommandType::DYNAMIC_STATE_STENCIL:
            case RenderCommandType::WRITE_TIMESTAMP:
            case RenderCommandType::GPU_QUEUE_TRANSFER_RELEASE:
            case RenderCommandType::GPU_QUEUE_TRANSFER_ACQUIRE:
            case RenderCommandType::UNDEFINED:
            default: {
                // nop
                break;
            }
        }
    } // end command for
}

void RenderGraph::StoreFinalBufferState()
{
    for (auto& ref : gpuBufferTracking_) {
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            // reset, but we do not reset the handle, because the gpuBufferTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
        }
        // need to reset per frame variables for all buffers (so we do not try to patch or debug from previous
        // frames)
        ref.prevRc = {};
        ref.prevRenderNodeIndex = { ~0u };
    }
}

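// Stores the end-of-frame state of tracked images: patches swapchain images used in a render pass
// to PRESENT_SRC, records the swapchain states for the backend, and resets per-frame tracking data.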
void RenderGraph::StoreFinalImageState()
{
    swapchainStates_ = {}; // reset

#if (RENDER_DEV_ENABLED == 1)
    if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        PLUGIN_LOG_I("end_frame image_state:");
    }
#endif
    for (auto& ref : gpuImageTracking_) {
        // if resource is not dynamic, we do not track and care
        if (!RenderHandleUtil::IsDynamicResource(ref.resource.handle)) {
            ref = {};
            continue;
        }
        // handle automatic presentation layout
        if (stateCache_.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(ref.resource.handle)) {
            if (ref.prevRc.type == RenderCommandType::BEGIN_RENDER_PASS) {
                RenderCommandBeginRenderPass& beginRenderPass =
                    *static_cast<RenderCommandBeginRenderPass*>(ref.prevRc.rc);
                PatchRenderPassFinalLayout(
                    ref.resource.handle, ImageLayout::CORE_IMAGE_LAYOUT_PRESENT_SRC, beginRenderPass, ref);
            }
            // NOTE: currently we handle automatic presentation layout in vulkan backend if not in render pass
            // store final state for backbuffer
            // currently we only store swapchains if they are really in use in this frame
            const uint32_t flags = ref.state.accessFlags | ref.state.shaderStageFlags | ref.state.pipelineStageFlags;
            if (flags != 0) {
                swapchainStates_.swapchains.push_back({ ref.resource.handle, ref.state, ref.resource.imageLayout });
            }
        }
#if (RENDER_DEV_ENABLED == 1)
        // print before reset for next frame
        if constexpr (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
            DebugPrintImageState(gpuResourceMgr_, ref);
        }
#endif
        // shallow resources are not tracked
        // they are always in undefined state in the beginning of the frame
        if (RenderHandleUtil::IsResetOnFrameBorders(ref.resource.handle)) {
            const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(ref.resource.handle);
            // reset, but we do not reset the handle, because the gpuImageTracking_ element is not removed
            const RenderHandle handle = ref.resource.handle;
            ref = {};
            ref.resource.handle = handle;
            if (addMips) {
                PLUGIN_ASSERT(!ref.additionalState.layouts);
                ref.additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
            }
        }

        // need to reset per frame variables for all images (so we do not try to patch from previous frames)
        ref.prevRc = {};
        ref.prevRenderNodeIndex = { ~0u };
    }
}

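// Handles a BeginRenderPass command: updates tracked image states and initial/final layouts for
// all attachments of every subpass, and when render passes are stitched over multiple render
// command lists, combines them via UpdateMultiRenderCommandListRenderPasses on the final subpass.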
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBeginRenderPass& rc, StateCache& stateCache)
{
    // update layouts for attachments to gpu image state
    BeginRenderPassParameters params { rc, stateCache, { RenderCommandType::BEGIN_RENDER_PASS, &rc } };

    PLUGIN_ASSERT(rc.renderPassDesc.subpassCount > 0);

    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) { // stitch render pass subpasses
        BeginRenderPassHandleDependency(params, commandListCommandIndex, nodeData);
    }

    const GpuQueue gpuQueue = nodeData.renderCommandList->GetGpuQueue();

    auto finalImageLayouts =
        array_view(rc.imageLayouts.attachmentFinalLayouts, countof(rc.imageLayouts.attachmentFinalLayouts));

    BeginRenderPassUpdateImageStates(params, gpuQueue, finalImageLayouts, renderNodeIndex);

    for (uint32_t subpassIdx = 0; subpassIdx < rc.renderPassDesc.subpassCount; ++subpassIdx) {
        const auto& subpassRef = rc.subpasses[subpassIdx];
        const auto& subpassResourceStatesRef = rc.subpassResourceStates[subpassIdx];

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.inputAttachmentIndices, subpassRef.inputAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.colorAttachmentIndices, subpassRef.colorAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        BeginRenderPassUpdateSubpassImageStates(
            array_view(subpassRef.resolveAttachmentIndices, subpassRef.resolveAttachmentCount), rc.renderPassDesc,
            subpassResourceStatesRef, finalImageLayouts, stateCache);

        if (subpassRef.depthAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(
                array_view(&subpassRef.depthAttachmentIndex, subpassRef.depthAttachmentCount), rc.renderPassDesc,
                subpassResourceStatesRef, finalImageLayouts, stateCache);
            if (subpassRef.depthResolveAttachmentCount == 1) {
                BeginRenderPassUpdateSubpassImageStates(
                    array_view(&subpassRef.depthResolveAttachmentIndex, subpassRef.depthResolveAttachmentCount),
                    rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts, stateCache);
            }
        }
        if (subpassRef.fragmentShadingRateAttachmentCount == 1u) {
            BeginRenderPassUpdateSubpassImageStates(array_view(&subpassRef.fragmentShadingRateAttachmentIndex,
                                                        subpassRef.fragmentShadingRateAttachmentCount),
                rc.renderPassDesc, subpassResourceStatesRef, finalImageLayouts, stateCache);
        }
    }

    if (hasRenderPassDependency) { // stitch render pass subpasses
        if (rc.subpassStartIndex > 0) {
            // stitched to behave as a nextSubpass() and not beginRenderPass()
            rc.beginType = RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN;
        }
        const bool finalSubpass = (rc.subpassStartIndex == rc.renderPassDesc.subpassCount - 1);
        if (finalSubpass) {
            UpdateMultiRenderCommandListRenderPasses(stateCache.multiRenderPassStore);
            // multiRenderPassStore cleared in EndRenderPass
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugRenderPassLayoutPrint(gpuResourceMgr_, rc);
    }
#endif
}

void RenderGraph::BeginRenderPassHandleDependency(
    BeginRenderPassParameters& params, const uint32_t commandListCommandIndex, RenderNodeContextData& nodeData)
{
    params.stateCache.multiRenderPassStore.renderPasses.push_back(&params.rc);
    // store the first begin render pass
    params.rpForCmdRef = { RenderCommandType::BEGIN_RENDER_PASS,
        params.stateCache.multiRenderPassStore.renderPasses[0] };

    if (params.rc.subpassStartIndex == 0) { // store the first render pass barrier point
        // barrier point must be previous command
        PLUGIN_ASSERT(commandListCommandIndex >= 1);
        const uint32_t prevCommandIndex = commandListCommandIndex - 1;

        const RenderCommandWithType& barrierPointCmdRef =
            nodeData.renderCommandList->GetRenderCommands()[prevCommandIndex];
        PLUGIN_ASSERT(barrierPointCmdRef.type == RenderCommandType::BARRIER_POINT);
        PLUGIN_ASSERT(static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc));

        params.stateCache.multiRenderPassStore.firstRenderPassBarrierList = nodeData.renderBarrierList.get();
        params.stateCache.multiRenderPassStore.firstBarrierPointIndex =
            static_cast<RenderCommandBarrierPoint*>(barrierPointCmdRef.rc)->barrierPointIndex;
    }
}

void RenderGraph::BeginRenderPassUpdateImageStates(BeginRenderPassParameters& params, const GpuQueue& gpuQueue,
    array_view<ImageLayout>& finalImageLayouts, const uint32_t renderNodeIndex)
{
    auto& initialImageLayouts = params.rc.imageLayouts.attachmentInitialLayouts;
    const auto& attachmentHandles = params.rc.renderPassDesc.attachmentHandles;
    auto& attachments = params.rc.renderPassDesc.attachments;
    auto& attachmentInputResourceStates = params.rc.inputResourceStates;

    for (uint32_t attachmentIdx = 0; attachmentIdx < params.rc.renderPassDesc.attachmentCount; ++attachmentIdx) {
        const RenderHandle handle = attachmentHandles[attachmentIdx];
        // NOTE: invalidate invalid handle commands already in render command list
        if (!RenderHandleUtil::IsGpuImage(handle)) {
#ifdef _DEBUG
            PLUGIN_LOG_E("invalid handle in render node graph");
#endif
            continue;
        }
        auto& stateRef = GetImageResourceStateRef(handle, gpuQueue);
        ImageLayout imgLayout = stateRef.resource.imageLayout;

        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);
        // image layout is undefined if automatic barriers have been disabled
        if (params.rc.enableAutomaticLayoutChanges) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = attachments[attachmentIdx];
            if (addMips && (attachmentDesc.mipLevel < RenderGraph::MAX_MIP_STATE_COUNT)) {
                if (stateRef.additionalState.layouts) {
                    imgLayout = stateRef.additionalState.layouts[attachmentDesc.mipLevel];
                } else {
#if (RENDER_VALIDATION_ENABLED == 1)
                    PLUGIN_LOG_ONCE_E(to_hex(handle.id), "mip layouts missing");
#endif
                }
            }

            initialImageLayouts[attachmentIdx] = imgLayout;
        }
        // undefined layout with load_op_load -> we modify to dont_care (and remove validation warning)
        if ((imgLayout == ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED) &&
            (attachments[attachmentIdx].loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_LOAD)) {
            // dont care (user needs to be sure what is wanted, i.e. in first frame one should clear)
            attachments[attachmentIdx].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
        }
        finalImageLayouts[attachmentIdx] = imgLayout;
        attachmentInputResourceStates.states[attachmentIdx] = stateRef.state;
        attachmentInputResourceStates.layouts[attachmentIdx] = imgLayout;

        // store render pass for final layout patching
        stateRef.prevRc = params.rpForCmdRef;
        stateRef.prevRenderNodeIndex = renderNodeIndex;

        // flag for backbuffer use
        if (params.stateCache.checkForBackbufferDependency && RenderHandleUtil::IsSwapchain(handle)) {
            params.stateCache.usesSwapchainImage = true;
        }
    }
}

void RenderGraph::BeginRenderPassUpdateSubpassImageStates(array_view<const uint32_t> attachmentIndices,
    const RenderPassDesc& renderPassDesc, const RenderPassAttachmentResourceStates& subpassResourceStatesRef,
    array_view<ImageLayout> finalImageLayouts, StateCache& stateCache)
{
    for (const uint32_t attachmentIndex : attachmentIndices) {
        // NOTE: handle invalid commands already in render command list and invalidate draws etc.
        PLUGIN_ASSERT(attachmentIndex < renderPassDesc.attachmentCount);
        const RenderHandle handle = renderPassDesc.attachmentHandles[attachmentIndex];
        PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
        const GpuResourceState& refState = subpassResourceStatesRef.states[attachmentIndex];
        const ImageLayout& refImgLayout = subpassResourceStatesRef.layouts[attachmentIndex];
        // NOTE: we should support non dynamicity and GENERAL

        finalImageLayouts[attachmentIndex] = refImgLayout;
        auto& ref = GetImageResourceStateRef(handle, refState.gpuQueue);
        const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(handle);

        ref.state = refState;
        ref.resource.handle = handle;
        ref.resource.imageLayout = refImgLayout;
        if (addMips) {
            const RenderPassDesc::AttachmentDesc& attachmentDesc = renderPassDesc.attachments[attachmentIndex];
            const BindableImage image {
                handle,
                attachmentDesc.mipLevel,
                attachmentDesc.layer,
                refImgLayout,
                RenderHandle {},
            };
            ModifyAdditionalImageState(image, ref.additionalState);
        }
    }
}

void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    const RenderNodeContextData& nodeData, RenderCommandEndRenderPass& rc, StateCache& stateCache)
{
    const bool hasRenderPassDependency = stateCache.multiRenderPassStore.supportOpen;
    if (hasRenderPassDependency) {
        const bool finalSubpass =
            (rc.subpassCount == static_cast<uint32_t>(stateCache.multiRenderPassStore.renderPasses.size()));
        if (finalSubpass) {
            if (rc.subpassStartIndex != (rc.subpassCount - 1)) {
                PLUGIN_LOG_E("RenderGraph: error in multi render node render pass subpass ending");
                // NOTE: add more error handling and invalidate render command lists
            }
            rc.endType = RenderPassEndType::END_RENDER_PASS;
            stateCache.multiRenderPassStore.renderPasses.clear();
            stateCache.multiRenderPassStore.firstRenderPassBarrierList = nullptr;
            stateCache.multiRenderPassStore.firstBarrierPointIndex = ~0u;
            stateCache.multiRenderPassStore.supportOpen = false;
        } else {
            rc.endType = RenderPassEndType::END_SUBPASS;
        }
    }
}

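// Handles a BarrierPoint command: collects custom, vertex/index, indirect, clear/blit/copy, and
// descriptor set related barriers for the upcoming command and appends them to the node's barrier
// list (or to the first render pass barrier list when subpasses are stitched across command lists).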
void RenderGraph::RenderCommand(const uint32_t renderNodeIndex, const uint32_t commandListCommandIndex,
    RenderNodeContextData& nodeData, RenderCommandBarrierPoint& rc, StateCache& stateCache)
{
    // go through required descriptors for current upcoming event
    const auto& customBarrierListRef = nodeData.renderCommandList->GetCustomBarriers();
    const auto& cmdListRef = nodeData.renderCommandList->GetRenderCommands();
    const auto& allDescriptorSetHandlesForBarriers = nodeData.renderCommandList->GetDescriptorSetHandles();
    const auto& nodeDescriptorSetMgrRef = *nodeData.nodeContextDescriptorSetMgr;

    parameterCachePools_.combinedBarriers.clear();
    parameterCachePools_.handledCustomBarriers.clear();
    ParameterCache parameters { parameterCachePools_.combinedBarriers, parameterCachePools_.handledCustomBarriers,
        rc.customBarrierCount, rc.vertexIndexBarrierCount, rc.indirectBufferBarrierCount, renderNodeIndex,
        nodeData.renderCommandList->GetGpuQueue(), { RenderCommandType::BARRIER_POINT, &rc }, stateCache };
    // first check custom barriers
    if (parameters.customBarrierCount > 0) {
        HandleCustomBarriers(parameters, rc.customBarrierIndexBegin, customBarrierListRef);
    }
    // then vertex / index buffer barriers in the barrier point before render pass
    if (parameters.vertexInputBarrierCount > 0) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleVertexInputBufferBarriers(parameters, rc.vertexIndexBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassVertexInputBufferBarriers());
    }
    if (parameters.indirectBufferBarrierCount > 0U) {
        PLUGIN_ASSERT(rc.renderCommandType == RenderCommandType::BEGIN_RENDER_PASS);
        HandleRenderpassIndirectBufferBarriers(parameters, rc.indirectBufferBarrierIndexBegin,
            nodeData.renderCommandList->GetRenderpassIndirectBufferBarriers());
    }

    // in barrier point the next render command is known for which the barrier is needed
    if (rc.renderCommandType == RenderCommandType::CLEAR_COLOR_IMAGE) {
        HandleClearImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::BLIT_IMAGE) {
        HandleBlitImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER) {
        HandleCopyBuffer(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_BUFFER_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef);
    } else if (rc.renderCommandType == RenderCommandType::COPY_IMAGE) {
        HandleCopyBufferImage(parameters, commandListCommandIndex, cmdListRef); // NOTE: handles image to image
    } else { // descriptor sets
        if (rc.renderCommandType == RenderCommandType::DISPATCH_INDIRECT) {
            HandleDispatchIndirect(parameters, commandListCommandIndex, cmdListRef);
        }
        const uint32_t descriptorSetHandleBeginIndex = rc.descriptorSetHandleIndexBegin;
        const uint32_t descriptorSetHandleEndIndex = descriptorSetHandleBeginIndex + rc.descriptorSetHandleCount;
        const uint32_t descriptorSetHandleMaxIndex =
            Math::min(descriptorSetHandleEndIndex, static_cast<uint32_t>(allDescriptorSetHandlesForBarriers.size()));
        const auto descriptorSetHandlesForBarriers =
            array_view(allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleBeginIndex,
                allDescriptorSetHandlesForBarriers.data() + descriptorSetHandleMaxIndex);
        HandleDescriptorSets(parameters, descriptorSetHandlesForBarriers, nodeDescriptorSetMgrRef);
    }

    if (!parameters.combinedBarriers.empty()) {
        // use first render pass barrier point with following subpasses
        // firstRenderPassBarrierPoint is null for the first subpass
        const bool renderPassHasDependency = stateCache.multiRenderPassStore.supportOpen;
        if (renderPassHasDependency && stateCache.multiRenderPassStore.firstRenderPassBarrierList) {
            PLUGIN_ASSERT(!stateCache.multiRenderPassStore.renderPasses.empty());
            stateCache.multiRenderPassStore.firstRenderPassBarrierList->AddBarriersToBarrierPoint(
                rc.barrierPointIndex, parameters.combinedBarriers);
        } else {
            nodeData.renderBarrierList->AddBarriersToBarrierPoint(rc.barrierPointIndex, parameters.combinedBarriers);
        }
    }
#if (RENDER_DEV_ENABLED == 1)
    if (CORE_RENDER_GRAPH_PRINT_RESOURCE_STATES) {
        DebugBarrierPrint(gpuResourceMgr_, parameters.combinedBarriers);
    }
#endif
}

inline void RenderGraph::UpdateBufferResourceState(
    RenderGraphBufferState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRc = params.rcWithType;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

inline void RenderGraph::UpdateImageResourceState(
    RenderGraphImageState& stateRef, const ParameterCache& params, const CommandBarrier& cb)
{
    stateRef.resource.handle = cb.resourceHandle;
    stateRef.state.shaderStageFlags = 0;
    stateRef.state.accessFlags = cb.dst.accessFlags;
    stateRef.state.pipelineStageFlags = cb.dst.pipelineStageFlags;
    stateRef.state.gpuQueue = params.gpuQueue;
    stateRef.prevRc = params.rcWithType;
    stateRef.prevRenderNodeIndex = params.renderNodeIndex;
}

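// Processes user supplied custom barriers: copies each barrier to the combined list, fills in the
// missing source state for dynamically tracked images (including per-mip layouts), updates the
// tracked state, and marks the handle as handled so automatic barriers are skipped for it.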
HandleCustomBarriers(ParameterCache & params,const uint32_t barrierIndexBegin,const array_view<const CommandBarrier> & customBarrierListRef)1135 void RenderGraph::HandleCustomBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1136 const array_view<const CommandBarrier>& customBarrierListRef)
1137 {
1138 params.handledCustomBarriers.reserve(params.customBarrierCount);
1139 PLUGIN_ASSERT(barrierIndexBegin + params.customBarrierCount <= customBarrierListRef.size());
1140 for (auto begin = (customBarrierListRef.begin() + barrierIndexBegin),
1141 end = Math::min(customBarrierListRef.end(), begin + params.customBarrierCount);
1142 begin != end; ++begin) {
1143 // add a copy and modify if needed
1144 auto& cb = params.combinedBarriers.emplace_back(*begin);
1145
1146 // NOTE: undefined type is for non-resource memory/pipeline barriers
1147 const RenderHandleType type = RenderHandleUtil::GetHandleType(cb.resourceHandle);
1148 const bool isDynamicTrack = RenderHandleUtil::IsDynamicResource(cb.resourceHandle);
1149 PLUGIN_ASSERT((type == RenderHandleType::UNDEFINED) || (type == RenderHandleType::GPU_BUFFER) ||
1150 (type == RenderHandleType::GPU_IMAGE));
1151 if (type == RenderHandleType::GPU_BUFFER) {
1152 if (isDynamicTrack) {
1153 auto& stateRef = GetBufferResourceStateRef(cb.resourceHandle, params.gpuQueue);
1154 UpdateBufferResourceState(stateRef, params, cb);
1155 }
1156 params.handledCustomBarriers[cb.resourceHandle] = 0;
1157 } else if (type == RenderHandleType::GPU_IMAGE) {
1158 if (isDynamicTrack) {
1159 const bool isAddMips = RenderHandleUtil::IsDynamicAdditionalStateResource(cb.resourceHandle);
1160 auto& stateRef = GetImageResourceStateRef(cb.resourceHandle, params.gpuQueue);
1161 if (cb.src.optionalImageLayout == CORE_IMAGE_LAYOUT_MAX_ENUM) {
1162 uint32_t mipLevel = 0U;
1163 uint32_t mipCount = PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS;
1164 ImageLayout srcImageLayout = stateRef.resource.imageLayout;
1165 if (isAddMips) {
1166 const uint32_t srcMip = cb.src.optionalImageSubresourceRange.baseMipLevel;
1167 const uint32_t dstMip = cb.dst.optionalImageSubresourceRange.baseMipLevel;
1168 if ((srcMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ||
1169 (dstMip != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)) {
1170 if (dstMip < RenderGraph::MAX_MIP_STATE_COUNT) {
1171 mipLevel = dstMip;
1172 mipCount = 1U;
1173 } else {
1174 mipLevel = srcMip;
1175 // all mip levels (mipCount stays at GPU_IMAGE_ALL_MIP_LEVELS)
1176 }
1177 if (stateRef.additionalState.layouts) {
1178 srcImageLayout = stateRef.additionalState.layouts[mipLevel];
1179 } else {
1180 #if (RENDER_VALIDATION_ENABLED == 1)
1181 PLUGIN_LOG_ONCE_E(to_hex(cb.resourceHandle.id), "mip layouts missing");
1182 #endif
1183 }
1184 }
1185 }
1186 cb.src.accessFlags = stateRef.state.accessFlags;
1187 cb.src.pipelineStageFlags =
1188 stateRef.state.pipelineStageFlags | PipelineStageFlagBits::CORE_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1189 cb.src.optionalImageLayout = srcImageLayout;
1190 cb.src.optionalImageSubresourceRange = { 0, mipLevel, mipCount, 0u,
1191 PipelineStateConstants::GPU_IMAGE_ALL_LAYERS };
1192 }
1193 UpdateImageResourceState(stateRef, params, cb);
1194 stateRef.resource.imageLayout = cb.dst.optionalImageLayout;
1195 if (isAddMips) {
1196 const BindableImage image {
1197 cb.resourceHandle,
1198 cb.dst.optionalImageSubresourceRange.baseMipLevel,
1199 cb.dst.optionalImageSubresourceRange.baseArrayLayer,
1200 cb.dst.optionalImageLayout,
1201 RenderHandle {},
1202 };
1203 ModifyAdditionalImageState(image, stateRef.additionalState);
1204 }
1205 }
1206 params.handledCustomBarriers[cb.resourceHandle] = 0;
1207 }
1208 }
1209 }
1210
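// Creates barriers for vertex and index buffers used by the upcoming render pass so that they are
// readable at the vertex input stage (index read and vertex attribute read access).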
1211 void RenderGraph::HandleVertexInputBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1212 const array_view<const VertexBuffer>& vertexInputBufferBarrierListRef)
1213 {
1214 for (uint32_t idx = 0; idx < params.vertexInputBarrierCount; ++idx) {
1215 const uint32_t barrierIndex = barrierIndexBegin + idx;
1216 PLUGIN_ASSERT(barrierIndex < static_cast<uint32_t>(vertexInputBufferBarrierListRef.size()));
1217 if (barrierIndex < static_cast<uint32_t>(vertexInputBufferBarrierListRef.size())) {
1218 const VertexBuffer& vbInput = vertexInputBufferBarrierListRef[barrierIndex];
1219 const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
1220 CORE_ACCESS_INDEX_READ_BIT | CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
1221 CORE_PIPELINE_STAGE_VERTEX_INPUT_BIT, params.gpuQueue };
1222 UpdateStateAndCreateBarriersGpuBuffer(
1223 resourceState, { vbInput.bufferHandle, vbInput.bufferOffset, vbInput.byteSize }, params);
1224 }
1225 }
1226 }
1227
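// Creates barriers for indirect argument buffers used inside a render pass; buffers already covered
// by custom barriers are skipped.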
1228 void RenderGraph::HandleRenderpassIndirectBufferBarriers(ParameterCache& params, const uint32_t barrierIndexBegin,
1229 const array_view<const VertexBuffer>& indirectBufferBarrierListRef)
1230 {
1231 for (uint32_t idx = 0; idx < params.indirectBufferBarrierCount; ++idx) {
1232 const uint32_t barrierIndex = barrierIndexBegin + idx;
1233 PLUGIN_ASSERT(barrierIndex < static_cast<uint32_t>(indirectBufferBarrierListRef.size()));
1234 if (barrierIndex < static_cast<uint32_t>(indirectBufferBarrierListRef.size())) {
1235 const VertexBuffer& ib = indirectBufferBarrierListRef[barrierIndex];
1236 const bool needsArgsBarrier =
1237 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, ib.bufferHandle);
1238 if (needsArgsBarrier) {
1239 const GpuResourceState resourceState { CORE_SHADER_STAGE_VERTEX_BIT,
1240 CORE_ACCESS_INDIRECT_COMMAND_READ_BIT, CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue };
1241 UpdateStateAndCreateBarriersGpuBuffer(
1242 resourceState, { ib.bufferHandle, ib.bufferOffset, ib.byteSize }, params);
1243 }
1244 }
1245 }
1246 }
1247
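// The barrier point precedes the actual command: the next command in the list is the color clear,
// and the target image is transitioned for transfer writes unless a custom barrier already covers it.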
1248 void RenderGraph::HandleClearImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1249 const array_view<const RenderCommandWithType>& cmdListRef)
1250 {
1251 const uint32_t nextListIdx = commandListCommandIndex + 1;
1252 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1253 const auto& nextCmdRef = cmdListRef[nextListIdx];
1254 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::CLEAR_COLOR_IMAGE);
1255
1256 const RenderCommandClearColorImage& nextRc = *static_cast<RenderCommandClearColorImage*>(nextCmdRef.rc);
1257
1258 const bool needsBarrier =
1259 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.handle);
1260 if (needsBarrier) {
1261 BindableImage bRes = {};
1262 bRes.handle = nextRc.handle;
1263 bRes.imageLayout = nextRc.imageLayout;
1264 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1265 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1266 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1267 }
1268 }
1269
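// Transitions the blit source image for transfer reads and the blit destination image for transfer
// writes, unless custom barriers already cover the handles.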
1270 void RenderGraph::HandleBlitImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1271 const array_view<const RenderCommandWithType>& cmdListRef)
1272 {
1273 const uint32_t nextListIdx = commandListCommandIndex + 1;
1274 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1275 const auto& nextCmdRef = cmdListRef[nextListIdx];
1276 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::BLIT_IMAGE);
1277
1278 const RenderCommandBlitImage& nextRc = *static_cast<RenderCommandBlitImage*>(nextCmdRef.rc);
1279
1280 const bool needsSrcBarrier =
1281 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
1282 if (needsSrcBarrier) {
1283 BindableImage bRes = {};
1284 bRes.handle = nextRc.srcHandle;
1285 bRes.imageLayout = nextRc.srcImageLayout;
1286 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1287 GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1288 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1289 }
1290
1291 const bool needsDstBarrier =
1292 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
1293 if (needsDstBarrier) {
1294 BindableImage bRes = {};
1295 bRes.handle = nextRc.dstHandle;
1296 bRes.imageLayout = nextRc.dstImageLayout;
1297 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1298 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1299 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1300 }
1301 }
1302
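// Transitions the copy source buffer for transfer reads and the destination buffer for transfer
// writes; the barriered ranges follow the copy region (offset and size).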
1303 void RenderGraph::HandleCopyBuffer(ParameterCache& params, const uint32_t& commandListCommandIndex,
1304 const array_view<const RenderCommandWithType>& cmdListRef)
1305 {
1306 const uint32_t nextListIdx = commandListCommandIndex + 1;
1307 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1308 const auto& nextCmdRef = cmdListRef[nextListIdx];
1309 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::COPY_BUFFER);
1310
1311 const RenderCommandCopyBuffer& nextRc = *static_cast<RenderCommandCopyBuffer*>(nextCmdRef.rc);
1312
1313 const bool needsSrcBarrier =
1314 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.srcHandle);
1315 if (needsSrcBarrier) {
1316 const BindableBuffer bRes = { nextRc.srcHandle, nextRc.bufferCopy.srcOffset, nextRc.bufferCopy.size };
1317 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1318 GpuResourceState { 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1319 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1320 }
1321
1322 const bool needsDstBarrier =
1323 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.dstHandle);
1324 if (needsDstBarrier) {
1325 const BindableBuffer bRes = { nextRc.dstHandle, nextRc.bufferCopy.dstOffset, nextRc.bufferCopy.size };
1326 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1327 GpuResourceState { 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1328 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1329 }
1330 }
1331
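// Handles both buffer<->image and image<->image copies: depending on the handle type the source is
// transitioned for transfer reads and the destination for transfer writes; images go to the
// TRANSFER_SRC/DST_OPTIMAL layouts using the copied subresource's mip level and array layer.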
1332 void RenderGraph::HandleCopyBufferImage(ParameterCache& params, const uint32_t& commandListCommandIndex,
1333 const array_view<const RenderCommandWithType>& cmdListRef)
1334 {
1335 const uint32_t nextListIdx = commandListCommandIndex + 1;
1336 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1337 const auto& nextCmdRef = cmdListRef[nextListIdx];
1338 PLUGIN_ASSERT((nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) ||
1339 (nextCmdRef.type == RenderCommandType::COPY_IMAGE));
1340
1341 // NOTE: two different command types are supported (copy buffer<->image and copy image<->image)
1342 RenderHandle srcHandle;
1343 RenderHandle dstHandle;
1344 ImageSubresourceLayers srcImgLayers;
1345 ImageSubresourceLayers dstImgLayers;
1346 if (nextCmdRef.type == RenderCommandType::COPY_BUFFER_IMAGE) {
1347 const RenderCommandCopyBufferImage& nextRc = *static_cast<RenderCommandCopyBufferImage*>(nextCmdRef.rc);
1348 PLUGIN_ASSERT(nextRc.copyType != RenderCommandCopyBufferImage::CopyType::UNDEFINED);
1349 srcHandle = nextRc.srcHandle;
1350 dstHandle = nextRc.dstHandle;
1351 srcImgLayers = nextRc.bufferImageCopy.imageSubresource;
1352 dstImgLayers = nextRc.bufferImageCopy.imageSubresource;
1353 } else if (nextCmdRef.type == RenderCommandType::COPY_IMAGE) {
1354 const RenderCommandCopyImage& nextRc = *static_cast<RenderCommandCopyImage*>(nextCmdRef.rc);
1355 srcHandle = nextRc.srcHandle;
1356 dstHandle = nextRc.dstHandle;
1357 srcImgLayers = nextRc.imageCopy.srcSubresource;
1358 dstImgLayers = nextRc.imageCopy.dstSubresource;
1359 }
1360
1361 const bool needsSrcBarrier =
1362 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, srcHandle);
1363 if (needsSrcBarrier) {
1364 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(srcHandle);
1365 PLUGIN_UNUSED(handleType);
1366 PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
1367 if (handleType == RenderHandleType::GPU_BUFFER) {
1368 BindableBuffer bRes;
1369 bRes.handle = srcHandle;
1370 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1371 GpuResourceState {
1372 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1373 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1374 } else {
1375 BindableImage bRes;
1376 bRes.handle = srcHandle;
1377 bRes.mip = srcImgLayers.mipLevel;
1378 bRes.layer = srcImgLayers.baseArrayLayer;
1379 bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1380 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1381 GpuResourceState {
1382 0, CORE_ACCESS_TRANSFER_READ_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1383 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1384 }
1385 }
1386
1387 const bool needsDstBarrier =
1388 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, dstHandle);
1389 if (needsDstBarrier) {
1390 const RenderHandleType handleType = RenderHandleUtil::GetHandleType(dstHandle);
1391 PLUGIN_UNUSED(handleType);
1392 PLUGIN_ASSERT(handleType == RenderHandleType::GPU_IMAGE || handleType == RenderHandleType::GPU_BUFFER);
1393 if (handleType == RenderHandleType::GPU_BUFFER) {
1394 BindableBuffer bRes;
1395 bRes.handle = dstHandle;
1396 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1397 GpuResourceState {
1398 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1399 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1400 } else {
1401 BindableImage bRes;
1402 bRes.handle = dstHandle;
1403 bRes.mip = dstImgLayers.mipLevel;
1404 bRes.layer = dstImgLayers.baseArrayLayer;
1405 bRes.imageLayout = CORE_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1406 AddCommandBarrierAndUpdateStateCacheImage(params.renderNodeIndex,
1407 GpuResourceState {
1408 0, CORE_ACCESS_TRANSFER_WRITE_BIT, CORE_PIPELINE_STAGE_TRANSFER_BIT, params.gpuQueue },
1409 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1410 }
1411 }
1412 }
1413
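// Makes the dispatch argument buffer readable as indirect command arguments (compute shader stage,
// indirect command read access at the draw indirect stage) unless a custom barrier already covers it.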
1414 void RenderGraph::HandleDispatchIndirect(ParameterCache& params, const uint32_t& commandListCommandIndex,
1415 const BASE_NS::array_view<const RenderCommandWithType>& cmdListRef)
1416 {
1417 const uint32_t nextListIdx = commandListCommandIndex + 1;
1418 PLUGIN_ASSERT(nextListIdx < cmdListRef.size());
1419 const auto& nextCmdRef = cmdListRef[nextListIdx];
1420 PLUGIN_ASSERT(nextCmdRef.type == RenderCommandType::DISPATCH_INDIRECT);
1421
1422 const auto& nextRc = *static_cast<RenderCommandDispatchIndirect*>(nextCmdRef.rc);
1423
1424 const bool needsArgsBarrier =
1425 CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, nextRc.argsHandle);
1426 if (needsArgsBarrier) {
1427 const BindableBuffer bRes = { nextRc.argsHandle, nextRc.offset, PipelineStateConstants::GPU_BUFFER_WHOLE_SIZE };
1428 AddCommandBarrierAndUpdateStateCacheBuffer(params.renderNodeIndex,
1429 GpuResourceState { CORE_SHADER_STAGE_COMPUTE_BIT, CORE_ACCESS_INDIRECT_COMMAND_READ_BIT,
1430 CORE_PIPELINE_STAGE_DRAW_INDIRECT_BIT, params.gpuQueue },
1431 bRes, params.rcWithType, params.combinedBarriers, currNodeGpuResourceTransfers_);
1432 }
1433 }
1434
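// Walks the CPU-side data of every descriptor set handle given for barrier creation and creates
// barriers for the referenced buffers and images; array bindings are expanded through their array
// offsets, and resources already covered by custom barriers are skipped.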
1435 void RenderGraph::HandleDescriptorSets(ParameterCache& params,
1436 const array_view<const RenderHandle>& descriptorSetHandlesForBarriers,
1437 const NodeContextDescriptorSetManager& nodeDescriptorSetMgrRef)
1438 {
1439 for (const RenderHandle descriptorSetHandle : descriptorSetHandlesForBarriers) {
1440 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(descriptorSetHandle) == RenderHandleType::DESCRIPTOR_SET);
1441
1442 const auto bindingResources = nodeDescriptorSetMgrRef.GetCpuDescriptorSetData(descriptorSetHandle);
1443 const auto& buffers = bindingResources.buffers;
1444 const auto& images = bindingResources.images;
1445 for (const auto& ref : buffers) {
1446 const uint32_t descriptorCount = ref.binding.descriptorCount;
1447 // skip array entries which are bound through the binding's first index; they have a descriptorCount of 0
1448 if (descriptorCount == 0) {
1449 continue;
1450 }
1451 const uint32_t arrayOffset = ref.arrayOffset;
1452 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= buffers.size());
1453 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1454 // index 0 is the binding itself; from index 1 onwards the array offset entries are used
1455 const auto& bRes = (idx == 0) ? ref : buffers[arrayOffset + idx - 1];
1456 if (CheckForBarrierNeed(params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
1457 UpdateStateAndCreateBarriersGpuBuffer(bRes.state, bRes.resource, params);
1458 }
1459 }
1460 }
1461 for (const auto& ref : images) {
1462 const uint32_t descriptorCount = ref.binding.descriptorCount;
1463 // skip array entries which are bound through the binding's first index; they have a descriptorCount of 0
1464 if (descriptorCount == 0) {
1465 continue;
1466 }
1467 const uint32_t arrayOffset = ref.arrayOffset;
1468 PLUGIN_ASSERT((arrayOffset + descriptorCount - 1) <= images.size());
1469 for (uint32_t idx = 0; idx < descriptorCount; ++idx) {
1470 // index 0 is the binding itself; from index 1 onwards the array offset entries are used
1471 const auto& bRes = (idx == 0) ? ref : images[arrayOffset + idx - 1];
1472 if (CheckForBarrierNeed(
1473 params.handledCustomBarriers, params.customBarrierCount, bRes.resource.handle)) {
1474 UpdateStateAndCreateBarriersGpuImage(bRes.state, bRes.resource, params);
1475 }
1476 }
1477 }
1478 } // end for
1479 }
1480
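// Compares the tracked image state against the new state and appends a barrier when the layout or
// access flags change, or when the previous access included writes. Input attachment reads are left
// to the render pass. A queue type change is recorded as a pending ownership transfer instead of a
// barrier; in both cases the tracked state is then updated to the new state.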
1481 void RenderGraph::UpdateStateAndCreateBarriersGpuImage(
1482 const GpuResourceState& state, const BindableImage& res, RenderGraph::ParameterCache& params)
1483 {
1484 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
1485 if (arrayIndex >= static_cast<uint32_t>(gpuImageDataIndices_.size())) {
1486 return;
1487 }
1488
1489 auto& ref = GetImageResourceStateRef(res.handle, state.gpuQueue);
1490 // NOTE: we previously patched the final render pass layouts here
1491 // ATM: we only patch the swapchain image if needed
1492
1493 const GpuResourceState& prevState = ref.state;
1494 const BindableImage& prevImage = ref.resource;
1495 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(res.handle);
1496 const ResourceBarrier prevStateRb = addMips ? GetSrcImageBarrierMips(prevState, prevImage, res, ref.additionalState)
1497 : GetSrcImageBarrier(prevState, prevImage);
1498
1499 const bool layoutChanged = (prevStateRb.optionalImageLayout != res.imageLayout);
1500 const bool accessFlagsChanged = (prevStateRb.accessFlags != state.accessFlags);
1501 const bool writeTarget = (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS);
1502 const bool inputAttachment = (state.accessFlags == CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT);
1503 // input attachments are handled with render passes and not with barriers
1504 if ((layoutChanged || accessFlagsChanged || writeTarget) && (!inputAttachment)) {
1505 if ((prevState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1506 (prevState.gpuQueue.type != state.gpuQueue.type)) {
1507 PLUGIN_ASSERT(state.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1508
1509 PLUGIN_ASSERT(ref.prevRenderNodeIndex != params.renderNodeIndex);
1510 currNodeGpuResourceTransfers_.push_back(RenderGraph::GpuQueueTransferState {
1511 res.handle, ref.prevRenderNodeIndex, params.renderNodeIndex, prevImage.imageLayout, res.imageLayout });
1512 } else {
1513 const ResourceBarrier dstImageBarrier =
1514 addMips ? GetDstImageBarrierMips(state, prevImage, res, ref.additionalState)
1515 : GetDstImageBarrier(state, res);
1516 params.combinedBarriers.push_back(
1517 CommandBarrier { res.handle, prevStateRb, prevState.gpuQueue, dstImageBarrier, params.gpuQueue });
1518 }
1519
1520 ref.state = state;
1521 ref.resource = res;
1522 ref.prevRc = params.rcWithType;
1523 ref.prevRenderNodeIndex = params.renderNodeIndex;
1524 if (addMips) {
1525 ModifyAdditionalImageState(res, ref.additionalState);
1526 }
1527 }
1528 }
1529
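// Buffer counterpart: a barrier is appended when the access flags change or the previous access
// included writes, and the cached state is always updated to the destination state. For example, a
// buffer previously written by a compute shader and now read as vertex data gets a barrier from
// shader write (compute stage) to vertex attribute read (vertex input stage).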
1530 void RenderGraph::UpdateStateAndCreateBarriersGpuBuffer(
1531 const GpuResourceState& dstState, const BindableBuffer& res, RenderGraph::ParameterCache& params)
1532 {
1533 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(res.handle);
1534 if (arrayIndex >= static_cast<uint32_t>(gpuBufferDataIndices_.size())) {
1535 return;
1536 }
1537
1538 // get the current state of the buffer
1539 auto& srcStateRef = GetBufferResourceStateRef(res.handle, dstState.gpuQueue);
1540 const ResourceBarrier prevStateRb = GetSrcBufferBarrier(srcStateRef.state, res);
1541 if ((prevStateRb.accessFlags != dstState.accessFlags) || (prevStateRb.accessFlags & WRITE_ACCESS_FLAGS)) {
1542 params.combinedBarriers.push_back(CommandBarrier {
1543 res.handle, prevStateRb, dstState.gpuQueue, GetDstBufferBarrier(dstState, res), params.gpuQueue });
1544 }
1545
1546 // update the cached state to match the situation after the barrier
1547 srcStateRef.state = dstState;
1548 srcStateRef.resource = res;
1549 srcStateRef.prevRc = params.rcWithType;
1550 srcStateRef.prevRenderNodeIndex = params.renderNodeIndex;
1551 }
1552
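// Unconditionally records a src->dst barrier (or a queue ownership transfer when the queue type
// changes) for the given buffer and updates the tracked state to the new state.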
1553 void RenderGraph::AddCommandBarrierAndUpdateStateCacheBuffer(const uint32_t renderNodeIndex,
1554 const GpuResourceState& newGpuResourceState, const BindableBuffer& newBuffer,
1555 const RenderCommandWithType& rcWithType, vector<CommandBarrier>& barriers,
1556 vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
1557 {
1558 auto& stateRef = GetBufferResourceStateRef(newBuffer.handle, newGpuResourceState.gpuQueue);
1559 const GpuResourceState srcState = stateRef.state;
1560 const BindableBuffer srcBuffer = stateRef.resource;
1561
1562 if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1563 (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
1564 PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1565 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newBuffer.handle) == RenderHandleType::GPU_BUFFER);
1566 PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
1567 currNodeGpuResourceTransfer.push_back(
1568 RenderGraph::GpuQueueTransferState { newBuffer.handle, stateRef.prevRenderNodeIndex, renderNodeIndex,
1569 ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED, ImageLayout::CORE_IMAGE_LAYOUT_UNDEFINED });
1570 } else {
1571 const ResourceBarrier srcBarrier = GetSrcBufferBarrier(srcState, srcBuffer);
1572 const ResourceBarrier dstBarrier = GetDstBufferBarrier(newGpuResourceState, newBuffer);
1573
1574 barriers.push_back(CommandBarrier {
1575 newBuffer.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
1576 }
1577
1578 stateRef.state = newGpuResourceState;
1579 stateRef.resource = newBuffer;
1580 stateRef.prevRc = rcWithType;
1581 stateRef.prevRenderNodeIndex = renderNodeIndex;
1582 }
1583
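// Image variant of the above; for images with per-mip additional state the src/dst barriers are
// built per mip level and the additional layout tracking is updated afterwards.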
1584 void RenderGraph::AddCommandBarrierAndUpdateStateCacheImage(const uint32_t renderNodeIndex,
1585 const GpuResourceState& newGpuResourceState, const BindableImage& newImage, const RenderCommandWithType& rcWithType,
1586 vector<CommandBarrier>& barriers, vector<RenderGraph::GpuQueueTransferState>& currNodeGpuResourceTransfer)
1587 {
1588 // NOTE: newGpuResourceState carries the queue transfer image layout in the old optionalImageLayout
1589
1590 auto& stateRef = GetImageResourceStateRef(newImage.handle, newGpuResourceState.gpuQueue);
1591 const GpuResourceState srcState = stateRef.state;
1592 const BindableImage srcImage = stateRef.resource;
1593 const bool addMips = RenderHandleUtil::IsDynamicAdditionalStateResource(newImage.handle);
1594
1595 if ((srcState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED) &&
1596 (srcState.gpuQueue.type != newGpuResourceState.gpuQueue.type)) {
1597 PLUGIN_ASSERT(newGpuResourceState.gpuQueue.type != GpuQueue::QueueType::UNDEFINED);
1598 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(newImage.handle) == RenderHandleType::GPU_IMAGE);
1599 PLUGIN_ASSERT(stateRef.prevRenderNodeIndex != renderNodeIndex);
1600 currNodeGpuResourceTransfer.push_back(RenderGraph::GpuQueueTransferState { newImage.handle,
1601 stateRef.prevRenderNodeIndex, renderNodeIndex, srcImage.imageLayout, newImage.imageLayout });
1602 } else {
1603 const ResourceBarrier srcBarrier =
1604 addMips ? GetSrcImageBarrierMips(srcState, srcImage, newImage, stateRef.additionalState)
1605 : GetSrcImageBarrier(srcState, srcImage);
1606 const ResourceBarrier dstBarrier =
1607 addMips ? GetDstImageBarrierMips(newGpuResourceState, srcImage, newImage, stateRef.additionalState)
1608 : GetDstImageBarrier(newGpuResourceState, newImage);
1609
1610 barriers.push_back(CommandBarrier {
1611 newImage.handle, srcBarrier, srcState.gpuQueue, dstBarrier, newGpuResourceState.gpuQueue });
1612 }
1613
1614 stateRef.state = newGpuResourceState;
1615 stateRef.resource = newImage;
1616 stateRef.prevRc = rcWithType;
1617 stateRef.prevRenderNodeIndex = renderNodeIndex;
1618 if (addMips) {
1619 ModifyAdditionalImageState(newImage, stateRef.additionalState);
1620 }
1621 }
1622
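// Returns the tracking entry for a dynamically tracked buffer, lazily allocating a slot on first
// use (reusing freed indices when available); untracked handles fall back to defaultBufferState_.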
1623 RenderGraph::RenderGraphBufferState& RenderGraph::GetBufferResourceStateRef(
1624 const RenderHandle handle, const GpuQueue& queue)
1625 {
1626 // NOTE: do not call this with a resource that is not dynamically tracked
1627 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
1628 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_BUFFER);
1629 if (arrayIndex < gpuBufferDataIndices_.size()) {
1630 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
1631 uint32_t dataIdx = gpuBufferDataIndices_[arrayIndex];
1632 if (dataIdx == INVALID_TRACK_IDX) {
1633 if (!gpuBufferAvailableIndices_.empty()) {
1634 dataIdx = gpuBufferAvailableIndices_.back();
1635 gpuBufferAvailableIndices_.pop_back();
1636 } else {
1637 dataIdx = static_cast<uint32_t>(gpuBufferTracking_.size());
1638 gpuBufferTracking_.emplace_back();
1639 }
1640 gpuBufferDataIndices_[arrayIndex] = dataIdx;
1641
1642 gpuBufferTracking_[dataIdx].resource.handle = handle;
1643 gpuBufferTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
1644 }
1645 return gpuBufferTracking_[dataIdx];
1646 }
1647
1648 return defaultBufferState_;
1649 }
1650
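// Image counterpart of GetBufferResourceStateRef; additionally allocates the per-mip layout array
// for resources with dynamic additional state.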
1651 RenderGraph::RenderGraphImageState& RenderGraph::GetImageResourceStateRef(
1652 const RenderHandle handle, const GpuQueue& queue)
1653 {
1654 // NOTE: do not call this with a resource that is not dynamically tracked
1655 const uint32_t arrayIndex = RenderHandleUtil::GetIndexPart(handle);
1656 PLUGIN_ASSERT(RenderHandleUtil::GetHandleType(handle) == RenderHandleType::GPU_IMAGE);
1657 if (arrayIndex < gpuImageDataIndices_.size()) {
1658 // NOTE: render pass attachments are always expected to be dynamic resources
1659 PLUGIN_ASSERT(RenderHandleUtil::IsDynamicResource(handle));
1660 uint32_t dataIdx = gpuImageDataIndices_[arrayIndex];
1661 if (dataIdx == INVALID_TRACK_IDX) {
1662 if (!gpuImageAvailableIndices_.empty()) {
1663 dataIdx = gpuImageAvailableIndices_.back();
1664 gpuImageAvailableIndices_.pop_back();
1665 } else {
1666 dataIdx = static_cast<uint32_t>(gpuImageTracking_.size());
1667 gpuImageTracking_.emplace_back();
1668 }
1669 gpuImageDataIndices_[arrayIndex] = dataIdx;
1670
1671 gpuImageTracking_[dataIdx].resource.handle = handle;
1672 gpuImageTracking_[dataIdx].state.gpuQueue = queue; // current queue for default state
1673 if (RenderHandleUtil::IsDynamicAdditionalStateResource(handle) &&
1674 (!gpuImageTracking_[dataIdx].additionalState.layouts)) {
1675 gpuImageTracking_[dataIdx].additionalState.layouts = make_unique<ImageLayout[]>(MAX_MIP_STATE_COUNT);
1676 }
1677 }
1678 return gpuImageTracking_[dataIdx];
1679 }
1680
1681 return defaultImageState_;
1682 }
1683 RENDER_END_NAMESPACE()
1684