1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "render_bloom.h"
17 
18 #include <base/containers/fixed_string.h>
19 #include <base/containers/unordered_map.h>
20 #include <base/math/vector.h>
21 #include <render/datastore/intf_render_data_store_manager.h>
22 #include <render/datastore/intf_render_data_store_pod.h>
23 #include <render/datastore/render_data_store_render_pods.h>
24 #include <render/device/intf_gpu_resource_manager.h>
25 #include <render/device/intf_shader_manager.h>
26 #include <render/namespace.h>
27 #include <render/nodecontext/intf_node_context_descriptor_set_manager.h>
28 #include <render/nodecontext/intf_node_context_pso_manager.h>
29 #include <render/nodecontext/intf_pipeline_descriptor_set_binder.h>
30 #include <render/nodecontext/intf_render_command_list.h>
31 #include <render/nodecontext/intf_render_node_context_manager.h>
32 #include <render/nodecontext/intf_render_node_util.h>
33 
34 #include "util/log.h"
35 
36 // shaders
37 #include <render/shaders/common/render_post_process_structs_common.h>
38 
39 using namespace BASE_NS;
40 
41 RENDER_BEGIN_NAMESPACE()
42 namespace {
43 constexpr DynamicStateEnum DYNAMIC_STATES[] = { CORE_DYNAMIC_STATE_ENUM_VIEWPORT, CORE_DYNAMIC_STATE_ENUM_SCISSOR };
44 }
45 
Init(IRenderNodeContextManager & renderNodeContextMgr,const BloomInfo & bloomInfo)46 void RenderBloom::Init(IRenderNodeContextManager& renderNodeContextMgr, const BloomInfo& bloomInfo)
47 {
48     bloomInfo_ = bloomInfo;
49 
50     // NOTE: target counts etc. should probably be resized based on configuration
51     CreatePsos(renderNodeContextMgr);
52 
53     auto& gpuResourceMgr = renderNodeContextMgr.GetGpuResourceManager();
54     samplerHandle_ = gpuResourceMgr.Create(samplerHandle_,
55         GpuSamplerDesc {
56             Filter::CORE_FILTER_LINEAR,                                  // magFilter
57             Filter::CORE_FILTER_LINEAR,                                  // minFilter
58             Filter::CORE_FILTER_LINEAR,                                  // mipMapMode
59             SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeU
60             SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeV
61             SamplerAddressMode::CORE_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // addressModeW
62         });
63 }
64 
PreExecute(IRenderNodeContextManager & renderNodeContextMgr,const BloomInfo & bloomInfo,const PostProcessConfiguration & ppConfig)65 void RenderBloom::PreExecute(IRenderNodeContextManager& renderNodeContextMgr, const BloomInfo& bloomInfo,
66     const PostProcessConfiguration& ppConfig)
67 {
68     bloomInfo_ = bloomInfo;
69 
70     const GpuImageDesc& imgDesc =
71         renderNodeContextMgr.GetGpuResourceManager().GetImageDescriptor(bloomInfo_.input.handle);
72     uint32_t sizeDenom = 1u;
73     if (ppConfig.bloomConfiguration.bloomQualityType == BloomConfiguration::QUALITY_TYPE_LOW) {
74         sizeDenom = 2u;
75     }
76     CreateTargets(renderNodeContextMgr, Math::UVec2(imgDesc.width, imgDesc.height) / sizeDenom);
77 }
78 
Execute(IRenderNodeContextManager & renderNodeContextMgr,IRenderCommandList & cmdList,const PostProcessConfiguration & ppConfig)79 void RenderBloom::Execute(IRenderNodeContextManager& renderNodeContextMgr, IRenderCommandList& cmdList,
80     const PostProcessConfiguration& ppConfig)
81 {
82     bloomEnabled_ = false;
83     BloomConfiguration bloomConfiguration;
84     if (ppConfig.enableFlags & PostProcessConfiguration::ENABLE_BLOOM_BIT) {
85         bloomConfiguration.thresholdHard = ppConfig.bloomConfiguration.thresholdHard;
86         bloomConfiguration.thresholdSoft = ppConfig.bloomConfiguration.thresholdSoft;
87         bloomConfiguration.amountCoefficient = ppConfig.bloomConfiguration.amountCoefficient;
88         bloomConfiguration.dirtMaskCoefficient = ppConfig.bloomConfiguration.dirtMaskCoefficient;
89 
90         bloomEnabled_ = true;
91     }
92 
93     const auto bloomQualityType = ppConfig.bloomConfiguration.bloomQualityType;
94     PLUGIN_ASSERT(bloomQualityType < CORE_BLOOM_QUALITY_COUNT);
95     if (bloomInfo_.useCompute) {
96         psos_.downscale = psos_.downscaleHandlesCompute[bloomQualityType].regular;
97         psos_.downscaleAndThreshold = psos_.downscaleHandlesCompute[bloomQualityType].threshold;
98     } else {
99         psos_.downscale = psos_.downscaleHandles[bloomQualityType].regular;
100         psos_.downscaleAndThreshold = psos_.downscaleHandles[bloomQualityType].threshold;
101     }
102 
103     if (!bloomEnabled_) {
104         bloomConfiguration.amountCoefficient = 0.0f;
105     }
106 
107     bloomParameters_ = Math::Vec4(
108         // .x = thresholdHard, luma values below this won't bloom
109         bloomConfiguration.thresholdHard,
110         // .y = thresholdSoft, luma values from this value to hard threshold will reduce bloom input from 1.0 -> 0.0
111         // i.e. this creates softer threshold for bloom
112         bloomConfiguration.thresholdSoft,
113         // .z = amountCoefficient, will multiply the colors from the bloom textures when combined with original color
114         // target
115         bloomConfiguration.amountCoefficient,
116         // .w = -will multiply the dirt mask effect
117         bloomConfiguration.dirtMaskCoefficient);
118 
119     const bool validBinders = binders_.globalSet0.get() != nullptr;
120     if (validBinders) {
121         if (bloomInfo_.useCompute) {
122             ComputeBloom(renderNodeContextMgr, cmdList);
123         } else {
124             GraphicsBloom(renderNodeContextMgr, cmdList);
125         }
126     }
127 }
128 
GetDescriptorCounts() const129 DescriptorCounts RenderBloom::GetDescriptorCounts() const
130 {
131     // NOTE: when added support for various bloom target counts, might need to be calculated for max
132     return DescriptorCounts { {
133         { CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 32u },
134         { CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 32u },
135         { CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE, 32u },
136         { CORE_DESCRIPTOR_TYPE_SAMPLER, 24u },
137         { CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2u },
138     } };
139 }
140 
GetFinalTarget() const141 RenderHandle RenderBloom::GetFinalTarget() const
142 {
143     if (RenderHandleUtil::IsValid(bloomInfo_.output.handle)) {
144         return bloomInfo_.output.handle;
145     } else {
146         // output tex1 on compute and tex2 on graphics
147         return bloomInfo_.useCompute ? (targets_.tex1[0u].GetHandle()) : (targets_.tex2[0u].GetHandle());
148     }
149 }
150 
UpdateGlobalSet(IRenderCommandList & cmdList)151 void RenderBloom::UpdateGlobalSet(IRenderCommandList& cmdList)
152 {
153     auto& binder = *binders_.globalSet0;
154     binder.ClearBindings();
155     uint32_t binding = 0u;
156     binder.BindBuffer(binding++, bloomInfo_.globalUbo, 0);
157     binder.BindBuffer(binding++, bloomInfo_.globalUbo, sizeof(GlobalPostProcessStruct));
158     cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
159 }
160 
ComputeBloom(IRenderNodeContextManager & renderNodeContextMgr,IRenderCommandList & cmdList)161 void RenderBloom::ComputeBloom(IRenderNodeContextManager& renderNodeContextMgr, IRenderCommandList& cmdList)
162 {
163     constexpr PushConstant pc { ShaderStageFlagBits::CORE_SHADER_STAGE_COMPUTE_BIT,
164         sizeof(LocalPostProcessPushConstantStruct) };
165 
166     UpdateGlobalSet(cmdList);
167     if (bloomEnabled_) {
168         ComputeDownscaleAndThreshold(pc, cmdList);
169         ComputeDownscale(pc, cmdList);
170         ComputeUpscale(pc, cmdList);
171     }
172     // needs to be done even when bloom is disabled if node is in use
173     if (RenderHandleUtil::IsValid(bloomInfo_.output.handle)) {
174         ComputeCombine(pc, cmdList);
175     }
176 }
177 
ComputeDownscaleAndThreshold(const PushConstant & pc,IRenderCommandList & cmdList)178 void RenderBloom::ComputeDownscaleAndThreshold(const PushConstant& pc, IRenderCommandList& cmdList)
179 {
180     RenderHandle sets[2u] {};
181     sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
182     {
183         auto& binder = *binders_.downscaleAndThreshold;
184         sets[1u] = binder.GetDescriptorSetHandle();
185         binder.ClearBindings();
186 
187         uint32_t binding = 0;
188         binder.BindImage(binding++, { targets_.tex1[0].GetHandle() });
189         binder.BindImage(binding++, { bloomInfo_.input });
190         binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
191 
192         cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
193     }
194 
195     cmdList.BindPipeline(psos_.downscaleAndThreshold);
196     const ShaderThreadGroup tgs = psos_.downscaleAndThresholdTGS;
197 
198     // bind all sets
199     cmdList.BindDescriptorSets(0, sets);
200 
201     const auto targetSize = targets_.tex1Size[0];
202 
203     LocalPostProcessPushConstantStruct uPc;
204     uPc.factor = bloomParameters_;
205     uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
206         1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
207 
208     cmdList.PushConstantData(pc, arrayviewU8(uPc));
209 
210     cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
211 }
212 
ComputeDownscale(const PushConstant & pc,IRenderCommandList & cmdList)213 void RenderBloom::ComputeDownscale(const PushConstant& pc, IRenderCommandList& cmdList)
214 {
215     cmdList.BindPipeline(psos_.downscale);
216     const ShaderThreadGroup tgs = psos_.downscaleTGS;
217 
218     RenderHandle sets[2u] {};
219     sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
220     for (size_t i = 1; i < targets_.tex1.size(); ++i) {
221         {
222             auto& binder = *binders_.downscale[i];
223             sets[1u] = binder.GetDescriptorSetHandle();
224             binder.ClearBindings();
225 
226             uint32_t binding = 0;
227             binder.BindImage(binding++, { targets_.tex1[i].GetHandle() });
228             binder.BindImage(binding++, { targets_.tex1[i - 1].GetHandle() });
229             binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
230 
231             cmdList.UpdateDescriptorSet(
232                 binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
233         }
234         cmdList.BindDescriptorSets(0u, sets);
235 
236         const auto targetSize = targets_.tex1Size[i];
237 
238         LocalPostProcessPushConstantStruct uPc;
239         uPc.factor = bloomParameters_;
240         uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
241             1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
242         cmdList.PushConstantData(pc, arrayviewU8(uPc));
243 
244         cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
245     }
246 }
247 
ComputeUpscale(const PushConstant & pc,IRenderCommandList & cmdList)248 void RenderBloom::ComputeUpscale(const PushConstant& pc, IRenderCommandList& cmdList)
249 {
250     cmdList.BindPipeline(psos_.upscale);
251     const ShaderThreadGroup tgs = psos_.upscaleTGS;
252 
253     RenderHandle sets[2u] {};
254     sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
255 
256     for (size_t i = targets_.tex1.size() - 1; i != 0; --i) {
257         {
258             auto& binder = *binders_.upscale[i];
259             sets[1u] = binder.GetDescriptorSetHandle();
260             binder.ClearBindings();
261 
262             binder.BindImage(0u, { targets_.tex1[i - 1].GetHandle() });
263             binder.BindImage(1u, { targets_.tex1[i].GetHandle() });
264             binder.BindSampler(2u, { samplerHandle_.GetHandle() });
265 
266             cmdList.UpdateDescriptorSet(
267                 binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
268         }
269         cmdList.BindDescriptorSets(0u, sets);
270 
271         const auto targetSize = targets_.tex1Size[i - 1];
272 
273         LocalPostProcessPushConstantStruct uPc;
274         uPc.factor = bloomParameters_;
275         uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
276             1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
277         cmdList.PushConstantData(pc, arrayviewU8(uPc));
278 
279         cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
280     }
281 }
282 
ComputeCombine(const PushConstant & pc,IRenderCommandList & cmdList)283 void RenderBloom::ComputeCombine(const PushConstant& pc, IRenderCommandList& cmdList)
284 {
285     cmdList.BindPipeline(psos_.combine);
286     const ShaderThreadGroup tgs = psos_.combineTGS;
287 
288     RenderHandle sets[2u] {};
289     sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
290     {
291         auto& binder = *binders_.combine;
292         sets[1u] = binder.GetDescriptorSetHandle();
293         binder.ClearBindings();
294         // bind resources to set 1
295         uint32_t binding = 0;
296         binder.BindImage(binding++, { bloomInfo_.output });
297         binder.BindImage(binding++, { bloomInfo_.input });
298         binder.BindImage(binding++, { targets_.tex1[0].GetHandle() });
299         binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
300 
301         // update the descriptor set bindings for set 1
302         cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
303     }
304 
305     cmdList.BindDescriptorSets(0u, sets);
306 
307     const auto targetSize = baseSize_;
308 
309     LocalPostProcessPushConstantStruct uPc;
310     uPc.factor = bloomParameters_;
311     uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
312         1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
313     cmdList.PushConstantData(pc, arrayviewU8(uPc));
314 
315     cmdList.Dispatch((targetSize.x + tgs.x - 1) / tgs.x, (targetSize.y + tgs.y - 1) / tgs.y, 1);
316 }
317 
GraphicsBloom(IRenderNodeContextManager & renderNodeContextMgr,IRenderCommandList & cmdList)318 void RenderBloom::GraphicsBloom(IRenderNodeContextManager& renderNodeContextMgr, IRenderCommandList& cmdList)
319 {
320     RenderPass renderPass;
321     renderPass.renderPassDesc.attachmentCount = 1;
322     renderPass.renderPassDesc.subpassCount = 1;
323     renderPass.renderPassDesc.attachments[0].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
324     renderPass.renderPassDesc.attachments[0].storeOp = AttachmentStoreOp::CORE_ATTACHMENT_STORE_OP_STORE;
325 
326     RenderPassSubpassDesc& subpassDesc = renderPass.subpassDesc;
327     subpassDesc.colorAttachmentCount = 1;
328     subpassDesc.colorAttachmentIndices[0] = 0;
329 
330     constexpr PushConstant pc { ShaderStageFlagBits::CORE_SHADER_STAGE_FRAGMENT_BIT,
331         sizeof(LocalPostProcessPushConstantStruct) };
332 
333     UpdateGlobalSet(cmdList);
334     if (bloomEnabled_) {
335         RenderDownscaleAndThreshold(renderPass, pc, cmdList);
336         RenderDownscale(renderPass, pc, cmdList);
337         RenderUpscale(renderPass, pc, cmdList);
338     }
339     // combine (needs to be done even when bloom is disabled if node is in use
340     if (RenderHandleUtil::IsValid(bloomInfo_.output.handle)) {
341         RenderCombine(renderPass, pc, cmdList);
342     }
343 }
344 
RenderDownscaleAndThreshold(RenderPass & renderPass,const PushConstant & pc,IRenderCommandList & cmdList)345 void RenderBloom::RenderDownscaleAndThreshold(
346     RenderPass& renderPass, const PushConstant& pc, IRenderCommandList& cmdList)
347 {
348     const auto targetSize = targets_.tex1Size[0];
349     const ViewportDesc viewportDesc { 0, 0, static_cast<float>(targetSize.x), static_cast<float>(targetSize.y) };
350     const ScissorDesc scissorDesc = { 0, 0, targetSize.x, targetSize.y };
351 
352     renderPass.renderPassDesc.attachmentHandles[0] = targets_.tex1[0].GetHandle();
353     renderPass.renderPassDesc.renderArea = { 0, 0, targetSize.x, targetSize.y };
354     cmdList.BeginRenderPass(renderPass.renderPassDesc, 0, renderPass.subpassDesc);
355 
356     cmdList.SetDynamicStateViewport(viewportDesc);
357     cmdList.SetDynamicStateScissor(scissorDesc);
358     cmdList.BindPipeline(psos_.downscaleAndThreshold);
359 
360     RenderHandle sets[2u] {};
361     sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
362     {
363         auto& binder = *binders_.downscaleAndThreshold;
364         sets[1u] = binder.GetDescriptorSetHandle();
365         binder.ClearBindings();
366 
367         binder.BindImage(0u, { bloomInfo_.input });
368         binder.BindSampler(1u, { samplerHandle_.GetHandle() });
369         cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
370     }
371     cmdList.BindDescriptorSets(0u, sets);
372 
373     LocalPostProcessPushConstantStruct uPc;
374     uPc.factor = bloomParameters_;
375     uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
376         1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
377 
378     cmdList.PushConstantData(pc, arrayviewU8(uPc));
379     cmdList.Draw(3u, 1u, 0u, 0u);
380     cmdList.EndRenderPass();
381 }
382 
RenderDownscale(RenderPass & renderPass,const PushConstant & pc,IRenderCommandList & cmdList)383 void RenderBloom::RenderDownscale(RenderPass& renderPass, const PushConstant& pc, IRenderCommandList& cmdList)
384 {
385     LocalPostProcessPushConstantStruct uPc;
386     uPc.factor = bloomParameters_;
387 
388     RenderHandle sets[2u] {};
389     sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
390     for (size_t idx = 1; idx < targets_.tex1.size(); ++idx) {
391         const auto targetSize = targets_.tex1Size[idx];
392         const ViewportDesc viewportDesc { 0, 0, static_cast<float>(targetSize.x), static_cast<float>(targetSize.y) };
393         const ScissorDesc scissorDesc = { 0, 0, targetSize.x, targetSize.y };
394 
395         renderPass.renderPassDesc.attachmentHandles[0] = targets_.tex1[idx].GetHandle();
396         renderPass.renderPassDesc.renderArea = { 0, 0, targetSize.x, targetSize.y };
397         cmdList.BeginRenderPass(renderPass.renderPassDesc, 0, renderPass.subpassDesc);
398 
399         cmdList.SetDynamicStateViewport(viewportDesc);
400         cmdList.SetDynamicStateScissor(scissorDesc);
401 
402         cmdList.BindPipeline(psos_.downscale);
403 
404         {
405             auto& binder = *binders_.downscale[idx];
406             sets[1u] = binder.GetDescriptorSetHandle();
407             binder.ClearBindings();
408             binder.BindImage(0u, { targets_.tex1[idx - 1].GetHandle() });
409             binder.BindSampler(1u, { samplerHandle_.GetHandle() });
410             cmdList.UpdateDescriptorSet(
411                 binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
412         }
413         cmdList.BindDescriptorSets(0u, sets);
414 
415         uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
416             1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
417 
418         cmdList.PushConstantData(pc, arrayviewU8(uPc));
419         cmdList.Draw(3u, 1u, 0u, 0u);
420         cmdList.EndRenderPass();
421     }
422 }
423 
RenderUpscale(RenderPass & renderPass,const PushConstant & pc,IRenderCommandList & cmdList)424 void RenderBloom::RenderUpscale(RenderPass& renderPass, const PushConstant& pc, IRenderCommandList& cmdList)
425 {
426     RenderPass renderPassUpscale = renderPass;
427     renderPassUpscale.subpassDesc.inputAttachmentCount = 1;
428     renderPassUpscale.subpassDesc.inputAttachmentIndices[0] = 0;
429     renderPassUpscale.renderPassDesc.attachments[0].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
430     renderPassUpscale.renderPassDesc.attachments[0].storeOp = AttachmentStoreOp::CORE_ATTACHMENT_STORE_OP_STORE;
431 
432     RenderHandle sets[2u] {};
433     sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
434     PLUGIN_ASSERT(targets_.tex1.size() == targets_.tex2.size());
435     RenderHandle input;
436     if (targets_.tex1.size() >= 1) {
437         input = targets_.tex1[targets_.tex1.size() - 1].GetHandle();
438     }
439     for (size_t idx = targets_.tex1.size() - 1; idx != 0; --idx) {
440         const auto targetSize = targets_.tex1Size[idx - 1];
441         const ViewportDesc viewportDesc { 0, 0, static_cast<float>(targetSize.x), static_cast<float>(targetSize.y) };
442         const ScissorDesc scissorDesc = { 0, 0, targetSize.x, targetSize.y };
443 
444         // tex2 as output
445         renderPassUpscale.renderPassDesc.attachmentHandles[0] = targets_.tex2[idx - 1].GetHandle();
446         renderPassUpscale.renderPassDesc.renderArea = { 0, 0, targetSize.x, targetSize.y };
447         cmdList.BeginRenderPass(renderPassUpscale.renderPassDesc, 0, renderPassUpscale.subpassDesc);
448 
449         cmdList.SetDynamicStateViewport(viewportDesc);
450         cmdList.SetDynamicStateScissor(scissorDesc);
451 
452         cmdList.BindPipeline(psos_.upscale);
453 
454         {
455             auto& binder = *binders_.upscale[idx];
456             sets[1u] = binder.GetDescriptorSetHandle();
457             binder.ClearBindings();
458 
459             uint32_t binding = 0;
460             binder.BindImage(binding++, { input });
461             binder.BindImage(binding++, { targets_.tex1[idx - 1].GetHandle() });
462             binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
463             cmdList.UpdateDescriptorSet(
464                 binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
465         }
466         cmdList.BindDescriptorSets(0u, sets);
467         LocalPostProcessPushConstantStruct uPc;
468         uPc.factor = bloomParameters_;
469         uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
470             1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
471 
472         cmdList.PushConstantData(pc, arrayviewU8(uPc));
473         cmdList.Draw(3u, 1u, 0u, 0u);
474         cmdList.EndRenderPass();
475 
476         // next pass input
477         input = renderPassUpscale.renderPassDesc.attachmentHandles[0];
478     }
479 }
480 
RenderCombine(RenderPass & renderPass,const PushConstant & pc,IRenderCommandList & cmdList)481 void RenderBloom::RenderCombine(RenderPass& renderPass, const PushConstant& pc, IRenderCommandList& cmdList)
482 {
483     const auto targetSize = baseSize_;
484 
485     renderPass.renderPassDesc.attachmentHandles[0] = bloomInfo_.output.handle;
486     renderPass.renderPassDesc.renderArea = { 0, 0, targetSize.x, targetSize.y };
487     cmdList.BeginRenderPass(renderPass.renderPassDesc, 0, renderPass.subpassDesc);
488 
489     cmdList.SetDynamicStateViewport(baseViewportDesc_);
490     cmdList.SetDynamicStateScissor(baseScissorDesc_);
491 
492     cmdList.BindPipeline(psos_.combine);
493 
494     RenderHandle sets[2u] {};
495     sets[0u] = binders_.globalSet0->GetDescriptorSetHandle();
496     {
497         auto& binder = *binders_.combine;
498         sets[1u] = binder.GetDescriptorSetHandle();
499         binder.ClearBindings();
500 
501         uint32_t binding = 0;
502         binder.BindImage(binding++, { bloomInfo_.input });
503         // tex2 handle has the final result
504         binder.BindImage(binding++, { targets_.tex2[0].GetHandle() });
505         binder.BindSampler(binding++, { samplerHandle_.GetHandle() });
506 
507         cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
508     }
509     cmdList.BindDescriptorSets(0u, sets);
510 
511     LocalPostProcessPushConstantStruct uPc;
512     uPc.factor = bloomParameters_;
513     uPc.viewportSizeInvSize = Math::Vec4(static_cast<float>(targetSize.x), static_cast<float>(targetSize.y),
514         1.0f / static_cast<float>(targetSize.x), 1.0f / static_cast<float>(targetSize.y));
515 
516     cmdList.PushConstantData(pc, arrayviewU8(uPc));
517     cmdList.Draw(3u, 1u, 0u, 0u);
518     cmdList.EndRenderPass();
519 }
520 
CreateTargets(IRenderNodeContextManager & renderNodeContextMgr,const Math::UVec2 baseSize)521 void RenderBloom::CreateTargets(IRenderNodeContextManager& renderNodeContextMgr, const Math::UVec2 baseSize)
522 {
523     if (baseSize.x != baseSize_.x || baseSize.y != baseSize_.y) {
524         baseSize_ = baseSize;
525 
526         format_ = Format::BASE_FORMAT_B10G11R11_UFLOAT_PACK32;
527         ImageUsageFlags usageFlags = CORE_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT |
528                                      CORE_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
529 
530         if (bloomInfo_.useCompute) {
531             format_ = Format::BASE_FORMAT_R16G16B16A16_SFLOAT; // used due to GLES
532             usageFlags = CORE_IMAGE_USAGE_STORAGE_BIT | CORE_IMAGE_USAGE_SAMPLED_BIT;
533         } else {
534             baseViewportDesc_ = { 0.0f, 0.0f, static_cast<float>(baseSize.x), static_cast<float>(baseSize.y), 0.0f,
535                 1.0f };
536             baseScissorDesc_ = { 0, 0, baseSize.x, baseSize.y };
537         }
538 
539         // create target image
540         const Math::UVec2 startTargetSize = baseSize_;
541         GpuImageDesc desc {
542             ImageType::CORE_IMAGE_TYPE_2D,
543             ImageViewType::CORE_IMAGE_VIEW_TYPE_2D,
544             format_,
545             ImageTiling::CORE_IMAGE_TILING_OPTIMAL,
546             usageFlags,
547             MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
548             0,
549             EngineImageCreationFlagBits::CORE_ENGINE_IMAGE_CREATION_DYNAMIC_BARRIERS |
550                 EngineImageCreationFlagBits::CORE_ENGINE_IMAGE_CREATION_RESET_STATE_ON_FRAME_BORDERS,
551             startTargetSize.x,
552             startTargetSize.y,
553             1u,
554             1u,
555             1u,
556             SampleCountFlagBits::CORE_SAMPLE_COUNT_1_BIT,
557             {},
558         };
559 
560         auto& gpuResourceMgr = renderNodeContextMgr.GetGpuResourceManager();
561 #if (RENDER_VALIDATION_ENABLED == 1)
562         const string_view nodeName = renderNodeContextMgr.GetName();
563 #endif
564         for (size_t idx = 0; idx < targets_.tex1.size(); ++idx) {
565             // every bloom target is half the size of the original/ previous bloom target
566             desc.width /= 2u;
567             desc.height /= 2u;
568             desc.width = (desc.width >= 1u) ? desc.width : 1u;
569             desc.height = (desc.height >= 1u) ? desc.height : 1u;
570             targets_.tex1Size[idx] = Math::UVec2(desc.width, desc.height);
571 #if (RENDER_VALIDATION_ENABLED == 1)
572             const auto baseTargetName = nodeName + "_Bloom_" + to_string(idx);
573             targets_.tex1[idx] = gpuResourceMgr.Create(baseTargetName + "_A", desc);
574             if (!bloomInfo_.useCompute) {
575                 targets_.tex2[idx] = gpuResourceMgr.Create(baseTargetName + "_B", desc);
576             }
577 #else
578             targets_.tex1[idx] = gpuResourceMgr.Create(targets_.tex1[idx], desc);
579             if (!bloomInfo_.useCompute) {
580                 targets_.tex2[idx] = gpuResourceMgr.Create(targets_.tex2[idx], desc);
581             }
582 #endif
583         }
584     }
585 }
586 
CreatePsos(IRenderNodeContextManager & renderNodeContextMgr)587 void RenderBloom::CreatePsos(IRenderNodeContextManager& renderNodeContextMgr)
588 {
589     if (bloomInfo_.useCompute) {
590         CreateComputePsos(renderNodeContextMgr);
591     } else {
592         CreateRenderPsos(renderNodeContextMgr);
593     }
594 }
595 
CreateComputePsos(IRenderNodeContextManager & renderNodeContextMgr)596 void RenderBloom::CreateComputePsos(IRenderNodeContextManager& renderNodeContextMgr)
597 {
598     const auto& shaderMgr = renderNodeContextMgr.GetShaderManager();
599     INodeContextPsoManager& psoMgr = renderNodeContextMgr.GetPsoManager();
600     INodeContextDescriptorSetManager& dSetMgr = renderNodeContextMgr.GetDescriptorSetManager();
601 
602     constexpr BASE_NS::pair<BloomConfiguration::BloomQualityType, uint32_t> configurations[] = {
603         { BloomConfiguration::BloomQualityType::QUALITY_TYPE_LOW, RenderBloom::CORE_BLOOM_QUALITY_LOW },
604         { BloomConfiguration::BloomQualityType::QUALITY_TYPE_NORMAL, RenderBloom::CORE_BLOOM_QUALITY_NORMAL },
605         { BloomConfiguration::BloomQualityType::QUALITY_TYPE_HIGH, RenderBloom::CORE_BLOOM_QUALITY_HIGH }
606     };
607     for (const auto& configuration : configurations) {
608         {
609             auto shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_downscale.shader");
610             const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
611             ShaderSpecializationConstantView specializations = shaderMgr.GetReflectionSpecialization(shader);
612             const ShaderSpecializationConstantDataView specDataView {
613                 { specializations.constants.data(), specializations.constants.size() },
614                 { &configuration.second, 1u },
615             };
616 
617             psos_.downscaleHandlesCompute[configuration.first].regular =
618                 psoMgr.GetComputePsoHandle(shader, pl, specDataView);
619         }
620         {
621             auto shader = shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_downscale_threshold.shader");
622             const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
623 
624             ShaderSpecializationConstantView specializations = shaderMgr.GetReflectionSpecialization(shader);
625             const ShaderSpecializationConstantDataView specDataView {
626                 { specializations.constants.data(), specializations.constants.size() },
627                 { &configuration.second, 1u },
628             };
629             psos_.downscaleHandlesCompute[configuration.first].threshold =
630                 psoMgr.GetComputePsoHandle(shader, pl, specDataView);
631         }
632     }
633 
634     constexpr uint32_t globalSet = 0u;
635     constexpr uint32_t localSetIdx = 1u;
636     // the first one creates the global set as well
637     {
638         const RenderHandle shaderHandle =
639             shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_downscale_threshold.shader");
640         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
641         psos_.downscaleAndThreshold = psoMgr.GetComputePsoHandle(shaderHandle, pl, {});
642         psos_.downscaleAndThresholdTGS = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
643 
644         const auto& gBinds = pl.descriptorSetLayouts[globalSet].bindings;
645         binders_.globalSet0 = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(gBinds), gBinds);
646 
647         const auto& lBinds = pl.descriptorSetLayouts[localSetIdx].bindings;
648         binders_.downscaleAndThreshold = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(lBinds), lBinds);
649     }
650     {
651         const RenderHandle shaderHandle =
652             shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_downscale.shader");
653         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
654         psos_.downscale = psoMgr.GetComputePsoHandle(shaderHandle, pl, {});
655         psos_.downscaleTGS = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
656 
657         PLUGIN_ASSERT(binders_.downscale.size() >= TARGET_COUNT);
658         const auto& binds = pl.descriptorSetLayouts[localSetIdx].bindings;
659         for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
660             binders_.downscale[idx] = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
661         }
662     }
663     {
664         const RenderHandle shaderHandle =
665             shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_upscale.shader");
666         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
667         psos_.upscale = psoMgr.GetComputePsoHandle(shaderHandle, pl, {});
668         psos_.upscaleTGS = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
669 
670         PLUGIN_ASSERT(binders_.upscale.size() >= TARGET_COUNT);
671         const auto& binds = pl.descriptorSetLayouts[localSetIdx].bindings;
672         for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
673             binders_.upscale[idx] = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
674         }
675     }
676     {
677         const RenderHandle shaderHandle =
678             shaderMgr.GetShaderHandle("rendershaders://computeshader/bloom_combine.shader");
679         const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
680         psos_.combine = psoMgr.GetComputePsoHandle(shaderHandle, pl, {});
681         psos_.combineTGS = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
682 
683         const auto& binds = pl.descriptorSetLayouts[localSetIdx].bindings;
684         binders_.combine = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
685     }
686 }
687 
CreateAndReflectRenderPso(IRenderNodeContextManager & renderNodeContextMgr,const string_view shader,const RenderPass & renderPass)688 std::pair<RenderHandle, const PipelineLayout&> RenderBloom::CreateAndReflectRenderPso(
689     IRenderNodeContextManager& renderNodeContextMgr, const string_view shader, const RenderPass& renderPass)
690 {
691     const auto& shaderMgr = renderNodeContextMgr.GetShaderManager();
692     const RenderHandle shaderHandle = shaderMgr.GetShaderHandle(shader.data());
693     const RenderHandle graphicsStateHandle = shaderMgr.GetGraphicsStateHandleByShaderHandle(shaderHandle);
694     const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
695 
696     auto& psoMgr = renderNodeContextMgr.GetPsoManager();
697     const RenderHandle pso = psoMgr.GetGraphicsPsoHandle(
698         shaderHandle, graphicsStateHandle, pl, {}, {}, { DYNAMIC_STATES, countof(DYNAMIC_STATES) });
699     return { pso, pl };
700 }
701 
CreateRenderPsos(IRenderNodeContextManager & renderNodeContextMgr)702 void RenderBloom::CreateRenderPsos(IRenderNodeContextManager& renderNodeContextMgr)
703 {
704     RenderPass renderPass;
705     renderPass.renderPassDesc.attachmentCount = 1;
706     renderPass.renderPassDesc.attachmentHandles[0] = bloomInfo_.input.handle;
707     renderPass.renderPassDesc.renderArea = { 0, 0, baseSize_.x, baseSize_.y };
708     renderPass.renderPassDesc.subpassCount = 1;
709     renderPass.renderPassDesc.attachments[0].loadOp = AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_DONT_CARE;
710     renderPass.renderPassDesc.attachments[0].storeOp = AttachmentStoreOp::CORE_ATTACHMENT_STORE_OP_STORE;
711 
712     RenderPassSubpassDesc subpassDesc = renderPass.subpassDesc;
713     subpassDesc.colorAttachmentCount = 1;
714     subpassDesc.colorAttachmentIndices[0] = 0;
715 
716     constexpr BASE_NS::pair<BloomConfiguration::BloomQualityType, uint32_t> configurations[] = {
717         { BloomConfiguration::BloomQualityType::QUALITY_TYPE_LOW, RenderBloom::CORE_BLOOM_QUALITY_LOW },
718         { BloomConfiguration::BloomQualityType::QUALITY_TYPE_NORMAL, RenderBloom::CORE_BLOOM_QUALITY_NORMAL },
719         { BloomConfiguration::BloomQualityType::QUALITY_TYPE_HIGH, RenderBloom::CORE_BLOOM_QUALITY_HIGH }
720     };
721 
722     const IRenderNodeShaderManager& shaderMgr = renderNodeContextMgr.GetShaderManager();
723     INodeContextPsoManager& psoMgr = renderNodeContextMgr.GetPsoManager();
724 
725     for (const auto& configuration : configurations) {
726         {
727             auto shader = shaderMgr.GetShaderHandle("rendershaders://shader/bloom_downscale.shader");
728             const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
729             ShaderSpecializationConstantView specializations = shaderMgr.GetReflectionSpecialization(shader);
730             const ShaderSpecializationConstantDataView specDataView {
731                 { specializations.constants.data(), specializations.constants.size() },
732                 { &configuration.second, 1u },
733             };
734             const RenderHandle graphicsState = shaderMgr.GetGraphicsStateHandleByShaderHandle(shader);
735             psos_.downscaleHandles[configuration.first].regular = psoMgr.GetGraphicsPsoHandle(
736                 shader, graphicsState, pl, {}, specDataView, { DYNAMIC_STATES, countof(DYNAMIC_STATES) });
737         }
738 
739         {
740             auto shader = shaderMgr.GetShaderHandle("rendershaders://shader/bloom_downscale_threshold.shader");
741             const PipelineLayout& pl = shaderMgr.GetReflectionPipelineLayout(shader);
742             ShaderSpecializationConstantView specializations = shaderMgr.GetReflectionSpecialization(shader);
743             const ShaderSpecializationConstantDataView specDataView {
744                 { specializations.constants.data(), specializations.constants.size() },
745                 { &configuration.second, 1u },
746             };
747             const RenderHandle graphicsState = shaderMgr.GetGraphicsStateHandleByShaderHandle(shader);
748             psos_.downscaleHandles[configuration.first].threshold = psoMgr.GetGraphicsPsoHandle(
749                 shader, graphicsState, pl, {}, specDataView, { DYNAMIC_STATES, countof(DYNAMIC_STATES) });
750         }
751     }
752 
753     INodeContextDescriptorSetManager& dSetMgr = renderNodeContextMgr.GetDescriptorSetManager();
754     constexpr uint32_t globalSet = 0u;
755     constexpr uint32_t localSet = 1u;
756     // the first one creates the global set as well
757     {
758         const auto [pso, pipelineLayout] = CreateAndReflectRenderPso(
759             renderNodeContextMgr, "rendershaders://shader/bloom_downscale_threshold.shader", renderPass);
760         psos_.downscaleAndThreshold = pso;
761 
762         const auto& gBinds = pipelineLayout.descriptorSetLayouts[globalSet].bindings;
763         binders_.globalSet0 = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(gBinds), gBinds);
764 
765         const auto& lBinds = pipelineLayout.descriptorSetLayouts[localSet].bindings;
766         binders_.downscaleAndThreshold = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(lBinds), lBinds);
767     }
768     {
769         const auto [pso, pipelineLayout] = CreateAndReflectRenderPso(
770             renderNodeContextMgr, "rendershaders://shader/bloom_downscale.shader", renderPass);
771         psos_.downscale = pso;
772         const auto& binds = pipelineLayout.descriptorSetLayouts[localSet].bindings;
773         for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
774             binders_.downscale[idx] = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
775         }
776     }
777     {
778         const auto [pso, pipelineLayout] =
779             CreateAndReflectRenderPso(renderNodeContextMgr, "rendershaders://shader/bloom_upscale.shader", renderPass);
780         psos_.upscale = pso;
781         const auto& binds = pipelineLayout.descriptorSetLayouts[localSet].bindings;
782         for (uint32_t idx = 0; idx < TARGET_COUNT; ++idx) {
783             binders_.upscale[idx] = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
784         }
785     }
786     {
787         const auto [pso, pipelineLayout] =
788             CreateAndReflectRenderPso(renderNodeContextMgr, "rendershaders://shader/bloom_combine.shader", renderPass);
789         psos_.combine = pso;
790         const auto& binds = pipelineLayout.descriptorSetLayouts[localSet].bindings;
791         binders_.combine = dSetMgr.CreateDescriptorSetBinder(dSetMgr.CreateDescriptorSet(binds), binds);
792     }
793 }
794 RENDER_END_NAMESPACE()
795