1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "render_node_morph.h"
17 
18 #include <algorithm>
19 
20 #include <3d/render/intf_render_data_store_morph.h>
21 #include <core/log.h>
22 #include <core/namespace.h>
23 #include <render/datastore/intf_render_data_store_manager.h>
24 #include <render/device/intf_gpu_resource_manager.h>
25 #include <render/device/intf_shader_manager.h>
26 #include <render/nodecontext/intf_node_context_descriptor_set_manager.h>
27 #include <render/nodecontext/intf_node_context_pso_manager.h>
28 #include <render/nodecontext/intf_pipeline_descriptor_set_binder.h>
29 #include <render/nodecontext/intf_render_command_list.h>
30 #include <render/nodecontext/intf_render_node_context_manager.h>
31 
32 namespace {
33 #include "3d/shaders/common/morph_target_structs.h"
34 
// Descriptor set indices used with the morph compute pipeline layout.
constexpr uint32_t SET_WEIGHTS { 0U };
constexpr uint32_t SET_INPUTS { 1U };
constexpr uint32_t SET_OUTPUTS { 2U };

// Alignment used when splitting buffers into separately bound ranges.
// Source note on vendor values: Nvidia = 0x20, Mali and Intel = 0x10, SBO on Mali = 0x100.
constexpr uint32_t BUFFER_ALIGN { 0x100U };
40 
/**
 * Rounds value up to the next multiple of align.
 *
 * @param value Value to round up.
 * @param align Alignment step; zero means "no alignment" and returns value unchanged.
 * @return The smallest multiple of align that is greater than or equal to value.
 */
inline size_t Align(size_t value, size_t align)
{
    if (align != 0U) {
        // Number of whole align-sized steps needed to cover value.
        const size_t steps = (value + (align - 1U)) / align;
        return steps * align;
    }
    return value;
}
49 } // namespace
50 
51 CORE3D_BEGIN_NAMESPACE()
52 using namespace BASE_NS;
53 using namespace RENDER_NS;
54 
InitNode(IRenderNodeContextManager & renderNodeContextMgr)55 void RenderNodeMorph::InitNode(IRenderNodeContextManager& renderNodeContextMgr)
56 {
57     renderNodeContextMgr_ = &renderNodeContextMgr;
58 
59     const auto& renderNodeGraphData = renderNodeContextMgr_->GetRenderNodeGraphData();
60     stores_ = RenderNodeSceneUtil::GetSceneRenderDataStores(
61         renderNodeContextMgr, renderNodeGraphData.renderNodeGraphDataStoreName);
62 
63     auto& shaderMgr = renderNodeContextMgr.GetShaderManager();
64 
65     {
66         const RenderHandle shaderHandle = shaderMgr.GetShaderHandle("3dshaders://computeshader/core3d_dm_morph.shader");
67         threadGroupSize_ = shaderMgr.GetReflectionThreadGroupSize(shaderHandle);
68         pipelineLayout_ = shaderMgr.GetReflectionPipelineLayout(shaderHandle);
69 
70         auto& psoMgr = renderNodeContextMgr.GetPsoManager();
71         psoHandle_ = psoMgr.GetComputePsoHandle(shaderHandle, pipelineLayout_, {});
72     }
73 }
74 
PreExecuteFrame()75 void RenderNodeMorph::PreExecuteFrame()
76 {
77     hasExecuteData_ = false;
78 
79     // re-create needed gpu resources
80     const auto& renderDataStoreMgr = renderNodeContextMgr_->GetRenderDataStoreManager();
81     const auto* morphDataStore =
82         static_cast<IRenderDataStoreMorph*>(renderDataStoreMgr.GetRenderDataStore(stores_.dataStoreNameMorph));
83     if (!morphDataStore) {
84         return;
85     }
86 
87     const auto submeshes = morphDataStore->GetSubmeshes();
88     if (submeshes.empty()) {
89         return;
90     }
91 
92     hasExecuteData_ = true;
93     if (maxObjectCount_ < submeshes.size()) {
94         maxObjectCount_ = static_cast<uint32_t>(submeshes.size() + submeshes.size() / 2u);
95 
96         auto& descriptorSetMgr = renderNodeContextMgr_->GetDescriptorSetManager();
97         const DescriptorCounts dc { { // weight/indexset for all prims + number of inputs and outputs
98             { CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u + (2u + 3u) * maxObjectCount_ } } };
99         descriptorSetMgr.ResetAndReserve(dc);
100         {
101             const RenderHandle descriptorSetHandle = descriptorSetMgr.CreateDescriptorSet(SET_WEIGHTS, pipelineLayout_);
102             allDescriptorSets_.params = descriptorSetMgr.CreateDescriptorSetBinder(
103                 descriptorSetHandle, pipelineLayout_.descriptorSetLayouts[SET_WEIGHTS].bindings);
104         }
105         {
106             allDescriptorSets_.inputs.resize(maxObjectCount_);
107             for (uint32_t idx = 0; idx < maxObjectCount_; ++idx) {
108                 const RenderHandle descriptorSetHandle =
109                     descriptorSetMgr.CreateDescriptorSet(SET_INPUTS, pipelineLayout_);
110                 allDescriptorSets_.inputs[idx] = descriptorSetMgr.CreateDescriptorSetBinder(
111                     descriptorSetHandle, pipelineLayout_.descriptorSetLayouts[SET_INPUTS].bindings);
112             }
113         }
114         {
115             allDescriptorSets_.outputs.resize(maxObjectCount_);
116             for (uint32_t idx = 0; idx < maxObjectCount_; ++idx) {
117                 const RenderHandle descriptorSetHandle =
118                     descriptorSetMgr.CreateDescriptorSet(SET_OUTPUTS, pipelineLayout_);
119                 allDescriptorSets_.outputs[idx] = descriptorSetMgr.CreateDescriptorSetBinder(
120                     descriptorSetHandle, pipelineLayout_.descriptorSetLayouts[SET_OUTPUTS].bindings);
121             }
122         }
123     }
124 
125     uint32_t structCount = 0u;
126     for (const auto& submesh : submeshes) {
127         structCount += (static_cast<uint32_t>(submesh.activeTargets.size()) + 3u) / 4u;
128     }
129     if (maxStructCount_ < structCount) {
130         maxStructCount_ = structCount + structCount / 2u;
131         const uint32_t sizeOfBuffer = maxStructCount_ * sizeof(::MorphTargetInfoStruct);
132         if (bufferSize_ < sizeOfBuffer) {
133             bufferSize_ = static_cast<uint32_t>(Align(sizeOfBuffer, BUFFER_ALIGN));
134             auto& gpuResourceMgr = renderNodeContextMgr_->GetGpuResourceManager();
135             morphTargetBufferHandle_ = gpuResourceMgr.Create(morphTargetBufferHandle_,
136                 GpuBufferDesc { CORE_BUFFER_USAGE_STORAGE_BUFFER_BIT,
137                     (CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT | CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT),
138                     CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER, bufferSize_ });
139         }
140     }
141 }
142 
GetExecuteFlags() const143 IRenderNode::ExecuteFlags RenderNodeMorph::GetExecuteFlags() const
144 {
145     if (hasExecuteData_) {
146         return 0U;
147     } else {
148         return IRenderNode::ExecuteFlagBits::EXECUTE_FLAG_BITS_DO_NOT_EXECUTE;
149     }
150 }
151 
ExecuteFrame(IRenderCommandList & cmdList)152 void RenderNodeMorph::ExecuteFrame(IRenderCommandList& cmdList)
153 {
154     const auto& renderDataStoreMgr = renderNodeContextMgr_->GetRenderDataStoreManager();
155     const auto* morphDataStore =
156         static_cast<IRenderDataStoreMorph*>(renderDataStoreMgr.GetRenderDataStore(stores_.dataStoreNameMorph));
157     if (!morphDataStore) {
158         return;
159     }
160     array_view<const RenderDataMorph::Submesh> submeshes = morphDataStore->GetSubmeshes();
161     if (submeshes.empty()) {
162         return;
163     }
164 
165     const uint32_t maxSubmeshCount = std::min((uint32_t)submeshes.size(), maxObjectCount_);
166     submeshes = array_view(submeshes.data(), maxSubmeshCount);
167 
168     UpdateWeightsAndTargets(submeshes);
169 
170     ComputeMorphs(cmdList, submeshes);
171 }
172 
UpdateWeightsAndTargets(array_view<const RenderDataMorph::Submesh> submeshes)173 void RenderNodeMorph::UpdateWeightsAndTargets(array_view<const RenderDataMorph::Submesh> submeshes)
174 {
175     IRenderNodeGpuResourceManager& gpuResourceMgr = renderNodeContextMgr_->GetGpuResourceManager();
176     auto morphData =
177         reinterpret_cast<::MorphTargetInfoStruct*>(gpuResourceMgr.MapBuffer(morphTargetBufferHandle_.GetHandle()));
178     if (morphData) {
179         uint32_t offset = 0;
180         for (const RenderDataMorph::Submesh& submesh : submeshes) {
181             const auto& activeTargets = submesh.activeTargets;
182             const auto blockSize = (static_cast<uint32_t>(activeTargets.size()) + 3u) / 4u;
183             // Assert that the maximum active morph target count is not reached.
184             CORE_ASSERT((offset + blockSize) < bufferSize_);
185             if ((offset + blockSize) < bufferSize_) {
186                 for (size_t i = 0; i < activeTargets.size(); i++) {
187                     morphData[offset + i / 4u].target[i % 4u] = activeTargets[i].id;
188                     morphData[offset + i / 4u].weight[i % 4u] = activeTargets[i].weight;
189                 }
190             }
191             offset += blockSize;
192         }
193         // Could there be some way to mark the modified range of buffer dirty?
194         gpuResourceMgr.UnmapBuffer(morphTargetBufferHandle_.GetHandle());
195     }
196 }
197 
ComputeMorphs(IRenderCommandList & cmdList,array_view<const RenderDataMorph::Submesh> submeshes)198 void RenderNodeMorph::ComputeMorphs(IRenderCommandList& cmdList, array_view<const RenderDataMorph::Submesh> submeshes)
199 {
200     cmdList.BindPipeline(psoHandle_);
201     // set 0
202     {
203         auto& binder = *allDescriptorSets_.params;
204         binder.BindBuffer(0u, morphTargetBufferHandle_.GetHandle(), 0u);
205         cmdList.UpdateDescriptorSet(binder.GetDescriptorSetHandle(), binder.GetDescriptorSetLayoutBindingResources());
206 
207         cmdList.BindDescriptorSet(SET_WEIGHTS, binder.GetDescriptorSetHandle());
208     }
209     uint32_t offset = 0;
210     uint32_t outputIdx = 0u;
211     uint32_t inputIdx = 0u;
212     for (const RenderDataMorph::Submesh& submesh : submeshes) {
213         const auto blockSize = (static_cast<uint32_t>(submesh.activeTargets.size()) + 3u) / 4u;
214         if ((offset + blockSize) < bufferSize_) {
215             // Bind inputs = target position (set 1)
216             auto& inputBinder = *allDescriptorSets_.inputs[inputIdx++];
217 
218             const auto indexOffset = submesh.morphTargetBuffer.bufferOffset;
219             const auto indexSize = static_cast<uint32_t>(
220                 Align((submesh.vertexCount * submesh.morphTargetCount * static_cast<uint32_t>(sizeof(uint32_t))),
221                     BUFFER_ALIGN));
222 
223             inputBinder.BindBuffer(0u, submesh.morphTargetBuffer.bufferHandle.GetHandle(), indexOffset, indexSize);
224 
225             const auto vertexOffset = indexOffset + indexSize;
226             const auto vertexSize = submesh.morphTargetBuffer.byteSize - indexSize;
227             inputBinder.BindBuffer(1u, submesh.morphTargetBuffer.bufferHandle.GetHandle(), vertexOffset, vertexSize);
228             cmdList.UpdateDescriptorSet(
229                 inputBinder.GetDescriptorSetHandle(), inputBinder.GetDescriptorSetLayoutBindingResources());
230 
231             // Bind outputs = pos/nor/tangent buffers (set 2)
232             auto& outputBinder = *allDescriptorSets_.outputs[outputIdx++];
233             {
234                 outputBinder.BindBuffer(0u, submesh.vertexBuffers[0u].bufferHandle.GetHandle(),
235                     submesh.vertexBuffers[0u].bufferOffset,
236                     submesh.vertexBuffers[0u].byteSize); // position
237                 outputBinder.BindBuffer(1u, submesh.vertexBuffers[1u].bufferHandle.GetHandle(),
238                     submesh.vertexBuffers[1u].bufferOffset,
239                     submesh.vertexBuffers[1u].byteSize); // normal
240                 outputBinder.BindBuffer(2u, submesh.vertexBuffers[2u].bufferHandle.GetHandle(),
241                     submesh.vertexBuffers[2u].bufferOffset,
242                     submesh.vertexBuffers[2u].byteSize); // tangent
243 
244                 cmdList.UpdateDescriptorSet(
245                     outputBinder.GetDescriptorSetHandle(), outputBinder.GetDescriptorSetLayoutBindingResources());
246             }
247             const RenderHandle sets[] = { inputBinder.GetDescriptorSetHandle(), outputBinder.GetDescriptorSetHandle() };
248             cmdList.BindDescriptorSets(SET_INPUTS, sets);
249 
250             const ::MorphObjectPushConstantStruct pushData { offset, submesh.vertexCount, submesh.morphTargetCount,
251                 static_cast<uint32_t>(submesh.activeTargets.size()) };
252             cmdList.PushConstant(pipelineLayout_.pushConstant, reinterpret_cast<const uint8_t*>(&pushData));
253             cmdList.Dispatch((submesh.vertexCount + threadGroupSize_.x - 1) / threadGroupSize_.x, 1, 1);
254         }
255         offset += blockSize;
256     }
257 }
258 
259 // for plugin / factory interface
Create()260 RENDER_NS::IRenderNode* RenderNodeMorph::Create()
261 {
262     return new RenderNodeMorph();
263 }
264 
Destroy(IRenderNode * instance)265 void RenderNodeMorph::Destroy(IRenderNode* instance)
266 {
267     delete static_cast<RenderNodeMorph*>(instance);
268 }
269 CORE3D_END_NAMESPACE()
270