/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gpu_buffer_vk.h"

#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <vulkan/vulkan_core.h>

#include <base/math/mathf.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#endif

#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_buffer.h"
#include "device/gpu_resource_desc_flag_validation.h"
#include "util/log.h"
#include "vulkan/device_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
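// Rounds byteSize up to the next multiple of alignment. Assumes alignment is
// a power of two (true for the Vulkan limits used below), e.g.
// GetAlignedByteSize(13u, 8u) == 16u.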
constexpr uint32_t GetAlignedByteSize(const uint32_t byteSize, const uint32_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}
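// The buffer may be bound as either a storage or a uniform buffer, so the
// stricter of the two offset alignment limits is used.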
constexpr uint32_t GetMinBufferAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(static_cast<uint32_t>(limits.minStorageBufferOffsetAlignment),
        static_cast<uint32_t>(limits.minUniformBufferOffsetAlignment));
}
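// Mapped ranges must honor both the minimum map alignment and the
// non-coherent atom size so that flushed ranges cover whole atoms.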
constexpr uint32_t GetMemoryMapAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(
        static_cast<uint32_t>(limits.minMemoryMapAlignment), static_cast<uint32_t>(limits.nonCoherentAtomSize));
}
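// Packs the VMA allocation info into the engine's platform memory struct.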
GpuResourceMemoryVk GetPlatMemory(const VmaAllocationInfo& allocationInfo, const VkMemoryPropertyFlags flags)
{
    return GpuResourceMemoryVk {
        allocationInfo.deviceMemory,
        allocationInfo.offset,
        allocationInfo.size,
        allocationInfo.pMappedData,
        allocationInfo.memoryType,
        flags,
    };
}

#if (RENDER_PERF_ENABLED == 1)
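// Reports buffer allocations to the performance data manager;
// alignedByteSize is negative when recording a destruction.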
void RecordAllocation(
    PlatformGpuMemoryAllocator& gpuMemAllocator, const GpuBufferDesc& desc, const int64_t alignedByteSize)
{
    if (auto* inst = CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        inst) {
        CORE_NS::IPerformanceDataManager* pdm = inst->Get("Memory");

        pdm->UpdateData("AllGpuBuffers", "GPU_BUFFER", alignedByteSize);
        const string poolDebugName = gpuMemAllocator.GetBufferPoolDebugName(desc);
        if (!poolDebugName.empty()) {
            pdm->UpdateData(poolDebugName, "GPU_BUFFER", alignedByteSize);
        }
    }
}
#endif
} // namespace

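// The buffer is persistently mapped only when the memory is both host
// visible and host coherent; a dynamic ring buffer additionally keeps one
// slice per buffered frame.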
GpuBufferVk::GpuBufferVk(Device& device, const GpuBufferDesc& desc)
    : device_(device), desc_(desc),
      isPersistantlyMapped_(
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();
}
GpuBufferVk::GpuBufferVk(Device& device, const GpuAccelerationStructureDesc& desc)
    : device_(device), desc_(desc.bufferDesc), descAccel_(desc),
      isPersistantlyMapped_(
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc_.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      isAccelerationStructure_(true), bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();

#if (RENDER_VULKAN_RT_ENABLED == 1)
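    // The acceleration structure is backed by the buffer created above; its
    // device address is queried so it can be referenced in shaders and in
    // build commands.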
    PLUGIN_ASSERT(desc.bufferDesc.usageFlags & CORE_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT);
    platAccel_.buffer = plat_.buffer;
    platAccel_.byteSize = plat_.fullByteSize;

    constexpr VkFlags createFlags = 0;
    const VkAccelerationStructureTypeKHR accelerationStructureType =
        static_cast<VkAccelerationStructureTypeKHR>(descAccel_.accelerationStructureType);
    VkAccelerationStructureCreateInfoKHR createInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // sType
        nullptr,                                                  // pNext
        createFlags,                                              // createFlags
        plat_.buffer,                                             // buffer
        0,                                                        // offset
        (VkDeviceSize)platAccel_.byteSize,                        // size
        accelerationStructureType,                                // type
        0,                                                        // deviceAddress
    };

    const DeviceVk& deviceVk = (const DeviceVk&)device_;
    const DevicePlatformDataVk& devicePlat = (const DevicePlatformDataVk&)device_.GetPlatformData();
    const VkDevice vkDevice = devicePlat.device;

    const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
    if (extFunctions.vkCreateAccelerationStructureKHR && extFunctions.vkGetAccelerationStructureDeviceAddressKHR) {
        VALIDATE_VK_RESULT(extFunctions.vkCreateAccelerationStructureKHR(vkDevice, // device
            &createInfo,                                                           // pCreateInfo
            nullptr,                                                               // pAllocator
            &platAccel_.accelerationStructure));                                   // pAccelerationStructure

        if (platAccel_.accelerationStructure) {
            const VkAccelerationStructureDeviceAddressInfoKHR addressInfo {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // sType
                nullptr,                                                          // pNext
                platAccel_.accelerationStructure,                                 // accelerationStructure
            };
            platAccel_.deviceAddress = extFunctions.vkGetAccelerationStructureDeviceAddressKHR(vkDevice, // device
                &addressInfo);                                                                           // pInfo
        }
    }
#endif
}

GpuBufferVk::~GpuBufferVk()
{
    if (isMapped_) {
        Unmap();
    }

#if (RENDER_VULKAN_RT_ENABLED == 1)
    if (isAccelerationStructure_ && platAccel_.accelerationStructure) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        const DeviceVk& deviceVk = (const DeviceVk&)device_;
        const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
        if (extFunctions.vkDestroyAccelerationStructureKHR) {
            extFunctions.vkDestroyAccelerationStructureKHR(device, // device
                platAccel_.accelerationStructure,                  // accelerationStructure
                nullptr);                                          // pAllocator
        }
    }
#endif

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        gpuMemAllocator->DestroyBuffer(plat_.buffer, mem_.allocation);
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, -static_cast<int64_t>(plat_.fullByteSize));
#endif
    }
#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id <: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}
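// Computes the aligned slice size and full allocation size, allocates the
// buffer memory, and caches whether the resulting memory type is mappable.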
void GpuBufferVk::CreateBufferImpl()
{
    PLUGIN_ASSERT_MSG(
        (isRingBuffer_ && isPersistantlyMapped_) || !isRingBuffer_, "dynamic ring buffer needs persistent mapping");

    VkMemoryPropertyFlags memoryPropertyFlags = static_cast<VkMemoryPropertyFlags>(desc_.memoryPropertyFlags);
    const VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & (~(VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                                   CORE_MEMORY_PROPERTY_PROTECTED_BIT)));
    const VkMemoryPropertyFlags preferredFlags = memoryPropertyFlags;

    const auto& limits = static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                             .physicalDeviceProperties.physicalDeviceProperties.limits;
    // force min buffer alignment always
    const uint32_t minBufferAlignment = GetMinBufferAlignment(limits);
    const uint32_t minMapAlignment = (isRingBuffer_ || isPersistantlyMapped_) ? GetMemoryMapAlignment(limits) : 1u;
    plat_.bindMemoryByteSize = GetAlignedByteSize(desc_.byteSize, Math::max(minBufferAlignment, minMapAlignment));
    plat_.fullByteSize = plat_.bindMemoryByteSize * bufferingCount_;
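    // Example (a sketch): a byteSize of 1000 with a 256 byte minimum
    // alignment yields a 1024 byte slice; with triple buffering the full
    // allocation is then 3072 bytes.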
    plat_.currentByteOffset = 0;
    plat_.usage = static_cast<VkBufferUsageFlags>(desc_.usageFlags);

    AllocateMemory(requiredFlags, preferredFlags);

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        const VkMemoryPropertyFlags memFlags =
            (VkMemoryPropertyFlags)gpuMemAllocator->GetMemoryTypeProperties(mem_.allocationInfo.memoryType);
        isMappable_ = (memFlags & VkMemoryPropertyFlagBits::VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, plat_.fullByteSize);
#endif
    }

#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id >: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

const GpuBufferDesc& GpuBufferVk::GetDesc() const
{
    return desc_;
}

const GpuBufferPlatformDataVk& GpuBufferVk::GetPlatformData() const
{
    return plat_;
}

const GpuAccelerationStructureDesc& GpuBufferVk::GetDescAccelerationStructure() const
{
    return descAccel_;
}

const GpuAccelerationStructurePlatformDataVk& GpuBufferVk::GetPlatformDataAccelerationStructure() const
{
    return platAccel_;
}

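// Map() advances a ring buffer to its next slice and returns a pointer to
// mapped host memory. A minimal usage sketch (names are illustrative, not
// from this file):
//   if (void* data = buffer.Map()) {
//       std::memcpy(data, src, srcByteSize);
//       buffer.Unmap();
//   }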
void* GpuBufferVk::Map()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

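    // For ring buffers, advance to the next slice so the CPU does not stomp
    // data the GPU may still be reading from the previous frame's slice.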
    if (isRingBuffer_) {
        plat_.currentByteOffset = (plat_.currentByteOffset + plat_.bindMemoryByteSize) % plat_.fullByteSize;
    }

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        if (mem_.allocationInfo.pMappedData) {
            data = reinterpret_cast<uint8_t*>(mem_.allocationInfo.pMappedData) + plat_.currentByteOffset;
        }
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}
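// Unlike Map(), MapMemory() never advances the ring buffer; it always
// returns the start of the allocation.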
void* GpuBufferVk::MapMemory()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        data = mem_.allocationInfo.pMappedData;
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

void GpuBufferVk::Unmap() const
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to unmap non-mappable gpu buffer");
    }
    if (!isMapped_) {
        PLUGIN_LOG_E("gpu buffer not mapped");
    }
    isMapped_ = false;

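    // Non-persistent mappings are flushed before unmapping so host writes
    // become visible to the device even when the memory is non-coherent.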
    if (!isPersistantlyMapped_) {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            gpuMemAllocator->FlushAllocation(mem_.allocation, 0, VK_WHOLE_SIZE);
            gpuMemAllocator->UnmapMemory(mem_.allocation);
        }
    }
}
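// Creates the VkBuffer and its memory through VMA, using a custom pool when
// the allocator provides one for this descriptor.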
void GpuBufferVk::AllocateMemory(const VkMemoryPropertyFlags requiredFlags, const VkMemoryPropertyFlags preferredFlags)
{
    constexpr VkBufferCreateFlags bufferCreateFlags { 0 };
    const VkBufferCreateInfo bufferCreateInfo {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,     // sType
        nullptr,                                  // pNext
        bufferCreateFlags,                        // flags
        (VkDeviceSize)plat_.fullByteSize,         // size
        plat_.usage,                              // usage
        VkSharingMode::VK_SHARING_MODE_EXCLUSIVE, // sharingMode
        0,                                        // queueFamilyIndexCount
        nullptr,                                  // pQueueFamilyIndices
    };

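    // VMA_ALLOCATION_CREATE_MAPPED_BIT keeps the allocation mapped for its
    // entire lifetime; newer VMA additionally requires the intended host
    // access pattern to be declared when usage is VMA_MEMORY_USAGE_AUTO.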
    VmaAllocationCreateFlags allocationCreateFlags { 0 };
    if (isPersistantlyMapped_) {
        allocationCreateFlags |= static_cast<VmaAllocationCreateFlags>(
            VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_MAPPED_BIT
#ifdef USE_NEW_VMA
            | VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
#endif
        );
    }
    if (desc_.memoryPropertyFlags & CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
#ifdef USE_NEW_VMA
        allocationCreateFlags |= VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
#endif
    }

    PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
    PLUGIN_ASSERT(gpuMemAllocator);
    if (gpuMemAllocator) {
        // can be null handle -> default allocator
        const VmaPool customPool = gpuMemAllocator->GetBufferPool(desc_);
        const VmaAllocationCreateInfo allocationCreateInfo {
            allocationCreateFlags, // flags
#ifdef USE_NEW_VMA
            VmaMemoryUsage::VMA_MEMORY_USAGE_AUTO, // usage
#else
            VmaMemoryUsage::VMA_MEMORY_USAGE_UNKNOWN, // usage
#endif
            requiredFlags,  // requiredFlags
            preferredFlags, // preferredFlags
            0,              // memoryTypeBits
            customPool,     // pool
            nullptr,        // pUserData
#ifdef USE_NEW_VMA
            0.f, // priority
#endif
        };

        gpuMemAllocator->CreateBuffer(
            bufferCreateInfo, allocationCreateInfo, plat_.buffer, mem_.allocation, mem_.allocationInfo);
    }

    plat_.memory = GetPlatMemory(mem_.allocationInfo, preferredFlags);
}

RENDER_END_NAMESPACE()