/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "device_vk.h"

#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <vulkan/vulkan_core.h>

#include <base/containers/vector.h>
#include <base/math/mathf.h>
#include <core/engine_info.h>
#include <render/intf_render_context.h>
#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_program_util.h"
#include "device/gpu_resource_manager.h"
#include "device/shader_manager.h"
#include "device/shader_module.h"
#include "platform_vk.h"
#include "util/log.h"
#include "vulkan/create_functions_vk.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/gpu_program_vk.h"
#include "vulkan/gpu_query_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/gpu_semaphore_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_backend_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/shader_module_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
static constexpr string_view DEVICE_EXTENSION_SWAPCHAIN { VK_KHR_SWAPCHAIN_EXTENSION_NAME };

// promoted to 1.2, requires VK_KHR_create_renderpass2
static constexpr string_view DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE { VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_CREATE_RENDERPASS2 { VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME };

static constexpr string_view DEVICE_EXTENSION_EXTERNAL_MEMORY { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2 {
    VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME
};
static constexpr string_view DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION {
    VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME
};
static constexpr string_view DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN { VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_MULTIVIEW { VK_KHR_MULTIVIEW_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_MAINTENANCE4 = VK_KHR_MAINTENANCE_4_EXTENSION_NAME;
static constexpr string_view DEVICE_EXTENSION_DESCRIPTOR_INDEXING = VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME;

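// The helpers below append feature/property structs to the Vulkan pNext chains queried in
// CreateDevice(). ChainWrapper holds write cursors into both chains: each Get*Structs()
// helper stores its struct's address through ppNextFeatures/ppNextProperties and then
// advances the cursor to the new struct's own pNext member, so helpers can be called in
// any order.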
struct ChainWrapper {
    void** ppNextFeatures { nullptr };
    void** ppNextProperties { nullptr };
};

struct PhysicalDeviceYcbcrStructsVk {
    VkPhysicalDeviceSamplerYcbcrConversionFeatures ycbcrConversionFeatures {};
};

#if (RENDER_VULKAN_FSR_ENABLED == 1)
struct PhysicalDeviceFragmentShadingRateStructsVk {
    VkPhysicalDeviceFragmentShadingRateFeaturesKHR physicalDeviceFragmentShadingRateFeatures;
    VkPhysicalDeviceFragmentShadingRatePropertiesKHR physicalDeviceFragmentShadingRateProperties;
};
#endif

#if (RENDER_VULKAN_RT_ENABLED == 1)
struct PhysicalDeviceRayTracingStructsVk {
    VkPhysicalDeviceBufferDeviceAddressFeatures physicalDeviceBufferDeviceAddressFeatures;
    VkPhysicalDeviceRayTracingPipelineFeaturesKHR physicalDeviceRayTracingPipelineFeatures;
    VkPhysicalDeviceAccelerationStructureFeaturesKHR physicalDeviceAccelerationStructureFeatures;
    VkPhysicalDeviceRayQueryFeaturesKHR physicalDeviceRayQueryFeatures;
};
#endif

struct PhysicalDeviceMultiviewStructsVk {
    VkPhysicalDeviceMultiviewFeaturesKHR physicalDeviceMultiviewFeatures;
    VkPhysicalDeviceMultiviewPropertiesKHR physicalDeviceMultiviewProperties;
};

struct PhysicalDeviceDescriptorIndexingStructsVk {
    VkPhysicalDeviceDescriptorIndexingFeatures physicalDeviceDescriptorIndexingFeatures;
    VkPhysicalDeviceDescriptorIndexingProperties physicalDeviceDescriptorIndexingProperties;
};

struct PhysicalDeviceMaintenance4Vk {
    VkPhysicalDeviceMaintenance4Features maintenance4Features {};
};

struct ChainObjects {
    unique_ptr<PhysicalDeviceYcbcrStructsVk> ycbcr;
#if (RENDER_VULKAN_RT_ENABLED == 1)
    unique_ptr<PhysicalDeviceRayTracingStructsVk> rt;
#endif
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    unique_ptr<PhysicalDeviceFragmentShadingRateStructsVk> fsr;
#endif
    unique_ptr<PhysicalDeviceMultiviewStructsVk> mv;
    unique_ptr<PhysicalDeviceDescriptorIndexingStructsVk> di;
    unique_ptr<PhysicalDeviceMaintenance4Vk> maintenance4;
};

// fragment shading rate
#if (RENDER_VULKAN_FSR_ENABLED == 1)
// VK_KHR_fragment_shading_rate, requires VK_KHR_create_renderpass2, requires VK_KHR_get_physical_device_properties2
static constexpr string_view DEVICE_EXTENSION_FRAGMENT_SHADING_RATE { VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME };

void GetPhysicalDeviceFragmentShadingRateStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.fsr = make_unique<PhysicalDeviceFragmentShadingRateStructsVk>();
    auto& fsr = co.fsr;
    fsr->physicalDeviceFragmentShadingRateFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, // sType
        nullptr,                                                              // pNext
        VK_FALSE,                                                             // pipelineFragmentShadingRate
        VK_FALSE,                                                             // primitiveFragmentShadingRate
        VK_FALSE,                                                             // attachmentFragmentShadingRate
    };
    *cw.ppNextFeatures = &fsr->physicalDeviceFragmentShadingRateFeatures;
    cw.ppNextFeatures = &fsr->physicalDeviceFragmentShadingRateFeatures.pNext;

    fsr->physicalDeviceFragmentShadingRateProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, // sType
        nullptr,                                                                // pNext
    };
    *cw.ppNextProperties = &fsr->physicalDeviceFragmentShadingRateProperties;
    cw.ppNextProperties = &fsr->physicalDeviceFragmentShadingRateProperties.pNext;
}
#endif

void GetPhysicalDeviceMultiviewFeaturesStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.mv = make_unique<PhysicalDeviceMultiviewStructsVk>();
    auto& mv = co.mv;
    mv->physicalDeviceMultiviewFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR, // sType
        nullptr,                                                  // pNext
        VK_FALSE,                                                 // multiview
        VK_FALSE,                                                 // multiviewGeometryShader
        VK_FALSE,                                                 // multiviewTessellationShader
    };
    *cw.ppNextFeatures = &mv->physicalDeviceMultiviewFeatures;
    cw.ppNextFeatures = &mv->physicalDeviceMultiviewFeatures.pNext;

    mv->physicalDeviceMultiviewProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR, // sType
        nullptr,                                                    // pNext
        0,                                                          // maxMultiviewViewCount
        0,                                                          // maxMultiviewInstanceIndex
    };
    *cw.ppNextProperties = &mv->physicalDeviceMultiviewProperties;
    cw.ppNextProperties = &mv->physicalDeviceMultiviewProperties.pNext;
}

void GetPhysicalDeviceDescriptorIndexingFeaturesStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.di = make_unique<PhysicalDeviceDescriptorIndexingStructsVk>();
    auto& di = co.di;
    di->physicalDeviceDescriptorIndexingFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES, // sType
        nullptr,                                                        // pNext
        VK_FALSE,                                                       // shaderInputAttachmentArrayDynamicIndexing
        VK_FALSE,                                                       // shaderUniformTexelBufferArrayDynamicIndexing
        VK_FALSE,                                                       // shaderStorageTexelBufferArrayDynamicIndexing
        VK_FALSE,                                                       // shaderUniformBufferArrayNonUniformIndexing
        VK_FALSE,                                                       // shaderSampledImageArrayNonUniformIndexing
        VK_FALSE,                                                       // shaderStorageBufferArrayNonUniformIndexing
        VK_FALSE,                                                       // shaderStorageImageArrayNonUniformIndexing
        VK_FALSE,                                                       // shaderInputAttachmentArrayNonUniformIndexing
        VK_FALSE, // shaderUniformTexelBufferArrayNonUniformIndexing
        VK_FALSE, // shaderStorageTexelBufferArrayNonUniformIndexing
        VK_FALSE, // descriptorBindingUniformBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingSampledImageUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageImageUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingUniformTexelBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageTexelBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingUpdateUnusedWhilePending
        VK_FALSE, // descriptorBindingPartiallyBound
        VK_FALSE, // descriptorBindingVariableDescriptorCount
        VK_FALSE, // runtimeDescriptorArray
    };
    *cw.ppNextFeatures = &di->physicalDeviceDescriptorIndexingFeatures;
    cw.ppNextFeatures = &di->physicalDeviceDescriptorIndexingFeatures.pNext;

    di->physicalDeviceDescriptorIndexingProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES, // sType
        nullptr,                                                          // pNext
        0U,                                                               // maxUpdateAfterBindDescriptorsInAllPools
        VK_FALSE, // shaderUniformBufferArrayNonUniformIndexingNative
        VK_FALSE, // shaderSampledImageArrayNonUniformIndexingNative
        VK_FALSE, // shaderStorageBufferArrayNonUniformIndexingNative
        VK_FALSE, // shaderStorageImageArrayNonUniformIndexingNative
        VK_FALSE, // shaderInputAttachmentArrayNonUniformIndexingNative
        VK_FALSE, // robustBufferAccessUpdateAfterBind
        VK_FALSE, // quadDivergentImplicitLod
        0U,       // maxPerStageDescriptorUpdateAfterBindSamplers
        0U,       // maxPerStageDescriptorUpdateAfterBindUniformBuffers
        0U,       // maxPerStageDescriptorUpdateAfterBindStorageBuffers
        0U,       // maxPerStageDescriptorUpdateAfterBindSampledImages
        0U,       // maxPerStageDescriptorUpdateAfterBindStorageImages
        0U,       // maxPerStageDescriptorUpdateAfterBindInputAttachments
        0U,       // maxPerStageUpdateAfterBindResources
        0U,       // maxDescriptorSetUpdateAfterBindSamplers
        0U,       // maxDescriptorSetUpdateAfterBindUniformBuffers
        0U,       // maxDescriptorSetUpdateAfterBindUniformBuffersDynamic
        0U,       // maxDescriptorSetUpdateAfterBindStorageBuffers
        0U,       // maxDescriptorSetUpdateAfterBindStorageBuffersDynamic
        0U,       // maxDescriptorSetUpdateAfterBindSampledImages
        0U,       // maxDescriptorSetUpdateAfterBindStorageImages
        0U,       // maxDescriptorSetUpdateAfterBindInputAttachments
    };
    *cw.ppNextProperties = &di->physicalDeviceDescriptorIndexingProperties;
    cw.ppNextProperties = &di->physicalDeviceDescriptorIndexingProperties.pNext;
}

// ray-tracing
#if (RENDER_VULKAN_RT_ENABLED == 1)
static constexpr string_view DEVICE_EXTENSION_ACCELERATION_STRUCTURE { "VK_KHR_acceleration_structure" };
static constexpr string_view DEVICE_EXTENSION_RAY_QUERY { "VK_KHR_ray_query" };
static constexpr string_view DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS { "VK_KHR_deferred_host_operations" };
static constexpr string_view DEVICE_EXTENSION_RAY_TRACING_PIPELINE { "VK_KHR_ray_tracing_pipeline" };
static constexpr string_view DEVICE_EXTENSION_PIPELINE_LIBRARY { "VK_KHR_pipeline_library" };

void GetPhysicalDeviceRayTracingStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.rt = make_unique<PhysicalDeviceRayTracingStructsVk>();
    auto& rt = co.rt;
    rt->physicalDeviceBufferDeviceAddressFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, // sType
        nullptr,                                                          // pNext
        VK_FALSE,                                                         // bufferDeviceAddress
        VK_FALSE,                                                         // bufferDeviceAddressCaptureReplay
        VK_FALSE,                                                         // bufferDeviceAddressMultiDevice
    };
    rt->physicalDeviceRayTracingPipelineFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, // sType
        &rt->physicalDeviceBufferDeviceAddressFeatures,                      // pNext
        VK_FALSE,                                                            // rayTracingPipeline
        VK_FALSE, // rayTracingPipelineShaderGroupHandleCaptureReplay
        VK_FALSE, // rayTracingPipelineShaderGroupHandleCaptureReplayMixed
        VK_FALSE, // rayTracingPipelineTraceRaysIndirect
        VK_FALSE, // rayTraversalPrimitiveCulling
    };
    rt->physicalDeviceAccelerationStructureFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, // sType
        &rt->physicalDeviceRayTracingPipelineFeatures,                         // pNext
        VK_FALSE,                                                              // accelerationStructure
        VK_FALSE,                                                              // accelerationStructureCaptureReplay
        VK_FALSE,                                                              // accelerationStructureIndirectBuild
        VK_FALSE,                                                              // accelerationStructureHostCommands
        VK_FALSE, // descriptorBindingAccelerationStructureUpdateAfterBind
    };
    rt->physicalDeviceRayQueryFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, // sType
        &rt->physicalDeviceAccelerationStructureFeatures,         // pNext
        VK_TRUE,                                                  // rayQuery
    };

    *cw.ppNextFeatures = &rt->physicalDeviceRayQueryFeatures;
    cw.ppNextFeatures = &rt->physicalDeviceBufferDeviceAddressFeatures.pNext;
}
#endif

void GetPhysicalDeviceYcbcrStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.ycbcr = make_unique<PhysicalDeviceYcbcrStructsVk>();
    auto& ycbcr = co.ycbcr;
    ycbcr->ycbcrConversionFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES, // sType
        nullptr,                                                             // pNext
        VK_FALSE,                                                            // samplerYcbcrConversion
    };

    *cw.ppNextFeatures = &ycbcr->ycbcrConversionFeatures;
    cw.ppNextFeatures = &ycbcr->ycbcrConversionFeatures.pNext;
}

void GetYcbcrExtFunctions(const VkInstance instance, DeviceVk::ExtFunctions& extFunctions)
{
    extFunctions.vkCreateSamplerYcbcrConversion =
        (PFN_vkCreateSamplerYcbcrConversion)(void*)vkGetInstanceProcAddr(instance, "vkCreateSamplerYcbcrConversion");
    if (!extFunctions.vkCreateSamplerYcbcrConversion) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateSamplerYcbcrConversion");
    }
    extFunctions.vkDestroySamplerYcbcrConversion =
        (PFN_vkDestroySamplerYcbcrConversion)vkGetInstanceProcAddr(instance, "vkDestroySamplerYcbcrConversion");
    if (!extFunctions.vkDestroySamplerYcbcrConversion) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroySamplerYcbcrConversion");
    }
}

void GetPhysicalDeviceMaintenance4Structs(ChainObjects& co, ChainWrapper& cw)
{
    co.maintenance4 = make_unique<PhysicalDeviceMaintenance4Vk>();
    auto& m4 = co.maintenance4;
    m4->maintenance4Features = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES, // sType
        nullptr,                                                  // pNext
        VK_TRUE,                                                  // maintenance4
    };

    *cw.ppNextFeatures = &m4->maintenance4Features;
    cw.ppNextFeatures = &m4->maintenance4Features.pNext;
}

constexpr uint32_t MIN_ALLOCATION_BLOCK_SIZE { 4u * 1024u * 1024u };
constexpr uint32_t MAX_ALLOCATION_BLOCK_SIZE { 1024u * 1024u * 1024u };
static constexpr const QueueProperties DEFAULT_QUEUE {
    VK_QUEUE_GRAPHICS_BIT, // requiredFlags
    1,                     // count
    1.0f,                  // priority
    false,                 // explicitFlags
    true,                  // canPresent
};

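// Builds the GPU memory allocator create info with the default custom pools (staging,
// dynamic uniform ring buffers). Client-provided block sizes from
// BackendExtraVk::gpuMemoryAllocatorSizes are clamped to
// [MIN_ALLOCATION_BLOCK_SIZE, MAX_ALLOCATION_BLOCK_SIZE]; ~0u means "use the default".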
PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo GetAllocatorCreateInfo(const BackendExtraVk* backendExtra)
{
    // create default pools
    PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo createInfo;
    uint32_t dynamicUboByteSize = 16u * 1024u * 1024u;
    if (backendExtra) {
        const auto& sizes = backendExtra->gpuMemoryAllocatorSizes;
        if (sizes.defaultAllocationBlockSize != ~0u) {
            createInfo.preferredLargeHeapBlockSize = Math::min(
                MAX_ALLOCATION_BLOCK_SIZE, Math::max(sizes.defaultAllocationBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
        }
        if (sizes.customAllocationDynamicUboBlockSize != ~0u) {
            dynamicUboByteSize = Math::min(MAX_ALLOCATION_BLOCK_SIZE,
                Math::max(sizes.customAllocationDynamicUboBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
        }
    }

    // staging
    {
        GpuBufferDesc desc;
        desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_SINGLE_SHOT_STAGING;
        desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                   MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_SRC_BIT;
        createInfo.customPools.push_back({
            "STAGING_GPU_BUFFER",
            PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
            0u,
            // if a linear allocator is used, memory can easily be wasted depending on the client's usage pattern.
            false,
            { move(desc) },
        });
    }
    // dynamic uniform ring buffers
    {
        GpuBufferDesc desc;
        desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER;
        desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                   MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
        createInfo.customPools.push_back({
            "DYNAMIC_UNIFORM_GPU_BUFFER",
            PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
            dynamicUboByteSize,
            false,
            { move(desc) },
        });
    }

    return createInfo;
}

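// Routes VK_EXT_debug_utils validation messages to the engine log, mapping the Vulkan
// severity bits to error/warning/info/verbose log levels.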
VkBool32 VKAPI_PTR DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
    VkDebugUtilsMessageTypeFlagsEXT messageTypes, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
    void* pUserData)
{
    if (pCallbackData && pCallbackData->pMessage) {
        if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
            PLUGIN_LOG_E("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) {
            PLUGIN_LOG_W("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
            PLUGIN_LOG_I("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
            PLUGIN_LOG_V("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        }
    }

    // The application should always return VK_FALSE.
    return VK_FALSE;
}

VkBool32 VKAPI_PTR DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT, uint64_t, size_t,
    int32_t, const char*, const char* pMessage, void*)
{
    if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) {
        PLUGIN_LOG_E("%s", pMessage);
    } else if (flags & (VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)) {
        PLUGIN_LOG_W("%s", pMessage);
    } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) {
        PLUGIN_LOG_I("%s", pMessage);
    } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) {
        PLUGIN_LOG_D("%s", pMessage);
    }
    return VK_FALSE;
}

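// Fetches all queue handles of the given family from the device and appends them to the
// low level queue list.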
void EmplaceDeviceQueue(
    const VkDevice device, const LowLevelQueueInfo& aQueueInfo, vector<LowLevelGpuQueueVk>& aLowLevelQueues)
{
    for (uint32_t idx = 0; idx < aQueueInfo.queueCount; ++idx) {
        VkQueue queue = VK_NULL_HANDLE;
        vkGetDeviceQueue(device,         // device
            aQueueInfo.queueFamilyIndex, // queueFamilyIndex
            idx,                         // queueIndex
            &queue);                     // pQueue
        aLowLevelQueues.push_back(LowLevelGpuQueueVk { queue, aQueueInfo });
    }
}

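// Probes the physical device for depth formats usable as depth/stencil attachments with
// optimal tiling and records the supported ones.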
void CheckValidDepthFormats(const DevicePlatformDataVk& devicePlat, DevicePlatformInternalDataVk& dataInternal)
{
    constexpr uint32_t DEPTH_FORMAT_COUNT { 4 };
    constexpr Format DEPTH_FORMATS[DEPTH_FORMAT_COUNT] = { BASE_FORMAT_D24_UNORM_S8_UINT, BASE_FORMAT_D32_SFLOAT,
        BASE_FORMAT_D16_UNORM, BASE_FORMAT_X8_D24_UNORM_PACK32 };
    for (uint32_t idx = 0; idx < DEPTH_FORMAT_COUNT; ++idx) {
        VkFormatProperties formatProperties;
        Format format = DEPTH_FORMATS[idx];
        vkGetPhysicalDeviceFormatProperties(devicePlat.physicalDevice, // physicalDevice
            (VkFormat)format,                                          // format
            &formatProperties);                                        // pFormatProperties
        const VkFormatFeatureFlags optimalTilingFeatureFlags = formatProperties.optimalTilingFeatures;
        if (optimalTilingFeatureFlags & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
            dataInternal.supportedDepthFormats.push_back(format);
        }
    }
}

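// Collects the device extensions to request: swapchain and render pass 2 related
// extensions always, platform and build-time optional extensions (fragment shading rate,
// ray tracing), version-gated extensions (multiview 1.1+, descriptor indexing 1.2+), and
// any extensions the client passed in BackendExtraVk.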
vector<string_view> GetPreferredDeviceExtensions(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
{
    vector<string_view> extensions { DEVICE_EXTENSION_SWAPCHAIN };
    extensions.push_back(DEVICE_EXTENSION_CREATE_RENDERPASS2);
    extensions.push_back(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE);
    extensions.push_back(DEVICE_EXTENSION_MAINTENANCE4);
    GetPlatformDeviceExtensions(extensions);
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    extensions.push_back(DEVICE_EXTENSION_FRAGMENT_SHADING_RATE);
#endif
#if (RENDER_VULKAN_RT_ENABLED == 1)
    extensions.push_back(DEVICE_EXTENSION_ACCELERATION_STRUCTURE);
    extensions.push_back(DEVICE_EXTENSION_RAY_TRACING_PIPELINE);
    extensions.push_back(DEVICE_EXTENSION_RAY_QUERY);
    extensions.push_back(DEVICE_EXTENSION_PIPELINE_LIBRARY);
    extensions.push_back(DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS);
#endif
    if (plat.deviceApiMinor >= 1) { // enable only for 1.1+
        extensions.push_back(DEVICE_EXTENSION_MULTIVIEW);
    }
    if (plat.deviceApiMinor >= 2) { // enable only for 1.2+
        extensions.push_back(DEVICE_EXTENSION_DESCRIPTOR_INDEXING);
    }
    if (backendExtra) {
        for (const auto str : backendExtra->extensions.extensionNames) {
            extensions.push_back(str);
        }
    }
    return extensions;
}

DeviceVk::CommonDeviceExtensions GetEnabledCommonDeviceExtensions(
    const unordered_map<string, uint32_t>& enabledDeviceExtensions)
{
    DeviceVk::CommonDeviceExtensions extensions;
    extensions.swapchain = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SWAPCHAIN);
    // render pass 2 is core in Vulkan 1.2; we only use it when depth stencil resolve is needed
    extensions.renderPass2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE) &&
                             enabledDeviceExtensions.contains(DEVICE_EXTENSION_CREATE_RENDERPASS2);
    extensions.externalMemory = enabledDeviceExtensions.contains(DEVICE_EXTENSION_EXTERNAL_MEMORY);
    extensions.getMemoryRequirements2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2);
    extensions.queueFamilyForeign = enabledDeviceExtensions.contains(DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN);
    extensions.samplerYcbcrConversion = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION);
    extensions.multiView = enabledDeviceExtensions.contains(DEVICE_EXTENSION_MULTIVIEW);
    extensions.descriptorIndexing = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DESCRIPTOR_INDEXING);
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    extensions.fragmentShadingRate = enabledDeviceExtensions.contains(DEVICE_EXTENSION_FRAGMENT_SHADING_RATE);
#endif

    return extensions;
}

CommonDeviceProperties GetCommonDevicePropertiesFunc(const ChainObjects& co)
{
    CommonDeviceProperties cdp;
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (co.fsr) {
        const auto& fsrVk = co.fsr->physicalDeviceFragmentShadingRateProperties;
        cdp.fragmentShadingRateProperties.minFragmentShadingRateAttachmentTexelSize = {
            fsrVk.minFragmentShadingRateAttachmentTexelSize.width,
            fsrVk.minFragmentShadingRateAttachmentTexelSize.height
        };
        cdp.fragmentShadingRateProperties.maxFragmentShadingRateAttachmentTexelSize = {
            fsrVk.maxFragmentShadingRateAttachmentTexelSize.width,
            fsrVk.maxFragmentShadingRateAttachmentTexelSize.height
        };
        cdp.fragmentShadingRateProperties.maxFragmentSize = { fsrVk.maxFragmentSize.width,
            fsrVk.maxFragmentSize.height };
    }
#endif
    return cdp;
}

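// Starts from everything the physical device supports, disables features the engine does
// not use, and then enables client-requested features only when supported (comparing the
// two VkPhysicalDeviceFeatures structs VkBool32 by VkBool32).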
void PreparePhysicalDeviceFeaturesForEnabling(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
{
    // enable all by default and then disable a few
    plat.enabledPhysicalDeviceFeatures = plat.physicalDeviceProperties.physicalDeviceFeatures;
    // prepare feature disable for core engine
    plat.enabledPhysicalDeviceFeatures.geometryShader = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.tessellationShader = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.sampleRateShading = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.occlusionQueryPrecise = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.pipelineStatisticsQuery = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.shaderTessellationAndGeometryPointSize = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.inheritedQueries = VK_FALSE;
    if (backendExtra) {
        // check for support and prepare enabling
        if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
            const size_t valueCount = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
            const array_view<const VkBool32> supported(
                reinterpret_cast<VkBool32*>(&plat.physicalDeviceProperties.physicalDeviceFeatures), valueCount);
            VkPhysicalDeviceFeatures* wantedFeatures =
                (&backendExtra->extensions.physicalDeviceFeaturesToEnable->features);
            const array_view<const VkBool32> wanted(reinterpret_cast<VkBool32*>(wantedFeatures), valueCount);

            array_view<VkBool32> enabledPhysicalDeviceFeatures(
                reinterpret_cast<VkBool32*>(&plat.enabledPhysicalDeviceFeatures), valueCount);
            for (size_t idx = 0; idx < valueCount; ++idx) {
                if (supported[idx] && wanted[idx]) {
                    enabledPhysicalDeviceFeatures[idx] = VK_TRUE;
                } else if (wanted[idx]) {
                    PLUGIN_LOG_W(
                        "physical device feature not supported/enabled from idx: %u", static_cast<uint32_t>(idx));
                }
            }
        }
    }
}

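// Queries VkFormatProperties for a single format and converts it to the engine's
// FormatProperties, including the per-texel byte size.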
FormatProperties FillDeviceFormatSupport(VkPhysicalDevice physicalDevice, const Format format)
{
    VkFormatProperties formatProperties;
    vkGetPhysicalDeviceFormatProperties(physicalDevice, // physicalDevice
        (VkFormat)format,                               // format
        &formatProperties);                             // pFormatProperties
    return FormatProperties {
        (FormatFeatureFlags)formatProperties.linearTilingFeatures,
        (FormatFeatureFlags)formatProperties.optimalTilingFeatures,
        (FormatFeatureFlags)formatProperties.bufferFeatures,
        GpuProgramUtil::FormatByteSize(format),
    };
}

void FillFormatSupport(VkPhysicalDevice physicalDevice, vector<FormatProperties>& formats)
{
    const uint32_t fullSize = DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT +
                              DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT;
    formats.resize(fullSize);
    for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT; ++idx) {
        formats[idx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(idx));
    }
    // pre-build additional formats
    for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT; ++idx) {
        const uint32_t currIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
        PLUGIN_ASSERT(currIdx < static_cast<uint32_t>(formats.size()));
        const uint32_t formatIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER;
        formats[currIdx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(formatIdx));
    }
}
} // namespace

DeviceVk::DeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo) : Device(renderContext, createInfo)
{
    // assume instance and device will be created internally
    ownInstanceAndDevice_ = true;

    const BackendExtraVk* backendExtra = static_cast<const BackendExtraVk*>(createInfo.backendConfiguration);
    // update internal state based on the optional backend configuration given by the client. the size of
    // queueProperties will depend on the enableMultiQueue setting.
    const auto queueProperties = CheckExternalConfig(backendExtra);

    // these check internally ownInstanceAndDevice_ and skip creation if provided by user
    CreateInstance();
    CreatePhysicalDevice();

    const auto availableQueues = CreateFunctionsVk::GetAvailableQueues(plat_.physicalDevice, queueProperties);

    // own device creation does a lot of work for figuring out what to create, but for an external device
    // CheckExternalConfig stored the enabled extensions and features, and we just need to check what is available
    if (ownInstanceAndDevice_) {
        CreateDevice(backendExtra, availableQueues);
    } else {
        commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
        platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
        // filling commonDeviceProperties_ isn't done here; at the moment it only contains fragment shading rate
        // properties. ideally we should walk through BackendExtraVk::extensions::physicalDeviceFeaturesToEnable::pNext
        // and see what's available.
    }

    CreateDebugFunctions();
    CreateExtFunctions();
    CreatePlatformExtFunctions();
    SortAvailableQueues(availableQueues);

    CheckValidDepthFormats(plat_, platInternal_);
    FillFormatSupport(plat_.physicalDevice, formatProperties_);

    PLUGIN_ASSERT_MSG(!lowLevelGpuQueues_.graphicsQueues.empty(), "default queue not initialized");
    if (!lowLevelGpuQueues_.graphicsQueues.empty()) {
        lowLevelGpuQueues_.defaultQueue = lowLevelGpuQueues_.graphicsQueues[0];
    } else {
        PLUGIN_LOG_E("default vulkan queue not initialized");
    }

    gpuQueueCount_ =
        static_cast<uint32_t>(lowLevelGpuQueues_.computeQueues.size() + lowLevelGpuQueues_.graphicsQueues.size() +
                              lowLevelGpuQueues_.transferQueues.size());

    const PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo allocatorCreateInfo =
        GetAllocatorCreateInfo(backendExtra);
    platformGpuMemoryAllocator_ = make_unique<PlatformGpuMemoryAllocator>(
        plat_.instance, plat_.physicalDevice, plat_.device, allocatorCreateInfo);

    if (queueProperties.size() > 1) {
        PLUGIN_LOG_I("gpu queue count: %u", gpuQueueCount_);
    }

    SetDeviceStatus(true);

    const GpuResourceManager::CreateInfo grmCreateInfo {
        GpuResourceManager::GPU_RESOURCE_MANAGER_OPTIMIZE_STAGING_MEMORY,
    };
    gpuResourceMgr_ = make_unique<GpuResourceManager>(*this, grmCreateInfo);
    shaderMgr_ = make_unique<ShaderManager>(*this);

    lowLevelDevice_ = make_unique<LowLevelDeviceVk>(*this);
}

DeviceVk::~DeviceVk()
{
    WaitForIdle();

    // must release handles before taking down gpu resource manager.
    swapchains_.clear();

    gpuResourceMgr_.reset();
    shaderMgr_.reset();

    platformGpuMemoryAllocator_.reset();

    if (plat_.pipelineCache) {
        CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
    }

    if (ownInstanceAndDevice_) {
        CreateFunctionsVk::DestroyDevice(plat_.device);
        CreateFunctionsVk::DestroyDebugMessenger(plat_.instance, debugFunctionUtilities_.debugMessenger);
        CreateFunctionsVk::DestroyDebugCallback(plat_.instance, debugFunctionUtilities_.debugCallback);
        CreateFunctionsVk::DestroyInstance(plat_.instance);
    }
}

void DeviceVk::CreateInstance()
{
    const auto instanceWrapper = (plat_.instance == VK_NULL_HANDLE) ?
        CreateFunctionsVk::CreateInstance(VersionInfo { "core_renderer", 0, 1, 0 },
        VersionInfo { "core_renderer_app", 0, 1, 0 }) : CreateFunctionsVk::GetWrapper(plat_.instance);

    plat_.instance = instanceWrapper.instance;
    // update with physical device creation
    plat_.deviceApiMajor = instanceWrapper.apiMajor;
    plat_.deviceApiMinor = instanceWrapper.apiMinor;
    if (instanceWrapper.debugUtilsSupported) {
        debugFunctionUtilities_.debugMessenger =
            CreateFunctionsVk::CreateDebugMessenger(plat_.instance, DebugMessengerCallback);
    }
    if (!debugFunctionUtilities_.debugMessenger && instanceWrapper.debugReportSupported) {
        debugFunctionUtilities_.debugCallback =
            CreateFunctionsVk::CreateDebugCallback(plat_.instance, DebugReportCallback);
    }

    extFunctions_.vkAcquireNextImageKHR =
        (PFN_vkAcquireNextImageKHR)(void*)vkGetInstanceProcAddr(plat_.instance, "vkAcquireNextImageKHR");
    if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
        extFunctions_.vkGetPhysicalDeviceFeatures2 =
            (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceFeatures2");
        extFunctions_.vkGetPhysicalDeviceProperties2 =
            (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceProperties2");
    }
}

void DeviceVk::CreatePhysicalDevice()
{
    auto physicalDeviceWrapper = (plat_.physicalDevice == VK_NULL_HANDLE) ?
        CreateFunctionsVk::CreatePhysicalDevice(plat_.instance, DEFAULT_QUEUE) :
        CreateFunctionsVk::GetWrapper(plat_.physicalDevice);
    const uint32_t physicalDeviceApiMajor =
        VK_VERSION_MAJOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
    const uint32_t physicalDeviceApiMinor =
        VK_VERSION_MINOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
    plat_.deviceApiMajor = Math::min(plat_.deviceApiMajor, physicalDeviceApiMajor);
    plat_.deviceApiMinor = Math::min(plat_.deviceApiMinor, physicalDeviceApiMinor);
    PLUGIN_LOG_D("device api version %u.%u", plat_.deviceApiMajor, plat_.deviceApiMinor);

    plat_.physicalDevice = physicalDeviceWrapper.physicalDevice;
    plat_.physicalDeviceProperties = move(physicalDeviceWrapper.physicalDeviceProperties);
    plat_.physicalDeviceExtensions = move(physicalDeviceWrapper.physicalDeviceExtensions);
    const auto& memoryProperties = plat_.physicalDeviceProperties.physicalDeviceMemoryProperties;
    deviceSharedMemoryPropertyFlags_ =
        (memoryProperties.memoryTypeCount > 0) ? (MemoryPropertyFlags)memoryProperties.memoryTypes[0].propertyFlags : 0;
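    // AND together the property flags of all (non-lazy, non-protected) memory types to get
    // the flags that every such memory type is guaranteed to have.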
    for (uint32_t idx = 1; idx < memoryProperties.memoryTypeCount; ++idx) {
        const MemoryPropertyFlags memoryPropertyFlags =
            (MemoryPropertyFlags)memoryProperties.memoryTypes[idx].propertyFlags;
        // do not compare lazily allocated or protected memory blocks
        if ((memoryPropertyFlags & (CORE_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | CORE_MEMORY_PROPERTY_PROTECTED_BIT)) ==
            0) {
            deviceSharedMemoryPropertyFlags_ &= memoryPropertyFlags;
        }
    }
}

void DeviceVk::CreateDevice(const BackendExtraVk* backendExtra, const vector<LowLevelQueueInfo>& availableQueues)
{
    vector<string_view> preferredExtensions = GetPreferredDeviceExtensions(backendExtra, plat_);
    PreparePhysicalDeviceFeaturesForEnabling(backendExtra, plat_);

    ChainWrapper chainWrapper;
    ChainObjects chainObjects;

    VkPhysicalDeviceFeatures2* physicalDeviceFeatures2Ptr = nullptr;
    VkPhysicalDeviceFeatures2 physicalDeviceFeatures2 {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, // sType
        nullptr,                                      // pNext
        {},                                           // features
    };
    chainWrapper.ppNextFeatures = &physicalDeviceFeatures2.pNext;

    VkPhysicalDeviceProperties2 physicalDeviceProperties2 {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, // sType
        nullptr,                                        // pNext
        {},                                             // properties
    };
    chainWrapper.ppNextProperties = &physicalDeviceProperties2.pNext;

    GetPhysicalDeviceYcbcrStructs(chainObjects, chainWrapper);
#if (RENDER_VULKAN_RT_ENABLED == 1)
    GetPhysicalDeviceRayTracingStructs(chainObjects, chainWrapper);
#endif
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (CreateFunctionsVk::HasExtension(plat_.physicalDeviceExtensions, DEVICE_EXTENSION_FRAGMENT_SHADING_RATE)) {
        GetPhysicalDeviceFragmentShadingRateStructs(chainObjects, chainWrapper);
    }
#endif
    if (plat_.deviceApiMinor >= 1) { // enable only for 1.1+ for now
        GetPhysicalDeviceMultiviewFeaturesStructs(chainObjects, chainWrapper);
    }
    if (plat_.deviceApiMinor >= 2) { // enable only for 1.2+ for now
        GetPhysicalDeviceDescriptorIndexingFeaturesStructs(chainObjects, chainWrapper);
    }
    if (CreateFunctionsVk::HasExtension(plat_.physicalDeviceExtensions, DEVICE_EXTENSION_MAINTENANCE4)) {
        GetPhysicalDeviceMaintenance4Structs(chainObjects, chainWrapper);
    }
    if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
        // pipe user extension physical device features
        if (backendExtra) {
            if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
                *chainWrapper.ppNextFeatures = backendExtra->extensions.physicalDeviceFeaturesToEnable->pNext;
            }
        }
        if (extFunctions_.vkGetPhysicalDeviceFeatures2) {
            extFunctions_.vkGetPhysicalDeviceFeatures2(plat_.physicalDevice, &physicalDeviceFeatures2);
        }
        if (extFunctions_.vkGetPhysicalDeviceProperties2) {
            extFunctions_.vkGetPhysicalDeviceProperties2(plat_.physicalDevice, &physicalDeviceProperties2);
        }

        // vkGetPhysicalDeviceFeatures has already filled this and PreparePhysicalDeviceFeaturesForEnabling
        // disabled/enabled some features.
        physicalDeviceFeatures2.features = plat_.enabledPhysicalDeviceFeatures;
        physicalDeviceFeatures2Ptr = &physicalDeviceFeatures2;
    }
    const DeviceWrapper deviceWrapper =
        CreateFunctionsVk::CreateDevice(plat_.instance, plat_.physicalDevice, plat_.physicalDeviceExtensions,
            plat_.enabledPhysicalDeviceFeatures, physicalDeviceFeatures2Ptr, availableQueues, preferredExtensions);
    plat_.device = deviceWrapper.device;
    for (const auto& ref : deviceWrapper.extensions) {
        extensions_[ref] = 1u;
    }
    commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
    platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
    commonDeviceProperties_ = GetCommonDevicePropertiesFunc(chainObjects);
}

vector<QueueProperties> DeviceVk::CheckExternalConfig(const BackendExtraVk* backendConfiguration)
{
    vector<QueueProperties> queueProperties;
    queueProperties.push_back(DEFAULT_QUEUE);

    if (!backendConfiguration) {
        return queueProperties;
    }

    const auto& extra = *backendConfiguration;
    if (extra.enableMultiQueue) {
        queueProperties.push_back(QueueProperties {
            VK_QUEUE_COMPUTE_BIT, // requiredFlags
            1,                    // count
            1.0f,                 // priority
            true,                 // explicitFlags
            false,                // canPresent
        });
        PLUGIN_LOG_I("trying to enable gpu multi-queue, with queue count: %u",
            static_cast<uint32_t>(queueProperties.size()));
    }

    if (extra.instance != VK_NULL_HANDLE) {
        PLUGIN_LOG_D("trying to use application given vulkan instance, device, and physical device");
        PLUGIN_ASSERT((extra.instance && extra.physicalDevice && extra.device));
        plat_.instance = extra.instance;
        plat_.physicalDevice = extra.physicalDevice;
        plat_.device = extra.device;
        if (extra.extensions.physicalDeviceFeaturesToEnable) {
            plat_.enabledPhysicalDeviceFeatures = extra.extensions.physicalDeviceFeaturesToEnable->features;
        }
        ownInstanceAndDevice_ = false; // everything given from the application

        const auto myDevice = plat_.physicalDevice;
        auto& myProperties = plat_.physicalDeviceProperties;
        vkGetPhysicalDeviceProperties(myDevice, &myProperties.physicalDeviceProperties);
        vkGetPhysicalDeviceFeatures(myDevice, &myProperties.physicalDeviceFeatures);
        vkGetPhysicalDeviceMemoryProperties(myDevice, &myProperties.physicalDeviceMemoryProperties);

        for (const auto& extension : extra.extensions.extensionNames) {
            extensions_[extension] = 1u;
        }
    }
    return queueProperties;
}

void DeviceVk::SortAvailableQueues(const vector<LowLevelQueueInfo>& availableQueues)
{
    for (const auto& ref : availableQueues) {
        if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_COMPUTE_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.computeQueues);
        } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_GRAPHICS_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.graphicsQueues);
        } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_TRANSFER_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.transferQueues);
        }
    }
}

DeviceBackendType DeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformData& DeviceVk::GetPlatformData() const
{
    return plat_;
}

const DevicePlatformDataVk& DeviceVk::GetPlatformDataVk() const
{
    return plat_;
}

const DevicePlatformInternalDataVk& DeviceVk::GetPlatformInternalDataVk() const
{
    return platInternal_;
}

ILowLevelDevice& DeviceVk::GetLowLevelDevice() const
{
    return *lowLevelDevice_;
}

FormatProperties DeviceVk::GetFormatProperties(const Format format) const
{
    const uint32_t formatSupportSize = static_cast<uint32_t>(formatProperties_.size());
    const uint32_t formatIdx = static_cast<uint32_t>(format);
    if (formatIdx < formatSupportSize) {
        return formatProperties_[formatIdx];
    } else if ((formatIdx >= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) &&
               (formatIdx <= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_END_NUMBER)) {
        // additional formats live in the tail of formatProperties_ (see FillFormatSupport),
        // starting at ADDITIONAL_FORMAT_BASE_IDX
        const uint32_t currIdx = formatIdx - DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER +
                                 DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
        PLUGIN_ASSERT(currIdx < formatSupportSize);
        return formatProperties_[currIdx];
    }
    return {};
}

AccelerationStructureBuildSizes DeviceVk::GetAccelerationStructureBuildSizes(
    const AccelerationStructureBuildGeometryInfo& geometry,
    BASE_NS::array_view<const AccelerationStructureGeometryTrianglesInfo> triangles,
    BASE_NS::array_view<const AccelerationStructureGeometryAabbsInfo> aabbs,
    BASE_NS::array_view<const AccelerationStructureGeometryInstancesInfo> instances) const
{
#if (RENDER_VULKAN_RT_ENABLED == 1)
    const VkDevice device = plat_.device;

    const size_t arraySize = triangles.size() + aabbs.size() + instances.size();
    vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
    vector<uint32_t> maxPrimitiveCounts(arraySize);
    uint32_t arrayIndex = 0;
    for (const auto& trianglesRef : triangles) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr,                                               // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR,     // geometryType
            {},                                                    // geometry
            VkGeometryFlagsKHR(trianglesRef.geometryFlags),        // flags
        };
        geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
            nullptr,                                                              // pNext
            VkFormat(trianglesRef.vertexFormat),                                  // vertexFormat
            {},                                                                   // vertexData
            VkDeviceSize(trianglesRef.vertexStride),                              // vertexStride
            trianglesRef.maxVertex,                                               // maxVertex
            VkIndexType(trianglesRef.indexType),                                  // indexType
            {},                                                                   // indexData
            {},                                                                   // transformData
        };
        maxPrimitiveCounts[arrayIndex] = trianglesRef.indexCount / 3u; // three indices per triangle
958         arrayIndex++;
959     }
960     for (const auto& aabbsRef : aabbs) {
961         geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
962             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
963             nullptr,                                               // pNext
964             VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR,         // geometryType
965             {},                                                    // geometry;
966             0,                                                     // flags
967         };
968         geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
969             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
970             nullptr,                                                          // pNext
971             {},                                                               // data
972             aabbsRef.stride,                                                  // stride
973         };
974         maxPrimitiveCounts[arrayIndex] = 1u;
975         arrayIndex++;
976     }
977     for (const auto& instancesRef : instances) {
978         geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
979             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
980             nullptr,                                               // pNext
981             VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR,     // geometryType
982             {},                                                    // geometry;
983             0,                                                     // flags
984         };
985         geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
986             VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
987             nullptr,                                                              // pNext
988             instancesRef.arrayOfPointers,                                         // arrayOfPointers
989             {},                                                                   // data
990         };
991         maxPrimitiveCounts[arrayIndex] = 1u;
992         arrayIndex++;
993     }
994 
995     const VkAccelerationStructureBuildGeometryInfoKHR geometryInfoVk {
996         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
997         nullptr,                                                          // pNext
998         VkAccelerationStructureTypeKHR(geometry.type),                    // type
999         VkBuildAccelerationStructureFlagsKHR(geometry.flags),             // flags
1000         VkBuildAccelerationStructureModeKHR(geometry.mode),               // mode
1001         VK_NULL_HANDLE,                                                   // srcAccelerationStructure
1002         VK_NULL_HANDLE,                                                   // dstAccelerationStructure
1003         arrayIndex,                                                       // geometryCount
1004         geometryData.data(),                                              // pGeometries
1005         nullptr,                                                          // ppGeometries
1006         {},                                                               // scratchData
1007     };
1008 
1009     VkAccelerationStructureBuildSizesInfoKHR buildSizesInfo {
1010         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // sType
1011         nullptr,                                                       // pNext
1012         0,                                                             // accelerationStructureSize
1013         0,                                                             // updateScratchSize
1014         0,                                                             // buildScratchSize
1015     };
1016     if ((arrayIndex > 0) && extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
1017         extFunctions_.vkGetAccelerationStructureBuildSizesKHR(device, // device
1018             VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,          // buildType,
1019             &geometryInfoVk,                                          // pBuildInfo
1020             maxPrimitiveCounts.data(),                                // pMaxPrimitiveCounts
1021             &buildSizesInfo);                                         // pSizeInfo
1022     }
1023 
1024     return AccelerationStructureBuildSizes {
1025         static_cast<uint32_t>(buildSizesInfo.accelerationStructureSize),
1026         static_cast<uint32_t>(buildSizesInfo.updateScratchSize),
1027         static_cast<uint32_t>(buildSizesInfo.buildScratchSize),
1028     };
1029 #else
1030     return AccelerationStructureBuildSizes { 0, 0, 0 };
1031 #endif
1032 }
1033 
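// A minimal usage sketch (illustrative only, not part of this file): the raw
// Vulkan pattern wrapped by the function above. Build sizes are queried up front
// so the caller can allocate the acceleration structure and scratch buffers
// before recording the build; 'vkDevice', 'buildInfo', and 'indexCount' are
// hypothetical caller-side values.
//
//   const uint32_t primitiveCount = indexCount / 3u; // triangles
//   VkAccelerationStructureBuildSizesInfoKHR sizes {
//       VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, nullptr, 0, 0, 0 };
//   vkGetAccelerationStructureBuildSizesKHR(vkDevice,
//       VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &primitiveCount, &sizes);
//   // sizes.accelerationStructureSize -> size of the buffer backing the AS
//   // sizes.buildScratchSize          -> size of the scratch buffer for the build
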
unique_ptr<Swapchain> DeviceVk::CreateDeviceSwapchain(const SwapchainCreateInfo& swapchainCreateInfo)
{
    return make_unique<SwapchainVk>(*this, swapchainCreateInfo);
}

void DeviceVk::DestroyDeviceSwapchain() {}

PlatformGpuMemoryAllocator* DeviceVk::GetPlatformGpuMemoryAllocator()
{
    return platformGpuMemoryAllocator_.get();
}

GpuQueue DeviceVk::GetValidGpuQueue(const GpuQueue& gpuQueue) const
{
    const auto getSpecificQueue = [](const uint32_t queueIndex, const GpuQueue::QueueType queueType,
                                     const vector<LowLevelGpuQueueVk>& specificQueues, const GpuQueue& defaultQueue) {
        const uint32_t queueCount = static_cast<uint32_t>(specificQueues.size());
        if (queueIndex < queueCount) {
            return GpuQueue { queueType, queueIndex };
        } else if (queueCount > 0) {
            return GpuQueue { queueType, 0 };
        }
        return defaultQueue;
    };

    const GpuQueue defaultQueue { GpuQueue::QueueType::GRAPHICS, 0 };
    if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::COMPUTE, lowLevelGpuQueues_.computeQueues, defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::GRAPHICS, lowLevelGpuQueues_.graphicsQueues, defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::TRANSFER, lowLevelGpuQueues_.transferQueues, defaultQueue);
    } else {
        return defaultQueue;
    }
}

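// A minimal usage sketch (illustrative only; 'deviceVk' and the requested index
// are hypothetical): GetValidGpuQueue() clamps a requested queue to one that
// actually exists, falling back first to index 0 of the same type and finally
// to the default graphics queue.
//
//   const GpuQueue valid = deviceVk.GetValidGpuQueue(GpuQueue { GpuQueue::QueueType::COMPUTE, 2u });
//   // with one compute queue:  valid == { COMPUTE, 0 }
//   // with no compute queues:  valid == { GRAPHICS, 0 }
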
uint32_t DeviceVk::GetGpuQueueCount() const
{
    return gpuQueueCount_;
}

void DeviceVk::InitializePipelineCache(array_view<const uint8_t> initialData)
{
    if (plat_.pipelineCache) {
        CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
    }
    // Matches the pipeline cache header layout defined by the Vulkan specification
    // (VkPipelineCacheHeaderVersionOne).
    struct CacheHeader {
        uint32_t bytes;
        uint32_t version;
        uint32_t vendorId;
        uint32_t deviceId;
        uint8_t pipelineCacheUUID[VK_UUID_SIZE];
    };
    // Accept the initial data only if it was produced by the same driver on the
    // same device; otherwise start from an empty cache.
    if (initialData.size() > sizeof(CacheHeader)) {
        CacheHeader header;
        CloneData(&header, sizeof(header), initialData.data(), sizeof(header));
        const auto& props = plat_.physicalDeviceProperties.physicalDeviceProperties;
        if (header.version != VkPipelineCacheHeaderVersion::VK_PIPELINE_CACHE_HEADER_VERSION_ONE ||
            header.vendorId != props.vendorID || header.deviceId != props.deviceID ||
            memcmp(header.pipelineCacheUUID, props.pipelineCacheUUID, VK_UUID_SIZE) != 0) {
            initialData = {};
        }
    }

    plat_.pipelineCache = CreateFunctionsVk::CreatePipelineCache(plat_.device, initialData);
}

vector<uint8_t> DeviceVk::GetPipelineCache() const
{
    vector<uint8_t> deviceData;
    if (plat_.pipelineCache) {
        size_t dataSize = 0u;
        // Query the size first, then fetch the data; shrink to the written size on success.
        if (auto result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, nullptr);
            result == VK_SUCCESS && dataSize) {
            deviceData.resize(dataSize);
            dataSize = deviceData.size();
            result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, deviceData.data());
            if (result == VK_SUCCESS) {
                deviceData.resize(dataSize);
            } else {
                deviceData.clear();
            }
        }
    }
    return deviceData;
}

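// A minimal persistence sketch (illustrative only; LoadBlob/SaveBlob are
// hypothetical application-side helpers): a blob saved by a previous run is fed
// to InitializePipelineCache(), which silently falls back to an empty cache if
// the header was written by a different driver or device; the current contents
// are read back with GetPipelineCache() for saving at shutdown.
//
//   const vector<uint8_t> blob = LoadBlob("pipeline.cache");
//   deviceVk.InitializePipelineCache({ blob.data(), blob.size() });
//   // ... create pipelines, render ...
//   SaveBlob("pipeline.cache", deviceVk.GetPipelineCache());
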
LowLevelGpuQueueVk DeviceVk::GetGpuQueue(const GpuQueue& gpuQueue) const
{
    // 1. tries to return the typed queue with the given index
    // 2. tries to return the typed queue with index 0
    // 3. returns the default queue
    const auto getSpecificQueue = [](const uint32_t queueIndex, const vector<LowLevelGpuQueueVk>& specificQueues,
                                      const LowLevelGpuQueueVk& defaultQueue) {
        const uint32_t queueCount = static_cast<uint32_t>(specificQueues.size());
        if (queueIndex < queueCount) {
            return specificQueues[queueIndex];
        } else if (queueCount > 0) {
            return specificQueues[0];
        }
        return defaultQueue;
    };

    if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.computeQueues, lowLevelGpuQueues_.defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.graphicsQueues, lowLevelGpuQueues_.defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.transferQueues, lowLevelGpuQueues_.defaultQueue);
    } else {
        return lowLevelGpuQueues_.defaultQueue;
    }
}

LowLevelGpuQueueVk DeviceVk::GetPresentationGpuQueue() const
{
    // NOTE: graphics queue 0 is expected to support presentation
    return GetGpuQueue(GpuQueue { GpuQueue::QueueType::GRAPHICS, 0 });
}

vector<LowLevelGpuQueueVk> DeviceVk::GetLowLevelGpuQueues() const
{
    vector<LowLevelGpuQueueVk> gpuQueues;
    gpuQueues.reserve(gpuQueueCount_);
    gpuQueues.insert(gpuQueues.end(), lowLevelGpuQueues_.computeQueues.begin(), lowLevelGpuQueues_.computeQueues.end());
    gpuQueues.insert(
        gpuQueues.end(), lowLevelGpuQueues_.graphicsQueues.begin(), lowLevelGpuQueues_.graphicsQueues.end());
    gpuQueues.insert(
        gpuQueues.end(), lowLevelGpuQueues_.transferQueues.begin(), lowLevelGpuQueues_.transferQueues.end());
    return gpuQueues;
}

void DeviceVk::WaitForIdle()
{
    if (plat_.device) {
        if (!isRenderbackendRunning_) {
            PLUGIN_LOG_D("Device - WaitForIdle");
            vkDeviceWaitIdle(plat_.device);
        } else {
            PLUGIN_LOG_E("Device WaitForIdle can only be called when the render backend is not running");
        }
    }
}

void DeviceVk::Activate() {}

void DeviceVk::Deactivate() {}

bool DeviceVk::AllowThreadedProcessing() const
{
    return true;
}

const DeviceVk::FeatureConfigurations& DeviceVk::GetFeatureConfigurations() const
{
    return featureConfigurations_;
}

const DeviceVk::CommonDeviceExtensions& DeviceVk::GetCommonDeviceExtensions() const
{
    return commonDeviceExtensions_;
}

const PlatformDeviceExtensions& DeviceVk::GetPlatformDeviceExtensions() const
{
    return platformDeviceExtensions_;
}

bool DeviceVk::HasDeviceExtension(const string_view extensionName) const
{
    return extensions_.contains(extensionName);
}

unique_ptr<Device> CreateDeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo)
{
    return make_unique<DeviceVk>(renderContext, createInfo);
}

unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuBufferDesc& desc)
{
    return make_unique<GpuBufferVk>(*this, desc);
}

unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuAccelerationStructureDesc& descAccel)
{
    return make_unique<GpuBufferVk>(*this, descAccel);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImage(const GpuImageDesc& desc)
{
    return make_unique<GpuImageVk>(*this, desc);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const GpuImagePlatformData& platformData, const uintptr_t hwBuffer)
{
    return make_unique<GpuImageVk>(*this, desc, platformData, hwBuffer);
}

unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(const GpuImageDesc& desc, const GpuImagePlatformData& platformData)
{
    return CreateGpuImageView(desc, platformData, 0);
}

vector<unique_ptr<GpuImage>> DeviceVk::CreateGpuImageViews(const Swapchain& swapchain)
{
    const GpuImageDesc& desc = swapchain.GetDesc();
    const auto& swapchainPlat = static_cast<const SwapchainVk&>(swapchain).GetPlatformData();

    vector<unique_ptr<GpuImage>> gpuImages(swapchainPlat.swapchainImages.images.size());
    for (size_t idx = 0; idx < gpuImages.size(); ++idx) {
        GpuImagePlatformDataVk gpuImagePlat;
        gpuImagePlat.image = swapchainPlat.swapchainImages.images[idx];
        gpuImagePlat.imageView = swapchainPlat.swapchainImages.imageViews[idx];
        gpuImages[idx] = this->CreateGpuImageView(desc, gpuImagePlat);
    }
    return gpuImages;
}

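// A minimal usage sketch (illustrative only; 'swapchain' and the acquired index
// are hypothetical): the views created above alias images owned by the
// swapchain, so one wrapper per image is created at (re)creation time and then
// indexed by the acquired image index each frame.
//
//   vector<unique_ptr<GpuImage>> backbuffers = deviceVk.CreateGpuImageViews(swapchain);
//   GpuImage& target = *backbuffers[acquiredImageIndex];
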
unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const BackendSpecificImageDesc& platformData)
{
    const ImageDescVk& imageDesc = (const ImageDescVk&)platformData;
    GpuImagePlatformDataVk platData;
    platData.image = imageDesc.image;
    platData.imageView = imageDesc.imageView;
    return CreateGpuImageView(desc, platData, imageDesc.platformHwBuffer);
}

unique_ptr<GpuSampler> DeviceVk::CreateGpuSampler(const GpuSamplerDesc& desc)
{
    return make_unique<GpuSamplerVk>(*this, desc);
}

unique_ptr<RenderFrameSync> DeviceVk::CreateRenderFrameSync()
{
    return make_unique<RenderFrameSyncVk>(*this);
}

unique_ptr<RenderBackend> DeviceVk::CreateRenderBackend(
    GpuResourceManager& gpuResourceMgr, const CORE_NS::IParallelTaskQueue::Ptr& queue)
{
    return make_unique<RenderBackendVk>(*this, gpuResourceMgr, queue);
}

unique_ptr<ShaderModule> DeviceVk::CreateShaderModule(const ShaderModuleCreateInfo& data)
{
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<ShaderModule> DeviceVk::CreateComputeShaderModule(const ShaderModuleCreateInfo& data)
{
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<GpuShaderProgram> DeviceVk::CreateGpuShaderProgram(const GpuShaderProgramCreateData& data)
{
    return make_unique<GpuShaderProgramVk>(*this, data);
}

unique_ptr<GpuComputeProgram> DeviceVk::CreateGpuComputeProgram(const GpuComputeProgramCreateData& data)
{
    return make_unique<GpuComputeProgramVk>(*this, data);
}

unique_ptr<NodeContextDescriptorSetManager> DeviceVk::CreateNodeContextDescriptorSetManager()
{
    return make_unique<NodeContextDescriptorSetManagerVk>(*this);
}

unique_ptr<NodeContextPoolManager> DeviceVk::CreateNodeContextPoolManager(
    GpuResourceManager& gpuResourceMgr, const GpuQueue& gpuQueue)
{
    return make_unique<NodeContextPoolManagerVk>(*this, gpuResourceMgr, gpuQueue);
}

unique_ptr<GraphicsPipelineStateObject> DeviceVk::CreateGraphicsPipelineStateObject(const GpuShaderProgram& gpuProgram,
    const GraphicsState& graphicsState, const PipelineLayout& pipelineLayout,
    const VertexInputDeclarationView& vertexInputDeclaration,
    const ShaderSpecializationConstantDataView& specializationConstants,
    const array_view<const DynamicStateEnum> dynamicStates, const RenderPassDesc& renderPassDesc,
    const array_view<const RenderPassSubpassDesc>& renderPassSubpassDescs, const uint32_t subpassIndex,
    const LowLevelRenderPassData* renderPassData, const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    PLUGIN_ASSERT(renderPassData);
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<GraphicsPipelineStateObjectVk>(*this, gpuProgram, graphicsState, pipelineLayout,
        vertexInputDeclaration, specializationConstants, dynamicStates, renderPassDesc, renderPassSubpassDescs,
        subpassIndex, *renderPassData, *pipelineLayoutData);
}

unique_ptr<ComputePipelineStateObject> DeviceVk::CreateComputePipelineStateObject(const GpuComputeProgram& gpuProgram,
    const PipelineLayout& pipelineLayout, const ShaderSpecializationConstantDataView& specializationConstants,
    const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<ComputePipelineStateObjectVk>(
        *this, gpuProgram, pipelineLayout, specializationConstants, *pipelineLayoutData);
}

unique_ptr<GpuSemaphore> DeviceVk::CreateGpuSemaphore()
{
    return make_unique<GpuSemaphoreVk>(*this);
}

unique_ptr<GpuSemaphore> DeviceVk::CreateGpuSemaphoreView(const uint64_t handle)
{
    return make_unique<GpuSemaphoreVk>(*this, handle);
}

const DebugFunctionUtilitiesVk& DeviceVk::GetDebugFunctionUtilities() const
{
    return debugFunctionUtilities_;
}

void DeviceVk::CreateDebugFunctions()
{
#if (RENDER_VULKAN_VALIDATION_ENABLED == 1)
    debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT =
        (PFN_vkSetDebugUtilsObjectNameEXT)(void*)vkGetDeviceProcAddr(plat_.device, "vkSetDebugUtilsObjectNameEXT");
#endif
#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
    debugFunctionUtilities_.vkCmdBeginDebugUtilsLabelEXT =
        (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdBeginDebugUtilsLabelEXT");
    debugFunctionUtilities_.vkCmdEndDebugUtilsLabelEXT =
        (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdEndDebugUtilsLabelEXT");
#endif
}

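// A minimal usage sketch (illustrative only; 'cmdBuffer' is a hypothetical
// VkCommandBuffer being recorded): the debug-utils entry points loaded above
// bracket command buffer regions so tools such as RenderDoc can group them.
//
//   VkDebugUtilsLabelEXT label { VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
//   label.pLabelName = "MainRenderPass";
//   debugFunctionUtilities_.vkCmdBeginDebugUtilsLabelEXT(cmdBuffer, &label);
//   // ... record the pass ...
//   debugFunctionUtilities_.vkCmdEndDebugUtilsLabelEXT(cmdBuffer);
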
const DeviceVk::ExtFunctions& DeviceVk::GetExtFunctions() const
{
    return extFunctions_;
}

const PlatformExtFunctions& DeviceVk::GetPlatformExtFunctions() const
{
    return platformExtFunctions_;
}

void DeviceVk::CreateExtFunctions()
{
    if (commonDeviceExtensions_.renderPass2) {
        extFunctions_.vkCreateRenderPass2KHR =
            (PFN_vkCreateRenderPass2KHR)(void*)vkGetInstanceProcAddr(plat_.instance, "vkCreateRenderPass2KHR");
        if (!extFunctions_.vkCreateRenderPass2KHR) {
            commonDeviceExtensions_.renderPass2 = false;
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateRenderPass2KHR");
        }
    }
    if (commonDeviceExtensions_.getMemoryRequirements2) {
        extFunctions_.vkGetImageMemoryRequirements2 = (PFN_vkGetImageMemoryRequirements2)vkGetInstanceProcAddr(
            plat_.instance, "vkGetImageMemoryRequirements2KHR");
        if (!extFunctions_.vkGetImageMemoryRequirements2) {
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetImageMemoryRequirements2KHR");
        }
    }
    if (commonDeviceExtensions_.samplerYcbcrConversion) {
        GetYcbcrExtFunctions(plat_.instance, extFunctions_);
    }
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (commonDeviceExtensions_.fragmentShadingRate) {
        extFunctions_.vkCmdSetFragmentShadingRateKHR =
            (PFN_vkCmdSetFragmentShadingRateKHR)vkGetInstanceProcAddr(plat_.instance, "vkCmdSetFragmentShadingRateKHR");
    }
#endif

#if (RENDER_VULKAN_RT_ENABLED == 1)
    extFunctions_.vkGetAccelerationStructureBuildSizesKHR =
        (PFN_vkGetAccelerationStructureBuildSizesKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureBuildSizesKHR");
    if (!extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureBuildSizesKHR");
    }
    extFunctions_.vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkCmdBuildAccelerationStructuresKHR");
    if (!extFunctions_.vkCmdBuildAccelerationStructuresKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCmdBuildAccelerationStructuresKHR");
    }
    extFunctions_.vkCreateAccelerationStructureKHR =
        (PFN_vkCreateAccelerationStructureKHR)vkGetInstanceProcAddr(plat_.instance, "vkCreateAccelerationStructureKHR");
    if (!extFunctions_.vkCreateAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateAccelerationStructureKHR");
    }
    extFunctions_.vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkDestroyAccelerationStructureKHR");
    if (!extFunctions_.vkDestroyAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroyAccelerationStructureKHR");
    }
    extFunctions_.vkGetAccelerationStructureDeviceAddressKHR =
        (PFN_vkGetAccelerationStructureDeviceAddressKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureDeviceAddressKHR");
    if (!extFunctions_.vkGetAccelerationStructureDeviceAddressKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureDeviceAddressKHR");
    }
#endif
}

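// NOTE (a design observation, not a required change): the entry points above are
// resolved with vkGetInstanceProcAddr, which returns loader trampolines that
// dispatch on their first argument. Device-level functions could equally be
// resolved with vkGetDeviceProcAddr to skip that indirection, e.g.:
//
//   extFunctions_.vkCreateRenderPass2KHR =
//       (PFN_vkCreateRenderPass2KHR)vkGetDeviceProcAddr(plat_.device, "vkCreateRenderPass2KHR");
//
// Either way, every pointer must be null-checked before use since resolution can fail.
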
LowLevelDeviceVk::LowLevelDeviceVk(DeviceVk& deviceVk)
    : deviceVk_(deviceVk), gpuResourceMgr_(static_cast<GpuResourceManager&>(deviceVk_.GetGpuResourceManager()))
{}

DeviceBackendType LowLevelDeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformDataVk& LowLevelDeviceVk::GetPlatformDataVk() const
{
    return deviceVk_.GetPlatformDataVk();
}

GpuBufferPlatformDataVk LowLevelDeviceVk::GetBuffer(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuBufferVk* buffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(handle);
        if (buffer) {
            return buffer->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuImagePlatformDataVk LowLevelDeviceVk::GetImage(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuImageVk* image = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
        if (image) {
            return image->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuSamplerPlatformDataVk LowLevelDeviceVk::GetSampler(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuSamplerVk* sampler = gpuResourceMgr_.GetSampler<GpuSamplerVk>(handle);
        if (sampler) {
            return sampler->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}
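
// A minimal usage sketch (illustrative only; 'lowLevelDevice' and 'bufferHandle'
// are hypothetical, and the platform data is assumed to expose the VkBuffer as
// 'buffer'): raw Vulkan handles are only handed out while the resource backend
// lock is held, i.e. from within backend-side callbacks.
//
//   const GpuBufferPlatformDataVk platData = lowLevelDevice.GetBuffer(bufferHandle);
//   if (platData.buffer != VK_NULL_HANDLE) {
//       // record low-level Vulkan commands that read platData.buffer
//   }
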
RENDER_END_NAMESPACE()