diff --git a/framework/common/hpp_vk_common.h b/framework/common/hpp_vk_common.h
index 8bca5a3ab..1891c5afd 100644
--- a/framework/common/hpp_vk_common.h
+++ b/framework/common/hpp_vk_common.h
@@ -47,8 +47,8 @@ struct HPPImageMemoryBarrier
     vk::AccessFlags dst_access_mask;
     vk::ImageLayout old_layout = vk::ImageLayout::eUndefined;
     vk::ImageLayout new_layout = vk::ImageLayout::eUndefined;
-    uint32_t old_queue_family = VK_QUEUE_FAMILY_IGNORED;
-    uint32_t new_queue_family = VK_QUEUE_FAMILY_IGNORED;
+    uint32_t src_queue_family = VK_QUEUE_FAMILY_IGNORED;
+    uint32_t dst_queue_family = VK_QUEUE_FAMILY_IGNORED;
 };
 
 struct HPPLoadStoreInfo
diff --git a/framework/common/vk_common.h b/framework/common/vk_common.h
index 5849a95a6..ec83fee4b 100644
--- a/framework/common/vk_common.h
+++ b/framework/common/vk_common.h
@@ -182,9 +182,9 @@ struct ImageMemoryBarrier
     VkImageLayout new_layout{VK_IMAGE_LAYOUT_UNDEFINED};
 
-    uint32_t old_queue_family{VK_QUEUE_FAMILY_IGNORED};
+    uint32_t src_queue_family{VK_QUEUE_FAMILY_IGNORED};
 
-    uint32_t new_queue_family{VK_QUEUE_FAMILY_IGNORED};
+    uint32_t dst_queue_family{VK_QUEUE_FAMILY_IGNORED};
 };
 
 /**
diff --git a/framework/core/command_buffer.h b/framework/core/command_buffer.h
index 7cde74b59..c64422bdc 100644
--- a/framework/core/command_buffer.h
+++ b/framework/core/command_buffer.h
@@ -979,13 +979,13 @@ inline void CommandBuffer::image_memory_barrier_impl(vkb::core::HPP
         subresource_range.aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
     }
 
-    // actively ignore queue family indices provided by memory_barrier !!
+    // This can cause a queue family ownership transfer. Check the async_compute sample.
     vk::ImageMemoryBarrier image_memory_barrier{.srcAccessMask = memory_barrier.src_access_mask,
                                                 .dstAccessMask = memory_barrier.dst_access_mask,
                                                 .oldLayout = memory_barrier.old_layout,
                                                 .newLayout = memory_barrier.new_layout,
-                                                .srcQueueFamilyIndex = vk::QueueFamilyIgnored,
-                                                .dstQueueFamilyIndex = vk::QueueFamilyIgnored,
+                                                .srcQueueFamilyIndex = memory_barrier.src_queue_family,
+                                                .dstQueueFamilyIndex = memory_barrier.dst_queue_family,
                                                 .image = image_view.get_image().get_handle(),
                                                 .subresourceRange = subresource_range};
diff --git a/framework/core/hpp_instance.cpp b/framework/core/hpp_instance.cpp
index 4af942957..e443c7575 100644
--- a/framework/core/hpp_instance.cpp
+++ b/framework/core/hpp_instance.cpp
@@ -139,6 +139,36 @@ bool enable_extension(const char *requested_exte
     return is_available;
 }
 
+bool enable_layer_setting(const vk::LayerSettingEXT &requested_layer_setting,
+                          const std::vector<const char *> &enabled_layers,
+                          std::vector<vk::LayerSettingEXT> &enabled_layer_settings)
+{
+    // We are checking if the layer is available.
+    // Vulkan does not provide a reflection API for layer settings. Layer settings are described in each layer JSON manifest.
+    bool is_available =
+        std::ranges::any_of(enabled_layers,
+                            [&requested_layer_setting](auto const &available_layer) { return strcmp(available_layer, requested_layer_setting.pLayerName) == 0; });
+    if (!is_available)
+    {
+        LOGW("Layer: {} not found. 
Disabling layer setting: {}", requested_layer_setting.pLayerName, requested_layer_setting.pSettingName); + return false; + } + + bool is_already_enabled = + std::ranges::any_of(enabled_layer_settings, + [&requested_layer_setting](VkLayerSettingEXT const &enabled_layer_setting) { return (strcmp(requested_layer_setting.pLayerName, enabled_layer_setting.pLayerName) == 0) && (strcmp(requested_layer_setting.pSettingName, enabled_layer_setting.pSettingName) == 0); }); + + if (is_already_enabled) + { + LOGW("Ignoring duplicated layer setting {} in layer {}.", requested_layer_setting.pSettingName, requested_layer_setting.pLayerName); + return false; + } + + LOGI("Enabling layer setting {} in layer {}.", requested_layer_setting.pSettingName, requested_layer_setting.pLayerName); + enabled_layer_settings.push_back(requested_layer_setting); + return true; +} + bool enable_layer(const char *requested_layer, const std::vector &available_layers, std::vector &enabled_layers) @@ -169,7 +199,7 @@ bool enable_layer(const char *requested_layer, HPPInstance::HPPInstance(const std::string &application_name, const std::unordered_map &requested_extensions, const std::unordered_map &requested_layers, - const std::vector &required_layer_settings, + const std::vector &requested_layer_settings, uint32_t api_version) { std::vector available_instance_extensions = vk::enumerateInstanceExtensionProperties(); @@ -194,14 +224,16 @@ HPPInstance::HPPInstance(const std::string &applicati bool portability_enumeration_available = enable_extension(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, available_instance_extensions, enabled_extensions); #endif -#ifdef USE_VALIDATION_LAYER_FEATURES +#ifdef USE_VALIDATION_LAYERS + const char *validation_layer_name = "VK_LAYER_KHRONOS_validation"; +# ifdef USE_VALIDATION_LAYER_FEATURES bool validation_features = false; { - std::vector available_layer_instance_extensions = vk::enumerateInstanceExtensionProperties(std::string("VK_LAYER_KHRONOS_validation")); - - enable_extension(VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME, available_layer_instance_extensions, enabled_extensions); + std::vector available_layer_instance_extensions = vk::enumerateInstanceExtensionProperties(std::string(validation_layer_name)); + validation_features = enable_extension(VK_EXT_LAYER_SETTINGS_EXTENSION_NAME, available_layer_instance_extensions, enabled_extensions); } -#endif +# endif // USE_VALIDATION_LAYER_FEATURES +#endif // USE_VALIDATION_LAYERS // Specific surface extensions are obtained from Window::get_required_surface_extensions // They are already added to requested_extensions by VulkanSample::prepare @@ -257,7 +289,7 @@ HPPInstance::HPPInstance(const std::string &applicati #ifdef USE_VALIDATION_LAYERS // NOTE: It's important to have the validation layer as the last one here!!!! // Otherwise, device creation fails !?! 
- enable_layer("VK_LAYER_KHRONOS_validation", supported_layers, enabled_layers); + enable_layer(validation_layer_name, supported_layers, enabled_layers); #endif vk::ApplicationInfo app_info{.pApplicationName = application_name.c_str(), .pEngineName = "Vulkan Samples", .apiVersion = api_version}; @@ -268,6 +300,13 @@ HPPInstance::HPPInstance(const std::string &applicati .enabledExtensionCount = static_cast(enabled_extensions.size()), .ppEnabledExtensionNames = enabled_extensions.data()}; + std::vector enabled_layer_settings; + + for (const vk::LayerSettingEXT &layer_setting : requested_layer_settings) + { + enable_layer_setting(layer_setting, enabled_layers, enabled_layer_settings); + } + #ifdef USE_VALIDATION_LAYERS vk::DebugUtilsMessengerCreateInfoEXT debug_utils_create_info; vk::DebugReportCallbackCreateInfoEXT debug_report_create_info; @@ -297,31 +336,52 @@ HPPInstance::HPPInstance(const std::string &applicati } #endif + // Some of the specialized layers need to be enabled explicitly + // The validation layer does not need to be enabled in code and it can also be configured using the vulkan configurator. #ifdef USE_VALIDATION_LAYER_FEATURES - vk::ValidationFeaturesEXT validation_features_info; - std::vector enable_features{}; + +# if defined(VKB_VALIDATION_LAYERS_GPU_ASSISTED) + const VkBool32 setting_validate_gpuav = VK_TRUE; if (validation_features) { -# if defined(VKB_VALIDATION_LAYERS_GPU_ASSISTED) - enable_features.push_back(vk::ValidationFeatureEnableEXT::eGpuAssistedReserveBindingSlot); - enable_features.push_back(vk::ValidationFeatureEnableEXT::eGpuAssisted); + enable_layer_setting(vk::LayerSettingEXT(validation_layer_name, "gpuav_enable", vk::LayerSettingTypeEXT::eBool32, 1, &setting_validate_gpuav), enabled_layers, enabled_layer_settings); + } # endif + # if defined(VKB_VALIDATION_LAYERS_BEST_PRACTICES) - enable_features.push_back(vk::ValidationFeatureEnableEXT::eBestPractices); + const VkBool32 setting_validate_best_practices = VK_TRUE; + const VkBool32 setting_validate_best_practices_arm = VK_TRUE; + const VkBool32 setting_validate_best_practices_amd = VK_TRUE; + const VkBool32 setting_validate_best_practices_img = VK_TRUE; + const VkBool32 setting_validate_best_practices_nvidia = VK_TRUE; + if (validation_features) + { + enable_layer_setting(vk::LayerSettingEXT(validation_layer_name, "validate_best_practices", vk::LayerSettingTypeEXT::eBool32, 1, &setting_validate_best_practices), enabled_layers, enabled_layer_settings); + enable_layer_setting(vk::LayerSettingEXT(validation_layer_name, "validate_best_practices_arm", vk::LayerSettingTypeEXT::eBool32, 1, &setting_validate_best_practices_arm), enabled_layers, enabled_layer_settings); + enable_layer_setting(vk::LayerSettingEXT(validation_layer_name, "validate_best_practices_amd", vk::LayerSettingTypeEXT::eBool32, 1, &setting_validate_best_practices_amd), enabled_layers, enabled_layer_settings); + enable_layer_setting(vk::LayerSettingEXT(validation_layer_name, "validate_best_practices_img", vk::LayerSettingTypeEXT::eBool32, 1, &setting_validate_best_practices_img), enabled_layers, enabled_layer_settings); + enable_layer_setting(vk::LayerSettingEXT(validation_layer_name, "validate_best_practices_nvidia", vk::LayerSettingTypeEXT::eBool32, 1, &setting_validate_best_practices_nvidia), enabled_layers, enabled_layer_settings); + } # endif - validation_features_info.setEnabledValidationFeatures(enable_features); - validation_features_info.pNext = instance_info.pNext; - instance_info.pNext = &validation_features_info; + +# if 
defined(VKB_VALIDATION_LAYERS_SYNCHRONIZATION) + const VkBool32 setting_validate_sync = VK_TRUE; + const VkBool32 setting_validate_sync_heuristics = VK_TRUE; + if (validation_features) + { + enable_layer_setting(vk::LayerSettingEXT(validation_layer_name, "validate_sync", vk::LayerSettingTypeEXT::eBool32, 1, &setting_validate_sync), enabled_layers, enabled_layer_settings); + enable_layer_setting(vk::LayerSettingEXT(validation_layer_name, "syncval_shader_accesses_heuristic", vk::LayerSettingTypeEXT::eBool32, 1, &setting_validate_sync_heuristics), enabled_layers, enabled_layer_settings); } +# endif #endif vk::LayerSettingsCreateInfoEXT layerSettingsCreateInfo; // If layer settings are defined, then activate the sample's required layer settings during instance creation - if (required_layer_settings.size() > 0) + if (enabled_layer_settings.size() > 0) { - layerSettingsCreateInfo.settingCount = static_cast(required_layer_settings.size()); - layerSettingsCreateInfo.pSettings = required_layer_settings.data(); + layerSettingsCreateInfo.settingCount = static_cast(enabled_layer_settings.size()); + layerSettingsCreateInfo.pSettings = enabled_layer_settings.data(); layerSettingsCreateInfo.pNext = instance_info.pNext; instance_info.pNext = &layerSettingsCreateInfo; } diff --git a/framework/core/hpp_instance.h b/framework/core/hpp_instance.h index 160cf1094..bfbfa905a 100644 --- a/framework/core/hpp_instance.h +++ b/framework/core/hpp_instance.h @@ -48,15 +48,15 @@ class HPPInstance * @param application_name The name of the application * @param requested_extensions The extensions requested to be enabled * @param requested_layers The validation layers to be enabled - * @param required_layer_settings The layer settings to be enabled + * @param requested_layer_settings The layer settings to be enabled * @param api_version The Vulkan API version that the instance will be using * @throws runtime_error if the required extensions and validation layers are not found */ HPPInstance(const std::string &application_name, - const std::unordered_map &requested_extensions = {}, - const std::unordered_map &requested_layers = {}, - const std::vector &required_layer_settings = {}, - uint32_t api_version = VK_API_VERSION_1_1); + const std::unordered_map &requested_extensions = {}, + const std::unordered_map &requested_layers = {}, + const std::vector &requested_layer_settings = {}, + uint32_t api_version = VK_API_VERSION_1_1); /** * @brief Queries the GPUs of a vk::Instance that is already created diff --git a/framework/core/instance.cpp b/framework/core/instance.cpp index b6abd0d79..f2c22499e 100644 --- a/framework/core/instance.cpp +++ b/framework/core/instance.cpp @@ -132,6 +132,36 @@ bool enable_extension(const char *requested_extens return is_available; } +bool enable_layer_setting(const VkLayerSettingEXT &requested_layer_setting, + const std::vector &enabled_layers, + std::vector &enabled_layer_settings) +{ + // We are checking if the layer is available. + // Vulkan does not provide a reflection API for layer settings. Layer settings are described in each layer JSON manifest. + bool is_available = + std::ranges::any_of(enabled_layers, + [&requested_layer_setting](auto const &available_layer) { return strcmp(available_layer, requested_layer_setting.pLayerName) == 0; }); + if (!is_available) + { + LOGW("Layer: {} not found. 
Disabling layer setting: {}", requested_layer_setting.pLayerName, requested_layer_setting.pSettingName); + return false; + } + + bool is_already_enabled = + std::ranges::any_of(enabled_layer_settings, + [&requested_layer_setting](VkLayerSettingEXT const &enabled_layer_setting) { return (strcmp(requested_layer_setting.pLayerName, enabled_layer_setting.pLayerName) == 0) && (strcmp(requested_layer_setting.pSettingName, enabled_layer_setting.pSettingName) == 0); }); + + if (is_already_enabled) + { + LOGW("Ignoring duplicated layer setting {} in layer {}.", requested_layer_setting.pSettingName, requested_layer_setting.pLayerName); + return false; + } + + LOGI("Enabling layer setting {} in layer {}.", requested_layer_setting.pSettingName, requested_layer_setting.pLayerName); + enabled_layer_settings.push_back(requested_layer_setting); + return true; +} + bool enable_layer(const char *requested_layer, const std::vector &available_layers, std::vector &enabled_layers) @@ -163,7 +193,7 @@ bool enable_layer(const char *requested_layer, Instance::Instance(const std::string &application_name, const std::unordered_map &requested_extensions, const std::unordered_map &requested_layers, - const std::vector &required_layer_settings, + const std::vector &requested_layer_settings, uint32_t api_version) { uint32_t instance_extension_count; @@ -192,18 +222,21 @@ Instance::Instance(const std::string &application_nam bool portability_enumeration_available = enable_extension(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, available_instance_extensions, enabled_extensions); #endif -#ifdef USE_VALIDATION_LAYER_FEATURES +#ifdef USE_VALIDATION_LAYERS + const char *validation_layer_name = "VK_LAYER_KHRONOS_validation"; + +# ifdef USE_VALIDATION_LAYER_FEATURES bool validation_features = false; { uint32_t layer_instance_extension_count; - VK_CHECK(vkEnumerateInstanceExtensionProperties("VK_LAYER_KHRONOS_validation", &layer_instance_extension_count, nullptr)); - + VK_CHECK(vkEnumerateInstanceExtensionProperties(validation_layer_name, &layer_instance_extension_count, nullptr)); std::vector available_layer_instance_extensions(layer_instance_extension_count); - VK_CHECK(vkEnumerateInstanceExtensionProperties("VK_LAYER_KHRONOS_validation", &layer_instance_extension_count, available_layer_instance_extensions.data())); + VK_CHECK(vkEnumerateInstanceExtensionProperties(validation_layer_name, &layer_instance_extension_count, available_layer_instance_extensions.data())); - enable_extension(VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME, available_layer_instance_extensions, enabled_extensions); + validation_features = enable_extension(VK_EXT_LAYER_SETTINGS_EXTENSION_NAME, available_layer_instance_extensions, enabled_extensions); } -#endif +# endif // USE_VALIDATION_LAYER_FEATURES +#endif // USE_VALIDATION_LAYERS // Specific surface extensions are obtained from Window::get_required_surface_extensions // They are already added to requested_extensions by VulkanSample::prepare @@ -263,7 +296,7 @@ Instance::Instance(const std::string &application_nam #ifdef USE_VALIDATION_LAYERS // NOTE: It's important to have the validation layer as the last one here!!!! // Otherwise, device creation fails !?! 
- enable_layer("VK_LAYER_KHRONOS_validation", supported_layers, enabled_layers); + enable_layer(validation_layer_name, supported_layers, enabled_layers); #endif VkApplicationInfo app_info{VK_STRUCTURE_TYPE_APPLICATION_INFO}; @@ -311,36 +344,59 @@ Instance::Instance(const std::string &application_nam } #endif + std::vector enabled_layer_settings; + + for (const VkLayerSettingEXT &layer_setting : requested_layer_settings) + { + enable_layer_setting(layer_setting, enabled_layers, enabled_layer_settings); + } + // Some of the specialized layers need to be enabled explicitly + // The validation layer does not need to be enabled in code and it can also be configured using the vulkan configurator. #ifdef USE_VALIDATION_LAYER_FEATURES - VkValidationFeaturesEXT validation_features_info = {VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT}; - std::vector enable_features{}; + +# if defined(VKB_VALIDATION_LAYERS_GPU_ASSISTED) + const VkBool32 setting_validate_gpuav = VK_TRUE; if (validation_features) { -# if defined(VKB_VALIDATION_LAYERS_GPU_ASSISTED) - enable_features.push_back(VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT); - enable_features.push_back(VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT); + enable_layer_setting(VkLayerSettingEXT(validation_layer_name, "gpuav_enable", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &setting_validate_gpuav), enabled_layers, enabled_layer_settings); + } # endif + # if defined(VKB_VALIDATION_LAYERS_BEST_PRACTICES) - enable_features.push_back(VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT); + const VkBool32 setting_validate_best_practices = VK_TRUE; + const VkBool32 setting_validate_best_practices_arm = VK_TRUE; + const VkBool32 setting_validate_best_practices_amd = VK_TRUE; + const VkBool32 setting_validate_best_practices_img = VK_TRUE; + const VkBool32 setting_validate_best_practices_nvidia = VK_TRUE; + if (validation_features) + { + enable_layer_setting(VkLayerSettingEXT(validation_layer_name, "validate_best_practices", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &setting_validate_best_practices), enabled_layers, enabled_layer_settings); + enable_layer_setting(VkLayerSettingEXT(validation_layer_name, "validate_best_practices_arm", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &setting_validate_best_practices_arm), enabled_layers, enabled_layer_settings); + enable_layer_setting(VkLayerSettingEXT(validation_layer_name, "validate_best_practices_amd", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &setting_validate_best_practices_amd), enabled_layers, enabled_layer_settings); + enable_layer_setting(VkLayerSettingEXT(validation_layer_name, "validate_best_practices_img", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &setting_validate_best_practices_img), enabled_layers, enabled_layer_settings); + enable_layer_setting(VkLayerSettingEXT(validation_layer_name, "validate_best_practices_nvidia", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &setting_validate_best_practices_nvidia), enabled_layers, enabled_layer_settings); + } # endif + # if defined(VKB_VALIDATION_LAYERS_SYNCHRONIZATION) - enable_features.push_back(VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT); -# endif - validation_features_info.enabledValidationFeatureCount = static_cast(enable_features.size()); - validation_features_info.pEnabledValidationFeatures = enable_features.data(); - validation_features_info.pNext = instance_info.pNext; - instance_info.pNext = &validation_features_info; + const VkBool32 setting_validate_sync = VK_TRUE; + const VkBool32 setting_validate_sync_heuristics = VK_TRUE; + if (validation_features) + { + 
enable_layer_setting(VkLayerSettingEXT(validation_layer_name, "validate_sync", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &setting_validate_sync), enabled_layers, enabled_layer_settings); + enable_layer_setting(VkLayerSettingEXT(validation_layer_name, "syncval_shader_accesses_heuristic", VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &setting_validate_sync_heuristics), enabled_layers, enabled_layer_settings); } +# endif #endif VkLayerSettingsCreateInfoEXT layerSettingsCreateInfo{VK_STRUCTURE_TYPE_LAYER_SETTINGS_CREATE_INFO_EXT}; // If layer settings are defined, then activate the sample's required layer settings during instance creation - if (required_layer_settings.size() > 0) + if (enabled_layer_settings.size() > 0) { - layerSettingsCreateInfo.settingCount = static_cast(required_layer_settings.size()); - layerSettingsCreateInfo.pSettings = required_layer_settings.data(); + layerSettingsCreateInfo.settingCount = static_cast(enabled_layer_settings.size()); + layerSettingsCreateInfo.pSettings = enabled_layer_settings.data(); layerSettingsCreateInfo.pNext = instance_info.pNext; instance_info.pNext = &layerSettingsCreateInfo; } diff --git a/framework/core/instance.h b/framework/core/instance.h index 5b5ac22b3..bb93fde69 100644 --- a/framework/core/instance.h +++ b/framework/core/instance.h @@ -44,15 +44,15 @@ class Instance * @param application_name The name of the application * @param requested_extensions The extensions requested to be enabled * @param requested_layers The validation layers to be enabled - * @param required_layer_settings The layer settings to be enabled + * @param requested_layer_settings The layer settings to be enabled * @param api_version The Vulkan API version that the instance will be using * @throws runtime_error if the required extensions and validation layers are not found */ Instance(const std::string &application_name, - const std::unordered_map &requested_extensions = {}, - const std::unordered_map &requested_layers = {}, - const std::vector &required_layer_settings = {}, - uint32_t api_version = VK_API_VERSION_1_1); + const std::unordered_map &requested_extensions = {}, + const std::unordered_map &requested_layers = {}, + const std::vector &requested_layer_settings = {}, + uint32_t api_version = VK_API_VERSION_1_1); /** * @brief Queries the GPUs of a VkInstance that is already created diff --git a/framework/gltf_loader.cpp b/framework/gltf_loader.cpp index ec13c7d9e..183ae8f3c 100644 --- a/framework/gltf_loader.cpp +++ b/framework/gltf_loader.cpp @@ -1462,6 +1462,11 @@ std::unique_ptr GLTFLoader::parse_image(tinygltf::Image &gltf_image) { std::unique_ptr image{nullptr}; + if (gltf_image.name.empty()) + { + gltf_image.name = gltf_image.uri; + } + if (!gltf_image.image.empty()) { // Image embedded in gltf file diff --git a/samples/performance/async_compute/async_compute.cpp b/samples/performance/async_compute/async_compute.cpp index 3d6292673..9dd415149 100644 --- a/samples/performance/async_compute/async_compute.cpp +++ b/samples/performance/async_compute/async_compute.cpp @@ -252,9 +252,9 @@ bool AsyncComputeSample::prepare(const vkb::ApplicationOptions &options) // Hardcoded to fit to the scene. 
 	auto ortho_camera = std::make_unique<vkb::sg::OrthographicCamera>("shadow_camera",
-	                                                                  -2000, 3000,
-	                                                                  -2500, 1500,
-	                                                                  -2000, 2000);
+	                                                                  -2000.0f, 3000.0f,
+	                                                                  -2500.0f, 1500.0f,
+	                                                                  -2000.0f, 2000.0f);
 	ortho_camera->set_node(*node);
 	get_scene().add_component(std::move(ortho_camera), *node);
@@ -275,6 +275,7 @@ bool AsyncComputeSample::prepare(const vkb::ApplicationOptions &options)
 	auto shadow_scene_subpass = std::make_unique(get_render_context(), std::move(shadow_vert_shader), std::move(shadow_frag_shader), get_scene(), *shadow_camera);
 	shadow_render_pipeline.add_subpass(std::move(shadow_scene_subpass));
+	shadow_render_pipeline.set_load_store({{VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE}});
 
 	vkb::ShaderSource composite_vert_shader("async_compute/composite.vert");
 	vkb::ShaderSource composite_frag_shader("async_compute/composite.frag");
@@ -353,6 +354,7 @@ void AsyncComputeSample::render_shadow_pass()
 	memory_barrier.old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
 	memory_barrier.new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 	memory_barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+	memory_barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT;
 	memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
 	memory_barrier.dst_stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
@@ -381,6 +383,13 @@ VkSemaphore AsyncComputeSample::render_forward_offscreen_pass(VkSemaphore hdr_wa
 	assert(1 < views.size());
 
 	{
+		// If maintenance9 is not enabled, resources with VK_SHARING_MODE_EXCLUSIVE must only be accessed by queues in the queue family that has ownership of the resource.
+		// Upon creation resources with VK_SHARING_MODE_EXCLUSIVE are not owned by any queue, ownership is implicitly acquired upon first use.
+		// The application must perform a queue family ownership transfer if it wishes to make the memory contents of the resource accessible to a different queue family.
+		// A queue family can take ownership of a resource without an ownership transfer, in the same way as for a resource that was just created, but the content will be undefined.
+		// We do not need to acquire color_targets[0] from present_graphics to early_graphics
+		// A queue transfer barrier is not necessary for the resource first access.
+		// Moreover, in our sample we do not care about the content at this point so we can skip the queue transfer barrier.
 		vkb::ImageMemoryBarrier memory_barrier{};
 		memory_barrier.old_layout = VK_IMAGE_LAYOUT_UNDEFINED;
 		memory_barrier.new_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
@@ -408,26 +417,27 @@ VkSemaphore AsyncComputeSample::render_forward_offscreen_pass(VkSemaphore hdr_wa
 	forward_render_pipeline.draw(*command_buffer, get_current_forward_render_target(), VK_SUBPASS_CONTENTS_INLINE);
 	command_buffer->end_render_pass();
 
+	const bool queue_family_transfer = early_graphics_queue->get_family_index() != post_compute_queue->get_family_index();
 	{
-		vkb::ImageMemoryBarrier memory_barrier{};
-		memory_barrier.old_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-		memory_barrier.new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-		memory_barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
-		memory_barrier.dst_access_mask = 0;
-		memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
-		memory_barrier.dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
-
-		// In a release barrier, dst_stage_mask/access_mask should be BOTTOM_OF_PIPE/0.
-		// We cannot access the resource anymore after all. Semaphore takes care of things from here.
-
-		// Release barrier if we're going to read HDR texture in compute queue
-		// of a different queue family index. We'll have to duplicate this barrier
-		// on compute queue's end.
-		if (early_graphics_queue->get_family_index() != post_compute_queue->get_family_index())
-		{
-			memory_barrier.old_queue_family = early_graphics_queue->get_family_index();
-			memory_barrier.new_queue_family = post_compute_queue->get_family_index();
-		}
+		// When doing async compute this barrier is used to do a queue family ownership transfer
+
+		// release_barrier_0: Releasing color_targets[0] from early_graphics to post_compute
+		// This release barrier is replicated by the corresponding acquire_barrier_0 in the post_compute queue
+		// The application must ensure the release operation happens before the acquire operation. This sample uses semaphores for that.
+		// The transfer ownership barriers are submitted twice (release and acquire) but they are only executed once.
+		vkb::ImageMemoryBarrier memory_barrier{
+		    .src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+		    .dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,        // Ignored for the release barrier.
+		                                                                   // Release barriers ignore dst_access_mask unless using VK_DEPENDENCY_QUEUE_FAMILY_OWNERSHIP_TRANSFER_USE_ALL_STAGES_BIT_KHR
+		    .src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+		    .dst_access_mask = 0,        // dst_access_mask is ignored for release barriers, without affecting its validity
+		    .old_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,        // We want a layout transition, so the old_layout and new_layout values need to be replicated in the acquire barrier
+		    .new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+		    .src_queue_family = queue_family_transfer ?
+		                            early_graphics_queue->get_family_index() :
+		                            VK_QUEUE_FAMILY_IGNORED,        // Release barriers are executed from a queue of the source queue family
+		    .dst_queue_family = queue_family_transfer ? post_compute_queue->get_family_index() : VK_QUEUE_FAMILY_IGNORED,
+		};
 
 		command_buffer->image_memory_barrier(views[0], memory_barrier);
 	}
@@ -436,8 +446,12 @@ VkSemaphore AsyncComputeSample::render_forward_offscreen_pass(VkSemaphore hdr_wa
 
 	// Conditionally waits on hdr_wait_semaphore.
 	// This resolves the write-after-read hazard where previous frame tonemap read from HDR buffer.
-	auto signal_semaphore = get_render_context().submit(queue, {command_buffer},
-	                                                    hdr_wait_semaphore, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
+
+	// We are not using VK_DEPENDENCY_QUEUE_FAMILY_OWNERSHIP_TRANSFER_USE_ALL_STAGES_BIT_KHR
+	// so VK_PIPELINE_STAGE_ALL_COMMANDS_BIT is the only valid stage to wait for queue transfer operations.
+	const VkPipelineStageFlags wait_stage = queue_family_transfer ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT : VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+
+	auto signal_semaphore = get_render_context().submit(queue, {command_buffer}, hdr_wait_semaphore, wait_stage);
 
 	if (hdr_wait_semaphore)
 	{
@@ -457,22 +471,53 @@ VkSemaphore AsyncComputeSample::render_swapchain(VkSemaphore post_semaphore)
 
 	if (post_compute_queue->get_family_index() != present_graphics_queue->get_family_index())
 	{
-		// Purely ownership transfer here. No layout change required. 
- vkb::ImageMemoryBarrier memory_barrier{}; - memory_barrier.old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - memory_barrier.new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - memory_barrier.src_access_mask = 0; - memory_barrier.dst_access_mask = 0; - memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - memory_barrier.dst_stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - memory_barrier.old_queue_family = post_compute_queue->get_family_index(); - memory_barrier.new_queue_family = present_graphics_queue->get_family_index(); + // acquire_barrier_1: Acquiring color_targets[0] from post_compute to present_graphics + // This acquire barrier is replicated by the corresponding release_barrier_1 in the post_compute queue + // The application must ensure the acquire operation happens after the release operation. This sample uses semaphores for that. + // The transfer ownership barriers are submitted twice (release and acquire) but they are only executed once. + vkb::ImageMemoryBarrier memory_barrier{ + .src_stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, // Ignored for the acquire barrier. + // Acquire barriers ignore src_access_mask unless using VK_DEPENDENCY_QUEUE_FAMILY_OWNERSHIP_TRANSFER_USE_ALL_STAGES_BIT_KHR + .dst_stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + .src_access_mask = 0, // src_access_mask is ignored for acquire barriers, without affecting its validity + .dst_access_mask = VK_ACCESS_SHADER_READ_BIT, + .old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, // Purely ownership transfer. We do not need a layout transition. + .new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .src_queue_family = post_compute_queue->get_family_index(), + .dst_queue_family = present_graphics_queue->get_family_index(), // Acquire barriers are executed from a queue of the destination queue family + }; command_buffer->image_memory_barrier(get_current_forward_render_target().get_views()[0], memory_barrier); + + // acquire_barrier_2: Acquiring blur_chain_views[1] from post_compute to present_graphics + // This acquire barrier is replicated by the corresponding release_barrier_2 in the post_compute queue + // The application must ensure the acquire operation happens after the release operation. This sample uses semaphores for that. + // The transfer ownership barriers are submitted twice (release and acquire) but they are only executed once. + vkb::ImageMemoryBarrier memory_barrier_2{ + .src_stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, // Ignored for the acquire barrier. 
+ // Acquire barriers ignore src_access_mask unless using VK_DEPENDENCY_QUEUE_FAMILY_OWNERSHIP_TRANSFER_USE_ALL_STAGES_BIT_KHR + .dst_stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + .src_access_mask = 0, // src_access_mask is ignored for acquire barriers, without affecting its validity + .dst_access_mask = VK_ACCESS_SHADER_READ_BIT, + .old_layout = VK_IMAGE_LAYOUT_GENERAL, // We want a layout transition, so the old_layout and new_layout values need to be replicated in the acquire barrier + .new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .src_queue_family = post_compute_queue->get_family_index(), + .dst_queue_family = present_graphics_queue->get_family_index(), // Acquire barriers are executed from a queue of the destination queue family + + }; + command_buffer->image_memory_barrier(*blur_chain_views[1], memory_barrier_2); } draw(*command_buffer, get_render_context().get_active_frame().get_render_target()); + // If maintenance9 is not enabled, resources with VK_SHARING_MODE_EXCLUSIVE must only be accessed by queues in the queue family that has ownership of the resource. + // Upon creation resources with VK_SHARING_MODE_EXCLUSIVE are not owned by any queue, ownership is implicitly acquired upon first use. + // The application must perform a queue family ownership transfer if it wishes to make the memory contents of the resource accessible to a different queue family. + // A queue family can take ownership of a resource without an ownership transfer, in the same way as for a resource that was just created, but the content will be undefined. + // We do not need to release blur_chain_views[1] and color_targets[0] from present_graphics + // A queue transfer barrier is not necessary for the resource first access. + // Moreover, in our sample we do not care about the content after presenting so we can skip the queue transfer barrier. + command_buffer->end(); // We're going to wait on this semaphore in different frame, @@ -523,26 +568,34 @@ VkSemaphore AsyncComputeSample::render_compute_post(VkSemaphore wait_graphics_se command_buffer->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - // Acquire barrier if we're going to read HDR texture in compute queue - // of a different queue family index. We'll have to duplicate this barrier - // on compute queue's end. if (early_graphics_queue->get_family_index() != post_compute_queue->get_family_index()) { - vkb::ImageMemoryBarrier memory_barrier{}; - memory_barrier.old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - memory_barrier.new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - memory_barrier.src_access_mask = 0; - memory_barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT; - // Match pWaitDstStages for src stage here. - memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - memory_barrier.dst_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - memory_barrier.old_queue_family = early_graphics_queue->get_family_index(); - memory_barrier.new_queue_family = post_compute_queue->get_family_index(); - + // acquire_barrier_0: Acquiring color_targets[0] from early_graphics to post_compute + // This acquire barrier is replicated by the corresponding release_barrier_0 in the early_graphics queue + // The application must ensure the acquire operation happens after the release operation. This sample uses semaphores for that. + // The transfer ownership barriers are submitted twice (release and acquire) but they are only executed once. 
+ vkb::ImageMemoryBarrier memory_barrier{ + .src_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // Ignored for the acquire barrier. + // Acquire barriers ignore src_access_mask unless using VK_DEPENDENCY_QUEUE_FAMILY_OWNERSHIP_TRANSFER_USE_ALL_STAGES_BIT_KHR + .dst_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + .src_access_mask = 0, // src_access_mask is ignored for acquire barriers, without affecting its validity + .dst_access_mask = VK_ACCESS_SHADER_READ_BIT, + .old_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // We want a layout transition, so the old_layout and new_layout values need to be replicated in the release barrier + .new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .src_queue_family = early_graphics_queue->get_family_index(), + .dst_queue_family = post_compute_queue->get_family_index(), // Acquire barriers are executed from a queue of the destination queue family + }; command_buffer->image_memory_barrier(get_current_forward_render_target().get_views()[0], memory_barrier); } const auto discard_blur_view = [&](const vkb::core::ImageView &view) { + // If maintenance9 is not enabled, resources with VK_SHARING_MODE_EXCLUSIVE must only be accessed by queues in the queue family that has ownership of the resource. + // Upon creation resources with VK_SHARING_MODE_EXCLUSIVE are not owned by any queue, ownership is implicitly acquired upon first use. + // The application must perform a queue family ownership transfer if it wishes to make the memory contents of the resource accessible to a different queue family. + // A queue family can take ownership of a resource without an ownership transfer, in the same way as for a resource that was just created, but the content will be undefined. + // We do not need to acquire blur_chain_views[1] from present_graphics to post_compute + // A queue transfer barrier is not necessary for the resource first access. + // Moreover, in our sample we do not care about the content at this point so we can skip the queue transfer barrier. vkb::ImageMemoryBarrier memory_barrier{}; memory_barrier.old_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -555,15 +608,24 @@ VkSemaphore AsyncComputeSample::render_compute_post(VkSemaphore wait_graphics_se command_buffer->image_memory_barrier(view, memory_barrier); }; - const auto read_only_blur_view = [&](const vkb::core::ImageView &view, bool final) { - vkb::ImageMemoryBarrier memory_barrier{}; - - memory_barrier.old_layout = VK_IMAGE_LAYOUT_GENERAL; - memory_barrier.new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - memory_barrier.src_access_mask = VK_ACCESS_SHADER_WRITE_BIT; - memory_barrier.dst_access_mask = final ? 0 : VK_ACCESS_SHADER_READ_BIT; - memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - memory_barrier.dst_stage_mask = final ? VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + const auto read_only_blur_view = [&](const vkb::core::ImageView &view, bool is_final) { + const bool queue_family_transfer = is_final && post_compute_queue->get_family_index() != present_graphics_queue->get_family_index(); + + // release_barrier_2: Releasing blur_chain_views[1] from post_compute to present_graphics + // This release barrier is replicated by the corresponding acquire_barrier_2 in the present_graphics queue + // The application must ensure the release operation happens before the acquire operation. This sample uses semaphores for that. + // The transfer ownership barriers are submitted twice (release and acquire) but they are only executed once. 
+		vkb::ImageMemoryBarrier memory_barrier{
+		    .src_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+		    .dst_stage_mask = is_final ? VkPipelineStageFlags(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT) : VkPipelineStageFlags(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT),        // Ignored for the release barrier.
+		                                                                                                                                                                  // Release barriers ignore dst_access_mask unless using VK_DEPENDENCY_QUEUE_FAMILY_OWNERSHIP_TRANSFER_USE_ALL_STAGES_BIT_KHR
+		    .src_access_mask = VK_ACCESS_SHADER_WRITE_BIT,
+		    .dst_access_mask = is_final ? VkAccessFlags(0) : VkAccessFlags(VK_ACCESS_SHADER_READ_BIT),        // dst_access_mask is ignored for release barriers, without affecting its validity
+		    .old_layout = VK_IMAGE_LAYOUT_GENERAL,        // We want a layout transition, so the old_layout and new_layout values need to be replicated in the acquire barrier
+		    .new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+		    .src_queue_family = queue_family_transfer ? post_compute_queue->get_family_index() : VK_QUEUE_FAMILY_IGNORED,        // Release barriers are executed from a queue of the source queue family
+		    .dst_queue_family = queue_family_transfer ? present_graphics_queue->get_family_index() : VK_QUEUE_FAMILY_IGNORED,
+		};
 
 		command_buffer->image_memory_barrier(view, memory_barrier);
 	};
@@ -575,7 +637,7 @@ VkSemaphore AsyncComputeSample::render_compute_post(VkSemaphore wait_graphics_se
 		float inv_input_width, inv_input_height;
 	};
 
-	const auto dispatch_pass = [&](const vkb::core::ImageView &dst, const vkb::core::ImageView &src, bool final = false) {
+	const auto dispatch_pass = [&](const vkb::core::ImageView &dst, const vkb::core::ImageView &src, bool is_final = false) {
 		discard_blur_view(dst);
 
 		auto dst_extent = downsample_extent(dst.get_image().get_extent(), dst.get_subresource_range().baseMipLevel);
@@ -594,7 +656,7 @@ VkSemaphore AsyncComputeSample::render_compute_post(VkSemaphore wait_graphics_se
 		command_buffer->bind_image(dst, 0, 1, 0);
 		command_buffer->dispatch((push.width + 7) / 8, (push.height + 7) / 8, 1);
 
-		read_only_blur_view(dst, final);
+		read_only_blur_view(dst, is_final);
 	};
 
 	// A very basic and dumb HDR Bloom pipeline. Don't consider this a particularly good or efficient implementation.
@@ -617,19 +679,23 @@ VkSemaphore AsyncComputeSample::render_compute_post(VkSemaphore wait_graphics_se
 		dispatch_pass(*blur_chain_views[index], *blur_chain_views[index + 1], index == 1);
 	}
 
-	// We're going to read the HDR texture again in the present queue.
-	// Need to release ownership back to that queue.
 	if (post_compute_queue->get_family_index() != present_graphics_queue->get_family_index())
 	{
-		vkb::ImageMemoryBarrier memory_barrier{};
-		memory_barrier.old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-		memory_barrier.new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-		memory_barrier.src_access_mask = 0;
-		memory_barrier.dst_access_mask = 0;
-		memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-		memory_barrier.dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
-		memory_barrier.old_queue_family = post_compute_queue->get_family_index();
-		memory_barrier.new_queue_family = present_graphics_queue->get_family_index();
+		// release_barrier_1: Releasing color_targets[0] from post_compute to present_graphics
+		// This release barrier is replicated by the corresponding acquire_barrier_1 in the present_graphics queue
+		// The application must ensure the release operation happens before the acquire operation. This sample uses semaphores for that. 
+ // The transfer ownership barriers are submitted twice (release and acquire) but they are only executed once. + vkb::ImageMemoryBarrier memory_barrier{ + .src_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + .dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, // Ignored for the release barrier. + // Release barriers ignore dst_access_mask unless using VK_DEPENDENCY_QUEUE_FAMILY_OWNERSHIP_TRANSFER_USE_ALL_STAGES_BIT_KHR + .src_access_mask = VK_ACCESS_SHADER_READ_BIT, + .dst_access_mask = 0, // dst_access_mask is ignored for release barriers, without affecting its validity + .old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, // Purely ownership transfer. We do not need a layout transition. + .new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .src_queue_family = post_compute_queue->get_family_index(), // Release barriers are executed from a queue of the source queue family + .dst_queue_family = present_graphics_queue->get_family_index(), + }; command_buffer->image_memory_barrier(get_current_forward_render_target().get_views()[0], memory_barrier); } @@ -683,7 +749,8 @@ void AsyncComputeSample::update(float delta_time) auto *composite_subpass = static_cast(get_render_pipeline().get_subpasses()[0].get()); forward_subpass->set_shadow_map(&shadow_render_target->get_views()[0], comparison_sampler.get()); - composite_subpass->set_texture(&get_current_forward_render_target().get_views()[0], blur_chain_views[1].get(), linear_sampler.get()); + + composite_subpass->set_texture(&get_current_forward_render_target().get_views()[0], blur_chain_views[1].get(), linear_sampler.get()); // blur_chain[1] and color_targets[0] will be used by the present queue float rotation_factor = std::chrono::duration(std::chrono::system_clock::now() - start_time).count();
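Usage sketch (not part of the diff above): with the reworked constructors, a sample can pass its own layer settings through the new requested_layer_settings parameter, and enable_layer_setting() will drop any setting whose layer is not actually enabled. The snippet below is an illustration only; it reuses the VK_LAYER_KHRONOS_validation layer and validate_sync setting named in the diff, the sample name is made up, and the bool in the requested_layers map is assumed to be the framework's usual optional/required flag.

    // Hypothetical caller-side code, not part of the PR.
    // VkLayerSettingEXT stores a pointer to the value, not a copy, so the value
    // must stay alive until the vkb::Instance constructor has created the instance.
    static const VkBool32 enable_sync_validation = VK_TRUE;

    std::vector<VkLayerSettingEXT> layer_settings{
        VkLayerSettingEXT{"VK_LAYER_KHRONOS_validation", "validate_sync",
                          VK_LAYER_SETTING_TYPE_BOOL32_EXT, 1, &enable_sync_validation}};

    vkb::Instance instance("layer_settings_example",
                           /* requested_extensions */ {},
                           /* requested_layers */ {{"VK_LAYER_KHRONOS_validation", true}},
                           /* requested_layer_settings */ layer_settings,
                           VK_API_VERSION_1_1);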