Merge pull request #6759 from ReinUsesLisp/pipeline-statistics

renderer_vulkan: Add setting to log pipeline statistics
master
bunnei 2021-07-30 11:18:52 +07:00 committed by GitHub
commit 7530594602
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 307 additions and 24 deletions

@ -319,6 +319,7 @@ struct Values {
// Renderer // Renderer
Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"}; Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"};
BasicSetting<bool> renderer_debug{false, "debug"}; BasicSetting<bool> renderer_debug{false, "debug"};
BasicSetting<bool> renderer_shader_feedback{false, "shader_feedback"};
BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
BasicSetting<bool> disable_shader_loop_safety_checks{false, BasicSetting<bool> disable_shader_loop_safety_checks{false,
"disable_shader_loop_safety_checks"}; "disable_shader_loop_safety_checks"};

@ -106,6 +106,8 @@ add_library(video_core STATIC
renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/pipeline_helper.h renderer_vulkan/pipeline_helper.h
renderer_vulkan/pipeline_statistics.cpp
renderer_vulkan/pipeline_statistics.h
renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.h
renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/vk_blit_screen.cpp renderer_vulkan/vk_blit_screen.cpp

@ -0,0 +1,100 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include <fmt/format.h>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using namespace std::string_view_literals;
static u64 GetUint64(const VkPipelineExecutableStatisticKHR& statistic) {
switch (statistic.format) {
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
return static_cast<u64>(statistic.value.i64);
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
return statistic.value.u64;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
return static_cast<u64>(statistic.value.f64);
default:
return 0;
}
}
PipelineStatistics::PipelineStatistics(const Device& device_) : device{device_} {}
void PipelineStatistics::Collect(VkPipeline pipeline) {
const auto& dev{device.GetLogical()};
const std::vector properties{dev.GetPipelineExecutablePropertiesKHR(pipeline)};
const u32 num_executables{static_cast<u32>(properties.size())};
for (u32 executable = 0; executable < num_executables; ++executable) {
const auto statistics{dev.GetPipelineExecutableStatisticsKHR(pipeline, executable)};
if (statistics.empty()) {
continue;
}
Stats stage_stats;
for (const auto& statistic : statistics) {
const char* const name{statistic.name};
if (name == "Binary Size"sv || name == "Code size"sv || name == "Instruction Count"sv) {
stage_stats.code_size = GetUint64(statistic);
} else if (name == "Register Count"sv) {
stage_stats.register_count = GetUint64(statistic);
} else if (name == "SGPRs"sv || name == "numUsedSgprs"sv) {
stage_stats.sgpr_count = GetUint64(statistic);
} else if (name == "VGPRs"sv || name == "numUsedVgprs"sv) {
stage_stats.vgpr_count = GetUint64(statistic);
} else if (name == "Branches"sv) {
stage_stats.branches_count = GetUint64(statistic);
} else if (name == "Basic Block Count"sv) {
stage_stats.basic_block_count = GetUint64(statistic);
}
}
std::lock_guard lock{mutex};
collected_stats.push_back(stage_stats);
}
}
void PipelineStatistics::Report() const {
double num{};
Stats total;
{
std::lock_guard lock{mutex};
for (const Stats& stats : collected_stats) {
total.code_size += stats.code_size;
total.register_count += stats.register_count;
total.sgpr_count += stats.sgpr_count;
total.vgpr_count += stats.vgpr_count;
total.branches_count += stats.branches_count;
total.basic_block_count += stats.basic_block_count;
}
num = static_cast<double>(collected_stats.size());
}
std::string report;
const auto add = [&](const char* fmt, u64 value) {
if (value > 0) {
report += fmt::format(fmt::runtime(fmt), static_cast<double>(value) / num);
}
};
add("Code size: {:9.03f}\n", total.code_size);
add("Register count: {:9.03f}\n", total.register_count);
add("SGPRs: {:9.03f}\n", total.sgpr_count);
add("VGPRs: {:9.03f}\n", total.vgpr_count);
add("Branches count: {:9.03f}\n", total.branches_count);
add("Basic blocks: {:9.03f}\n", total.basic_block_count);
LOG_INFO(Render_Vulkan,
"\nAverage pipeline statistics\n"
"==========================================\n"
"{}\n",
report);
}
} // namespace Vulkan

@ -0,0 +1,40 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <mutex>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class PipelineStatistics {
public:
explicit PipelineStatistics(const Device& device_);
void Collect(VkPipeline pipeline);
void Report() const;
private:
struct Stats {
u64 code_size{};
u64 register_count{};
u64 sgpr_count{};
u64 vgpr_count{};
u64 branches_count{};
u64 basic_block_count{};
};
const Device& device;
mutable std::mutex mutex;
std::vector<Stats> collected_stats;
};
} // namespace Vulkan

@ -8,6 +8,7 @@
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
@ -26,6 +27,7 @@ using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_, VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker, Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
vk::ShaderModule spv_module_) vk::ShaderModule spv_module_)
: device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
@ -36,7 +38,7 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin()); uniform_buffer_sizes.begin());
auto func{[this, &descriptor_pool, shader_notify] { auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] {
DescriptorLayoutBuilder builder{device}; DescriptorLayoutBuilder builder{device};
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
@ -50,10 +52,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
.pNext = nullptr, .pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize, .requiredSubgroupSize = GuestWarpSize,
}; };
VkPipelineCreateFlags flags{};
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
pipeline = device.GetLogical().CreateComputePipeline({ pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = flags,
.stage{ .stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
@ -67,6 +73,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
.basePipelineHandle = 0, .basePipelineHandle = 0,
.basePipelineIndex = 0, .basePipelineIndex = 0,
}); });
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
std::lock_guard lock{build_mutex}; std::lock_guard lock{build_mutex};
is_built = true; is_built = true;
build_condvar.notify_one(); build_condvar.notify_one();

@ -25,6 +25,7 @@ class ShaderNotify;
namespace Vulkan { namespace Vulkan {
class Device; class Device;
class PipelineStatistics;
class VKScheduler; class VKScheduler;
class ComputePipeline { class ComputePipeline {
@ -32,6 +33,7 @@ public:
explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue, VKUpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* thread_worker, Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
vk::ShaderModule spv_module); vk::ShaderModule spv_module);

@ -11,6 +11,7 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_render_pass_cache.h"
@ -217,8 +218,8 @@ GraphicsPipeline::GraphicsPipeline(
VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
std::array<vk::ShaderModule, NUM_STAGES> stages, const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos) const std::array<const Shader::Info*, NUM_STAGES>& infos)
: key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
@ -235,7 +236,7 @@ GraphicsPipeline::GraphicsPipeline(
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
} }
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
uses_push_descriptor = builder.CanUsePushDescriptor(); uses_push_descriptor = builder.CanUsePushDescriptor();
descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
@ -250,6 +251,9 @@ GraphicsPipeline::GraphicsPipeline(
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
Validate(); Validate();
MakePipeline(render_pass); MakePipeline(render_pass);
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
std::lock_guard lock{build_mutex}; std::lock_guard lock{build_mutex};
is_built = true; is_built = true;
@ -782,10 +786,14 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
} }
*/ */
} }
VkPipelineCreateFlags flags{};
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
pipeline = device.GetLogical().CreateGraphicsPipeline({ pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = flags,
.stageCount = static_cast<u32>(shader_stages.size()), .stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(), .pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_ci, .pVertexInputState = &vertex_input_ci,

@ -60,6 +60,7 @@ struct hash<Vulkan::GraphicsPipelineCacheKey> {
namespace Vulkan { namespace Vulkan {
class Device; class Device;
class PipelineStatistics;
class RenderPassCache; class RenderPassCache;
class VKScheduler; class VKScheduler;
class VKUpdateDescriptorQueue; class VKUpdateDescriptorQueue;
@ -73,8 +74,9 @@ public:
VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
VideoCore::ShaderNotify* shader_notify, const Device& device, VideoCore::ShaderNotify* shader_notify, const Device& device,
DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics,
const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key,
std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos); const std::array<const Shader::Info*, NUM_STAGES>& infos);
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;

@ -29,6 +29,7 @@
#include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@ -389,15 +390,19 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
size_t total{}; size_t total{};
size_t built{}; size_t built{};
bool has_loaded{}; bool has_loaded{};
std::unique_ptr<PipelineStatistics> statistics;
} state; } state;
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
state.statistics = std::make_unique<PipelineStatistics>(device);
}
const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
ComputePipelineCacheKey key; ComputePipelineCacheKey key;
file.read(reinterpret_cast<char*>(&key), sizeof(key)); file.read(reinterpret_cast<char*>(&key), sizeof(key));
workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
ShaderPools pools; ShaderPools pools;
auto pipeline{CreateComputePipeline(pools, key, env, false)}; auto pipeline{CreateComputePipeline(pools, key, env, state.statistics.get(), false)};
std::lock_guard lock{state.mutex}; std::lock_guard lock{state.mutex};
if (pipeline) { if (pipeline) {
compute_cache.emplace(key, std::move(pipeline)); compute_cache.emplace(key, std::move(pipeline));
@ -425,7 +430,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
for (auto& env : envs) { for (auto& env : envs) {
env_ptrs.push_back(&env); env_ptrs.push_back(&env);
} }
auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs),
state.statistics.get(), false)};
std::lock_guard lock{state.mutex}; std::lock_guard lock{state.mutex};
graphics_cache.emplace(key, std::move(pipeline)); graphics_cache.emplace(key, std::move(pipeline));
@ -445,6 +451,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
lock.unlock(); lock.unlock();
workers.WaitForRequests(); workers.WaitForRequests();
if (state.statistics) {
state.statistics->Report();
}
} }
GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
@ -486,7 +496,8 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineCacheKey& key, ShaderPools& pools, const GraphicsPipelineCacheKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel) try { std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
bool build_in_parallel) try {
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
size_t env_index{0}; size_t env_index{0};
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
@ -540,7 +551,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<GraphicsPipeline>( return std::make_unique<GraphicsPipeline>(
maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
std::move(modules), infos); std::move(modules), infos);
} catch (const Shader::Exception& exception) { } catch (const Shader::Exception& exception) {
@ -553,7 +564,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
GetGraphicsEnvironments(environments, graphics_key.unique_hashes); GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
main_pools.ReleaseContents(); main_pools.ReleaseContents();
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; auto pipeline{
CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), nullptr, true)};
if (!pipeline || pipeline_cache_filename.empty()) { if (!pipeline || pipeline_cache_filename.empty()) {
return pipeline; return pipeline;
} }
@ -578,7 +590,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
env.SetCachedSize(shader->size_bytes); env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents(); main_pools.ReleaseContents();
auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; auto pipeline{CreateComputePipeline(main_pools, key, env, nullptr, true)};
if (!pipeline || pipeline_cache_filename.empty()) { if (!pipeline || pipeline_cache_filename.empty()) {
return pipeline; return pipeline;
} }
@ -591,7 +603,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
bool build_in_parallel) try { PipelineStatistics* statistics, bool build_in_parallel) try {
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
@ -605,8 +617,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
} }
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
thread_worker, &shader_notify, program.info, thread_worker, statistics, &shader_notify,
std::move(spv_module)); program.info, std::move(spv_module));
} catch (const Shader::Exception& exception) { } catch (const Shader::Exception& exception) {
LOG_ERROR(Render_Vulkan, "{}", exception.what()); LOG_ERROR(Render_Vulkan, "{}", exception.what());

@ -80,8 +80,9 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan { namespace Vulkan {
class ComputePipeline; class ComputePipeline;
class Device;
class DescriptorPool; class DescriptorPool;
class Device;
class PipelineStatistics;
class RasterizerVulkan; class RasterizerVulkan;
class RenderPassCache; class RenderPassCache;
class VKScheduler; class VKScheduler;
@ -128,7 +129,8 @@ private:
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineCacheKey& key, ShaderPools& pools, const GraphicsPipelineCacheKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel); std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
bool build_in_parallel);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key, std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
const ShaderInfo* shader); const ShaderInfo* shader);
@ -136,6 +138,7 @@ private:
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools, std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
const ComputePipelineCacheKey& key, const ComputePipelineCacheKey& key,
Shader::Environment& env, Shader::Environment& env,
PipelineStatistics* statistics,
bool build_in_parallel); bool build_in_parallel);
const Device& device; const Device& device;

@ -526,6 +526,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
SetNext(next, workgroup_layout); SetNext(next, workgroup_layout);
} }
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties;
if (khr_pipeline_executable_properties) {
LOG_INFO(Render_Vulkan, "Enabling shader feedback, expect slower shader build times");
executable_properties = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR,
.pNext = nullptr,
.pipelineExecutableInfo = VK_TRUE,
};
SetNext(next, executable_properties);
}
if (!ext_depth_range_unrestricted) { if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
} }
@ -824,6 +835,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
bool has_khr_shader_float16_int8{}; bool has_khr_shader_float16_int8{};
bool has_khr_workgroup_memory_explicit_layout{}; bool has_khr_workgroup_memory_explicit_layout{};
bool has_khr_pipeline_executable_properties{};
bool has_ext_subgroup_size_control{}; bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{}; bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{}; bool has_ext_custom_border_color{};
@ -878,6 +890,10 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true); true);
} }
if (Settings::values.renderer_shader_feedback) {
test(has_khr_pipeline_executable_properties,
VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, false);
}
} }
VkPhysicalDeviceFeatures2KHR features{}; VkPhysicalDeviceFeatures2KHR features{};
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
@ -1033,6 +1049,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
khr_workgroup_memory_explicit_layout = true; khr_workgroup_memory_explicit_layout = true;
} }
} }
if (has_khr_pipeline_executable_properties) {
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties;
executable_properties.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR;
executable_properties.pNext = nullptr;
features.pNext = &executable_properties;
physical.GetFeatures2KHR(features);
if (executable_properties.pipelineExecutableInfo) {
extensions.push_back(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
khr_pipeline_executable_properties = true;
}
}
if (khr_push_descriptor) { if (khr_push_descriptor) {
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;

@ -214,6 +214,11 @@ public:
return khr_push_descriptor; return khr_push_descriptor;
} }
/// Returns true if VK_KHR_pipeline_executable_properties is enabled.
bool IsKhrPipelineEexecutablePropertiesEnabled() const {
return khr_pipeline_executable_properties;
}
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
return khr_workgroup_memory_explicit_layout; return khr_workgroup_memory_explicit_layout;
@ -378,6 +383,7 @@ private:
bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor.
bool khr_pipeline_executable_properties{}; ///< Support for executable properties.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.

@ -181,6 +181,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkGetMemoryWin32HandleKHR); X(vkGetMemoryWin32HandleKHR);
#endif #endif
X(vkGetQueryPoolResults); X(vkGetQueryPoolResults);
X(vkGetPipelineExecutablePropertiesKHR);
X(vkGetPipelineExecutableStatisticsKHR);
X(vkGetSemaphoreCounterValueKHR); X(vkGetSemaphoreCounterValueKHR);
X(vkMapMemory); X(vkMapMemory);
X(vkQueueSubmit); X(vkQueueSubmit);
@ -809,6 +811,42 @@ VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noe
return requirements; return requirements;
} }
std::vector<VkPipelineExecutablePropertiesKHR> Device::GetPipelineExecutablePropertiesKHR(
VkPipeline pipeline) const {
const VkPipelineInfoKHR info{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
.pNext = nullptr,
.pipeline = pipeline,
};
u32 num{};
dld->vkGetPipelineExecutablePropertiesKHR(handle, &info, &num, nullptr);
std::vector<VkPipelineExecutablePropertiesKHR> properties(num);
for (auto& property : properties) {
property.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
}
Check(dld->vkGetPipelineExecutablePropertiesKHR(handle, &info, &num, properties.data()));
return properties;
}
std::vector<VkPipelineExecutableStatisticKHR> Device::GetPipelineExecutableStatisticsKHR(
VkPipeline pipeline, u32 executable_index) const {
const VkPipelineExecutableInfoKHR executable_info{
.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
.pNext = nullptr,
.pipeline = pipeline,
.executableIndex = executable_index,
};
u32 num{};
dld->vkGetPipelineExecutableStatisticsKHR(handle, &executable_info, &num, nullptr);
std::vector<VkPipelineExecutableStatisticKHR> statistics(num);
for (auto& statistic : statistics) {
statistic.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
}
Check(dld->vkGetPipelineExecutableStatisticsKHR(handle, &executable_info, &num,
statistics.data()));
return statistics;
}
void Device::UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes, void Device::UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes,
Span<VkCopyDescriptorSet> copies) const noexcept { Span<VkCopyDescriptorSet> copies) const noexcept {
dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data()); dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data());

@ -295,6 +295,8 @@ struct DeviceDispatch : InstanceDispatch {
#ifdef _WIN32 #ifdef _WIN32
PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
#endif #endif
PFN_vkGetPipelineExecutablePropertiesKHR vkGetPipelineExecutablePropertiesKHR{};
PFN_vkGetPipelineExecutableStatisticsKHR vkGetPipelineExecutableStatisticsKHR{};
PFN_vkGetQueryPoolResults vkGetQueryPoolResults{}; PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{}; PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
PFN_vkMapMemory vkMapMemory{}; PFN_vkMapMemory vkMapMemory{};
@ -879,6 +881,12 @@ public:
VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
std::vector<VkPipelineExecutablePropertiesKHR> GetPipelineExecutablePropertiesKHR(
VkPipeline pipeline) const;
std::vector<VkPipelineExecutableStatisticKHR> GetPipelineExecutableStatisticsKHR(
VkPipeline pipeline, u32 executable_index) const;
void UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes, void UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes,
Span<VkCopyDescriptorSet> copies) const noexcept; Span<VkCopyDescriptorSet> copies) const noexcept;

@ -825,6 +825,7 @@ void Config::ReadRendererValues() {
if (global) { if (global) {
ReadBasicSetting(Settings::values.fps_cap); ReadBasicSetting(Settings::values.fps_cap);
ReadBasicSetting(Settings::values.renderer_debug); ReadBasicSetting(Settings::values.renderer_debug);
ReadBasicSetting(Settings::values.renderer_shader_feedback);
ReadBasicSetting(Settings::values.enable_nsight_aftermath); ReadBasicSetting(Settings::values.enable_nsight_aftermath);
ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks);
} }
@ -1363,6 +1364,7 @@ void Config::SaveRendererValues() {
if (global) { if (global) {
WriteBasicSetting(Settings::values.fps_cap); WriteBasicSetting(Settings::values.fps_cap);
WriteBasicSetting(Settings::values.renderer_debug); WriteBasicSetting(Settings::values.renderer_debug);
WriteBasicSetting(Settings::values.renderer_shader_feedback);
WriteBasicSetting(Settings::values.enable_nsight_aftermath); WriteBasicSetting(Settings::values.enable_nsight_aftermath);
WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks);
} }

@ -43,6 +43,8 @@ void ConfigureDebug::SetConfiguration() {
ui->use_auto_stub->setChecked(Settings::values.use_auto_stub.GetValue()); ui->use_auto_stub->setChecked(Settings::values.use_auto_stub.GetValue());
ui->enable_graphics_debugging->setEnabled(runtime_lock); ui->enable_graphics_debugging->setEnabled(runtime_lock);
ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue());
ui->enable_shader_feedback->setEnabled(runtime_lock);
ui->enable_shader_feedback->setChecked(Settings::values.renderer_shader_feedback.GetValue());
ui->enable_cpu_debugging->setEnabled(runtime_lock); ui->enable_cpu_debugging->setEnabled(runtime_lock);
ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue());
ui->enable_nsight_aftermath->setEnabled(runtime_lock); ui->enable_nsight_aftermath->setEnabled(runtime_lock);
@ -65,6 +67,7 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.use_debug_asserts = ui->use_debug_asserts->isChecked(); Settings::values.use_debug_asserts = ui->use_debug_asserts->isChecked();
Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); Settings::values.use_auto_stub = ui->use_auto_stub->isChecked();
Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked();
Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();
Settings::values.disable_shader_loop_safety_checks = Settings::values.disable_shader_loop_safety_checks =

@ -111,8 +111,8 @@
<property name="title"> <property name="title">
<string>Graphics</string> <string>Graphics</string>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout_6"> <layout class="QGridLayout" name="gridLayout_3">
<item> <item row="0" column="0">
<widget class="QCheckBox" name="enable_graphics_debugging"> <widget class="QCheckBox" name="enable_graphics_debugging">
<property name="enabled"> <property name="enabled">
<bool>true</bool> <bool>true</bool>
@ -125,7 +125,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item> <item row="2" column="0">
<widget class="QCheckBox" name="enable_nsight_aftermath"> <widget class="QCheckBox" name="enable_nsight_aftermath">
<property name="toolTip"> <property name="toolTip">
<string>When checked, it enables Nsight Aftermath crash dumps</string> <string>When checked, it enables Nsight Aftermath crash dumps</string>
@ -135,7 +135,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item> <item row="0" column="1">
<widget class="QCheckBox" name="disable_macro_jit"> <widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled"> <property name="enabled">
<bool>true</bool> <bool>true</bool>
@ -148,7 +148,17 @@
</property> </property>
</widget> </widget>
</item> </item>
<item> <item row="1" column="0">
<widget class="QCheckBox" name="enable_shader_feedback">
<property name="toolTip">
<string>When checked, yuzu will log statistics about the compiled pipeline cache</string>
</property>
<property name="text">
<string>Enable Shader Feedback</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="disable_loop_safety_checks"> <widget class="QCheckBox" name="disable_loop_safety_checks">
<property name="toolTip"> <property name="toolTip">
<string>When checked, it executes shaders without loop logic changes</string> <string>When checked, it executes shaders without loop logic changes</string>
@ -276,11 +286,14 @@
<tabstop>open_log_button</tabstop> <tabstop>open_log_button</tabstop>
<tabstop>homebrew_args_edit</tabstop> <tabstop>homebrew_args_edit</tabstop>
<tabstop>enable_graphics_debugging</tabstop> <tabstop>enable_graphics_debugging</tabstop>
<tabstop>enable_shader_feedback</tabstop>
<tabstop>enable_nsight_aftermath</tabstop> <tabstop>enable_nsight_aftermath</tabstop>
<tabstop>disable_macro_jit</tabstop> <tabstop>disable_macro_jit</tabstop>
<tabstop>disable_loop_safety_checks</tabstop> <tabstop>disable_loop_safety_checks</tabstop>
<tabstop>fs_access_log</tabstop>
<tabstop>reporting_services</tabstop> <tabstop>reporting_services</tabstop>
<tabstop>quest_flag</tabstop> <tabstop>quest_flag</tabstop>
<tabstop>enable_cpu_debugging</tabstop>
<tabstop>use_debug_asserts</tabstop> <tabstop>use_debug_asserts</tabstop>
<tabstop>use_auto_stub</tabstop> <tabstop>use_auto_stub</tabstop>
</tabstops> </tabstops>

@ -444,6 +444,7 @@ void Config::ReadValues() {
// Renderer // Renderer
ReadSetting("Renderer", Settings::values.renderer_backend); ReadSetting("Renderer", Settings::values.renderer_backend);
ReadSetting("Renderer", Settings::values.renderer_debug); ReadSetting("Renderer", Settings::values.renderer_debug);
ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); ReadSetting("Renderer", Settings::values.enable_nsight_aftermath);
ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks);
ReadSetting("Renderer", Settings::values.vulkan_device); ReadSetting("Renderer", Settings::values.vulkan_device);

@ -221,6 +221,10 @@ backend =
# 0 (default): Disabled, 1: Enabled # 0 (default): Disabled, 1: Enabled
debug = debug =
# Enable shader feedback.
# 0 (default): Disabled, 1: Enabled
renderer_shader_feedback =
# Enable Nsight Aftermath crash dumps # Enable Nsight Aftermath crash dumps
# 0 (default): Disabled, 1: Enabled # 0 (default): Disabled, 1: Enabled
nsight_aftermath = nsight_aftermath =