Merge pull request #9552 from liamwhite/turbo

vulkan: implement 'turbo mode' clock booster
master
liamwhite 2023-01-06 09:59:59 +07:00 committed by GitHub
commit 020dbcdbc7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 303 additions and 2 deletions

@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) {
// Renderer
values.fsr_sharpening_slider.SetGlobal(true);
values.renderer_backend.SetGlobal(true);
values.renderer_force_max_clock.SetGlobal(true);
values.vulkan_device.SetGlobal(true);
values.aspect_ratio.SetGlobal(true);
values.max_anisotropy.SetGlobal(true);

@ -415,6 +415,7 @@ struct Values {
// Renderer
SwitchableSetting<RendererBackend, true> renderer_backend{
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
SwitchableSetting<bool> renderer_force_max_clock{true, "force_max_clock"};
Setting<bool> renderer_debug{false, "debug"};
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};

@ -191,6 +191,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_texture_cache.cpp
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_texture_cache_base.cpp
renderer_vulkan/vk_turbo_mode.cpp
renderer_vulkan/vk_turbo_mode.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
shader_cache.cpp

@ -47,6 +47,7 @@ set(SHADER_FILES
vulkan_present_scaleforce_fp16.frag
vulkan_present_scaleforce_fp32.frag
vulkan_quad_indexed.comp
vulkan_turbo_mode.comp
vulkan_uint8.comp
)

@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
layout (binding = 0) buffer ThreadData {
uint data[];
};
// Scrambles a 32-bit value with three xor-shift steps (left 13, right 17, left 5).
// An input of zero stays zero.
uint xorshift32(uint x) {
    uint state = x;
    state = state ^ (state << 13);
    state = state ^ (state >> 17);
    state = state ^ (state << 5);
    return state;
}
// Flattens the 2D global invocation ID into a single row-major index.
// The row stride is the total number of invocations along X (local size times
// work group count); using the Y extent instead would make different
// invocations map to the same index whenever the X extent is larger.
uint getGlobalIndex() {
    uint rowWidth = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
    return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * rowWidth;
}
// Each invocation hashes its global index to pick two slots of the storage
// buffer: it atomically reads one slot (an atomicAdd of zero) and atomically
// adds that value plus one to the other slot.
void main() {
    uint slotCount = uint(data.length());
    uint ownHash = xorshift32(getGlobalIndex());
    uint peerHash = xorshift32(ownHash);
    // atomicAdd with 0 returns the current contents without modifying them.
    uint peerValue = atomicAdd(data[peerHash % slotCount], 0);
    atomicAdd(data[ownHash % slotCount], peerValue + 1);
}

@ -78,6 +78,8 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
return separated_extensions;
}
} // Anonymous namespace
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface) {
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
@ -89,7 +91,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl
const vk::PhysicalDevice physical_device(devices[device_index], dld);
return Device(*instance, physical_device, surface, dld);
}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
@ -109,6 +110,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
if (Settings::values.renderer_force_max_clock.GetValue()) {
turbo_mode.emplace(instance, dld);
}
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());

@ -13,6 +13,7 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_turbo_mode.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -31,6 +32,9 @@ class GPU;
namespace Vulkan {
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface);
class RendererVulkan final : public VideoCore::RendererBase {
public:
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@ -74,6 +78,7 @@ private:
Swapchain swapchain;
BlitScreen blit_screen;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;
};
} // namespace Vulkan

@ -0,0 +1,205 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/literals.h"
#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_turbo_mode.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
using namespace Common::Literals;
// Creates a separate device for the booster (no surface is passed, hence VK_NULL_HANDLE), a memory
// allocator on top of it, and then launches the worker thread that executes Run().
TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
    : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} {
    // The thread is started last so that every member Run() touches is already constructed.
    m_thread = std::jthread([this](std::stop_token token) { Run(token); });
}
// Defaulted on purpose. m_thread is the last declared member and is therefore destroyed first;
// std::jthread is expected to request stop and join in its destructor, so Run() should have
// returned before m_allocator and m_device are torn down.
// NOTE(review): the header pulls std::jthread in via common/polyfill_thread.h - confirm the
// polyfill keeps the request-stop-and-join behaviour.
TurboMode::~TurboMode() = default;
// Worker thread body. Builds a minimal compute pipeline around a 2 MiB storage buffer and then
// keeps submitting the same workload (clear the buffer, run the turbo mode compute shader) to the
// device's graphics queue, waiting for each submission to finish, until a stop is requested.
//
// stop_token: polled once per iteration; the loop exits after the in-flight submission completes.
//
// Every Vulkan object is held in a local, so all of them stay alive until the loop has ended.
void TurboMode::Run(std::stop_token stop_token) {
    auto& dld = m_device.GetLogical();

    // Allocate buffer. 2MiB should be sufficient.
    auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = 2_MiB,
        .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    });

    // Commit some device local memory for the buffer.
    // The commit is never read again; it is kept in a local so it lives as long as the buffer.
    auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);

    // Create the descriptor pool to contain our descriptor.
    constexpr VkDescriptorPoolSize pool_size{
        .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .descriptorCount = 1,
    };
    auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .pNext = nullptr,
        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        .maxSets = 1,
        .poolSizeCount = 1,
        .pPoolSizes = &pool_size,
    });

    // Create the descriptor set layout: a single storage buffer at binding 0, compute stage only.
    constexpr VkDescriptorSetLayoutBinding layout_binding{
        .binding = 0,
        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .descriptorCount = 1,
        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
        .pImmutableSamplers = nullptr,
    };
    auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .bindingCount = 1,
        .pBindings = &layout_binding,
    });

    // Actually create the descriptor set.
    auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .pNext = nullptr,
        .descriptorPool = *descriptor_pool,
        .descriptorSetCount = 1,
        .pSetLayouts = descriptor_set_layout.address(),
    });

    // Create the shader.
    auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV);

    // Create the pipeline layout.
    auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .setLayoutCount = 1,
        .pSetLayouts = descriptor_set_layout.address(),
        .pushConstantRangeCount = 0,
        .pPushConstantRanges = nullptr,
    });

    // Actually create the pipeline.
    const VkPipelineShaderStageCreateInfo shader_stage{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .stage = VK_SHADER_STAGE_COMPUTE_BIT,
        .module = *shader,
        .pName = "main",
        .pSpecializationInfo = nullptr,
    };
    auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{
        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .stage = shader_stage,
        .layout = *pipeline_layout,
        .basePipelineHandle = VK_NULL_HANDLE,
        .basePipelineIndex = 0,
    });

    // Create a fence to wait on.
    auto fence = dld.CreateFence(VkFenceCreateInfo{
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
    });

    // Create a command pool to allocate a command buffer from.
    auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .pNext = nullptr,
        .flags =
            VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = m_device.GetGraphicsFamily(),
    });

    // Create a single command buffer.
    auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};

    // Point the descriptor at the whole buffer. Neither the set nor the buffer changes while the
    // loop runs, so a single write up front is enough.
    const VkDescriptorBufferInfo buffer_info{
        .buffer = *buffer,
        .offset = 0,
        .range = VK_WHOLE_SIZE,
    };
    const VkWriteDescriptorSet buffer_write{
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .pNext = nullptr,
        .dstSet = descriptor_set[0],
        .dstBinding = 0,
        .dstArrayElement = 0,
        .descriptorCount = 1,
        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .pImageInfo = nullptr,
        .pBufferInfo = &buffer_info,
        .pTexelBufferView = nullptr,
    };
    dld.UpdateDescriptorSets(std::array{buffer_write}, {});

    // Orders the transfer-stage clear before the compute shader's atomic accesses to the buffer.
    constexpr VkMemoryBarrier fill_barrier{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
    };

    while (!stop_token.stop_requested()) {
        // Reset the fence.
        fence.Reset();

        // Set up the command buffer.
        cmdbuf.Begin(VkCommandBufferBeginInfo{
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
            .pNext = nullptr,
            .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
            .pInheritanceInfo = nullptr,
        });

        // Clear the buffer.
        cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0);

        // Without this barrier the clear and the dispatch below may overlap on the same memory.
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fill_barrier);

        // Bind descriptor set.
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
                                  descriptor_set, {});

        // Bind the pipeline.
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

        // Dispatch.
        cmdbuf.Dispatch(64, 64, 1);

        // Finish.
        cmdbuf.End();

        const VkSubmitInfo submit_info{
            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .pNext = nullptr,
            .waitSemaphoreCount = 0,
            .pWaitSemaphores = nullptr,
            .pWaitDstStageMask = nullptr,
            .commandBufferCount = 1,
            .pCommandBuffers = cmdbuf.address(),
            .signalSemaphoreCount = 0,
            .pSignalSemaphores = nullptr,
        };
        m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence);

        // Wait for completion.
        fence.Wait();
    }
}
} // namespace Vulkan

@ -0,0 +1,26 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/polyfill_thread.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
// Background GPU load generator used by the "force maximum clocks" option: it owns a separate
// device and a worker thread that repeatedly submits a small compute workload to that device.
class TurboMode {
public:
// Creates the private device and allocator from the given instance and starts the worker thread.
explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld);
// Defined out of line in the .cpp (defaulted there).
~TurboMode();
private:
// Thread entry point; loops until stop_token reports that a stop was requested.
void Run(std::stop_token stop_token);
// Declaration order matters: members are destroyed in reverse order, so m_thread goes first,
// before the allocator and device that Run() uses.
Device m_device;
MemoryAllocator m_allocator;
std::jthread m_thread;
};
} // namespace Vulkan

@ -1472,7 +1472,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
is_patch_list_restart_supported =
primitive_topology_list_restart.primitiveTopologyPatchListRestart;
}
if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
if (requires_surface && has_khr_image_format_list && has_khr_swapchain_mutable_format) {
extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
khr_swapchain_mutable_format = true;

@ -690,6 +690,7 @@ void Config::ReadRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));
ReadGlobalSetting(Settings::values.renderer_backend);
ReadGlobalSetting(Settings::values.renderer_force_max_clock);
ReadGlobalSetting(Settings::values.vulkan_device);
ReadGlobalSetting(Settings::values.fullscreen_mode);
ReadGlobalSetting(Settings::values.aspect_ratio);
@ -1306,6 +1307,9 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
Settings::values.renderer_backend.UsingGlobal());
// renderer_force_max_clock is a switchable (per-game) setting that is read back with
// ReadGlobalSetting, so it is written with its UsingGlobal() state, like renderer_backend above.
WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()),
             static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)),
             static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault()),
             Settings::values.renderer_force_max_clock.UsingGlobal());
WriteGlobalSetting(Settings::values.vulkan_device);
WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),
static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)),

@ -25,6 +25,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
@ -39,6 +40,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
Settings::values.max_anisotropy.GetValue());
} else {
ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy);
ConfigurationShared::SetPerGameSetting(ui->renderer_force_max_clock,
&Settings::values.renderer_force_max_clock);
ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox,
&Settings::values.max_anisotropy);
ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
@ -50,6 +53,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
void ConfigureGraphicsAdvanced::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
ui->renderer_force_max_clock,
renderer_force_max_clock);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
ui->anisotropic_filtering_combobox);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
@ -81,6 +87,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
// Disable if not global (only happens during game)
if (Settings::IsConfiguringGlobal()) {
ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
ui->renderer_force_max_clock->setEnabled(
Settings::values.renderer_force_max_clock.UsingGlobal());
ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
ui->use_asynchronous_shaders->setEnabled(
Settings::values.use_asynchronous_shaders.UsingGlobal());
@ -95,6 +103,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
return;
}
ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
Settings::values.renderer_force_max_clock,
renderer_force_max_clock);
ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
Settings::values.use_asynchronous_shaders,

@ -36,6 +36,7 @@ private:
std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
ConfigurationShared::CheckState renderer_force_max_clock;
ConfigurationShared::CheckState use_vsync;
ConfigurationShared::CheckState use_asynchronous_shaders;
ConfigurationShared::CheckState use_fast_gpu_time;

@ -69,6 +69,16 @@
</layout>
</widget>
</item>
<item>
<widget class="QCheckBox" name="renderer_force_max_clock">
<property name="toolTip">
<string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string>
</property>
<property name="text">
<string>Force maximum clocks (Vulkan only)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_vsync">
<property name="toolTip">

@ -296,6 +296,7 @@ void Config::ReadValues() {
// Renderer
ReadSetting("Renderer", Settings::values.renderer_backend);
ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
ReadSetting("Renderer", Settings::values.renderer_debug);
ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
ReadSetting("Renderer", Settings::values.enable_nsight_aftermath);