Merge pull request #9973 from GPUCode/async-present

Implement asynchronous presentation
merge-requests/60/head
bunnei 2023-05-02 17:54:57 +07:00 committed by GitHub
commit 8f43b05d6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 771 additions and 225 deletions

@ -205,6 +205,7 @@ void RestoreGlobalState(bool is_powered_on) {
// Renderer
values.fsr_sharpening_slider.SetGlobal(true);
values.renderer_backend.SetGlobal(true);
values.async_presentation.SetGlobal(true);
values.renderer_force_max_clock.SetGlobal(true);
values.vulkan_device.SetGlobal(true);
values.fullscreen_mode.SetGlobal(true);

@ -422,6 +422,7 @@ struct Values {
// Renderer
SwitchableSetting<RendererBackend, true> renderer_backend{
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
SwitchableSetting<bool> async_presentation{false, "async_presentation"};
SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
Setting<bool> renderer_debug{false, "debug"};
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};

@ -179,6 +179,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_present_manager.cpp
renderer_vulkan/vk_present_manager.h
renderer_vulkan/vk_query_cache.cpp
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp

@ -93,8 +93,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
state_tracker(), scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
present_manager(render_window, device, memory_allocator, scheduler, swapchain),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager,
scheduler, screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
@ -121,46 +122,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
return;
}
// Update screen info if the framebuffer size has changed.
if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) {
screen_info.width = framebuffer->width;
screen_info.height = framebuffer->height;
}
screen_info.width = framebuffer->width;
screen_info.height = framebuffer->height;
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
RenderScreenshot(*framebuffer, use_accelerated);
bool has_been_recreated = false;
const auto recreate_swapchain = [&](u32 width, u32 height) {
if (!has_been_recreated) {
has_been_recreated = true;
scheduler.Finish();
}
swapchain.Create(width, height, is_srgb);
};
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width ||
swapchain.GetHeight() != layout.height) {
recreate_swapchain(layout.width, layout.height);
}
bool is_outdated;
do {
swapchain.AcquireNextImage();
is_outdated = swapchain.IsOutDated();
if (is_outdated) {
recreate_swapchain(layout.width, layout.height);
}
} while (is_outdated);
if (has_been_recreated) {
blit_screen.Recreate();
}
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
scheduler.Flush(render_semaphore, present_semaphore);
scheduler.WaitWorker();
swapchain.Present(render_semaphore);
Frame* frame = present_manager.GetRenderFrame();
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
scheduler.Flush(*frame->render_ready);
scheduler.Record([this, frame](vk::CommandBuffer) { present_manager.PushFrame(frame); });
gpu.RendererFrameEndNotify();
rasterizer.TickFrame();
@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
});
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
// Since we're not rendering to the screen, ignore the render semaphore.
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated);
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
const VkBufferCreateInfo dst_buffer_info{
@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,

@ -9,6 +9,7 @@
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
@ -76,6 +77,7 @@ private:
StateTracker state_tracker;
Scheduler scheduler;
Swapchain swapchain;
PresentManager present_manager;
BlitScreen blit_screen;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;

@ -122,10 +122,12 @@ struct BlitScreen::BufferData {
BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_,
const Device& device_, MemoryAllocator& memory_allocator_,
Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_)
Swapchain& swapchain_, PresentManager& present_manager_,
Scheduler& scheduler_, const ScreenInfo& screen_info_)
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_},
scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_},
current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} {
resource_ticks.resize(image_count);
CreateStaticResources();
@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
BlitScreen::~BlitScreen() = default;
void BlitScreen::Recreate() {
present_manager.WaitPresent();
scheduler.Finish();
device.GetLogical().WaitIdle();
CreateDynamicResources();
}
VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated) {
void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout,
VkExtent2D render_area, bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) {
image_count = swapchain_images;
Recreate();
}
const std::size_t image_index = swapchain.GetImageIndex();
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
@ -169,7 +166,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
const u64 image_offset = GetRawImageOffset(framebuffer);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
.depth = 1,
},
};
scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
const VkImage image = *raw_images[image_index];
scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
const VkImage image = *raw_images[index];
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) {
UpdateAADescriptorSet(image_index, source_image_view, false);
UpdateAADescriptorSet(source_image_view, false);
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
};
scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
scheduler.Record([this, index = image_index, size,
anti_alias_pass](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
aa_descriptor_sets[image_index], {});
aa_descriptor_sets[index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
@ -369,81 +367,99 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
};
VkImageView fsr_image_view =
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
UpdateDescriptorSet(image_index, fsr_image_view, true);
UpdateDescriptorSet(fsr_image_view, true);
} else {
const bool is_nn =
Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
UpdateDescriptorSet(image_index, source_image_view, is_nn);
UpdateDescriptorSet(source_image_view, is_nn);
}
scheduler.Record(
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *renderpass,
.framebuffer = host_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
auto graphics_pipeline = [this]() {
switch (Settings::values.scaling_filter.GetValue()) {
case Settings::ScalingFilter::NearestNeighbor:
case Settings::ScalingFilter::Bilinear:
return *bilinear_pipeline;
case Settings::ScalingFilter::Bicubic:
return *bicubic_pipeline;
case Settings::ScalingFilter::Gaussian:
return *gaussian_pipeline;
case Settings::ScalingFilter::ScaleForce:
return *scaleforce_pipeline;
default:
return *bilinear_pipeline;
}
}();
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
scheduler.Record([this, host_framebuffer, index = image_index,
size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *renderpass,
.framebuffer = host_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
auto graphics_pipeline = [this]() {
switch (Settings::values.scaling_filter.GetValue()) {
case Settings::ScalingFilter::NearestNeighbor:
case Settings::ScalingFilter::Bilinear:
return *bilinear_pipeline;
case Settings::ScalingFilter::Bicubic:
return *bicubic_pipeline;
case Settings::ScalingFilter::Gaussian:
return *gaussian_pipeline;
case Settings::ScalingFilter::ScaleForce:
return *scaleforce_pipeline;
default:
return *bilinear_pipeline;
}
}();
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
return *semaphores[image_index];
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
}
VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
const std::size_t image_index = swapchain.GetImageIndex();
const VkExtent2D render_area = swapchain.GetSize();
void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated, bool is_srgb) {
// Recreate dynamic resources if the the image count or colorspace changed
if (const std::size_t swapchain_images = swapchain.GetImageCount();
swapchain_images != image_count || current_srgb != is_srgb) {
current_srgb = is_srgb;
image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
image_count = swapchain_images;
Recreate();
}
// Recreate the presentation frame if the dimensions of the window changed
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated);
if (layout.width != frame->width || layout.height != frame->height ||
is_srgb != frame->is_srgb) {
Recreate();
present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb,
image_view_format, *renderpass);
}
const VkExtent2D render_area{frame->width, frame->height};
Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated);
if (++image_index >= image_count) {
image_index = 0;
}
}
vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
@ -471,13 +487,11 @@ void BlitScreen::CreateStaticResources() {
}
void BlitScreen::CreateDynamicResources() {
CreateSemaphores();
CreateDescriptorPool();
CreateDescriptorSetLayout();
CreateDescriptorSets();
CreatePipelineLayout();
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
fsr.reset();
smaa.reset();
@ -525,11 +539,6 @@ void BlitScreen::CreateShaders() {
}
}
void BlitScreen::CreateSemaphores() {
semaphores.resize(image_count);
std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
}
void BlitScreen::CreateDescriptorPool() {
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
{
@ -571,10 +580,10 @@ void BlitScreen::CreateDescriptorPool() {
}
void BlitScreen::CreateRenderPass() {
renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
renderpass = CreateRenderPassImpl(image_view_format);
}
vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = format,
@ -584,7 +593,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkAttachmentReference color_attachment_ref{
@ -1052,16 +1061,6 @@ void BlitScreen::CreateSampler() {
nn_sampler = device.GetLogical().CreateSampler(ci_nn);
}
void BlitScreen::CreateFramebuffers() {
const VkExtent2D size{swapchain.GetSize()};
framebuffers.resize(image_count);
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
}
}
void BlitScreen::ReleaseRawImages() {
for (const u64 tick : resource_ticks) {
scheduler.Wait(tick);
@ -1175,7 +1174,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
return;
}
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer));
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
@ -1319,8 +1318,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
}
void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const {
const VkDescriptorImageInfo image_info{
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
@ -1356,8 +1354,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
}
void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
@ -1480,8 +1477,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer)
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}
u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const {
u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const {
constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
}

@ -5,6 +5,7 @@
#include <memory>
#include "core/frontend/framebuffer_layout.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -42,6 +43,9 @@ class RasterizerVulkan;
class Scheduler;
class SMAA;
class Swapchain;
class PresentManager;
struct Frame;
struct ScreenInfo {
VkImage image{};
@ -55,18 +59,17 @@ class BlitScreen {
public:
explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
Scheduler& scheduler, const ScreenInfo& screen_info);
PresentManager& present_manager, Scheduler& scheduler,
const ScreenInfo& screen_info);
~BlitScreen();
void Recreate();
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated);
void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated);
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated, bool is_srgb);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
@ -79,10 +82,9 @@ private:
void CreateStaticResources();
void CreateShaders();
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
vk::RenderPass CreateRenderPassImpl(VkFormat format);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
@ -90,15 +92,14 @@ private:
void CreateSampler();
void CreateDynamicResources();
void CreateFramebuffers();
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateDescriptorSet(VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
@ -107,16 +108,17 @@ private:
void CreateFSR();
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Swapchain& swapchain;
PresentManager& present_manager;
Scheduler& scheduler;
std::size_t image_count;
std::size_t image_index{};
const ScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
@ -135,7 +137,6 @@ private:
vk::Pipeline gaussian_pipeline;
vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
vk::Sampler nn_sampler;
vk::Sampler sampler;
@ -145,7 +146,6 @@ private:
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<MemoryCommit> raw_buffer_commits;
@ -164,6 +164,8 @@ private:
u32 raw_width = 0;
u32 raw_height = 0;
Service::android::PixelFormat pixel_format{};
bool current_srgb;
VkFormat image_view_format;
std::unique_ptr<FSR> fsr;
std::unique_ptr<SMAA> smaa;

@ -0,0 +1,454 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192));
namespace {
bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) {
const VkFormatProperties props{physical_device.GetFormatProperties(format)};
return (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT);
}
[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() {
return VkImageSubresourceLayers{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
};
}
[[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
s32 swapchain_height) {
return VkImageBlit{
.srcSubresource = MakeImageSubresourceLayers(),
.srcOffsets =
{
{
.x = 0,
.y = 0,
.z = 0,
},
{
.x = frame_width,
.y = frame_height,
.z = 1,
},
},
.dstSubresource = MakeImageSubresourceLayers(),
.dstOffsets =
{
{
.x = 0,
.y = 0,
.z = 0,
},
{
.x = swapchain_width,
.y = swapchain_height,
.z = 1,
},
},
};
}
[[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width,
u32 swapchain_height) {
return VkImageCopy{
.srcSubresource = MakeImageSubresourceLayers(),
.srcOffset =
{
.x = 0,
.y = 0,
.z = 0,
},
.dstSubresource = MakeImageSubresourceLayers(),
.dstOffset =
{
.x = 0,
.y = 0,
.z = 0,
},
.extent =
{
.width = std::min(frame_width, swapchain_width),
.height = std::min(frame_height, swapchain_height),
.depth = 1,
},
};
}
} // Anonymous namespace
PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_,
MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
Swapchain& swapchain_)
: render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_},
blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())},
use_present_thread{Settings::values.async_presentation.GetValue()},
image_count{swapchain.GetImageCount()} {
auto& dld = device.GetLogical();
cmdpool = dld.CreateCommandPool({
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.pNext = nullptr,
.flags =
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = device.GetGraphicsFamily(),
});
auto cmdbuffers = cmdpool.Allocate(image_count);
frames.resize(image_count);
for (u32 i = 0; i < frames.size(); i++) {
Frame& frame = frames[i];
frame.cmdbuf = vk::CommandBuffer{cmdbuffers[i], device.GetDispatchLoader()};
frame.render_ready = dld.CreateSemaphore({
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
});
frame.present_done = dld.CreateFence({
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_FENCE_CREATE_SIGNALED_BIT,
});
free_queue.push(&frame);
}
if (use_present_thread) {
present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
}
}
PresentManager::~PresentManager() = default;
Frame* PresentManager::GetRenderFrame() {
MICROPROFILE_SCOPE(Vulkan_WaitPresent);
// Wait for free presentation frames
std::unique_lock lock{free_mutex};
free_cv.wait(lock, [this] { return !free_queue.empty(); });
// Take the frame from the queue
Frame* frame = free_queue.front();
free_queue.pop();
// Wait for the presentation to be finished so all frame resources are free
frame->present_done.Wait();
frame->present_done.Reset();
return frame;
}
void PresentManager::PushFrame(Frame* frame) {
if (!use_present_thread) {
CopyToSwapchain(frame);
free_queue.push(frame);
return;
}
std::unique_lock lock{queue_mutex};
present_queue.push(frame);
frame_cv.notify_one();
}
void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
VkFormat image_view_format, VkRenderPass rd) {
auto& dld = device.GetLogical();
frame->width = width;
frame->height = height;
frame->is_srgb = is_srgb;
frame->image = dld.CreateImage({
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
.imageType = VK_IMAGE_TYPE_2D,
.format = swapchain.GetImageFormat(),
.extent =
{
.width = width,
.height = height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);
frame->image_view = dld.CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *frame->image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
});
const VkImageView image_view{*frame->image_view};
frame->framebuffer = dld.CreateFramebuffer({
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.renderPass = rd,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = width,
.height = height,
.layers = 1,
});
}
void PresentManager::WaitPresent() {
if (!use_present_thread) {
return;
}
// Wait for the present queue to be empty
{
std::unique_lock queue_lock{queue_mutex};
frame_cv.wait(queue_lock, [this] { return present_queue.empty(); });
}
// The above condition will be satisfied when the last frame is taken from the queue.
// To ensure that frame has been presented as well take hold of the swapchain
// mutex.
std::scoped_lock swapchain_lock{swapchain_mutex};
}
void PresentManager::PresentThread(std::stop_token token) {
Common::SetCurrentThreadName("VulkanPresent");
while (!token.stop_requested()) {
std::unique_lock lock{queue_mutex};
// Wait for presentation frames
Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); });
if (token.stop_requested()) {
return;
}
// Take the frame and notify anyone waiting
Frame* frame = present_queue.front();
present_queue.pop();
frame_cv.notify_one();
// By exchanging the lock ownership we take the swapchain lock
// before the queue lock goes out of scope. This way the swapchain
// lock in WaitPresent is guaranteed to occur after here.
std::exchange(lock, std::unique_lock{swapchain_mutex});
CopyToSwapchain(frame);
// Free the frame for reuse
std::scoped_lock fl{free_mutex};
free_queue.push(frame);
free_cv.notify_one();
}
}
void PresentManager::CopyToSwapchain(Frame* frame) {
MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
const auto recreate_swapchain = [&] {
swapchain.Create(frame->width, frame->height, frame->is_srgb);
image_count = swapchain.GetImageCount();
};
// If the size or colorspace of the incoming frames has changed, recreate the swapchain
// to account for that.
const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb);
const bool size_changed =
swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
if (srgb_changed || size_changed) {
recreate_swapchain();
}
while (swapchain.AcquireNextImage()) {
recreate_swapchain();
}
const vk::CommandBuffer cmdbuf{frame->cmdbuf};
cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
const VkImage image{swapchain.CurrentImage()};
const VkExtent2D extent = swapchain.GetExtent();
const std::array pre_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *frame->image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
const std::array post_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *frame->image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {},
{}, {}, pre_barriers);
if (blit_supported) {
cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
VK_FILTER_LINEAR);
} else {
cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
MakeImageCopy(frame->width, frame->height, extent.width, extent.height));
}
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {},
{}, {}, post_barriers);
cmdbuf.End();
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore();
const std::array wait_semaphores = {present_semaphore, *frame->render_ready};
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreCount = 2U,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = 1U,
.pSignalSemaphores = &render_semaphore,
};
// Submit the image copy/blit to the swapchain
{
std::scoped_lock lock{scheduler.submit_mutex};
switch (const VkResult result =
device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
device.ReportLoss();
[[fallthrough]];
default:
vk::Check(result);
break;
}
}
// Present
swapchain.Present(render_semaphore);
}
} // namespace Vulkan

@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <mutex>
#include <queue>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core::Frontend {
class EmuWindow;
} // namespace Core::Frontend
namespace Vulkan {
class Device;
class Scheduler;
class Swapchain;
struct Frame {
u32 width;
u32 height;
bool is_srgb;
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
MemoryCommit image_commit;
vk::CommandBuffer cmdbuf;
vk::Semaphore render_ready;
vk::Fence present_done;
};
class PresentManager {
public:
PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device,
MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain);
~PresentManager();
/// Returns the last used presentation frame
Frame* GetRenderFrame();
/// Pushes a frame for presentation
void PushFrame(Frame* frame);
/// Recreates the present frame to match the provided parameters
void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
VkFormat image_view_format, VkRenderPass rd);
/// Waits for the present thread to finish presenting all queued frames.
void WaitPresent();
private:
void PresentThread(std::stop_token token);
void CopyToSwapchain(Frame* frame);
private:
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
Swapchain& swapchain;
vk::CommandPool cmdpool;
std::vector<Frame> frames;
std::queue<Frame*> present_queue;
std::queue<Frame*> free_queue;
std::condition_variable_any frame_cv;
std::condition_variable free_cv;
std::mutex swapchain_mutex;
std::mutex queue_mutex;
std::mutex free_mutex;
std::jthread present_thread;
bool blit_supported;
bool use_present_thread;
std::size_t image_count;
};
} // namespace Vulkan

@ -46,10 +46,11 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
Scheduler::~Scheduler() = default;
void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
// When flushing, we only send data to the worker thread; no waiting is necessary.
SubmitExecution(signal_semaphore, wait_semaphore);
const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
return signal_value;
}
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@ -205,7 +206,7 @@ void Scheduler::AllocateWorkerCommandBuffer() {
});
}
void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
@ -217,6 +218,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
on_submit();
}
std::scoped_lock lock{submit_mutex};
switch (const VkResult result = master_semaphore->SubmitQueue(
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
@ -231,6 +233,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
});
chunk->MarkSubmit();
DispatchWork();
return signal_value;
}
void Scheduler::AllocateNewContext() {

@ -34,7 +34,7 @@ public:
~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
@ -106,6 +106,8 @@ public:
return *master_semaphore;
}
std::mutex submit_mutex;
private:
class Command {
public:
@ -201,7 +203,7 @@ private:
void AllocateWorkerCommandBuffer();
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();

@ -99,18 +99,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
return;
}
device.GetLogical().WaitIdle();
Destroy();
CreateSwapchain(capabilities, srgb);
CreateSemaphores();
CreateImageViews();
resource_ticks.clear();
resource_ticks.resize(image_count);
}
void Swapchain::AcquireNextImage() {
bool Swapchain::AcquireNextImage() {
const VkResult result = device.GetLogical().AcquireNextImageKHR(
*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
VK_NULL_HANDLE, &image_index);
@ -127,8 +125,11 @@ void Swapchain::AcquireNextImage() {
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
break;
}
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
return is_suboptimal || is_outdated;
}
void Swapchain::Present(VkSemaphore render_semaphore) {
@ -143,6 +144,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
.pImageIndices = &image_index,
.pResults = nullptr,
};
std::scoped_lock lock{scheduler.submit_mutex};
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
break;
@ -168,7 +170,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
surface_format = ChooseSwapSurfaceFormat(formats);
present_mode = ChooseSwapPresentMode(present_modes);
u32 requested_image_count{capabilities.minImageCount + 1};
@ -193,7 +195,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
.imageColorSpace = surface_format.colorSpace,
.imageExtent = {},
.imageArrayLayers = 1,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@ -241,45 +243,14 @@ void Swapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
std::ranges::generate(present_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
}
void Swapchain::CreateImageViews() {
VkImageViewCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = {},
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
image_views.resize(image_count);
for (std::size_t i = 0; i < image_count; i++) {
ci.image = images[i];
image_views[i] = device.GetLogical().CreateImageView(ci);
}
render_semaphores.resize(image_count);
std::ranges::generate(render_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
}
void Swapchain::Destroy() {
frame_index = 0;
present_semaphores.clear();
framebuffers.clear();
image_views.clear();
swapchain.reset();
}

@ -27,7 +27,7 @@ public:
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
void AcquireNextImage();
bool AcquireNextImage();
/// Presents the rendered image to the swapchain.
void Present(VkSemaphore render_semaphore);
@ -52,6 +52,11 @@ public:
return is_suboptimal;
}
/// Returns true when the swapchain format is in the srgb color space
bool IsSrgb() const {
return current_srgb;
}
VkExtent2D GetSize() const {
return extent;
}
@ -64,22 +69,34 @@ public:
return image_index;
}
std::size_t GetFrameIndex() const {
return frame_index;
}
VkImage GetImageIndex(std::size_t index) const {
return images[index];
}
VkImageView GetImageViewIndex(std::size_t index) const {
return *image_views[index];
VkImage CurrentImage() const {
return images[image_index];
}
VkFormat GetImageViewFormat() const {
return image_view_format;
}
VkFormat GetImageFormat() const {
return surface_format.format;
}
VkSemaphore CurrentPresentSemaphore() const {
return *present_semaphores[frame_index];
}
VkSemaphore CurrentRenderSemaphore() const {
return *render_semaphores[frame_index];
}
u32 GetWidth() const {
return width;
}
@ -88,6 +105,10 @@ public:
return height;
}
VkExtent2D GetExtent() const {
return extent;
}
private:
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb);
void CreateSemaphores();
@ -107,10 +128,9 @@ private:
std::size_t image_count{};
std::vector<VkImage> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
std::vector<vk::Semaphore> render_semaphores;
u32 width;
u32 height;
@ -121,6 +141,7 @@ private:
VkFormat image_view_format{};
VkExtent2D extent{};
VkPresentModeKHR present_mode{};
VkSurfaceFormatKHR surface_format{};
bool current_srgb{};
bool current_fps_unlocked{};

@ -14,13 +14,18 @@ namespace Vulkan {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
payload_start = payload.data();
payload_cursor = payload.data();
}
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
void UpdateDescriptorQueue::TickFrame() {
payload_cursor = payload.data();
if (++frame_index >= FRAMES_IN_FLIGHT) {
frame_index = 0;
}
payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
payload_cursor = payload_start;
}
void UpdateDescriptorQueue::Acquire() {
@ -28,10 +33,10 @@ void UpdateDescriptorQueue::Acquire() {
// This is the maximum number of entries a single draw call might use.
static constexpr size_t MIN_ENTRIES = 0x400;
if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload_cursor = payload.data();
payload_cursor = payload_start;
}
upload_start = payload_cursor;
}

@ -29,6 +29,12 @@ struct DescriptorUpdateEntry {
};
class UpdateDescriptorQueue final {
// This should be plenty for the vast majority of cases. Most desktop platforms only
// provide up to 3 swapchain images.
static constexpr size_t FRAMES_IN_FLIGHT = 5;
static constexpr size_t FRAME_PAYLOAD_SIZE = 0x10000;
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
public:
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
~UpdateDescriptorQueue();
@ -73,9 +79,11 @@ private:
const Device& device;
Scheduler& scheduler;
size_t frame_index{0};
DescriptorUpdateEntry* payload_cursor = nullptr;
DescriptorUpdateEntry* payload_start = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
std::array<DescriptorUpdateEntry, 0x10000> payload;
std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
};
} // namespace Vulkan

@ -692,6 +692,7 @@ void Config::ReadRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));
ReadGlobalSetting(Settings::values.renderer_backend);
ReadGlobalSetting(Settings::values.async_presentation);
ReadGlobalSetting(Settings::values.renderer_force_max_clock);
ReadGlobalSetting(Settings::values.vulkan_device);
ReadGlobalSetting(Settings::values.fullscreen_mode);
@ -1313,6 +1314,7 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
Settings::values.renderer_backend.UsingGlobal());
WriteGlobalSetting(Settings::values.async_presentation);
WriteGlobalSetting(Settings::values.renderer_force_max_clock);
WriteGlobalSetting(Settings::values.vulkan_device);
WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),

@ -22,11 +22,13 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
void ConfigureGraphicsAdvanced::SetConfiguration() {
const bool runtime_lock = !system.IsPoweredOn();
ui->use_vsync->setEnabled(runtime_lock);
ui->async_present->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setEnabled(runtime_lock);
ui->async_astc->setEnabled(runtime_lock);
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
ui->async_present->setChecked(Settings::values.async_presentation.GetValue());
ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
ui->async_astc->setChecked(Settings::values.async_astc.GetValue());
@ -54,6 +56,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
void ConfigureGraphicsAdvanced::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation,
ui->async_present, async_present);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
ui->renderer_force_max_clock,
renderer_force_max_clock);
@ -90,6 +94,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
// Disable if not global (only happens during game)
if (Settings::IsConfiguringGlobal()) {
ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
ui->async_present->setEnabled(Settings::values.async_presentation.UsingGlobal());
ui->renderer_force_max_clock->setEnabled(
Settings::values.renderer_force_max_clock.UsingGlobal());
ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
@ -107,6 +112,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
return;
}
ConfigurationShared::SetColoredTristate(ui->async_present, Settings::values.async_presentation,
async_present);
ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
Settings::values.renderer_force_max_clock,
renderer_force_max_clock);

@ -36,6 +36,7 @@ private:
std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
ConfigurationShared::CheckState async_present;
ConfigurationShared::CheckState renderer_force_max_clock;
ConfigurationShared::CheckState use_vsync;
ConfigurationShared::CheckState async_astc;

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>404</width>
<height>321</height>
<height>376</height>
</rect>
</property>
<property name="windowTitle">
@ -69,6 +69,13 @@
</layout>
</widget>
</item>
<item>
<widget class="QCheckBox" name="async_present">
<property name="text">
<string>Enable asynchronous presentation (Vulkan only)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="renderer_force_max_clock">
<property name="toolTip">
@ -112,7 +119,7 @@
<item>
<widget class="QCheckBox" name="use_fast_gpu_time">
<property name="toolTip">
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
</property>
<property name="text">
<string>Use Fast GPU Time (Hack)</string>
@ -122,7 +129,7 @@
<item>
<widget class="QCheckBox" name="use_pessimistic_flushes">
<property name="toolTip">
<string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string>
<string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string>
</property>
<property name="text">
<string>Use pessimistic buffer flushes (Hack)</string>
@ -132,7 +139,7 @@
<item>
<widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache">
<property name="toolTip">
<string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
<string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
</property>
<property name="text">
<string>Use Vulkan pipeline cache</string>

@ -300,6 +300,7 @@ void Config::ReadValues() {
// Renderer
ReadSetting("Renderer", Settings::values.renderer_backend);
ReadSetting("Renderer", Settings::values.async_presentation);
ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
ReadSetting("Renderer", Settings::values.renderer_debug);
ReadSetting("Renderer", Settings::values.renderer_shader_feedback);

@ -264,6 +264,10 @@ cpuopt_unsafe_ignore_global_monitor =
# 0: OpenGL, 1 (default): Vulkan
backend =
# Whether to enable asynchronous presentation (Vulkan only)
# 0 (default): Off, 1: On
async_presentation =
# Enable graphics API debugging mode.
# 0 (default): Disabled, 1: Enabled
debug =