Merge pull request #11535 from GPUCode/upload_cmdbuf

renderer_vulkan: Introduce separate cmd buffer for uploads
master
Fernando S 2023-11-26 18:33:05 +07:00 committed by GitHub
commit f21340f7aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 344 additions and 171 deletions

@ -384,6 +384,8 @@ struct Values {
Category::RendererDebug}; Category::RendererDebug};
// TODO: remove this once AMDVLK supports VK_EXT_depth_bias_control // TODO: remove this once AMDVLK supports VK_EXT_depth_bias_control
bool renderer_amdvlk_depth_bias_workaround{}; bool renderer_amdvlk_depth_bias_workaround{};
Setting<bool> disable_buffer_reorder{linkage, false, "disable_buffer_reorder",
Category::RendererDebug};
// System // System
SwitchableSetting<Language, true> language_index{linkage, SwitchableSetting<Language, true> language_index{linkage,

@ -15,6 +15,7 @@ add_library(video_core STATIC
buffer_cache/buffer_cache.cpp buffer_cache/buffer_cache.cpp
buffer_cache/buffer_cache.h buffer_cache/buffer_cache.h
buffer_cache/memory_tracker_base.h buffer_cache/memory_tracker_base.h
buffer_cache/usage_tracker.h
buffer_cache/word_manager.h buffer_cache/word_manager.h
cache_types.h cache_types.h
cdma_pusher.cpp cdma_pusher.cpp

@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
if (!channel_state) { if (!channel_state) {
return; return;
} }
runtime.TickFrame(slot_buffers);
// Calculate hits and shots and move hit bits to the right // Calculate hits and shots and move hit bits to the right
const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
for (const IntervalType& add_interval : tmp_intervals) { for (const IntervalType& add_interval : tmp_intervals) {
common_ranges.add(add_interval); common_ranges.add(add_interval);
} }
runtime.CopyBuffer(dest_buffer, src_buffer, copies); const auto& copy = copies[0];
src_buffer.MarkUsage(copy.src_offset, copy.size);
dest_buffer.MarkUsage(copy.dst_offset, copy.size);
runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
if (has_new_downloads) { if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
} }
@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
common_ranges.subtract(subtract_interval); common_ranges.subtract(subtract_interval);
const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
auto& dest_buffer = slot_buffers[buffer]; Buffer& dest_buffer = slot_buffers[buffer];
const u32 offset = dest_buffer.Offset(*cpu_dst_address); const u32 offset = dest_buffer.Offset(*cpu_dst_address);
runtime.ClearBuffer(dest_buffer, offset, size, value); runtime.ClearBuffer(dest_buffer, offset, size, value);
dest_buffer.MarkUsage(offset, size);
return true; return true;
} }
@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
async_downloads += std::make_pair(base_interval, 1); async_downloads += std::make_pair(base_interval, 1);
buffer.MarkUsage(copy.src_offset, copy.size);
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
normalized_copies.push_back(second_copy); normalized_copies.push_back(second_copy);
} }
@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
// Have in mind the staging buffer offset for the copy // Have in mind the staging buffer offset for the copy
copy.dst_offset += download_staging.offset; copy.dst_offset += download_staging.offset;
const std::array copies{copy}; const std::array copies{copy};
runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, Buffer& buffer = slot_buffers[buffer_id];
false); buffer.MarkUsage(copy.src_offset, copy.size);
runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
} }
runtime.PostCopyBarrier(); runtime.PostCopyBarrier();
runtime.Finish(); runtime.Finish();
@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
std::memcpy(upload_staging.mapped_span.data(), std::memcpy(upload_staging.mapped_span.data(),
draw_state.inline_index_draw_indexes.data(), size); draw_state.inline_index_draw_indexes.data(), size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies); runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
} else { } else {
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
} }
@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes(); offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
runtime.BindIndexBuffer(buffer, new_offset, size); runtime.BindIndexBuffer(buffer, new_offset, size);
} else { } else {
buffer.MarkUsage(offset, size);
runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format, runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
draw_state.index_buffer.first, draw_state.index_buffer.count, draw_state.index_buffer.first, draw_state.index_buffer.count,
buffer, offset, size); buffer, offset, size);
@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
const u32 stride = maxwell3d->regs.vertex_streams[index].stride; const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, binding.size);
host_bindings.buffers.push_back(&buffer); host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset); host_bindings.offsets.push_back(offset);
@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
} }
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) { if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
} else { } else {
@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
SynchronizeBuffer(buffer, binding.cpu_addr, size); SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
if (is_written) { if (is_written) {
@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format; const PixelFormat format = binding.format;
buffer.MarkUsage(offset, size);
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format); runtime.BindImageBuffer(buffer, offset, size, format);
@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
host_bindings.buffers.push_back(&buffer); host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset); host_bindings.offsets.push_back(offset);
host_bindings.sizes.push_back(binding.size); host_bindings.sizes.push_back(size);
} }
if (host_bindings.buffers.size() > 0) { if (host_bindings.buffers.size() > 0) {
runtime.BindTransformFeedbackBuffers(host_bindings); runtime.BindTransformFeedbackBuffers(host_bindings);
@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, size); SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) { if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
++binding_index; ++binding_index;
@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
SynchronizeBuffer(buffer, binding.cpu_addr, size); SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
buffer.MarkUsage(offset, size);
const bool is_written = const bool is_written =
((channel_state->written_compute_storage_buffers >> index) & 1) != 0; ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
const u32 offset = buffer.Offset(binding.cpu_addr); const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format; const PixelFormat format = binding.format;
buffer.MarkUsage(offset, size);
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format); runtime.BindImageBuffer(buffer, offset, size, format);
@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
} }
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
channel_state->vertex_buffers[index] = Binding{ channel_state->vertex_buffers[index] = Binding{
.cpu_addr = *cpu_addr, .cpu_addr = *cpu_addr,
.size = size, .size = size,
.buffer_id = FindBuffer(*cpu_addr, size), .buffer_id = buffer_id,
}; };
} }
@ -1401,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
.dst_offset = dst_base_offset, .dst_offset = dst_base_offset,
.size = overlap.SizeBytes(), .size = overlap.SizeBytes(),
}); });
runtime.CopyBuffer(new_buffer, overlap, copies); new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
runtime.CopyBuffer(new_buffer, overlap, copies, true);
DeleteBuffer(overlap_id, true); DeleteBuffer(overlap_id, true);
} }
@ -1414,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const u32 size = static_cast<u32>(overlap.end - overlap.begin); const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
auto& new_buffer = slot_buffers[new_buffer_id]; auto& new_buffer = slot_buffers[new_buffer_id];
runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0); const size_t size_bytes = new_buffer.SizeBytes();
runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
new_buffer.MarkUsage(0, size_bytes);
for (const BufferId overlap_id : overlap.ids) { for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
} }
@ -1467,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
template <class P> template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
return SynchronizeBufferImpl(buffer, cpu_addr, size);
}
template <class P>
bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies; boost::container::small_vector<BufferCopy, 4> copies;
u64 total_size_bytes = 0; u64 total_size_bytes = 0;
u64 largest_copy = 0; u64 largest_copy = 0;
@ -1493,51 +1508,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
return false; return false;
} }
template <class P>
bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
IntervalSet found_sets{};
auto make_copies = [&] {
for (auto& interval : found_sets) {
const std::size_t sub_size = interval.upper() - interval.lower();
const VAddr cpu_addr_ = interval.lower();
copies.push_back(BufferCopy{
.src_offset = total_size_bytes,
.dst_offset = cpu_addr_ - buffer.CpuAddr(),
.size = sub_size,
});
total_size_bytes += sub_size;
largest_copy = std::max<u64>(largest_copy, sub_size);
}
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
};
memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
const VAddr base_adr = cpu_addr_out;
const VAddr end_adr = base_adr + range_size;
const IntervalType add_interval{base_adr, end_adr};
found_sets.add(add_interval);
});
if (found_sets.empty()) {
return true;
}
const IntervalType search_interval{cpu_addr, cpu_addr + size};
auto it = common_ranges.lower_bound(search_interval);
auto it_end = common_ranges.upper_bound(search_interval);
if (it == common_ranges.end()) {
make_copies();
return false;
}
while (it != it_end) {
found_sets.subtract(*it);
it++;
}
make_copies();
return false;
}
template <class P> template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies) { std::span<BufferCopy> copies) {
@ -1586,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
// Apply the staging offset // Apply the staging offset
copy.src_offset += upload_staging.offset; copy.src_offset += upload_staging.offset;
} }
runtime.CopyBuffer(buffer, upload_staging.buffer, copies); const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
} }
} }
@ -1628,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
}}; }};
u8* const src_pointer = upload_staging.mapped_span.data(); u8* const src_pointer = upload_staging.mapped_span.data();
std::memcpy(src_pointer, inlined_buffer.data(), copy_size); std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies); const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
} else { } else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
} }
@ -1681,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
for (BufferCopy& copy : copies) { for (BufferCopy& copy : copies) {
// Modify copies to have the staging offset in mind // Modify copies to have the staging offset in mind
copy.dst_offset += download_staging.offset; copy.dst_offset += download_staging.offset;
buffer.MarkUsage(copy.src_offset, copy.size);
} }
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
runtime.Finish(); runtime.Finish();
for (const BufferCopy& copy : copies) { for (const BufferCopy& copy : copies) {
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;

@ -529,10 +529,6 @@ private:
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies); std::span<BufferCopy> copies);

@ -0,0 +1,79 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include "common/alignment.h"
#include "common/common_types.h"
namespace VideoCommon {
class UsageTracker {
static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
public:
explicit UsageTracker(size_t size) {
const size_t num_pages = (size >> PAGE_SHIFT) + 1;
pages.resize(num_pages, 0ULL);
}
void Reset() noexcept {
std::ranges::fill(pages, 0ULL);
}
void Track(u64 offset, u64 size) noexcept {
const size_t page = offset >> PAGE_SHIFT;
const size_t page_end = (offset + size) >> PAGE_SHIFT;
TrackPage(page, offset, size);
if (page == page_end) {
return;
}
for (size_t i = page + 1; i < page_end; i++) {
pages[i] = ~u64{0};
}
const size_t offset_end = offset + size;
const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
}
[[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
const size_t page = offset >> PAGE_SHIFT;
const size_t page_end = (offset + size) >> PAGE_SHIFT;
if (IsPageUsed(page, offset, size)) {
return true;
}
for (size_t i = page + 1; i < page_end; i++) {
if (pages[i] != 0) {
return true;
}
}
const size_t offset_end = offset + size;
const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
}
private:
void TrackPage(u64 page, u64 offset, u64 size) noexcept {
const size_t offset_in_page = offset % PAGE_BYTES;
const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
const size_t mask = ~u64{0} >> (64 - num_bits);
pages[page] |= (~u64{0} & mask) << first_bit;
}
bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
const size_t offset_in_page = offset % PAGE_BYTES;
const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
const size_t mask = ~u64{0} >> (64 - num_bits);
const size_t mask2 = (~u64{0} & mask) << first_bit;
return (pages[page] & mask2) != 0;
}
private:
std::vector<u64> pages;
};
} // namespace VideoCommon

@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
} }
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) { std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool) {
CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
} }
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) { std::span<const VideoCommon::BufferCopy> copies, bool) {
CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true);
} }
void BufferCacheRuntime::PreCopyBarrier() { void BufferCacheRuntime::PreCopyBarrier() {

@ -30,6 +30,8 @@ public:
void MakeResident(GLenum access) noexcept; void MakeResident(GLenum access) noexcept;
void MarkUsage(u64 offset, u64 size) {}
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
@ -66,22 +68,29 @@ public:
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
return false;
}
void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); std::span<const VideoCommon::BufferCopy> copies, bool barrier);
void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); std::span<const VideoCommon::BufferCopy> copies, bool barrier);
void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload = false);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies); std::span<const VideoCommon::BufferCopy> copies, bool);
void PreCopyBarrier(); void PreCopyBarrier();
void PostCopyBarrier(); void PostCopyBarrier();
void Finish(); void Finish();
void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);

@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
} // Anonymous namespace } // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_) VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
device{&runtime.device}, buffer{ device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { tracker{SizeBytes()} {
if (runtime.device.HasDebuggingToolAttached()) { if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
} }
@ -355,12 +355,31 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage(); return device.CanReportMemoryUsage();
} }
void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
it->ResetUsageTracking();
}
}
void BufferCacheRuntime::Finish() { void BufferCacheRuntime::Finish() {
scheduler.Finish(); scheduler.Finish();
} }
bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
std::span<const VideoCommon::BufferCopy> copies) {
if (Settings::values.disable_buffer_reorder) {
return false;
}
const bool can_use_upload_cmdbuf =
std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
});
return can_use_upload_cmdbuf;
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) { std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload) {
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
return; return;
} }
@ -376,9 +395,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
}; };
// Measuring a popular game, this number never exceeds the specified size once data is warmed up // Measuring a popular game, this number never exceeds the specified size once data is warmed up
boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
});
return;
}
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
if (barrier) { if (barrier) {

@ -5,6 +5,7 @@
#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@ -34,6 +35,18 @@ public:
return *buffer; return *buffer;
} }
[[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
return tracker.IsUsed(offset, size);
}
void MarkUsage(u64 offset, u64 size) noexcept {
tracker.Track(offset, size);
}
void ResetUsageTracking() noexcept {
tracker.Reset();
}
operator VkBuffer() const noexcept { operator VkBuffer() const noexcept {
return *buffer; return *buffer;
} }
@ -49,6 +62,7 @@ private:
const Device* device{}; const Device* device{};
vk::Buffer buffer; vk::Buffer buffer;
std::vector<BufferView> views; std::vector<BufferView> views;
VideoCommon::UsageTracker tracker;
}; };
class QuadArrayIndexBuffer; class QuadArrayIndexBuffer;
@ -67,6 +81,8 @@ public:
ComputePassDescriptorQueue& compute_pass_descriptor_queue, ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool); DescriptorPool& descriptor_pool);
void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
void Finish(); void Finish();
u64 GetDeviceLocalMemory() const; u64 GetDeviceLocalMemory() const;
@ -79,12 +95,15 @@ public:
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
void FreeDeferredStagingBuffer(StagingBufferRef& ref); void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void PreCopyBarrier(); void PreCopyBarrier();
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); std::span<const VideoCommon::BufferCopy> copies, bool barrier,
bool can_reorder_upload = false);
void PostCopyBarrier(); void PostCopyBarrier();

@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
Refresh(); Refresh();
} }
VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore wait_semaphore, u64 host_tick) { VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick) {
if (semaphore) { if (semaphore) {
return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
host_tick);
} else { } else {
return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
} }
} }
@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
}; };
VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) { VkSemaphore wait_semaphore, u64 host_tick) {
const VkSemaphore timeline_semaphore = *semaphore; const VkSemaphore timeline_semaphore = *semaphore;
@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
const std::array signal_values{host_tick, u64(0)}; const std::array signal_values{host_tick, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
const VkTimelineSemaphoreSubmitInfo timeline_si{ const VkTimelineSemaphoreSubmitInfo timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
.waitSemaphoreCount = num_wait_semaphores, .waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore, .pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(), .pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1, .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
.pCommandBuffers = cmdbuf.address(), .pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores, .signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(), .pSignalSemaphores = signal_semaphores.data(),
}; };
@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
return device.GetGraphicsQueue().Submit(submit_info); return device.GetGraphicsQueue().Submit(submit_info);
} }
VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
VkSemaphore wait_semaphore, u64 host_tick) { vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick) {
const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
const VkSubmitInfo submit_info{ const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr, .pNext = nullptr,
.waitSemaphoreCount = num_wait_semaphores, .waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore, .pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(), .pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1, .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
.pCommandBuffers = cmdbuf.address(), .pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores, .signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = &signal_semaphore, .pSignalSemaphores = &signal_semaphore,
}; };

@ -52,14 +52,16 @@ public:
void Wait(u64 tick); void Wait(u64 tick);
/// Submits the device graphics queue, updating the tick as necessary /// Submits the device graphics queue, updating the tick as necessary
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore wait_semaphore, u64 host_tick); VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
private: private:
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore wait_semaphore, u64 host_tick); VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, u64 host_tick);
VkSemaphore wait_semaphore, u64 host_tick); VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
u64 host_tick);
void WaitThread(std::stop_token token); void WaitThread(std::stop_token token);

@ -22,11 +22,12 @@ namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_WaitForWorker); MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
vk::CommandBuffer upload_cmdbuf) {
auto command = first; auto command = first;
while (command != nullptr) { while (command != nullptr) {
auto next = command->GetNext(); auto next = command->GetNext();
command->Execute(cmdbuf); command->Execute(cmdbuf, upload_cmdbuf);
command->~Command(); command->~Command();
command = next; command = next;
} }
@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
// Perform the work, tracking whether the chunk was a submission // Perform the work, tracking whether the chunk was a submission
// before executing. // before executing.
const bool has_submit = work->HasSubmit(); const bool has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf); work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);
// If the chunk was a submission, reallocate the command buffer. // If the chunk was a submission, reallocate the command buffer.
if (has_submit) { if (has_submit) {
@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr, .pInheritanceInfo = nullptr,
}); });
current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
current_upload_cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
} }
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
InvalidateState(); InvalidateState();
const u64 signal_value = master_semaphore->NextTick(); const u64 signal_value = master_semaphore->NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
upload_cmdbuf.End();
cmdbuf.End(); cmdbuf.End();
if (on_submit) { if (on_submit) {
@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
std::scoped_lock lock{submit_mutex}; std::scoped_lock lock{submit_mutex};
switch (const VkResult result = master_semaphore->SubmitQueue( switch (const VkResult result = master_semaphore->SubmitQueue(
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS: case VK_SUCCESS:
break; break;
case VK_ERROR_DEVICE_LOST: case VK_ERROR_DEVICE_LOST:

@ -80,7 +80,8 @@ public:
/// Send work to a separate thread. /// Send work to a separate thread.
template <typename T> template <typename T>
void Record(T&& command) { requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
void RecordWithUploadBuffer(T&& command) {
if (chunk->Record(command)) { if (chunk->Record(command)) {
return; return;
} }
@ -88,6 +89,15 @@ public:
(void)chunk->Record(command); (void)chunk->Record(command);
} }
template <typename T>
requires std::is_invocable_v<T, vk::CommandBuffer>
void Record(T&& c) {
this->RecordWithUploadBuffer(
[command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
command(cmdbuf);
});
}
/// Returns the current command buffer tick. /// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept { [[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick(); return master_semaphore->CurrentTick();
@ -119,7 +129,7 @@ private:
public: public:
virtual ~Command() = default; virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
Command* GetNext() const { Command* GetNext() const {
return next; return next;
@ -142,8 +152,8 @@ private:
TypedCommand(TypedCommand&&) = delete; TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete; TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf) const override { void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
command(cmdbuf); command(cmdbuf, upload_cmdbuf);
} }
private: private:
@ -152,7 +162,7 @@ private:
class CommandChunk final { class CommandChunk final {
public: public:
void ExecuteAll(vk::CommandBuffer cmdbuf); void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);
template <typename T> template <typename T>
bool Record(T& command) { bool Record(T& command) {
@ -228,6 +238,7 @@ private:
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf; vk::CommandBuffer current_cmdbuf;
vk::CommandBuffer current_upload_cmdbuf;
std::unique_ptr<CommandChunk> chunk; std::unique_ptr<CommandChunk> chunk;
std::function<void()> on_submit; std::function<void()> on_submit;

@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));
scheduler.Record([&](vk::CommandBuffer& cmdbuf) { scheduler.Record([&](vk::CommandBuffer cmdbuf) {
for (auto& images : m_dynamic_images) { for (auto& images : m_dynamic_images) {
for (size_t i = 0; i < MaxDynamicImage; i++) { for (size_t i = 0; i < MaxDynamicImage; i++) {
ClearColorImage(cmdbuf, *images.images[i]); ClearColorImage(cmdbuf, *images.images[i]);
@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
UpdateDescriptorSets(source_image_view, image_index); UpdateDescriptorSets(source_image_view, image_index);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) { scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,

@ -36,6 +36,10 @@ public:
StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
void FreeDeferred(StagingBufferRef& ref); void FreeDeferred(StagingBufferRef& ref);
[[nodiscard]] VkBuffer StreamBuf() const noexcept {
return *stream_buffer;
}
void TickFrame(); void TickFrame();
private: private:

@ -138,6 +138,10 @@ public:
return Iterator(this, SlotId{SlotId::INVALID_INDEX}); return Iterator(this, SlotId{SlotId::INVALID_INDEX});
} }
[[nodiscard]] size_t size() const noexcept {
return values_capacity - free_list.size();
}
private: private:
struct NonTrivialDummy { struct NonTrivialDummy {
NonTrivialDummy() noexcept {} NonTrivialDummy() noexcept {}

@ -1101,6 +1101,10 @@ public:
return &handle; return &handle;
} }
VkCommandBuffer operator*() const noexcept {
return handle;
}
void Begin(const VkCommandBufferBeginInfo& begin_info) const { void Begin(const VkCommandBufferBeginInfo& begin_info) const {
Check(dld->vkBeginCommandBuffer(handle, &begin_info)); Check(dld->vkBeginCommandBuffer(handle, &begin_info));
} }

@ -51,6 +51,8 @@ void ConfigureDebug::SetConfiguration() {
ui->enable_all_controllers->setChecked(Settings::values.enable_all_controllers.GetValue()); ui->enable_all_controllers->setChecked(Settings::values.enable_all_controllers.GetValue());
ui->enable_renderdoc_hotkey->setEnabled(runtime_lock); ui->enable_renderdoc_hotkey->setEnabled(runtime_lock);
ui->enable_renderdoc_hotkey->setChecked(Settings::values.enable_renderdoc_hotkey.GetValue()); ui->enable_renderdoc_hotkey->setChecked(Settings::values.enable_renderdoc_hotkey.GetValue());
ui->disable_buffer_reorder->setEnabled(runtime_lock);
ui->disable_buffer_reorder->setChecked(Settings::values.disable_buffer_reorder.GetValue());
ui->enable_graphics_debugging->setEnabled(runtime_lock); ui->enable_graphics_debugging->setEnabled(runtime_lock);
ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue());
ui->enable_shader_feedback->setEnabled(runtime_lock); ui->enable_shader_feedback->setEnabled(runtime_lock);
@ -96,6 +98,7 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.enable_all_controllers = ui->enable_all_controllers->isChecked(); Settings::values.enable_all_controllers = ui->enable_all_controllers->isChecked();
Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
Settings::values.enable_renderdoc_hotkey = ui->enable_renderdoc_hotkey->isChecked(); Settings::values.enable_renderdoc_hotkey = ui->enable_renderdoc_hotkey->isChecked();
Settings::values.disable_buffer_reorder = ui->disable_buffer_reorder->isChecked();
Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked(); Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked();
Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();

@ -271,19 +271,6 @@
</widget> </widget>
</item> </item>
<item row="8" column="0"> <item row="8" column="0">
<widget class="QCheckBox" name="disable_macro_hle">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro HLE functions. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro HLE</string>
</property>
</widget>
</item>
<item row="7" column="0">
<widget class="QCheckBox" name="dump_macros"> <widget class="QCheckBox" name="dump_macros">
<property name="enabled"> <property name="enabled">
<bool>true</bool> <bool>true</bool>
@ -306,59 +293,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0">
<widget class="QCheckBox" name="enable_shader_feedback">
<property name="toolTip">
<string>When checked, yuzu will log statistics about the compiled pipeline cache</string>
</property>
<property name="text">
<string>Enable Shader Feedback</string>
</property>
</widget>
</item>
<item row="6" column="0"> <item row="6" column="0">
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro JIT</string>
</property>
</widget>
</item>
<item row="9" column="0">
<spacer name="verticalSpacer_5">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Preferred</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="enable_graphics_debugging">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, the graphics API enters a slower debugging mode</string>
</property>
<property name="text">
<string>Enable Graphics Debugging</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="dump_shaders"> <widget class="QCheckBox" name="dump_shaders">
<property name="enabled"> <property name="enabled">
<bool>true</bool> <bool>true</bool>
@ -378,6 +313,81 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="7" column="0">
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro JIT</string>
</property>
</widget>
</item>
<item row="9" column="0">
<widget class="QCheckBox" name="disable_macro_hle">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it disables the macro HLE functions. Enabling this makes games run slower</string>
</property>
<property name="text">
<string>Disable Macro HLE</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="enable_graphics_debugging">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, the graphics API enters a slower debugging mode</string>
</property>
<property name="text">
<string>Enable Graphics Debugging</string>
</property>
</widget>
</item>
<item row="10" column="0">
<spacer name="verticalSpacer_5">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Preferred</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="enable_shader_feedback">
<property name="toolTip">
<string>When checked, yuzu will log statistics about the compiled pipeline cache</string>
</property>
<property name="text">
<string>Enable Shader Feedback</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="disable_buffer_reorder">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;When checked, disables reording of mapped memory uploads which allows to associate uploads with specific draws. May reduce performance in some cases.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Disable Buffer Reorder</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>