buffer_cache: Return handles instead of pointers to handles

The original idea behind returning pointers was that handles can be moved.
The problem is that the implementation didn't take that into account and made
everything harder to work with. This commit drops pointers to handles and
returns the handles themselves. While it is still true that handles can
be invalidated, this way we get an old handle instead of a dangling
pointer.

This problem can be solved in the future with sparse buffers.
master
ReinUsesLisp 2020-04-04 02:54:55 +07:00
parent 7e4a132a77
commit 090fd3fefa
14 changed files with 90 additions and 228 deletions

@ -29,10 +29,10 @@ namespace VideoCommon {
using MapInterval = std::shared_ptr<MapIntervalBase>;
template <typename TBuffer, typename TBufferType, typename StreamBuffer>
template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
class BufferCache {
public:
using BufferInfo = std::pair<const TBufferType*, u64>;
using BufferInfo = std::pair<BufferType, u64>;
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool is_written = false, bool use_fast_cbuf = false) {
@ -89,9 +89,7 @@ public:
}
}
const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
return {ToHandle(block), offset};
return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
}
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@ -156,7 +154,7 @@ public:
}
}
virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
protected:
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@ -166,19 +164,19 @@ protected:
~BufferCache() = default;
virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
virtual void WriteBarrier() = 0;
virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
const u8* data) = 0;
virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
u8* data) = 0;
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) = 0;
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
@ -221,9 +219,8 @@ private:
return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
}
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
const std::size_t size) {
std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
auto& memory_manager = system.GPU().MemoryManager();
@ -272,7 +269,7 @@ private:
return new_map;
}
void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
std::vector<MapInterval>& overlaps) {
const IntervalType base_interval{start, end};
IntervalSet interval_set{};
@ -313,7 +310,7 @@ private:
void FlushMap(MapInterval map) {
std::size_t size = map->GetEnd() - map->GetStart();
TBuffer block = blocks[map->GetStart() >> block_page_bits];
OwnerBuffer block = blocks[map->GetStart() >> block_page_bits];
staging_buffer.resize(size);
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
@ -328,7 +325,7 @@ private:
buffer_ptr += size;
buffer_offset += size;
return {&stream_buffer_handle, uploaded_offset};
return {stream_buffer_handle, uploaded_offset};
}
void AlignBuffer(std::size_t alignment) {
@ -338,11 +335,11 @@ private:
buffer_offset = offset_aligned;
}
TBuffer EnlargeBlock(TBuffer buffer) {
OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
const std::size_t old_size = buffer->GetSize();
const std::size_t new_size = old_size + block_page_size;
const VAddr cpu_addr = buffer->GetCpuAddr();
TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
CopyBlock(buffer, new_buffer, 0, 0, old_size);
buffer->SetEpoch(epoch);
pending_destruction.push_back(buffer);
@ -356,14 +353,14 @@ private:
return new_buffer;
}
TBuffer MergeBlocks(TBuffer first, TBuffer second) {
OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
const std::size_t size_1 = first->GetSize();
const std::size_t size_2 = second->GetSize();
const VAddr first_addr = first->GetCpuAddr();
const VAddr second_addr = second->GetCpuAddr();
const VAddr new_addr = std::min(first_addr, second_addr);
const std::size_t new_size = size_1 + size_2;
TBuffer new_buffer = CreateBlock(new_addr, new_size);
OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
first->SetEpoch(epoch);
@ -380,8 +377,8 @@ private:
return new_buffer;
}
TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
TBuffer found{};
OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
OwnerBuffer found;
const VAddr cpu_addr_end = cpu_addr + size - 1;
u64 page_start = cpu_addr >> block_page_bits;
const u64 page_end = cpu_addr_end >> block_page_bits;
@ -457,7 +454,7 @@ private:
Core::System& system;
std::unique_ptr<StreamBuffer> stream_buffer;
TBufferType stream_buffer_handle{};
BufferType stream_buffer_handle{};
bool invalidated = false;
@ -475,9 +472,9 @@ private:
static constexpr u64 block_page_bits = 21;
static constexpr u64 block_page_size = 1ULL << block_page_bits;
std::unordered_map<u64, TBuffer> blocks;
std::unordered_map<u64, OwnerBuffer> blocks;
std::list<TBuffer> pending_destruction;
std::list<OwnerBuffer> pending_destruction;
u64 epoch = 0;
u64 modified_ticks = 0;

@ -55,33 +55,31 @@ void OGLBufferCache::WriteBarrier() {
glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) {
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle();
}
const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
static const GLuint null_buffer = 0;
return &null_buffer;
GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
return 0;
}
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) {
glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}
void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) {
glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(),
static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset),
static_cast<GLsizeiptr>(size));
glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
}
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
@ -89,7 +87,7 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
const GLuint& cbuf = cbufs[cbuf_cursor++];
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
return {&cbuf, 0};
return {cbuf, 0};
}
} // namespace OpenGL

@ -34,12 +34,12 @@ public:
explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
~CachedBufferBlock();
const GLuint* GetHandle() const {
return &gl_buffer.handle;
GLuint GetHandle() const {
return gl_buffer.handle;
}
private:
OGLBuffer gl_buffer{};
OGLBuffer gl_buffer;
};
class OGLBufferCache final : public GenericBufferCache {
@ -48,7 +48,7 @@ public:
const Device& device, std::size_t stream_size);
~OGLBufferCache();
const GLuint* GetEmptyBuffer(std::size_t) override;
GLuint GetEmptyBuffer(std::size_t) override;
void Acquire() noexcept {
cbuf_cursor = 0;
@ -57,9 +57,9 @@ public:
protected:
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
void WriteBarrier() override;
GLuint ToHandle(const Buffer& buffer) override;
const GLuint* ToHandle(const Buffer& buffer) override;
void WriteBarrier() override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;

@ -188,10 +188,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
ASSERT(end > start);
const u64 size = end - start + 1;
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
// Bind the vertex array to the buffer at the current offset.
vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer,
vertex_buffer_offset, vertex_array.stride);
glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
vertex_array.stride);
}
}
@ -222,7 +220,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize();
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
vertex_array_pushbuffer.SetIndexBuffer(buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
return offset;
}
@ -524,7 +522,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Prepare vertex array format.
SetupVertexFormat();
vertex_array_pushbuffer.Setup();
// Upload vertex and index data.
SetupVertexBuffer();
@ -534,17 +531,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
index_buffer_offset = SetupIndexBuffer();
}
// Prepare packed bindings.
bind_ubo_pushbuffer.Setup();
bind_ssbo_pushbuffer.Setup();
// Setup emulation uniform buffer.
GLShader::MaxwellUniformData ubo;
ubo.SetFromRegs(gpu);
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
@ -557,11 +550,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Signal the buffer cache that we are not going to upload more things.
buffer_cache.Unmap();
// Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
vertex_array_pushbuffer.Bind();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
program_manager.BindGraphicsPipeline();
if (texture_cache.TextureBarrier()) {
@ -630,17 +618,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);
bind_ubo_pushbuffer.Setup();
bind_ssbo_pushbuffer.Setup();
SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);
buffer_cache.Unmap();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
@ -771,8 +753,8 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
const ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
sizeof(float));
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
sizeof(float));
return;
}
@ -783,7 +765,7 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
device.HasFastBufferSubData());
bind_ubo_pushbuffer.Push(binding, cbuf, offset, size);
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
}
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@ -819,7 +801,8 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
static_cast<GLsizeiptr>(size));
}
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
@ -1432,7 +1415,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
}
}

@ -231,9 +231,7 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker};
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
GLint vertex_binding = 0;
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;

@ -14,68 +14,6 @@
namespace OpenGL {
/// One queued vertex buffer binding. The fields are forwarded, in this order, to
/// glBindVertexBuffer when VertexArrayPushBuffer::Bind() runs.
struct VertexArrayPushBuffer::Entry {
GLuint binding_index{};
// Stored as a pointer to the buffer name; it is only dereferenced in Bind().
const GLuint* buffer{};
GLintptr offset{};
GLsizei stride{};
};
/// Keeps a reference to the state tracker, which Bind() uses for the index buffer binding.
VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker)
: state_tracker{state_tracker} {}
/// Defaulted destructor, defined out of line.
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
/// Resets the queued state: forgets the index buffer and empties the vertex buffer list.
void VertexArrayPushBuffer::Setup() {
index_buffer = nullptr;
vertex_buffers.clear();
}
/// Records the pointer to the index buffer name. Nothing is bound here; the pointer is
/// dereferenced later in Bind(), so the pointee has to remain valid until then.
void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
index_buffer = buffer;
}
/// Queues a vertex buffer binding by appending an Entry; the actual GL call is deferred to
/// Bind(). The buffer pointer is stored as-is and dereferenced only at bind time.
void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
GLintptr offset, GLsizei stride) {
vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
}
/// Issues the deferred bindings: the index buffer (if one was set) through the state
/// tracker, then every queued vertex buffer in insertion order.
void VertexArrayPushBuffer::Bind() {
// A null pointer means no index buffer was set since the last Setup().
if (index_buffer) {
state_tracker.BindIndexBuffer(*index_buffer);
}
for (const auto& entry : vertex_buffers) {
// The stored pointer is dereferenced here, not when the entry was queued.
glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride);
}
}
/// One queued ranged buffer binding. The fields are forwarded, together with the owner's
/// target, to glBindBufferRange when BindBuffersRangePushBuffer::Bind() runs.
struct BindBuffersRangePushBuffer::Entry {
GLuint binding;
// Stored as a pointer to the buffer name; it is only dereferenced in Bind().
const GLuint* buffer;
GLintptr offset;
GLsizeiptr size;
};
/// Stores the GL target that every binding issued by Bind() will use.
BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
/// Defaulted destructor, defined out of line.
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
/// Discards all queued bindings.
void BindBuffersRangePushBuffer::Setup() {
entries.clear();
}
/// Queues a ranged binding by appending an Entry; the actual GL call is deferred to Bind().
/// The buffer pointer is stored as-is and dereferenced only at bind time.
void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset,
GLsizeiptr size) {
entries.push_back(Entry{binding, buffer, offset, size});
}
/// Issues one glBindBufferRange call per queued entry, in insertion order, using the
/// target given at construction.
void BindBuffersRangePushBuffer::Bind() {
for (const Entry& entry : entries) {
// The stored pointer is dereferenced here, not when the entry was pushed.
glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size);
}
}
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
if (!GLAD_GL_KHR_debug) {
// We don't need to throw an error as this is just for debugging

@ -11,49 +11,6 @@
namespace OpenGL {
class StateTracker;
/// Collects index and vertex buffer bindings and applies them in one go.
/// Buffers are recorded as pointers to GLuint names and are only dereferenced in Bind().
class VertexArrayPushBuffer final {
public:
explicit VertexArrayPushBuffer(StateTracker& state_tracker);
~VertexArrayPushBuffer();
/// Clears the recorded index buffer and vertex buffer entries.
void Setup();
/// Records the index buffer to bind; no GL call is made here.
void SetIndexBuffer(const GLuint* buffer);
/// Appends a vertex buffer binding; no GL call is made here.
void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
GLsizei stride);
/// Applies the recorded index buffer (if any) and all recorded vertex buffer bindings.
void Bind();
private:
// Defined in the implementation file.
struct Entry;
StateTracker& state_tracker;
// Null when no index buffer has been recorded.
const GLuint* index_buffer{};
std::vector<Entry> vertex_buffers;
};
/// Collects ranged buffer bindings for a single GL target and applies them in one go.
/// Buffers are recorded as pointers to GLuint names and are only dereferenced in Bind().
class BindBuffersRangePushBuffer final {
public:
explicit BindBuffersRangePushBuffer(GLenum target);
~BindBuffersRangePushBuffer();
/// Discards all recorded bindings.
void Setup();
/// Appends a binding; no GL call is made here.
void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size);
/// Calls glBindBufferRange once per recorded binding.
void Bind();
private:
// Defined in the implementation file.
struct Entry;
GLenum target;
std::vector<Entry> entries;
};
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
} // namespace OpenGL

@ -74,18 +74,18 @@ Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
}
const VkBuffer* VKBufferCache::ToHandle(const Buffer& buffer) {
VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle();
}
const VkBuffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, size, 0);
});
return empty.handle.address();
return *empty.handle;
}
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
@ -94,7 +94,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
std::memcpy(staging.commit->Map(size), data, size);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
@ -117,7 +117,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
size](vk::CommandBuffer cmdbuf) {
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@ -144,7 +144,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) {
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
dst_offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});

@ -33,8 +33,8 @@ public:
VAddr cpu_addr, std::size_t size);
~CachedBufferBlock();
const VkBuffer* GetHandle() const {
return buffer.handle.address();
VkBuffer GetHandle() const {
return *buffer.handle;
}
private:
@ -50,15 +50,15 @@ public:
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
const VkBuffer* GetEmptyBuffer(std::size_t size) override;
VkBuffer GetEmptyBuffer(std::size_t size) override;
protected:
VkBuffer ToHandle(const Buffer& buffer) override;
void WriteBarrier() override {}
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
const VkBuffer* ToHandle(const Buffer& buffer) override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;

@ -343,13 +343,13 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
QuadArrayPass::~QuadArrayPass() = default;
std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
const u32 num_triangle_vertices = num_vertices * 6 / 4;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
@ -377,7 +377,7 @@ std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertice
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
});
return {buffer.handle.address(), 0};
return {*buffer.handle, 0};
}
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
@ -391,14 +391,14 @@ Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
Uint8Pass::~Uint8Pass() = default;
std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size);
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
@ -422,7 +422,7 @@ std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer s
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {buffer.handle.address(), 0};
return {*buffer.handle, 0};
}
} // namespace Vulkan

@ -50,7 +50,7 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue);
~QuadArrayPass();
std::pair<const VkBuffer*, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
private:
VKScheduler& scheduler;
@ -65,7 +65,7 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue);
~Uint8Pass();
std::pair<const VkBuffer*, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
private:
VKScheduler& scheduler;

@ -134,13 +134,13 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
class BufferBindings final {
public:
void AddVertexBinding(const VkBuffer* buffer, VkDeviceSize offset) {
vertex.buffer_ptrs[vertex.num_buffers] = buffer;
void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) {
vertex.buffers[vertex.num_buffers] = buffer;
vertex.offsets[vertex.num_buffers] = offset;
++vertex.num_buffers;
}
void SetIndexBinding(const VkBuffer* buffer, VkDeviceSize offset, VkIndexType type) {
void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
index.buffer = buffer;
index.offset = offset;
index.type = type;
@ -224,19 +224,19 @@ private:
// Some of these fields are intentionally left uninitialized to avoid initializing them twice.
struct {
std::size_t num_buffers = 0;
std::array<const VkBuffer*, Maxwell::NumVertexArrays> buffer_ptrs;
std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
} vertex;
struct {
const VkBuffer* buffer = nullptr;
VkBuffer buffer = nullptr;
VkDeviceSize offset;
VkIndexType type;
} index;
template <std::size_t N>
void BindStatic(VKScheduler& scheduler) const {
if (index.buffer != nullptr) {
if (index.buffer) {
BindStatic<N, true>(scheduler);
} else {
BindStatic<N, false>(scheduler);
@ -251,18 +251,14 @@ private:
}
std::array<VkBuffer, N> buffers;
std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(),
[](const auto ptr) { return *ptr; });
std::array<VkDeviceSize, N> offsets;
std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
if constexpr (is_indexed) {
// Indexed draw
scheduler.Record([buffers, offsets, index_buffer = *index.buffer,
index_offset = index.offset,
index_type = index.type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(index_buffer, index_offset, index_type);
scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
});
} else {
@ -787,7 +783,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
const std::size_t size = binding.buffer_size;
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
scheduler.Record([buffer = *buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
});
@ -867,7 +863,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
auto format = regs.index_array.format;
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset);
std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
format = Maxwell::IndexFormat::UnsignedShort;
}
@ -1004,8 +1000,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
const auto size = memory_manager.Read<u32>(address + 8);
if (size == 0) {
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry because
// Vulkan doesn't like empty buffers.
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry
// because Vulkan doesn't like empty buffers.
constexpr std::size_t dummy_size = 4;
const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);

@ -35,12 +35,13 @@ void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template
payload.clear();
}
// TODO(Rodrigo): Rework to write the payload directly
const auto payload_start = payload.data() + payload.size();
for (const auto& entry : entries) {
if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
payload.push_back(*image);
} else if (const auto buffer = std::get_if<Buffer>(&entry)) {
payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
} else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) {
payload.push_back(*buffer);
} else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
payload.push_back(*texel);
} else {

@ -18,12 +18,11 @@ class VKScheduler;
class DescriptorUpdateEntry {
public:
explicit DescriptorUpdateEntry() : image{} {}
explicit DescriptorUpdateEntry() {}
DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
DescriptorUpdateEntry(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
: buffer{buffer, offset, size} {}
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {}
DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
@ -54,8 +53,8 @@ public:
entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(const VkBuffer* buffer, u64 offset, std::size_t size) {
entries.push_back(Buffer{buffer, offset, size});
void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
}
void AddTexelBuffer(VkBufferView texel_buffer) {
@ -67,12 +66,7 @@ public:
}
private:
struct Buffer {
const VkBuffer* buffer = nullptr;
u64 offset = 0;
std::size_t size = 0;
};
using Variant = std::variant<VkDescriptorImageInfo, Buffer, VkBufferView>;
using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>;
const VKDevice& device;
VKScheduler& scheduler;