Merge pull request #3600 from ReinUsesLisp/no-pointer-buf-cache

buffer_cache: Return handles instead of pointer to handles
master
Fernando Sahmkow 2020-04-16 19:58:13 +07:00 committed by GitHub
commit c81f256111
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 90 additions and 228 deletions

@ -29,10 +29,10 @@ namespace VideoCommon {
using MapInterval = std::shared_ptr<MapIntervalBase>;
template <typename TBuffer, typename TBufferType, typename StreamBuffer>
template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
class BufferCache {
public:
using BufferInfo = std::pair<const TBufferType*, u64>;
using BufferInfo = std::pair<BufferType, u64>;
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool is_written = false, bool use_fast_cbuf = false) {
@ -89,9 +89,7 @@ public:
}
}
const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
return {ToHandle(block), offset};
return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
}
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@ -156,7 +154,7 @@ public:
}
}
virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
protected:
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@ -166,19 +164,19 @@ protected:
~BufferCache() = default;
virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
virtual void WriteBarrier() = 0;
virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
const u8* data) = 0;
virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
u8* data) = 0;
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) = 0;
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
@ -221,9 +219,8 @@ private:
return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
}
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
const std::size_t size) {
std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
auto& memory_manager = system.GPU().MemoryManager();
@ -272,7 +269,7 @@ private:
return new_map;
}
void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
std::vector<MapInterval>& overlaps) {
const IntervalType base_interval{start, end};
IntervalSet interval_set{};
@ -313,7 +310,7 @@ private:
void FlushMap(MapInterval map) {
std::size_t size = map->GetEnd() - map->GetStart();
TBuffer block = blocks[map->GetStart() >> block_page_bits];
OwnerBuffer block = blocks[map->GetStart() >> block_page_bits];
staging_buffer.resize(size);
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
@ -328,7 +325,7 @@ private:
buffer_ptr += size;
buffer_offset += size;
return {&stream_buffer_handle, uploaded_offset};
return {stream_buffer_handle, uploaded_offset};
}
void AlignBuffer(std::size_t alignment) {
@ -338,11 +335,11 @@ private:
buffer_offset = offset_aligned;
}
TBuffer EnlargeBlock(TBuffer buffer) {
OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
const std::size_t old_size = buffer->GetSize();
const std::size_t new_size = old_size + block_page_size;
const VAddr cpu_addr = buffer->GetCpuAddr();
TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
CopyBlock(buffer, new_buffer, 0, 0, old_size);
buffer->SetEpoch(epoch);
pending_destruction.push_back(buffer);
@ -356,14 +353,14 @@ private:
return new_buffer;
}
TBuffer MergeBlocks(TBuffer first, TBuffer second) {
OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
const std::size_t size_1 = first->GetSize();
const std::size_t size_2 = second->GetSize();
const VAddr first_addr = first->GetCpuAddr();
const VAddr second_addr = second->GetCpuAddr();
const VAddr new_addr = std::min(first_addr, second_addr);
const std::size_t new_size = size_1 + size_2;
TBuffer new_buffer = CreateBlock(new_addr, new_size);
OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
first->SetEpoch(epoch);
@ -380,8 +377,8 @@ private:
return new_buffer;
}
TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
TBuffer found{};
OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
OwnerBuffer found;
const VAddr cpu_addr_end = cpu_addr + size - 1;
u64 page_start = cpu_addr >> block_page_bits;
const u64 page_end = cpu_addr_end >> block_page_bits;
@ -457,7 +454,7 @@ private:
Core::System& system;
std::unique_ptr<StreamBuffer> stream_buffer;
TBufferType stream_buffer_handle{};
BufferType stream_buffer_handle{};
bool invalidated = false;
@ -475,9 +472,9 @@ private:
static constexpr u64 block_page_bits = 21;
static constexpr u64 block_page_size = 1ULL << block_page_bits;
std::unordered_map<u64, TBuffer> blocks;
std::unordered_map<u64, OwnerBuffer> blocks;
std::list<TBuffer> pending_destruction;
std::list<OwnerBuffer> pending_destruction;
u64 epoch = 0;
u64 modified_ticks = 0;

@ -55,33 +55,31 @@ void OGLBufferCache::WriteBarrier() {
glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) {
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle();
}
const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
static const GLuint null_buffer = 0;
return &null_buffer;
GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
return 0;
}
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) {
glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}
void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) {
glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(),
static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset),
static_cast<GLsizeiptr>(size));
glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
}
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
@ -89,7 +87,7 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
const GLuint& cbuf = cbufs[cbuf_cursor++];
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
return {&cbuf, 0};
return {cbuf, 0};
}
} // namespace OpenGL

@ -34,12 +34,12 @@ public:
explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
~CachedBufferBlock();
const GLuint* GetHandle() const {
return &gl_buffer.handle;
GLuint GetHandle() const {
return gl_buffer.handle;
}
private:
OGLBuffer gl_buffer{};
OGLBuffer gl_buffer;
};
class OGLBufferCache final : public GenericBufferCache {
@ -48,7 +48,7 @@ public:
const Device& device, std::size_t stream_size);
~OGLBufferCache();
const GLuint* GetEmptyBuffer(std::size_t) override;
GLuint GetEmptyBuffer(std::size_t) override;
void Acquire() noexcept {
cbuf_cursor = 0;
@ -57,9 +57,9 @@ public:
protected:
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
void WriteBarrier() override;
GLuint ToHandle(const Buffer& buffer) override;
const GLuint* ToHandle(const Buffer& buffer) override;
void WriteBarrier() override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;

@ -188,10 +188,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
ASSERT(end > start);
const u64 size = end - start + 1;
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
// Bind the vertex array to the buffer at the current offset.
vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer,
vertex_buffer_offset, vertex_array.stride);
glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
vertex_array.stride);
}
}
@ -222,7 +220,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize();
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
vertex_array_pushbuffer.SetIndexBuffer(buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
return offset;
}
@ -524,7 +522,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Prepare vertex array format.
SetupVertexFormat();
vertex_array_pushbuffer.Setup();
// Upload vertex and index data.
SetupVertexBuffer();
@ -534,17 +531,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
index_buffer_offset = SetupIndexBuffer();
}
// Prepare packed bindings.
bind_ubo_pushbuffer.Setup();
bind_ssbo_pushbuffer.Setup();
// Setup emulation uniform buffer.
GLShader::MaxwellUniformData ubo;
ubo.SetFromRegs(gpu);
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
@ -557,11 +550,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Signal the buffer cache that we are not going to upload more things.
buffer_cache.Unmap();
// Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
vertex_array_pushbuffer.Bind();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
program_manager.BindGraphicsPipeline();
if (texture_cache.TextureBarrier()) {
@ -630,17 +618,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);
bind_ubo_pushbuffer.Setup();
bind_ssbo_pushbuffer.Setup();
SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);
buffer_cache.Unmap();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
@ -771,8 +753,8 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
const ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
sizeof(float));
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
sizeof(float));
return;
}
@ -783,7 +765,7 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
device.HasFastBufferSubData());
bind_ubo_pushbuffer.Push(binding, cbuf, offset, size);
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
}
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@ -819,7 +801,8 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
static_cast<GLsizeiptr>(size));
}
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
@ -1432,7 +1415,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
}
}

@ -231,9 +231,7 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker};
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
GLint vertex_binding = 0;
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;

@ -14,68 +14,6 @@
namespace OpenGL {
struct VertexArrayPushBuffer::Entry {
GLuint binding_index{};
const GLuint* buffer{};
GLintptr offset{};
GLsizei stride{};
};
VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker)
: state_tracker{state_tracker} {}
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
void VertexArrayPushBuffer::Setup() {
index_buffer = nullptr;
vertex_buffers.clear();
}
void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
index_buffer = buffer;
}
void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
GLintptr offset, GLsizei stride) {
vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
}
void VertexArrayPushBuffer::Bind() {
if (index_buffer) {
state_tracker.BindIndexBuffer(*index_buffer);
}
for (const auto& entry : vertex_buffers) {
glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride);
}
}
struct BindBuffersRangePushBuffer::Entry {
GLuint binding;
const GLuint* buffer;
GLintptr offset;
GLsizeiptr size;
};
BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
void BindBuffersRangePushBuffer::Setup() {
entries.clear();
}
void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset,
GLsizeiptr size) {
entries.push_back(Entry{binding, buffer, offset, size});
}
void BindBuffersRangePushBuffer::Bind() {
for (const Entry& entry : entries) {
glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size);
}
}
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
if (!GLAD_GL_KHR_debug) {
// We don't need to throw an error as this is just for debugging

@ -11,49 +11,6 @@
namespace OpenGL {
class StateTracker;
class VertexArrayPushBuffer final {
public:
explicit VertexArrayPushBuffer(StateTracker& state_tracker);
~VertexArrayPushBuffer();
void Setup();
void SetIndexBuffer(const GLuint* buffer);
void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
GLsizei stride);
void Bind();
private:
struct Entry;
StateTracker& state_tracker;
const GLuint* index_buffer{};
std::vector<Entry> vertex_buffers;
};
class BindBuffersRangePushBuffer final {
public:
explicit BindBuffersRangePushBuffer(GLenum target);
~BindBuffersRangePushBuffer();
void Setup();
void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size);
void Bind();
private:
struct Entry;
GLenum target;
std::vector<Entry> entries;
};
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
} // namespace OpenGL

@ -74,18 +74,18 @@ Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
}
const VkBuffer* VKBufferCache::ToHandle(const Buffer& buffer) {
VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle();
}
const VkBuffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, size, 0);
});
return empty.handle.address();
return *empty.handle;
}
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
@ -94,7 +94,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
std::memcpy(staging.commit->Map(size), data, size);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
@ -117,7 +117,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
size](vk::CommandBuffer cmdbuf) {
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@ -144,7 +144,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) {
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
dst_offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});

@ -33,8 +33,8 @@ public:
VAddr cpu_addr, std::size_t size);
~CachedBufferBlock();
const VkBuffer* GetHandle() const {
return buffer.handle.address();
VkBuffer GetHandle() const {
return *buffer.handle;
}
private:
@ -50,15 +50,15 @@ public:
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
const VkBuffer* GetEmptyBuffer(std::size_t size) override;
VkBuffer GetEmptyBuffer(std::size_t size) override;
protected:
VkBuffer ToHandle(const Buffer& buffer) override;
void WriteBarrier() override {}
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
const VkBuffer* ToHandle(const Buffer& buffer) override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;

@ -343,13 +343,13 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
QuadArrayPass::~QuadArrayPass() = default;
std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
const u32 num_triangle_vertices = num_vertices * 6 / 4;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
@ -377,7 +377,7 @@ std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertice
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
});
return {buffer.handle.address(), 0};
return {*buffer.handle, 0};
}
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
@ -391,14 +391,14 @@ Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
Uint8Pass::~Uint8Pass() = default;
std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size);
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
@ -422,7 +422,7 @@ std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer s
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {buffer.handle.address(), 0};
return {*buffer.handle, 0};
}
} // namespace Vulkan

@ -50,7 +50,7 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue);
~QuadArrayPass();
std::pair<const VkBuffer*, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
private:
VKScheduler& scheduler;
@ -65,7 +65,7 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue);
~Uint8Pass();
std::pair<const VkBuffer*, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
private:
VKScheduler& scheduler;

@ -137,13 +137,13 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
class BufferBindings final {
public:
void AddVertexBinding(const VkBuffer* buffer, VkDeviceSize offset) {
vertex.buffer_ptrs[vertex.num_buffers] = buffer;
void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) {
vertex.buffers[vertex.num_buffers] = buffer;
vertex.offsets[vertex.num_buffers] = offset;
++vertex.num_buffers;
}
void SetIndexBinding(const VkBuffer* buffer, VkDeviceSize offset, VkIndexType type) {
void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
index.buffer = buffer;
index.offset = offset;
index.type = type;
@ -227,19 +227,19 @@ private:
// Some of these fields are intentionally left uninitialized to avoid initializing them twice.
struct {
std::size_t num_buffers = 0;
std::array<const VkBuffer*, Maxwell::NumVertexArrays> buffer_ptrs;
std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
} vertex;
struct {
const VkBuffer* buffer = nullptr;
VkBuffer buffer = nullptr;
VkDeviceSize offset;
VkIndexType type;
} index;
template <std::size_t N>
void BindStatic(VKScheduler& scheduler) const {
if (index.buffer != nullptr) {
if (index.buffer) {
BindStatic<N, true>(scheduler);
} else {
BindStatic<N, false>(scheduler);
@ -254,18 +254,14 @@ private:
}
std::array<VkBuffer, N> buffers;
std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(),
[](const auto ptr) { return *ptr; });
std::array<VkDeviceSize, N> offsets;
std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
if constexpr (is_indexed) {
// Indexed draw
scheduler.Record([buffers, offsets, index_buffer = *index.buffer,
index_offset = index.offset,
index_type = index.type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(index_buffer, index_offset, index_type);
scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
});
} else {
@ -790,7 +786,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
const std::size_t size = binding.buffer_size;
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
scheduler.Record([buffer = *buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
});
@ -870,7 +866,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
auto format = regs.index_array.format;
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset);
std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
format = Maxwell::IndexFormat::UnsignedShort;
}
@ -1007,8 +1003,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
const auto size = memory_manager.Read<u32>(address + 8);
if (size == 0) {
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry because
// Vulkan doesn't like empty buffers.
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry
// because Vulkan doesn't like empty buffers.
constexpr std::size_t dummy_size = 4;
const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);

@ -35,12 +35,13 @@ void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template
payload.clear();
}
// TODO(Rodrigo): Rework to write the payload directly
const auto payload_start = payload.data() + payload.size();
for (const auto& entry : entries) {
if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
payload.push_back(*image);
} else if (const auto buffer = std::get_if<Buffer>(&entry)) {
payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
} else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) {
payload.push_back(*buffer);
} else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
payload.push_back(*texel);
} else {

@ -18,12 +18,11 @@ class VKScheduler;
class DescriptorUpdateEntry {
public:
explicit DescriptorUpdateEntry() : image{} {}
explicit DescriptorUpdateEntry() {}
DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
DescriptorUpdateEntry(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
: buffer{buffer, offset, size} {}
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {}
DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
@ -54,8 +53,8 @@ public:
entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(const VkBuffer* buffer, u64 offset, std::size_t size) {
entries.push_back(Buffer{buffer, offset, size});
void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
}
void AddTexelBuffer(VkBufferView texel_buffer) {
@ -67,12 +66,7 @@ public:
}
private:
struct Buffer {
const VkBuffer* buffer = nullptr;
u64 offset = 0;
std::size_t size = 0;
};
using Variant = std::variant<VkDescriptorImageInfo, Buffer, VkBufferView>;
using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>;
const VKDevice& device;
VKScheduler& scheduler;