Merge pull request #4105 from ReinUsesLisp/resident-buffers

gl_rasterizer: Use NV_vertex_buffer_unified_memory for vertex buffer robustness
master
bunnei 2020-06-24 11:40:30 +07:00 committed by GitHub
commit 0e1268e507
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 277 additions and 213 deletions

@ -41,7 +41,11 @@ class BufferCache {
static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
public: public:
using BufferInfo = std::pair<BufferType, u64>; struct BufferInfo {
BufferType handle;
u64 offset;
u64 address;
};
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool is_written = false, bool use_fast_cbuf = false) { bool is_written = false, bool use_fast_cbuf = false) {
@ -50,7 +54,7 @@ public:
auto& memory_manager = system.GPU().MemoryManager(); auto& memory_manager = system.GPU().MemoryManager();
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
if (!cpu_addr_opt) { if (!cpu_addr_opt) {
return {GetEmptyBuffer(size), 0}; return GetEmptyBuffer(size);
} }
const VAddr cpu_addr = *cpu_addr_opt; const VAddr cpu_addr = *cpu_addr_opt;
@ -88,7 +92,7 @@ public:
Buffer* const block = GetBlock(cpu_addr, size); Buffer* const block = GetBlock(cpu_addr, size);
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
if (!map) { if (!map) {
return {GetEmptyBuffer(size), 0}; return GetEmptyBuffer(size);
} }
if (is_written) { if (is_written) {
map->MarkAsModified(true, GetModifiedTicks()); map->MarkAsModified(true, GetModifiedTicks());
@ -101,7 +105,7 @@ public:
} }
} }
return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))}; return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
} }
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@ -254,27 +258,17 @@ public:
committed_flushes.pop_front(); committed_flushes.pop_front();
} }
virtual BufferType GetEmptyBuffer(std::size_t size) = 0; virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
protected: protected:
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
std::unique_ptr<StreamBuffer> stream_buffer_) std::unique_ptr<StreamBuffer> stream_buffer)
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
stream_buffer_handle{stream_buffer->Handle()} {}
~BufferCache() = default; ~BufferCache() = default;
virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) = 0;
virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) = 0;
virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) = 0;
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
return {}; return {};
} }
@ -336,11 +330,11 @@ private:
const VAddr cpu_addr_end = cpu_addr + size; const VAddr cpu_addr_end = cpu_addr + size;
if (memory_manager.IsGranularRange(gpu_addr, size)) { if (memory_manager.IsGranularRange(gpu_addr, size)) {
u8* host_ptr = memory_manager.GetPointer(gpu_addr); u8* host_ptr = memory_manager.GetPointer(gpu_addr);
UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); block->Upload(block->Offset(cpu_addr), size, host_ptr);
} else { } else {
staging_buffer.resize(size); staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
} }
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
} }
@ -399,7 +393,7 @@ private:
} }
staging_buffer.resize(size); staging_buffer.resize(size);
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
} }
} }
@ -436,7 +430,7 @@ private:
const std::size_t size = map->end - map->start; const std::size_t size = map->end - map->start;
staging_buffer.resize(size); staging_buffer.resize(size);
DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); block->Download(block->Offset(map->start), size, staging_buffer.data());
system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
map->MarkAsModified(false, 0); map->MarkAsModified(false, 0);
} }
@ -449,7 +443,7 @@ private:
buffer_ptr += size; buffer_ptr += size;
buffer_offset += size; buffer_offset += size;
return {stream_buffer_handle, uploaded_offset}; return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
} }
void AlignBuffer(std::size_t alignment) { void AlignBuffer(std::size_t alignment) {
@ -464,7 +458,7 @@ private:
const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
const VAddr cpu_addr = buffer->CpuAddr(); const VAddr cpu_addr = buffer->CpuAddr();
std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
CopyBlock(*buffer, *new_buffer, 0, 0, old_size); new_buffer->CopyFrom(*buffer, 0, 0, old_size);
QueueDestruction(std::move(buffer)); QueueDestruction(std::move(buffer));
const VAddr cpu_addr_end = cpu_addr + new_size - 1; const VAddr cpu_addr_end = cpu_addr + new_size - 1;
@ -486,8 +480,8 @@ private:
const std::size_t new_size = size_1 + size_2; const std::size_t new_size = size_1 + size_2;
std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
QueueDestruction(std::move(first)); QueueDestruction(std::move(first));
QueueDestruction(std::move(second)); QueueDestruction(std::move(second));

@ -22,21 +22,46 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
gl_buffer.Create(); gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
if (device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
} }
Buffer::~Buffer() = default; Buffer::~Buffer() = default;
void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
data);
}
void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
data);
}
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t size) const {
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
}
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
const Device& device, std::size_t stream_size) const Device& device_, std::size_t stream_size)
: GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { : GenericBufferCache{rasterizer, system,
std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
device{device_} {
if (!device.HasFastBufferSubData()) { if (!device.HasFastBufferSubData()) {
return; return;
} }
static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
for (const GLuint cbuf : cbufs) { for (const GLuint cbuf : cbufs) {
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
@ -48,39 +73,20 @@ OGLBufferCache::~OGLBufferCache() {
} }
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<Buffer>(cpu_addr, size); return std::make_shared<Buffer>(device, cpu_addr, size);
} }
GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
return 0; return {0, 0, 0};
}
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) {
glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}
void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) {
glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
} }
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
std::size_t size) { std::size_t size) {
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
const GLuint cbuf = cbufs[cbuf_cursor++]; const GLuint cbuf = cbufs[cbuf_cursor++];
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
return {cbuf, 0}; return {cbuf, 0, 0};
} }
} // namespace OpenGL } // namespace OpenGL

@ -25,15 +25,27 @@ class RasterizerOpenGL;
class Buffer : public VideoCommon::BufferBlock { class Buffer : public VideoCommon::BufferBlock {
public: public:
explicit Buffer(VAddr cpu_addr, const std::size_t size); explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
~Buffer(); ~Buffer();
GLuint Handle() const { void Upload(std::size_t offset, std::size_t size, const u8* data) const;
void Download(std::size_t offset, std::size_t size, u8* data) const;
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t size) const;
GLuint Handle() const noexcept {
return gl_buffer.handle; return gl_buffer.handle;
} }
u64 Address() const noexcept {
return gpu_address;
}
private: private:
OGLBuffer gl_buffer; OGLBuffer gl_buffer;
u64 gpu_address = 0;
}; };
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
@ -43,7 +55,7 @@ public:
const Device& device, std::size_t stream_size); const Device& device, std::size_t stream_size);
~OGLBufferCache(); ~OGLBufferCache();
GLuint GetEmptyBuffer(std::size_t) override; BufferInfo GetEmptyBuffer(std::size_t) override;
void Acquire() noexcept { void Acquire() noexcept {
cbuf_cursor = 0; cbuf_cursor = 0;
@ -52,22 +64,16 @@ public:
protected: protected:
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) override;
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
private: private:
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
const Device& device;
std::size_t cbuf_cursor = 0; std::size_t cbuf_cursor = 0;
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * std::array<GLuint, NUM_CBUFS> cbufs{};
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
cbufs;
}; };
} // namespace OpenGL } // namespace OpenGL

@ -188,16 +188,32 @@ bool IsASTCSupported() {
return true; return true;
} }
/// @brief Returns true when a GL_RENDERER is a Turing GPU
/// @param renderer GL_RENDERER string
bool IsTuring(std::string_view renderer) {
static constexpr std::array<std::string_view, 12> TURING_GPUS = {
"GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070",
"RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000",
"Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4",
};
return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(),
[renderer](std::string_view candidate) {
return renderer.find(candidate) != std::string_view::npos;
});
}
} // Anonymous namespace } // Anonymous namespace
Device::Device() Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions(); const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc."; const bool is_amd = vendor == "ATI Technologies Inc.";
const bool is_turing = is_nvidia && IsTuring(renderer);
bool disable_fast_buffer_sub_data = false; bool disable_fast_buffer_sub_data = false;
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
@ -221,8 +237,16 @@ Device::Device()
has_variable_aoffi = TestVariableAoffi(); has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd; has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug(); has_precise_bug = TestPreciseBug();
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
// Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on
// DeleteBuffers. Disable unified memory on these devices.
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing;
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
GLAD_GL_NV_transform_feedback2; GLAD_GL_NV_transform_feedback2;

@ -72,6 +72,10 @@ public:
return has_texture_shadow_lod; return has_texture_shadow_lod;
} }
bool HasVertexBufferUnifiedMemory() const {
return has_vertex_buffer_unified_memory;
}
bool HasASTC() const { bool HasASTC() const {
return has_astc; return has_astc;
} }
@ -115,6 +119,7 @@ private:
bool has_vertex_viewport_layer{}; bool has_vertex_viewport_layer{};
bool has_image_load_formatted{}; bool has_image_load_formatted{};
bool has_texture_shadow_lod{}; bool has_texture_shadow_lod{};
bool has_vertex_buffer_unified_memory{};
bool has_astc{}; bool has_astc{};
bool has_variable_aoffi{}; bool has_variable_aoffi{};
bool has_component_indexing_bug{}; bool has_component_indexing_bug{};

@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
constexpr std::size_t NumSupportedVertexAttributes = 16; constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
template <typename Engine, typename Entry> template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
// avoid OpenGL errors. // avoid OpenGL errors.
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
// assume every shader uses them all. // assume every shader uses them all.
for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexFormat0 + index]) { if (!flags[Dirty::VertexFormat0 + index]) {
continue; continue;
} }
@ -231,9 +232,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {
MICROPROFILE_SCOPE(OpenGL_VB); MICROPROFILE_SCOPE(OpenGL_VB);
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
// Upload all guest vertex arrays sequentially to our buffer // Upload all guest vertex arrays sequentially to our buffer
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
if (!flags[Dirty::VertexBuffer0 + index]) { if (!flags[Dirty::VertexBuffer0 + index]) {
continue; continue;
} }
@ -246,16 +249,25 @@ void RasterizerOpenGL::SetupVertexBuffer() {
const GPUVAddr start = vertex_array.StartAddress(); const GPUVAddr start = vertex_array.StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end >= start); ASSERT(end >= start);
const GLuint gl_index = static_cast<GLuint>(index);
const u64 size = end - start; const u64 size = end - start;
if (size == 0) { if (size == 0) {
glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
if (use_unified_memory) {
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
}
continue; continue;
} }
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); const auto info = buffer_cache.UploadMemory(start, size);
glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, if (use_unified_memory) {
vertex_array.stride); glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
info.address + info.offset, size);
} else {
glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
}
} }
} }
@ -268,7 +280,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
flags[Dirty::VertexInstances] = false; flags[Dirty::VertexInstances] = false;
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) { if (!flags[Dirty::VertexInstance0 + index]) {
continue; continue;
} }
@ -285,9 +297,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
MICROPROFILE_SCOPE(OpenGL_Index); MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize(); const std::size_t size = CalculateIndexBufferSize();
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
return offset; return info.offset;
} }
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
@ -643,9 +655,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
if (!device.UseAssemblyShaders()) { if (!device.UseAssemblyShaders()) {
MaxwellUniformData ubo; MaxwellUniformData ubo;
ubo.SetFromRegs(gpu); ubo.SetFromRegs(gpu);
const auto [buffer, offset] = const auto info =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
static_cast<GLsizeiptr>(sizeof(ubo))); static_cast<GLsizeiptr>(sizeof(ubo)));
} }
@ -956,8 +968,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
if (device.UseAssemblyShaders()) { if (device.UseAssemblyShaders()) {
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
} else { } else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding, glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
} }
return; return;
} }
@ -970,24 +981,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
const GPUVAddr gpu_addr = buffer.address; const GPUVAddr gpu_addr = buffer.address;
auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
if (device.UseAssemblyShaders()) { if (device.UseAssemblyShaders()) {
UNIMPLEMENTED_IF(use_unified); UNIMPLEMENTED_IF(use_unified);
if (offset != 0) { if (info.offset != 0) {
const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
cbuf = staging_cbuf; info.handle = staging_cbuf;
offset = 0; info.offset = 0;
} }
glBindBufferRangeNV(stage, binding, cbuf, offset, size); glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
return; return;
} }
if (use_unified) { if (use_unified) {
glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
unified_offset, size);
} else { } else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
} }
} }
@ -1023,9 +1035,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) { GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] = const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
static_cast<GLsizeiptr>(size)); static_cast<GLsizeiptr>(size));
} }
@ -1712,8 +1723,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
const GLuint handle = transform_feedback_buffers[index].handle; const GLuint handle = transform_feedback_buffers[index].handle;
const GPUVAddr gpu_addr = binding.Address(); const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size; const std::size_t size = binding.buffer_size;
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
static_cast<GLsizeiptr>(size));
} }
} }

@ -2,11 +2,13 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <deque> #include <tuple>
#include <vector> #include <vector>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/microprofile.h" #include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL { namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
bool use_persistent)
: buffer_size(size) { : buffer_size(size) {
gl_buffer.Create(); gl_buffer.Create();
@ -29,23 +30,19 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
allocate_size *= 2; allocate_size *= 2;
} }
if (use_persistent) { static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
persistent = true;
coherent = prefer_coherent;
const GLbitfield flags =
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( mapped_ptr = static_cast<u8*>(
gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
} else {
glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW); if (device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
} }
} }
OGLStreamBuffer::~OGLStreamBuffer() { OGLStreamBuffer::~OGLStreamBuffer() {
if (persistent) {
glUnmapNamedBuffer(gl_buffer.handle); glUnmapNamedBuffer(gl_buffer.handle);
}
gl_buffer.Release(); gl_buffer.Release();
} }
@ -60,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
bool invalidate = false; bool invalidate = false;
if (buffer_pos + size > buffer_size) { if (buffer_pos + size > buffer_size) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
glInvalidateBufferData(gl_buffer.handle);
buffer_pos = 0; buffer_pos = 0;
invalidate = true; invalidate = true;
if (persistent) {
glUnmapNamedBuffer(gl_buffer.handle);
}
} }
if (invalidate || !persistent) { return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
mapped_ptr = static_cast<u8*>(
glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
mapped_offset = buffer_pos;
}
return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
} }
void OGLStreamBuffer::Unmap(GLsizeiptr size) { void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size); ASSERT(size <= mapped_size);
if (!coherent && size > 0) { if (size > 0) {
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
}
if (!persistent) {
glUnmapNamedBuffer(gl_buffer.handle);
} }
buffer_pos += size; buffer_pos += size;

@ -11,10 +11,11 @@
namespace OpenGL { namespace OpenGL {
class Device;
class OGLStreamBuffer : private NonCopyable { class OGLStreamBuffer : private NonCopyable {
public: public:
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
bool use_persistent = true);
~OGLStreamBuffer(); ~OGLStreamBuffer();
/* /*
@ -33,19 +34,20 @@ public:
return gl_buffer.handle; return gl_buffer.handle;
} }
GLsizeiptr Size() const { u64 Address() const {
return gpu_address;
}
GLsizeiptr Size() const noexcept {
return buffer_size; return buffer_size;
} }
private: private:
OGLBuffer gl_buffer; OGLBuffer gl_buffer;
bool coherent = false; GLuint64EXT gpu_address = 0;
bool persistent = false;
GLintptr buffer_pos = 0; GLintptr buffer_pos = 0;
GLsizeiptr buffer_size = 0; GLsizeiptr buffer_size = 0;
GLintptr mapped_offset = 0;
GLsizeiptr mapped_size = 0; GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr; u8* mapped_ptr = nullptr;
}; };

@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black // Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
&vertex_buffer_address);
}
} }
void RendererOpenGL::AddTelemetryFields() { void RendererOpenGL::AddTelemetryFields() {
@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
offsetof(ScreenRectVertex, tex_coord)); offsetof(ScreenRectVertex, tex_coord));
glVertexAttribBinding(PositionLocation, 0); glVertexAttribBinding(PositionLocation, 0);
glVertexAttribBinding(TexCoordLocation, 0); glVertexAttribBinding(TexCoordLocation, 0);
if (device.HasVertexBufferUnifiedMemory()) {
glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
sizeof(vertices));
} else {
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
}
glBindTextureUnit(0, screen_info.display_texture); glBindTextureUnit(0, screen_info.display_texture);
glBindSampler(0, 0); glBindSampler(0, 0);

@ -107,6 +107,9 @@ private:
OGLPipeline pipeline; OGLPipeline pipeline;
OGLFramebuffer screenshot_framebuffer; OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
GLuint64EXT vertex_buffer_address = 0;
/// Display information for Switch screen /// Display information for Switch screen
ScreenInfo screen_info; ScreenInfo screen_info;

@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
} // Anonymous namespace } // Anonymous namespace
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
std::size_t size) VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} { : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
VkBufferCreateInfo ci; VkBufferCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
ci.pNext = nullptr; ci.pNext = nullptr;
@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp
Buffer::~Buffer() = default; Buffer::~Buffer() = default;
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
CreateStreamBuffer(device,
scheduler)},
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
staging_pool} {}
VKBufferCache::~VKBufferCache() = default;
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
}
VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, size, 0);
});
return *empty.handle;
}
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true); const auto& staging = staging_pool.GetUnusedBuffer(size, true);
std::memcpy(staging.commit->Map(size), data, size); std::memcpy(staging.commit->Map(size), data, size);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
size](vk::CommandBuffer cmdbuf) { const VkBuffer handle = Handle();
cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
VkBufferMemoryBarrier barrier; VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = buffer; barrier.buffer = handle;
barrier.offset = offset; barrier.offset = offset;
barrier.size = size; barrier.size = size;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
}); });
} }
void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true); const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
size](vk::CommandBuffer cmdbuf) { const VkBuffer handle = Handle();
scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
VkBufferMemoryBarrier barrier; VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr; barrier.pNext = nullptr;
@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = buffer; barrier.buffer = handle;
barrier.offset = offset; barrier.offset = offset;
barrier.size = size; barrier.size = size;
@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
}); });
scheduler.Finish(); scheduler.Finish();
std::memcpy(data, staging.commit->Map(size), size); std::memcpy(data, staging.commit->Map(size), size);
} }
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t dst_offset, std::size_t size) { std::size_t size) const {
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset,
const VkBuffer dst_buffer = Handle();
scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
size](vk::CommandBuffer cmdbuf) { size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
}); });
} }
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
CreateStreamBuffer(device,
scheduler)},
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
staging_pool} {}
VKBufferCache::~VKBufferCache() = default;
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
size);
}
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, size, 0);
});
return {*empty.handle, 0, 0};
}
} // namespace Vulkan } // namespace Vulkan

@ -25,15 +25,29 @@ class VKScheduler;
class Buffer final : public VideoCommon::BufferBlock { class Buffer final : public VideoCommon::BufferBlock {
public: public:
explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
std::size_t size); VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
~Buffer(); ~Buffer();
void Upload(std::size_t offset, std::size_t size, const u8* data) const;
void Download(std::size_t offset, std::size_t size, u8* data) const;
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t size) const;
VkBuffer Handle() const { VkBuffer Handle() const {
return *buffer.handle; return *buffer.handle;
} }
u64 Address() const {
return 0;
}
private: private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
VKBuffer buffer; VKBuffer buffer;
}; };
@ -44,20 +58,11 @@ public:
VKScheduler& scheduler, VKStagingBufferPool& staging_pool); VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache(); ~VKBufferCache();
VkBuffer GetEmptyBuffer(std::size_t size) override; BufferInfo GetEmptyBuffer(std::size_t size) override;
protected: protected:
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) override;
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
private: private:
const VKDevice& device; const VKDevice& device;
VKMemoryManager& memory_manager; VKMemoryManager& memory_manager;

@ -870,10 +870,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
UNIMPLEMENTED_IF(binding.buffer_offset != 0); UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GPUVAddr gpu_addr = binding.Address(); const GPUVAddr gpu_addr = binding.Address();
const auto size = static_cast<VkDeviceSize>(binding.buffer_size); const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
}); });
@ -925,8 +925,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
continue; continue;
} }
const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); const auto info = buffer_cache.UploadMemory(start, size);
buffer_bindings.AddVertexBinding(buffer, offset); buffer_bindings.AddVertexBinding(info.handle, info.offset);
} }
} }
@ -948,7 +948,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
break; break;
} }
const GPUVAddr gpu_addr = regs.index_array.IndexStart(); const GPUVAddr gpu_addr = regs.index_array.IndexStart();
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
VkBuffer buffer = info.handle;
u64 offset = info.offset;
std::tie(buffer, offset) = quad_indexed_pass.Assemble( std::tie(buffer, offset) = quad_indexed_pass.Assemble(
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
@ -962,7 +964,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
break; break;
} }
const GPUVAddr gpu_addr = regs.index_array.IndexStart(); const GPUVAddr gpu_addr = regs.index_array.IndexStart();
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
VkBuffer buffer = info.handle;
u64 offset = info.offset;
auto format = regs.index_array.format; auto format = regs.index_array.format;
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@ -1109,10 +1113,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
ASSERT(size <= MaxConstbufferSize); ASSERT(size <= MaxConstbufferSize);
const auto [buffer_handle, offset] = const auto info =
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
} }
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@ -1126,14 +1129,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
// default buffer. // default buffer.
static constexpr std::size_t dummy_size = 4; static constexpr std::size_t dummy_size = 4;
const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
return; return;
} }
const auto [buffer, offset] = buffer_cache.UploadMemory( const auto info = buffer_cache.UploadMemory(
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
update_descriptor_queue.AddBuffer(buffer, offset, size); update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
} }
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,

@ -35,10 +35,14 @@ public:
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Unmap(u64 size); void Unmap(u64 size);
VkBuffer Handle() const { VkBuffer Handle() const noexcept {
return *buffer; return *buffer;
} }
u64 Address() const noexcept {
return 0;
}
private: private:
struct Watch final { struct Watch final {
VKFenceWatch fence; VKFenceWatch fence;