video_core: Rewrite the texture cache

The current texture cache has several points that hurt maintainability
and performance. It's easy to break unrelated parts of the cache
when doing minor changes. The cache can easily forget valuable
information about the cached textures by CPU writes or simply by its
normal usage.

This commit aims to address those issues.
master
ReinUsesLisp 2020-12-30 02:25:23 +07:00
parent 9106ac1e6b
commit 9764c13d6d
152 changed files with 10609 additions and 8351 deletions

@ -85,14 +85,10 @@ add_library(video_core STATIC
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
renderer_opengl/gl_framebuffer_cache.cpp
renderer_opengl/gl_framebuffer_cache.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
renderer_opengl/gl_sampler_cache.cpp
renderer_opengl/gl_sampler_cache.h
renderer_opengl/gl_shader_cache.cpp
renderer_opengl/gl_shader_cache.h
renderer_opengl/gl_shader_decompiler.cpp
@ -114,8 +110,10 @@ add_library(video_core STATIC
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
renderer_opengl/utils.cpp
renderer_opengl/utils.h
renderer_opengl/util_shaders.cpp
renderer_opengl/util_shaders.h
renderer_vulkan/blit_image.cpp
renderer_vulkan/blit_image.h
renderer_vulkan/fixed_pipeline_state.cpp
renderer_vulkan/fixed_pipeline_state.h
renderer_vulkan/maxwell_to_vk.cpp
@ -142,8 +140,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_fence_manager.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
renderer_vulkan/vk_master_semaphore.cpp
renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_memory_manager.cpp
@ -154,12 +150,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_resource_pool.cpp
renderer_vulkan/vk_resource_pool.h
renderer_vulkan/vk_sampler_cache.cpp
renderer_vulkan/vk_sampler_cache.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_shader_decompiler.cpp
@ -180,8 +172,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_update_descriptor.h
renderer_vulkan/wrapper.cpp
renderer_vulkan/wrapper.h
sampler_cache.cpp
sampler_cache.h
shader_cache.h
shader_notify.cpp
shader_notify.h
@ -238,19 +228,32 @@ add_library(video_core STATIC
shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/accelerated_swizzle.cpp
texture_cache/accelerated_swizzle.h
texture_cache/decode_bc4.cpp
texture_cache/decode_bc4.h
texture_cache/descriptor_table.h
texture_cache/formatter.cpp
texture_cache/formatter.h
texture_cache/format_lookup_table.cpp
texture_cache/format_lookup_table.h
texture_cache/surface_base.cpp
texture_cache/surface_base.h
texture_cache/surface_params.cpp
texture_cache/surface_params.h
texture_cache/surface_view.cpp
texture_cache/surface_view.h
texture_cache/image_base.cpp
texture_cache/image_base.h
texture_cache/image_info.cpp
texture_cache/image_info.h
texture_cache/image_view_base.cpp
texture_cache/image_view_base.h
texture_cache/image_view_info.cpp
texture_cache/image_view_info.h
texture_cache/render_targets.h
texture_cache/samples_helper.h
texture_cache/slot_vector.h
texture_cache/texture_cache.h
texture_cache/types.h
texture_cache/util.cpp
texture_cache/util.h
textures/astc.cpp
textures/astc.h
textures/convert.cpp
textures/convert.h
textures/decoders.cpp
textures/decoders.h
textures/texture.cpp

@ -118,20 +118,17 @@ public:
/// Prepares the buffer cache for data uploading
/// @param max_size Maximum number of bytes that will be uploaded
/// @return True when a stream buffer invalidation was required, false otherwise
bool Map(std::size_t max_size) {
void Map(std::size_t max_size) {
std::lock_guard lock{mutex};
bool invalidated;
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
buffer_offset = buffer_offset_base;
return invalidated;
}
/// Finishes the upload stream
void Unmap() {
std::lock_guard lock{mutex};
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
stream_buffer.Unmap(buffer_offset - buffer_offset_base);
}
/// Function called at the end of each frame, intended for deferred operations
@ -261,9 +258,9 @@ public:
protected:
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
std::unique_ptr<StreamBuffer> stream_buffer_)
StreamBuffer& stream_buffer_)
: rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
stream_buffer{stream_buffer_} {}
~BufferCache() = default;
@ -441,7 +438,7 @@ private:
buffer_ptr += size;
buffer_offset += size;
return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
}
void AlignBuffer(std::size_t alignment) {
@ -567,9 +564,7 @@ private:
VideoCore::RasterizerInterface& rasterizer;
Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
std::unique_ptr<StreamBuffer> stream_buffer;
BufferType stream_buffer_handle;
StreamBuffer& stream_buffer;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;

@ -9,7 +9,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/textures/decoders.h"
extern "C" {
#include <libswscale/swscale.h>
@ -105,9 +105,9 @@ void Vic::Execute() {
const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
block_height, 0);
std::vector<u8> swizzled_data(size);
Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4,
swizzled_data.data(), converted_frame_buffer.get(),
false, block_height, 0, 1);
Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
frame->width, 4, swizzled_data.data(),
converted_frame_buffer.get(), block_height, 0, 0);
gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
gpu.Maxwell3D().OnMemoryWrite();

@ -3,9 +3,9 @@
// Refer to the license.txt file included.
#include <array>
#include <bitset>
#include <cstddef>
#include "common/common_types.h"
#include "video_core/compatible_formats.h"
#include "video_core/surface.h"
@ -13,23 +13,25 @@ namespace VideoCore::Surface {
namespace {
using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
// Compatibility table taken from Table 3.X.2 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
constexpr std::array VIEW_CLASS_128_BITS = {
constexpr std::array VIEW_CLASS_128_BITS{
PixelFormat::R32G32B32A32_FLOAT,
PixelFormat::R32G32B32A32_UINT,
PixelFormat::R32G32B32A32_SINT,
};
constexpr std::array VIEW_CLASS_96_BITS = {
constexpr std::array VIEW_CLASS_96_BITS{
PixelFormat::R32G32B32_FLOAT,
};
// Missing formats:
// PixelFormat::RGB32UI,
// PixelFormat::RGB32I,
constexpr std::array VIEW_CLASS_64_BITS = {
constexpr std::array VIEW_CLASS_64_BITS{
PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {
// TODO: How should we handle 48 bits?
constexpr std::array VIEW_CLASS_32_BITS = {
constexpr std::array VIEW_CLASS_32_BITS{
PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {
// TODO: How should we handle 24 bits?
constexpr std::array VIEW_CLASS_16_BITS = {
constexpr std::array VIEW_CLASS_16_BITS{
PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
};
constexpr std::array VIEW_CLASS_8_BITS = {
constexpr std::array VIEW_CLASS_8_BITS{
PixelFormat::R8_UINT,
PixelFormat::R8_UNORM,
PixelFormat::R8_SINT,
PixelFormat::R8_SNORM,
};
constexpr std::array VIEW_CLASS_RGTC1_RED = {
constexpr std::array VIEW_CLASS_RGTC1_RED{
PixelFormat::BC4_UNORM,
PixelFormat::BC4_SNORM,
};
constexpr std::array VIEW_CLASS_RGTC2_RG = {
constexpr std::array VIEW_CLASS_RGTC2_RG{
PixelFormat::BC5_UNORM,
PixelFormat::BC5_SNORM,
};
constexpr std::array VIEW_CLASS_BPTC_UNORM = {
constexpr std::array VIEW_CLASS_BPTC_UNORM{
PixelFormat::BC7_UNORM,
PixelFormat::BC7_SRGB,
};
constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
constexpr std::array VIEW_CLASS_BPTC_FLOAT{
PixelFormat::BC6H_SFLOAT,
PixelFormat::BC6H_UFLOAT,
};
// ASTC view classes. Each array below pairs the UNORM and SRGB variants of a single ASTC block
// size; MakeViewTable passes every one of these arrays to EnableRange. Block sizes that have no
// array yet are listed in the "Missing formats" comments.
constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
PixelFormat::ASTC_2D_4X4_UNORM,
PixelFormat::ASTC_2D_4X4_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
PixelFormat::ASTC_2D_5X4_UNORM,
PixelFormat::ASTC_2D_5X4_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
PixelFormat::ASTC_2D_5X5_UNORM,
PixelFormat::ASTC_2D_5X5_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
PixelFormat::ASTC_2D_6X5_UNORM,
PixelFormat::ASTC_2D_6X5_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
PixelFormat::ASTC_2D_6X6_UNORM,
PixelFormat::ASTC_2D_6X6_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
PixelFormat::ASTC_2D_8X5_UNORM,
PixelFormat::ASTC_2D_8X5_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
PixelFormat::ASTC_2D_8X8_UNORM,
PixelFormat::ASTC_2D_8X8_SRGB,
};
// Missing formats:
// PixelFormat::ASTC_2D_10X5_UNORM
// PixelFormat::ASTC_2D_10X5_SRGB
// Missing formats:
// PixelFormat::ASTC_2D_10X6_UNORM
// PixelFormat::ASTC_2D_10X6_SRGB
constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
PixelFormat::ASTC_2D_10X8_UNORM,
PixelFormat::ASTC_2D_10X8_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
PixelFormat::ASTC_2D_10X10_UNORM,
PixelFormat::ASTC_2D_10X10_SRGB,
};
// Missing formats
// ASTC_2D_12X10_UNORM,
// ASTC_2D_12X10_SRGB,
constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
PixelFormat::ASTC_2D_12X12_UNORM,
PixelFormat::ASTC_2D_12X12_SRGB,
};
// Compatibility table taken from Table 4.X.1 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
constexpr std::array COPY_CLASS_128_BITS = {
constexpr std::array COPY_CLASS_128_BITS{
PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
// PixelFormat::RGBA32I
// COMPRESSED_RG_RGTC2
constexpr std::array COPY_CLASS_64_BITS = {
constexpr std::array COPY_CLASS_64_BITS{
PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
// COMPRESSED_RGBA_S3TC_DXT1_EXT
// COMPRESSED_SIGNED_RED_RGTC1
void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
compatiblity[format_a][format_b] = true;
compatiblity[format_b][format_a] = true;
constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
}
void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
}
template <typename Range>
void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
constexpr void EnableRange(Table& table, const Range& range) {
for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
for (auto it_b = it_a; it_b != range.end(); ++it_b) {
Enable(compatibility, *it_a, *it_b);
Enable(table, *it_a, *it_b);
}
}
}
} // Anonymous namespace
constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
const size_t a = static_cast<size_t>(format_a);
const size_t b = static_cast<size_t>(format_b);
return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
}
FormatCompatibility::FormatCompatibility() {
constexpr Table MakeViewTable() {
Table view{};
for (size_t i = 0; i < MaxPixelFormat; ++i) {
// Identity is allowed
Enable(view, i, i);
}
EnableRange(view, VIEW_CLASS_128_BITS);
EnableRange(view, VIEW_CLASS_96_BITS);
EnableRange(view, VIEW_CLASS_64_BITS);
@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() {
EnableRange(view, VIEW_CLASS_RGTC2_RG);
EnableRange(view, VIEW_CLASS_BPTC_UNORM);
EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
return view;
}
copy = view;
/// Builds the copy table: starts from the result of MakeViewTable and additionally enables every
/// pair inside the two COPY_CLASS_* arrays.
constexpr Table MakeCopyTable() {
    Table table = MakeViewTable();
    EnableRange(table, COPY_CLASS_128_BITS);
    EnableRange(table, COPY_CLASS_64_BITS);
    return table;
}
} // Anonymous namespace
/// Returns true when the table produced by MakeViewTable has the (format_a, format_b) bit set.
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) {
    // Built once at compile time and shared by every call
    static constexpr Table VIEW_TABLE = MakeViewTable();
    return IsSupported(VIEW_TABLE, format_a, format_b);
}
/// Returns true when the table produced by MakeCopyTable has the (format_a, format_b) bit set.
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
    // Built once at compile time and shared by every call
    static constexpr Table COPY_TABLE = MakeCopyTable();
    return IsSupported(COPY_TABLE, format_a, format_b);
}
} // namespace VideoCore::Surface

@ -4,31 +4,12 @@
#pragma once
#include <array>
#include <bitset>
#include <cstddef>
#include "video_core/surface.h"
namespace VideoCore::Surface {
class FormatCompatibility {
public:
using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b);
explicit FormatCompatibility();
bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
}
bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
}
private:
Table view;
Table copy;
};
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
} // namespace VideoCore::Surface

@ -16,6 +16,9 @@ namespace VideoCommon::Dirty {
using Tegra::Engines::Maxwell3D;
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
static constexpr std::size_t num_per_rt = NUM(rt[0]);
static constexpr std::size_t begin = OFF(rt);
static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
}
FillBlock(tables[1], begin, num, RenderTargets);
FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);
tables[0][OFF(rt_control)] = RenderTargets;
tables[1][OFF(rt_control)] = RenderTargetControl;
static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {

@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
enum : u8 {
NullEntry = 0,
Descriptors,
RenderTargets,
RenderTargetControl,
ColorBuffer0,
ColorBuffer1,
ColorBuffer2,

@ -10,7 +10,11 @@
namespace Tegra::Engines {
Fermi2D::Fermi2D() = default;
Fermi2D::Fermi2D() {
    // Both surfaces start with a depth of one.
    // Nvidia's OpenGL driver seems to assume these values
    regs.dst.depth = 1;
    regs.src.depth = 1;
}
Fermi2D::~Fermi2D() = default;
@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Fermi2D register, increase the size of the Regs structure");
regs.reg_array[method] = method_argument;
switch (method) {
// Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
// so trigger on the second 32-bit write.
case FERMI2D_REG_INDEX(blit_src_y) + 1: {
HandleSurfaceCopy();
break;
}
if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
Blit();
}
}
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
for (std::size_t i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
for (u32 i = 0; i < amount; ++i) {
CallMethod(method, base_start[i], methods_pending - i <= 1);
}
}
static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
const u32 line_a = src_2 - src_1;
const u32 line_b = dst_2 - dst_1;
const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
return {line_b - (excess * line_b) / line_a, excess};
}
void Fermi2D::Blit() {
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
regs.src.Address(), regs.dst.Address());
void Fermi2D::HandleSurfaceCopy() {
LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation);
UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
// TODO(Subv): Only raw copies are implemented.
ASSERT(regs.operation == Operation::SrcCopy);
const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
u32 src_blit_x2, src_blit_y2;
if (regs.blit_control.origin == Origin::Corner) {
src_blit_x2 =
static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
src_blit_y2 =
static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
} else {
src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
}
u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
const auto [new_dst_w, src_excess_x] =
DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
const auto [new_dst_h, src_excess_y] =
DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
dst_blit_x2 = new_dst_w + regs.blit_dst_x;
src_blit_x2 = src_blit_x2 - src_excess_x;
dst_blit_y2 = new_dst_h + regs.blit_dst_y;
src_blit_y2 = src_blit_y2 - src_excess_y;
const auto [new_src_w, dst_excess_x] =
DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
const auto [new_src_h, dst_excess_y] =
DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
src_blit_x2 = new_src_w + src_blit_x1;
dst_blit_x2 = dst_blit_x2 - dst_excess_x;
src_blit_y2 = new_src_h + src_blit_y1;
dst_blit_y2 = dst_blit_y2 - dst_excess_y;
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
dst_blit_y2};
const Config copy_config{
const auto& args = regs.pixels_from_memory;
const Config config{
.operation = regs.operation,
.filter = regs.blit_control.filter,
.src_rect = src_rect,
.dst_rect = dst_rect,
.filter = args.sample_mode.filter,
.dst_x0 = args.dst_x0,
.dst_y0 = args.dst_y0,
.dst_x1 = args.dst_x0 + args.dst_width,
.dst_y1 = args.dst_y0 + args.dst_height,
.src_x0 = static_cast<s32>(args.src_x0 >> 32),
.src_y0 = static_cast<s32>(args.src_y0 >> 32),
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
};
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
UNIMPLEMENTED();
}
}

@ -53,8 +53,8 @@ public:
};
enum class Filter : u32 {
PointSample = 0, // Nearest
Linear = 1,
Point = 0,
Bilinear = 1,
};
enum class Operation : u32 {
@ -67,88 +67,235 @@ public:
BlendPremult = 6,
};
struct Regs {
static constexpr std::size_t NUM_REGS = 0x258;
/// Layout selector stored in a surface description; this is the type of Surface::linear.
enum class MemoryLayout : u32 {
BlockLinear = 0,
Pitch = 1,
};
struct Surface {
RenderTargetFormat format;
BitField<0, 1, u32> linear;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 depth;
u32 layer;
u32 pitch;
u32 width;
u32 height;
u32 address_high;
u32 address_low;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
u32 BlockWidth() const {
return block_width.Value();
}
u32 BlockHeight() const {
return block_height.Value();
}
u32 BlockDepth() const {
return block_depth.Value();
}
};
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
/// Value type of the Regs::pixels_from_cpu_index_wrap register.
enum class CpuIndexWrap : u32 {
Wrap = 0,
NoWrap = 1,
};
/// Description of one 2D surface as it sits in the register file; Regs::dst and Regs::src are
/// both of this type.
struct Surface {
RenderTargetFormat format;
MemoryLayout linear;
// The three block dimensions are BitField members sharing one union.
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 depth;
u32 layer;
u32 pitch;
u32 width;
u32 height;
u32 addr_upper;
u32 addr_lower;
/// Joins the two address words: addr_upper is shifted left by 32 and OR-ed with addr_lower.
[[nodiscard]] constexpr GPUVAddr Address() const noexcept {
return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
}
};
// Ten members are declared above; 0x28 bytes presumably means each occupies four bytes.
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
/// Value type of the Regs::pixels_from_memory_sector_promotion register.
enum class SectorPromotion : u32 {
NoPromotion = 0,
PromoteTo2V = 1,
PromoteTo2H = 2,
PromoteTo4 = 3,
};
/// Value type of the Regs::num_tpcs register.
enum class NumTpcs : u32 {
All = 0,
One = 1,
};
/// Value type of the Regs::render_enable_mode register.
enum class RenderEnableMode : u32 {
False = 0,
True = 1,
Conditional = 2,
RenderIfEqual = 3,
RenderIfNotEqual = 4,
};
/// Value type of the Regs::color_key_format bit field.
// NOTE(review): `A16R56G6B5` and `A1R5G55B5` look like misspellings (compare `A1R5G5B5` in
// MonochromePatternColorFormat) - confirm against the hardware headers before renaming.
enum class ColorKeyFormat : u32 {
A16R56G6B5 = 0,
A1R5G55B5 = 1,
A8R8G8B8 = 2,
A2R10G10B10 = 3,
Y8 = 4,
Y16 = 5,
Y32 = 6,
};
/// Type of the Regs::beta4 register: four BitField members (b, g, r, a) sharing one union.
union Beta4 {
BitField<0, 8, u32> b;
BitField<8, 8, u32> g;
BitField<16, 8, u32> r;
BitField<24, 8, u32> a;
};
/// Pair of x/y words; element type of the Regs::render_solid.prim_point array.
struct Point {
u32 x;
u32 y;
};
/// Value type of the Regs::pattern_select bit field.
enum class PatternSelect : u32 {
MonoChrome8x8 = 0,
MonoChrome64x1 = 1,
MonoChrome1x64 = 2,
Color = 3,
};
/// Value type of the Regs::notify register.
enum class NotifyType : u32 {
WriteOnly = 0,
WriteThenAwaken = 1,
};
/// Value type of the Regs::monochrome_pattern.color_format bit field.
// NOTE(review): `A8X8R8G6B5` may be a misspelling of `A8X8R5G6B5` - confirm before renaming.
enum class MonochromePatternColorFormat : u32 {
A8X8R8G6B5 = 0,
A1R5G5B5 = 1,
A8R8G8B8 = 2,
A8Y8 = 3,
A8X8Y16 = 4,
Y32 = 5,
};
/// Value type of the Regs::monochrome_pattern.format bit field.
enum class MonochromePatternFormat : u32 {
CGA6_M1 = 0,
LE_M1 = 1,
};
union Regs {
static constexpr std::size_t NUM_REGS = 0x258;
struct {
u32 object;
INSERT_UNION_PADDING_WORDS(0x3F);
u32 no_operation;
NotifyType notify;
INSERT_UNION_PADDING_WORDS(0x2);
u32 wait_for_idle;
INSERT_UNION_PADDING_WORDS(0xB);
u32 pm_trigger;
INSERT_UNION_PADDING_WORDS(0xF);
u32 context_dma_notify;
u32 dst_context_dma;
u32 src_context_dma;
u32 semaphore_context_dma;
INSERT_UNION_PADDING_WORDS(0x1C);
Surface dst;
CpuIndexWrap pixels_from_cpu_index_wrap;
u32 kind2d_check_enable;
Surface src;
SectorPromotion pixels_from_memory_sector_promotion;
INSERT_UNION_PADDING_WORDS(0x1);
NumTpcs num_tpcs;
u32 render_enable_addr_upper;
u32 render_enable_addr_lower;
RenderEnableMode render_enable_mode;
INSERT_UNION_PADDING_WORDS(0x4);
u32 clip_x0;
u32 clip_y0;
u32 clip_width;
u32 clip_height;
BitField<0, 1, u32> clip_enable;
BitField<0, 3, ColorKeyFormat> color_key_format;
u32 color_key;
BitField<0, 1, u32> color_key_enable;
BitField<0, 8, u32> rop;
u32 beta1;
Beta4 beta4;
Operation operation;
union {
BitField<0, 6, u32> x;
BitField<8, 6, u32> y;
} pattern_offset;
BitField<0, 2, PatternSelect> pattern_select;
INSERT_UNION_PADDING_WORDS(0xC);
struct {
INSERT_UNION_PADDING_WORDS(0x80);
Surface dst;
INSERT_UNION_PADDING_WORDS(2);
Surface src;
INSERT_UNION_PADDING_WORDS(0x15);
Operation operation;
INSERT_UNION_PADDING_WORDS(0x177);
BitField<0, 3, MonochromePatternColorFormat> color_format;
BitField<0, 1, MonochromePatternFormat> format;
u32 color0;
u32 color1;
u32 pattern0;
u32 pattern1;
} monochrome_pattern;
struct {
std::array<u32, 0x40> X8R8G8B8;
std::array<u32, 0x20> R5G6B5;
std::array<u32, 0x20> X1R5G5B5;
std::array<u32, 0x10> Y8;
} color_pattern;
INSERT_UNION_PADDING_WORDS(0x10);
struct {
u32 prim_mode;
u32 prim_color_format;
u32 prim_color;
u32 line_tie_break_bits;
INSERT_UNION_PADDING_WORDS(0x14);
u32 prim_point_xy;
INSERT_UNION_PADDING_WORDS(0x7);
std::array<Point, 0x40> prim_point;
} render_solid;
struct {
u32 data_type;
u32 color_format;
u32 index_format;
u32 mono_format;
u32 wrap;
u32 color0;
u32 color1;
u32 mono_opacity;
INSERT_UNION_PADDING_WORDS(0x6);
u32 src_width;
u32 src_height;
u32 dx_du_frac;
u32 dx_du_int;
u32 dx_dv_frac;
u32 dy_dv_int;
u32 dst_x0_frac;
u32 dst_x0_int;
u32 dst_y0_frac;
u32 dst_y0_int;
u32 data;
} pixels_from_cpu;
INSERT_UNION_PADDING_WORDS(0x3);
u32 big_endian_control;
INSERT_UNION_PADDING_WORDS(0x3);
struct {
BitField<0, 3, u32> block_shape;
BitField<0, 5, u32> corral_size;
BitField<0, 1, u32> safe_overlap;
union {
u32 raw;
BitField<0, 1, Origin> origin;
BitField<4, 1, Filter> filter;
} blit_control;
} sample_mode;
INSERT_UNION_PADDING_WORDS(0x8);
u32 blit_dst_x;
u32 blit_dst_y;
u32 blit_dst_width;
u32 blit_dst_height;
u64 blit_du_dx;
u64 blit_dv_dy;
u64 blit_src_x;
u64 blit_src_y;
INSERT_UNION_PADDING_WORDS(0x21);
};
std::array<u32, NUM_REGS> reg_array;
s32 dst_x0;
s32 dst_y0;
s32 dst_width;
s32 dst_height;
s64 du_dx;
s64 dv_dy;
s64 src_x0;
s64 src_y0;
} pixels_from_memory;
};
std::array<u32, NUM_REGS> reg_array;
} regs{};
struct Config {
Operation operation{};
Filter filter{};
Common::Rectangle<u32> src_rect;
Common::Rectangle<u32> dst_rect;
Operation operation;
Filter filter;
s32 dst_x0;
s32 dst_y0;
s32 dst_x1;
s32 dst_y1;
s32 src_x0;
s32 src_y0;
s32 src_x1;
s32 src_y1;
};
private:
@ -156,25 +303,49 @@ private:
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
void HandleSurfaceCopy();
void Blit();
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \
static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(dst, 0x80);
ASSERT_REG_POSITION(src, 0x8C);
ASSERT_REG_POSITION(operation, 0xAB);
ASSERT_REG_POSITION(blit_control, 0x223);
ASSERT_REG_POSITION(blit_dst_x, 0x22c);
ASSERT_REG_POSITION(blit_dst_y, 0x22d);
ASSERT_REG_POSITION(blit_dst_width, 0x22e);
ASSERT_REG_POSITION(blit_dst_height, 0x22f);
ASSERT_REG_POSITION(blit_du_dx, 0x230);
ASSERT_REG_POSITION(blit_dv_dy, 0x232);
ASSERT_REG_POSITION(blit_src_x, 0x234);
ASSERT_REG_POSITION(blit_src_y, 0x236);
ASSERT_REG_POSITION(object, 0x0);
ASSERT_REG_POSITION(no_operation, 0x100);
ASSERT_REG_POSITION(notify, 0x104);
ASSERT_REG_POSITION(wait_for_idle, 0x110);
ASSERT_REG_POSITION(pm_trigger, 0x140);
ASSERT_REG_POSITION(context_dma_notify, 0x180);
ASSERT_REG_POSITION(dst_context_dma, 0x184);
ASSERT_REG_POSITION(src_context_dma, 0x188);
ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
ASSERT_REG_POSITION(dst, 0x200);
ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
ASSERT_REG_POSITION(src, 0x230);
ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
ASSERT_REG_POSITION(num_tpcs, 0x260);
ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
ASSERT_REG_POSITION(clip_x0, 0x280);
ASSERT_REG_POSITION(clip_y0, 0x284);
ASSERT_REG_POSITION(clip_width, 0x288);
ASSERT_REG_POSITION(clip_height, 0x28c);
ASSERT_REG_POSITION(clip_enable, 0x290);
ASSERT_REG_POSITION(color_key_format, 0x294);
ASSERT_REG_POSITION(color_key, 0x298);
ASSERT_REG_POSITION(rop, 0x2A0);
ASSERT_REG_POSITION(beta1, 0x2A4);
ASSERT_REG_POSITION(beta4, 0x2A8);
ASSERT_REG_POSITION(operation, 0x2AC);
ASSERT_REG_POSITION(pattern_offset, 0x2B0);
ASSERT_REG_POSITION(pattern_select, 0x2B4);
ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
ASSERT_REG_POSITION(color_pattern, 0x300);
ASSERT_REG_POSITION(render_solid, 0x580);
ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
ASSERT_REG_POSITION(big_endian_control, 0x870);
ASSERT_REG_POSITION(pixels_from_memory, 0x880);
#undef ASSERT_REG_POSITION

@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
}
}
Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
ASSERT(cbuf_mask[regs.tex_cb_index]);
const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
ASSERT(texinfo.Address() != 0);
const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
ASSERT(address < texinfo.Address() + texinfo.size);
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
return GetTextureInfo(tex_handle);
}
Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
}
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
ASSERT(stage == ShaderType::Compute);
const auto& buffer = launch_description.const_buffer_config[const_buffer];
@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
return result;
}

@ -209,11 +209,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
Texture::FullTextureInfo GetTexture(std::size_t offset) const;
/// Given a texture handle, returns the TSC and TIC entries.
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;

@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cinttypes>
#include <cstring>
#include <optional>
#include "common/assert.h"
@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
OnMemoryWrite();
}
return;
case MAXWELL3D_REG_INDEX(fragment_barrier):
return rasterizer->FragmentBarrier();
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier();
}
}
@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() {
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
}
/// Reads entry number `tsc_index` from the TSC table in GPU memory.
/// The entry address is the table base taken from regs.tsc plus
/// tsc_index * sizeof(Texture::TSCEntry).
/// NOTE(review): `tsc_address_gpu` is declared twice below (TSCAddress() vs Address()). This
/// looks like both sides of a rename in a diff without +/- markers - confirm which one is live.
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
Texture::TSCEntry tsc_entry;
// Copy the raw entry bytes out of GPU memory into the local struct.
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry;
}
/// Given a texture handle, returns the TIC and TSC entries it refers to.
/// @param tex_handle Handle whose tic_id / tsc_id fields select the two table entries.
/// @return Both entries bundled as a FullTextureInfo.
Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
    // The TIC entry is fetched before the TSC entry, same as the braced initializer did.
    const Texture::TICEntry tic_entry = GetTICEntry(tex_handle.tic_id);
    const Texture::TSCEntry tsc_entry = GetTSCEntry(tex_handle.tsc_id);
    return Texture::FullTextureInfo{tic_entry, tsc_entry};
}
/// Returns the texture information for a specific texture in a specific shader stage.
/// The handle is read from the stage's const buffer selected by regs.tex_cb_index.
/// @param stage  Shader stage whose const buffers are consulted.
/// @param offset Index of the handle inside that const buffer, in TextureHandle units.
/// @return TIC and TSC entries referenced by the handle found at that slot.
Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
    const auto& stage_state = state.shader_stages[static_cast<std::size_t>(stage)];
    const auto& handle_cbuf = stage_state.const_buffers[regs.tex_cb_index];
    ASSERT(handle_cbuf.enabled && handle_cbuf.address != 0);

    // The handle must lie inside the bound const buffer.
    const GPUVAddr handle_address =
        handle_cbuf.address + offset * sizeof(Texture::TextureHandle);
    ASSERT(handle_address < handle_cbuf.address + handle_cbuf.size);

    const Texture::TextureHandle handle{memory_manager.Read<u32>(handle_address)};
    return GetTextureInfo(handle);
}
/// Returns the current raw value of a Maxwell3D register.
/// @param method Register index; asserted to be below Regs::NUM_REGS.
u32 Maxwell3D::GetRegisterValue(u32 method) const {
    const bool is_known_register = method < Regs::NUM_REGS;
    ASSERT_MSG(is_known_register, "Invalid Maxwell3D register");
    return regs.reg_array[method];
}
/// Forwards a clear request to the rasterizer.
/// Asserts that the R, G, B and A fields of regs.clear_buffers all hold the same value.
void Maxwell3D::ProcessClearBuffers() {
    const auto& clear = regs.clear_buffers;
    ASSERT(clear.R == clear.G && clear.R == clear.B && clear.R == clear.A);

    rasterizer->Clear();
}
@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
ASSERT(stage != ShaderType::Compute);
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
u32 result;
std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
return result;
return memory_manager.Read<u32>(buffer.address + offset);
}
SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
/// Builds the SamplerDescriptor for a raw 32-bit texture handle.
/// The descriptor is created from the handle's TIC entry and its is_shadow bit is taken from
/// the depth_compare_enabled field of the handle's TSC entry.
/// NOTE(review): `result` is declared twice below. The two statement groups look like the
/// before/after sides of a diff hunk rendered without +/- markers - confirm which group is live.
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
// Variant going through GetTextureInfo() to obtain both entries at once.
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
// Variant reading the TIC and TSC entries directly.
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
return result;
}

@ -438,16 +438,6 @@ public:
DecrWrapOGL = 0x8508,
};
/// Memory layout selector encoded as Linear = 0, BlockLinear = 1.
enum class MemoryLayout : u32 {
Linear = 0,
BlockLinear = 1,
};
/// Same two layouts as MemoryLayout but with the numeric encoding swapped
/// (BlockLinear = 0, Linear = 1). Used as the type of single-bit `type` register fields.
enum class InvMemoryLayout : u32 {
BlockLinear = 0,
Linear = 1,
};
enum class CounterReset : u32 {
SampleCnt = 0x01,
Unk02 = 0x02,
@ -589,21 +579,31 @@ public:
NegativeW = 7,
};
/// Value type of the `sampler_index` register.
/// NOTE(review): presumably ViaHeaderIndex means the sampler entry is selected with the
/// texture header's index instead of its own - confirm against the hardware documentation.
enum class SamplerIndex : u32 {
Independently = 0,
ViaHeaderIndex = 1,
};
/// One 32-bit register word describing how a surface is tiled.
/// Shared by the render target and zeta (depth) register blocks via their `tile_mode` member.
struct TileMode {
union {
// Bits 0-3, 4-7 and 8-11: block dimensions.
// NOTE(review): presumably stored as log2 of the size in GOBs - confirm.
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
// Bit 12: set when the surface is pitch linear.
BitField<12, 1, u32> is_pitch_linear;
// Bit 16: set when the surface is three-dimensional.
BitField<16, 1, u32> is_3d;
};
};
// The struct overlays exactly one register word.
static_assert(sizeof(TileMode) == 4);
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
TileMode tile_mode;
union {
BitField<0, 3, u32> block_width;
BitField<4, 3, u32> block_height;
BitField<8, 3, u32> block_depth;
BitField<12, 1, InvMemoryLayout> type;
BitField<16, 1, u32> is_3d;
} memory_layout;
union {
BitField<0, 16, u32> layers;
BitField<0, 16, u32> depth;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
@ -832,7 +832,11 @@ public:
u32 patch_vertices;
INSERT_UNION_PADDING_WORDS(0xC);
INSERT_UNION_PADDING_WORDS(0x4);
u32 fragment_barrier;
INSERT_UNION_PADDING_WORDS(0x7);
std::array<ScissorTest, NumViewports> scissor_test;
@ -842,7 +846,15 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
INSERT_UNION_PADDING_WORDS(0xC);
INSERT_UNION_PADDING_WORDS(0x5);
u32 invalidate_texture_data_cache;
INSERT_UNION_PADDING_WORDS(0x1);
u32 tiled_cache_barrier;
INSERT_UNION_PADDING_WORDS(0x4);
u32 color_mask_common;
@ -866,12 +878,7 @@ public:
u32 address_high;
u32 address_low;
Tegra::DepthFormat format;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
BitField<20, 1, InvMemoryLayout> type;
} memory_layout;
TileMode tile_mode;
u32 layer_stride;
GPUVAddr Address() const {
@ -880,7 +887,18 @@ public:
}
} zeta;
INSERT_UNION_PADDING_WORDS(0x41);
// Render area rectangle, stored as two register words.
struct {
// First word: x in the low 16 bits, width in the high 16 bits.
union {
BitField<0, 16, u32> x;
BitField<16, 16, u32> width;
};
// Second word: y in the low 16 bits, height in the high 16 bits.
union {
BitField<0, 16, u32> y;
BitField<16, 16, u32> height;
};
} render_area;
INSERT_UNION_PADDING_WORDS(0x3F);
union {
BitField<0, 4, u32> stencil;
@ -921,7 +939,7 @@ public:
BitField<25, 3, u32> map_7;
};
u32 GetMap(std::size_t index) const {
u32 Map(std::size_t index) const {
const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
map_4, map_5, map_6, map_7};
ASSERT(index < maps.size());
@ -934,11 +952,13 @@ public:
u32 zeta_width;
u32 zeta_height;
union {
BitField<0, 16, u32> zeta_layers;
BitField<0, 16, u32> zeta_depth;
BitField<16, 1, u32> zeta_volume;
};
INSERT_UNION_PADDING_WORDS(0x26);
SamplerIndex sampler_index;
INSERT_UNION_PADDING_WORDS(0x25);
u32 depth_test_enable;
@ -964,6 +984,7 @@ public:
float b;
float a;
} blend_color;
INSERT_UNION_PADDING_WORDS(0x4);
struct {
@ -1001,7 +1022,12 @@ public:
float line_width_smooth;
float line_width_aliased;
INSERT_UNION_PADDING_WORDS(0x1F);
INSERT_UNION_PADDING_WORDS(0x1B);
u32 invalidate_sampler_cache_no_wfi;
u32 invalidate_texture_header_cache_no_wfi;
INSERT_UNION_PADDING_WORDS(0x2);
u32 vb_element_base;
u32 vb_base_instance;
@ -1045,13 +1071,13 @@ public:
} condition;
struct {
u32 tsc_address_high;
u32 tsc_address_low;
u32 tsc_limit;
u32 address_high;
u32 address_low;
u32 limit;
GPUVAddr TSCAddress() const {
return static_cast<GPUVAddr>(
(static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tsc;
@ -1062,13 +1088,13 @@ public:
u32 line_smooth_enable;
struct {
u32 tic_address_high;
u32 tic_address_low;
u32 tic_limit;
u32 address_high;
u32 address_low;
u32 limit;
GPUVAddr TICAddress() const {
return static_cast<GPUVAddr>(
(static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tic;
@ -1397,12 +1423,6 @@ public:
void FlushMMEInlineDraw();
/// Given a texture handle, returns the TSC and TIC entries.
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
ASSERT_REG_POSITION(fragment_barrier, 0x378);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(render_area, 0x3FD);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(zeta_layers, 0x48c);
ASSERT_REG_POSITION(zeta_depth, 0x48c);
ASSERT_REG_POSITION(sampler_index, 0x48D);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);

@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {
}
void MaxwellDMA::CopyBlockLinearToPitch() {
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
}
void MaxwellDMA::CopyPitchToBlockLinear() {
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
const u32 width = dst_params.width;

@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@ -47,6 +48,11 @@ protected:
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
class FenceManager {
public:
/// Notify the fence manager about a new frame.
/// Advances the delayed destruction ring by one tick.
void TickFrame() {
delayed_destruction_ring.Tick();
}
void SignalSemaphore(GPUVAddr addr, u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
@ -86,7 +92,7 @@ public:
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
fences.pop();
PopFence();
}
}
@ -132,7 +138,7 @@ private:
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
fences.pop();
PopFence();
}
}
@ -158,7 +164,14 @@ private:
query_cache.CommitAsyncFlushes();
}
/// Retires the oldest queued fence: it is moved into the delayed destruction ring and then
/// removed from the queue.
void PopFence() {
// Move the fence out first; pop() destroys the queue's front element.
delayed_destruction_ring.Push(std::move(fences.front()));
fences.pop();
}
std::queue<TFence> fences;
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};
} // namespace VideoCommon

@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
}
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size);
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
rasterizer->UnmapMemory(*cpu_addr, size);
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}

@ -1,250 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstring>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/morton.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
namespace VideoCore {
using Surface::GetBytesPerPixel;
using Surface::PixelFormat;
using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
/// Converts one surface between the block-linear ("morton") and linear layouts.
/// @tparam morton_to_linear true calls UnswizzleTexture, false calls CopySwizzledData with its
///                          unswizzle flag cleared.
/// @tparam format           Pixel format; fixes the bytes per pixel and the block footprint.
/// All runtime parameters are forwarded to the Tegra::Texture decoder routines.
template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
                       u32 tile_width_spacing, u8* buffer, u8* addr) {
    constexpr u32 bpp = GetBytesPerPixel(format);

    // Block-compressed formats (BCn: DXT and DXN) are swizzled one 4x4 tile at a time rather
    // than one pixel at a time, so the format's block footprint is needed as well.
    constexpr u32 block_w = GetDefaultBlockWidth(format);
    constexpr u32 block_h = GetDefaultBlockHeight(format);

    if constexpr (!morton_to_linear) {
        // Surface extent in blocks, rounded up.
        const u32 width_in_blocks = (stride + block_w - 1) / block_w;
        const u32 height_in_blocks = (height + block_h - 1) / block_h;
        Tegra::Texture::CopySwizzledData(width_in_blocks, height_in_blocks, depth, bpp, bpp, addr,
                                         buffer, false, block_height, block_depth,
                                         tile_width_spacing);
    } else {
        Tegra::Texture::UnswizzleTexture(buffer, addr, block_w, block_h, bpp, stride, height,
                                         depth, block_height, block_depth, tile_width_spacing);
    }
}
// Morton -> linear conversion routines, one MortonCopy<true, ...> instantiation per format.
// GetSwizzleFunction indexes this table with the numeric PixelFormat value, so the entries
// have to stay in the same order as the PixelFormat enumerators.
static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
MortonCopy<true, PixelFormat::R8_UNORM>,
MortonCopy<true, PixelFormat::R8_SNORM>,
MortonCopy<true, PixelFormat::R8_SINT>,
MortonCopy<true, PixelFormat::R8_UINT>,
MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
MortonCopy<true, PixelFormat::BC2_UNORM>,
MortonCopy<true, PixelFormat::BC3_UNORM>,
MortonCopy<true, PixelFormat::BC4_UNORM>,
MortonCopy<true, PixelFormat::BC4_SNORM>,
MortonCopy<true, PixelFormat::BC5_UNORM>,
MortonCopy<true, PixelFormat::BC5_SNORM>,
MortonCopy<true, PixelFormat::BC7_UNORM>,
MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
MortonCopy<true, PixelFormat::R32G32_FLOAT>,
MortonCopy<true, PixelFormat::R32G32_SINT>,
MortonCopy<true, PixelFormat::R32_FLOAT>,
MortonCopy<true, PixelFormat::R16_FLOAT>,
MortonCopy<true, PixelFormat::R16_UNORM>,
MortonCopy<true, PixelFormat::R16_SNORM>,
MortonCopy<true, PixelFormat::R16_UINT>,
MortonCopy<true, PixelFormat::R16_SINT>,
MortonCopy<true, PixelFormat::R16G16_UNORM>,
MortonCopy<true, PixelFormat::R16G16_FLOAT>,
MortonCopy<true, PixelFormat::R16G16_UINT>,
MortonCopy<true, PixelFormat::R16G16_SINT>,
MortonCopy<true, PixelFormat::R16G16_SNORM>,
MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
MortonCopy<true, PixelFormat::R8G8_UNORM>,
MortonCopy<true, PixelFormat::R8G8_SNORM>,
MortonCopy<true, PixelFormat::R8G8_SINT>,
MortonCopy<true, PixelFormat::R8G8_UINT>,
MortonCopy<true, PixelFormat::R32G32_UINT>,
MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
MortonCopy<true, PixelFormat::R32_UINT>,
MortonCopy<true, PixelFormat::R32_SINT>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
MortonCopy<true, PixelFormat::BC2_SRGB>,
MortonCopy<true, PixelFormat::BC3_SRGB>,
MortonCopy<true, PixelFormat::BC7_SRGB>,
MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
MortonCopy<true, PixelFormat::D32_FLOAT>,
MortonCopy<true, PixelFormat::D16_UNORM>,
MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
};
// Linear -> morton conversion routines, one MortonCopy<false, ...> instantiation per format.
// GetSwizzleFunction indexes this table with the numeric PixelFormat value, so the entries
// must appear in exactly the same order as in morton_to_linear_fns.
// Formats without a swizzle implementation hold nullptr; callers have to check for that.
static constexpr ConversionArray linear_to_morton_fns = {
    MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
    MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
    MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
    MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
    MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
    MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
    MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
    MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
    MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
    MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
    MortonCopy<false, PixelFormat::R8_UNORM>,
    MortonCopy<false, PixelFormat::R8_SNORM>,
    MortonCopy<false, PixelFormat::R8_SINT>,
    MortonCopy<false, PixelFormat::R8_UINT>,
    MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
    MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
    MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
    MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
    MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
    MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
    MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
    MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
    MortonCopy<false, PixelFormat::BC2_UNORM>,
    MortonCopy<false, PixelFormat::BC3_UNORM>,
    MortonCopy<false, PixelFormat::BC4_UNORM>,
    MortonCopy<false, PixelFormat::BC4_SNORM>,
    MortonCopy<false, PixelFormat::BC5_UNORM>,
    MortonCopy<false, PixelFormat::BC5_SNORM>,
    MortonCopy<false, PixelFormat::BC7_UNORM>,
    MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
    MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
    // TODO(Subv): Swizzling ASTC formats are not supported
    nullptr, // ASTC_2D_4X4_UNORM
    MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
    MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
    MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
    MortonCopy<false, PixelFormat::R32G32_FLOAT>,
    MortonCopy<false, PixelFormat::R32G32_SINT>,
    MortonCopy<false, PixelFormat::R32_FLOAT>,
    MortonCopy<false, PixelFormat::R16_FLOAT>,
    MortonCopy<false, PixelFormat::R16_UNORM>,
    MortonCopy<false, PixelFormat::R16_SNORM>,
    MortonCopy<false, PixelFormat::R16_UINT>,
    MortonCopy<false, PixelFormat::R16_SINT>,
    MortonCopy<false, PixelFormat::R16G16_UNORM>,
    MortonCopy<false, PixelFormat::R16G16_FLOAT>,
    MortonCopy<false, PixelFormat::R16G16_UINT>,
    MortonCopy<false, PixelFormat::R16G16_SINT>,
    MortonCopy<false, PixelFormat::R16G16_SNORM>,
    MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
    MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
    MortonCopy<false, PixelFormat::R8G8_UNORM>,
    MortonCopy<false, PixelFormat::R8G8_SNORM>,
    MortonCopy<false, PixelFormat::R8G8_SINT>,
    MortonCopy<false, PixelFormat::R8G8_UINT>,
    MortonCopy<false, PixelFormat::R32G32_UINT>,
    MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
    MortonCopy<false, PixelFormat::R32_UINT>,
    MortonCopy<false, PixelFormat::R32_SINT>,
    nullptr, // ASTC_2D_8X8_UNORM
    nullptr, // ASTC_2D_8X5_UNORM
    nullptr, // ASTC_2D_5X4_UNORM
    MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
    MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
    MortonCopy<false, PixelFormat::BC2_SRGB>,
    MortonCopy<false, PixelFormat::BC3_SRGB>,
    MortonCopy<false, PixelFormat::BC7_SRGB>,
    MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
    nullptr, // ASTC_2D_4X4_SRGB
    nullptr, // ASTC_2D_8X8_SRGB
    nullptr, // ASTC_2D_8X5_SRGB
    nullptr, // ASTC_2D_5X4_SRGB
    nullptr, // ASTC_2D_5X5_UNORM
    nullptr, // ASTC_2D_5X5_SRGB
    nullptr, // ASTC_2D_10X8_UNORM
    nullptr, // ASTC_2D_10X8_SRGB
    nullptr, // ASTC_2D_6X6_UNORM
    nullptr, // ASTC_2D_6X6_SRGB
    nullptr, // ASTC_2D_10X10_UNORM
    nullptr, // ASTC_2D_10X10_SRGB
    nullptr, // ASTC_2D_12X12_UNORM
    nullptr, // ASTC_2D_12X12_SRGB
    nullptr, // ASTC_2D_8X6_UNORM
    nullptr, // ASTC_2D_8X6_SRGB
    nullptr, // ASTC_2D_6X5_UNORM
    nullptr, // ASTC_2D_6X5_SRGB
    MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
    MortonCopy<false, PixelFormat::D32_FLOAT>,
    MortonCopy<false, PixelFormat::D16_UNORM>,
    MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
    MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
    MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
};
/// Looks up the conversion routine for a swizzle direction and pixel format.
/// @param mode   Direction of the conversion; selects which function table is consulted.
/// @param format Pixel format; its numeric value is the index into that table.
/// @return The table entry, which is nullptr for formats the table leaves unimplemented.
static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
    const auto table_index = static_cast<std::size_t>(format);
    if (mode == MortonSwizzleMode::MortonToLinear) {
        return morton_to_linear_fns[table_index];
    }
    if (mode == MortonSwizzleMode::LinearToMorton) {
        return linear_to_morton_fns[table_index];
    }
    // Not a valid enumerator: report it and fall back to the unswizzle table.
    UNREACHABLE();
    return morton_to_linear_fns[table_index];
}
/// Converts a surface between the block-linear (morton) and linear layouts.
/// Picks the routine matching `mode` and `format` and forwards every other argument to it.
/// @param mode   Direction of the conversion.
/// @param format Pixel format of the surface.
/// The remaining parameters are passed through unchanged to the selected MortonCopy routine.
/// Some mode/format pairs (ASTC in the linear -> morton direction) have no routine; those
/// requests trip an assertion and return without touching either buffer.
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
                   u8* buffer, u8* addr) {
    const MortonCopyFn copy_fn = GetSwizzleFunction(mode, format);
    if (copy_fn == nullptr) {
        // The function tables store nullptr for unsupported formats; calling through it would
        // be undefined behaviour.
        ASSERT_MSG(false, "Morton swizzle is not implemented for this pixel format");
        return;
    }
    copy_fn(stride, block_height, height, block_depth, depth, tile_width_spacing, buffer, addr);
}
} // namespace VideoCore

@ -1,18 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "video_core/surface.h"
namespace VideoCore {
/// Direction of a MortonSwizzle conversion.
enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
/// Converts a surface between the morton (block-linear) and linear layouts.
/// @param mode   Direction of the conversion.
/// @param format Pixel format of the surface.
/// NOTE(review): which of `buffer` / `addr` is the swizzled side and which the linear side is
/// not visible from this declaration - confirm against the definition before relying on it.
void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, u8* addr);
} // namespace VideoCore

@ -76,6 +76,9 @@ public:
/// Sync memory between guest and host.
virtual void SyncGuestHost() = 0;
/// Unmap memory range
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
@ -83,6 +86,12 @@ public:
/// Notify the host renderer to wait for previous primitive and compute operations.
virtual void WaitForIdle() = 0;
/// Notify the host renderer to wait for reads and writes to render targets and flush caches.
virtual void FragmentBarrier() = 0;
/// Notify the host renderer to make available previous render target writes.
virtual void TiledCacheBarrier() = 0;
/// Notify the rasterizer to send all written commands to the host GPU.
virtual void FlushCommands() = 0;
@ -91,8 +100,7 @@ public:
/// Attempt to use a faster method to perform a surface copy
[[nodiscard]] virtual bool AccelerateSurfaceCopy(
const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
return false;
}

@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const Device& device_, std::size_t stream_size_)
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_,
std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)},
device{device_} {
const Device& device_, OGLStreamBuffer& stream_buffer_,
StateTracker& state_tracker)
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
if (!device.HasFastBufferSubData()) {
return;
}

@ -22,6 +22,7 @@ namespace OpenGL {
class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class StateTracker;
class Buffer : public VideoCommon::BufferBlock {
public:
@ -52,9 +53,10 @@ private:
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const Device& device_, std::size_t stream_size_);
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, OGLStreamBuffer& stream_buffer,
StateTracker& state_tracker);
~OGLBufferCache();
BufferInfo GetEmptyBuffer(std::size_t) override;

@ -5,9 +5,11 @@
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <optional>
#include <span>
#include <vector>
#include <glad/glad.h>
@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
constexpr std::array LimitUBOs = {
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
constexpr std::array LimitSSBOs = {
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
constexpr std::array LIMIT_SSBOS = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_TEXTURE_IMAGE_UNITS,
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
constexpr std::array LimitImages = {
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
};
constexpr std::array LIMIT_SAMPLERS = {
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_TEXTURE_IMAGE_UNITS,
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
};
constexpr std::array LIMIT_IMAGES = {
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
};
template <typename T>
T GetInteger(GLenum pname) {
@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
return extensions;
}
bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) {
return std::find(images.begin(), images.end(), extension) != images.end();
bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
return std::ranges::find(extensions, extension) != extensions.end();
}
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
[](GLenum pname) { return GetInteger<u32>(pname); });
std::ranges::transform(LIMIT_UBOS, max.begin(),
[](GLenum pname) { return GetInteger<u32>(pname); });
return max;
}
@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
Extract(base_samplers, num_samplers, total_samplers / NumStages,
LIMIT_SAMPLERS[stage])};
}
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
// Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
// This is guaranteed to be at least 1.
const u32 total_extracted_images = num_images / (NumStages - 1);
@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
continue;
}
bindings[stage].image =
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
}
// Compute doesn't care about any of this.
@ -188,6 +193,11 @@ bool IsASTCSupported() {
return true;
}
/// Reports whether a graphics debugging tool appears to be attached to the process.
/// Returns true when either Nsight-related environment variable is set, or when the driver
/// advertises the GL_EXT_debug_tool extension.
/// @param extensions Extension names reported by the OpenGL context.
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
    if (std::getenv("NVTX_INJECTION64_PATH") != nullptr) {
        return true;
    }
    if (std::getenv("NSIGHT_LAUNCHED") != nullptr) {
        return true;
    }
    return HasExtension(extensions, "GL_EXT_debug_tool");
}
} // Anonymous namespace
Device::Device()
@ -206,9 +216,8 @@ Device::Device()
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
@ -224,6 +233,7 @@ Device::Device()
has_precise_bug = TestPreciseBug();
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"

@ -36,11 +36,11 @@ public:
return GetBaseBindings(static_cast<std::size_t>(shader_type));
}
std::size_t GetUniformBufferAlignment() const {
size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
std::size_t GetShaderStorageBufferAlignment() const {
size_t GetShaderStorageBufferAlignment() const {
return shader_storage_alignment;
}
@ -104,6 +104,10 @@ public:
return has_nv_viewport_array2;
}
/// Returns the cached has_debugging_tool_attached flag (a debugging tool was detected).
bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
@ -118,8 +122,8 @@ private:
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
@ -135,6 +139,7 @@ private:
bool has_precise_bug{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
};

@ -46,7 +46,7 @@ void GLInnerFence::Wait() {
}
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}

@ -33,12 +33,12 @@ private:
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_,
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
QueryCache& query_cache_);
protected:

@ -1,85 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include <unordered_map>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using VideoCore::Surface::SurfaceType;
FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
/// Looks up the framebuffer associated with a key, building it the first time the key is seen.
/// @param key Attachments describing the framebuffer.
/// @return OpenGL handle of the cached framebuffer.
GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
    const auto emplaced = cache.try_emplace(key);
    OGLFramebuffer& cached = emplaced.first->second;
    if (!emplaced.second) {
        // The key was already present, reuse the existing object
        return cached.handle;
    }
    cached = CreateFramebuffer(key);
    return cached.handle;
}
/// Creates a framebuffer object and attaches the depth and color views described by the key.
/// The new framebuffer is bound to GL_DRAW_FRAMEBUFFER and is left bound on return.
/// @param key Attachments and draw buffer mapping to configure.
/// @return The newly created framebuffer.
OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
OGLFramebuffer framebuffer;
framebuffer.Create();
// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
if (key.zeta) {
// Depth-stencil surfaces use the combined attachment point, anything else uses depth only
const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
}
// One past the highest populated color slot; trailing empty slots are not passed to glDrawBuffers
std::size_t num_buffers = 0;
std::array<GLenum, Maxwell::NumRenderTargets> targets;
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
if (!key.colors[index]) {
targets[index] = GL_NONE;
continue;
}
const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
// Each slot owns a 4-bit field in color_attachments selecting its draw buffer
const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
num_buffers = index + 1;
}
if (num_buffers > 0) {
glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
} else {
// No color views at all
glDrawBuffer(GL_NONE);
}
return framebuffer;
}
/// Computes a hash over the depth view, every color view and the attachment mapping.
/// The combination is order dependent: the same views stored in different color slots, or one
/// view stored in two slots, no longer collapse to the same value as they did with a plain XOR.
/// @return Hash value suitable for unordered containers.
std::size_t FramebufferCacheKey::Hash() const noexcept {
    // Mixes a new value into the running seed (same scheme as the well known hash_combine)
    const auto combine = [](std::size_t seed, std::size_t value) noexcept {
        return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2));
    };
    std::size_t hash = std::hash<View>{}(zeta);
    for (const auto& color : colors) {
        hash = combine(hash, std::hash<View>{}(color));
    }
    return combine(hash, static_cast<std::size_t>(color_attachments));
}
/// Two keys are equal when their color views, depth view and attachment mapping all match.
bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
    if (!(colors == rhs.colors)) {
        return false;
    }
    if (!(zeta == rhs.zeta)) {
        return false;
    }
    return color_attachments == rhs.color_attachments;
}
} // namespace OpenGL

@ -1,68 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <unordered_map>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
constexpr std::size_t BitsPerAttachment = 4;
/// Describes the set of views attached to a framebuffer; used as the framebuffer cache key.
struct FramebufferCacheKey {
    /// Depth (or depth-stencil) view
    View zeta;
    /// One view per render target slot
    std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
    /// Packed per-slot values, BitsPerAttachment bits each
    u32 color_attachments = 0;

    std::size_t Hash() const noexcept;

    bool operator==(const FramebufferCacheKey& rhs) const noexcept;

    bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
        return !(*this == rhs);
    }

    /// ORs the attachment value into the bit field that belongs to the given slot.
    /// Bits already set in that field are kept.
    void SetAttachment(std::size_t index, u32 attachment) {
        const std::size_t shift = BitsPerAttachment * index;
        color_attachments |= attachment << shift;
    }
};
} // namespace OpenGL
namespace std {

/// Hash adapter that lets FramebufferCacheKey be used in unordered containers.
template <>
struct hash<OpenGL::FramebufferCacheKey> {
    std::size_t operator()(const OpenGL::FramebufferCacheKey& key) const noexcept {
        // The key knows how to hash itself
        const std::size_t digest = key.Hash();
        return digest;
    }
};

} // namespace std
namespace OpenGL {
/// Cache of OpenGL framebuffer objects indexed by FramebufferCacheKey.
class FramebufferCacheOpenGL {
public:
FramebufferCacheOpenGL();
~FramebufferCacheOpenGL();
/// Returns the OpenGL handle of the framebuffer stored for the given key.
GLuint GetFramebuffer(const FramebufferCacheKey& key);
private:
/// Builds a framebuffer object from the views described by the key.
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
/// Framebuffers held by the cache, one per distinct key.
std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
};
} // namespace OpenGL

@ -25,12 +25,15 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
namespace {
constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
constexpr size_t TOTAL_CONST_BUFFER_BYTES =
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
constexpr size_t MAX_TEXTURES = 192;
constexpr size_t MAX_IMAGES = 48;
/// Image and sampler indices unpacked from a raw texture handle.
struct TextureHandle {
    /// @param data Raw handle value.
    /// @param via_header_index When true, the sampler index mirrors the image index instead of
    ///        being read from the handle's tsc_id field.
    constexpr TextureHandle(u32 data, bool via_header_index) {
        const Tegra::Texture::TextureHandle raw{data};
        image = raw.tic_id;
        if (via_header_index) {
            sampler = image;
        } else {
            sampler = raw.tsc_id.Value();
        }
    }

    /// Value of the handle's tic_id field
    u32 image;
    /// Value of the handle's tsc_id field, or the image index (see constructor)
    u32 sampler;
};
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
ShaderType shader_type, std::size_t index = 0) {
TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
ShaderType shader_type, size_t index = 0) {
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
if (entry.is_separated) {
const u32 buffer_1 = entry.buffer;
@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
const u32 offset_2 = entry.secondary_offset;
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
return engine.GetTextureInfo(handle_1 | handle_2);
return TextureHandle(handle_1 | handle_2, via_header_index);
}
}
if (entry.is_bindless) {
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
return engine.GetTextureInfo(handle);
}
const auto& gpu_profile = engine.AccessGuestDriverProfile();
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
return engine.GetStageTexture(shader_type, offset);
} else {
return engine.GetTexture(offset);
const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
return TextureHandle(raw, via_header_index);
}
const u32 buffer = engine.GetBoundBuffer();
const u64 offset = (entry.offset + index) * sizeof(u32);
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
if (!entry.IsIndirect()) {
return entry.GetSize();
}
if (buffer.size > Maxwell::MaxConstBufferSize) {
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
Maxwell::MaxConstBufferSize);
@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss
reinterpret_cast<const GLuint*>(ssbos));
}
/// Picks the image view type matching a shader sampler entry.
/// Buffer samplers map to Buffer regardless of their texture type; for the remaining types the
/// array variant is chosen when the entry is flagged as an array (3D has no array variant).
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
    using ShaderTextureType = Tegra::Shader::TextureType;

    if (entry.is_buffer) {
        return ImageViewType::Buffer;
    }
    switch (entry.type) {
    case ShaderTextureType::Texture1D:
        if (entry.is_array) {
            return ImageViewType::e1DArray;
        }
        return ImageViewType::e1D;
    case ShaderTextureType::Texture2D:
        if (entry.is_array) {
            return ImageViewType::e2DArray;
        }
        return ImageViewType::e2D;
    case ShaderTextureType::Texture3D:
        return ImageViewType::e3D;
    case ShaderTextureType::TextureCube:
        if (entry.is_array) {
            return ImageViewType::CubeArray;
        }
        return ImageViewType::Cube;
    }
    // No case matched the entry's type
    UNREACHABLE();
    return ImageViewType::e2D;
}
/// Picks the image view type matching a shader image entry.
/// Each shader image type maps directly to the view type of the same dimensionality.
ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
    using ShaderImageType = Tegra::Shader::ImageType;

    switch (entry.type) {
    case ShaderImageType::Texture1D:
        return ImageViewType::e1D;
    case ShaderImageType::Texture1DArray:
        return ImageViewType::e1DArray;
    case ShaderImageType::Texture2D:
        return ImageViewType::e2D;
    case ShaderImageType::Texture2DArray:
        return ImageViewType::e2DArray;
    case ShaderImageType::Texture3D:
        return ImageViewType::e3D;
    case ShaderImageType::TextureBuffer:
        return ImageViewType::Buffer;
    }
    // No case matched the entry's type
    UNREACHABLE();
    return ImageViewType::e2D;
}
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_)
: RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
stream_buffer(device, state_tracker),
texture_cache_runtime(device, program_manager, state_tracker),
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory),
buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE),
buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window_) {
CheckExtensions();
unified_uniform_buffer.Create();
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
nullptr, 0);
}
}
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}
/// Logs a warning when neither anisotropic filtering extension is available.
void RasterizerOpenGL::CheckExtensions() {
    const bool has_anisotropic_filter =
        GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic;
    if (has_anisotropic_filter) {
        return;
    }
    LOG_WARNING(
        Render_OpenGL,
        "Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
void RasterizerOpenGL::SetupVertexFormat() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
return info.offset;
}
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader);
u32 clip_distances = 0;
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
image_view_indices.clear();
sampler_handles.clear();
texture_cache.SynchronizeGraphicsDescriptors();
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
continue;
}
// Currently these stages are not supported in the OpenGL backend.
// TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
if (program == Maxwell::ShaderProgram::TesselationControl ||
@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
default:
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
break;
}
// Stage indices are 0 - 5
const std::size_t stage = index == 0 ? 0 : index - 1;
const size_t stage = index == 0 ? 0 : index - 1;
shaders[stage] = shader;
SetupDrawConstBuffers(stage, shader);
SetupDrawGlobalMemory(stage, shader);
SetupDrawTextures(stage, shader);
SetupDrawImages(stage, shader);
SetupDrawTextures(shader, stage);
SetupDrawImages(shader, stage);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
++index;
}
}
SyncClipEnabled(clip_distances);
maxwell3d.dirty.flags[Dirty::Shaders] = false;
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
size_t image_view_index = 0;
size_t texture_index = 0;
size_t image_index = 0;
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
const Shader* const shader = shaders[stage];
if (shader) {
const auto base = device.GetBaseBindings(stage);
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
texture_index, image_index);
}
}
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
/// Binds the draw framebuffer matching the current render target registers.
/// Does nothing unless the RenderTargets dirty flag is set; the flag is cleared when work is done.
void RasterizerOpenGL::ConfigureFramebuffers() {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
return;
}
maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
// Every surface lookup below happens between GuardRenderTargets(true) and GuardRenderTargets(false)
texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(true);
const auto& regs = maxwell3d.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
FramebufferCacheKey key;
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < colors_count; ++index) {
View color_surface{texture_cache.GetColorBufferSurface(index, true)};
if (!color_surface) {
// Slots without a surface stay empty in the key
continue;
}
// Assume that a surface will be written to if it is used as a framebuffer, even
// if the shader doesn't actually write to it.
texture_cache.MarkColorBufferInUse(index);
key.SetAttachment(index, regs.rt_control.GetMap(index));
key.colors[index] = std::move(color_surface);
}
if (depth_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
texture_cache.MarkDepthBufferInUse();
key.zeta = std::move(depth_surface);
}
texture_cache.GuardRenderTargets(false);
// The framebuffer cache returns the object for this key, creating it on first use
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
/// Binds a framebuffer holding only the surfaces that a clear operation is going to touch.
/// @param using_color Whether the color target selected by clear_buffers.RT is cleared.
/// @param using_depth_stencil Whether the depth buffer is cleared.
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
const auto& regs = maxwell3d.regs;
texture_cache.GuardRenderTargets(true);
View color_surface;
if (using_color) {
// Determine if we have to preserve the contents.
// First we have to make sure all clear masks are enabled.
bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
!regs.clear_buffers.B || !regs.clear_buffers.A;
const std::size_t index = regs.clear_buffers.RT;
if (regs.clear_flags.scissor) {
// Then we have to confirm scissor testing clears the whole image.
const auto& scissor = regs.scissor_test[0];
preserve_contents |= scissor.min_x > 0;
preserve_contents |= scissor.min_y > 0;
preserve_contents |= scissor.max_x < regs.rt[index].width;
preserve_contents |= scissor.max_y < regs.rt[index].height;
}
color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
texture_cache.MarkColorBufferInUse(index);
}
View depth_surface;
if (using_depth_stencil) {
bool preserve_contents = false;
if (regs.clear_flags.scissor) {
// For depth stencil clears we only have to confirm scissor test covers the whole image.
const auto& scissor = regs.scissor_test[0];
preserve_contents |= scissor.min_x > 0;
preserve_contents |= scissor.min_y > 0;
preserve_contents |= scissor.max_x < regs.zeta_width;
preserve_contents |= scissor.max_y < regs.zeta_height;
}
depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
texture_cache.MarkDepthBufferInUse();
}
texture_cache.GuardRenderTargets(false);
// The cleared color surface is always placed in slot 0 of the key, whatever its RT index is
FramebufferCacheKey key;
key.colors[0] = std::move(color_surface);
key.zeta = std::move(depth_surface);
state_tracker.NotifyFramebuffer();
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
void RasterizerOpenGL::Clear() {
if (!maxwell3d.ShouldExecute()) {
return;
@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() {
regs.clear_buffers.A) {
use_color = true;
state_tracker.NotifyColorMask0();
glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
const GLuint index = regs.clear_buffers.RT;
state_tracker.NotifyColorMask(index);
glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
// TODO(Rodrigo): Determine if clamping is used on clears
@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() {
state_tracker.NotifyScissor0();
glDisablei(GL_SCISSOR_TEST, 0);
}
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
if (use_color) {
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
{
auto lock = texture_cache.AcquireLock();
texture_cache.UpdateRenderTargets(true);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
}
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
}
if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
} else if (use_depth) {
@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
// Prepare the vertex array.
const bool invalidated = buffer_cache.Map(buffer_size);
if (invalidated) {
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
auto& dirty = maxwell3d.dirty.flags;
dirty[Dirty::VertexBuffers] = true;
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
dirty[index] = true;
}
}
buffer_cache.Map(buffer_size);
// Prepare vertex array format.
SetupVertexFormat();
@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
SetupShaders(primitive_mode);
texture_cache.GuardSamplers(false);
ConfigureFramebuffers();
auto lock = texture_cache.AcquireLock();
SetupShaders();
// Signal the buffer cache that we are not going to upload more things.
buffer_cache.Unmap();
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
if (texture_cache.TextureBarrier()) {
glTextureBarrier();
}
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Acquire();
current_cbuf = 0;
auto kernel = shader_cache.GetComputeKernel(code_addr);
program_manager.BindCompute(kernel->GetHandle());
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
auto lock = texture_cache.AcquireLock();
BindComputeTextures(kernel);
const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);
SetupComputeConstBuffers(kernel);
@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap();
const auto& launch_desc = kepler_compute.launch_description;
program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.FlushRegion(addr, size);
{
auto lock = texture_cache.AcquireLock();
texture_cache.DownloadMemory(addr, size);
}
buffer_cache.FlushRegion(addr, size);
query_cache.FlushRegion(addr, size);
}
@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.MustFlushRegion(addr, size);
}
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
return texture_cache.IsRegionGpuModified(addr, size) ||
buffer_cache.MustFlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.InvalidateRegion(addr, size);
{
auto lock = texture_cache.AcquireLock();
texture_cache.WriteMemory(addr, size);
}
shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.OnCPUWrite(addr, size);
{
auto lock = texture_cache.AcquireLock();
texture_cache.WriteMemory(addr, size);
}
shader_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
shader_cache.SyncGuestHost();
}
/// Forwards an unmapped memory range to the texture, buffer and shader caches.
/// NOTE(review): unlike the neighbouring region handlers there is no early return for a zero
/// address or size here - confirm that is intended.
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
{
// The texture cache lock is held only for the duration of the texture cache call
auto lock = texture_cache.AcquireLock();
texture_cache.UnmapMemory(addr, size);
}
// The buffer and shader caches are notified through their CPU write handlers
buffer_cache.OnCPUWrite(addr, size);
shader_cache.OnCPUWrite(addr, size);
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
}
/// Issues an OpenGL memory barrier with GL_FRAMEBUFFER_BARRIER_BIT.
void RasterizerOpenGL::FragmentBarrier() {
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
}
/// Issues an OpenGL texture barrier (glTextureBarrier).
void RasterizerOpenGL::TiledCacheBarrier() {
glTextureBarrier();
}
void RasterizerOpenGL::FlushCommands() {
// Only flush when we have commands queued to OpenGL.
if (num_queued_commands == 0) {
@ -854,47 +833,97 @@ void RasterizerOpenGL::TickFrame() {
// Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
num_queued_commands = 0;
fence_manager.TickFrame();
buffer_cache.TickFrame();
{
auto lock = texture_cache.AcquireLock();
texture_cache.TickFrame();
}
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
texture_cache.DoFermiCopy(src, dst, copy_config);
auto lock = texture_cache.AcquireLock();
texture_cache.BlitImage(dst, src, copy_config);
return true;
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {
return {};
if (framebuffer_addr == 0) {
return false;
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
if (!surface) {
return {};
auto lock = texture_cache.AcquireLock();
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
if (!image_view) {
return false;
}
// Verify that the cached surface is the same size and format as the requested framebuffer
const auto& params{surface->GetSurfaceParams()};
const auto& pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
if (params.pixel_format != pixel_format) {
LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
}
screen_info.display_texture = surface->GetTexture();
screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
/// Collects the samplers and image views used by a compute kernel and binds them.
/// @param kernel Compute shader whose entries describe the textures and images to bind.
void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
// Start from empty scratch lists; the Setup* calls below append to them
image_view_indices.clear();
sampler_handles.clear();
texture_cache.SynchronizeComputeDescriptors();
// Textures are gathered before images; BindTextures consumes the image views in that same order
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
program_manager.BindCompute(kernel->GetHandle());
// Compute uses binding 0 as base for both textures and images
size_t image_view_index = 0;
size_t texture_index = 0;
size_t image_index = 0;
BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
}
void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
GLuint base_image, size_t& image_view_index,
size_t& texture_index, size_t& image_index) {
const GLuint* const samplers = sampler_handles.data() + texture_index;
const GLuint* const textures = texture_handles.data() + texture_index;
const GLuint* const images = image_handles.data() + image_index;
const size_t num_samplers = entries.samplers.size();
for (const auto& sampler : entries.samplers) {
for (size_t i = 0; i < sampler.size; ++i) {
const ImageViewId image_view_id = image_view_ids[image_view_index++];
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
texture_handles[texture_index++] = handle;
}
}
const size_t num_images = entries.images.size();
for (size_t unit = 0; unit < num_images; ++unit) {
// TODO: Mark as modified
const ImageViewId image_view_id = image_view_ids[image_view_index++];
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
image_handles[image_index] = handle;
++image_index;
}
if (num_samplers > 0) {
glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
}
if (num_images > 0) {
glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
}
}
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
static constexpr std::array PARAMETER_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
const auto& entries{shader->GetEntries().global_memory_entries};
@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
}
}
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
MICROPROFILE_SCOPE(OpenGL_Texture);
u32 binding = device.GetBaseBindings(stage_index).sampler;
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
const bool via_header_index =
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
for (std::size_t i = 0; i < entry.size; ++i) {
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
SetupTexture(binding++, texture, entry);
for (size_t index = 0; index < entry.size; ++index) {
const auto handle =
GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
sampler_handles.push_back(sampler->Handle());
image_view_indices.push_back(handle.image);
}
}
}
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
u32 binding = 0;
void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : kernel->GetEntries().samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
for (size_t i = 0; i < entry.size; ++i) {
const auto handle =
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
sampler_handles.push_back(sampler->Handle());
image_view_indices.push_back(handle.image);
}
}
}
void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const SamplerEntry& entry) {
const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
if (!view) {
// Can occur when texture addr is null or its memory is unmapped/invalid
glBindSampler(binding, 0);
glBindTextureUnit(binding, 0);
return;
}
const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
texture.tic.z_source, texture.tic.w_source);
glBindTextureUnit(binding, handle);
if (!view->GetSurfaceParams().IsBuffer()) {
glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
}
}
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
u32 binding = device.GetBaseBindings(stage_index).image;
void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
const bool via_header_index =
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry);
const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
image_view_indices.push_back(handle.image);
}
}
void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
u32 binding = 0;
void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : shader->GetEntries().images) {
const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
SetupImage(binding++, tic, entry);
const auto handle =
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
image_view_indices.push_back(handle.image);
}
}
void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
const ImageEntry& entry) {
const auto view = texture_cache.GetImageSurface(tic, entry);
if (!view) {
glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
return;
}
if (entry.is_written) {
view->MarkAsModified(texture_cache.Tick());
}
const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
}
void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
flags[Dirty::PointSize] = false;
oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
if (maxwell3d.regs.vp_point_size.enable) {
// By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
glEnable(GL_PROGRAM_POINT_SIZE);
return;
}
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
glDisable(GL_PROGRAM_POINT_SIZE);
}
void RasterizerOpenGL::SyncLineState() {

@ -7,12 +7,13 @@
#include <array>
#include <atomic>
#include <cstddef>
#include <map>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
#include <boost/container/static_vector.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
@ -23,16 +24,14 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
@ -51,7 +50,7 @@ class MemoryManager;
namespace OpenGL {
struct ScreenInfo;
struct DrawParameters;
struct ShaderEntries;
struct BindlessSSBO {
GLuint64EXT address;
@ -79,15 +78,18 @@ public:
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
@ -108,11 +110,14 @@ public:
}
private:
/// Configures the color and depth framebuffer states.
void ConfigureFramebuffers();
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
/// Configures the color and depth framebuffer for clearing.
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
void BindComputeTextures(Shader* kernel);
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
size_t& image_view_index, size_t& texture_index, size_t& image_index);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
@ -136,23 +141,16 @@ private:
size_t size, BindlessSSBO* ssbo);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
void SetupDrawTextures(const Shader* shader, size_t stage_index);
/// Configures the textures used in a compute shader.
void SetupComputeTextures(Shader* kernel);
/// Configures a texture.
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const SamplerEntry& entry);
void SetupComputeTextures(const Shader* kernel);
/// Configures images in a graphics shader.
void SetupDrawImages(std::size_t stage_index, Shader* shader);
void SetupDrawImages(const Shader* shader, size_t stage_index);
/// Configures images in a compute shader.
void SetupComputeImages(Shader* shader);
/// Configures an image.
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void SetupComputeImages(const Shader* shader);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@ -227,9 +225,6 @@ private:
/// End a transform feedback
void EndTransformFeedback();
/// Check for extension that are not strictly required but are needed for correct emulation
void CheckExtensions();
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
@ -242,7 +237,7 @@ private:
GLintptr SetupIndexBuffer();
void SetupShaders(GLenum primitive_mode);
void SetupShaders();
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
@ -254,19 +249,21 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
TextureCacheOpenGL texture_cache;
OGLStreamBuffer stream_buffer;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
ShaderCacheOpenGL shader_cache;
SamplerCacheOpenGL sampler_cache;
FramebufferCacheOpenGL framebuffer_cache;
QueryCache query_cache;
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
GLint vertex_binding = 0;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
std::array<GLuint, MAX_TEXTURES> texture_handles;
std::array<GLuint, MAX_IMAGES> image_handles;
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;
@ -280,7 +277,7 @@ private:
std::size_t current_cbuf = 0;
OGLBuffer unified_uniform_buffer;
/// Number of commands queued to the OpenGL driver. Reseted on flush.
/// Number of commands queued to the OpenGL driver. Reset on flush.
std::size_t num_queued_commands = 0;
u32 last_clip_distance_mask = 0;

@ -71,7 +71,7 @@ void OGLSampler::Create() {
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenSamplers(1, &handle);
glCreateSamplers(1, &handle);
}
void OGLSampler::Release() {

@ -1,52 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
/// Builds a host OpenGL sampler object whose parameters mirror the given guest TSC entry.
/// @param tsc Guest sampler descriptor to translate.
/// @return The newly created sampler, owned by the returned OGLSampler.
OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
    OGLSampler sampler;
    sampler.Create();
    const GLuint id{sampler.handle};

    // Filtering: the magnification filter is translated without a mipmap filter.
    const auto mag_filter =
        MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None);
    glSamplerParameteri(id, GL_TEXTURE_MAG_FILTER, mag_filter);
    const auto min_filter = MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter);
    glSamplerParameteri(id, GL_TEXTURE_MIN_FILTER, min_filter);

    // Addressing mode for each texture coordinate.
    glSamplerParameteri(id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
    glSamplerParameteri(id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
    glSamplerParameteri(id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));

    // Depth comparison is only turned on when the guest enables it.
    GLint compare_mode = GL_NONE;
    if (tsc.depth_compare_enabled == 1) {
        compare_mode = GL_COMPARE_REF_TO_TEXTURE;
    }
    glSamplerParameteri(id, GL_TEXTURE_COMPARE_MODE, compare_mode);
    glSamplerParameteri(id, GL_TEXTURE_COMPARE_FUNC,
                        MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));

    const auto border_color = tsc.GetBorderColor();
    glSamplerParameterfv(id, GL_TEXTURE_BORDER_COLOR, border_color.data());

    // Level-of-detail range and bias.
    glSamplerParameterf(id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
    glSamplerParameterf(id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
    glSamplerParameterf(id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());

    // Anisotropic filtering: prefer the ARB extension, fall back to EXT, otherwise warn.
    if (GLAD_GL_ARB_texture_filter_anisotropic) {
        glSamplerParameterf(id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
    } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
        glSamplerParameterf(id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
    } else {
        LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
    }

    return sampler;
}
/// Returns the raw OpenGL sampler name held by the given sampler wrapper.
GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
    const GLuint gl_name = sampler.handle;
    return gl_name;
}
} // namespace OpenGL

@ -1,25 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/sampler_cache.h"
namespace OpenGL {
/// OpenGL specialization of the generic sampler cache: cached objects are OGLSampler
/// wrappers and the type handed out to users is the raw GLuint sampler name.
class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
public:
explicit SamplerCacheOpenGL();
~SamplerCacheOpenGL();
protected:
/// Creates a host sampler object from a guest TSC (sampler descriptor) entry.
OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
/// Converts a cached sampler wrapper into the GLuint handle exposed by the cache.
GLuint ToSamplerType(const OGLSampler& sampler) const override;
};
} // namespace OpenGL

@ -27,7 +27,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"

@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using Tegra::Shader::TextureType;
using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;
using namespace std::string_literals;
using namespace VideoCommon::Shader;
using namespace std::string_literals;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&;
@ -2753,11 +2751,11 @@ private:
}
}
std::string GetSampler(const Sampler& sampler) const {
std::string GetSampler(const SamplerEntry& sampler) const {
return AppendSuffix(sampler.index, "sampler");
}
std::string GetImage(const Image& image) const {
std::string GetImage(const ImageEntry& image) const {
return AppendSuffix(image.index, "image");
}

@ -20,8 +20,8 @@ namespace OpenGL {
class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
using ImageEntry = VideoCommon::Shader::ImageEntry;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:

@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
}
}
/// Binds a host GLSL compute program directly with glUseProgram.
/// When assembly programs are in use, GL_COMPUTE_PROGRAM_NV is disabled before binding.
/// The graphics pipeline is marked as no longer bound.
void ProgramManager::BindHostCompute(GLuint program) {
    // Independent of the GL calls below, so it can be recorded up front.
    is_graphics_bound = false;

    if (use_assembly_programs) {
        glDisable(GL_COMPUTE_PROGRAM_NV);
    }
    glUseProgram(program);
}
/// Rewinds the BindHostCompute state changes when assembly programs are in use:
/// re-enables GL_COMPUTE_PROGRAM_NV and unbinds the GLSL program.
void ProgramManager::RestoreGuestCompute() {
    if (!use_assembly_programs) {
        // No GL calls are issued on the GLSL path.
        return;
    }
    glEnable(GL_COMPUTE_PROGRAM_NV);
    glUseProgram(0);
}
void ProgramManager::UseVertexShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);

@ -45,6 +45,12 @@ public:
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
void BindHostCompute(GLuint program);
/// Rewinds BindHostCompute state changes.
void RestoreGuestCompute();
void UseVertexShader(GLuint program);
void UseGeometryShader(GLuint program);
void UseFragmentShader(GLuint program);

@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
}
}
/// Marks the vertex buffer state as dirty: the aggregate VertexBuffers flag plus every
/// per-binding flag from VertexBuffer0 through VertexBuffer31 (inclusive).
void StateTracker::InvalidateStreamBuffer() {
    flags[Dirty::VertexBuffers] = true;

    int binding_flag = Dirty::VertexBuffer0;
    while (binding_flag <= Dirty::VertexBuffer31) {
        flags[binding_flag] = true;
        ++binding_flag;
    }
}
} // namespace OpenGL

@ -92,6 +92,8 @@ class StateTracker {
public:
explicit StateTracker(Tegra::GPU& gpu);
void InvalidateStreamBuffer();
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
@ -100,6 +102,14 @@ public:
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
}
/// Binds new_framebuffer to GL_DRAW_FRAMEBUFFER, skipping the GL call when it already
/// matches the cached binding.
void BindFramebuffer(GLuint new_framebuffer) {
    if (framebuffer != new_framebuffer) {
        // Update the cache first, then issue the actual bind.
        framebuffer = new_framebuffer;
        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, new_framebuffer);
    }
}
void NotifyScreenDrawVertexArray() {
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
@ -129,9 +139,9 @@ public:
flags[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask0() {
void NotifyColorMask(size_t index) {
flags[OpenGL::Dirty::ColorMasks] = true;
flags[OpenGL::Dirty::ColorMask0] = true;
flags[OpenGL::Dirty::ColorMask0 + index] = true;
}
void NotifyBlend0() {
@ -190,6 +200,7 @@ public:
private:
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
GLuint framebuffer = 0;
GLuint index_buffer = 0;
};

@ -9,6 +9,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
: buffer_size(size) {
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
: state_tracker{state_tracker_} {
gl_buffer.Create();
GLsizeiptr allocate_size = size;
if (vertex_data_usage) {
// On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
// crash.
allocate_size *= 2;
}
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
mapped_ptr = static_cast<u8*>(
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {
gl_buffer.Release();
}
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= buffer_size);
ASSERT(alignment <= buffer_size);
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= BUFFER_SIZE);
ASSERT(alignment <= BUFFER_SIZE);
mapped_size = size;
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
}
bool invalidate = false;
if (buffer_pos + size > buffer_size) {
if (buffer_pos + size > BUFFER_SIZE) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
glInvalidateBufferData(gl_buffer.handle);
state_tracker.InvalidateStreamBuffer();
buffer_pos = 0;
invalidate = true;
}
return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
}
void OGLStreamBuffer::Unmap(GLsizeiptr size) {

@ -4,29 +4,31 @@
#pragma once
#include <tuple>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class Device;
class StateTracker;
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
~OGLStreamBuffer();
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
* The return values are the pointer to the new chunk, the offset within the buffer,
* and the invalidation flag for previous chunks.
* The return values are the pointer to the new chunk, and the offset within the buffer.
* The actual used size must be specified on unmapping the chunk.
*/
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
void Unmap(GLsizeiptr size);
@ -39,15 +41,18 @@ public:
}
GLsizeiptr Size() const noexcept {
return buffer_size;
return BUFFER_SIZE;
}
private:
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
StateTracker& state_tracker;
OGLBuffer gl_buffer;
GLuint64EXT gpu_address = 0;
GLintptr buffer_pos = 0;
GLsizeiptr buffer_size = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
};

File diff suppressed because it is too large Load Diff

@ -4,157 +4,247 @@
#pragma once
#include <array>
#include <functional>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include <span>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
using VideoCommon::SurfaceParams;
using VideoCommon::ViewParams;
class CachedSurfaceView;
class CachedSurface;
class TextureCacheOpenGL;
class Device;
class ProgramManager;
class StateTracker;
using Surface = std::shared_ptr<CachedSurface>;
using View = std::shared_ptr<CachedSurfaceView>;
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
class Framebuffer;
class Image;
class ImageView;
class Sampler;
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView;
using VideoCommon::ImageId;
using VideoCommon::ImageViewId;
using VideoCommon::ImageViewType;
using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
class ImageBufferMap {
public:
explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_,
bool is_astc_supported_);
~CachedSurface();
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
~ImageBufferMap();
void UploadTexture(const std::vector<u8>& staging_buffer) override;
void DownloadTexture(std::vector<u8>& staging_buffer) override;
GLenum GetTarget() const {
return target;
GLuint Handle() const noexcept {
return handle;
}
GLuint GetTexture() const {
std::span<u8> Span() const noexcept {
return span;
}
private:
std::span<u8> span;
OGLSync* sync;
GLuint handle;
};
/// Per-format properties, as returned by TextureCacheRuntime::FormatInfo.
struct FormatProperties {
// NOTE(review): presumably the GL image-format compatibility class of the format - confirm.
GLenum compatibility_class;
// NOTE(review): presumably true when compatibility is determined by texel size rather than
// by class - confirm against the code that fills this in.
bool compatibility_by_size;
// Whether the format is a compressed format.
bool is_compressed;
};
/// OpenGL backend runtime used by the texture cache (it is the `Runtime` type named in
/// TextureCacheParams). It declares the staging buffer mapping, image copy, framebuffer blit
/// and format query operations, and owns the null images/views declared below.
/// Framebuffer, Image, ImageView and Sampler are friends and can access its private state.
class TextureCacheRuntime {
friend Framebuffer;
friend Image;
friend ImageView;
friend Sampler;
public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
StateTracker& state_tracker);
~TextureCacheRuntime();
// NOTE(review): presumably blocks until pending GPU work completes - confirm in the .cpp.
void Finish();
/// Returns a mapped staging buffer of at least `size` bytes for uploads.
// NOTE(review): "at least" is assumed from the Request* naming below - confirm.
ImageBufferMap MapUploadBuffer(size_t size);
/// Returns a mapped staging buffer for downloads; same assumption as MapUploadBuffer.
ImageBufferMap MapDownloadBuffer(size_t size);
/// Copies the regions described by `copies` from `src` into `dst`.
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
/// Image conversion is not implemented on this backend; calling it hits UNIMPLEMENTED().
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
UNIMPLEMENTED();
}
// NOTE(review): presumably reports whether CopyImage can be used directly for this pair,
// with EmulateCopyImage as the fallback - confirm against the texture cache caller.
bool CanImageBeCopied(const Image& dst, const Image& src);
void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
/// Blits `src_region` of `src` into `dst_region` of `dst` using the given Fermi2D filter
/// and operation. Each region is a pair of 2D offsets.
void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
// NOTE(review): presumably performs the upload/unswizzle on the GPU via util_shaders -
// confirm in the .cpp.
void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
/// Looks up the FormatProperties for an image type / GL internal format pair.
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
private:
/// Pool of persistent staging buffers. The vectors below are parallel arrays.
// NOTE(review): "parallel, indexed by buffer slot" is inferred from the declarations only.
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
std::vector<OGLSync> syncs;
std::vector<OGLBuffer> buffers;
std::vector<u8*> maps;
std::vector<size_t> sizes;
GLenum storage_flags;
GLenum map_flags;
};
const Device& device;
StateTracker& state_tracker;
UtilShaders util_shaders;
// NOTE(review): the outer array has 3 entries; presumably one per image type queried in
// FormatInfo - confirm the indexing in the .cpp.
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
// Upload buffers are created and mapped for writing with explicit flushes.
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
// Download buffers are created and mapped for reading.
StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
// Null images and views; presumably bound when a guest descriptor has no valid image.
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
OGLTexture null_image_rect;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
// One null view handle per image view type.
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
};
class Image : public VideoCommon::ImageBase {
friend ImageView;
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies);
void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
GLuint Handle() const noexcept {
return texture.handle;
}
bool IsCompressed() const {
return is_compressed;
}
protected:
void DecorateSurfaceName() override;
View CreateView(const ViewParams& view_key) override;
View CreateViewInner(const ViewParams& view_key, bool is_proxy);
private:
void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
GLenum internal_format{};
GLenum format{};
GLenum type{};
bool is_compressed{};
GLenum target{};
u32 view_count{};
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
OGLBuffer texture_buffer;
OGLTextureView store_view;
OGLBuffer buffer;
GLenum gl_internal_format = GL_NONE;
GLenum gl_store_format = GL_NONE;
GLenum gl_format = GL_NONE;
GLenum gl_type = GL_NONE;
};
class CachedSurfaceView final : public VideoCommon::ViewBase {
class ImageView : public VideoCommon::ImageViewBase {
friend Image;
public:
explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_);
~CachedSurfaceView();
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
/// @brief Attaches this texture view to the currently bound fb_target framebuffer
/// @param attachment Attachment to bind textures to
/// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
void Attach(GLenum attachment, GLenum fb_target) const;
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
void MarkAsModified(u64 tick) {
surface.MarkAsModified(true, tick);
[[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
return views[static_cast<size_t>(query_type)];
}
GLuint GetTexture() const {
if (is_proxy) {
return surface.GetTexture();
}
return main_view.handle;
[[nodiscard]] GLuint DefaultHandle() const noexcept {
return default_handle;
}
GLenum GetFormat() const {
return format;
}
const SurfaceParams& GetSurfaceParams() const {
return surface.GetSurfaceParams();
[[nodiscard]] GLenum Format() const noexcept {
return internal_format;
}
private:
OGLTextureView CreateTextureView() const;
void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
const VideoCommon::ImageViewInfo& info,
VideoCommon::SubresourceRange view_range);
CachedSurface& surface;
const GLenum format;
const GLenum target;
const bool is_proxy;
std::unordered_map<u32, OGLTextureView> view_cache;
OGLTextureView main_view;
// Use an invalid default so it always fails the comparison test
u32 current_swizzle = 0xffffffff;
GLuint current_view = 0;
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
std::vector<OGLTextureView> stored_views;
GLuint default_handle = 0;
GLenum internal_format = GL_NONE;
};
class TextureCacheOpenGL final : public TextureCacheBase {
class ImageAlloc : public VideoCommon::ImageAllocBase {};
class Sampler {
public:
explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
StateTracker& state_tracker);
~TextureCacheOpenGL();
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
protected:
Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
void ImageCopy(Surface& src_surface, Surface& dst_surface,
const VideoCommon::CopyParams& copy_params) override;
void ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
GLuint Handle() const noexcept {
return sampler.handle;
}
private:
GLuint FetchPBO(std::size_t buffer_size);
StateTracker& state_tracker;
OGLFramebuffer src_framebuffer;
OGLFramebuffer dst_framebuffer;
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
OGLSampler sampler;
};
/// Host framebuffer object built from a set of color image views and an optional depth
/// image view.
class Framebuffer {
public:
/// @param color_buffers One entry per render target slot (NUM_RT entries).
/// @param depth_buffer Depth image view.
// NOTE(review): null entries presumably mean "no attachment" - confirm in the .cpp.
/// @param key Render target description this framebuffer was created for.
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
/// Returns the OpenGL framebuffer object name.
[[nodiscard]] GLuint Handle() const noexcept {
return framebuffer.handle;
}
/// Returns the stored buffer bit mask (initialized to GL_NONE).
// NOTE(review): presumably the GL_*_BUFFER_BIT mask of attached buffers - confirm.
[[nodiscard]] GLbitfield BufferBits() const noexcept {
return buffer_bits;
}
private:
OGLFramebuffer framebuffer;
GLbitfield buffer_bits = GL_NONE;
};
/// Compile-time configuration used to instantiate VideoCommon::TextureCache for the
/// OpenGL backend: feature switches plus the backend types the generic cache operates on.
struct TextureCacheParams {
// NOTE(review): the exact meaning of each switch is defined by the generic texture cache;
// the names suggest validation checks, framebuffer-based blits and emulated image copies.
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
// Backend types plugged into the generic cache.
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
using ImageAlloc = OpenGL::ImageAlloc;
using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace OpenGL

@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
return GL_FILL;
}
/// Translates a guest sampler reduction mode into the matching OpenGL reduction mode enum.
/// Unrecognized values trigger UNREACHABLE_MSG and fall back to GL_WEIGHTED_AVERAGE_ARB.
inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
    if (filter == Tegra::Texture::SamplerReduction::WeightedAverage) {
        return GL_WEIGHTED_AVERAGE_ARB;
    }
    if (filter == Tegra::Texture::SamplerReduction::Min) {
        return GL_MIN;
    }
    if (filter == Tegra::Texture::SamplerReduction::Max) {
        return GL_MAX;
    }
    UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
    return GL_WEIGHTED_AVERAGE_ARB;
}
inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
// Enumeration order matches register order. We can convert it arithmetically.
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);

@ -23,10 +23,10 @@
#include "core/telemetry_session.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
PrepareRendertarget(framebuffer);
RenderScreenshot();
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
state_tracker.BindFramebuffer(0);
DrawScreen(emu_window.GetFramebufferLayout());
++m_current_frame;
@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
gl_framebuffer_data.data(), host_ptr);
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
const u64 size_in_bytes{Tegra::Texture::CalculateSize(
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
const std::span<const u8> input_data(host_ptr, size_in_bytes);
Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
framebuffer.width, framebuffer.height, 1, block_height_log2,
0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
// Update existing texture
@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
// Generate presentation sampler
present_sampler.Create();
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
// Generate VBO handle for drawing
vertex_buffer.Create();
@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
// Enable seamless cubemaps when per texture parameters are not available
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
}
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
GLint internal_format;
@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
static_cast<u32>(framebuffer.pixel_format));
// UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
// static_cast<u32>(framebuffer.pixel_format));
}
texture.resource.Release();
@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask0();
state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
}
glBindTextureUnit(0, screen_info.display_texture);
glBindSampler(0, 0);
glBindSampler(0, present_sampler.handle);
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
DrawScreen(layout);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
renderer_settings.screenshot_bits);

@ -102,6 +102,7 @@ private:
StateTracker state_tracker{gpu};
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;

@ -0,0 +1,224 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <bit>
#include <span>
#include <string_view>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
using namespace HostShaders;
using VideoCommon::Extent3D;
using VideoCommon::ImageCopy;
using VideoCommon::ImageType;
using VideoCommon::SwizzleParameters;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;
namespace {
/// Builds a program object out of a single compute shader compiled from the given source
OGLProgram MakeProgram(std::string_view source) {
    OGLShader compute_shader;
    compute_shader.Create(source, GL_COMPUTE_SHADER);

    OGLProgram compute_program;
    compute_program.Create(true, false, compute_shader.handle);
    return compute_program;
}
} // Anonymous namespace
/// Compiles every utility compute program and uploads the swizzle table into an immutable buffer
UtilShaders::UtilShaders(ProgramManager& program_manager_)
    : program_manager{program_manager_},
      block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
      block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
      pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
      copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
    swizzle_table_buffer.Create();

    // The storage is created with no flags, so its contents are fixed from this point on
    const auto table = Tegra::Texture::MakeSwizzleTable();
    glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(table), &table, 0);
}
// Defaulted out of line; no explicit cleanup is performed here.
UtilShaders::~UtilShaders() = default;
/// Unswizzles block linear data from a mapped buffer into a 2D image using a compute shader
/// @param image         Destination image; its format selects the image store format
/// @param map           Mapped buffer holding the guest data
/// @param buffer_offset Offset of the image data inside the mapped buffer
/// @param swizzles      One entry per dispatch, each naming its level and tile counts
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
                                      std::span<const SwizzleParameters> swizzles) {
    static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

    const GLenum image_format = StoreFormat(BytesPerBlock(image.info.format));
    for (const SwizzleParameters& entry : swizzles) {
        // Uniform locations are fixed; the values come straight from the generated parameters
        const auto uniforms = MakeBlockLinearSwizzle2DParams(entry, image.info);
        glUniform3uiv(0, 1, uniforms.origin.data());
        glUniform3iv(1, 1, uniforms.destination.data());
        glUniform1ui(2, uniforms.bytes_per_block_log2);
        glUniform1ui(3, uniforms.layer_stride);
        glUniform1ui(4, uniforms.block_size);
        glUniform1ui(5, uniforms.x_shift);
        glUniform1ui(6, uniforms.block_height);
        glUniform1ui(7, uniforms.block_height_mask);

        // Expose the input from this entry's offset up to the end of the guest data
        const size_t source_offset = entry.buffer_offset + buffer_offset;
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          source_offset, image.guest_size_bytes - entry.buffer_offset);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), entry.level, GL_TRUE, 0,
                           GL_WRITE_ONLY, image_format);

        // One workgroup per 32x32 tiles, one Z slice per layer
        const u32 groups_x = Common::DivCeil(entry.num_tiles.width, WORKGROUP_SIZE.width);
        const u32 groups_y = Common::DivCeil(entry.num_tiles.height, WORKGROUP_SIZE.height);
        glDispatchCompute(groups_x, groups_y, image.info.resources.layers);
    }
    program_manager.RestoreGuestCompute();
}
/// Unswizzles block linear data from a mapped buffer into a 3D image using a compute shader
/// @param image         Destination image; its format selects the image store format
/// @param map           Mapped buffer holding the guest data
/// @param buffer_offset Offset of the image data inside the mapped buffer
/// @param swizzles      One entry per dispatch, each naming its level and tile counts
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
                                      std::span<const SwizzleParameters> swizzles) {
    static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
    program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

    const GLenum image_format = StoreFormat(BytesPerBlock(image.info.format));
    for (const SwizzleParameters& entry : swizzles) {
        // Uniform locations are fixed; the values come straight from the generated parameters
        const auto uniforms = MakeBlockLinearSwizzle3DParams(entry, image.info);
        glUniform3uiv(0, 1, uniforms.origin.data());
        glUniform3iv(1, 1, uniforms.destination.data());
        glUniform1ui(2, uniforms.bytes_per_block_log2);
        glUniform1ui(3, uniforms.slice_size);
        glUniform1ui(4, uniforms.block_size);
        glUniform1ui(5, uniforms.x_shift);
        glUniform1ui(6, uniforms.block_height);
        glUniform1ui(7, uniforms.block_height_mask);
        glUniform1ui(8, uniforms.block_depth);
        glUniform1ui(9, uniforms.block_depth_mask);

        // Expose the input from this entry's offset up to the end of the guest data
        const size_t source_offset = entry.buffer_offset + buffer_offset;
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          source_offset, image.guest_size_bytes - entry.buffer_offset);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), entry.level, GL_TRUE, 0,
                           GL_WRITE_ONLY, image_format);

        // Depth is covered by the dispatch itself rather than by layers
        const u32 groups_x = Common::DivCeil(entry.num_tiles.width, WORKGROUP_SIZE.width);
        const u32 groups_y = Common::DivCeil(entry.num_tiles.height, WORKGROUP_SIZE.height);
        const u32 groups_z = Common::DivCeil(entry.num_tiles.depth, WORKGROUP_SIZE.depth);
        glDispatchCompute(groups_x, groups_y, groups_z);
    }
    program_manager.RestoreGuestCompute();
}
/// Uploads pitch linear data from a mapped buffer into an image using a compute shader
/// @param image         Destination image; only level 0 is bound as the output
/// @param map           Mapped buffer holding the guest data
/// @param buffer_offset Offset of the image data inside the mapped buffer
/// @param swizzles      One entry per dispatch, each naming its buffer offset and tile counts
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
                              std::span<const SwizzleParameters> swizzles) {
    static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
    static constexpr GLuint BINDING_INPUT_BUFFER = 0;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
    static constexpr GLuint LOC_ORIGIN = 0;
    static constexpr GLuint LOC_DESTINATION = 1;
    static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
    static constexpr GLuint LOC_PITCH = 3;

    const u32 bytes_per_block = BytesPerBlock(image.info.format);
    // Validate the block size before handing it to StoreFormat: that helper only knows
    // power-of-two sizes and would otherwise report a generic unreachable failure ahead of
    // this more descriptive message.
    UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
                         "Non-power of two images are not implemented");
    const GLenum format = StoreFormat(bytes_per_block);
    const u32 pitch = image.info.pitch;

    program_manager.BindHostCompute(pitch_unswizzle_program.handle);
    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);

    // Origin and destination are always zero here; only block size and pitch vary
    glUniform2ui(LOC_ORIGIN, 0, 0);
    glUniform2i(LOC_DESTINATION, 0, 0);
    glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
    glUniform1ui(LOC_PITCH, pitch);
    glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);

    for (const SwizzleParameters& swizzle : swizzles) {
        const Extent3D num_tiles = swizzle.num_tiles;
        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
        const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
        const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);

        // Expose the input from this entry's offset up to the end of the guest data
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          input_offset, image.guest_size_bytes - swizzle.buffer_offset);
        glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
    }
    program_manager.RestoreGuestCompute();
}
/// Runs the BC4 copy compute program once per requested copy region
/// @param dst_image Image bound as write-only RGBA8UI output
/// @param src_image Image bound as read-only RG32UI input
/// @param copies    Regions to process; each must target a single layer at base layer zero
void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
    static constexpr GLuint BINDING_INPUT_IMAGE = 0;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
    static constexpr GLuint LOC_SRC_OFFSET = 0;
    static constexpr GLuint LOC_DST_OFFSET = 1;

    program_manager.BindHostCompute(copy_bc4_program.handle);
    for (const ImageCopy& region : copies) {
        // Layered copies are not handled by this path
        ASSERT(region.src_subresource.base_layer == 0);
        ASSERT(region.src_subresource.num_layers == 1);
        ASSERT(region.dst_subresource.base_layer == 0);
        ASSERT(region.dst_subresource.num_layers == 1);

        const auto& src_offset = region.src_offset;
        const auto& dst_offset = region.dst_offset;
        glUniform3ui(LOC_SRC_OFFSET, src_offset.x, src_offset.y, src_offset.z);
        glUniform3ui(LOC_DST_OFFSET, dst_offset.x, dst_offset.y, dst_offset.z);

        glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(),
                           region.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY,
                           GL_RG32UI);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
                           region.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY,
                           GL_RGBA8UI);

        // The copy extent is used directly as the workgroup count
        glDispatchCompute(region.extent.width, region.extent.height, region.extent.depth);
    }
    program_manager.RestoreGuestCompute();
}
/// Maps a block size in bytes to an unsigned integer image format of the same size
/// Sizes other than 1, 2, 4, 8 and 16 are reported as unreachable and fall back to GL_R8UI
GLenum StoreFormat(u32 bytes_per_block) {
    if (bytes_per_block == 1) {
        return GL_R8UI;
    }
    if (bytes_per_block == 2) {
        return GL_R16UI;
    }
    if (bytes_per_block == 4) {
        return GL_R32UI;
    }
    if (bytes_per_block == 8) {
        return GL_RG32UI;
    }
    if (bytes_per_block == 16) {
        return GL_RGBA32UI;
    }
    UNREACHABLE();
    return GL_R8UI;
}
} // namespace OpenGL

@ -0,0 +1,51 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/texture_cache/types.h"
namespace OpenGL {
class Image;
class ImageBufferMap;
class ProgramManager;
/// Owns the utility compute programs used to upload and copy images on the OpenGL backend
class UtilShaders {
public:
/// Builds all compute programs and the swizzle table buffer
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
/// Unswizzles block linear guest data from a mapped buffer into a 2D image
/// @param buffer_offset Offset of the image data inside the mapped buffer
/// @param swizzles      One entry per compute dispatch
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::SwizzleParameters> swizzles);
/// Unswizzles block linear guest data from a mapped buffer into a 3D image
/// @param buffer_offset Offset of the image data inside the mapped buffer
/// @param swizzles      One entry per compute dispatch
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::SwizzleParameters> swizzles);
/// Uploads pitch linear guest data from a mapped buffer into level 0 of an image
/// @param buffer_offset Offset of the image data inside the mapped buffer
/// @param swizzles      One entry per compute dispatch
void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::SwizzleParameters> swizzles);
/// Runs the BC4 copy program for every region in copies (single layer regions only)
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
private:
// Used to bind the host compute programs and to restore the guest one afterwards
ProgramManager& program_manager;
// Swizzle table consumed by the block linear programs
OGLBuffer swizzle_table_buffer;
OGLProgram block_linear_unswizzle_2d_program;
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
OGLProgram copy_bc4_program;
};
/// Returns the unsigned integer image format matching a block size of 1, 2, 4, 8 or 16 bytes
GLenum StoreFormat(u32 bytes_per_block);
} // namespace OpenGL

@ -1,42 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string>
#include <vector>
#include <fmt/format.h>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
/// Attaches a debug label containing the guest address to an OpenGL object
/// @param identifier Object namespace (GL_TEXTURE, GL_PROGRAM, ...)
/// @param handle     Name of the object to label
/// @param addr       Address printed in the label
/// @param extra_info Optional prefix that replaces the default per-type name
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
    if (!GLAD_GL_KHR_debug) {
        // Labels are a debugging aid only, so a missing extension is not an error
        return;
    }

    const std::string label = [&]() -> std::string {
        if (!extra_info.empty()) {
            return fmt::format("{}@0x{:016X}", extra_info, addr);
        }
        switch (identifier) {
        case GL_TEXTURE:
            return fmt::format("Texture@0x{:016X}", addr);
        case GL_PROGRAM:
            return fmt::format("Shader@0x{:016X}", addr);
        default:
            return fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
        }
    }();

    // A negative length tells OpenGL the label is null terminated
    glObjectLabel(identifier, handle, -1, label.c_str());
}
} // namespace OpenGL

@ -1,16 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
namespace OpenGL {
/// Labels an OpenGL object with its address for debugging; extra_info, when given, replaces the
/// default per-type prefix in the label.
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
} // namespace OpenGL

@ -0,0 +1,624 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
namespace Vulkan {
using VideoCommon::ImageViewType;
namespace {
/// Push constant block sent to the vertex stage by BindBlitState and BlitImageHelper::Convert
struct PushConstants {
// Filled with the extent of the source region (x, y)
std::array<float, 2> tex_scale;
// Filled with the origin of the source region (x, y)
std::array<float, 2> tex_offset;
};
/// Layout binding for a single combined image sampler visible to the fragment stage,
/// parameterized on the binding index
template <u32 binding>
inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{
.binding = binding,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.pImmutableSamplers = nullptr,
};
/// Bindings 0 and 1 of the two-texture descriptor set layout
constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{
TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>,
};
/// Descriptor set layout with a single combined image sampler at binding 0
constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = 1,
.pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
};
/// Descriptor set layout with combined image samplers at bindings 0 and 1
constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
.pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
};
/// Push constant range covering the whole PushConstants struct, visible to the vertex stage
constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.offset = 0,
.size = sizeof(PushConstants),
};
/// Vertex input state with no bindings and no attributes
constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.vertexBindingDescriptionCount = 0,
.pVertexBindingDescriptions = nullptr,
.vertexAttributeDescriptionCount = 0,
.pVertexAttributeDescriptions = nullptr,
};
/// Input assembly state: triangle list without primitive restart
constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
.primitiveRestartEnable = VK_FALSE,
};
/// Viewport state declaring one viewport and one scissor without static values
// NOTE(review): the null pointers presumably rely on the pipeline using DYNAMIC_STATES
// (viewport and scissor set at record time) - confirm where the pipelines are created.
constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.viewportCount = 1,
.pViewports = nullptr,
.scissorCount = 1,
.pScissors = nullptr,
};
/// Rasterization state: filled polygons, back face culling with clockwise front faces,
/// no depth clamp, no depth bias
constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_BACK_BIT,
.frontFace = VK_FRONT_FACE_CLOCKWISE,
.depthBiasEnable = VK_FALSE,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
/// Multisample state: single sample rasterization with sample shading disabled
constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
/// States supplied at command recording time instead of being baked into the pipeline
constexpr std::array DYNAMIC_STATES{
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
};
/// Dynamic state create info referencing DYNAMIC_STATES
constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
.pDynamicStates = DYNAMIC_STATES.data(),
};
/// Color blend state with zero attachments
// NOTE(review): presumably meant for pipelines that only write depth - confirm at the use site.
constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_CLEAR,
.attachmentCount = 0,
.pAttachments = nullptr,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
/// Attachment state with blending disabled and all four color components writable
constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
/// Color blend state with a single attachment using PIPELINE_COLOR_BLEND_ATTACHMENT_STATE
constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_CLEAR,
.attachmentCount = 1,
.pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
/// Depth stencil state: depth test and writes enabled with an always-pass comparison,
/// stencil and depth bounds tests disabled
constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthTestEnable = VK_TRUE,
.depthWriteEnable = VK_TRUE,
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
.depthBoundsTestEnable = VK_FALSE,
.stencilTestEnable = VK_FALSE,
.front = VkStencilOpState{},
.back = VkStencilOpState{},
.minDepthBounds = 0.0f,
.maxDepthBounds = 0.0f,
};
/// Sampler create info parameterized on the filter used for both minification and magnification
/// Coordinates are unnormalized, addressing clamps to an opaque white border and only
/// LOD zero is sampled
template <VkFilter filter>
inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.magFilter = filter,
.minFilter = filter,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = VK_COMPARE_OP_NEVER,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
.unnormalizedCoordinates = VK_TRUE,
};
/// Describes a pipeline layout with one descriptor set layout plus the shared push constant range
/// @param set_layout Pointer stored in the result; it must outlive the returned structure's use
constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
    const VkDescriptorSetLayout* set_layout) {
    const VkPipelineLayoutCreateInfo create_info{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .setLayoutCount = 1,
        .pSetLayouts = set_layout,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &PUSH_CONSTANT_RANGE,
    };
    return create_info;
}
/// Describes one shader stage that enters through "main" with no specialization constants
constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage,
                                                                        VkShaderModule shader) {
    const VkPipelineShaderStageCreateInfo stage_info{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .stage = stage,
        .module = shader,
        .pName = "main",
        .pSpecializationInfo = nullptr,
    };
    return stage_info;
}
/// Returns the vertex and fragment stage descriptions, in that order
constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages(
    VkShaderModule vertex_shader, VkShaderModule fragment_shader) {
    const VkPipelineShaderStageCreateInfo vertex_stage =
        PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader);
    const VkPipelineShaderStageCreateInfo fragment_stage =
        PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader);
    return std::array{vertex_stage, fragment_stage};
}
void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
VkSampler sampler, VkImageView image_view) {
const VkDescriptorImageInfo image_info{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkWriteDescriptorSet write_descriptor_set{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr);
}
void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
VkSampler sampler, VkImageView image_view_0,
VkImageView image_view_1) {
const VkDescriptorImageInfo image_info_0{
.sampler = sampler,
.imageView = image_view_0,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkDescriptorImageInfo image_info_1{
.sampler = sampler,
.imageView = image_view_1,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const std::array write_descriptor_sets{
VkWriteDescriptorSet{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info_0,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
},
VkWriteDescriptorSet{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_set,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info_1,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
},
};
device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
}
/// Records the dynamic viewport, scissor and push constants for a blit draw
/// @param dst_region Two corners of the destination rectangle, in any order
/// @param src_region Two corners of the source rectangle; pushed as origin and signed extent
void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
                   const std::array<Offset2D, 2>& dst_region,
                   const std::array<Offset2D, 2>& src_region) {
    const auto& dst_begin = dst_region[0];
    const auto& dst_end = dst_region[1];
    const auto& src_begin = src_region[0];
    const auto& src_end = src_region[1];

    // The destination rectangle is normalized so flipped regions still cover the same area
    const VkOffset2D top_left{
        .x = std::min(dst_begin.x, dst_end.x),
        .y = std::min(dst_begin.y, dst_end.y),
    };
    const VkExtent2D size{
        .width = static_cast<u32>(std::abs(dst_end.x - dst_begin.x)),
        .height = static_cast<u32>(std::abs(dst_end.y - dst_begin.y)),
    };
    const VkViewport viewport{
        .x = static_cast<float>(top_left.x),
        .y = static_cast<float>(top_left.y),
        .width = static_cast<float>(size.width),
        .height = static_cast<float>(size.height),
        .minDepth = 0.0f,
        .maxDepth = 1.0f,
    };
    // TODO: Support scissored blits
    const VkRect2D scissor{
        .offset = top_left,
        .extent = size,
    };

    // The source extent keeps its sign, unlike the destination extent above
    const float src_width = static_cast<float>(src_end.x - src_begin.x);
    const float src_height = static_cast<float>(src_end.y - src_begin.y);
    const PushConstants push_constants{
        .tex_scale = {src_width, src_height},
        .tex_offset = {static_cast<float>(src_begin.x), static_cast<float>(src_begin.y)},
    };

    cmdbuf.SetViewport(0, viewport);
    cmdbuf.SetScissor(0, scissor);
    cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
}
} // Anonymous namespace
/// Creates the descriptor set layouts, descriptor allocators, pipeline layouts, shader modules
/// and samplers shared by all blit and conversion operations
BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_,
StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
: device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
// Allocators and pipeline layouts below consume the set layouts created above
one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
// The depth stencil blit shader is only built when the device reports stencil export support
if (device.IsExtShaderStencilExportSupported()) {
blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
}
}
// Defaulted out of line; no explicit cleanup is performed here.
BlitImageHelper::~BlitImageHelper() = default;
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.operation = operation,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
const VkPipeline pipeline = FindOrEmplacePipeline(key);
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
&device = device](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
nullptr);
BindBlitState(cmdbuf, layout, dst_region, src_region);
cmdbuf.Draw(3, 1, 0, 0);
});
scheduler.InvalidateState();
}
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
VkImageView src_depth_view, VkImageView src_stencil_view,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
src_stencil_view, descriptor_set,
&device = device](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
src_stencil_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
nullptr);
BindBlitState(cmdbuf, layout, dst_region, src_region);
cmdbuf.Draw(3, 1, 0, 0);
});
scheduler.InvalidateState();
}
/// Converts a D32 view into an R32 framebuffer using the depth-to-color pipeline
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
                                      const ImageView& src_image_view) {
    auto& cached_pipeline = convert_d32_to_r32_pipeline;
    // Prepare the pipeline for this render pass before it is dereferenced
    ConvertDepthToColorPipeline(cached_pipeline, dst_framebuffer->RenderPass());
    Convert(*cached_pipeline, dst_framebuffer, src_image_view);
}
/// Converts an R32 view into a D32 framebuffer using the color-to-depth pipeline
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
                                      const ImageView& src_image_view) {
    auto& cached_pipeline = convert_r32_to_d32_pipeline;
    // Prepare the pipeline for this render pass before it is dereferenced
    ConvertColorToDepthPipeline(cached_pipeline, dst_framebuffer->RenderPass());
    Convert(*cached_pipeline, dst_framebuffer, src_image_view);
}
/// Converts a D16 view into an R16 framebuffer using the depth-to-color pipeline
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
                                      const ImageView& src_image_view) {
    auto& cached_pipeline = convert_d16_to_r16_pipeline;
    // Prepare the pipeline for this render pass before it is dereferenced
    ConvertDepthToColorPipeline(cached_pipeline, dst_framebuffer->RenderPass());
    Convert(*cached_pipeline, dst_framebuffer, src_image_view);
}
/// Converts an R16 view into a D16 framebuffer using the color-to-depth pipeline
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
                                      const ImageView& src_image_view) {
    auto& cached_pipeline = convert_r16_to_d16_pipeline;
    // Prepare the pipeline for this render pass before it is dereferenced
    ConvertColorToDepthPipeline(cached_pipeline, dst_framebuffer->RenderPass());
    Convert(*cached_pipeline, dst_framebuffer, src_image_view);
}
/// Draws a full screen triangle that samples the source view with the given conversion pipeline
/// @param pipeline        Conversion pipeline to bind
/// @param dst_framebuffer Framebuffer rendered into
/// @param src_image_view  Source view; its size defines the viewport, scissor and texture scale
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                              const ImageView& src_image_view) {
    const VkPipelineLayout pipeline_layout = *one_texture_pipeline_layout;
    const VkImageView source_view = src_image_view.Handle(ImageViewType::e2D);
    const VkSampler point_sampler = *nearest_sampler;
    const VkDescriptorSet set = one_texture_descriptor_allocator.Commit();
    const VkExtent2D size{
        .width = src_image_view.size.width,
        .height = src_image_view.size.height,
    };

    scheduler.RequestRenderpass(dst_framebuffer);
    scheduler.Record([pipeline, pipeline_layout, point_sampler, source_view, set, size,
                      &device = device](vk::CommandBuffer cmdbuf) {
        // NOTE(review): maxDepth is 0.0f here while BindBlitState uses 1.0f - confirm intended
        const VkViewport viewport{
            .x = 0.0f,
            .y = 0.0f,
            .width = static_cast<float>(size.width),
            .height = static_cast<float>(size.height),
            .minDepth = 0.0f,
            .maxDepth = 0.0f,
        };
        const VkRect2D scissor{
            .offset =
                VkOffset2D{
                    .x = 0,
                    .y = 0,
                },
            .extent = size,
        };
        // The whole source is sampled: scale spans its size and the offset stays at the origin
        const PushConstants push_constants{
            .tex_scale = {viewport.width, viewport.height},
            .tex_offset = {0.0f, 0.0f},
        };
        UpdateOneTextureDescriptorSet(device, set, point_sampler, source_view);

        // TODO: Barriers
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, set,
                                  nullptr);
        cmdbuf.SetViewport(0, viewport);
        cmdbuf.SetScissor(0, scissor);
        cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
        cmdbuf.Draw(3, 1, 0, 0);
    });
    // Pipeline, descriptor and dynamic state were changed behind the scheduler's back
    scheduler.InvalidateState();
}
/// Returns the color blit pipeline cached for `key`, building and caching it on a miss.
///
/// Keys and pipelines are stored in two parallel vectors: blit_color_pipelines[i] is the pipeline
/// built for blit_color_keys[i]. Only key.renderpass is consumed while building the pipeline.
///
/// @param key Render pass and Fermi2D operation identifying the pipeline
/// @return Non-owning handle; ownership stays in blit_color_pipelines
VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
    const auto it = std::ranges::find(blit_color_keys, key);
    if (it != blit_color_keys.end()) {
        return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
    }
    const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag);
    const VkPipelineColorBlendAttachmentState blend_attachment{
        .blendEnable = VK_FALSE,
        .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
        .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
        .colorBlendOp = VK_BLEND_OP_ADD,
        .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
        .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
        .alphaBlendOp = VK_BLEND_OP_ADD,
        .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
                          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
    };
    // TODO: programmable blending
    const VkPipelineColorBlendStateCreateInfo color_blend_create_info{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .logicOpEnable = VK_FALSE,
        .logicOp = VK_LOGIC_OP_CLEAR,
        .attachmentCount = 1,
        .pAttachments = &blend_attachment,
        .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
    };
    // Store the pipeline before registering its key. If pipeline creation fails by throwing, no
    // key is left behind without a matching pipeline, so the parallel vectors stay index-aligned
    // and a later lookup of the same key cannot index past the end of blit_color_pipelines.
    blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .stageCount = static_cast<u32>(stages.size()),
        .pStages = stages.data(),
        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .pTessellationState = nullptr,
        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pDepthStencilState = nullptr,
        .pColorBlendState = &color_blend_create_info,
        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .layout = *one_texture_pipeline_layout,
        .renderPass = key.renderpass,
        .subpass = 0,
        .basePipelineHandle = VK_NULL_HANDLE,
        .basePipelineIndex = 0,
    }));
    blit_color_keys.push_back(key);
    return *blit_color_pipelines.back();
}
/// Returns the depth-stencil blit pipeline, building it on the first call.
/// NOTE(review): once built, the cached pipeline is returned for any `renderpass` argument;
/// only the render pass of the first call is used for creation - confirm callers rely on that.
VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
    if (!blit_depth_stencil_pipeline) {
        const std::array shader_stages =
            MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
        blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
            .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .stageCount = static_cast<u32>(shader_stages.size()),
            .pStages = shader_stages.data(),
            .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            .pTessellationState = nullptr,
            .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
            .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
            .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .layout = *two_textures_pipeline_layout,
            .renderPass = renderpass,
            .subpass = 0,
            .basePipelineHandle = VK_NULL_HANDLE,
            .basePipelineIndex = 0,
        });
    }
    return *blit_depth_stencil_pipeline;
}
/// Fills `pipeline` with a graphics pipeline using the depth-to-float fragment shader, the
/// generic color blend state and no depth-stencil state. Leaves `pipeline` untouched when it
/// already holds a pipeline, regardless of `renderpass`.
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
    if (!pipeline) {
        const std::array shader_stages =
            MakeStages(*full_screen_vert, *convert_depth_to_float_frag);
        pipeline = device.GetLogical().CreateGraphicsPipeline({
            .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .stageCount = static_cast<u32>(shader_stages.size()),
            .pStages = shader_stages.data(),
            .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            .pTessellationState = nullptr,
            .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .pDepthStencilState = nullptr,
            .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
            .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .layout = *one_texture_pipeline_layout,
            .renderPass = renderpass,
            .subpass = 0,
            .basePipelineHandle = VK_NULL_HANDLE,
            .basePipelineIndex = 0,
        });
    }
}
/// Fills `pipeline` with a graphics pipeline using the float-to-depth fragment shader, the
/// shared depth-stencil state and the empty color blend state. Leaves `pipeline` untouched when
/// it already holds a pipeline, regardless of `renderpass`.
void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
    if (!pipeline) {
        const std::array shader_stages =
            MakeStages(*full_screen_vert, *convert_float_to_depth_frag);
        pipeline = device.GetLogical().CreateGraphicsPipeline({
            .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .stageCount = static_cast<u32>(shader_stages.size()),
            .pStages = shader_stages.data(),
            .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            .pTessellationState = nullptr,
            .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
            .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
            .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .layout = *one_texture_pipeline_layout,
            .renderPass = renderpass,
            .subpass = 0,
            .basePipelineHandle = VK_NULL_HANDLE,
            .basePipelineIndex = 0,
        });
    }
}
} // namespace Vulkan

@ -0,0 +1,97 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include "video_core/engines/fermi_2d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/texture_cache/types.h"
namespace Vulkan {
using VideoCommon::Offset2D;
class VKDevice;
class VKScheduler;
class StateTracker;
class Framebuffer;
class ImageView;
/// Lookup key for the color blit pipeline cache of BlitImageHelper.
/// Keys are compared member-wise, in declaration order, through the defaulted operator<=>.
struct BlitImagePipelineKey {
constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
/// Render pass the pipeline is created against
VkRenderPass renderpass;
/// Fermi2D blit operation requested for the pipeline
Tegra::Engines::Fermi2D::Operation operation;
};
/// Helper that blits and converts images by recording draws into a destination framebuffer.
/// It owns the descriptor set layouts, pipeline layouts, shader modules, samplers and the
/// pipelines used for those draws.
class BlitImageHelper {
public:
explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler,
StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
~BlitImageHelper();
/// Color blit from src_image_view into dst_framebuffer.
/// Each region is passed as an array of two 2D offsets.
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
/// Depth-stencil blit into dst_framebuffer; depth and stencil are sourced from separate views.
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
/// Format conversions; each one draws src_image_view into dst_framebuffer with its own
/// conversion pipeline.
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
private:
/// Shared implementation of the Convert* entry points: records the draw with `pipeline`.
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
/// Returns the color blit pipeline cached for `key`, creating it when it is missing.
[[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
/// Returns the depth-stencil blit pipeline, creating it on the first call.
[[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
/// Create `pipeline` for `renderpass` unless it already holds a pipeline.
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
const VKDevice& device;
VKScheduler& scheduler;
StateTracker& state_tracker;
// Descriptor set layouts, allocators and pipeline layouts come in two variants:
// one sampled texture (color blits and conversions) and two sampled textures (depth-stencil).
vk::DescriptorSetLayout one_texture_set_layout;
vk::DescriptorSetLayout two_textures_set_layout;
DescriptorAllocator one_texture_descriptor_allocator;
DescriptorAllocator two_textures_descriptor_allocator;
vk::PipelineLayout one_texture_pipeline_layout;
vk::PipelineLayout two_textures_pipeline_layout;
// Shader modules; full_screen_vert is the vertex stage shared by every pipeline built here
vk::ShaderModule full_screen_vert;
vk::ShaderModule blit_color_to_color_frag;
vk::ShaderModule blit_depth_stencil_frag;
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
// Parallel vectors: blit_color_pipelines[i] is the pipeline built for blit_color_keys[i]
std::vector<BlitImagePipelineKey> blit_color_keys;
std::vector<vk::Pipeline> blit_color_pipelines;
// Pipelines below start empty and are created on first use
vk::Pipeline blit_depth_stencil_pipeline;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;
vk::Pipeline convert_r16_to_d16_pipeline;
};
} // namespace Vulkan

@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
topology.Assign(regs.draw.topology);
msaa_mode.Assign(regs.multisample_mode);
raw2 = 0;
const auto test_func =

@ -186,6 +186,7 @@ struct FixedPipelineState {
BitField<19, 4, u32> logic_op;
BitField<23, 1, u32> rasterize_enable;
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
};
union {
u32 raw2;

@ -122,7 +122,7 @@ struct FormatTuple {
{VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT
{VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT
{VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM
{VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM
{VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
@ -163,7 +163,7 @@ struct FormatTuple {
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
{VK_FORMAT_UNDEFINED}, // R16G16_UINT
{VK_FORMAT_UNDEFINED}, // R16G16_SINT
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
{VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
{VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB
@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
// Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
? VK_FORMAT_A8B8G8R8_SRGB_PACK32
: VK_FORMAT_A8B8G8R8_UNORM_PACK32;
const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
}
const bool attachable = tuple.usage & Attachable;
const bool storage = tuple.usage & Storage;
VkFormatFeatureFlags usage;
if (format_type == FormatType::Buffer) {
switch (format_type) {
case FormatType::Buffer:
usage =
VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
} else {
break;
case FormatType::Linear:
case FormatType::Optimal:
usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;
if (attachable) {
@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
if (storage) {
usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
}
break;
}
return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
}
@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle)
return {};
}
/// Translates a guest sampler reduction mode into the matching Vulkan reduction mode.
/// Any other value reports UNREACHABLE_MSG and falls back to weighted average.
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) {
    using Reduction = Tegra::Texture::SamplerReduction;
    if (reduction == Reduction::WeightedAverage) {
        return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
    }
    if (reduction == Reduction::Min) {
        return VK_SAMPLER_REDUCTION_MODE_MIN_EXT;
    }
    if (reduction == Reduction::Max) {
        return VK_SAMPLER_REDUCTION_MODE_MAX_EXT;
    }
    UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction));
    return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
}
} // namespace Vulkan::MaxwellToVK

@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
} // namespace Vulkan::MaxwellToVK

@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() {
return library;
}
std::pair<vk::Instance, u32> CreateInstance(
Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) {
std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library,
vk::InstanceDispatch& dld, WindowSystemType window_type,
bool enable_debug_utils, bool enable_layers) {
if (!library.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Vulkan library not available");
return {};
@ -133,7 +133,7 @@ std::pair<vk::Instance, u32> CreateInstance(
if (window_type != Core::Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_layers) {
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
bool RendererVulkan::Init() {
library = OpenVulkanLibrary();
std::tie(instance, instance_version) = CreateInstance(
library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug);
library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug);
if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
return false;
}
@ -447,7 +447,8 @@ void RendererVulkan::Report() const {
std::vector<std::string> RendererVulkan::EnumerateDevices() {
vk::InstanceDispatch dld;
Common::DynamicLibrary library = OpenVulkanLibrary();
vk::Instance instance = CreateInstance(library, dld).first;
vk::Instance instance =
CreateInstance(library, dld, WindowSystemType::Headless, false, false).first;
if (!instance) {
return {};
}

@ -33,10 +33,9 @@ class VKDevice;
class VKMemoryManager;
class VKSwapchain;
class VKScheduler;
class VKImage;
struct VKScreenInfo {
VKImage* image{};
VkImageView image_view{};
u32 width{};
u32 height{};
bool is_srgb{};

@ -1,24 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V $THIS_FILE -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color;
layout (binding = 1) uniform sampler2D color_texture;
// Writes the texel sampled from color_texture at the interpolated coordinate to the output.
void main() {
    vec4 texel = texture(color_texture, frag_tex_coord);
    color = texel;
}

@ -1,28 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V $THIS_FILE -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec2 vert_tex_coord;
layout (location = 0) out vec2 frag_tex_coord;
layout (set = 0, binding = 0) uniform MatrixBlock {
mat4 modelview_matrix;
};
// Transforms the 2D vertex position by modelview_matrix and forwards the texture coordinate.
void main() {
    vec4 position = vec4(vert_position, 0.0, 1.0);
    frag_tex_coord = vert_tex_coord;
    gl_Position = modelview_matrix * position;
}

@ -1,37 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V $THIS_FILE -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
layout (local_size_x = 1024) in;
layout (std430, set = 0, binding = 0) buffer OutputBuffer {
uint output_indexes[];
};
layout (push_constant) uniform PushConstants {
uint first;
};
// One invocation per quad: writes the six indices (0, 1, 2, 0, 2, 3 relative to the quad's
// first vertex, offset by `first`) that draw the quad as two triangles.
void main() {
    uint quad = gl_GlobalInvocationID.x;
    uint output_base = quad * 6;
    // Invocations past the end of the output buffer have nothing to write
    if (output_base >= output_indexes.length()) {
        return;
    }
    const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
    uint vertex_base = first + quad * 4;
    for (uint corner = 0; corner < 6; ++corner) {
        output_indexes[output_base + corner] = vertex_base + quad_map[corner];
    }
}

@ -1,50 +0,0 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V quad_indexed.comp -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
layout (local_size_x = 1024) in;
layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
uint input_indexes[];
};
layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
uint output_indexes[];
};
layout (push_constant) uniform PushConstants {
uint base_vertex;
int index_shift; // 0: uint8, 1: uint16, 2: uint32
};
// One invocation per quad: reads the quad's indices from the packed input buffer and writes six
// 32-bit indices (pattern 0, 1, 2, 0, 2, 3) with base_vertex added to each.
void main() {
    int quad = int(gl_GlobalInvocationID.x);
    int output_base = quad * 6;
    // Invocations past the end of the output buffer have nothing to write
    if (output_base >= output_indexes.length()) {
        return;
    }
    // Width in bits of one input index: 8, 16 or 32 depending on index_shift
    int bits_per_index = 8 << index_shift;
    // log2 of how many input indices share one 32-bit word
    int word_shift = 2 - index_shift;
    int sub_word_mask = (1 << word_shift) - 1;
    const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
    int input_base = quad * 4;
    for (uint corner = 0; corner < 6; ++corner) {
        int element = input_base + quad_swizzle[corner];
        int word = element >> word_shift;
        int bit_position = (element & sub_word_mask) * bits_per_index;
        uint unpacked = bitfieldExtract(input_indexes[word], bit_position, bits_per_index);
        output_indexes[output_base + corner] = unpacked + base_vertex;
    }
}

@ -1,33 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V $THIS_FILE -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
layout (local_size_x = 1024) in;
layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
uint8_t input_indexes[];
};
layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
uint16_t output_indexes[];
};
// One invocation per index: widens an 8-bit input index to 16 bits.
void main() {
    uint id = gl_GlobalInvocationID.x;
    // Skip invocations that fall outside of the input buffer
    if (id >= input_indexes.length()) {
        return;
    }
    output_indexes[id] = uint16_t(input_indexes[id]);
}

@ -16,12 +16,12 @@
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/morton.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_image.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -29,108 +29,12 @@
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
namespace Vulkan {
namespace {
// Generated from the "shaders/" directory, read the instructions there.
constexpr u8 blit_vertex_code[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00,
0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00,
0x38, 0x00, 0x01, 0x00};
constexpr u8 blit_fragment_code[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00,
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
struct ScreenRectVertex {
ScreenRectVertex() = default;
explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
// clang-format on
}
std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
using namespace VideoCore::Surface;
return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
}
std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
UpdateDescriptorSet(image_index, blit_image->GetPresentView());
UpdateDescriptorSet(image_index,
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
BufferData data;
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
auto map = buffer_commit->Map();
std::memcpy(map.GetAddress(), &data, sizeof(data));
std::memcpy(map.Address(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
const auto pixel_format =
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
const size_t size_bytes = GetSizeInBytes(framebuffer);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
map.GetAddress() + image_offset, host_ptr);
blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
Tegra::Texture::UnswizzleTexture(
std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
},
};
scheduler.Record(
[buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
[buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = 0,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
VkImageMemoryBarrier read_barrier = base_barrier;
read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkImageMemoryBarrier write_barrier = base_barrier;
write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
});
}
map.Release();
blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
descriptor_set = descriptor_sets[image_index], buffer = *buffer,
size = swapchain.GetSize(), pipeline = *pipeline,
@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
const VkClearValue clear_color{
.color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
};
VkRenderPassBeginInfo renderpass_bi;
renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
renderpass_bi.pNext = nullptr;
renderpass_bi.renderPass = renderpass;
renderpass_bi.framebuffer = framebuffer;
renderpass_bi.renderArea.offset.x = 0;
renderpass_bi.renderArea.offset.y = 0;
renderpass_bi.renderArea.extent = size;
renderpass_bi.clearValueCount = 1;
renderpass_bi.pClearValues = &clear_color;
VkViewport viewport;
viewport.x = 0.0f;
viewport.y = 0.0f;
viewport.width = static_cast<float>(size.width);
viewport.height = static_cast<float>(size.height);
viewport.minDepth = 0.0f;
viewport.maxDepth = 1.0f;
VkRect2D scissor;
scissor.offset.x = 0;
scissor.offset.y = 0;
scissor.extent = size;
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = renderpass,
.framebuffer = framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.SetViewport(0, viewport);
@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
}
void VKBlitScreen::CreateShaders() {
vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code);
fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code);
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
}
void VKBlitScreen::CreateSemaphores() {
@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() {
const VkAttachmentReference color_attachment_ref{
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkSubpassDescription subpass_description{
@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
raw_image_views.resize(image_count);
raw_buffer_commits.resize(image_count);
const VkImageCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.imageType = VK_IMAGE_TYPE_2D,
.format = GetFormat(framebuffer),
.extent =
{
.width = framebuffer.width,
.height = framebuffer.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
for (std::size_t i = 0; i < image_count; ++i) {
raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT);
raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false);
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.imageType = VK_IMAGE_TYPE_2D,
.format = GetFormat(framebuffer),
.extent =
{
.width = framebuffer.width,
.height = framebuffer.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false);
raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *raw_images[i],
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = GetFormat(framebuffer),
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
});
}
}
@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
const VkDescriptorImageInfo image_info{
.sampler = *sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkWriteDescriptorSet sampler_write{

@ -35,7 +35,6 @@ struct ScreenInfo;
class RasterizerVulkan;
class VKDevice;
class VKImage;
class VKScheduler;
class VKSwapchain;
@ -110,7 +109,8 @@ private:
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<std::unique_ptr<VKImage>> raw_images;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<VKMemoryCommit> raw_buffer_commits;
u32 raw_width = 0;
u32 raw_height = 0;

@ -31,15 +31,19 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE);
return std::make_unique<VKStreamBuffer>(device, scheduler);
}
} // Anonymous namespace
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
: BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} {
: BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
staging_pool_} {
const VkBufferCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer handle = Handle();
scheduler.Record(
[staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
const VkBufferMemoryBarrier barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
barrier, {});
});
scheduler.Record([staging = *staging.handle, handle, offset, data_size,
&device = device](vk::CommandBuffer cmdbuf) {
const VkBufferMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask =
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_WRITE_BIT |
(device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
};
const VkBufferMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
write_barrier);
});
}
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
@ -150,8 +169,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const VKDevice& device_, VKMemoryManager& memory_manager_,
VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_)
: BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)},
VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
VKStagingBufferPool& staging_pool_)
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
cpu_memory_, stream_buffer_},
device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
staging_pool_} {}

@ -41,6 +41,7 @@ public:
}
private:
const VKDevice& device;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
@ -49,10 +50,11 @@ private:
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
public:
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const VKDevice& device_, VKMemoryManager& memory_manager_,
VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_);
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
VKStagingBufferPool& staging_pool);
~VKBufferCache();
BufferInfo GetEmptyBuffer(std::size_t size) override;

@ -10,6 +10,9 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
@ -22,99 +25,6 @@ namespace Vulkan {
namespace {
// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
constexpr u8 quad_array[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
};
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
return {
.binding = 0,
@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
};
}
// Uint8 SPIR-V module. Generated from the "shaders/" directory.
constexpr u8 uint8_pass[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
};
// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
constexpr u8 QUAD_INDEXED_SPV[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
};
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
return {{
{
@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
const u8* code) {
vk::Span<VkPushConstantRange> push_constants,
std::span<const u32> code) {
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
.bindingCount = bindings.size(),
.pBindings = bindings.data(),
});
layout = device.GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
.pushConstantRangeCount = push_constants.size(),
.pPushConstantRanges = push_constants.data(),
});
if (!templates.empty()) {
descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
}
auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
std::memcpy(code_copy.get(), code, code_size);
module = device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = code_size,
.pCode = code_copy.get(),
.codeSize = static_cast<u32>(code.size_bytes()),
.pCode = code.data(),
});
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
@ -467,7 +168,7 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array),
BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@ -510,12 +211,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
return {*buffer.handle, 0};
}
Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_,
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass),
uint8_pass),
: VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@ -561,8 +261,7 @@ QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(),
BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV),
QUAD_INDEXED_SPV),
BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}

@ -5,6 +5,7 @@
#pragma once
#include <optional>
#include <span>
#include <utility>
#include "common/common_types.h"
@ -24,8 +25,7 @@ public:
explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
const u8* code);
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
~VKComputePass();
protected:

@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
@ -122,6 +123,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_R16G16_UNORM,
VK_FORMAT_R16G16_SNORM,
VK_FORMAT_R16G16_SFLOAT,
VK_FORMAT_R16G16_SINT,
VK_FORMAT_R16_UNORM,
VK_FORMAT_R16_UINT,
VK_FORMAT_R8G8B8A8_SRGB,
@ -161,18 +163,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VK_FORMAT_BC2_SRGB_BLOCK,
VK_FORMAT_BC3_SRGB_BLOCK,
VK_FORMAT_BC7_SRGB_BLOCK,
VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
@ -192,7 +208,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_,
VkSurfaceKHR surface, const vk::InstanceDispatch& dld_)
: dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
: instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} {
SetupFamilies(surface);
SetupFeatures();
@ -214,7 +230,7 @@ bool VKDevice::Create() {
features2.features = {
.robustBufferAccess = false,
.fullDrawIndexUint32 = false,
.imageCubeArray = false,
.imageCubeArray = true,
.independentBlend = true,
.geometryShader = true,
.tessellationShader = true,
@ -242,7 +258,7 @@ bool VKDevice::Create() {
.shaderTessellationAndGeometryPointSize = false,
.shaderImageGatherExtended = true,
.shaderStorageImageExtendedFormats = false,
.shaderStorageImageMultisample = false,
.shaderStorageImageMultisample = true,
.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
.shaderStorageImageWriteWithoutFormat = true,
.shaderUniformBufferArrayDynamicIndexing = false,
@ -268,7 +284,6 @@ bool VKDevice::Create() {
.variableMultisampleRate = false,
.inheritedQueries = false,
};
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
.pNext = nullptr,
@ -380,6 +395,20 @@ bool VKDevice::Create() {
LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
}
VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
if (ext_robustness2) {
robustness2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
.pNext = nullptr,
.robustBufferAccess2 = false,
.robustImageAccess2 = true,
.nullDescriptor = true,
};
SetNext(next, robustness2);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
}
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@ -405,7 +434,14 @@ bool VKDevice::Create() {
}
CollectTelemetryParameters();
CollectToolingInfo();
if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
LOG_WARNING(
Render_Vulkan,
"Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu");
ext_extended_dynamic_state = false;
}
if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {
// AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it
// seems to cause stability issues
@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const {
LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
// Wait for the log to flush and for Nsight Aftermath to dump the results
std::this_thread::sleep_for(std::chrono::seconds{3});
std::this_thread::sleep_for(std::chrono::seconds{15});
}
void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
@ -499,6 +535,16 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features)
return true;
}
bool VKDevice::TestDepthStencilBlits() const {
static constexpr VkFormatFeatureFlags required_features =
VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
const auto test_features = [](VkFormatProperties props) {
return (props.optimalTilingFeatures & required_features) == required_features;
};
return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) &&
test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
}
bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
const auto features{physical.GetFeatures()};
const std::array feature_report = {
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
std::make_pair(features.imageCubeArray, "imageCubeArray"),
std::make_pair(features.independentBlend, "independentBlend"),
std::make_pair(features.depthClamp, "depthClamp"),
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
"shaderStorageImageWriteWithoutFormat"),
};
@ -608,6 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
bool has_ext_extended_dynamic_state{};
bool has_ext_robustness2{};
for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
@ -627,11 +676,15 @@ std::vector<const char*> VKDevice::LoadExtensions() {
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
true);
test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
if (instance_version >= VK_API_VERSION_1_1) {
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
}
@ -733,6 +786,18 @@ std::vector<const char*> VKDevice::LoadExtensions() {
}
}
if (has_ext_robustness2) {
VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
robustness2.pNext = nullptr;
features.pNext = &robustness2;
physical.GetFeatures2KHR(features);
if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
ext_robustness2 = true;
}
}
return extensions;
}
@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
void VKDevice::SetupFeatures() {
const auto supported_features{physical.GetFeatures()};
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_blit_depth_stencil_supported = TestDepthStencilBlits();
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
}
@ -794,6 +860,32 @@ void VKDevice::CollectTelemetryParameters() {
}
}
void VKDevice::CollectToolingInfo() {
if (!ext_tooling_info) {
return;
}
const auto vkGetPhysicalDeviceToolPropertiesEXT =
reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
if (!vkGetPhysicalDeviceToolPropertiesEXT) {
return;
}
u32 tool_count = 0;
if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
return;
}
std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
return;
}
for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
const std::string_view name = tool.name;
LOG_INFO(Render_Vulkan, "{}", name);
has_renderdoc = has_renderdoc || name == "RenderDoc";
has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
}
}
std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static constexpr float QUEUE_PRIORITY = 1.0f;

@ -157,6 +157,11 @@ public:
return is_formatless_image_load_supported;
}
/// Returns true when blitting from and to depth stencil images is supported.
/// The flag is filled in by SetupFeatures() with the result of TestDepthStencilBlits().
bool IsBlitDepthStencilSupported() const {
return is_blit_depth_stencil_supported;
}
/// Returns true if the device supports VK_NV_viewport_swizzle.
bool IsNvViewportSwizzleSupported() const {
return nv_viewport_swizzle;
@ -172,6 +177,11 @@ public:
return ext_index_type_uint8;
}
/// Returns true if the device supports VK_EXT_sampler_filter_minmax.
/// Reflects the ext_sampler_filter_minmax flag that LoadExtensions() hands to its extension test
/// together with VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME.
bool IsExtSamplerFilterMinmaxSupported() const {
return ext_sampler_filter_minmax;
}
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
bool IsExtDepthRangeUnrestrictedSupported() const {
return ext_depth_range_unrestricted;
@ -197,6 +207,16 @@ public:
return ext_extended_dynamic_state;
}
/// Returns true if the device supports VK_EXT_shader_stencil_export.
/// Reflects the ext_shader_stencil_export flag that LoadExtensions() hands to its extension test
/// together with VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME.
bool IsExtShaderStencilExportSupported() const {
return ext_shader_stencil_export;
}
/// Returns true when a known debugging tool is attached.
/// The known tools are the ones CollectToolingInfo() matches by name: "RenderDoc" and
/// "NVIDIA Nsight Graphics". Both flags default to false, so this stays false when
/// CollectToolingInfo() returns early.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
@ -228,16 +248,23 @@ private:
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
/// Collects information about attached tools.
void CollectToolingInfo();
/// Returns a list of queue initialization descriptors.
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
/// Returns true if ASTC textures are natively supported.
bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
/// Returns true if the device natively supports blitting depth stencil images.
bool TestDepthStencilBlits() const;
/// Returns true if a format is supported.
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const;
VkInstance instance; ///< Vulkan instance.
vk::DeviceDispatch dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
VkPhysicalDeviceProperties properties; ///< Device properties.
@ -253,15 +280,22 @@ private:
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
// Asynchronous Graphics Pipeline setting
bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline

@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const {
}
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
Tegra::MemoryManager& memory_manager_,
VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_,
VKQueryCache& query_cache_, const VKDevice& device_,
VKScheduler& scheduler_)
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
const VKDevice& device_, VKScheduler& scheduler_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
device{device_}, scheduler{scheduler_} {}

@ -8,6 +8,7 @@
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
@ -24,7 +25,6 @@ class VKBufferCache;
class VKDevice;
class VKQueryCache;
class VKScheduler;
class VKTextureCache;
class InnerFence : public VideoCommon::FenceBase {
public:
@ -51,12 +51,12 @@ private:
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
class VKFenceManager final : public GenericFenceManager {
public:
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_,
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
const VKDevice& device_, VKScheduler& scheduler_);

@ -15,7 +15,6 @@
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
};
}
/// Translates a guest MSAA mode into a Vulkan sample count flag.
/// 1x1 maps to 1 sample, 2x1 variants to 2, 2x2 variants to 4, 4x2 variants to 8 and 4x4 to 16.
/// Any other value triggers UNREACHABLE_MSG and falls back to a single sample.
VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
    using MsaaMode = Tegra::Texture::MsaaMode;
    switch (msaa_mode) {
    case MsaaMode::Msaa1x1:
        return VK_SAMPLE_COUNT_1_BIT;
    case MsaaMode::Msaa2x1:
    case MsaaMode::Msaa2x1_D3D:
        return VK_SAMPLE_COUNT_2_BIT;
    case MsaaMode::Msaa2x2:
    case MsaaMode::Msaa2x2_VC4:
    case MsaaMode::Msaa2x2_VC12:
        return VK_SAMPLE_COUNT_4_BIT;
    case MsaaMode::Msaa4x2:
    case MsaaMode::Msaa4x2_D3D:
    case MsaaMode::Msaa4x2_VC8:
    case MsaaMode::Msaa4x2_VC24:
        return VK_SAMPLE_COUNT_8_BIT;
    case MsaaMode::Msaa4x4:
        return VK_SAMPLE_COUNT_16_BIT;
    default:
        break;
    }
    // Unknown mode: report it and keep rendering with a single sample.
    UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
    return VK_SAMPLE_COUNT_1_BIT;
}
} // Anonymous namespace
VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKRenderPassCache& renderpass_cache_,
const GraphicsPipelineCacheKey& key_,
vk::Span<VkDescriptorSetLayoutBinding> bindings_,
const SPIRVProgram& program_)
: device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings_)},
const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
const SPIRVProgram& program, u32 num_color_buffers)
: device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules(
program_)},
renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)},
pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {}
descriptor_template{CreateDescriptorUpdateTemplate(program)},
modules(CreateShaderModules(program)),
pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
@ -179,8 +200,9 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
return shader_modules;
}
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
VkRenderPass renderpass,
u32 num_color_buffers) const {
const auto& state = cache_key.fixed_state;
const auto& viewport_swizzles = state.viewport_swizzles;
@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
};
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(),
UnpackViewportSwizzle);
std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
.pNext = nullptr,
@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
.rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
};
std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments);
for (std::size_t index = 0; index < num_attachments; ++index) {
for (std::size_t index = 0; index < num_color_buffers; ++index) {
static constexpr std::array COMPONENT_TABLE{
VK_COLOR_COMPONENT_R_BIT,
VK_COLOR_COMPONENT_G_BIT,
@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = static_cast<u32>(num_attachments),
.attachmentCount = num_color_buffers,
.pAttachments = cb_attachments.data(),
.blendConstants = {},
};
@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
stage_ci.pNext = &subgroup_size_ci;
}
}
const VkGraphicsPipelineCreateInfo ci{
return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
.subpass = 0,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
};
return device.GetLogical().CreateGraphicsPipeline(ci);
});
}
} // namespace Vulkan

@ -8,10 +8,10 @@
#include <optional>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h"
@ -20,8 +20,7 @@ namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey {
RenderPassParams renderpass_params;
u32 padding;
VkRenderPass renderpass;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
FixedPipelineState fixed_state;
@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey {
}
std::size_t Size() const noexcept {
return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
class VKDescriptorPool;
class VKDevice;
class VKRenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
@ -52,12 +50,11 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt
class VKGraphicsPipeline final {
public:
explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKRenderPassCache& renderpass_cache_,
const GraphicsPipelineCacheKey& key_,
vk::Span<VkDescriptorSetLayoutBinding> bindings_,
const SPIRVProgram& program_);
const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
const SPIRVProgram& program, u32 num_color_buffers);
~VKGraphicsPipeline();
VkDescriptorSet CommitDescriptorSet();
@ -70,10 +67,6 @@ public:
return *layout;
}
VkRenderPass GetRenderPass() const {
return renderpass;
}
GraphicsPipelineCacheKey GetCacheKey() const {
return cache_key;
}
@ -89,8 +82,8 @@ private:
std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const;
vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
u32 num_color_buffers) const;
const VKDevice& device;
VKScheduler& scheduler;
@ -104,7 +97,6 @@ private:
vk::DescriptorUpdateTemplateKHR descriptor_template;
std::vector<vk::ShaderModule> modules;
VkRenderPass renderpass;
vk::Pipeline pipeline;
};

@ -1,135 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_image.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
/// Creates the Vulkan image described by image_ci_ and sets up per-subresource state tracking.
/// One barrier slot and one tracked state are allocated per (layer, level) pair; every tracked
/// state starts with no access bits and the create info's initial layout.
VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_,
                 const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_)
    : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_},
      image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} {
    UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0,
                         "Queue family tracking is not implemented");

    image = device.GetLogical().CreateImage(image_ci_);

    const u32 subresource_count = image_num_layers * image_num_levels;
    const SubrangeState initial_state{
        .access = {},
        .layout = image_ci_.initialLayout,
    };
    barriers.resize(subresource_count);
    subrange_states.resize(subresource_count, initial_state);
}
VKImage::~VKImage() = default;
void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
VkImageLayout new_layout) {
if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
return;
}
std::size_t cursor = 0;
for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
const u32 layer = base_layer + layer_it;
const u32 level = base_level + level_it;
auto& state = GetSubrangeState(layer, level);
auto& barrier = barriers[cursor];
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = state.access;
barrier.dstAccessMask = new_access;
barrier.oldLayout = state.layout;
barrier.newLayout = new_layout;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = *image;
barrier.subresourceRange.aspectMask = aspect_mask;
barrier.subresourceRange.baseMipLevel = level;
barrier.subresourceRange.levelCount = 1;
barrier.subresourceRange.baseArrayLayer = layer;
barrier.subresourceRange.layerCount = 1;
state.access = new_access;
state.layout = new_layout;
}
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) {
// TODO(Rodrigo): Implement a way to use the latest stage across subresources.
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {},
vk::Span(barriers.data(), cursor));
});
}
/// Reports whether any subresource in the given range differs from the requested access mask or
/// layout, i.e. whether a transition actually has to be recorded.
///
/// As a side effect, a request that does not cover the whole image marks the image as diverged.
///
/// @return true when at least one subresource in the range needs a transition.
bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
                         VkAccessFlags new_access, VkImageLayout new_layout) noexcept {
    const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
                               base_level == 0 && num_levels == image_num_levels;
    if (!is_full_range) {
        state_diverged = true;
    }

    if (!state_diverged) {
        // Only full-range transitions have happened so far (every partial request sets
        // state_diverged above before any state is written), so all subresources share the state
        // of (0, 0). Checking that single entry is enough; no need to scan the whole image.
        const auto& state = GetSubrangeState(0, 0);
        return state.access != new_access || state.layout != new_layout;
    }

    // Diverged: subresources may differ from each other, so inspect the requested range.
    for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
        for (u32 level_it = 0; level_it < num_levels; ++level_it) {
            const u32 layer = base_layer + layer_it;
            const u32 level = base_level + level_it;
            const auto& state = GetSubrangeState(layer, level);
            if (state.access != new_access || state.layout != new_layout) {
                return true;
            }
        }
    }
    return false;
}
void VKImage::CreatePresentView() {
// Image type has to be 2D to be presented.
present_view = device.GetLogical().CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
});
}
/// Returns the tracked state of one subresource.
/// States are stored layer-major: all mip levels of layer 0, then all levels of layer 1, etc.
VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
    const u32 first_level_of_layer = layer * image_num_levels;
    const std::size_t index =
        static_cast<std::size_t>(first_level_of_layer) + static_cast<std::size_t>(level);
    return subrange_states[index];
}
} // namespace Vulkan

@ -1,84 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
/// Owns a Vulkan image and tracks the access mask and layout of each of its subresources
/// (one entry per array layer and mip level) so that transitions can be skipped when redundant.
class VKImage {
public:
/// Creates the image from the given create info; aspect_mask_ is used for barriers and views.
explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_,
const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_);
~VKImage();
/// Transitions the given range of layers and mip levels to a new access mask and layout,
/// recording the required barriers through the scheduler and updating the tracked state.
void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
VkImageLayout new_layout);
/// Returns a view compatible with presentation, the image has to be 2D.
/// The view is created on first use and reused afterwards.
VkImageView GetPresentView() {
if (!present_view) {
CreatePresentView();
}
return *present_view;
}
/// Returns the Vulkan image handle.
const vk::Image& GetHandle() const {
return image;
}
/// Returns the Vulkan format for this image.
VkFormat GetFormat() const {
return format;
}
/// Returns the Vulkan aspect mask.
VkImageAspectFlags GetAspectMask() const {
return aspect_mask;
}
private:
/// Tracked state of a single (layer, level) subresource.
struct SubrangeState final {
VkAccessFlags access = 0; ///< Current access bits.
VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout.
};
/// Returns true when any subresource in the range differs from the requested access and layout.
/// Also flags the image as diverged when the range does not cover the whole image.
bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
VkAccessFlags new_access, VkImageLayout new_layout) noexcept;
/// Creates a presentation view.
void CreatePresentView();
/// Returns the subrange state for a layer and level.
SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
const VKDevice& device; ///< Device handle.
VKScheduler& scheduler; ///< Device scheduler.
const VkFormat format; ///< Vulkan format.
const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
const u32 image_num_layers; ///< Number of layers.
const u32 image_num_levels; ///< Number of mipmap levels.
vk::Image image; ///< Image handle.
vk::ImageView present_view; ///< Image view compatible with presentation.
std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers, one slot per subresource.
std::vector<SubrangeState> subrange_states; ///< Current state of each subresource.
bool state_diverged = false; ///< Set once a transition targets less than the whole image.
};
} // namespace Vulkan

@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() {
}
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
return MemoryMap{this, memory.Map(interval.first + offset_, size)};
return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size));
}
void VKMemoryCommitImpl::Unmap() const {

@ -5,6 +5,7 @@
#pragma once
#include <memory>
#include <span>
#include <utility>
#include <vector>
#include "common/common_types.h"
@ -93,8 +94,8 @@ private:
/// Holds ownership of a memory map.
class MemoryMap final {
public:
explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_)
: commit{commit_}, address{address_} {}
explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_)
: commit{commit_}, span{span_} {}
~MemoryMap() {
if (commit) {
@ -108,19 +109,24 @@ public:
commit = nullptr;
}
/// Returns a span to the memory map.
[[nodiscard]] std::span<u8> Span() const noexcept {
return span;
}
/// Returns the address of the memory map.
u8* GetAddress() const {
return address;
[[nodiscard]] u8* Address() const noexcept {
return span.data();
}
/// Returns the address of the memory map;
operator u8*() const {
return address;
[[nodiscard]] operator u8*() const noexcept {
return span.data();
}
private:
const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
u8* address{}; ///< Address to the mapped memory.
std::span<u8> span; ///< Address to the mapped memory.
};
} // namespace Vulkan

@ -8,6 +8,7 @@
#include <vector>
#include "common/bit_cast.h"
#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
@ -22,7 +23,6 @@
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
VideoCommon::Shader::CompileDepth::FullDecompile};
.depth = VideoCommon::Shader::CompileDepth::FullDecompile,
.disable_else_derivation = true,
};
/// Maps a shader program index to a stage index.
/// Programs 0 and 1 both map to stage 0; every later program maps to `program - 1`.
constexpr std::size_t GetStageFromProgram(std::size_t program) {
    if (program == 0) {
        return 0;
    }
    return program - 1;
}
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKRenderPassCache& renderpass_cache_)
: ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_},
descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
renderpass_cache{renderpass_cache_} {}
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
update_descriptor_queue_} {}
VKPipelineCache::~VKPipelineCache() = default;
@ -199,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
}
VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
VideoCommon::Shader::AsyncShaders& async_shaders) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, bindings,
program, key);
update_descriptor_queue, bindings, program, key,
num_color_buffers);
}
last_graphics_pipeline = pair->second.get();
return last_graphics_pipeline;
@ -229,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
update_descriptor_queue, key, bindings,
program, num_color_buffers);
gpu.ShaderNotify().MarkShaderComplete();
}
last_graphics_pipeline = entry.get();

@ -19,7 +19,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
@ -119,18 +118,18 @@ private:
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKRenderPassCache& renderpass_cache_);
explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, const VKDevice& device,
VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~VKPipelineCache() override;
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
u32 num_color_buffers,
VideoCommon::Shader::AsyncShaders& async_shaders);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
@ -153,7 +152,6 @@ private:
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache& renderpass_cache;
std::unique_ptr<Shader> null_shader;
std::unique_ptr<Shader> null_kernel;

File diff suppressed because it is too large Load Diff

@ -11,11 +11,11 @@
#include <vector>
#include <boost/container/static_vector.hpp>
#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
@ -24,10 +24,9 @@
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
@ -49,60 +48,9 @@ namespace Vulkan {
struct VKScreenInfo;
using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>;
/// Key identifying a cached framebuffer: the render pass, the attached image views and the
/// framebuffer dimensions.
struct FramebufferCacheKey {
    VkRenderPass renderpass{};
    u32 width = 0;
    u32 height = 0;
    u32 layers = 0;
    ImageViewsPack views;

    /// Combines the render pass, every attached view and then width, height and layers, in that
    /// order, into a single hash value.
    std::size_t Hash() const noexcept {
        std::size_t seed = 0;
        boost::hash_combine(seed, static_cast<VkRenderPass>(renderpass));
        for (const auto& attachment : views) {
            boost::hash_combine(seed, static_cast<VkImageView>(attachment));
        }
        boost::hash_combine(seed, width);
        boost::hash_combine(seed, height);
        boost::hash_combine(seed, layers);
        return seed;
    }

    /// Two keys are equal when every member compares equal.
    bool operator==(const FramebufferCacheKey& rhs) const noexcept {
        return renderpass == rhs.renderpass && views == rhs.views && width == rhs.width &&
               height == rhs.height && layers == rhs.layers;
    }

    bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
        return !(*this == rhs);
    }
};
} // namespace Vulkan
namespace std {

/// Lets Vulkan::FramebufferCacheKey be used as a key in unordered containers by forwarding to
/// the key's own Hash() member.
template <>
struct hash<Vulkan::FramebufferCacheKey> {
    std::size_t operator()(const Vulkan::FramebufferCacheKey& key) const noexcept {
        const std::size_t digest = key.Hash();
        return digest;
    }
};

} // namespace std
namespace Vulkan {
class StateTracker;
class BufferBindings;
/// Pairs a view with a pointer to an image layout.
/// NOTE(review): presumably the layout slot is filled in when the view is bound -- confirm
/// against the code that populates it.
struct ImageView {
    View view;
    VkImageLayout* layout{}; ///< Null until a layout is attached.
};
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@ -123,15 +71,18 @@ public:
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
@ -145,11 +96,17 @@ public:
}
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
static constexpr size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
struct DrawParameters {
void Draw(vk::CommandBuffer cmdbuf) const;
@ -160,23 +117,8 @@ private:
bool is_indexed = 0;
};
using ColorAttachments = std::array<View, Maxwell::NumRenderTargets>;
using ZetaAttachment = View;
using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
void FlushWork();
/// @brief Updates the currently bound attachments
/// @param is_clear True when the framebuffer is updated as a clear
/// @return Bitfield of attachments being used as sampled textures
Texceptions UpdateAttachments(bool is_clear);
std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
/// Setups geometry buffers and state.
DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
bool is_indexed, bool is_instanced);
@ -184,17 +126,12 @@ private:
/// Setup descriptors in the graphics pipeline.
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color,
const ZetaAttachment& zeta);
void UpdateDynamicStates();
void BeginTransformFeedback();
void EndTransformFeedback();
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
void SetupVertexArrays(BufferBindings& buffer_bindings);
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
@ -240,14 +177,6 @@ private:
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@ -264,18 +193,16 @@ private:
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
std::size_t CalculateComputeStreamBufferSize() const;
size_t CalculateComputeStreamBufferSize() const;
std::size_t CalculateVertexArraysSize() const;
size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
size_t CalculateIndexBufferSize() const;
std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer) const;
RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer) const;
VkBuffer DefaultBuffer();
@ -290,18 +217,19 @@ private:
StateTracker& state_tracker;
VKScheduler& scheduler;
VKStreamBuffer stream_buffer;
VKStagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
VKRenderPassCache renderpass_cache;
BlitImageHelper blit_image;
QuadArrayPass quad_array_pass;
QuadIndexedPass quad_indexed_pass;
Uint8Pass uint8_pass;
VKTextureCache texture_cache;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
VKQueryCache query_cache;
VKFenceManager fence_manager;
@ -310,16 +238,11 @@ private:
vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;
ColorAttachments color_attachments;
ZetaAttachment zeta_attachment;
std::vector<ImageView> sampled_views;
std::vector<ImageView> image_views;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
u32 draw_counter = 0;
// TODO(Rodrigo): Invalidate on image destruction
std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache;
};
} // namespace Vulkan

@ -1,158 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include <vector>
#include "common/cityhash.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
std::size_t RenderPassParams::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
return static_cast<std::size_t>(hash);
}
/// Compares two parameter sets byte by byte.
bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
    const int difference = std::memcmp(this, &rhs, sizeof(RenderPassParams));
    return difference == 0;
}
/// Stores the device that CreateRenderPass later uses to resolve surface formats.
VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {}
/// Defaulted destructor, defined out of line.
VKRenderPassCache::~VKRenderPassCache() = default;
/// Returns the render pass matching the given parameters, creating and caching it on first use.
VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
    const auto [it, inserted] = cache.try_emplace(params);
    auto& renderpass = it->second;
    if (!inserted) {
        // Cache hit: the render pass was built by an earlier call.
        return *renderpass;
    }
    renderpass = CreateRenderPass(params);
    return *renderpass;
}
/// Builds a single-subpass render pass from the given parameters.
/// One attachment is described per color render target, followed by an optional
/// depth-stencil ("zeta") attachment when params.zeta_format is non-zero.
vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
using namespace VideoCore::Surface;
const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
std::vector<VkAttachmentDescription> descriptors;
descriptors.reserve(num_attachments);
std::vector<VkAttachmentReference> color_references;
color_references.reserve(num_attachments);
for (std::size_t rt = 0; rt < num_attachments; ++rt) {
// Translate the stored guest render target format into a host Vulkan format.
const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<int>(pixel_format));
// TODO(Rodrigo): MAY_ALIAS_BIT is currently set unconditionally below; only set it when needed.
// Bit `rt` of texceptions selects the general layout for this attachment.
const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
descriptors.push_back({
.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
.format = format.format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = color_layout,
.finalLayout = color_layout,
});
color_references.push_back({
.attachment = static_cast<u32>(rt),
.layout = color_layout,
});
}
// Only assigned (and only referenced by the subpass) when a zeta attachment exists.
VkAttachmentReference zeta_attachment_ref;
// A zeta_format of zero means there is no depth-stencil attachment.
const bool has_zeta = params.zeta_format != 0;
if (has_zeta) {
const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<int>(pixel_format));
const VkImageLayout zeta_layout = params.zeta_texception != 0
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
descriptors.push_back({
.flags = 0,
.format = format.format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = zeta_layout,
.finalLayout = zeta_layout,
});
// The zeta attachment is appended after all color attachments.
zeta_attachment_ref = {
.attachment = static_cast<u32>(num_attachments),
.layout = zeta_layout,
};
}
const VkSubpassDescription subpass_description{
.flags = 0,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,
.colorAttachmentCount = static_cast<u32>(color_references.size()),
.pColorAttachments = color_references.data(),
.pResolveAttachments = nullptr,
.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
.preserveAttachmentCount = 0,
.pPreserveAttachments = nullptr,
};
// Accumulate the access and stage masks for the attachment kinds that are present.
// NOTE(review): both masks stay zero when there are no color attachments and no zeta.
VkAccessFlags access = 0;
VkPipelineStageFlags stage = 0;
if (!color_references.empty()) {
access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
}
if (has_zeta) {
access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
}
// Dependency from anything external to the render pass into its only subpass.
const VkSubpassDependency subpass_dependency{
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 0,
.srcStageMask = stage,
.dstStageMask = stage,
.srcAccessMask = 0,
.dstAccessMask = access,
.dependencyFlags = 0,
};
return device.GetLogical().CreateRenderPass({
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.attachmentCount = static_cast<u32>(descriptors.size()),
.pAttachments = descriptors.data(),
.subpassCount = 1,
.pSubpasses = &subpass_description,
.dependencyCount = 1,
.pDependencies = &subpass_dependency,
});
}
} // namespace Vulkan

@ -1,70 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <type_traits>
#include <unordered_map>
#include <boost/container/static_vector.hpp>
#include <boost/functional/hash.hpp>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
namespace Vulkan {
class VKDevice;
/// Key describing a render pass. It is hashed and compared through its raw bytes, so the
/// type must have no padding and stay trivially copyable (enforced by the asserts below).
struct RenderPassParams {
// Per render target color format, stored as the u8 value of a Tegra::RenderTargetFormat.
std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
// Number of entries of color_formats that are in use.
u8 num_color_attachments;
// Bitmask; bit N set makes color attachment N use the general image layout.
u8 texceptions;
// Depth format stored as the u8 value of a Tegra::DepthFormat; zero means no depth buffer.
u8 zeta_format;
// Non-zero makes the depth attachment use the general image layout.
u8 zeta_texception;
/// Returns a hash of the object representation.
std::size_t Hash() const noexcept;
/// Byte-wise equality comparison.
bool operator==(const RenderPassParams& rhs) const noexcept;
bool operator!=(const RenderPassParams& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::has_unique_object_representations_v<RenderPassParams>);
static_assert(std::is_trivially_copyable_v<RenderPassParams>);
static_assert(std::is_trivially_constructible_v<RenderPassParams>);
} // namespace Vulkan
namespace std {
/// Lets Vulkan::RenderPassParams be used as a key in unordered containers.
template <>
struct hash<Vulkan::RenderPassParams> {
    std::size_t operator()(const Vulkan::RenderPassParams& params) const noexcept {
        // Delegate to the member hash so there is a single hashing implementation.
        const std::size_t result = params.Hash();
        return result;
    }
};
} // namespace std
namespace Vulkan {
/// Caches Vulkan render passes keyed by RenderPassParams.
class VKRenderPassCache final {
public:
explicit VKRenderPassCache(const VKDevice& device_);
~VKRenderPassCache();
/// Returns the render pass for the given parameters, creating it on a cache miss.
VkRenderPass GetRenderPass(const RenderPassParams& params);
private:
/// Creates a new render pass object; does not touch the cache.
vk::RenderPass CreateRenderPass(const RenderPassParams& params) const;
const VKDevice& device;
// Owns the created render passes.
std::unordered_map<RenderPassParams, vk::RenderPass> cache;
};
} // namespace Vulkan

@ -1,83 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <unordered_map>
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/textures/texture.h"
using Tegra::Texture::TextureMipmapFilter;
namespace Vulkan {
namespace {
/// Maps an arbitrary RGBA border color to the closest predefined float border color.
VkBorderColor ConvertBorderColor(std::array<float, 4> color) {
    using Color = std::array<float, 4>;
    // TODO(Rodrigo): Manage integer border colors
    // Exact matches with the predefined border colors.
    if (color == Color{0, 0, 0, 0}) {
        return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
    }
    if (color == Color{0, 0, 0, 1}) {
        return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
    }
    if (color == Color{1, 1, 1, 1}) {
        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
    }
    // No exact match: bright colors (RGB sum above 1.35) are approximated with white.
    if (color[0] + color[1] + color[2] > 1.35f) {
        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
    }
    // Dark colors become black; alpha decides between opaque and transparent.
    return color[3] > 0.5f ? VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK
                           : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
}
} // Anonymous namespace
/// Stores a reference to the device that is later used to create samplers.
VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {}
/// Defaulted destructor.
VKSamplerCache::~VKSamplerCache() = default;
/// Creates a Vulkan sampler from a guest texture sampler control entry.
/// When the device supports custom border colors the exact guest color is used; otherwise
/// the color is approximated with one of the predefined border colors.
vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
    const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
    const std::array color = tsc.GetBorderColor();
    VkSamplerCustomBorderColorCreateInfoEXT border{
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
        .pNext = nullptr,
        .customBorderColor = {},
        .format = VK_FORMAT_UNDEFINED,
    };
    // The guest color is copied as floating point values into the color union.
    std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
    return device.GetLogical().CreateSampler({
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        // Only chain the custom border color structure when the extension is available.
        .pNext = arbitrary_borders ? &border : nullptr,
        .flags = 0,
        .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
        .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
        .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
        .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
        .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
        .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
        .mipLodBias = tsc.GetLodBias(),
        .anisotropyEnable =
            static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
        .maxAnisotropy = tsc.GetMaxAnisotropy(),
        .compareEnable = tsc.depth_compare_enabled,
        .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
        // Without mipmap filtering the LOD range is clamped to [0, 0.25].
        .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
        .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
        // The custom color above is stored as floats, so the float custom border color
        // type must be selected; the integer type would reinterpret the float bits.
        .borderColor =
            arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
        .unnormalizedCoordinates = VK_FALSE,
    });
}
/// Returns the raw VkSampler handle held by the given sampler wrapper.
VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
return *sampler;
}
} // namespace Vulkan

@ -1,29 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/sampler_cache.h"
#include "video_core/textures/texture.h"
namespace Vulkan {
class VKDevice;
/// Vulkan implementation of the common sampler cache; vk::Sampler is the stored type and
/// VkSampler is the handle type returned by ToSamplerType.
class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> {
public:
explicit VKSamplerCache(const VKDevice& device_);
~VKSamplerCache();
protected:
/// Creates a sampler object from a guest sampler entry.
vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
/// Converts a stored sampler into its raw handle.
VkSampler ToSamplerType(const vk::Sampler& sampler) const override;
private:
const VKDevice& device;
};
} // namespace Vulkan

@ -16,6 +16,7 @@
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() {
AcquireNewChunk();
}
void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer,
VkExtent2D render_area) {
if (renderpass == state.renderpass && framebuffer == state.framebuffer &&
void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) {
const VkRenderPass renderpass = framebuffer->RenderPass();
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
const VkExtent2D render_area = framebuffer->RenderArea();
if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
render_area.width == state.render_area.width &&
render_area.height == state.render_area.height) {
return;
}
const bool end_renderpass = state.renderpass != nullptr;
EndRenderPass();
state.renderpass = renderpass;
state.framebuffer = framebuffer;
state.framebuffer = framebuffer_handle;
state.render_area = render_area;
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = renderpass,
.framebuffer = framebuffer,
.renderArea =
{
.offset = {.x = 0, .y = 0},
.extent = render_area,
},
.clearValueCount = 0,
.pClearValues = nullptr,
};
Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
if (end_renderpass) {
cmdbuf.EndRenderPass();
}
Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = renderpass,
.framebuffer = framebuffer_handle,
.renderArea =
{
.offset = {.x = 0, .y = 0},
.extent = render_area,
},
.clearValueCount = 0,
.pClearValues = nullptr,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
});
num_renderpass_images = framebuffer->NumImages();
renderpass_images = framebuffer->Images();
renderpass_image_ranges = framebuffer->ImageRanges();
}
void VKScheduler::RequestOutsideRenderPassOperationContext() {
@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() {
if (!state.renderpass) {
return;
}
Record([num_images = num_renderpass_images, images = renderpass_images,
ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
std::array<VkImageMemoryBarrier, 9> barriers;
for (size_t i = 0; i < num_images; ++i) {
barriers[i] = VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = images[i],
.subresourceRange = ranges[i],
};
}
cmdbuf.EndRenderPass();
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
vk::Span(barriers.data(), num_images));
});
state.renderpass = nullptr;
Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); });
num_renderpass_images = 0;
}
void VKScheduler::AcquireNewChunk() {

@ -17,6 +17,7 @@
namespace Vulkan {
class CommandPool;
class Framebuffer;
class MasterSemaphore;
class StateTracker;
class VKDevice;
@ -52,8 +53,7 @@ public:
void DispatchWork();
/// Requests to begin a renderpass.
void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer,
VkExtent2D render_area);
void RequestRenderpass(const Framebuffer* framebuffer);
/// Requests the current execution context to be able to execute operations only allowed outside
/// of a renderpass.
@ -62,6 +62,9 @@ public:
/// Binds a pipeline to the current execution context.
void BindGraphicsPipeline(VkPipeline pipeline);
/// Invalidates current command buffer state except for render passes
void InvalidateState();
/// Assigns the query cache.
void SetQueryCache(VKQueryCache& query_cache_) {
query_cache = &query_cache_;
@ -170,8 +173,6 @@ private:
void AllocateNewContext();
void InvalidateState();
void EndPendingOperations();
void EndRenderPass();
@ -192,6 +193,11 @@ private:
std::thread worker_thread;
State state;
u32 num_renderpass_images = 0;
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;

@ -102,7 +102,7 @@ struct GenericVaryingDescription {
bool is_scalar = false;
};
spv::Dim GetSamplerDim(const Sampler& sampler) {
spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
ASSERT(!sampler.is_buffer);
switch (sampler.type) {
case Tegra::Shader::TextureType::Texture1D:
@ -119,7 +119,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) {
}
}
std::pair<spv::Dim, bool> GetImageDim(const Image& image) {
std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
switch (image.type) {
case Tegra::Shader::ImageType::Texture1D:
return {spv::Dim::Dim1D, false};
@ -980,7 +980,7 @@ private:
return binding;
}
void DeclareImage(const Image& image, u32& binding) {
void DeclareImage(const ImageEntry& image, u32& binding) {
const auto [dim, arrayed] = GetImageDim(image);
constexpr int depth = 0;
constexpr bool ms = false;

@ -21,10 +21,10 @@ class VKDevice;
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using UniformTexelEntry = VideoCommon::Shader::Sampler;
using SamplerEntry = VideoCommon::Shader::Sampler;
using StorageTexelEntry = VideoCommon::Shader::Image;
using ImageEntry = VideoCommon::Shader::Image;
using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
using ImageEntry = VideoCommon::Shader::ImageEntry;
constexpr u32 DESCRIPTOR_SET = 0;

@ -13,18 +13,13 @@
namespace Vulkan {
vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) {
// Avoid undefined behavior by copying to a staging allocation
ASSERT(code_size % sizeof(u32) == 0);
const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
std::memcpy(data.get(), code_data, code_size);
vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) {
return device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = code_size,
.pCode = data.get(),
.codeSize = static_cast<u32>(code.size_bytes()),
.pCode = code.data(),
});
}

@ -4,6 +4,8 @@
#pragma once
#include <span>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/wrapper.h"
@ -11,6 +13,6 @@ namespace Vulkan {
class VKDevice;
vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code);
} // namespace Vulkan

@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstddef>
#include <iterator>
@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
/// Builds the set of dirty flags that are raised when the tracked state is invalidated.
Flags MakeInvalidationFlags() {
    static constexpr std::array INVALIDATION_FLAGS{
        Viewports,         Scissors, DepthBias,         BlendConstants,  DepthBounds,
        StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
        DepthCompareOp,    FrontFace, StencilOp,        StencilTestEnable,
    };
    Flags flags{};
    // The table above is the single source of truth; each listed flag is set exactly once.
    for (const int flag : INVALIDATION_FLAGS) {
        flags[flag] = true;
    }
    return flags;
}

@ -52,6 +52,14 @@ public:
current_topology = INVALID_TOPOLOGY;
}
/// Marks the viewports state as dirty.
void InvalidateViewports() {
flags[Dirty::Viewports] = true;
}
/// Marks the scissors state as dirty.
void InvalidateScissors() {
flags[Dirty::Scissors] = true;
}
/// Forwards to Exchange with the viewports flag and a new value of false.
/// NOTE(review): presumably returns the previous dirty state - confirm against Exchange.
bool TouchViewports() {
return Exchange(Dirty::Viewports, false);
}

@ -19,6 +19,10 @@ namespace Vulkan {
namespace {
constexpr VkBufferUsageFlags BUFFER_USAGE =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
} // Anonymous namespace
VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_,
VkBufferUsageFlags usage)
VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
CreateBuffers(usage);
CreateBuffers();
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
ASSERT(size <= stream_buffer_size);
mapped_size = size;
@ -76,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
WaitPendingOperations(offset);
bool invalidated = false;
if (offset + size > stream_buffer_size) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidation_mark = current_watch_cursor;
@ -90,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
// Ensure that we don't wait for uncommitted fences.
scheduler.Flush();
invalidated = true;
}
return {memory.Map(offset, size), offset, invalidated};
return std::make_pair(memory.Map(offset, size), offset);
}
void VKStreamBuffer::Unmap(u64 size) {
@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) {
watch.tick = scheduler.CurrentTick();
}
void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
void VKStreamBuffer::CreateBuffers() {
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
const u32 preferred_type = GetMemoryType(memory_properties);
const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
.pNext = nullptr,
.flags = 0,
.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
.usage = usage,
.usage = BUFFER_USAGE,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,

@ -5,7 +5,7 @@
#pragma once
#include <optional>
#include <tuple>
#include <utility>
#include <vector>
#include "common/common_types.h"
@ -19,17 +19,15 @@ class VKScheduler;
class VKStreamBuffer final {
public:
explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
VkBufferUsageFlags usage);
explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler);
~VKStreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
* offset and a boolean that's true when buffer has been invalidated.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
std::pair<u8*, u64> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Unmap(u64 size);
@ -49,7 +47,7 @@ private:
};
/// Creates Vulkan buffer handles committing the required memory.
void CreateBuffers(VkBufferUsageFlags usage);
void CreateBuffers();
/// Increases the amount of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);

File diff suppressed because it is too large Load Diff

@ -4,217 +4,265 @@
#pragma once
#include <memory>
#include <unordered_map>
#include <compare>
#include <span>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_image.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class RasterizerVulkan;
using VideoCommon::ImageId;
using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
using VideoCore::Surface::PixelFormat;
class VKDevice;
class VKScheduler;
class VKStagingBufferPool;
class CachedSurfaceView;
class CachedSurface;
class BlitImageHelper;
class Image;
class ImageView;
class Framebuffer;
using Surface = std::shared_ptr<CachedSurface>;
using View = std::shared_ptr<CachedSurfaceView>;
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
struct RenderPassKey {
constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
using VideoCommon::SurfaceParams;
using VideoCommon::ViewParams;
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView;
public:
explicit CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_,
VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_,
GPUVAddr gpu_addr_, const SurfaceParams& params_);
~CachedSurface();
void UploadTexture(const std::vector<u8>& staging_buffer) override;
void DownloadTexture(std::vector<u8>& staging_buffer) override;
void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
VkImageLayout new_layout) {
image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
new_stage_mask, new_access, new_layout);
}
void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
VkImageLayout new_layout) {
image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
new_access, new_layout);
}
VKImage& GetImage() {
return *image;
}
const VKImage& GetImage() const {
return *image;
}
VkImage GetImageHandle() const {
return *image->GetHandle();
}
VkImageAspectFlags GetAspectMask() const {
return image->GetAspectMask();
}
VkBufferView GetBufferViewHandle() const {
return *buffer_view;
}
protected:
void DecorateSurfaceName() override;
View CreateView(const ViewParams& view_params) override;
private:
void UploadBuffer(const std::vector<u8>& staging_buffer);
void UploadImage(const std::vector<u8>& staging_buffer);
VkBufferImageCopy GetBufferImageCopy(u32 level) const;
VkImageSubresourceRange GetImageSubresourceRange() const;
const VKDevice& device;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
std::optional<VKImage> image;
vk::Buffer buffer;
vk::BufferView buffer_view;
VKMemoryCommit commit;
VkFormat format = VK_FORMAT_UNDEFINED;
};
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
explicit CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_,
const ViewParams& view_params_);
~CachedSurfaceView();
VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
VkImageView GetAttachment();
bool IsSameSurface(const CachedSurfaceView& rhs) const {
return &surface == &rhs.surface;
}
u32 GetWidth() const {
return surface_params.GetMipWidth(base_level);
}
u32 GetHeight() const {
return surface_params.GetMipHeight(base_level);
}
u32 GetNumLayers() const {
return num_layers;
}
bool IsBufferView() const {
return buffer_view;
}
VkImage GetImage() const {
return image;
}
VkBufferView GetBufferView() const {
return buffer_view;
}
VkImageSubresourceRange GetImageSubresourceRange() const {
return {aspect_mask, base_level, num_levels, base_layer, num_layers};
}
VkImageSubresourceLayers GetImageSubresourceLayers() const {
return {surface.GetAspectMask(), base_level, base_layer, num_layers};
}
void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask,
VkAccessFlags new_access) const {
surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
new_access, new_layout);
}
void MarkAsModified(u64 tick) {
surface.MarkAsModified(true, tick);
}
private:
// Store a copy of these values to avoid double dereference when reading them
const SurfaceParams surface_params;
const VkImage image;
const VkBufferView buffer_view;
const VkImageAspectFlags aspect_mask;
const VKDevice& device;
CachedSurface& surface;
const u32 base_level;
const u32 num_levels;
const VkImageViewType image_view_type;
u32 base_layer = 0;
u32 num_layers = 0;
u32 base_slice = 0;
u32 num_slices = 0;
VkImageView last_image_view = nullptr;
u32 last_swizzle = 0;
vk::ImageView render_target;
std::unordered_map<u32, vk::ImageView> view_cache;
};
class VKTextureCache final : public TextureCacheBase {
public:
explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
VKMemoryManager& memory_manager_, VKScheduler& scheduler_,
VKStagingBufferPool& staging_pool_);
~VKTextureCache();
private:
Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
void ImageCopy(Surface& src_surface, Surface& dst_surface,
const VideoCommon::CopyParams& copy_params) override;
void ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
const VKDevice& device;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
std::array<PixelFormat, NUM_RT> color_formats;
PixelFormat depth_format;
VkSampleCountFlagBits samples;
};
} // namespace Vulkan
namespace std {
/// Hash for Vulkan::RenderPassKey, combining its formats and sample count with XOR.
template <>
struct hash<Vulkan::RenderPassKey> {
    [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
        // Depth format and sample count are placed in the upper bits.
        size_t result = static_cast<size_t>(key.depth_format) << 48;
        result ^= static_cast<size_t>(key.samples) << 52;
        // Each color format is shifted six more bits than the previous one.
        size_t shift = 0;
        for (const auto color_format : key.color_formats) {
            result ^= static_cast<size_t>(color_format) << shift;
            shift += 6;
        }
        return result;
    }
};
} // namespace std
namespace Vulkan {
/// Pairs a Vulkan buffer handle with a memory map object.
struct ImageBufferMap {
/// Returns the buffer handle.
[[nodiscard]] VkBuffer Handle() const noexcept {
return handle;
}
/// Returns the byte span exposed by the memory map.
[[nodiscard]] std::span<u8> Span() const noexcept {
return map.Span();
}
VkBuffer handle;
MemoryMap map;
};
/// Vulkan backend state and operations handed to the generic texture cache
/// (it is the Runtime type named in TextureCacheParams).
struct TextureCacheRuntime {
const VKDevice& device;
VKScheduler& scheduler;
VKMemoryManager& memory_manager;
VKStagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
// Render passes keyed by their attachment formats and sample count.
std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache;
void Finish();
/// Maps a buffer of the given size for uploads.
[[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
/// Maps a buffer of the given size for downloads; currently shares the upload path.
[[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) {
// TODO: Have a special function for this
return MapUploadBuffer(size);
}
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
/// Accelerated uploads are not supported by this backend; always returns false.
[[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
return false;
}
/// Must never be called since CanAccelerateImageUpload always returns false.
void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
std::span<const VideoCommon::SwizzleParameters>) {
UNREACHABLE();
}
/// Intentionally a no-op on this backend.
void InsertUploadMemoryBarrier() {}
};
/// Vulkan image (or buffer) backing a texture cache image.
class Image : public VideoCommon::ImageBase {
public:
    explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                   VAddr cpu_addr);

    /// Uploads data from the mapped buffer using buffer-to-image copies.
    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
                      std::span<const VideoCommon::BufferImageCopy> copies);

    /// Uploads data from the mapped buffer using buffer-to-buffer copies.
    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
                      std::span<const VideoCommon::BufferCopy> copies);

    /// Downloads image data into the mapped buffer.
    void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
                        std::span<const VideoCommon::BufferImageCopy> copies);

    /// Returns the raw image handle.
    [[nodiscard]] VkImage Handle() const noexcept {
        return *image;
    }

    /// Returns the raw buffer handle.
    [[nodiscard]] VkBuffer Buffer() const noexcept {
        return *buffer;
    }

    /// Returns the aspect mask of the image.
    /// The return type matches the stored member (an aspect mask, not image create flags).
    [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
        return aspect_mask;
    }

private:
    VKScheduler* scheduler;
    vk::Image image;
    vk::Buffer buffer;
    VKMemoryCommit commit;
    VkImageAspectFlags aspect_mask = 0;
    bool initialized = false;
};
/// Vulkan image view (or buffer view) for a texture cache image view.
class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
[[nodiscard]] VkImageView DepthView();
[[nodiscard]] VkImageView StencilView();
/// Returns the view handle stored for the queried view type.
/// The type is used directly as an index into image_views, without a bounds check.
[[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
return *image_views[static_cast<size_t>(query_type)];
}
[[nodiscard]] VkBufferView BufferView() const noexcept {
return *buffer_view;
}
[[nodiscard]] VkImage ImageHandle() const noexcept {
return image_handle;
}
[[nodiscard]] VkImageView RenderTarget() const noexcept {
return render_target;
}
[[nodiscard]] PixelFormat ImageFormat() const noexcept {
return image_format;
}
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
private:
[[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
const VKDevice* device = nullptr;
// One view slot per image view type.
std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
vk::BufferView buffer_view;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
PixelFormat image_format = PixelFormat::Invalid;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
};
/// Adds nothing on top of the common ImageAllocBase; named as ImageAlloc in TextureCacheParams.
class ImageAlloc : public VideoCommon::ImageAllocBase {};
/// Owns a Vulkan sampler constructed from a guest sampler entry.
class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
/// Returns the raw sampler handle.
[[nodiscard]] VkSampler Handle() const noexcept {
return *sampler;
}
private:
vk::Sampler sampler;
};
/// Vulkan framebuffer together with the render pass, render area and the images it
/// attaches. The scheduler reads the image list when a render pass is requested.
class Framebuffer {
public:
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
[[nodiscard]] VkFramebuffer Handle() const noexcept {
return *framebuffer;
}
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
return renderpass;
}
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
return render_area;
}
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
[[nodiscard]] u32 NumColorBuffers() const noexcept {
return num_color_buffers;
}
/// Number of valid entries in Images() and ImageRanges().
[[nodiscard]] u32 NumImages() const noexcept {
return num_images;
}
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
return images;
}
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
return image_ranges;
}
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
VkExtent2D render_area{};
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 num_color_buffers = 0;
u32 num_images = 0;
// NOTE(review): 9 presumably is NUM_RT color buffers plus one depth buffer - confirm.
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
};
/// Compile-time configuration and backend types used to instantiate the generic
/// VideoCommon::TextureCache for Vulkan.
struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false;
using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image;
using ImageAlloc = Vulkan::ImageAlloc;
using ImageView = Vulkan::ImageView;
using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace Vulkan

@ -40,30 +40,34 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkSampler sampler, VkImageView image_view) {
payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
payload.emplace_back(VkDescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
});
}
void AddImage(VkImageView image_view) {
payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
payload.emplace_back(VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
});
}
void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
payload.emplace_back(VkDescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
});
}
/// Appends a texel buffer view entry to the payload.
void AddTexelBuffer(VkBufferView texel_buffer) {
payload.emplace_back(texel_buffer);
}
VkImageLayout* LastImageLayout() {
return &payload.back().image.imageLayout;
}
const VkImageLayout* LastImageLayout() const {
return &payload.back().image.imageLayout;
}
private:
const VKDevice& device;
VKScheduler& scheduler;

Some files were not shown because too many files have changed in this diff Show More