Merge pull request #3301 from ReinUsesLisp/state-tracker

video_core: Remove gl_state and use a state tracker based on dirty flags
merge-requests/60/head
Rodrigo Locatti 2020-03-09 18:34:37 +07:00 committed by GitHub
commit 22e825a3bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
50 changed files with 1583 additions and 1896 deletions

@ -24,17 +24,29 @@ struct Rectangle {
: left(left), top(top), right(right), bottom(bottom) {} : left(left), top(top), right(right), bottom(bottom) {}
T GetWidth() const { T GetWidth() const {
if constexpr (std::is_floating_point_v<T>) {
return std::abs(right - left);
} else {
return std::abs(static_cast<std::make_signed_t<T>>(right - left)); return std::abs(static_cast<std::make_signed_t<T>>(right - left));
} }
}
T GetHeight() const { T GetHeight() const {
if constexpr (std::is_floating_point_v<T>) {
return std::abs(bottom - top);
} else {
return std::abs(static_cast<std::make_signed_t<T>>(bottom - top)); return std::abs(static_cast<std::make_signed_t<T>>(bottom - top));
} }
}
Rectangle<T> TranslateX(const T x) const { Rectangle<T> TranslateX(const T x) const {
return Rectangle{left + x, top, right + x, bottom}; return Rectangle{left + x, top, right + x, bottom};
} }
Rectangle<T> TranslateY(const T y) const { Rectangle<T> TranslateY(const T y) const {
return Rectangle{left, top + y, right, bottom + y}; return Rectangle{left, top + y, right, bottom + y};
} }
Rectangle<T> Scale(const float s) const { Rectangle<T> Scale(const float s) const {
return Rectangle{left, top, static_cast<T>(left + GetWidth() * s), return Rectangle{left, top, static_cast<T>(left + GetWidth() * s),
static_cast<T>(top + GetHeight() * s)}; static_cast<T>(top + GetHeight() * s)};

@ -174,6 +174,7 @@ struct System::Impl {
} }
interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
gpu_core = VideoCore::CreateGPU(system); gpu_core = VideoCore::CreateGPU(system);
renderer->Rasterizer().SetupDirtyFlags();
is_powered_on = true; is_powered_on = true;
exit_lock = false; exit_lock = false;

@ -2,6 +2,8 @@ add_library(video_core STATIC
buffer_cache/buffer_block.h buffer_cache/buffer_block.h
buffer_cache/buffer_cache.h buffer_cache/buffer_cache.h
buffer_cache/map_interval.h buffer_cache/map_interval.h
dirty_flags.cpp
dirty_flags.h
dma_pusher.cpp dma_pusher.cpp
dma_pusher.h dma_pusher.h
engines/const_buffer_engine_interface.h engines/const_buffer_engine_interface.h
@ -69,8 +71,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_manager.h
renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state.cpp renderer_opengl/gl_state_tracker.cpp
renderer_opengl/gl_state.h renderer_opengl/gl_state_tracker.h
renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.cpp
renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.cpp
@ -198,6 +200,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_staging_buffer_pool.cpp renderer_vulkan/vk_staging_buffer_pool.cpp
renderer_vulkan/vk_staging_buffer_pool.h renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_state_tracker.cpp
renderer_vulkan/vk_state_tracker.h
renderer_vulkan/vk_stream_buffer.cpp renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp renderer_vulkan/vk_swapchain.cpp

@ -0,0 +1,46 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstddef>
#include "common/common_types.h"
#include "video_core/dirty_flags.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32))
namespace VideoCommon::Dirty {
using Tegra::Engines::Maxwell3D;
void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store) {
store[RenderTargets] = true;
store[ZetaBuffer] = true;
for (std::size_t i = 0; i < Maxwell3D::Regs::NumRenderTargets; ++i) {
store[ColorBuffer0 + i] = true;
}
}
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
static constexpr std::size_t num_per_rt = NUM(rt[0]);
static constexpr std::size_t begin = OFF(rt);
static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
for (std::size_t rt = 0; rt < Maxwell3D::Regs::NumRenderTargets; ++rt) {
FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
}
FillBlock(tables[1], begin, num, RenderTargets);
static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
const u8 flag = zeta_flags[i];
auto& table = tables[i];
table[OFF(zeta_enable)] = flag;
table[OFF(zeta_width)] = flag;
table[OFF(zeta_height)] = flag;
FillBlock(table, OFF(zeta), NUM(zeta), flag);
}
}
} // namespace VideoCommon::Dirty

@ -0,0 +1,51 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <cstddef>
#include <iterator>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
namespace VideoCommon::Dirty {
enum : u8 {
NullEntry = 0,
RenderTargets,
ColorBuffer0,
ColorBuffer1,
ColorBuffer2,
ColorBuffer3,
ColorBuffer4,
ColorBuffer5,
ColorBuffer6,
ColorBuffer7,
ZetaBuffer,
LastCommonEntry,
};
template <typename Integer>
void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Table& table, std::size_t begin,
std::size_t num, Integer dirty_index) {
const auto it = std::begin(table) + begin;
std::fill(it, it + num, static_cast<u8>(dirty_index));
}
template <typename Integer1, typename Integer2>
void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_t begin,
std::size_t num, Integer1 index_a, Integer2 index_b) {
FillBlock(tables[0], begin, num, index_a);
FillBlock(tables[1], begin, num, index_b);
}
void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store);
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
} // namespace VideoCommon::Dirty

@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls); MICROPROFILE_SCOPE(DispatchCalls);
// On entering GPU code, assume all memory may be touched by the ARM core. // On entering GPU code, assume all memory may be touched by the ARM core.
gpu.Maxwell3D().dirty.OnMemoryWrite(); gpu.Maxwell3D().OnMemoryWrite();
dma_pushbuffer_subindex = 0; dma_pushbuffer_subindex = 0;

@ -39,7 +39,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall(); const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call); upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) { if (is_last_call) {
system.GPU().Maxwell3D().dirty.OnMemoryWrite(); system.GPU().Maxwell3D().OnMemoryWrite();
} }
break; break;
} }

@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall(); const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call); upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) { if (is_last_call) {
system.GPU().Maxwell3D().dirty.OnMemoryWrite(); system.GPU().Maxwell3D().OnMemoryWrite();
} }
break; break;
} }

@ -26,7 +26,8 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
MemoryManager& memory_manager) MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
InitDirtySettings(); dirty.flags.flip();
InitializeRegisterDefaults(); InitializeRegisterDefaults();
} }
@ -75,8 +76,8 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.stencil_back_mask = 0xFFFFFFFF; regs.stencil_back_mask = 0xFFFFFFFF;
regs.depth_test_func = Regs::ComparisonOp::Always; regs.depth_test_func = Regs::ComparisonOp::Always;
regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; regs.front_face = Regs::FrontFace::CounterClockWise;
regs.cull.cull_face = Regs::Cull::CullFace::Back; regs.cull_face = Regs::CullFace::Back;
// TODO(Rodrigo): Most games do not set a point size. I think this is a case of a // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
// register carrying a default value. Assume it's OpenGL's default (1). // register carrying a default value. Assume it's OpenGL's default (1).
@ -95,7 +96,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.rasterize_enable = 1; regs.rasterize_enable = 1;
regs.rt_separate_frag_data = 1; regs.rt_separate_frag_data = 1;
regs.framebuffer_srgb = 1; regs.framebuffer_srgb = 1;
regs.cull.front_face = Maxwell3D::Regs::Cull::FrontFace::ClockWise; regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
@ -103,164 +104,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
} }
#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
void Maxwell3D::InitDirtySettings() {
const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
const auto start_itr = dirty_pointers.begin() + start;
const auto end_itr = start_itr + range;
std::fill(start_itr, end_itr, position);
};
dirty.regs.fill(true);
// Init Render Targets
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
set_block(rt_reg, registers_per_rt, rt_dirty_reg);
++rt_dirty_reg;
}
constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
// Init Vertex Arrays
constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
vertex_reg += vertex_array_size) {
set_block(vertex_reg, 3, va_dirty_reg);
// The divisor concerns vertex array instances
dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
++va_dirty_reg;
++vi_dirty_reg;
}
constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
va_dirty_reg = DIRTY_REGS_POS(vertex_array);
for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
vertex_reg += vertex_limit_size) {
set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
va_dirty_reg++;
}
constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
constexpr u32 vertex_instance_size =
sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
constexpr u32 vertex_instance_end =
vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
vertex_reg += vertex_instance_size) {
set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
vi_dirty_reg++;
}
set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
DIRTY_REGS_POS(vertex_attrib_format));
// Init Shaders
constexpr u32 shader_registers_count =
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
DIRTY_REGS_POS(shaders));
// State
// Viewport
constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
set_block(viewport_start, viewport_size, viewport_dirty_reg);
constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
// Viewport transformation
constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
// Cullmode
constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
// Screen y control
dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
// Primitive Restart
constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
// Depth Test
constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
// Stencil Test
constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
// Color Mask
constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
color_mask_dirty_reg);
// Blend State
constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
blend_state_dirty_reg);
dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
blend_state_dirty_reg);
// Scissor State
constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
scissor_test_dirty_reg);
// Polygon Offset
constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
// Depth bounds
constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
}
void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
// Reset the current macro. // Reset the current macro.
executing_macro = 0; executing_macro = 0;
@ -319,19 +162,9 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
if (regs.reg_array[method] != method_call.argument) { if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument; regs.reg_array[method] = method_call.argument;
const std::size_t dirty_reg = dirty_pointers[method];
if (dirty_reg) { for (const auto& table : dirty.tables) {
dirty.regs[dirty_reg] = true; dirty.flags[table[method]] = true;
if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
dirty.vertex_array_buffers = true;
} else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
dirty.vertex_instances = true;
} else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
dirty_reg < DIRTY_REGS_POS(render_settings)) {
dirty.render_settings = true;
}
} }
} }
@ -419,7 +252,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall(); const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call); upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) { if (is_last_call) {
dirty.OnMemoryWrite(); OnMemoryWrite();
} }
break; break;
} }
@ -727,7 +560,7 @@ void Maxwell3D::FinishCBData() {
const u32 id = cb_data_state.id; const u32 id = cb_data_state.id;
memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
dirty.OnMemoryWrite(); OnMemoryWrite();
cb_data_state.id = null_cb_data; cb_data_state.id = null_cb_data;
cb_data_state.current = null_cb_data; cb_data_state.current = null_cb_data;

@ -6,6 +6,7 @@
#include <array> #include <array>
#include <bitset> #include <bitset>
#include <limits>
#include <optional> #include <optional>
#include <type_traits> #include <type_traits>
#include <unordered_map> #include <unordered_map>
@ -431,7 +432,6 @@ public:
GeneratedPrimitives = 0x1F, GeneratedPrimitives = 0x1F,
}; };
struct Cull {
enum class FrontFace : u32 { enum class FrontFace : u32 {
ClockWise = 0x0900, ClockWise = 0x0900,
CounterClockWise = 0x0901, CounterClockWise = 0x0901,
@ -443,11 +443,6 @@ public:
FrontAndBack = 0x0408, FrontAndBack = 0x0408,
}; };
u32 enabled;
FrontFace front_face;
CullFace cull_face;
};
struct Blend { struct Blend {
enum class Equation : u32 { enum class Equation : u32 {
Add = 1, Add = 1,
@ -574,7 +569,7 @@ public:
f32 translate_z; f32 translate_z;
INSERT_UNION_PADDING_WORDS(2); INSERT_UNION_PADDING_WORDS(2);
Common::Rectangle<s32> GetRect() const { Common::Rectangle<f32> GetRect() const {
return { return {
GetX(), // left GetX(), // left
GetY() + GetHeight(), // top GetY() + GetHeight(), // top
@ -583,20 +578,20 @@ public:
}; };
}; };
s32 GetX() const { f32 GetX() const {
return static_cast<s32>(std::max(0.0f, translate_x - std::fabs(scale_x))); return std::max(0.0f, translate_x - std::fabs(scale_x));
} }
s32 GetY() const { f32 GetY() const {
return static_cast<s32>(std::max(0.0f, translate_y - std::fabs(scale_y))); return std::max(0.0f, translate_y - std::fabs(scale_y));
} }
s32 GetWidth() const { f32 GetWidth() const {
return static_cast<s32>(translate_x + std::fabs(scale_x)) - GetX(); return translate_x + std::fabs(scale_x) - GetX();
} }
s32 GetHeight() const { f32 GetHeight() const {
return static_cast<s32>(translate_y + std::fabs(scale_y)) - GetY(); return translate_y + std::fabs(scale_y) - GetY();
} }
}; };
@ -872,16 +867,7 @@ public:
INSERT_UNION_PADDING_WORDS(0x35); INSERT_UNION_PADDING_WORDS(0x35);
union { u32 clip_distance_enabled;
BitField<0, 1, u32> c0;
BitField<1, 1, u32> c1;
BitField<2, 1, u32> c2;
BitField<3, 1, u32> c3;
BitField<4, 1, u32> c4;
BitField<5, 1, u32> c5;
BitField<6, 1, u32> c6;
BitField<7, 1, u32> c7;
} clip_distance_enabled;
u32 samplecnt_enable; u32 samplecnt_enable;
@ -1060,7 +1046,9 @@ public:
INSERT_UNION_PADDING_WORDS(1); INSERT_UNION_PADDING_WORDS(1);
Cull cull; u32 cull_test_enabled;
FrontFace front_face;
CullFace cull_face;
u32 pixel_center_integer; u32 pixel_center_integer;
@ -1238,79 +1226,6 @@ public:
State state{}; State state{};
struct DirtyRegs {
static constexpr std::size_t NUM_REGS = 256;
static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
union {
struct {
bool null_dirty;
// Vertex Attributes
bool vertex_attrib_format;
// Vertex Arrays
std::array<bool, 32> vertex_array;
bool vertex_array_buffers;
// Vertex Instances
std::array<bool, 32> vertex_instance;
bool vertex_instances;
// Render Targets
std::array<bool, 8> render_target;
bool depth_buffer;
bool render_settings;
// Shaders
bool shaders;
// Rasterizer State
bool viewport;
bool clip_coefficient;
bool cull_mode;
bool primitive_restart;
bool depth_test;
bool stencil_test;
bool blend_state;
bool scissor_test;
bool transform_feedback;
bool color_mask;
bool polygon_offset;
bool depth_bounds_values;
// Complementary
bool viewport_transform;
bool screen_y_control;
bool memory_general;
};
std::array<bool, NUM_REGS> regs;
};
void ResetVertexArrays() {
vertex_array.fill(true);
vertex_array_buffers = true;
}
void ResetRenderTargets() {
depth_buffer = true;
render_target.fill(true);
render_settings = true;
}
void OnMemoryWrite() {
shaders = true;
memory_general = true;
ResetRenderTargets();
ResetVertexArrays();
}
} dirty{};
/// Reads a register value located at the input method address /// Reads a register value located at the input method address
u32 GetRegisterValue(u32 method) const; u32 GetRegisterValue(u32 method) const;
@ -1356,6 +1271,11 @@ public:
return execute_on; return execute_on;
} }
/// Notify a memory write has happened.
void OnMemoryWrite() {
dirty.flags |= dirty.on_write_stores;
}
enum class MMEDrawMode : u32 { enum class MMEDrawMode : u32 {
Undefined, Undefined,
Array, Array,
@ -1371,6 +1291,16 @@ public:
u32 gl_end_count{}; u32 gl_end_count{};
} mme_draw; } mme_draw;
struct DirtyState {
using Flags = std::bitset<std::numeric_limits<u8>::max()>;
using Table = std::array<u8, Regs::NUM_REGS>;
using Tables = std::array<Table, 2>;
Flags flags;
Flags on_write_stores;
Tables tables{};
} dirty;
private: private:
void InitializeRegisterDefaults(); void InitializeRegisterDefaults();
@ -1417,8 +1347,6 @@ private:
/// Retrieves information about a specific TSC entry from the TSC buffer. /// Retrieves information about a specific TSC entry from the TSC buffer.
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
void InitDirtySettings();
/** /**
* Call a macro on this engine. * Call a macro on this engine.
* @param method Method to call * @param method Method to call
@ -1561,7 +1489,9 @@ ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644); ASSERT_REG_POSITION(vp_point_size, 0x644);
ASSERT_REG_POSITION(cull, 0x646); ASSERT_REG_POSITION(cull_test_enabled, 0x646);
ASSERT_REG_POSITION(front_face, 0x647);
ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);

@ -57,7 +57,7 @@ void MaxwellDMA::HandleCopy() {
} }
// All copies here update the main memory, so mark all rasterizer states as invalid. // All copies here update the main memory, so mark all rasterizer states as invalid.
system.GPU().Maxwell3D().dirty.OnMemoryWrite(); system.GPU().Maxwell3D().OnMemoryWrite();
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D

@ -89,6 +89,9 @@ public:
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
const DiskResourceLoadCallback& callback = {}) {} const DiskResourceLoadCallback& callback = {}) {}
/// Initializes renderer dirty flags
virtual void SetupDirtyFlags() {}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
GuestDriverProfile& AccessGuestDriverProfile() { GuestDriverProfile& AccessGuestDriverProfile() {
return guest_driver_profile; return guest_driver_profile;

@ -11,7 +11,6 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
namespace OpenGL { namespace OpenGL {
@ -36,8 +35,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
framebuffer.Create(); framebuffer.Create();
// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
local_state.draw.draw_framebuffer = framebuffer.handle; glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
local_state.ApplyFramebufferState();
if (key.zeta) { if (key.zeta) {
const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;

@ -13,7 +13,6 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL { namespace OpenGL {
@ -63,7 +62,6 @@ public:
private: private:
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
OpenGLState local_state;
std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
}; };

File diff suppressed because it is too large Load Diff

@ -30,7 +30,7 @@
#include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h" #include "video_core/renderer_opengl/utils.h"
#include "video_core/textures/texture.h" #include "video_core/textures/texture.h"
@ -55,7 +55,8 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public: public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info); ScreenInfo& info, GLShader::ProgramManager& program_manager,
StateTracker& state_tracker);
~RasterizerOpenGL() override; ~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override; void Draw(bool is_indexed, bool is_instanced) override;
@ -76,6 +77,7 @@ public:
u32 pixel_stride) override; u32 pixel_stride) override;
void LoadDiskResources(const std::atomic_bool& stop_loading, void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override; const VideoCore::DiskResourceLoadCallback& callback) override;
void SetupDirtyFlags() override;
/// Returns true when there are commands queued to the OpenGL server. /// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const { bool AnyCommandQueued() const {
@ -86,8 +88,7 @@ private:
/// Configures the color and depth framebuffer states. /// Configures the color and depth framebuffer states.
void ConfigureFramebuffers(); void ConfigureFramebuffers();
void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb);
bool using_depth_fb, bool using_stencil_fb);
/// Configures the current constbuffers to use for the draw command. /// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
@ -130,11 +131,13 @@ private:
const GLShader::ImageEntry& entry); const GLShader::ImageEntry& entry);
/// Syncs the viewport and depth range to match the guest state /// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state); void SyncViewport();
/// Syncs the depth clamp state
void SyncDepthClamp();
/// Syncs the clip enabled status to match the guest state /// Syncs the clip enabled status to match the guest state
void SyncClipEnabled( void SyncClipEnabled(u32 clip_mask);
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask);
/// Syncs the clip coefficients to match the guest state /// Syncs the clip coefficients to match the guest state
void SyncClipCoef(); void SyncClipCoef();
@ -164,7 +167,7 @@ private:
void SyncMultiSampleState(); void SyncMultiSampleState();
/// Syncs the scissor test state to match the guest state /// Syncs the scissor test state to match the guest state
void SyncScissorTest(OpenGLState& current_state); void SyncScissorTest();
/// Syncs the transform feedback state to match the guest state /// Syncs the transform feedback state to match the guest state
void SyncTransformFeedback(); void SyncTransformFeedback();
@ -173,7 +176,7 @@ private:
void SyncPointState(); void SyncPointState();
/// Syncs the rasterizer enable state to match the guest state /// Syncs the rasterizer enable state to match the guest state
void SyncRasterizeEnable(OpenGLState& current_state); void SyncRasterizeEnable();
/// Syncs Color Mask /// Syncs Color Mask
void SyncColorMask(); void SyncColorMask();
@ -184,6 +187,9 @@ private:
/// Syncs the alpha test state to match the guest state /// Syncs the alpha test state to match the guest state
void SyncAlphaTest(); void SyncAlphaTest();
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Check for extension that are not strictly required but are needed for correct emulation /// Check for extension that are not strictly required but are needed for correct emulation
void CheckExtensions(); void CheckExtensions();
@ -191,18 +197,17 @@ private:
std::size_t CalculateIndexBufferSize() const; std::size_t CalculateIndexBufferSize() const;
/// Updates and returns a vertex array object representing current vertex format /// Updates the current vertex format
GLuint SetupVertexFormat(); void SetupVertexFormat();
void SetupVertexBuffer(GLuint vao); void SetupVertexBuffer();
void SetupVertexInstances(GLuint vao); void SetupVertexInstances();
GLintptr SetupIndexBuffer(); GLintptr SetupIndexBuffer();
void SetupShaders(GLenum primitive_mode); void SetupShaders(GLenum primitive_mode);
const Device device; const Device device;
OpenGLState state;
TextureCacheOpenGL texture_cache; TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache; ShaderCacheOpenGL shader_cache;
@ -212,22 +217,20 @@ private:
Core::System& system; Core::System& system;
ScreenInfo& screen_info; ScreenInfo& screen_info;
GLShader::ProgramManager& program_manager;
std::unique_ptr<GLShader::ProgramManager> shader_program_manager; StateTracker& state_tracker;
std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute,
Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>,
OGLVertexArray>
vertex_array_cache;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache; OGLBufferCache buffer_cache;
VertexArrayPushBuffer vertex_array_pushbuffer; VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker};
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
/// Number of commands queued to the OpenGL driver. Reseted on flush. /// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0; std::size_t num_queued_commands = 0;
u32 last_clip_distance_mask = 0;
}; };
} // namespace OpenGL } // namespace OpenGL

@ -8,7 +8,6 @@
#include "common/microprofile.h" #include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state.h"
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
@ -20,7 +19,7 @@ void OGLRenderbuffer::Create() {
return; return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation); MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenRenderbuffers(1, &handle); glCreateRenderbuffers(1, &handle);
} }
void OGLRenderbuffer::Release() { void OGLRenderbuffer::Release() {
@ -29,7 +28,6 @@ void OGLRenderbuffer::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteRenderbuffers(1, &handle); glDeleteRenderbuffers(1, &handle);
OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply();
handle = 0; handle = 0;
} }
@ -47,7 +45,6 @@ void OGLTexture::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle); glDeleteTextures(1, &handle);
OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0; handle = 0;
} }
@ -65,7 +62,6 @@ void OGLTextureView::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle); glDeleteTextures(1, &handle);
OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0; handle = 0;
} }
@ -83,7 +79,6 @@ void OGLSampler::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteSamplers(1, &handle); glDeleteSamplers(1, &handle);
OpenGLState::GetCurState().ResetSampler(handle).Apply();
handle = 0; handle = 0;
} }
@ -127,7 +122,6 @@ void OGLProgram::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgram(handle); glDeleteProgram(handle);
OpenGLState::GetCurState().ResetProgram(handle).Apply();
handle = 0; handle = 0;
} }
@ -145,7 +139,6 @@ void OGLPipeline::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramPipelines(1, &handle); glDeleteProgramPipelines(1, &handle);
OpenGLState::GetCurState().ResetPipeline(handle).Apply();
handle = 0; handle = 0;
} }
@ -189,24 +182,6 @@ void OGLSync::Release() {
handle = 0; handle = 0;
} }
void OGLVertexArray::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateVertexArrays(1, &handle);
}
void OGLVertexArray::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteVertexArrays(1, &handle);
OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
handle = 0;
}
void OGLFramebuffer::Create() { void OGLFramebuffer::Create() {
if (handle != 0) if (handle != 0)
return; return;
@ -221,7 +196,6 @@ void OGLFramebuffer::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteFramebuffers(1, &handle); glDeleteFramebuffers(1, &handle);
OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
handle = 0; handle = 0;
} }

@ -241,31 +241,6 @@ public:
GLsync handle = 0; GLsync handle = 0;
}; };
class OGLVertexArray : private NonCopyable {
public:
OGLVertexArray() = default;
OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLVertexArray() {
Release();
}
OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLFramebuffer : private NonCopyable { class OGLFramebuffer : private NonCopyable {
public: public:
OGLFramebuffer() = default; OGLFramebuffer() = default;

@ -22,6 +22,7 @@
#include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h" #include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/shader_ir.h" #include "video_core/shader/shader_ir.h"
@ -623,7 +624,7 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
} }
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!system.GPU().Maxwell3D().dirty.shaders) { if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
return last_shaders[static_cast<std::size_t>(program)]; return last_shaders[static_cast<std::size_t>(program)];
} }

@ -2547,7 +2547,10 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
for (const auto& image : ir.GetImages()) { for (const auto& image : ir.GetImages()) {
entries.images.emplace_back(image); entries.images.emplace_back(image);
} }
entries.clip_distances = ir.GetClipDistances(); const auto clip_distances = ir.GetClipDistances();
for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
}
entries.shader_length = ir.GetLength(); entries.shader_length = ir.GetLength();
return entries; return entries;
} }

@ -74,7 +74,7 @@ struct ShaderEntries {
std::vector<GlobalMemoryEntry> global_memory_entries; std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers; std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images; std::vector<ImageEntry> images;
std::array<bool, Maxwell::NumClipDistances> clip_distances{}; u32 clip_distances{};
std::size_t shader_length{}; std::size_t shader_length{};
}; };

@ -10,26 +10,20 @@ namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D; using Tegra::Engines::Maxwell3D;
ProgramManager::ProgramManager() { ProgramManager::~ProgramManager() = default;
void ProgramManager::Create() {
pipeline.Create(); pipeline.Create();
} }
ProgramManager::~ProgramManager() = default; void ProgramManager::Update() {
void ProgramManager::ApplyTo(OpenGLState& state) {
UpdatePipeline();
state.draw.shader_program = 0;
state.draw.program_pipeline = pipeline.handle;
}
void ProgramManager::UpdatePipeline() {
// Avoid updating the pipeline when values have no changed // Avoid updating the pipeline when values have no changed
if (old_state == current_state) { if (old_state == current_state) {
return; return;
} }
// Workaround for AMD bug // Workaround for AMD bug
constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
GL_FRAGMENT_SHADER_BIT}; GL_FRAGMENT_SHADER_BIT};
glUseProgramStages(pipeline.handle, all_used_stages, 0); glUseProgramStages(pipeline.handle, all_used_stages, 0);

@ -9,7 +9,6 @@
#include <glad/glad.h> #include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL::GLShader { namespace OpenGL::GLShader {
@ -29,25 +28,26 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
class ProgramManager { class ProgramManager {
public: public:
explicit ProgramManager();
~ProgramManager(); ~ProgramManager();
void ApplyTo(OpenGLState& state); void Create();
void UseProgrammableVertexShader(GLuint program) { void Update();
void UseVertexShader(GLuint program) {
current_state.vertex_shader = program; current_state.vertex_shader = program;
} }
void UseProgrammableGeometryShader(GLuint program) { void UseGeometryShader(GLuint program) {
current_state.geometry_shader = program; current_state.geometry_shader = program;
} }
void UseProgrammableFragmentShader(GLuint program) { void UseFragmentShader(GLuint program) {
current_state.fragment_shader = program; current_state.fragment_shader = program;
} }
void UseTrivialGeometryShader() { GLuint GetHandle() const {
current_state.geometry_shader = 0; return pipeline.handle;
} }
void UseTrivialFragmentShader() { void UseTrivialFragmentShader() {
@ -70,8 +70,6 @@ private:
GLuint geometry_shader{}; GLuint geometry_shader{};
}; };
void UpdatePipeline();
OGLPipeline pipeline; OGLPipeline pipeline;
PipelineState current_state; PipelineState current_state;
PipelineState old_state; PipelineState old_state;

@ -1,569 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <iterator>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_state.h"
MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
OpenGLState OpenGLState::cur_state;
namespace {
template <typename T>
bool UpdateValue(T& current_value, const T new_value) {
const bool changed = current_value != new_value;
current_value = new_value;
return changed;
}
template <typename T1, typename T2>
bool UpdateTie(T1 current_value, const T2 new_value) {
const bool changed = current_value != new_value;
current_value = new_value;
return changed;
}
template <typename T>
std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) {
std::optional<std::size_t> first;
std::size_t last;
for (std::size_t i = 0; i < std::size(current_values); ++i) {
if (!UpdateValue(current_values[i], new_values[i])) {
continue;
}
if (!first) {
first = i;
}
last = i;
}
if (!first) {
return std::nullopt;
}
return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1));
}
void Enable(GLenum cap, bool enable) {
if (enable) {
glEnable(cap);
} else {
glDisable(cap);
}
}
void Enable(GLenum cap, GLuint index, bool enable) {
if (enable) {
glEnablei(cap, index);
} else {
glDisablei(cap, index);
}
}
void Enable(GLenum cap, bool& current_value, bool new_value) {
if (UpdateValue(current_value, new_value)) {
Enable(cap, new_value);
}
}
void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
if (UpdateValue(current_value, new_value)) {
Enable(cap, index, new_value);
}
}
} // Anonymous namespace
OpenGLState::OpenGLState() = default;
void OpenGLState::SetDefaultViewports() {
viewports.fill(Viewport{});
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
}
void OpenGLState::ApplyFramebufferState() {
if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
}
if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
}
}
void OpenGLState::ApplyVertexArrayState() {
if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
glBindVertexArray(draw.vertex_array);
}
}
void OpenGLState::ApplyShaderProgram() {
if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
glUseProgram(draw.shader_program);
}
}
void OpenGLState::ApplyProgramPipeline() {
if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
glBindProgramPipeline(draw.program_pipeline);
}
}
void OpenGLState::ApplyClipDistances() {
for (std::size_t i = 0; i < clip_distance.size(); ++i) {
Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
clip_distance[i]);
}
}
void OpenGLState::ApplyPointSize() {
Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}
}
void OpenGLState::ApplyFragmentColorClamp() {
if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
}
}
void OpenGLState::ApplyMultisample() {
Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
multisample_control.alpha_to_coverage);
Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
multisample_control.alpha_to_one);
}
void OpenGLState::ApplyDepthClamp() {
if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
return;
}
cur_state.depth_clamp = depth_clamp;
UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
"Unimplemented Depth Clamp Separation!");
Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
}
void OpenGLState::ApplySRgb() {
if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
return;
cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
if (framebuffer_srgb.enabled) {
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
}
void OpenGLState::ApplyCulling() {
Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
if (UpdateValue(cur_state.cull.mode, cull.mode)) {
glCullFace(cull.mode);
}
if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
glFrontFace(cull.front_face);
}
}
void OpenGLState::ApplyRasterizerDiscard() {
Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
}
void OpenGLState::ApplyColorMask() {
if (!dirty.color_mask) {
return;
}
dirty.color_mask = false;
for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
const auto& updated = color_mask[i];
auto& current = cur_state.color_mask[i];
if (updated.red_enabled != current.red_enabled ||
updated.green_enabled != current.green_enabled ||
updated.blue_enabled != current.blue_enabled ||
updated.alpha_enabled != current.alpha_enabled) {
current = updated;
glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
updated.blue_enabled, updated.alpha_enabled);
}
}
}
void OpenGLState::ApplyDepth() {
Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
if (cur_state.depth.test_func != depth.test_func) {
cur_state.depth.test_func = depth.test_func;
glDepthFunc(depth.test_func);
}
if (cur_state.depth.write_mask != depth.write_mask) {
cur_state.depth.write_mask = depth.write_mask;
glDepthMask(depth.write_mask);
}
}
void OpenGLState::ApplyPrimitiveRestart() {
Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
if (cur_state.primitive_restart.index != primitive_restart.index) {
cur_state.primitive_restart.index = primitive_restart.index;
glPrimitiveRestartIndex(primitive_restart.index);
}
}
void OpenGLState::ApplyStencilTest() {
if (!dirty.stencil_state) {
return;
}
dirty.stencil_state = false;
Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
current.test_mask != config.test_mask) {
current.test_func = config.test_func;
current.test_ref = config.test_ref;
current.test_mask = config.test_mask;
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
if (current.action_depth_fail != config.action_depth_fail ||
current.action_depth_pass != config.action_depth_pass ||
current.action_stencil_fail != config.action_stencil_fail) {
current.action_depth_fail = config.action_depth_fail;
current.action_depth_pass = config.action_depth_pass;
current.action_stencil_fail = config.action_stencil_fail;
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
if (current.write_mask != config.write_mask) {
current.write_mask = config.write_mask;
glStencilMaskSeparate(face, config.write_mask);
}
};
ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
void OpenGLState::ApplyViewport() {
for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
const auto& updated = viewports[i];
auto& current = cur_state.viewports[i];
if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
current.height != updated.height) {
current.x = updated.x;
current.y = updated.y;
current.width = updated.width;
current.height = updated.height;
glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
static_cast<GLfloat>(updated.width),
static_cast<GLfloat>(updated.height));
}
if (current.depth_range_near != updated.depth_range_near ||
current.depth_range_far != updated.depth_range_far) {
current.depth_range_near = updated.depth_range_near;
current.depth_range_far = updated.depth_range_far;
glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
current.scissor.width != updated.scissor.width ||
current.scissor.height != updated.scissor.height) {
current.scissor.x = updated.scissor.x;
current.scissor.y = updated.scissor.y;
current.scissor.width = updated.scissor.width;
current.scissor.height = updated.scissor.height;
glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
updated.scissor.height);
}
}
}
void OpenGLState::ApplyGlobalBlending() {
const Blend& updated = blend[0];
Blend& current = cur_state.blend[0];
Enable(GL_BLEND, current.enabled, updated.enabled);
if (current.src_rgb_func != updated.src_rgb_func ||
current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
current.dst_a_func != updated.dst_a_func) {
current.src_rgb_func = updated.src_rgb_func;
current.dst_rgb_func = updated.dst_rgb_func;
current.src_a_func = updated.src_a_func;
current.dst_a_func = updated.dst_a_func;
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
current.rgb_equation = updated.rgb_equation;
current.a_equation = updated.a_equation;
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
}
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) {
const Blend& updated = blend[target];
Blend& current = cur_state.blend[target];
if (current.enabled != updated.enabled || force) {
current.enabled = updated.enabled;
Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
}
if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
current.dst_a_func),
std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func))) {
glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
std::tie(updated.rgb_equation, updated.a_equation))) {
glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
}
void OpenGLState::ApplyBlending() {
if (!dirty.blend_state) {
return;
}
dirty.blend_state = false;
if (independant_blend.enabled) {
const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
ApplyTargetBlending(target, force);
}
} else {
ApplyGlobalBlending();
}
cur_state.independant_blend.enabled = independant_blend.enabled;
if (UpdateTie(
std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
cur_state.blend_color.blue, cur_state.blend_color.alpha),
std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
}
}
void OpenGLState::ApplyLogicOp() {
Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
glLogicOp(logic_op.operation);
}
}
void OpenGLState::ApplyPolygonOffset() {
if (!dirty.polygon_offset) {
return;
}
dirty.polygon_offset = false;
Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
polygon_offset.fill_enable);
Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
polygon_offset.line_enable);
Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
polygon_offset.point_enable);
if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
cur_state.polygon_offset.clamp),
std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
} else {
UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
"Unimplemented Depth polygon offset clamp.");
glPolygonOffset(polygon_offset.factor, polygon_offset.units);
}
}
}
void OpenGLState::ApplyAlphaTest() {
Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
std::tie(alpha_test.func, alpha_test.ref))) {
glAlphaFunc(alpha_test.func, alpha_test.ref);
}
}
void OpenGLState::ApplyClipControl() {
if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
std::tie(clip_control.origin, clip_control.depth_mode))) {
glClipControl(clip_control.origin, clip_control.depth_mode);
}
}
void OpenGLState::ApplyRenderBuffer() {
if (cur_state.renderbuffer != renderbuffer) {
cur_state.renderbuffer = renderbuffer;
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
}
}
void OpenGLState::ApplyTextures() {
const std::size_t size = std::size(textures);
for (std::size_t i = 0; i < size; ++i) {
if (UpdateValue(cur_state.textures[i], textures[i])) {
// BindTextureUnit doesn't support binding null textures, skip those binds.
// TODO(Rodrigo): Stop using null textures
if (textures[i] != 0) {
glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
}
}
}
}
void OpenGLState::ApplySamplers() {
const std::size_t size = std::size(samplers);
for (std::size_t i = 0; i < size; ++i) {
if (UpdateValue(cur_state.samplers[i], samplers[i])) {
glBindSampler(static_cast<GLuint>(i), samplers[i]);
}
}
}
void OpenGLState::ApplyImages() {
if (const auto update = UpdateArray(cur_state.images, images)) {
glBindImageTextures(update->first, update->second, images.data() + update->first);
}
}
void OpenGLState::Apply() {
MICROPROFILE_SCOPE(OpenGL_State);
ApplyFramebufferState();
ApplyVertexArrayState();
ApplyShaderProgram();
ApplyProgramPipeline();
ApplyClipDistances();
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
ApplyRasterizerDiscard();
ApplyColorMask();
ApplyDepthClamp();
ApplyViewport();
ApplyStencilTest();
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
ApplyBlending();
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
ApplyImages();
ApplyPolygonOffset();
ApplyAlphaTest();
ApplyClipControl();
ApplyRenderBuffer();
}
void OpenGLState::EmulateViewportWithScissor() {
auto& current = viewports[0];
if (current.scissor.enabled) {
const GLint left = std::max(current.x, current.scissor.x);
const GLint right =
std::max(current.x + current.width, current.scissor.x + current.scissor.width);
const GLint bottom = std::max(current.y, current.scissor.y);
const GLint top =
std::max(current.y + current.height, current.scissor.y + current.scissor.height);
current.scissor.x = std::max(left, 0);
current.scissor.y = std::max(bottom, 0);
current.scissor.width = std::max(right - left, 0);
current.scissor.height = std::max(top - bottom, 0);
} else {
current.scissor.enabled = true;
current.scissor.x = current.x;
current.scissor.y = current.y;
current.scissor.width = current.width;
current.scissor.height = current.height;
}
}
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
for (auto& texture : textures) {
if (texture == handle) {
texture = 0;
}
}
return *this;
}
OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
for (auto& sampler : samplers) {
if (sampler == handle) {
sampler = 0;
}
}
return *this;
}
OpenGLState& OpenGLState::ResetProgram(GLuint handle) {
if (draw.shader_program == handle) {
draw.shader_program = 0;
}
return *this;
}
OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
if (draw.program_pipeline == handle) {
draw.program_pipeline = 0;
}
return *this;
}
OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
if (draw.vertex_array == handle) {
draw.vertex_array = 0;
}
return *this;
}
OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) {
if (draw.read_framebuffer == handle) {
draw.read_framebuffer = 0;
}
if (draw.draw_framebuffer == handle) {
draw.draw_framebuffer = 0;
}
return *this;
}
OpenGLState& OpenGLState::ResetRenderbuffer(GLuint handle) {
if (renderbuffer == handle) {
renderbuffer = 0;
}
return *this;
}
} // namespace OpenGL

@ -1,251 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <type_traits>
#include <glad/glad.h>
#include "video_core/engines/maxwell_3d.h"
namespace OpenGL {
class OpenGLState {
public:
struct {
bool enabled = false; // GL_FRAMEBUFFER_SRGB
} framebuffer_srgb;
struct {
bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE
bool alpha_to_one = false; // GL_ALPHA_TO_ONE
} multisample_control;
struct {
bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB
} fragment_color_clamp;
struct {
bool far_plane = false;
bool near_plane = false;
} depth_clamp; // GL_DEPTH_CLAMP
struct {
bool enabled = false; // GL_CULL_FACE
GLenum mode = GL_BACK; // GL_CULL_FACE_MODE
GLenum front_face = GL_CCW; // GL_FRONT_FACE
} cull;
struct {
bool test_enabled = false; // GL_DEPTH_TEST
GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK
GLenum test_func = GL_LESS; // GL_DEPTH_FUNC
} depth;
struct {
bool enabled = false;
GLuint index = 0;
} primitive_restart; // GL_PRIMITIVE_RESTART
bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
struct ColorMask {
GLboolean red_enabled = GL_TRUE;
GLboolean green_enabled = GL_TRUE;
GLboolean blue_enabled = GL_TRUE;
GLboolean alpha_enabled = GL_TRUE;
};
std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_mask; // GL_COLOR_WRITEMASK
struct {
bool test_enabled = false; // GL_STENCIL_TEST
struct {
GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC
GLint test_ref = 0; // GL_STENCIL_REF
GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK
GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK
GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL
GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL
GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS
} front, back;
} stencil;
struct Blend {
bool enabled = false; // GL_BLEND
GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB
GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA
GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB
GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB
GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA
GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA
};
std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
struct {
bool enabled = false;
} independant_blend;
struct {
GLclampf red = 0.0f;
GLclampf green = 0.0f;
GLclampf blue = 0.0f;
GLclampf alpha = 0.0f;
} blend_color; // GL_BLEND_COLOR
struct {
bool enabled = false; // GL_LOGIC_OP_MODE
GLenum operation = GL_COPY;
} logic_op;
static constexpr std::size_t NumSamplers = 32 * 5;
static constexpr std::size_t NumImages = 8 * 5;
std::array<GLuint, NumSamplers> textures = {};
std::array<GLuint, NumSamplers> samplers = {};
std::array<GLuint, NumImages> images = {};
struct {
GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING
GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING
GLuint shader_program = 0; // GL_CURRENT_PROGRAM
GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING
} draw;
struct Viewport {
GLint x = 0;
GLint y = 0;
GLint width = 0;
GLint height = 0;
GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE
GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE
struct {
bool enabled = false; // GL_SCISSOR_TEST
GLint x = 0;
GLint y = 0;
GLsizei width = 0;
GLsizei height = 0;
} scissor;
};
std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
struct {
bool program_control = false; // GL_PROGRAM_POINT_SIZE
bool sprite = false; // GL_POINT_SPRITE
GLfloat size = 1.0f; // GL_POINT_SIZE
} point;
struct {
bool point_enable = false;
bool line_enable = false;
bool fill_enable = false;
GLfloat units = 0.0f;
GLfloat factor = 0.0f;
GLfloat clamp = 0.0f;
} polygon_offset;
struct {
bool enabled = false; // GL_ALPHA_TEST
GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC
GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF
} alpha_test;
std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
struct {
GLenum origin = GL_LOWER_LEFT;
GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
} clip_control;
GLuint renderbuffer{}; // GL_RENDERBUFFER_BINDING
OpenGLState();
/// Get the currently active OpenGL state
static OpenGLState GetCurState() {
return cur_state;
}
void SetDefaultViewports();
/// Apply this state as the current OpenGL state
void Apply();
void ApplyFramebufferState();
void ApplyVertexArrayState();
void ApplyShaderProgram();
void ApplyProgramPipeline();
void ApplyClipDistances();
void ApplyPointSize();
void ApplyFragmentColorClamp();
void ApplyMultisample();
void ApplySRgb();
void ApplyCulling();
void ApplyRasterizerDiscard();
void ApplyColorMask();
void ApplyDepth();
void ApplyPrimitiveRestart();
void ApplyStencilTest();
void ApplyViewport();
void ApplyTargetBlending(std::size_t target, bool force);
void ApplyGlobalBlending();
void ApplyBlending();
void ApplyLogicOp();
void ApplyTextures();
void ApplySamplers();
void ApplyImages();
void ApplyDepthClamp();
void ApplyPolygonOffset();
void ApplyAlphaTest();
void ApplyClipControl();
void ApplyRenderBuffer();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
OpenGLState& ResetSampler(GLuint handle);
OpenGLState& ResetProgram(GLuint handle);
OpenGLState& ResetPipeline(GLuint handle);
OpenGLState& ResetVertexArray(GLuint handle);
OpenGLState& ResetFramebuffer(GLuint handle);
OpenGLState& ResetRenderbuffer(GLuint handle);
/// Viewport does not affects glClearBuffer so emulate viewport using scissor test
void EmulateViewportWithScissor();
void MarkDirtyBlendState() {
dirty.blend_state = true;
}
void MarkDirtyStencilState() {
dirty.stencil_state = true;
}
void MarkDirtyPolygonOffset() {
dirty.polygon_offset = true;
}
void MarkDirtyColorMask() {
dirty.color_mask = true;
}
void AllDirty() {
dirty.blend_state = true;
dirty.stencil_state = true;
dirty.polygon_offset = true;
dirty.color_mask = true;
}
private:
static OpenGLState cur_state;
struct {
bool blend_state;
bool stencil_state;
bool viewport_state;
bool polygon_offset;
bool color_mask;
} dirty{};
};
static_assert(std::is_trivially_copyable_v<OpenGLState>);
} // namespace OpenGL

@ -0,0 +1,238 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstddef>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
namespace OpenGL {
namespace {
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
void SetupDirtyColorMasks(Tables& tables) {
tables[0][OFF(color_mask_common)] = ColorMaskCommon;
for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]);
FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt);
}
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
void SetupDirtyVertexArrays(Tables& tables) {
static constexpr std::size_t num_array = 3;
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
const std::size_t instance_offset = OFF(instanced_arrays) + i;
tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_offset] = VertexInstances;
}
}
void SetupDirtyVertexFormat(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i);
}
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
}
void SetupDirtyViewports(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]);
const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]);
FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i);
FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i);
}
FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
tables[0][OFF(viewport_transform_enabled)] = ViewportTransform;
tables[1][OFF(viewport_transform_enabled)] = Viewports;
}
void SetupDirtyScissors(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]);
FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i);
}
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
void SetupDirtyShaders(Tables& tables) {
FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
Shaders);
}
void SetupDirtyDepthTest(Tables& tables) {
auto& table = tables[0];
table[OFF(depth_test_enable)] = DepthTest;
table[OFF(depth_write_enabled)] = DepthMask;
table[OFF(depth_test_func)] = DepthTest;
}
void SetupDirtyStencilTest(Tables& tables) {
static constexpr std::array offsets = {
OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref),
OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail),
OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask),
OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass),
OFF(stencil_back_mask)};
for (const auto offset : offsets) {
tables[0][offset] = StencilTest;
}
}
void SetupDirtyAlphaTest(Tables& tables) {
auto& table = tables[0];
table[OFF(alpha_test_ref)] = AlphaTest;
table[OFF(alpha_test_func)] = AlphaTest;
table[OFF(alpha_test_enabled)] = AlphaTest;
}
void SetupDirtyBlend(Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor);
tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled;
for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) {
const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]);
FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i);
tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i);
}
FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates);
FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates);
}
void SetupDirtyPrimitiveRestart(Tables& tables) {
FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart);
}
void SetupDirtyPolygonOffset(Tables& tables) {
auto& table = tables[0];
table[OFF(polygon_offset_fill_enable)] = PolygonOffset;
table[OFF(polygon_offset_line_enable)] = PolygonOffset;
table[OFF(polygon_offset_point_enable)] = PolygonOffset;
table[OFF(polygon_offset_factor)] = PolygonOffset;
table[OFF(polygon_offset_units)] = PolygonOffset;
table[OFF(polygon_offset_clamp)] = PolygonOffset;
}
void SetupDirtyMultisampleControl(Tables& tables) {
FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl);
}
void SetupDirtyRasterizeEnable(Tables& tables) {
tables[0][OFF(rasterize_enable)] = RasterizeEnable;
}
void SetupDirtyFramebufferSRGB(Tables& tables) {
tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB;
}
void SetupDirtyLogicOp(Tables& tables) {
FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp);
}
void SetupDirtyFragmentClampColor(Tables& tables) {
tables[0][OFF(frag_color_clamp)] = FragmentClampColor;
}
void SetupDirtyPointSize(Tables& tables) {
tables[0][OFF(vp_point_size)] = PointSize;
tables[0][OFF(point_size)] = PointSize;
tables[0][OFF(point_sprite_enable)] = PointSize;
}
void SetupDirtyClipControl(Tables& tables) {
auto& table = tables[0];
table[OFF(screen_y_control)] = ClipControl;
table[OFF(depth_mode)] = ClipControl;
}
void SetupDirtyDepthClampEnabled(Tables& tables) {
tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled;
}
void SetupDirtyMisc(Tables& tables) {
auto& table = tables[0];
table[OFF(clip_distance_enabled)] = ClipDistances;
table[OFF(front_face)] = FrontFace;
table[OFF(cull_test_enabled)] = CullTest;
table[OFF(cull_face)] = CullTest;
}
} // Anonymous namespace
StateTracker::StateTracker(Core::System& system) : system{system} {}
void StateTracker::Initialize() {
auto& dirty = system.GPU().Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyVertexArrays(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
SetupDirtyAlphaTest(tables);
SetupDirtyBlend(tables);
SetupDirtyPrimitiveRestart(tables);
SetupDirtyPolygonOffset(tables);
SetupDirtyMultisampleControl(tables);
SetupDirtyRasterizeEnable(tables);
SetupDirtyFramebufferSRGB(tables);
SetupDirtyLogicOp(tables);
SetupDirtyFragmentClampColor(tables);
SetupDirtyPointSize(tables);
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
auto& store = dirty.on_write_stores;
SetupCommonOnWriteStores(store);
store[VertexBuffers] = true;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
store[VertexBuffer0 + i] = true;
}
}
} // namespace OpenGL

@ -0,0 +1,204 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <limits>
#include <glad/glad.h>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Core {
class System;
}
namespace OpenGL {
namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
VertexFormats,
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
VertexBuffers,
VertexBuffer0,
VertexBuffer31 = VertexBuffer0 + 31,
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
ViewportTransform,
Viewports,
Viewport0,
Viewport15 = Viewport0 + 15,
Scissors,
Scissor0,
Scissor15 = Scissor0 + 15,
ColorMaskCommon,
ColorMasks,
ColorMask0,
ColorMask7 = ColorMask0 + 7,
BlendColor,
BlendIndependentEnabled,
BlendStates,
BlendState0,
BlendState7 = BlendState0 + 7,
Shaders,
ClipDistances,
ColorMask,
FrontFace,
CullTest,
DepthMask,
DepthTest,
StencilTest,
AlphaTest,
PrimitiveRestart,
PolygonOffset,
MultisampleControl,
RasterizeEnable,
FramebufferSRGB,
LogicOp,
FragmentClampColor,
PointSize,
ClipControl,
DepthClampEnabled,
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
public:
explicit StateTracker(Core::System& system);
void Initialize();
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
}
index_buffer = new_index_buffer;
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
}
void NotifyScreenDrawVertexArray() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
flags[OpenGL::Dirty::VertexBuffers] = true;
flags[OpenGL::Dirty::VertexBuffer0] = true;
flags[OpenGL::Dirty::VertexInstances] = true;
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
}
void NotifyViewport0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Viewports] = true;
flags[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Scissors] = true;
flags[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ColorMasks] = true;
flags[OpenGL::Dirty::ColorMask0] = true;
}
void NotifyBlend0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::BlendStates] = true;
flags[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::AlphaTest] = true;
}
private:
Core::System& system;
GLuint index_buffer = 0;
};
} // namespace OpenGL

@ -7,7 +7,6 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/microprofile.h" #include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",

@ -10,7 +10,7 @@
#include "core/core.h" #include "core/core.h"
#include "video_core/morton.h" #include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h" #include "video_core/renderer_opengl/utils.h"
#include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_base.h"
@ -397,6 +397,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
const bool is_proxy) const bool is_proxy)
: VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
target = GetTextureTarget(params.target); target = GetTextureTarget(params.target);
format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format;
if (!is_proxy) { if (!is_proxy) {
texture_view = CreateTextureView(); texture_view = CreateTextureView();
} }
@ -467,25 +468,20 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou
} }
OGLTextureView CachedSurfaceView::CreateTextureView() const { OGLTextureView CachedSurfaceView::CreateTextureView() const {
const auto& owner_params = surface.GetSurfaceParams();
OGLTextureView texture_view; OGLTextureView texture_view;
texture_view.Create(); texture_view.Create();
const GLuint handle{texture_view.handle}; glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)};
glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
params.num_levels, params.base_layer, params.num_layers); params.num_levels, params.base_layer, params.num_layers);
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
ApplyTextureDefaults(owner_params, handle);
return texture_view; return texture_view;
} }
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
VideoCore::RasterizerInterface& rasterizer, VideoCore::RasterizerInterface& rasterizer,
const Device& device) const Device& device, StateTracker& state_tracker)
: TextureCacheBase{system, rasterizer} { : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} {
src_framebuffer.Create(); src_framebuffer.Create();
dst_framebuffer.Create(); dst_framebuffer.Create();
} }
@ -519,25 +515,26 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) { const Tegra::Engines::Fermi2D::Config& copy_config) {
const auto& src_params{src_view->GetSurfaceParams()}; const auto& src_params{src_view->GetSurfaceParams()};
const auto& dst_params{dst_view->GetSurfaceParams()}; const auto& dst_params{dst_view->GetSurfaceParams()};
OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({
prev_state.AllDirty();
prev_state.Apply();
});
OpenGLState state;
state.draw.read_framebuffer = src_framebuffer.handle;
state.draw.draw_framebuffer = dst_framebuffer.handle;
state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
state.AllDirty();
state.Apply();
u32 buffers{};
UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
state_tracker.NotifyScissor0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyRasterizeEnable();
state_tracker.NotifyFramebufferSRGB();
if (dst_params.srgb_conversion) {
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
glDisable(GL_RASTERIZER_DISCARD);
glDisablei(GL_SCISSOR_TEST, 0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
GLenum buffers = 0;
if (src_params.type == SurfaceType::ColorTexture) { if (src_params.type == SurfaceType::ColorTexture) {
src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,

@ -27,6 +27,7 @@ using VideoCommon::ViewParams;
class CachedSurfaceView; class CachedSurfaceView;
class CachedSurface; class CachedSurface;
class TextureCacheOpenGL; class TextureCacheOpenGL;
class StateTracker;
using Surface = std::shared_ptr<CachedSurface>; using Surface = std::shared_ptr<CachedSurface>;
using View = std::shared_ptr<CachedSurfaceView>; using View = std::shared_ptr<CachedSurfaceView>;
@ -96,6 +97,10 @@ public:
return texture_view.handle; return texture_view.handle;
} }
GLenum GetFormat() const {
return format;
}
const SurfaceParams& GetSurfaceParams() const { const SurfaceParams& GetSurfaceParams() const {
return surface.GetSurfaceParams(); return surface.GetSurfaceParams();
} }
@ -113,6 +118,7 @@ private:
CachedSurface& surface; CachedSurface& surface;
GLenum target{}; GLenum target{};
GLenum format{};
OGLTextureView texture_view; OGLTextureView texture_view;
u32 swizzle{}; u32 swizzle{};
@ -122,7 +128,7 @@ private:
class TextureCacheOpenGL final : public TextureCacheBase { class TextureCacheOpenGL final : public TextureCacheBase {
public: public:
explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const Device& device); const Device& device, StateTracker& state_tracker);
~TextureCacheOpenGL(); ~TextureCacheOpenGL();
protected: protected:
@ -139,6 +145,8 @@ protected:
private: private:
GLuint FetchPBO(std::size_t buffer_size); GLuint FetchPBO(std::size_t buffer_size);
StateTracker& state_tracker;
OGLFramebuffer src_framebuffer; OGLFramebuffer src_framebuffer;
OGLFramebuffer dst_framebuffer; OGLFramebuffer dst_framebuffer;
std::unordered_map<u32, OGLBuffer> copy_pbo_cache; std::unordered_map<u32, OGLBuffer> copy_pbo_cache;

@ -425,24 +425,24 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
return GL_KEEP; return GL_KEEP;
} }
inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { inline GLenum FrontFace(Maxwell::FrontFace front_face) {
switch (front_face) { switch (front_face) {
case Maxwell::Cull::FrontFace::ClockWise: case Maxwell::FrontFace::ClockWise:
return GL_CW; return GL_CW;
case Maxwell::Cull::FrontFace::CounterClockWise: case Maxwell::FrontFace::CounterClockWise:
return GL_CCW; return GL_CCW;
} }
LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
return GL_CCW; return GL_CCW;
} }
inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { inline GLenum CullFace(Maxwell::CullFace cull_face) {
switch (cull_face) { switch (cull_face) {
case Maxwell::Cull::CullFace::Front: case Maxwell::CullFace::Front:
return GL_FRONT; return GL_FRONT;
case Maxwell::Cull::CullFace::Back: case Maxwell::CullFace::Back:
return GL_BACK; return GL_BACK;
case Maxwell::Cull::CullFace::FrontAndBack: case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK; return GL_FRONT_AND_BACK;
} }
LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));

@ -20,6 +20,7 @@
#include "core/telemetry_session.h" #include "core/telemetry_session.h"
#include "video_core/morton.h" #include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h"
namespace OpenGL { namespace OpenGL {
@ -86,28 +87,22 @@ public:
} }
void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
OpenGLState prev_state = OpenGLState::GetCurState();
OpenGLState state = OpenGLState::GetCurState();
// Recreate the color texture attachment // Recreate the color texture attachment
frame->color.Release(); frame->color.Release();
frame->color.Create(); frame->color.Create();
state.renderbuffer = frame->color.handle; const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
state.Apply(); glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
glRenderbufferStorage(GL_RENDERBUFFER, frame->is_srgb ? GL_SRGB8 : GL_RGB8, width, height);
// Recreate the FBO for the render target // Recreate the FBO for the render target
frame->render.Release(); frame->render.Release();
frame->render.Create(); frame->render.Create();
state.draw.read_framebuffer = frame->render.handle; glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
state.draw.draw_framebuffer = frame->render.handle;
state.Apply();
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle); frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
} }
prev_state.Apply();
frame->width = width; frame->width = width;
frame->height = height; frame->height = height;
frame->color_reloaded = true; frame->color_reloaded = true;
@ -164,9 +159,13 @@ public:
namespace { namespace {
constexpr char vertex_shader[] = R"( constexpr char VERTEX_SHADER[] = R"(
#version 430 core #version 430 core
out gl_PerVertex {
vec4 gl_Position;
};
layout (location = 0) in vec2 vert_position; layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec2 vert_tex_coord; layout (location = 1) in vec2 vert_tex_coord;
layout (location = 0) out vec2 frag_tex_coord; layout (location = 0) out vec2 frag_tex_coord;
@ -187,7 +186,7 @@ void main() {
} }
)"; )";
constexpr char fragment_shader[] = R"( constexpr char FRAGMENT_SHADER[] = R"(
#version 430 core #version 430 core
layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) in vec2 frag_tex_coord;
@ -196,7 +195,7 @@ layout (location = 0) out vec4 color;
layout (binding = 0) uniform sampler2D color_texture; layout (binding = 0) uniform sampler2D color_texture;
void main() { void main() {
color = texture(color_texture, frag_tex_coord); color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
} }
)"; )";
@ -205,8 +204,8 @@ constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0; constexpr GLint ModelViewMatrixLocation = 0;
struct ScreenRectVertex { struct ScreenRectVertex {
constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
: position{{x, y}}, tex_coord{{u, v}} {} : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {}
std::array<GLfloat, 2> position; std::array<GLfloat, 2> position;
std::array<GLfloat, 2> tex_coord; std::array<GLfloat, 2> tex_coord;
@ -311,11 +310,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
return; return;
} }
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.AllDirty();
state.Apply();
PrepareRendertarget(framebuffer); PrepareRendertarget(framebuffer);
RenderScreenshot(); RenderScreenshot();
@ -358,8 +352,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
frame->is_srgb = screen_info.display_srgb; frame->is_srgb = screen_info.display_srgb;
frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
} }
state.draw.draw_framebuffer = frame->render.handle; glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
state.Apply();
DrawScreen(layout); DrawScreen(layout);
// Create a fence for the frontend to wait on and swap this frame to OffTex // Create a fence for the frontend to wait on and swap this frame to OffTex
frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
@ -368,10 +361,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
m_current_frame++; m_current_frame++;
rasterizer->TickFrame(); rasterizer->TickFrame();
} }
// Restore the rasterizer state
prev_state.AllDirty();
prev_state.Apply();
} }
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
@ -442,31 +431,25 @@ void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f); 0.0f);
// Link shaders and get variable locations // Create shader programs
shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); OGLShader vertex_shader;
state.draw.shader_program = shader.handle; vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
state.AllDirty();
state.Apply(); OGLShader fragment_shader;
fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
// Create program pipeline
program_manager.Create();
glBindProgramPipeline(program_manager.GetHandle());
// Generate VBO handle for drawing // Generate VBO handle for drawing
vertex_buffer.Create(); vertex_buffer.Create();
// Generate VAO
vertex_array.Create();
state.draw.vertex_array = vertex_array.handle;
// Attach vertex data to VAO // Attach vertex data to VAO
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, position));
glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, tex_coord));
glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
sizeof(ScreenRectVertex));
// Allocate textures for the screen // Allocate textures for the screen
screen_info.texture.resource.Create(GL_TEXTURE_2D); screen_info.texture.resource.Create(GL_TEXTURE_2D);
@ -499,7 +482,8 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) { if (rasterizer) {
return; return;
} }
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
program_manager, state_tracker);
} }
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@ -538,8 +522,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
} }
void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
float h) { if (renderer_settings.set_background_color) {
// Update background color before drawing
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f);
}
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
std::data(ortho_matrix));
const auto& texcoords = screen_info.display_texcoords; const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left; auto left = texcoords.left;
auto right = texcoords.right; auto right = texcoords.right;
@ -571,46 +566,77 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
static_cast<f32>(screen_info.texture.height); static_cast<f32>(screen_info.texture.height);
} }
const std::array vertices = {
ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v),
ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v),
ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v),
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
};
state.textures[0] = screen_info.display_texture;
state.framebuffer_srgb.enabled = screen_info.display_srgb;
state.AllDirty();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
state.textures[0] = 0;
state.AllDirty();
state.Apply();
}
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
if (renderer_settings.set_background_color) {
// Update background color before drawing
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f);
}
const auto& screen = layout.screen; const auto& screen = layout.screen;
const std::array vertices = {
ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v),
ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v),
ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v),
ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v),
};
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
// TODO: Signal state tracker about these changes
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask0();
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
state_tracker.NotifyCullTest();
state_tracker.NotifyDepthTest();
state_tracker.NotifyStencilTest();
state_tracker.NotifyPolygonOffset();
state_tracker.NotifyRasterizeEnable();
state_tracker.NotifyFramebufferSRGB();
state_tracker.NotifyLogicOp();
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
program_manager.UseVertexShader(vertex_program.handle);
program_manager.UseGeometryShader(0);
program_manager.UseFragmentShader(fragment_program.handle);
program_manager.Update();
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
glDisable(GL_POLYGON_OFFSET_FILL);
glDisable(GL_RASTERIZER_DISCARD);
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glDisablei(GL_SCISSOR_TEST, 0);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
glDepthRangeIndexed(0, 0.0, 0.0);
glEnableVertexAttribArray(PositionLocation);
glEnableVertexAttribArray(TexCoordLocation);
glVertexAttribDivisor(PositionLocation, 0);
glVertexAttribDivisor(TexCoordLocation, 0);
glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, position));
glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, tex_coord));
glVertexAttribBinding(PositionLocation, 0);
glVertexAttribBinding(TexCoordLocation, 0);
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
glBindTextureUnit(0, screen_info.display_texture);
glBindSampler(0, 0);
glViewport(0, 0, layout.width, layout.height);
glClear(GL_COLOR_BUFFER_BIT); glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
static_cast<float>(screen.GetHeight()));
} }
void RendererOpenGL::TryPresent(int timeout_ms) { void RendererOpenGL::TryPresent(int timeout_ms) {
@ -653,13 +679,14 @@ void RendererOpenGL::RenderScreenshot() {
return; return;
} }
GLint old_read_fb;
GLint old_draw_fb;
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
// Draw the current frame to the screenshot framebuffer // Draw the current frame to the screenshot framebuffer
screenshot_framebuffer.Create(); screenshot_framebuffer.Create();
GLuint old_read_fb = state.draw.read_framebuffer; glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
GLuint old_draw_fb = state.draw.draw_framebuffer;
state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
state.AllDirty();
state.Apply();
Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@ -676,12 +703,11 @@ void RendererOpenGL::RenderScreenshot() {
renderer_settings.screenshot_bits); renderer_settings.screenshot_bits);
screenshot_framebuffer.Release(); screenshot_framebuffer.Release();
state.draw.read_framebuffer = old_read_fb;
state.draw.draw_framebuffer = old_draw_fb;
state.AllDirty();
state.Apply();
glDeleteRenderbuffers(1, &renderbuffer); glDeleteRenderbuffers(1, &renderbuffer);
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
renderer_settings.screenshot_complete_callback(); renderer_settings.screenshot_complete_callback();
renderer_settings.screenshot_requested = false; renderer_settings.screenshot_requested = false;
} }

@ -10,7 +10,8 @@
#include "common/math_util.h" #include "common/math_util.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core { namespace Core {
class System; class System;
@ -76,8 +77,6 @@ private:
/// Draws the emulated screens to the emulator window. /// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout); void DrawScreen(const Layout::FramebufferLayout& layout);
void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
void RenderScreenshot(); void RenderScreenshot();
/// Loads framebuffer from emulated memory into the active OpenGL texture. /// Loads framebuffer from emulated memory into the active OpenGL texture.
@ -93,17 +92,20 @@ private:
Core::Frontend::EmuWindow& emu_window; Core::Frontend::EmuWindow& emu_window;
Core::System& system; Core::System& system;
OpenGLState state; StateTracker state_tracker{system};
// OpenGL object IDs // OpenGL object IDs
OGLVertexArray vertex_array;
OGLBuffer vertex_buffer; OGLBuffer vertex_buffer;
OGLProgram shader; OGLProgram vertex_program;
OGLProgram fragment_program;
OGLFramebuffer screenshot_framebuffer; OGLFramebuffer screenshot_framebuffer;
/// Display information for Switch screen /// Display information for Switch screen
ScreenInfo screen_info; ScreenInfo screen_info;
/// Global dummy shader pipeline
GLShader::ProgramManager program_manager;
/// OpenGL framebuffer data /// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data; std::vector<u8> gl_framebuffer_data;

@ -9,6 +9,7 @@
#include <glad/glad.h> #include <glad/glad.h>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h" #include "video_core/renderer_opengl/utils.h"
namespace OpenGL { namespace OpenGL {
@ -20,12 +21,12 @@ struct VertexArrayPushBuffer::Entry {
GLsizei stride{}; GLsizei stride{};
}; };
VertexArrayPushBuffer::VertexArrayPushBuffer() = default; VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker)
: state_tracker{state_tracker} {}
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
void VertexArrayPushBuffer::Setup(GLuint vao_) { void VertexArrayPushBuffer::Setup() {
vao = vao_;
index_buffer = nullptr; index_buffer = nullptr;
vertex_buffers.clear(); vertex_buffers.clear();
} }
@ -41,13 +42,11 @@ void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint*
void VertexArrayPushBuffer::Bind() { void VertexArrayPushBuffer::Bind() {
if (index_buffer) { if (index_buffer) {
glVertexArrayElementBuffer(vao, *index_buffer); state_tracker.BindIndexBuffer(*index_buffer);
} }
// TODO(Rodrigo): Find a way to ARB_multi_bind this
for (const auto& entry : vertex_buffers) { for (const auto& entry : vertex_buffers) {
glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride);
entry.stride);
} }
} }

@ -11,12 +11,14 @@
namespace OpenGL { namespace OpenGL {
class StateTracker;
class VertexArrayPushBuffer final { class VertexArrayPushBuffer final {
public: public:
explicit VertexArrayPushBuffer(); explicit VertexArrayPushBuffer(StateTracker& state_tracker);
~VertexArrayPushBuffer(); ~VertexArrayPushBuffer();
void Setup(GLuint vao_); void Setup();
void SetIndexBuffer(const GLuint* buffer); void SetIndexBuffer(const GLuint* buffer);
@ -28,7 +30,8 @@ public:
private: private:
struct Entry; struct Entry;
GLuint vao{}; StateTracker& state_tracker;
const GLuint* index_buffer{}; const GLuint* index_buffer{};
std::vector<Entry> vertex_buffers; std::vector<Entry> vertex_buffers;
}; };

@ -112,19 +112,18 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
const auto& clip = regs.view_volume_clip_control; const auto& clip = regs.view_volume_clip_control;
const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
Maxwell::Cull::FrontFace front_face = regs.cull.front_face; Maxwell::FrontFace front_face = regs.front_face;
if (regs.screen_y_control.triangle_rast_flip != 0 && if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) { regs.viewport_transform[0].scale_y > 0.0f) {
if (front_face == Maxwell::Cull::FrontFace::CounterClockWise) if (front_face == Maxwell::FrontFace::CounterClockWise)
front_face = Maxwell::Cull::FrontFace::ClockWise; front_face = Maxwell::FrontFace::ClockWise;
else if (front_face == Maxwell::Cull::FrontFace::ClockWise) else if (front_face == Maxwell::FrontFace::ClockWise)
front_face = Maxwell::Cull::FrontFace::CounterClockWise; front_face = Maxwell::FrontFace::CounterClockWise;
} }
const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled,
depth_clamp_enabled, gl_ndc, regs.cull.cull_face, depth_clamp_enabled, gl_ndc, regs.cull_face, front_face);
front_face);
} }
} // Anonymous namespace } // Anonymous namespace

@ -171,8 +171,8 @@ struct FixedPipelineState {
struct Rasterizer { struct Rasterizer {
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face, bool ndc_minus_one_to_one, Maxwell::CullFace cull_face,
Maxwell::Cull::FrontFace front_face) Maxwell::FrontFace front_face)
: cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
cull_face{cull_face}, front_face{front_face} {} cull_face{cull_face}, front_face{front_face} {}
@ -182,8 +182,8 @@ struct FixedPipelineState {
bool depth_bias_enable; bool depth_bias_enable;
bool depth_clamp_enable; bool depth_clamp_enable;
bool ndc_minus_one_to_one; bool ndc_minus_one_to_one;
Maxwell::Cull::CullFace cull_face; Maxwell::CullFace cull_face;
Maxwell::Cull::FrontFace front_face; Maxwell::FrontFace front_face;
std::size_t Hash() const noexcept; std::size_t Hash() const noexcept;

@ -586,24 +586,24 @@ vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
return {}; return {};
} }
vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { vk::FrontFace FrontFace(Maxwell::FrontFace front_face) {
switch (front_face) { switch (front_face) {
case Maxwell::Cull::FrontFace::ClockWise: case Maxwell::FrontFace::ClockWise:
return vk::FrontFace::eClockwise; return vk::FrontFace::eClockwise;
case Maxwell::Cull::FrontFace::CounterClockWise: case Maxwell::FrontFace::CounterClockWise:
return vk::FrontFace::eCounterClockwise; return vk::FrontFace::eCounterClockwise;
} }
UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
return {}; return {};
} }
vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) {
switch (cull_face) { switch (cull_face) {
case Maxwell::Cull::CullFace::Front: case Maxwell::CullFace::Front:
return vk::CullModeFlagBits::eFront; return vk::CullModeFlagBits::eFront;
case Maxwell::Cull::CullFace::Back: case Maxwell::CullFace::Back:
return vk::CullModeFlagBits::eBack; return vk::CullModeFlagBits::eBack;
case Maxwell::Cull::CullFace::FrontAndBack: case Maxwell::CullFace::FrontAndBack:
return vk::CullModeFlagBits::eFrontAndBack; return vk::CullModeFlagBits::eFrontAndBack;
} }
UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));

@ -54,9 +54,9 @@ vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face); vk::FrontFace FrontFace(Maxwell::FrontFace front_face);
vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face); vk::CullModeFlags CullFace(Maxwell::CullFace cull_face);
vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);

@ -27,6 +27,7 @@
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/vk_swapchain.h"
namespace Vulkan { namespace Vulkan {
@ -177,10 +178,13 @@ bool RendererVulkan::Init() {
swapchain = std::make_unique<VKSwapchain>(surface, *device); swapchain = std::make_unique<VKSwapchain>(surface, *device);
swapchain->Create(framebuffer.width, framebuffer.height, false); swapchain->Create(framebuffer.width, framebuffer.height, false);
scheduler = std::make_unique<VKScheduler>(*device, *resource_manager); state_tracker = std::make_unique<StateTracker>(system);
scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
*resource_manager, *memory_manager, *scheduler); *resource_manager, *memory_manager,
*state_tracker, *scheduler);
blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
*resource_manager, *memory_manager, *swapchain, *resource_manager, *memory_manager, *swapchain,

@ -4,8 +4,10 @@
#pragma once #pragma once
#include <memory>
#include <optional> #include <optional>
#include <vector> #include <vector>
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/declarations.h"
@ -15,6 +17,7 @@ class System;
namespace Vulkan { namespace Vulkan {
class StateTracker;
class VKBlitScreen; class VKBlitScreen;
class VKDevice; class VKDevice;
class VKFence; class VKFence;
@ -61,6 +64,7 @@ private:
std::unique_ptr<VKSwapchain> swapchain; std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKMemoryManager> memory_manager; std::unique_ptr<VKMemoryManager> memory_manager;
std::unique_ptr<VKResourceManager> resource_manager; std::unique_ptr<VKResourceManager> resource_manager;
std::unique_ptr<StateTracker> state_tracker;
std::unique_ptr<VKScheduler> scheduler; std::unique_ptr<VKScheduler> scheduler;
std::unique_ptr<VKBlitScreen> blit_screen; std::unique_ptr<VKBlitScreen> blit_screen;
}; };

@ -188,11 +188,6 @@ VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D(); const auto& gpu = system.GPU().Maxwell3D();
auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
if (!dirty) {
return last_shaders;
}
dirty = false;
std::array<Shader, Maxwell::MaxShaderProgram> shaders; std::array<Shader, Maxwell::MaxShaderProgram> shaders;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {

@ -36,6 +36,7 @@
#include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h"
@ -280,10 +281,11 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf,
RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
VKScreenInfo& screen_info, const VKDevice& device, VKScreenInfo& screen_info, const VKDevice& device,
VKResourceManager& resource_manager, VKResourceManager& resource_manager,
VKMemoryManager& memory_manager, VKScheduler& scheduler) VKMemoryManager& memory_manager, StateTracker& state_tracker,
VKScheduler& scheduler)
: RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
screen_info{screen_info}, device{device}, resource_manager{resource_manager}, screen_info{screen_info}, device{device}, resource_manager{resource_manager},
memory_manager{memory_manager}, scheduler{scheduler}, memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
staging_pool(device, memory_manager, scheduler), descriptor_pool(device), staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
update_descriptor_queue(device, scheduler), update_descriptor_queue(device, scheduler),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
@ -548,6 +550,10 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true; return true;
} }
void RasterizerVulkan::SetupDirtyFlags() {
state_tracker.Initialize();
}
void RasterizerVulkan::FlushWork() { void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096; static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@ -571,9 +577,9 @@ void RasterizerVulkan::FlushWork() {
RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
MICROPROFILE_SCOPE(Vulkan_RenderTargets); MICROPROFILE_SCOPE(Vulkan_RenderTargets);
auto& dirty = system.GPU().Maxwell3D().dirty; auto& dirty = system.GPU().Maxwell3D().dirty.flags;
const bool update_rendertargets = dirty.render_settings; const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty.render_settings = false; dirty[VideoCommon::Dirty::RenderTargets] = false;
texture_cache.GuardRenderTargets(true); texture_cache.GuardRenderTargets(true);
@ -723,13 +729,13 @@ void RasterizerVulkan::SetupImageTransitions(
} }
void RasterizerVulkan::UpdateDynamicStates() { void RasterizerVulkan::UpdateDynamicStates() {
auto& gpu = system.GPU().Maxwell3D(); auto& regs = system.GPU().Maxwell3D().regs;
UpdateViewportsState(gpu); UpdateViewportsState(regs);
UpdateScissorsState(gpu); UpdateScissorsState(regs);
UpdateDepthBias(gpu); UpdateDepthBias(regs);
UpdateBlendConstants(gpu); UpdateBlendConstants(regs);
UpdateDepthBounds(gpu); UpdateDepthBounds(regs);
UpdateStencilFaces(gpu); UpdateStencilFaces(regs);
} }
void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@ -979,12 +985,10 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
image_views.push_back(ImageView{std::move(view), image_layout}); image_views.push_back(ImageView{std::move(view), image_layout});
} }
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { if (!state_tracker.TouchViewports()) {
return; return;
} }
gpu.dirty.viewport_transform = false;
const auto& regs = gpu.regs;
const std::array viewports{ const std::array viewports{
GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
@ -999,12 +1003,10 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
}); });
} }
void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { if (!state_tracker.TouchScissors()) {
return; return;
} }
gpu.dirty.scissor_test = false;
const auto& regs = gpu.regs;
const std::array scissors = { const std::array scissors = {
GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
@ -1017,46 +1019,39 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
}); });
} }
void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { if (!state_tracker.TouchDepthBias()) {
return; return;
} }
gpu.dirty.polygon_offset = false;
const auto& regs = gpu.regs;
scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) {
cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld);
}); });
} }
void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { if (!state_tracker.TouchBlendConstants()) {
return; return;
} }
gpu.dirty.blend_state = false; const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, regs.blend_color.a};
gpu.regs.blend_color.b, gpu.regs.blend_color.a};
scheduler.Record([blend_color](auto cmdbuf, auto& dld) { scheduler.Record([blend_color](auto cmdbuf, auto& dld) {
cmdbuf.setBlendConstants(blend_color.data(), dld); cmdbuf.setBlendConstants(blend_color.data(), dld);
}); });
} }
void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { if (!state_tracker.TouchDepthBounds()) {
return; return;
} }
gpu.dirty.depth_bounds_values = false;
const auto& regs = gpu.regs;
scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); });
} }
void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { if (!state_tracker.TouchStencilProperties()) {
return; return;
} }
gpu.dirty.stencil_test = false;
const auto& regs = gpu.regs;
if (regs.stencil_two_side_enable) { if (regs.stencil_two_side_enable) {
// Separate values per face // Separate values per face
scheduler.Record( scheduler.Record(

@ -96,6 +96,7 @@ struct hash<Vulkan::FramebufferCacheKey> {
namespace Vulkan { namespace Vulkan {
class StateTracker;
class BufferBindings; class BufferBindings;
struct ImageView { struct ImageView {
@ -108,7 +109,7 @@ public:
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
VKScreenInfo& screen_info, const VKDevice& device, VKScreenInfo& screen_info, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
VKScheduler& scheduler); StateTracker& state_tracker, VKScheduler& scheduler);
~RasterizerVulkan() override; ~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override; void Draw(bool is_indexed, bool is_instanced) override;
@ -127,6 +128,7 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override; const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override; u32 pixel_stride) override;
void SetupDirtyFlags() override;
/// Maximum supported size that a constbuffer can have in bytes. /// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000; static constexpr std::size_t MaxConstbufferSize = 0x10000;
@ -215,12 +217,12 @@ private:
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
@ -241,6 +243,7 @@ private:
const VKDevice& device; const VKDevice& device;
VKResourceManager& resource_manager; VKResourceManager& resource_manager;
VKMemoryManager& memory_manager; VKMemoryManager& memory_manager;
StateTracker& state_tracker;
VKScheduler& scheduler; VKScheduler& scheduler;
VKStagingBufferPool staging_pool; VKStagingBufferPool staging_pool;

@ -2,6 +2,12 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <utility>
#include "common/assert.h" #include "common/assert.h"
#include "common/microprofile.h" #include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/declarations.h"
@ -9,6 +15,7 @@
#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
namespace Vulkan { namespace Vulkan {
@ -29,9 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
last = nullptr; last = nullptr;
} }
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
: device{device}, resource_manager{resource_manager}, next_fence{ StateTracker& state_tracker)
&resource_manager.CommitFence()} { : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker},
next_fence{&resource_manager.CommitFence()} {
AcquireNewChunk(); AcquireNewChunk();
AllocateNewContext(); AllocateNewContext();
worker_thread = std::thread(&VKScheduler::WorkerThread, this); worker_thread = std::thread(&VKScheduler::WorkerThread, this);
@ -157,12 +165,7 @@ void VKScheduler::AllocateNewContext() {
void VKScheduler::InvalidateState() { void VKScheduler::InvalidateState() {
state.graphics_pipeline = nullptr; state.graphics_pipeline = nullptr;
state.viewports = false; state_tracker.InvalidateCommandBufferState();
state.scissors = false;
state.depth_bias = false;
state.blend_constants = false;
state.depth_bounds = false;
state.stencil_values = false;
} }
void VKScheduler::EndPendingOperations() { void VKScheduler::EndPendingOperations() {

@ -17,6 +17,7 @@
namespace Vulkan { namespace Vulkan {
class StateTracker;
class VKDevice; class VKDevice;
class VKFence; class VKFence;
class VKQueryCache; class VKQueryCache;
@ -43,7 +44,8 @@ private:
/// OpenGL-like operations on Vulkan command buffers. /// OpenGL-like operations on Vulkan command buffers.
class VKScheduler { class VKScheduler {
public: public:
explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
StateTracker& state_tracker);
~VKScheduler(); ~VKScheduler();
/// Sends the current execution context to the GPU. /// Sends the current execution context to the GPU.
@ -74,36 +76,6 @@ public:
query_cache = &query_cache_; query_cache = &query_cache_;
} }
/// Returns true when viewports have been set in the current command buffer.
bool TouchViewports() {
return std::exchange(state.viewports, true);
}
/// Returns true when scissors have been set in the current command buffer.
bool TouchScissors() {
return std::exchange(state.scissors, true);
}
/// Returns true when depth bias have been set in the current command buffer.
bool TouchDepthBias() {
return std::exchange(state.depth_bias, true);
}
/// Returns true when blend constants have been set in the current command buffer.
bool TouchBlendConstants() {
return std::exchange(state.blend_constants, true);
}
/// Returns true when depth bounds have been set in the current command buffer.
bool TouchDepthBounds() {
return std::exchange(state.depth_bounds, true);
}
/// Returns true when stencil values have been set in the current command buffer.
bool TouchStencilValues() {
return std::exchange(state.stencil_values, true);
}
/// Send work to a separate thread. /// Send work to a separate thread.
template <typename T> template <typename T>
void Record(T&& command) { void Record(T&& command) {
@ -217,6 +189,8 @@ private:
const VKDevice& device; const VKDevice& device;
VKResourceManager& resource_manager; VKResourceManager& resource_manager;
StateTracker& state_tracker;
VKQueryCache* query_cache = nullptr; VKQueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf; vk::CommandBuffer current_cmdbuf;
@ -226,12 +200,6 @@ private:
struct State { struct State {
std::optional<vk::RenderPassBeginInfo> renderpass; std::optional<vk::RenderPassBeginInfo> renderpass;
vk::Pipeline graphics_pipeline; vk::Pipeline graphics_pipeline;
bool viewports = false;
bool scissors = false;
bool depth_bias = false;
bool blend_constants = false;
bool depth_bounds = false;
bool stencil_values = false;
} state; } state;
std::unique_ptr<CommandChunk> chunk; std::unique_ptr<CommandChunk> chunk;

@ -0,0 +1,101 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <iterator>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
namespace Vulkan {
namespace {
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
Flags flags{};
flags[Viewports] = true;
flags[Scissors] = true;
flags[DepthBias] = true;
flags[BlendConstants] = true;
flags[DepthBounds] = true;
flags[StencilProperties] = true;
return flags;
}
void SetupDirtyViewports(Tables& tables) {
FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
tables[0][OFF(viewport_transform_enabled)] = Viewports;
}
void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
}
void SetupDirtyDepthBias(Tables& tables) {
auto& table = tables[0];
table[OFF(polygon_offset_units)] = DepthBias;
table[OFF(polygon_offset_clamp)] = DepthBias;
table[OFF(polygon_offset_factor)] = DepthBias;
}
void SetupDirtyBlendConstants(Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
}
void SetupDirtyDepthBounds(Tables& tables) {
FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
}
void SetupDirtyStencilProperties(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_two_side_enable)] = StencilProperties;
table[OFF(stencil_front_func_ref)] = StencilProperties;
table[OFF(stencil_front_mask)] = StencilProperties;
table[OFF(stencil_front_func_mask)] = StencilProperties;
table[OFF(stencil_back_func_ref)] = StencilProperties;
table[OFF(stencil_back_mask)] = StencilProperties;
table[OFF(stencil_back_func_mask)] = StencilProperties;
}
} // Anonymous namespace
StateTracker::StateTracker(Core::System& system)
: system{system}, invalidation_flags{MakeInvalidationFlags()} {}
void StateTracker::Initialize() {
auto& dirty = system.GPU().Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyDepthBias(tables);
SetupDirtyBlendConstants(tables);
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
SetupCommonOnWriteStores(dirty.on_write_stores);
}
void StateTracker::InvalidateCommandBufferState() {
system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
}
} // namespace Vulkan

@ -0,0 +1,79 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <limits>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Vulkan {
namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
Viewports,
Scissors,
DepthBias,
BlendConstants,
DepthBounds,
StencilProperties,
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
public:
explicit StateTracker(Core::System& system);
void Initialize();
void InvalidateCommandBufferState();
bool TouchViewports() {
return Exchange(Dirty::Viewports, false);
}
bool TouchScissors() {
return Exchange(Dirty::Scissors, false);
}
bool TouchDepthBias() {
return Exchange(Dirty::DepthBias, false);
}
bool TouchBlendConstants() {
return Exchange(Dirty::BlendConstants, false);
}
bool TouchDepthBounds() {
return Exchange(Dirty::DepthBounds, false);
}
bool TouchStencilProperties() {
return Exchange(Dirty::StencilProperties, false);
}
private:
bool Exchange(std::size_t id, bool new_value) const noexcept {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
const bool is_dirty = flags[id];
flags[id] = new_value;
return is_dirty;
}
Core::System& system;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
};
} // namespace Vulkan

@ -22,6 +22,7 @@
#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/surface.h" #include "video_core/surface.h"

@ -22,6 +22,7 @@
#include "core/core.h" #include "core/core.h"
#include "core/memory.h" #include "core/memory.h"
#include "core/settings.h" #include "core/settings.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h" #include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
@ -142,11 +143,10 @@ public:
TView GetDepthBufferSurface(bool preserve_contents) { TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D(); auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
if (!maxwell3d.dirty.depth_buffer) {
return depth_buffer.view; return depth_buffer.view;
} }
maxwell3d.dirty.depth_buffer = false; maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
const auto& regs{maxwell3d.regs}; const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()}; const auto gpu_addr{regs.zeta.Address()};
@ -175,10 +175,10 @@ public:
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D(); auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.render_target[index]) { if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
return render_targets[index].view; return render_targets[index].view;
} }
maxwell3d.dirty.render_target[index] = false; maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
const auto& regs{maxwell3d.regs}; const auto& regs{maxwell3d.regs};
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@ -320,14 +320,14 @@ protected:
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) { void ManageRenderTargetUnregister(TSurface& surface) {
auto& maxwell3d = system.GPU().Maxwell3D(); auto& dirty = system.GPU().Maxwell3D().dirty;
const u32 index = surface->GetRenderTarget(); const u32 index = surface->GetRenderTarget();
if (index == DEPTH_RT) { if (index == DEPTH_RT) {
maxwell3d.dirty.depth_buffer = true; dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
} else { } else {
maxwell3d.dirty.render_target[index] = true; dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true;
} }
maxwell3d.dirty.render_settings = true; dirty.flags[VideoCommon::Dirty::RenderTargets] = true;
} }
void Register(TSurface surface) { void Register(TSurface surface) {