Merge pull request #3964 from ReinUsesLisp/arb-integration

renderer_opengl: Add assembly program code paths
merge-requests/60/head
bunnei 2020-05-24 00:34:12 +07:00 committed by GitHub
commit 325e7eed3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 368 additions and 109 deletions

@ -112,6 +112,7 @@ void LogSettings() {
LogSetting("Renderer_UseAsynchronousGpuEmulation", LogSetting("Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation); Settings::values.use_asynchronous_gpu_emulation);
LogSetting("Renderer_UseVsync", Settings::values.use_vsync); LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy); LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy);
LogSetting("Audio_OutputEngine", Settings::values.sink_id); LogSetting("Audio_OutputEngine", Settings::values.sink_id);
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);

@ -446,6 +446,7 @@ struct Values {
GPUAccuracy gpu_accuracy; GPUAccuracy gpu_accuracy;
bool use_asynchronous_gpu_emulation; bool use_asynchronous_gpu_emulation;
bool use_vsync; bool use_vsync;
bool use_assembly_shaders;
bool force_30fps_mode; bool force_30fps_mode;
bool use_fast_gpu_time; bool use_fast_gpu_time;

@ -201,6 +201,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation); Settings::values.use_asynchronous_gpu_emulation);
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
} }

@ -13,6 +13,7 @@
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "core/settings.h"
#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
@ -183,10 +184,16 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
has_precise_bug = TestPreciseBug(); has_precise_bug = TestPreciseBug();
has_broken_compute = is_intel_proprietary; has_broken_compute = is_intel_proprietary;
has_fast_buffer_sub_data = is_nvidia; has_fast_buffer_sub_data = is_nvidia;
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
GLAD_GL_NV_compute_program5;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
if (Settings::values.use_assembly_shaders && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
}
} }
Device::Device(std::nullptr_t) { Device::Device(std::nullptr_t) {

@ -88,6 +88,10 @@ public:
return has_fast_buffer_sub_data; return has_fast_buffer_sub_data;
} }
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
private: private:
static bool TestVariableAoffi(); static bool TestVariableAoffi();
static bool TestPreciseBug(); static bool TestPreciseBug();
@ -107,6 +111,7 @@ private:
bool has_precise_bug{}; bool has_precise_bug{};
bool has_broken_compute{}; bool has_broken_compute{};
bool has_fast_buffer_sub_data{}; bool has_fast_buffer_sub_data{};
bool use_assembly_shaders{};
}; };
} // namespace OpenGL } // namespace OpenGL

@ -94,17 +94,30 @@ void oglEnable(GLenum cap, bool state) {
} // Anonymous namespace } // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info, GLShader::ProgramManager& program_manager, const Device& device, ScreenInfo& info,
StateTracker& state_tracker) ProgramManager& program_manager, StateTracker& state_tracker)
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
state_tracker},
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
CheckExtensions(); CheckExtensions();
if (device.UseAssemblyShaders()) {
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
for (const GLuint cbuf : staging_cbufs) {
glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
nullptr, 0);
}
}
} }
RasterizerOpenGL::~RasterizerOpenGL() {} RasterizerOpenGL::~RasterizerOpenGL() {
if (device.UseAssemblyShaders()) {
glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
}
}
void RasterizerOpenGL::CheckExtensions() { void RasterizerOpenGL::CheckExtensions() {
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
@ -230,6 +243,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader); MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = system.GPU().Maxwell3D(); auto& gpu = system.GPU().Maxwell3D();
std::size_t num_ssbos = 0;
u32 clip_distances = 0; u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@ -261,6 +275,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
Shader shader{shader_cache.GetStageProgram(program)}; Shader shader{shader_cache.GetStageProgram(program)};
if (device.UseAssemblyShaders()) {
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
// all stages share the same bindings.
const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
num_ssbos += num_stage_ssbos;
}
// Stage indices are 0 - 5 // Stage indices are 0 - 5
const std::size_t stage = index == 0 ? 0 : index - 1; const std::size_t stage = index == 0 ? 0 : index - 1;
SetupDrawConstBuffers(stage, shader); SetupDrawConstBuffers(stage, shader);
@ -526,6 +548,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncFramebufferSRGB(); SyncFramebufferSRGB();
buffer_cache.Acquire(); buffer_cache.Acquire();
current_cbuf = 0;
std::size_t buffer_size = CalculateVertexArraysSize(); std::size_t buffer_size = CalculateVertexArraysSize();
@ -535,9 +558,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
} }
// Uniform space for the 5 shader stages // Uniform space for the 5 shader stages
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + buffer_size =
(sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * Common::AlignUp<std::size_t>(buffer_size, 4) +
Maxwell::MaxShaderStage; (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers // Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * buffer_size += Maxwell::MaxConstBuffers *
@ -558,12 +581,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
} }
// Setup emulation uniform buffer. // Setup emulation uniform buffer.
GLShader::MaxwellUniformData ubo; if (!device.UseAssemblyShaders()) {
ubo.SetFromRegs(gpu); MaxwellUniformData ubo;
const auto [buffer, offset] = ubo.SetFromRegs(gpu);
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); const auto [buffer, offset] =
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
static_cast<GLsizeiptr>(sizeof(ubo))); glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
}
// Setup shaders and their used resources. // Setup shaders and their used resources.
texture_cache.GuardSamplers(true); texture_cache.GuardSamplers(true);
@ -635,11 +660,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
} }
buffer_cache.Acquire(); buffer_cache.Acquire();
current_cbuf = 0;
auto kernel = shader_cache.GetComputeKernel(code_addr); auto kernel = shader_cache.GetComputeKernel(code_addr);
SetupComputeTextures(kernel); SetupComputeTextures(kernel);
SetupComputeImages(kernel); SetupComputeImages(kernel);
program_manager.BindComputeShader(kernel->GetHandle());
const std::size_t buffer_size = const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers * Tegra::Engines::KeplerCompute::NumConstBuffers *
@ -652,6 +677,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap(); buffer_cache.Unmap();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands; ++num_queued_commands;
} }
@ -812,14 +838,20 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
} }
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
static constexpr std::array PARAMETER_LUT = {
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
MICROPROFILE_SCOPE(OpenGL_UBO); MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
const auto& shader_stage = stages[stage_index]; const auto& shader_stage = stages[stage_index];
u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; u32 binding =
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
for (const auto& entry : shader->GetEntries().const_buffers) { for (const auto& entry : shader->GetEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
SetupConstBuffer(binding++, buffer, entry); SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
} }
} }
@ -835,16 +867,21 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
buffer.address = config.Address(); buffer.address = config.Address();
buffer.size = config.size; buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()]; buffer.enabled = mask[entry.GetIndex()];
SetupConstBuffer(binding++, buffer, entry); SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
} }
} }
void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry) { const ConstBufferEntry& entry) {
if (!buffer.enabled) { if (!buffer.enabled) {
// Set values to zero to unbind buffers // Set values to zero to unbind buffers
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, if (device.UseAssemblyShaders()) {
sizeof(float)); glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
}
return; return;
} }
@ -853,9 +890,19 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
const auto alignment = device.GetUniformBufferAlignment(); const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
device.HasFastBufferSubData()); device.HasFastBufferSubData());
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); if (!device.UseAssemblyShaders()) {
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
return;
}
if (offset != 0) {
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
cbuf = staging_cbuf;
offset = 0;
}
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
} }
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@ -863,7 +910,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
auto& memory_manager{gpu.MemoryManager()}; auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; u32 binding =
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : shader->GetEntries().global_memory_entries) { for (const auto& entry : shader->GetEntries().global_memory_entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};

@ -56,8 +56,8 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public: public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info, GLShader::ProgramManager& program_manager, const Device& device, ScreenInfo& info,
StateTracker& state_tracker); ProgramManager& program_manager, StateTracker& state_tracker);
~RasterizerOpenGL() override; ~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override; void Draw(bool is_indexed, bool is_instanced) override;
@ -106,7 +106,7 @@ private:
void SetupComputeConstBuffers(const Shader& kernel); void SetupComputeConstBuffers(const Shader& kernel);
/// Configures a constant buffer. /// Configures a constant buffer.
void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry); const ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command. /// Configures the current global memory entries to use for the draw command.
@ -224,7 +224,7 @@ private:
void SetupShaders(GLenum primitive_mode); void SetupShaders(GLenum primitive_mode);
const Device device; const Device& device;
TextureCacheOpenGL texture_cache; TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache; ShaderCacheOpenGL shader_cache;
@ -236,7 +236,7 @@ private:
Core::System& system; Core::System& system;
ScreenInfo& screen_info; ScreenInfo& screen_info;
GLShader::ProgramManager& program_manager; ProgramManager& program_manager;
StateTracker& state_tracker; StateTracker& state_tracker;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
@ -248,6 +248,12 @@ private:
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
enabled_transform_feedback_buffers; enabled_transform_feedback_buffers;
static constexpr std::size_t NUM_CONSTANT_BUFFERS =
Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
std::size_t current_cbuf = 0;
/// Number of commands queued to the OpenGL driver. Reseted on flush. /// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0; std::size_t num_queued_commands = 0;

@ -125,6 +125,15 @@ void OGLProgram::Release() {
handle = 0; handle = 0;
} }
void OGLAssemblyProgram::Release() {
if (handle == 0) {
return;
}
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramsARB(1, &handle);
handle = 0;
}
void OGLPipeline::Create() { void OGLPipeline::Create() {
if (handle != 0) if (handle != 0)
return; return;

@ -167,6 +167,22 @@ public:
GLuint handle = 0; GLuint handle = 0;
}; };
class OGLAssemblyProgram : private NonCopyable {
public:
OGLAssemblyProgram() = default;
OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLAssemblyProgram() {
Release();
}
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLPipeline : private NonCopyable { class OGLPipeline : private NonCopyable {
public: public:
OGLPipeline() = default; OGLPipeline() = default;

@ -97,6 +97,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
return {}; return {};
} }
constexpr GLenum AssemblyEnum(ShaderType shader_type) {
switch (shader_type) {
case ShaderType::Vertex:
return GL_VERTEX_PROGRAM_NV;
case ShaderType::TesselationControl:
return GL_TESS_CONTROL_PROGRAM_NV;
case ShaderType::TesselationEval:
return GL_TESS_EVALUATION_PROGRAM_NV;
case ShaderType::Geometry:
return GL_GEOMETRY_PROGRAM_NV;
case ShaderType::Fragment:
return GL_FRAGMENT_PROGRAM_NV;
case ShaderType::Compute:
return GL_COMPUTE_PROGRAM_NV;
}
return {};
}
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
} }
@ -120,18 +138,43 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
return registry; return registry;
} }
std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
u64 unique_identifier, const ShaderIR& ir, const ShaderIR& ir, const Registry& registry,
const Registry& registry, bool hint_retrievable = false) { bool hint_retrievable = false) {
const std::string shader_id = MakeShaderID(unique_identifier, shader_type); const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id); LOG_INFO(Render_OpenGL, "{}", shader_id);
const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); auto program = std::make_shared<ProgramHandle>();
OGLShader shader;
shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); if (device.UseAssemblyShaders()) {
const std::string arb = "Not implemented";
GLuint& arb_prog = program->assembly_program.handle;
// Commented out functions signal OpenGL errors but are compatible with apitrace.
// Use them only to capture and replay on apitrace.
#if 0
glGenProgramsNV(1, &arb_prog);
glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
reinterpret_cast<const GLubyte*>(arb.data()));
#else
glGenProgramsARB(1, &arb_prog);
glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
static_cast<GLsizei>(arb.size()), arb.data());
#endif
const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
if (err && *err) {
LOG_CRITICAL(Render_OpenGL, "{}", err);
LOG_INFO(Render_OpenGL, "\n{}", arb);
}
} else {
const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
OGLShader shader;
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
program->source_program.Create(true, hint_retrievable, shader.handle);
}
auto program = std::make_shared<OGLProgram>();
program->Create(true, hint_retrievable, shader.handle);
return program; return program;
} }
@ -153,15 +196,22 @@ std::unordered_set<GLenum> GetSupportedFormats() {
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry, std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program) ShaderEntries entries, ProgramSharedPtr program_)
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
size_in_bytes{size_in_bytes}, program{std::move(program)} {} size_in_bytes{size_in_bytes}, program{std::move(program_)} {
// Assign either the assembly program or source program. We can't have both.
handle = program->assembly_program.handle;
if (handle == 0) {
handle = program->source_program.handle;
}
ASSERT(handle != 0);
}
CachedShader::~CachedShader() = default; CachedShader::~CachedShader() = default;
GLuint CachedShader::GetHandle() const { GLuint CachedShader::GetHandle() const {
DEBUG_ASSERT(registry->IsConsistent()); DEBUG_ASSERT(registry->IsConsistent());
return program->handle; return handle;
} }
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
@ -239,7 +289,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return; return;
} }
const std::vector gl_cache = disk_cache.LoadPrecompiled(); std::vector<ShaderDiskCachePrecompiled> gl_cache;
if (!device.UseAssemblyShaders()) {
// Only load precompiled cache when we are not using assembly shaders
gl_cache = disk_cache.LoadPrecompiled();
}
const auto supported_formats = GetSupportedFormats(); const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to // Track if precompiled cache was altered during loading to know if we have to
@ -278,7 +332,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
auto registry = MakeRegistry(entry); auto registry = MakeRegistry(entry);
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
std::shared_ptr<OGLProgram> program; ProgramSharedPtr program;
if (precompiled_entry) { if (precompiled_entry) {
// If the shader is precompiled, attempt to load it with // If the shader is precompiled, attempt to load it with
program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
@ -332,6 +386,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return; return;
} }
if (device.UseAssemblyShaders()) {
// Don't store precompiled binaries for assembly shaders.
return;
}
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them // before precompiling them
@ -339,7 +398,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const u64 id = (*transferable)[i].unique_identifier; const u64 id = (*transferable)[i].unique_identifier;
const auto it = find_precompiled(id); const auto it = find_precompiled(id);
if (it == gl_cache.end()) { if (it == gl_cache.end()) {
const GLuint program = runtime_cache.at(id).program->handle; const GLuint program = runtime_cache.at(id).program->source_program.handle;
disk_cache.SavePrecompiled(id, program); disk_cache.SavePrecompiled(id, program);
precompiled_cache_altered = true; precompiled_cache_altered = true;
} }
@ -350,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
} }
} }
std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats) { const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
@ -358,15 +417,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
return {}; return {};
} }
auto program = std::make_shared<OGLProgram>(); auto program = std::make_shared<ProgramHandle>();
program->handle = glCreateProgram(); GLuint& handle = program->source_program.handle;
glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); handle = glCreateProgram();
glProgramBinary(program->handle, precompiled_entry.binary_format, glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
precompiled_entry.binary.data(), glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
static_cast<GLsizei>(precompiled_entry.binary.size())); static_cast<GLsizei>(precompiled_entry.binary.size()));
GLint link_status; GLint link_status;
glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) { if (link_status == GL_FALSE) {
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
return {}; return {};

@ -43,8 +43,14 @@ struct UnspecializedShader;
using Shader = std::shared_ptr<CachedShader>; using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ProgramHandle {
OGLProgram source_program;
OGLAssemblyProgram assembly_program;
};
using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
struct PrecompiledShader { struct PrecompiledShader {
std::shared_ptr<OGLProgram> program; ProgramSharedPtr program;
std::shared_ptr<VideoCommon::Shader::Registry> registry; std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries; ShaderEntries entries;
}; };
@ -87,12 +93,13 @@ public:
private: private:
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry, std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program); ShaderEntries entries, ProgramSharedPtr program);
std::shared_ptr<VideoCommon::Shader::Registry> registry; std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries; ShaderEntries entries;
std::size_t size_in_bytes = 0; std::size_t size_in_bytes = 0;
std::shared_ptr<OGLProgram> program; ProgramSharedPtr program;
GLuint handle = 0;
}; };
class ShaderCacheOpenGL final : public RasterizerCache<Shader> { class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@ -115,7 +122,7 @@ protected:
void FlushObjectInner(const Shader& object) override {} void FlushObjectInner(const Shader& object) override {}
private: private:
std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( ProgramSharedPtr GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats); const std::unordered_set<GLenum>& supported_formats);

@ -6,47 +6,107 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL::GLShader { namespace OpenGL {
ProgramManager::ProgramManager() = default; ProgramManager::ProgramManager(const Device& device) {
use_assembly_programs = device.UseAssemblyShaders();
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
} else {
graphics_pipeline.Create();
glBindProgramPipeline(graphics_pipeline.handle);
}
}
ProgramManager::~ProgramManager() = default; ProgramManager::~ProgramManager() = default;
void ProgramManager::Create() { void ProgramManager::BindCompute(GLuint program) {
graphics_pipeline.Create(); if (use_assembly_programs) {
glBindProgramPipeline(graphics_pipeline.handle); glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
} else {
is_graphics_bound = false;
glUseProgram(program);
}
} }
void ProgramManager::BindGraphicsPipeline() { void ProgramManager::BindGraphicsPipeline() {
if (use_assembly_programs) {
UpdateAssemblyPrograms();
} else {
UpdateSourcePrograms();
}
}
void ProgramManager::BindHostPipeline(GLuint pipeline) {
if (use_assembly_programs) {
if (geometry_enabled) {
geometry_enabled = false;
old_state.geometry = 0;
glDisable(GL_GEOMETRY_PROGRAM_NV);
}
}
glBindProgramPipeline(pipeline);
}
void ProgramManager::RestoreGuestPipeline() {
if (use_assembly_programs) {
glBindProgramPipeline(0);
} else {
glBindProgramPipeline(graphics_pipeline.handle);
}
}
void ProgramManager::UpdateAssemblyPrograms() {
const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
if (current == old) {
return;
}
if (current == 0) {
if (enabled) {
enabled = false;
glDisable(stage);
}
return;
}
if (!enabled) {
enabled = true;
glEnable(stage);
}
glBindProgramARB(stage, current);
};
update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
old_state.geometry);
update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
old_state.fragment);
old_state = current_state;
}
void ProgramManager::UpdateSourcePrograms() {
if (!is_graphics_bound) { if (!is_graphics_bound) {
is_graphics_bound = true; is_graphics_bound = true;
glUseProgram(0); glUseProgram(0);
} }
// Avoid updating the pipeline when values have no changed
if (old_state == current_state) {
return;
}
// Workaround for AMD bug
static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
GL_FRAGMENT_SHADER_BIT};
const GLuint handle = graphics_pipeline.handle; const GLuint handle = graphics_pipeline.handle;
glUseProgramStages(handle, all_used_stages, 0); const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); if (current == old) {
glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); return;
glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); }
glUseProgramStages(handle, stage, current);
};
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
old_state = current_state; old_state = current_state;
} }
void ProgramManager::BindComputeShader(GLuint program) {
is_graphics_bound = false;
glUseProgram(program);
}
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
const auto& regs = maxwell.regs; const auto& regs = maxwell.regs;
@ -54,4 +114,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
} }
} // namespace OpenGL::GLShader } // namespace OpenGL

@ -11,7 +11,9 @@
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL::GLShader { namespace OpenGL {
class Device;
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
class ProgramManager { class ProgramManager {
public: public:
explicit ProgramManager(); explicit ProgramManager(const Device& device);
~ProgramManager(); ~ProgramManager();
void Create(); /// Binds a compute program
void BindCompute(GLuint program);
/// Updates the graphics pipeline and binds it. /// Updates bound programs.
void BindGraphicsPipeline(); void BindGraphicsPipeline();
/// Binds a compute shader. /// Binds an OpenGL pipeline object unsynchronized with the guest state.
void BindComputeShader(GLuint program); void BindHostPipeline(GLuint pipeline);
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
void UseVertexShader(GLuint program) { void UseVertexShader(GLuint program) {
current_state.vertex_shader = program; current_state.vertex = program;
} }
void UseGeometryShader(GLuint program) { void UseGeometryShader(GLuint program) {
current_state.geometry_shader = program; current_state.geometry = program;
} }
void UseFragmentShader(GLuint program) { void UseFragmentShader(GLuint program) {
current_state.fragment_shader = program; current_state.fragment = program;
} }
private: private:
struct PipelineState { struct PipelineState {
bool operator==(const PipelineState& rhs) const noexcept { GLuint vertex = 0;
return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && GLuint geometry = 0;
geometry_shader == rhs.geometry_shader; GLuint fragment = 0;
}
bool operator!=(const PipelineState& rhs) const noexcept {
return !operator==(rhs);
}
GLuint vertex_shader = 0;
GLuint fragment_shader = 0;
GLuint geometry_shader = 0;
}; };
/// Update NV_gpu_program5 programs.
void UpdateAssemblyPrograms();
/// Update GLSL programs.
void UpdateSourcePrograms();
OGLPipeline graphics_pipeline; OGLPipeline graphics_pipeline;
OGLPipeline compute_pipeline;
PipelineState current_state; PipelineState current_state;
PipelineState old_state; PipelineState old_state;
bool use_assembly_programs = false;
bool is_graphics_bound = true; bool is_graphics_bound = true;
bool vertex_enabled = false;
bool geometry_enabled = false;
bool fragment_enabled = false;
}; };
} // namespace OpenGL::GLShader } // namespace OpenGL

@ -316,7 +316,7 @@ public:
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
Core::Frontend::GraphicsContext& context) Core::Frontend::GraphicsContext& context)
: RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
has_debug_tool{HasDebugTool()} {} program_manager{device}, has_debug_tool{HasDebugTool()} {}
RendererOpenGL::~RendererOpenGL() = default; RendererOpenGL::~RendererOpenGL() = default;
@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() {
vertex_program.Create(true, false, vertex_shader.handle); vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle); fragment_program.Create(true, false, fragment_shader.handle);
// Create program pipeline pipeline.Create();
program_manager.Create(); glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
// Generate VBO handle for drawing // Generate VBO handle for drawing
vertex_buffer.Create(); vertex_buffer.Create();
@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) { if (rasterizer) {
return; return;
} }
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
program_manager, state_tracker); program_manager, state_tracker);
} }
@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl(); state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest(); state_tracker.NotifyAlphaTest();
program_manager.UseVertexShader(vertex_program.handle); program_manager.BindHostPipeline(pipeline.handle);
program_manager.UseGeometryShader(0);
program_manager.UseFragmentShader(fragment_program.handle);
program_manager.BindGraphicsPipeline();
glEnable(GL_CULL_FACE); glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) { if (screen_info.display_srgb) {
@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT); glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
program_manager.RestoreGuestPipeline();
} }
bool RendererOpenGL::TryPresent(int timeout_ms) { bool RendererOpenGL::TryPresent(int timeout_ms) {

@ -9,6 +9,7 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/math_util.h" #include "common/math_util.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_state_tracker.h"
@ -95,6 +96,7 @@ private:
Core::Frontend::EmuWindow& emu_window; Core::Frontend::EmuWindow& emu_window;
Core::System& system; Core::System& system;
Core::Frontend::GraphicsContext& context; Core::Frontend::GraphicsContext& context;
const Device device;
StateTracker state_tracker{system}; StateTracker state_tracker{system};
@ -102,13 +104,14 @@ private:
OGLBuffer vertex_buffer; OGLBuffer vertex_buffer;
OGLProgram vertex_program; OGLProgram vertex_program;
OGLProgram fragment_program; OGLProgram fragment_program;
OGLPipeline pipeline;
OGLFramebuffer screenshot_framebuffer; OGLFramebuffer screenshot_framebuffer;
/// Display information for Switch screen /// Display information for Switch screen
ScreenInfo screen_info; ScreenInfo screen_info;
/// Global dummy shader pipeline /// Global dummy shader pipeline
GLShader::ProgramManager program_manager; ProgramManager program_manager;
/// OpenGL framebuffer data /// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data; std::vector<u8> gl_framebuffer_data;

@ -643,6 +643,8 @@ void Config::ReadRendererValues() {
Settings::values.use_asynchronous_gpu_emulation = Settings::values.use_asynchronous_gpu_emulation =
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
Settings::values.use_assembly_shaders =
ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool();
Settings::values.use_fast_gpu_time = Settings::values.use_fast_gpu_time =
ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool();
Settings::values.force_30fps_mode = Settings::values.force_30fps_mode =
@ -1090,6 +1092,8 @@ void Config::SaveRendererValues() {
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
Settings::values.use_asynchronous_gpu_emulation, false); Settings::values.use_asynchronous_gpu_emulation, false);
WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders,
false);
WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true);
WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);

@ -12,6 +12,9 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
ui->setupUi(this); ui->setupUi(this);
// TODO: Remove this after assembly shaders are fully integrated
ui->use_assembly_shaders->setVisible(false);
SetConfiguration(); SetConfiguration();
} }
@ -22,6 +25,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
ui->use_vsync->setEnabled(runtime_lock); ui->use_vsync->setEnabled(runtime_lock);
ui->use_vsync->setChecked(Settings::values.use_vsync); ui->use_vsync->setChecked(Settings::values.use_vsync);
ui->use_assembly_shaders->setEnabled(runtime_lock);
ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders);
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time);
ui->force_30fps_mode->setEnabled(runtime_lock); ui->force_30fps_mode->setEnabled(runtime_lock);
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
@ -33,6 +38,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
Settings::values.gpu_accuracy = gpu_accuracy; Settings::values.gpu_accuracy = gpu_accuracy;
Settings::values.use_vsync = ui->use_vsync->isChecked(); Settings::values.use_vsync = ui->use_vsync->isChecked();
Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked();
Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked();
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();

@ -62,6 +62,16 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="use_assembly_shaders">
<property name="toolTip">
<string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string>
</property>
<property name="text">
<string>Use assembly shaders (experimental, Nvidia OpenGL only)</string>
</property>
</widget>
</item>
<item> <item>
<widget class="QCheckBox" name="force_30fps_mode"> <widget class="QCheckBox" name="force_30fps_mode">
<property name="text"> <property name="text">

@ -397,6 +397,8 @@ void Config::ReadValues() {
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
Settings::values.use_vsync = Settings::values.use_vsync =
static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
Settings::values.use_assembly_shaders =
sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false);
Settings::values.use_fast_gpu_time = Settings::values.use_fast_gpu_time =
sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);

@ -134,6 +134,10 @@ max_anisotropy =
# 0 (default): Off, 1: On # 0 (default): Off, 1: On
use_vsync = use_vsync =
# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required.
# 0 (default): Off, 1: On
use_assembly_shaders =
# Turns on the frame limiter, which will limit frames output to the target game speed # Turns on the frame limiter, which will limit frames output to the target game speed
# 0: Off, 1: On (default) # 0: Off, 1: On (default)
use_frame_limit = use_frame_limit =