gl_rasterizer: isolate shader management into its own class

master
wwylele 2018-04-10 00:31:11 +07:00
parent 36bc92273b
commit 8dc75598a4
5 changed files with 304 additions and 141 deletions

@ -32,6 +32,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_decompiler.h
renderer_opengl/gl_shader_gen.cpp
renderer_opengl/gl_shader_gen.h
renderer_opengl/gl_shader_manager.cpp
renderer_opengl/gl_shader_manager.h
renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state.cpp

@ -178,6 +178,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle);
shader_program_manager =
std::make_unique<ShaderProgramManager>(GLAD_GL_ARB_separate_shader_objects);
glEnable(GL_BLEND);
SyncEntireState();
@ -486,6 +489,11 @@ void RasterizerOpenGL::DrawTriangles() {
state.scissor.height = draw_rect.GetHeight();
state.Apply();
shader_program_manager->UseTrivialVertexShader();
shader_program_manager->UseTrivialGeometryShader();
shader_program_manager->ApplyTo(state);
state.Apply();
// Draw the vertex batch
size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
for (size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) {
@ -1253,95 +1261,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
/// Selects the shader that matches the current PICA register state.
/// A PicaShaderConfig is built from Pica::g_state.regs and used as the lookup key. On a cache
/// hit the cached program is made current; on a miss a new program is compiled, its sampler
/// uniforms and "shader_data" uniform block are bound, and it is stored in shader_cache.
/// Finally the same config is forwarded to shader_program_manager->UseFragmentShader().
// NOTE(review): this listing looks like it combines the pre-refactor cache path with the new
// shader_program_manager call - confirm which statements are still live.
void RasterizerOpenGL::SetShader() {
auto config = GLShader::PicaShaderConfig::BuildFromRegs(Pica::g_state.regs);
// Allocated up front; only populated and moved into the cache on a cache miss
std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
// Find (or generate) the GLSL shader for the current TEV state
auto cached_shader = shader_cache.find(config);
if (cached_shader != shader_cache.end()) {
current_shader = cached_shader->second.get();
state.draw.shader_program = current_shader->shader.handle;
state.Apply();
} else {
LOG_DEBUG(Render_OpenGL, "Creating new shader");
shader->shader.Create(GLShader::GenerateVertexShader().c_str(),
GLShader::GenerateFragmentShader(config).c_str());
// glUniform1i writes to the program object that is currently in use, so the new program is
// applied to the GL state before any sampler uniform is assigned below
state.draw.shader_program = shader->shader.handle;
state.Apply();
// Set the texture samplers to correspond to different texture units
// (a location of -1 means the uniform is not active in this program and is skipped)
GLint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]");
if (uniform_tex != -1) {
glUniform1i(uniform_tex, TextureUnits::PicaTexture(0).id);
}
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]");
if (uniform_tex != -1) {
glUniform1i(uniform_tex, TextureUnits::PicaTexture(1).id);
}
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
if (uniform_tex != -1) {
glUniform1i(uniform_tex, TextureUnits::PicaTexture(2).id);
}
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex_cube");
if (uniform_tex != -1) {
glUniform1i(uniform_tex, TextureUnits::TextureCube.id);
}
// Set the texture samplers to correspond to different lookup table texture units
GLint uniform_lut = glGetUniformLocation(shader->shader.handle, "lighting_lut");
if (uniform_lut != -1) {
glUniform1i(uniform_lut, TextureUnits::LightingLUT.id);
}
GLint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
if (uniform_fog_lut != -1) {
glUniform1i(uniform_fog_lut, TextureUnits::FogLUT.id);
}
GLint uniform_proctex_noise_lut =
glGetUniformLocation(shader->shader.handle, "proctex_noise_lut");
if (uniform_proctex_noise_lut != -1) {
glUniform1i(uniform_proctex_noise_lut, TextureUnits::ProcTexNoiseLUT.id);
}
GLint uniform_proctex_color_map =
glGetUniformLocation(shader->shader.handle, "proctex_color_map");
if (uniform_proctex_color_map != -1) {
glUniform1i(uniform_proctex_color_map, TextureUnits::ProcTexColorMap.id);
}
GLint uniform_proctex_alpha_map =
glGetUniformLocation(shader->shader.handle, "proctex_alpha_map");
if (uniform_proctex_alpha_map != -1) {
glUniform1i(uniform_proctex_alpha_map, TextureUnits::ProcTexAlphaMap.id);
}
GLint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut");
if (uniform_proctex_lut != -1) {
glUniform1i(uniform_proctex_lut, TextureUnits::ProcTexLUT.id);
}
GLint uniform_proctex_diff_lut =
glGetUniformLocation(shader->shader.handle, "proctex_diff_lut");
if (uniform_proctex_diff_lut != -1) {
glUniform1i(uniform_proctex_diff_lut, TextureUnits::ProcTexDiffLUT.id);
}
// Ownership of the new shader moves into the cache; current_shader points at the cached
// instance from here on (the local `shader` must not be used after this line)
current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
// Attach the "shader_data" uniform block to binding point 0, after checking that the size
// reported for the block equals sizeof(UniformData)
GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
if (block_index != GL_INVALID_INDEX) {
GLint block_size;
glGetActiveUniformBlockiv(current_shader->shader.handle, block_index,
GL_UNIFORM_BLOCK_DATA_SIZE, &block_size);
ASSERT_MSG(block_size == sizeof(UniformData),
"Uniform block size did not match! Got {}, expected {}",
static_cast<int>(block_size), sizeof(UniformData));
glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
}
}
// Hand the same configuration to the shader program manager so it selects the matching
// fragment shader
shader_program_manager->UseFragmentShader(config);
}
void RasterizerOpenGL::SyncClipEnabled() {

@ -8,12 +8,10 @@
#include <cstddef>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <vector>
#include <glad/glad.h>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_state.h"
@ -25,13 +23,14 @@
#include "video_core/regs_texturing.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
#include "video_core/shader/shader.h"
struct ScreenInfo;
class ShaderProgramManager;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
@ -52,12 +51,6 @@ public:
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) override;
/// OpenGL shader generated for a given Pica register state.
/// Instances are stored in shader_cache, keyed by the GLShader::PicaShaderConfig they were
/// generated from.
struct PicaShader {
/// OpenGL shader resource; its `handle` is what gets written to state.draw.shader_program
OGLShader shader;
};
private:
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
@ -121,47 +114,6 @@ private:
GLfloat view[3];
};
/// Per-light parameters; element type of the UniformData::light_src array.
/// Every GLvec3 member is forced to a 16-byte boundary with alignas(16), in line with the
/// "all vectors must be 16-byte aligned" rule stated for the uniform buffer structure.
// NOTE(review): presumably mirrors the light source struct declared in the generated GLSL -
// confirm against the shader generator before reordering or resizing any member.
struct LightSrc {
alignas(16) GLvec3 specular_0;
alignas(16) GLvec3 specular_1;
alignas(16) GLvec3 diffuse;
alignas(16) GLvec3 ambient;
alignas(16) GLvec3 position;
alignas(16) GLvec3 spot_direction; // negated
// The two scalars follow spot_direction without an explicit alignment request
GLfloat dist_atten_bias;
GLfloat dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
// The size of this struct is compared at runtime with the size reported for the "shader_data"
// uniform block, so any layout change must be mirrored in the shader.
struct UniformData {
GLint framebuffer_scale;
GLint alphatest_ref;
GLfloat depth_scale;
GLfloat depth_offset;
GLint scissor_x1;
GLint scissor_y1;
GLint scissor_x2;
GLint scissor_y2;
alignas(16) GLvec3 fog_color;
alignas(8) GLvec2 proctex_noise_f;
alignas(8) GLvec2 proctex_noise_a;
alignas(8) GLvec2 proctex_noise_p;
alignas(16) GLvec3 lighting_global_ambient;
LightSrc light_src[8];
alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) GLvec4 tev_combiner_buffer_color;
alignas(16) GLvec4 clip_coef;
};
// Compile-time guard: the expected total size is 0x460 (1120) bytes
static_assert(
sizeof(UniformData) == 0x460,
"The size of the UniformData structure has changed, update the structure in the shader");
// Compile-time guard: the structure has to stay below 16384 bytes
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Syncs entire status to match PICA registers
void SyncEntireState();
@ -269,8 +221,6 @@ private:
std::vector<HardwareVertex> vertex_batch;
std::unordered_map<GLShader::PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
const PicaShader* current_shader = nullptr;
bool shader_dirty;
struct {
@ -285,6 +235,8 @@ private:
bool dirty;
} uniform_block_data = {};
std::unique_ptr<ShaderProgramManager> shader_program_manager;
std::array<SamplerInfo, 3> texture_samplers;
OGLVertexArray vertex_array;
static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;

@ -0,0 +1,216 @@
// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <unordered_map>
#include <boost/functional/hash.hpp>
#include <boost/variant.hpp>
#include "video_core/renderer_opengl/gl_shader_manager.h"
/**
 * Attaches a named uniform block of a program object to a uniform buffer binding point.
 * Does nothing when the program has no active block with that name. Otherwise the block's data
 * size reported by the driver is asserted to equal expected_size before the binding is set.
 * @param shader Handle of the program object to configure
 * @param name Name of the uniform block as declared in the shader source
 * @param binding Binding point the block is attached to
 * @param expected_size Size in bytes of the host-side structure backing the block
 */
static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
                                         size_t expected_size) {
    const GLuint ub_index = glGetUniformBlockIndex(shader, name);
    if (ub_index == GL_INVALID_INDEX) {
        return;
    }

    GLint ub_size = 0;
    glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
    // The assert macro takes "{}" placeholders (as in the rasterizer's equivalent check), and
    // the signed GL size is converted explicitly for the comparison with size_t.
    ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size,
               "Uniform block size did not match! Got {}, expected {}",
               static_cast<int>(ub_size), expected_size);
    glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
}
/// Sets up all uniform block bindings of a program object. Currently this is only the
/// "shader_data" block, which is attached to UniformBindings::Common and must have the size of
/// UniformData.
static void SetShaderUniformBlockBindings(GLuint shader) {
    constexpr size_t common_block_size = sizeof(UniformData);
    SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common,
                                 common_block_size);
}
/// Points the sampler uniform `name` of `shader` at the texture unit `binding`.
/// A sampler that has no location in the program (-1) is silently skipped. The uniform is
/// written with glUniform1i, so the program has to be the one currently in use.
static void SetShaderSamplerBinding(GLuint shader, const char* name,
                                    TextureUnits::TextureUnit binding) {
    const GLint location = glGetUniformLocation(shader, name);
    if (location == -1) {
        return;
    }
    glUniform1i(location, binding.id);
}
/// Assigns every sampler uniform of `shader` to its fixed texture unit.
/// The program is temporarily made current through a copy of the current OpenGLState (the
/// uniforms are written with glUniform1i), and the previously selected program is restored
/// afterwards.
static void SetShaderSamplerBindings(GLuint shader) {
    OpenGLState cur_state = OpenGLState::GetCurState();
    const GLuint previous_program = cur_state.draw.shader_program;
    cur_state.draw.shader_program = shader;
    cur_state.Apply();

    struct SamplerSlot {
        const char* name;
        TextureUnits::TextureUnit unit;
    };
    const SamplerSlot slots[] = {
        // Texture samplers, one per texture unit
        {"tex0", TextureUnits::PicaTexture(0)},
        {"tex1", TextureUnits::PicaTexture(1)},
        {"tex2", TextureUnits::PicaTexture(2)},
        {"tex_cube", TextureUnits::TextureCube},
        // Lookup table samplers
        {"lighting_lut", TextureUnits::LightingLUT},
        {"fog_lut", TextureUnits::FogLUT},
        {"proctex_noise_lut", TextureUnits::ProcTexNoiseLUT},
        {"proctex_color_map", TextureUnits::ProcTexColorMap},
        {"proctex_alpha_map", TextureUnits::ProcTexAlphaMap},
        {"proctex_lut", TextureUnits::ProcTexLUT},
        {"proctex_diff_lut", TextureUnits::ProcTexDiffLUT},
    };
    for (const SamplerSlot& slot : slots) {
        SetShaderSamplerBinding(shader, slot.name, slot.unit);
    }

    cur_state.draw.shader_program = previous_program;
    cur_state.Apply();
}
/**
* An object representing a shader program staging. It can be either a shader object or a program
* object, depending on whether separable program is used.
*/
class OGLShaderStage {
public:
explicit OGLShaderStage(bool separable) {
if (separable) {
shader_or_program = OGLProgram();
} else {
shader_or_program = OGLShader();
}
}
void Create(const char* source, GLenum type) {
if (shader_or_program.which() == 0) {
boost::get<OGLShader>(shader_or_program).Create(source, type);
} else {
OGLShader shader;
shader.Create(source, type);
OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
program.Create(true, {shader.handle});
SetShaderUniformBlockBindings(program.handle);
SetShaderSamplerBindings(program.handle);
}
}
GLuint GetHandle() const {
if (shader_or_program.which() == 0) {
return boost::get<OGLShader>(shader_or_program).handle;
} else {
return boost::get<OGLProgram>(shader_or_program).handle;
}
}
private:
boost::variant<OGLShader, OGLProgram> shader_or_program;
};
class TrivialVertexShader {
public:
explicit TrivialVertexShader(bool separable) : program(separable) {
program.Create(GLShader::GenerateTrivialVertexShader(separable).c_str(), GL_VERTEX_SHADER);
}
GLuint Get() const {
return program.GetHandle();
}
private:
OGLShaderStage program;
};
/**
 * Cache of shader stages of one kind, keyed by the configuration they were generated from.
 * @tparam KeyConfigType Configuration type used as the cache key (must be hashable)
 * @tparam CodeGenerator Function producing the shader source for a configuration
 * @tparam ShaderType GL shader type passed on to OGLShaderStage::Create
 */
template <typename KeyConfigType, std::string (*CodeGenerator)(const KeyConfigType&, bool),
          GLenum ShaderType>
class ShaderCache {
public:
    /// @param separable whether the cached stages are created as separable programs
    explicit ShaderCache(bool separable) : separable(separable) {}

    /// Returns the handle of the stage for `config`, generating and compiling it on first use.
    GLuint Get(const KeyConfigType& config) {
        // try_emplace constructs the stage object only when the key is not present yet, so a
        // cache hit no longer builds and destroys a temporary OGLShaderStage
        auto [iter, new_shader] = shaders.try_emplace(config, separable);
        OGLShaderStage& cached_shader = iter->second;
        if (new_shader) {
            cached_shader.Create(CodeGenerator(config, separable).c_str(), ShaderType);
        }
        return cached_shader.GetHandle();
    }

private:
    bool separable;
    std::unordered_map<KeyConfigType, OGLShaderStage> shaders;
};
/// Cache of fragment shader stages, keyed by GLShader::PicaShaderConfig; the source of each
/// stage comes from GLShader::GenerateFragmentShader and is compiled as GL_FRAGMENT_SHADER.
using FragmentShaders =
ShaderCache<GLShader::PicaShaderConfig, &GLShader::GenerateFragmentShader, GL_FRAGMENT_SHADER>;
/// Private state of ShaderProgramManager: the currently selected stage handles, the stage
/// caches, and either a program pipeline (separable mode) or a cache of linked programs
/// (non-separable mode).
class ShaderProgramManager::Impl {
public:
    /// @param separable true to combine stages through a program pipeline, which is created
    ///                  here; false to link and cache one program per stage combination
    // The mem-initializers are listed in member declaration order, which is the order they run
    // in regardless of how they are written.
    explicit Impl(bool separable)
        : trivial_vertex_shader(separable), fragment_shaders(separable), separable(separable) {
        if (separable) {
            pipeline.Create();
        }
    }

    /// Handles of the selected vertex, geometry and fragment stages; 0 means "no stage".
    /// Used as the key of program_cache.
    struct ShaderTuple {
        GLuint vs = 0;
        GLuint gs = 0;
        GLuint fs = 0;

        bool operator==(const ShaderTuple& rhs) const {
            return vs == rhs.vs && gs == rhs.gs && fs == rhs.fs;
        }

        bool operator!=(const ShaderTuple& rhs) const {
            return !(*this == rhs);
        }

        /// Hash functor combining the three handles
        struct Hash {
            std::size_t operator()(const ShaderTuple& tuple) const {
                std::size_t hash = 0;
                boost::hash_combine(hash, tuple.vs);
                boost::hash_combine(hash, tuple.gs);
                boost::hash_combine(hash, tuple.fs);
                return hash;
            }
        };
    };

    /// Stage selection that the next ApplyTo() call will use
    ShaderTuple current;

    TrivialVertexShader trivial_vertex_shader;
    FragmentShaders fragment_shaders;

    bool separable;
    /// Linked programs per stage combination; only filled in non-separable mode
    std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache;
    /// Program pipeline; only created in separable mode
    OGLPipeline pipeline;
};
/// Creates the manager and its private implementation object.
/// @param separable forwarded to Impl; selects between separable per-stage programs and
///                  monolithic linked programs
ShaderProgramManager::ShaderProgramManager(bool separable)
: impl(std::make_unique<Impl>(separable)) {}
// Defaulted in this translation unit, where Impl is a complete type, so that
// std::unique_ptr<Impl> can destroy it (the header only forward-declares Impl).
ShaderProgramManager::~ShaderProgramManager() = default;
void ShaderProgramManager::UseTrivialVertexShader() {
impl->current.vs = impl->trivial_vertex_shader.Get();
}
void ShaderProgramManager::UseTrivialGeometryShader() {
impl->current.gs = 0;
}
/// Selects the fragment stage matching `config` for the next ApplyTo() call. The stage is
/// taken from the fragment shader cache, which generates and compiles it on first use.
void ShaderProgramManager::UseFragmentShader(const GLShader::PicaShaderConfig& config) {
    const GLuint fragment_stage = impl->fragment_shaders.Get(config);
    impl->current.fs = fragment_stage;
}
/// Writes the currently selected stages into `state`.
/// Non-separable mode: looks up (or links and caches) the program for the selected
/// (vs, gs, fs) combination and stores its handle in state.draw.shader_program.
/// Separable mode: attaches the selected stages to the program pipeline, clears
/// state.draw.shader_program and stores the pipeline in state.draw.program_pipeline.
/// This function does not call state.Apply() on the passed state.
void ShaderProgramManager::ApplyTo(OpenGLState& state) {
    Impl& self = *impl;
    const auto& selected = self.current;

    if (!self.separable) {
        // operator[] default-constructs an empty program (handle 0) for a new combination
        OGLProgram& linked_program = self.program_cache[selected];
        if (linked_program.handle == 0) {
            linked_program.Create(false, {selected.vs, selected.gs, selected.fs});
            SetShaderUniformBlockBindings(linked_program.handle);
            SetShaderSamplerBindings(linked_program.handle);
        }
        state.draw.shader_program = linked_program.handle;
        return;
    }

    const GLuint pipeline_handle = self.pipeline.handle;
    // Without this resetting, AMD sometimes freezes when one stage is changed but not for the
    // others
    glUseProgramStages(pipeline_handle,
                       GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);

    glUseProgramStages(pipeline_handle, GL_VERTEX_SHADER_BIT, selected.vs);
    glUseProgramStages(pipeline_handle, GL_GEOMETRY_SHADER_BIT, selected.gs);
    glUseProgramStages(pipeline_handle, GL_FRAGMENT_SHADER_BIT, selected.fs);

    state.draw.shader_program = 0;
    state.draw.program_pipeline = pipeline_handle;
}

@ -0,0 +1,73 @@
// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
/// Uniform buffer binding points. The enumerator value is cast to GLuint and passed to
/// glUniformBlockBinding; Common (value 0) is the binding used for the "shader_data" block.
enum class UniformBindings : GLuint { Common };
/// Per-light parameters; element type of the UniformData::light_src array.
/// Every GLvec3 member is forced to a 16-byte boundary with alignas(16), in line with the
/// "all vectors must be 16-byte aligned" rule stated for the uniform buffer structure.
// NOTE(review): presumably mirrors the light source struct declared in the generated GLSL -
// confirm against the shader generator before reordering or resizing any member.
struct LightSrc {
alignas(16) GLvec3 specular_0;
alignas(16) GLvec3 specular_1;
alignas(16) GLvec3 diffuse;
alignas(16) GLvec3 ambient;
alignas(16) GLvec3 position;
alignas(16) GLvec3 spot_direction; // negated
// The two scalars follow spot_direction without an explicit alignment request
GLfloat dist_atten_bias;
GLfloat dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
// sizeof(UniformData) is the expected size handed to the "shader_data" uniform block check in
// gl_shader_manager.cpp, so any layout change must be mirrored in the shader.
struct UniformData {
GLint framebuffer_scale;
GLint alphatest_ref;
GLfloat depth_scale;
GLfloat depth_offset;
GLint scissor_x1;
GLint scissor_y1;
GLint scissor_x2;
GLint scissor_y2;
alignas(16) GLvec3 fog_color;
alignas(8) GLvec2 proctex_noise_f;
alignas(8) GLvec2 proctex_noise_a;
alignas(8) GLvec2 proctex_noise_p;
alignas(16) GLvec3 lighting_global_ambient;
LightSrc light_src[8];
alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) GLvec4 tev_combiner_buffer_color;
alignas(16) GLvec4 clip_coef;
};
// Compile-time guard: the expected total size is 0x460 (1120) bytes
static_assert(
sizeof(UniformData) == 0x460,
"The size of the UniformData structure has changed, update the structure in the shader");
// Compile-time guard: the structure has to stay below 16384 bytes
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// A class that manages different shader stages and configures them with given config data.
/// Stages are selected with the Use*() functions; ApplyTo() then writes the resulting program
/// (or program pipeline, in separable mode) into an OpenGLState.
class ShaderProgramManager {
public:
/// @param separable true to build each stage as a separable program and combine the stages
///                  through a program pipeline; false to link one program per combination of
///                  stages and cache it
explicit ShaderProgramManager(bool separable);
/// Declared here and defined in the source file, because Impl is only forward-declared in
/// this header.
~ShaderProgramManager();
/// Selects the built-in trivial vertex shader as the vertex stage.
void UseTrivialVertexShader();
/// Selects no geometry stage (handle 0).
void UseTrivialGeometryShader();
/// Selects the fragment stage for `config`, generating and caching it on first use.
void UseFragmentShader(const GLShader::PicaShaderConfig& config);
/// Stores the selected program or pipeline in `state.draw`. It does not call Apply() on
/// `state`; the caller does that afterwards.
void ApplyTo(OpenGLState& state);
private:
// Implementation details are hidden behind an opaque Impl defined in the source file
class Impl;
std::unique_ptr<Impl> impl;
};