From 32c1bc6a67820f9df21c8f64f4df078b015aa7da Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 6 Nov 2019 04:32:43 -0300 Subject: [PATCH 01/15] shader/texture: Deduce texture buffers from locker Instead of specializing shaders to separate texture buffers from 1D textures, use the locker to deduce them while they are being decoded. --- .../renderer_opengl/gl_rasterizer.cpp | 45 +++----- .../renderer_opengl/gl_rasterizer.h | 15 ++- .../renderer_opengl/gl_shader_cache.cpp | 12 --- .../renderer_opengl/gl_shader_decompiler.cpp | 39 ++----- .../renderer_opengl/gl_shader_disk_cache.cpp | 37 +++---- .../renderer_opengl/gl_shader_disk_cache.h | 8 +- src/video_core/shader/decode/texture.cpp | 102 +++++++----------- src/video_core/shader/node.h | 14 ++- src/video_core/shader/shader_ir.h | 13 ++- 9 files changed, 109 insertions(+), 176 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 05f8e511b..b76de71ec 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -271,9 +271,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const auto stage = static_cast(index == 0 ? 0 : index - 1); SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)}; + SetupDrawTextures(stage, shader, base_bindings); - const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; + const ProgramVariant variant{base_bindings, primitive_mode}; const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); switch (program) { @@ -303,7 +303,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // When VertexA is enabled, we have dual vertex shaders if (program == Maxwell::ShaderProgram::VertexA) { // VertexB was combined with VertexA, so we skip the VertexB iteration - index++; + ++index; } base_bindings = next_bindings; @@ -732,11 +732,10 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } auto kernel = shader_cache.GetComputeKernel(code_addr); - ProgramVariant variant; - variant.texture_buffer_usage = SetupComputeTextures(kernel); + SetupComputeTextures(kernel); SetupComputeImages(kernel); - const auto [program, next_bindings] = kernel->GetProgramHandle(variant); + const auto [program, next_bindings] = kernel->GetProgramHandle({}); state.draw.shader_program = program; state.draw.program_pipeline = 0; @@ -918,9 +917,8 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); } -TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, - const Shader& shader, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, const Shader& shader, + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); @@ -929,8 +927,6 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); - TextureBufferUsage texture_buffer_usage{0}; - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; const auto texture = [&] { @@ -943,15 +939,11 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag return maxwell3d.GetTextureInfo(tex_handle); }(); - if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) { - texture_buffer_usage.set(bindpoint); - } + SetupTexture(base_bindings.sampler + bindpoint, texture, entry); } - - return texture_buffer_usage; } -TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { +void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& compute = system.GPU().KeplerCompute(); const auto& entries = kernel->GetShaderEntries().samplers; @@ -959,8 +951,6 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) ASSERT_MSG(entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); - TextureBufferUsage texture_buffer_usage{0}; - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; const auto texture = [&] { @@ -972,34 +962,29 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) return compute.GetTextureInfo(tex_handle); }(); - if (SetupTexture(bindpoint, texture, entry)) { - texture_buffer_usage.set(bindpoint); - } + SetupTexture(bindpoint, texture, entry); } - - return texture_buffer_usage; } -bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, +void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, const GLShader::SamplerEntry& entry) { - state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); - const auto view = texture_cache.GetTextureSurface(texture.tic, entry); if (!view) { // Can occur when texture addr is null or its memory is unmapped/invalid + state.samplers[binding] = 0; state.textures[binding] = 0; - return false; + return; } state.textures[binding] = view->GetTexture(); if (view->GetSurfaceParams().IsBuffer()) { - return true; + return; } + state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); // Apply swizzle to textures that are not buffers. view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, texture.tic.w_source); - return false; } void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bd6fe5c3a..0e0819d59 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -107,16 +107,15 @@ private: /// Syncs all the state, shaders, render targets and textures setting before a draw call. void DrawPrelude(); - /// Configures the current textures to use for the draw command. Returns shaders texture buffer - /// usage. - TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, BaseBindings base_bindings); + /// Configures the current textures to use for the draw command. + void SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, + BaseBindings base_bindings); - /// Configures the textures used in a compute shader. Returns texture buffer usage. - TextureBufferUsage SetupComputeTextures(const Shader& kernel); + /// Configures the textures used in a compute shader. + void SetupComputeTextures(const Shader& kernel); - /// Configures a texture. Returns true when the texture is a texture buffer. - bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, + /// Configures a texture. + void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, const GLShader::SamplerEntry& entry); /// Configures images in a compute shader. diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 04a239a39..7ce06a978 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -270,7 +270,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy auto base_bindings{variant.base_bindings}; const auto primitive_mode{variant.primitive_mode}; - const auto texture_buffer_usage{variant.texture_buffer_usage}; std::string source = fmt::format(R"(// {} #version 430 core @@ -317,17 +316,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); } - // Transform 1D textures to texture samplers by declaring its preprocessor macros. - for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) { - if (!texture_buffer_usage.test(i)) { - continue; - } - source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i); - } - if (texture_buffer_usage.any()) { - source += '\n'; - } - if (program_type == ProgramType::Geometry) { const auto [glsl_topology, debug_name, max_vertices] = GetPrimitiveDescription(primitive_mode); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4f2b49170..51c80bf32 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -658,9 +658,11 @@ private: const std::string description{"layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) + ") uniform"}; std::string sampler_type = [&]() { + if (sampler.IsBuffer()) { + return "samplerBuffer"; + } switch (sampler.GetType()) { case Tegra::Shader::TextureType::Texture1D: - // Special cased, read below. return "sampler1D"; case Tegra::Shader::TextureType::Texture2D: return "sampler2D"; @@ -680,19 +682,7 @@ private: sampler_type += "Shadow"; } - if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) { - // 1D textures can be aliased to texture buffers, hide the declarations behind a - // preprocessor flag and use one or the other from the GPU state. This has to be - // done because shaders don't have enough information to determine the texture type. - EmitIfdefIsBuffer(sampler); - code.AddLine("{} samplerBuffer {};", description, name); - code.AddLine("#else"); - code.AddLine("{} {} {};", description, sampler_type, name); - code.AddLine("#endif"); - } else { - // The other texture types (2D, 3D and cubes) don't have this issue. - code.AddLine("{} {} {};", description, sampler_type, name); - } + code.AddLine("{} {} {};", description, sampler_type, name); } if (!samplers.empty()) { code.AddNewLine(); @@ -1749,27 +1739,14 @@ private: expr += ", "; } - // Store a copy of the expression without the lod to be used with texture buffers - std::string expr_buffer = expr; - - if (meta->lod) { + if (meta->lod && !meta->sampler.IsBuffer()) { expr += ", "; expr += Visit(meta->lod).AsInt(); } expr += ')'; expr += GetSwizzle(meta->element); - expr_buffer += ')'; - expr_buffer += GetSwizzle(meta->element); - - const std::string tmp{code.GenerateTemporary()}; - EmitIfdefIsBuffer(meta->sampler); - code.AddLine("float {} = {};", tmp, expr_buffer); - code.AddLine("#else"); - code.AddLine("float {} = {};", tmp, expr); - code.AddLine("#endif"); - - return {tmp, Type::Float}; + return {std::move(expr), Type::Float}; } Expression ImageLoad(Operation operation) { @@ -2214,10 +2191,6 @@ private: return GetDeclarationWithSuffix(static_cast(image.GetIndex()), "image"); } - void EmitIfdefIsBuffer(const Sampler& sampler) { - code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); - } - std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { return fmt::format("{}_{}_{}", name, index, suffix); } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 184a565e6..3f4daf28d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -28,23 +28,6 @@ using VideoCommon::Shader::KeyMap; namespace { -struct ConstBufferKey { - u32 cbuf; - u32 offset; - u32 value; -}; - -struct BoundSamplerKey { - u32 offset; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerKey { - u32 cbuf; - u32 offset; - Tegra::Engines::SamplerDescriptor sampler; -}; - using ShaderCacheVersionHash = std::array; enum class TransferableEntryKind : u32 { @@ -52,10 +35,28 @@ enum class TransferableEntryKind : u32 { Usage, }; -constexpr u32 NativeVersion = 5; +struct ConstBufferKey { + u32 cbuf{}; + u32 offset{}; + u32 value{}; +}; + +struct BoundSamplerKey { + u32 offset{}; + Tegra::Engines::SamplerDescriptor sampler{}; +}; + +struct BindlessSamplerKey { + u32 cbuf{}; + u32 offset{}; + Tegra::Engines::SamplerDescriptor sampler{}; +}; + +constexpr u32 NativeVersion = 6; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); +static_assert(sizeof(ProgramVariant) == 20); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index db23ada93..55311dc6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -37,7 +36,6 @@ struct ShaderDiskCacheDump; using ProgramCode = std::vector; using ShaderDumpsMap = std::unordered_map; -using TextureBufferUsage = std::bitset<64>; /// Allocated bindings used by an OpenGL shader program struct BaseBindings { @@ -61,11 +59,10 @@ static_assert(std::is_trivially_copyable_v); struct ProgramVariant { BaseBindings base_bindings; GLenum primitive_mode{}; - TextureBufferUsage texture_buffer_usage{}; bool operator==(const ProgramVariant& rhs) const { - return std::tie(base_bindings, primitive_mode, texture_buffer_usage) == - std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage); + return std::tie(base_bindings, primitive_mode) == + std::tie(rhs.base_bindings, rhs.primitive_mode); } bool operator!=(const ProgramVariant& rhs) const { @@ -112,7 +109,6 @@ template <> struct hash { std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { return std::hash()(variant.base_bindings) ^ - std::hash()(variant.texture_buffer_usage) ^ (static_cast(variant.primitive_mode) << 6); } }; diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bb926a132..695fdbd24 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -128,8 +128,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } const Node component = Immediate(static_cast(instr.tld4s.component)); - const auto& sampler = - GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); + const SamplerInfo info{TextureType::Texture2D, false, depth_compare}; + const auto& sampler = GetSampler(instr.sampler, info); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -149,7 +149,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); + is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); u32 indexer = 0; switch (instr.txq.query_type) { @@ -185,8 +185,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; const auto& sampler = - is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) - : GetSampler(instr.sampler, {{texture_type, is_array, false}}); + is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); std::vector coords; @@ -254,67 +253,50 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } +ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional sampler_info, u32 offset, + std::optional buffer) { + if (sampler_info) { + return *sampler_info; + } + const auto sampler = + buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); + if (!sampler) { + LOG_WARNING(HW_GPU, "Unknown sampler info"); + return SamplerInfo{TextureType::Texture2D, false, false, false}; + } + return SamplerInfo{sampler->texture_type, sampler->is_array != 0, sampler->is_shadow != 0, + sampler->is_buffer != 0}; +} + const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, std::optional sampler_info) { const auto offset = static_cast(sampler.index.Value()); - - TextureType type; - bool is_array; - bool is_shadow; - if (sampler_info) { - type = sampler_info->type; - is_array = sampler_info->is_array; - is_shadow = sampler_info->is_shadow; - } else if (const auto sampler = locker.ObtainBoundSampler(offset)) { - type = sampler->texture_type.Value(); - is_array = sampler->is_array.Value() != 0; - is_shadow = sampler->is_shadow.Value() != 0; - } else { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - type = TextureType::Texture2D; - is_array = false; - is_shadow = false; - } + const auto info = GetSamplerInfo(sampler_info, offset); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), [offset](const Sampler& entry) { return entry.GetOffset() == offset; }); if (it != used_samplers.end()) { - ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && - it->IsShadow() == is_shadow); + ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && + it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer); return *it; } // Otherwise create a new mapping for this sampler const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow)); + return used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, + info.is_buffer); } -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, +const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, std::optional sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_sampler, buffer, offset] = TrackCbuf(sampler_register, global_code, static_cast(global_code.size())); ASSERT(base_sampler != nullptr); - TextureType type; - bool is_array; - bool is_shadow; - if (sampler_info) { - type = sampler_info->type; - is_array = sampler_info->is_array; - is_shadow = sampler_info->is_shadow; - } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) { - type = sampler->texture_type.Value(); - is_array = sampler->is_array.Value() != 0; - is_shadow = sampler->is_shadow.Value() != 0; - } else { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - type = TextureType::Texture2D; - is_array = false; - is_shadow = false; - } + const auto info = GetSamplerInfo(sampler_info, offset, buffer); // If this sampler has already been used, return the existing mapping. const auto it = @@ -323,15 +305,15 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, return entry.GetBuffer() == buffer && entry.GetOffset() == offset; }); if (it != used_samplers.end()) { - ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && - it->IsShadow() == is_shadow); + ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && + it->IsShadow() == info.is_shadow); return *it; } // Otherwise create a new mapping for this sampler const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back( - Sampler(next_index, offset, buffer, type, is_array, is_shadow)); + return used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, + info.is_shadow, info.is_buffer); } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -416,17 +398,16 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); + const SamplerInfo info{texture_type, is_array, is_shadow, false}; const auto& sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) - : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); + is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - // LOD selection (either via bias or explicit textureLod) not - // supported in GL for sampler2DArrayShadow and - // samplerCubeArrayShadow. + // LOD selection (either via bias or explicit textureLod) not supported in GL for + // sampler2DArrayShadow and samplerCubeArrayShadow. const bool gl_lod_supported = !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); @@ -436,8 +417,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); - Node bias = {}; - Node lod = {}; + Node bias; + Node lod; if (process_mode != TextureProcessMode::None && gl_lod_supported) { switch (process_mode) { case TextureProcessMode::LZ: @@ -573,10 +554,9 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de u64 parameter_register = instr.gpr20.Value(); - const auto& sampler = - is_bindless - ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}}) - : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); + const SamplerInfo info{texture_type, is_array, depth_compare, false}; + const auto& sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) + : GetSampler(instr.sampler, info); std::vector aoffi; if (is_aoffi) { @@ -623,7 +603,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); + const auto& sampler = GetSampler(instr.sampler); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -659,7 +639,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); + const auto& sampler = GetSampler(instr.sampler); Node4 values; for (u32 element = 0; element < values.size(); ++element) { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 54217e6a4..44d85d434 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -225,14 +225,15 @@ class Sampler { public: /// This constructor is for bound samplers constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow) - : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} + bool is_array, bool is_shadow, bool is_buffer) + : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_buffer{is_buffer} {} /// This constructor is for bindless samplers constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow) + bool is_array, bool is_shadow, bool is_buffer) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_bindless{true} {} + is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} constexpr u32 GetIndex() const { return index; @@ -258,6 +259,10 @@ public: return is_shadow; } + constexpr bool IsBuffer() const { + return is_buffer; + } + constexpr bool IsBindless() const { return is_bindless; } @@ -270,6 +275,7 @@ private: Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 76a849818..2f71a50d2 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -179,6 +179,7 @@ private: Tegra::Shader::TextureType type; bool is_array; bool is_shadow; + bool is_buffer; }; void Decode(); @@ -303,13 +304,17 @@ private: /// Returns a predicate combiner operation OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); + /// Queries the missing sampler info from the execution context. + SamplerInfo GetSamplerInfo(std::optional sampler_info, u32 offset, + std::optional buffer = std::nullopt); + /// Accesses a texture sampler const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, - std::optional sampler_info); + std::optional sampler_info = std::nullopt); - // Accesses a texture sampler for a bindless texture. - const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, - std::optional sampler_info); + /// Accesses a texture sampler for a bindless texture. + const Sampler& GetBindlessSampler(Tegra::Shader::Register reg, + std::optional sampler_info = std::nullopt); /// Accesses an image. Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); From dc9961f341be64dcbc13097d4eb7b95db45f9fb9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 6 Nov 2019 22:43:02 -0300 Subject: [PATCH 02/15] shader/texture: Handle TLDS texture type mismatches Some games like "Fire Emblem: Three Houses" bind 2D textures to offsets used by instructions of 1D textures. To handle the discrepancy this commit uses the the texture type from the binding and modifies the emitted code IR to build a valid backend expression. E.g.: Bound texture is 2D and instruction is 1D, the emitted IR samples a 2D texture in the coordinate ivec2(X, 0). --- src/video_core/shader/decode/texture.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 695fdbd24..b094e5a06 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -616,6 +616,8 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { } Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { + const auto& sampler = GetSampler(instr.sampler); + const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; @@ -639,7 +641,14 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler); + // Fill empty entries from the guest sampler. + const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); + if (type_coord_count != entry_coord_count) { + LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); + } + for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { + coords.push_back(GetRegister(Register::ZeroIndex)); + } Node4 values; for (u32 element = 0; element < values.size(); ++element) { From 4f5d8e434278cd5999bf21e91f0923d55ec8d52b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 12 Nov 2019 23:26:56 -0300 Subject: [PATCH 03/15] gl_shader_cache: Specialize shader workgroup Drop the usage of ARB_compute_variable_group_size and specialize compute shaders instead. This permits compute to run on AMD and Intel proprietary drivers. --- src/video_core/engines/kepler_compute.h | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 21 +++---- .../renderer_opengl/gl_shader_cache.cpp | 63 +++++++++---------- .../renderer_opengl/gl_shader_cache.h | 6 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 4 +- .../renderer_opengl/gl_shader_disk_cache.h | 48 +++++++++----- 6 files changed, 75 insertions(+), 69 deletions(-) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 5259d92bd..bd49c6627 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -140,7 +140,7 @@ public: INSERT_PADDING_WORDS(0x3); - BitField<0, 16, u32> shared_alloc; + BitField<0, 18, u32> shared_alloc; BitField<16, 16, u32> block_dim_x; union { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b76de71ec..bd4e5f6e3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -273,8 +273,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SetupDrawGlobalMemory(stage, shader); SetupDrawTextures(stage, shader, base_bindings); - const ProgramVariant variant{base_bindings, primitive_mode}; - const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); + const ProgramVariant variant(base_bindings, primitive_mode); + const auto [program_handle, next_bindings] = shader->GetHandle(variant); switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -725,18 +725,14 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - if (!GLAD_GL_ARB_compute_variable_group_size) { - LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " - "lack of GL_ARB_compute_variable_group_size"); - return; - } - auto kernel = shader_cache.GetComputeKernel(code_addr); SetupComputeTextures(kernel); SetupComputeImages(kernel); - const auto [program, next_bindings] = kernel->GetProgramHandle({}); - state.draw.shader_program = program; + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, + launch_desc.block_dim_z); + std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.program_pipeline = 0; const std::size_t buffer_size = @@ -760,10 +756,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { state.ApplyShaderProgram(); state.ApplyProgramPipeline(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, - launch_desc.grid_dim_z, launch_desc.block_dim_x, - launch_desc.block_dim_y, launch_desc.block_dim_z); + glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); } void RasterizerOpenGL::FlushAll() {} diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7ce06a978..a5789b6d3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -255,7 +255,7 @@ void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, const ProgramCode& program_code, const ProgramCode& program_code_b, - const ProgramVariant& variant, ConstBufferLocker& locker, + ConstBufferLocker& locker, const ProgramVariant& variant, bool hint_retrievable = false) { LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); @@ -268,17 +268,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy } const auto entries = GLShader::GetEntries(ir); - auto base_bindings{variant.base_bindings}; - const auto primitive_mode{variant.primitive_mode}; - std::string source = fmt::format(R"(// {} #version 430 core #extension GL_ARB_separate_shader_objects : enable )", GetShaderId(unique_identifier, program_type)); - if (is_compute) { - source += "#extension GL_ARB_compute_variable_group_size : require\n"; - } if (device.HasShaderBallot()) { source += "#extension GL_ARB_shader_ballot : require\n"; } @@ -295,6 +289,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy } source += '\n'; + auto base_bindings = variant.base_bindings; if (!is_compute) { source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); } @@ -318,13 +313,15 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy if (program_type == ProgramType::Geometry) { const auto [glsl_topology, debug_name, max_vertices] = - GetPrimitiveDescription(primitive_mode); + GetPrimitiveDescription(variant.primitive_mode); - source += "layout (" + std::string(glsl_topology) + ") in;\n\n"; - source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; + source += fmt::format("layout ({}) in;\n\n", glsl_topology); + source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); } if (program_type == ProgramType::Compute) { - source += "layout (local_size_variable) in;\n"; + source += + fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", + variant.block_x, variant.block_y, variant.block_z); } source += '\n'; @@ -422,58 +419,53 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params, unspecialized.code_b)); } -std::tuple CachedShader::GetProgramHandle(const ProgramVariant& variant) { - UpdateVariant(); +std::tuple CachedShader::GetHandle(const ProgramVariant& variant) { + EnsureValidLockerVariant(); - const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); + const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); auto& program = entry->second; if (is_cache_miss) { program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, - variant, *curr_variant->locker); - disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); + *curr_locker_variant->locker, variant); + disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } auto base_bindings = variant.base_bindings; base_bindings.cbuf += static_cast(entries.const_buffers.size()); - if (program_type != ProgramType::Compute) { - base_bindings.cbuf += STAGE_RESERVED_UBOS; - } + base_bindings.cbuf += STAGE_RESERVED_UBOS; base_bindings.gmem += static_cast(entries.global_memory_entries.size()); base_bindings.sampler += static_cast(entries.samplers.size()); return {program->handle, base_bindings}; } -void CachedShader::UpdateVariant() { - if (curr_variant && !curr_variant->locker->IsConsistent()) { - curr_variant = nullptr; +bool CachedShader::EnsureValidLockerVariant() { + const auto previous_variant = curr_locker_variant; + if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { + curr_locker_variant = nullptr; } - if (!curr_variant) { + if (!curr_locker_variant) { for (auto& variant : locker_variants) { if (variant->locker->IsConsistent()) { - curr_variant = variant.get(); + curr_locker_variant = variant.get(); } } } - if (!curr_variant) { + if (!curr_locker_variant) { auto& new_variant = locker_variants.emplace_back(); new_variant = std::make_unique(); new_variant->locker = MakeLocker(system, program_type); - curr_variant = new_variant.get(); + curr_locker_variant = new_variant.get(); } + return previous_variant == curr_locker_variant; } ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, const ConstBufferLocker& locker) const { - ShaderDiskCacheUsage usage; - usage.unique_identifier = unique_identifier; - usage.variant = variant; - usage.keys = locker.GetKeys(); - usage.bound_samplers = locker.GetBoundSamplers(); - usage.bindless_samplers = locker.GetBindlessSamplers(); - return usage; + return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), + locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -534,9 +526,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (!shader) { auto locker{MakeLocker(system, unspecialized.program_type)}; FillLocker(*locker, usage); + shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, - unspecialized.code, unspecialized.code_b, usage.variant, - *locker, true); + unspecialized.code, unspecialized.code_b, *locker, + usage.variant, true); } std::scoped_lock lock{mutex}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6bd7c9cf1..795b05a19 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -86,7 +86,7 @@ public: } /// Gets the GL program handle for the shader - std::tuple GetProgramHandle(const ProgramVariant& variant); + std::tuple GetHandle(const ProgramVariant& variant); private: struct LockerVariant { @@ -98,7 +98,7 @@ private: GLShader::ShaderEntries entries, ProgramCode program_code, ProgramCode program_code_b); - void UpdateVariant(); + bool EnsureValidLockerVariant(); ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, const VideoCommon::Shader::ConstBufferLocker& locker) const; @@ -117,7 +117,7 @@ private: ProgramCode program_code; ProgramCode program_code_b; - LockerVariant* curr_variant = nullptr; + LockerVariant* curr_locker_variant = nullptr; std::vector> locker_variants; }; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 3f4daf28d..9156f180a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -52,11 +52,11 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 6; +constexpr u32 NativeVersion = 7; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 20); +static_assert(sizeof(ProgramVariant) == 28); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 55311dc6d..4c7ca004d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -44,32 +44,49 @@ struct BaseBindings { u32 sampler{}; u32 image{}; - bool operator==(const BaseBindings& rhs) const { + bool operator==(const BaseBindings& rhs) const noexcept { return std::tie(cbuf, gmem, sampler, image) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); } - bool operator!=(const BaseBindings& rhs) const { + bool operator!=(const BaseBindings& rhs) const noexcept { return !operator==(rhs); } }; static_assert(std::is_trivially_copyable_v); -/// Describes the different variants a single program can be compiled. -struct ProgramVariant { - BaseBindings base_bindings; - GLenum primitive_mode{}; +/// Describes the different variants a program can be compiled with. +struct ProgramVariant final { + ProgramVariant() = default; - bool operator==(const ProgramVariant& rhs) const { - return std::tie(base_bindings, primitive_mode) == - std::tie(rhs.base_bindings, rhs.primitive_mode); + /// Graphics constructor. + explicit constexpr ProgramVariant(BaseBindings base_bindings, GLenum primitive_mode) noexcept + : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} + + /// Compute constructor. + explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z) noexcept + : block_x{block_x}, block_y{static_cast(block_y)}, block_z{static_cast(block_z)} { } - bool operator!=(const ProgramVariant& rhs) const { + // Graphics specific parameters. + BaseBindings base_bindings{}; + GLenum primitive_mode{}; + + // Compute specific parameters. + u32 block_x{}; + u16 block_y{}; + u16 block_z{}; + + bool operator==(const ProgramVariant& rhs) const noexcept { + return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z) == + std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, + rhs.block_z); + } + + bool operator!=(const ProgramVariant& rhs) const noexcept { return !operator==(rhs); } }; - static_assert(std::is_trivially_copyable_v); /// Describes how a shader is used. @@ -108,8 +125,11 @@ struct hash { template <> struct hash { std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { - return std::hash()(variant.base_bindings) ^ - (static_cast(variant.primitive_mode) << 6); + return std::hash{}(variant.base_bindings) ^ + (static_cast(variant.primitive_mode) << 6) ^ + static_cast(variant.block_x) ^ + (static_cast(variant.block_y) << 32) ^ + (static_cast(variant.block_z) << 48); } }; @@ -117,7 +137,7 @@ template <> struct hash { std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { return static_cast(usage.unique_identifier) ^ - std::hash()(usage.variant); + std::hash{}(usage.variant); } }; From dbeb52387979c7e28c0acb03dfc1468146947104 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 12 Nov 2019 23:39:45 -0300 Subject: [PATCH 04/15] gl_shader_cache: Specialize shared memory size Shared memory was being declared with an undefined size. Specialize from guest GPU parameters the compute shader's shared memory size. --- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_shader_cache.cpp | 7 ++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 23 ++++--------------- .../renderer_opengl/gl_shader_disk_cache.cpp | 4 ++-- .../renderer_opengl/gl_shader_disk_cache.h | 18 +++++++++------ 5 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bd4e5f6e3..ebfe52e6d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -731,7 +731,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z); + launch_desc.block_dim_z, launch_desc.shared_alloc); std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.program_pipeline = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a5789b6d3..982c4e23a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -8,7 +8,9 @@ #include #include #include +#include "common/alignment.h" #include "common/assert.h" +#include "common/logging/log.h" #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" @@ -322,6 +324,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy source += fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", variant.block_x, variant.block_y, variant.block_z); + + if (variant.shared_memory_size > 0) { + source += fmt::format("shared uint smem[{}];", + Common::AlignUp(variant.shared_memory_size, 4) / 4); + } } source += '\n'; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 51c80bf32..fb2ba0905 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -223,7 +223,7 @@ private: Type type{}; }; -constexpr const char* GetTypeString(Type type) { +const char* GetTypeString(Type type) { switch (type) { case Type::Bool: return "bool"; @@ -243,7 +243,7 @@ constexpr const char* GetTypeString(Type type) { } } -constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { +const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { switch (image_type) { case Tegra::Shader::ImageType::Texture1D: return "1D"; @@ -522,13 +522,6 @@ private: code.AddNewLine(); } - void DeclareSharedMemory() { - if (stage != ProgramType::Compute) { - return; - } - code.AddLine("shared uint {}[];", GetSharedMemory()); - } - void DeclareInternalFlags() { for (u32 flag = 0; flag < static_cast(InternalFlag::Amount); flag++) { const auto flag_code = static_cast(flag); @@ -867,9 +860,7 @@ private: } if (const auto smem = std::get_if(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), - Type::Uint}; + return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; } if (const auto internal_flag = std::get_if(&*node)) { @@ -1245,9 +1236,7 @@ private: Type::Uint}; } else if (const auto smem = std::get_if(&*dest)) { ASSERT(stage == ProgramType::Compute); - target = { - fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), - Type::Uint}; + target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; } else if (const auto gmem = std::get_if(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()).AsUint(); const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); @@ -2170,10 +2159,6 @@ private: return "lmem_" + suffix; } - std::string GetSharedMemory() const { - return fmt::format("smem_{}", suffix); - } - std::string GetInternalFlag(InternalFlag flag) const { constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", "overflow_flag"}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9156f180a..d2bb8502a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -52,11 +52,11 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 7; +constexpr u32 NativeVersion = 8; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 28); +static_assert(sizeof(ProgramVariant) == 32); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4c7ca004d..6f8e51364 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -64,9 +64,10 @@ struct ProgramVariant final { : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} /// Compute constructor. - explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z) noexcept - : block_x{block_x}, block_y{static_cast(block_y)}, block_z{static_cast(block_z)} { - } + explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, + u32 shared_memory_size) noexcept + : block_x{block_x}, block_y{static_cast(block_y)}, block_z{static_cast(block_z)}, + shared_memory_size{shared_memory_size} {} // Graphics specific parameters. BaseBindings base_bindings{}; @@ -76,11 +77,13 @@ struct ProgramVariant final { u32 block_x{}; u16 block_y{}; u16 block_z{}; + u32 shared_memory_size{}; bool operator==(const ProgramVariant& rhs) const noexcept { - return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z) == - std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, - rhs.block_z); + return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, + shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode, + rhs.block_x, rhs.block_y, rhs.block_z, + rhs.shared_memory_size); } bool operator!=(const ProgramVariant& rhs) const noexcept { @@ -129,7 +132,8 @@ struct hash { (static_cast(variant.primitive_mode) << 6) ^ static_cast(variant.block_x) ^ (static_cast(variant.block_y) << 32) ^ - (static_cast(variant.block_z) << 48); + (static_cast(variant.block_z) << 48) ^ + (static_cast(variant.shared_memory_size) << 16); } }; From 287ae2b9e8ea38642a4c8e36f7863d881d4c0e87 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 13 Nov 2019 00:25:52 -0300 Subject: [PATCH 05/15] gl_shader_cache: Specialize local memory size for compute shaders Local memory size in compute shaders was stubbed with an arbitary size. This commit specializes local memory size from guest GPU parameters. --- src/video_core/engines/kepler_compute.h | 7 ++++++- .../renderer_opengl/gl_rasterizer.cpp | 3 ++- .../renderer_opengl/gl_shader_cache.cpp | 5 +++++ .../renderer_opengl/gl_shader_decompiler.cpp | 18 ++++++++---------- .../renderer_opengl/gl_shader_disk_cache.cpp | 4 ++-- .../renderer_opengl/gl_shader_disk_cache.h | 16 +++++++++------- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index bd49c6627..c526287b7 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -178,7 +178,12 @@ public: BitField<24, 5, u32> gpr_alloc; }; - INSERT_PADDING_WORDS(0x11); + union { + BitField<0, 20, u32> local_crs_alloc; + BitField<24, 5, u32> sass_version; + }; + + INSERT_PADDING_WORDS(0x10); } launch_description{}; struct { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ebfe52e6d..d890076f8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z, launch_desc.shared_alloc); + launch_desc.block_dim_z, launch_desc.shared_alloc, + launch_desc.local_pos_alloc); std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.program_pipeline = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 982c4e23a..b23a982d7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy source += fmt::format("shared uint smem[{}];", Common::AlignUp(variant.shared_memory_size, 4) / 4); } + + if (variant.local_memory_size > 0) { + source += fmt::format("#define LOCAL_MEMORY_SIZE {}", + Common::AlignUp(variant.local_memory_size, 4) / 4); + } } source += '\n'; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index fb2ba0905..fe016c05c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -510,10 +510,14 @@ private: } void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 local_memory_size = - stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); + if (stage == ProgramType::Compute) { + code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); + code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); + code.AddLine("#endif"); + return; + } + + const u64 local_memory_size = header.GetLocalMemorySize(); if (local_memory_size == 0) { return; } @@ -851,9 +855,6 @@ private: } if (const auto lmem = std::get_if(&*node)) { - if (stage == ProgramType::Compute) { - LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); - } return { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; @@ -1228,9 +1229,6 @@ private: } target = std::move(*output); } else if (const auto lmem = std::get_if(&*dest)) { - if (stage == ProgramType::Compute) { - LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); - } target = { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index d2bb8502a..5ebcbbbba 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -52,11 +52,11 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 8; +constexpr u32 NativeVersion = 9; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 32); +static_assert(sizeof(ProgramVariant) == 36); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 6f8e51364..28689f6c7 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -64,10 +64,10 @@ struct ProgramVariant final { : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} /// Compute constructor. - explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, - u32 shared_memory_size) noexcept + explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, + u32 local_memory_size) noexcept : block_x{block_x}, block_y{static_cast(block_y)}, block_z{static_cast(block_z)}, - shared_memory_size{shared_memory_size} {} + shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} // Graphics specific parameters. BaseBindings base_bindings{}; @@ -78,12 +78,13 @@ struct ProgramVariant final { u16 block_y{}; u16 block_z{}; u32 shared_memory_size{}; + u32 local_memory_size{}; bool operator==(const ProgramVariant& rhs) const noexcept { return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, - shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode, - rhs.block_x, rhs.block_y, rhs.block_z, - rhs.shared_memory_size); + shared_memory_size, local_memory_size) == + std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, + rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size); } bool operator!=(const ProgramVariant& rhs) const noexcept { @@ -133,7 +134,8 @@ struct hash { static_cast(variant.block_x) ^ (static_cast(variant.block_y) << 32) ^ (static_cast(variant.block_z) << 48) ^ - (static_cast(variant.shared_memory_size) << 16); + (static_cast(variant.shared_memory_size) << 16) ^ + (static_cast(variant.local_memory_size) << 36); } }; From 0f23359a44d9258efa0c0cd50243cd0efaf80235 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 13 Nov 2019 00:27:12 -0300 Subject: [PATCH 06/15] gl_rasterizer: Bind graphics images to draw commands Images were not being bound to draw invocations because these would require a cache invalidation. --- .../renderer_opengl/gl_rasterizer.cpp | 80 +++++++++++-------- .../renderer_opengl/gl_rasterizer.h | 4 + .../renderer_opengl/gl_shader_cache.cpp | 1 + .../renderer_opengl/gl_shader_disk_cache.cpp | 2 +- 4 files changed, 54 insertions(+), 33 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d890076f8..9ce20f8f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -49,8 +49,26 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); -static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, - const GLShader::ConstBufferEntry& entry) { +namespace { + +template +Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, + Tegra::Engines::ShaderType shader_type) { + if (entry.IsBindless()) { + const Tegra::Texture::TextureHandle tex_handle = + engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); + return engine.GetTextureInfo(tex_handle); + } + if constexpr (std::is_same_v) { + const auto stage = static_cast(shader_type); + return engine.GetStageTexture(stage, entry.GetOffset()); + } else { + return engine.GetTexture(entry.GetOffset()); + } +} + +std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry) { if (!entry.IsIndirect()) { return entry.GetSize(); } @@ -64,6 +82,8 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf return buffer.size; } +} // Anonymous namespace + RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, @@ -272,6 +292,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); SetupDrawTextures(stage, shader, base_bindings); + SetupDrawImages(stage, shader, base_bindings); const ProgramVariant variant(base_bindings, primitive_mode); const auto [program_handle, next_bindings] = shader->GetHandle(variant); @@ -921,18 +942,11 @@ void RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, const Shade ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto num_entries = static_cast(entries.size()); + for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = [&] { - if (!entry.IsBindless()) { - return maxwell3d.GetStageTexture(stage, entry.GetOffset()); - } - const auto shader_type = static_cast(stage); - const Tegra::Texture::TextureHandle tex_handle = - maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); - return maxwell3d.GetTextureInfo(tex_handle); - }(); - + const auto shader_type = static_cast(stage); + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); SetupTexture(base_bindings.sampler + bindpoint, texture, entry); } } @@ -945,17 +959,10 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { ASSERT_MSG(entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto num_entries = static_cast(entries.size()); + for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = [&] { - if (!entry.IsBindless()) { - return compute.GetTexture(entry.GetOffset()); - } - const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32( - Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset()); - return compute.GetTextureInfo(tex_handle); - }(); - + const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); SetupTexture(bindpoint, texture, entry); } } @@ -981,19 +988,28 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu texture.tic.w_source); } +void RasterizerOpenGL::SetupDrawImages(Maxwell::ShaderStage stage, const Shader& shader, + BaseBindings base_bindings) { + const auto& maxwell3d = system.GPU().Maxwell3D(); + const auto& entries = shader->GetShaderEntries().images; + + const auto num_entries = static_cast(entries.size()); + for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { + const auto& entry = entries[bindpoint]; + const auto shader_type = static_cast(stage); + const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; + SetupImage(base_bindings.image + bindpoint, tic, entry); + } +} + void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { const auto& compute = system.GPU().KeplerCompute(); const auto& entries = shader->GetShaderEntries().images; - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + + const auto num_entries = static_cast(entries.size()); + for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto tic = [&] { - if (!entry.IsBindless()) { - return compute.GetTexture(entry.GetOffset()).tic; - } - const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32( - Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset()); - return compute.GetTextureInfo(tex_handle).tic; - }(); + const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; SetupImage(bindpoint, tic, entry); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0e0819d59..267ed7803 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -118,6 +118,10 @@ private: void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, const GLShader::SamplerEntry& entry); + /// Configures images in a graphics shader. + void SetupDrawImages(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, + BaseBindings base_bindings); + /// Configures images in a compute shader. void SetupComputeImages(const Shader& shader); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b23a982d7..e7c92e45d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -449,6 +449,7 @@ std::tuple CachedShader::GetHandle(const ProgramVariant& v base_bindings.cbuf += STAGE_RESERVED_UBOS; base_bindings.gmem += static_cast(entries.global_memory_entries.size()); base_bindings.sampler += static_cast(entries.samplers.size()); + base_bindings.image += static_cast(entries.images.size()); return {program->handle, base_bindings}; } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 5ebcbbbba..ccf530367 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -52,7 +52,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 9; +constexpr u32 NativeVersion = 10; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); From c8a48aacc0a2bfe87de74e0aa7842f5d1aec1558 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 18 Nov 2019 18:35:21 -0300 Subject: [PATCH 07/15] video_core: Unify ProgramType and ShaderStage into ShaderType --- src/video_core/CMakeLists.txt | 1 + .../engines/const_buffer_engine_interface.h | 10 +- src/video_core/engines/kepler_compute.cpp | 1 + src/video_core/engines/kepler_compute.h | 1 + src/video_core/engines/maxwell_3d.cpp | 33 +-- src/video_core/engines/maxwell_3d.h | 13 +- src/video_core/engines/shader_type.h | 20 ++ .../renderer_opengl/gl_rasterizer.cpp | 26 +- .../renderer_opengl/gl_rasterizer.h | 11 +- .../renderer_opengl/gl_shader_cache.cpp | 223 ++++++++---------- .../renderer_opengl/gl_shader_cache.h | 13 +- .../renderer_opengl/gl_shader_decompiler.cpp | 44 ++-- .../renderer_opengl/gl_shader_decompiler.h | 17 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 44 ++-- .../renderer_opengl/gl_shader_disk_cache.h | 26 +- .../renderer_opengl/gl_shader_gen.cpp | 13 +- .../renderer_vulkan/maxwell_to_vk.cpp | 14 +- .../renderer_vulkan/maxwell_to_vk.h | 2 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 29 +-- .../renderer_vulkan/vk_shader_decompiler.h | 2 +- src/video_core/shader/const_buffer_locker.cpp | 1 + src/video_core/shader/const_buffer_locker.h | 3 +- 22 files changed, 260 insertions(+), 287 deletions(-) create mode 100644 src/video_core/engines/shader_type.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6f3f2aa9f..3b20c7d34 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -22,6 +22,7 @@ add_library(video_core STATIC engines/maxwell_dma.h engines/shader_bytecode.h engines/shader_header.h + engines/shader_type.h gpu.cpp gpu.h gpu_asynch.cpp diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index ac27b6cbe..44b8b8d22 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -8,19 +8,11 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/engines/shader_type.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { -enum class ShaderType : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - Compute = 5, -}; - struct SamplerDescriptor { union { BitField<0, 20, Tegra::Shader::TextureType> texture_type; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 3a39aeabe..110406f2f 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -8,6 +8,7 @@ #include "core/core.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index c526287b7..4ef3e0613 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,6 +12,7 @@ #include "common/common_types.h" #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_upload.h" +#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a44c09003..15a7a9d6a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,6 +9,7 @@ #include "core/core_timing.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" @@ -368,24 +369,24 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { StartCBData(method); break; } - case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { - ProcessCBBind(Regs::ShaderStage::Vertex); + case MAXWELL3D_REG_INDEX(cb_bind[0]): { + ProcessCBBind(0); break; } - case MAXWELL3D_REG_INDEX(cb_bind[1].raw_config): { - ProcessCBBind(Regs::ShaderStage::TesselationControl); + case MAXWELL3D_REG_INDEX(cb_bind[1]): { + ProcessCBBind(1); break; } - case MAXWELL3D_REG_INDEX(cb_bind[2].raw_config): { - ProcessCBBind(Regs::ShaderStage::TesselationEval); + case MAXWELL3D_REG_INDEX(cb_bind[2]): { + ProcessCBBind(2); break; } - case MAXWELL3D_REG_INDEX(cb_bind[3].raw_config): { - ProcessCBBind(Regs::ShaderStage::Geometry); + case MAXWELL3D_REG_INDEX(cb_bind[3]): { + ProcessCBBind(3); break; } - case MAXWELL3D_REG_INDEX(cb_bind[4].raw_config): { - ProcessCBBind(Regs::ShaderStage::Fragment); + case MAXWELL3D_REG_INDEX(cb_bind[4]): { + ProcessCBBind(4); break; } case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): { @@ -687,10 +688,10 @@ void Maxwell3D::DrawArrays() { } } -void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { +void Maxwell3D::ProcessCBBind(std::size_t stage_index) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. - auto& shader = state.shader_stages[static_cast(stage)]; - auto& bind_data = regs.cb_bind[static_cast(stage)]; + auto& shader = state.shader_stages[stage_index]; + auto& bind_data = regs.cb_bind[stage_index]; ASSERT(bind_data.index < Regs::MaxConstBuffers); auto& buffer = shader.const_buffers[bind_data.index]; @@ -757,9 +758,9 @@ Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_ha return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; } -Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, - std::size_t offset) const { - const auto& shader = state.shader_stages[static_cast(stage)]; +Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const { + const auto stage_index = static_cast(stage); + const auto& shader = state.shader_stages[stage_index]; const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1aa7c274f..72994f4d2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -18,6 +18,7 @@ #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_upload.h" +#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" #include "video_core/textures/texture.h" @@ -130,14 +131,6 @@ public: Fragment = 5, }; - enum class ShaderStage : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - }; - struct VertexAttribute { enum class Size : u32 { Invalid = 0x0, @@ -1254,7 +1247,7 @@ public: Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; /// Returns the texture information for a specific texture in a specific shader stage. - Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; + Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; @@ -1376,7 +1369,7 @@ private: void FinishCBData(); /// Handles a write to the CB_BIND register. - void ProcessCBBind(Regs::ShaderStage stage); + void ProcessCBBind(std::size_t stage_index); /// Handles a write to the VERTEX_END_GL register, triggering a draw. void DrawArrays(); diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h new file mode 100644 index 000000000..239196ba9 --- /dev/null +++ b/src/video_core/engines/shader_type.h @@ -0,0 +1,20 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Tegra::Engines { + +enum class ShaderType : u32 { + Vertex = 0, + TesselationControl = 1, + TesselationEval = 2, + Geometry = 3, + Fragment = 4, + Compute = 5, +}; + +} // namespace Tegra::Engines diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9ce20f8f4..8baa73ebf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -22,6 +22,7 @@ #include "core/settings.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -60,8 +61,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry return engine.GetTextureInfo(tex_handle); } if constexpr (std::is_same_v) { - const auto stage = static_cast(shader_type); - return engine.GetStageTexture(stage, entry.GetOffset()); + return engine.GetStageTexture(shader_type, entry.GetOffset()); } else { return engine.GetTexture(entry.GetOffset()); } @@ -263,7 +263,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; - const Maxwell::ShaderProgram program{static_cast(index)}; + const auto program{static_cast(index)}; // Skip stages that are not enabled if (!gpu.regs.IsShaderConfigEnabled(index)) { @@ -288,7 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { Shader shader{shader_cache.GetStageProgram(program)}; // Stage indices are 0 - 5 - const auto stage = static_cast(index == 0 ? 0 : index - 1); + const std::size_t stage = index == 0 ? 0 : index - 1; SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); SetupDrawTextures(stage, shader, base_bindings); @@ -856,11 +856,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader) { +void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); const auto& stages = system.GPU().Maxwell3D().state.shader_stages; - const auto& shader_stage = stages[static_cast(stage)]; + const auto& shader_stage = stages[stage_index]; for (const auto& entry : shader->GetShaderEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; SetupConstBuffer(buffer, entry); @@ -899,11 +898,10 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b bind_ubo_pushbuffer.Push(cbuf, offset, size); } -void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader) { +void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast(stage)]}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read(addr)}; @@ -932,7 +930,7 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); } -void RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, const Shader& shader, +void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); @@ -945,7 +943,7 @@ void RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, const Shade const auto num_entries = static_cast(entries.size()); for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto shader_type = static_cast(stage); + const auto shader_type = static_cast(stage_index); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); SetupTexture(base_bindings.sampler + bindpoint, texture, entry); } @@ -988,7 +986,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu texture.tic.w_source); } -void RasterizerOpenGL::SetupDrawImages(Maxwell::ShaderStage stage, const Shader& shader, +void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings) { const auto& maxwell3d = system.GPU().Maxwell3D(); const auto& entries = shader->GetShaderEntries().images; @@ -996,7 +994,7 @@ void RasterizerOpenGL::SetupDrawImages(Maxwell::ShaderStage stage, const Shader& const auto num_entries = static_cast(entries.size()); for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto shader_type = static_cast(stage); + const auto shader_type = static_cast(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; SetupImage(base_bindings.image + bindpoint, tic, entry); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 267ed7803..6a2ce1586 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -83,8 +83,7 @@ private: bool using_depth_fb, bool using_stencil_fb); /// Configures the current constbuffers to use for the draw command. - void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader); + void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); /// Configures the current constbuffers to use for the kernel invocation. void SetupComputeConstBuffers(const Shader& kernel); @@ -94,8 +93,7 @@ private: const GLShader::ConstBufferEntry& entry); /// Configures the current global memory entries to use for the draw command. - void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader); + void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); /// Configures the current global memory entries to use for the kernel invocation. void SetupComputeGlobalMemory(const Shader& kernel); @@ -108,7 +106,7 @@ private: void DrawPrelude(); /// Configures the current textures to use for the draw command. - void SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, + void SetupDrawTextures(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings); /// Configures the textures used in a compute shader. @@ -119,8 +117,7 @@ private: const GLShader::SamplerEntry& entry); /// Configures images in a graphics shader. - void SetupDrawImages(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, - BaseBindings base_bindings); + void SetupDrawImages(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings); /// Configures images in a compute shader. void SetupComputeImages(const Shader& shader); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e7c92e45d..f474fb550 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,7 @@ #include "core/frontend/emu_window.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -84,28 +85,26 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { /// Gets the shader program code from memory for the specified address ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, const u8* host_ptr) { - ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); + ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); ASSERT_OR_EXECUTE(host_ptr != nullptr, { - std::fill(program_code.begin(), program_code.end(), 0); - return program_code; + std::fill(code.begin(), code.end(), 0); + return code; }); - memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), - program_code.size() * sizeof(u64)); - program_code.resize(CalculateProgramSize(program_code)); - return program_code; + memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); + code.resize(CalculateProgramSize(code)); + return code; } /// Gets the shader type from a Maxwell program type -constexpr GLenum GetShaderType(ProgramType program_type) { - switch (program_type) { - case ProgramType::VertexA: - case ProgramType::VertexB: +constexpr GLenum GetGLShaderType(ShaderType shader_type) { + switch (shader_type) { + case ShaderType::Vertex: return GL_VERTEX_SHADER; - case ProgramType::Geometry: + case ShaderType::Geometry: return GL_GEOMETRY_SHADER; - case ProgramType::Fragment: + case ShaderType::Fragment: return GL_FRAGMENT_SHADER; - case ProgramType::Compute: + case ShaderType::Compute: return GL_COMPUTE_SHADER; default: return GL_NONE; @@ -135,30 +134,11 @@ constexpr std::tuple GetPrimitiveDescription(GLen } } -ProgramType GetProgramType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexA: - return ProgramType::VertexA; - case Maxwell::ShaderProgram::VertexB: - return ProgramType::VertexB; - case Maxwell::ShaderProgram::TesselationControl: - return ProgramType::TessellationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ProgramType::TessellationEval; - case Maxwell::ShaderProgram::Geometry: - return ProgramType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ProgramType::Fragment; - } - UNREACHABLE(); - return {}; -} - /// Hashes one (or two) program streams -u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, +u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, const ProgramCode& code_b) { u64 unique_identifier = boost::hash_value(code); - if (program_type == ProgramType::VertexA) { + if (is_a) { // VertexA programs include two programs boost::hash_combine(unique_identifier, boost::hash_value(code_b)); } @@ -166,79 +146,74 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, } /// Creates an unspecialized program from code streams -std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, +std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir, const std::optional& ir_b) { - switch (program_type) { - case ProgramType::VertexA: - case ProgramType::VertexB: + switch (shader_type) { + case ShaderType::Vertex: return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); - case ProgramType::Geometry: + case ShaderType::Geometry: return GLShader::GenerateGeometryShader(device, ir); - case ProgramType::Fragment: + case ShaderType::Fragment: return GLShader::GenerateFragmentShader(device, ir); - case ProgramType::Compute: + case ShaderType::Compute: return GLShader::GenerateComputeShader(device, ir); default: - UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast(program_type)); + UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast(shader_type)); return {}; } } -constexpr const char* GetProgramTypeName(ProgramType program_type) { - switch (program_type) { - case ProgramType::VertexA: - case ProgramType::VertexB: +constexpr const char* GetShaderTypeName(ShaderType shader_type) { + switch (shader_type) { + case ShaderType::Vertex: return "VS"; - case ProgramType::TessellationControl: - return "TCS"; - case ProgramType::TessellationEval: - return "TES"; - case ProgramType::Geometry: + case ShaderType::TesselationControl: + return "HS"; + case ShaderType::TesselationEval: + return "DS"; + case ShaderType::Geometry: return "GS"; - case ProgramType::Fragment: + case ShaderType::Fragment: return "FS"; - case ProgramType::Compute: + case ShaderType::Compute: return "CS"; } return "UNK"; } -Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { +constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { switch (program_type) { - case ProgramType::VertexA: - case ProgramType::VertexB: - return Tegra::Engines::ShaderType::Vertex; - case ProgramType::TessellationControl: - return Tegra::Engines::ShaderType::TesselationControl; - case ProgramType::TessellationEval: - return Tegra::Engines::ShaderType::TesselationEval; - case ProgramType::Geometry: - return Tegra::Engines::ShaderType::Geometry; - case ProgramType::Fragment: - return Tegra::Engines::ShaderType::Fragment; - case ProgramType::Compute: - return Tegra::Engines::ShaderType::Compute; + case Maxwell::ShaderProgram::VertexA: + case Maxwell::ShaderProgram::VertexB: + return ShaderType::Vertex; + case Maxwell::ShaderProgram::TesselationControl: + return ShaderType::TesselationControl; + case Maxwell::ShaderProgram::TesselationEval: + return ShaderType::TesselationEval; + case Maxwell::ShaderProgram::Geometry: + return ShaderType::Geometry; + case Maxwell::ShaderProgram::Fragment: + return ShaderType::Fragment; } - UNREACHABLE(); return {}; } -std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { - return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); +std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { + return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); } -Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( - Core::System& system, ProgramType program_type) { - if (program_type == ProgramType::Compute) { +Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, + ShaderType shader_type) { + if (shader_type == ShaderType::Compute) { return system.GPU().KeplerCompute(); } else { return system.GPU().Maxwell3D(); } } -std::unique_ptr MakeLocker(Core::System& system, ProgramType program_type) { - return std::make_unique(GetEnginesShaderType(program_type), - GetConstBufferEngineInterface(system, program_type)); +std::unique_ptr MakeLocker(Core::System& system, ShaderType shader_type) { + return std::make_unique(shader_type, + GetConstBufferEngineInterface(system, shader_type)); } void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { @@ -255,18 +230,18 @@ void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { } } -CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, - const ProgramCode& program_code, const ProgramCode& program_code_b, +CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, + const ProgramCode& code, const ProgramCode& code_b, ConstBufferLocker& locker, const ProgramVariant& variant, bool hint_retrievable = false) { - LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); + LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); - const bool is_compute = program_type == ProgramType::Compute; + const bool is_compute = shader_type == ShaderType::Compute; const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); + const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker); std::optional ir_b; - if (!program_code_b.empty()) { - ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); + if (!code_b.empty()) { + ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); } const auto entries = GLShader::GetEntries(ir); @@ -274,7 +249,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy #version 430 core #extension GL_ARB_separate_shader_objects : enable )", - GetShaderId(unique_identifier, program_type)); + GetShaderId(unique_identifier, shader_type)); if (device.HasShaderBallot()) { source += "#extension GL_ARB_shader_ballot : require\n"; } @@ -313,14 +288,14 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); } - if (program_type == ProgramType::Geometry) { + if (shader_type == ShaderType::Geometry) { const auto [glsl_topology, debug_name, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); source += fmt::format("layout ({}) in;\n\n", glsl_topology); source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); } - if (program_type == ProgramType::Compute) { + if (shader_type == ShaderType::Compute) { source += fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", variant.block_x, variant.block_y, variant.block_z); @@ -337,10 +312,10 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy } source += '\n'; - source += GenerateGLSL(device, program_type, ir, ir_b); + source += GenerateGLSL(device, shader_type, ir, ir_b); OGLShader shader; - shader.Create(source.c_str(), GetShaderType(program_type)); + shader.Create(source.c_str(), GetGLShaderType(shader_type)); auto program = std::make_shared(); program->Create(true, hint_retrievable, shader.handle); @@ -363,18 +338,16 @@ std::unordered_set GetSupportedFormats() { } // Anonymous namespace -CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ShaderEntries entries, ProgramCode program_code, - ProgramCode program_code_b) - : RasterizerCacheObject{params.host_ptr}, system{params.system}, - disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, - unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, - program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { +CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, + GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) + : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache}, + device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier}, + shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} { if (!params.precompiled_variants) { return; } for (const auto& pair : *params.precompiled_variants) { - auto locker = MakeLocker(system, program_type); + auto locker = MakeLocker(system, shader_type); const auto& usage = pair->first; FillLocker(*locker, usage); @@ -395,38 +368,37 @@ CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_t } Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b) { - params.disk_cache.SaveRaw(ShaderDiskCacheRaw( - params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); + Maxwell::ShaderProgram program_type, ProgramCode code, + ProgramCode code_b) { + const auto shader_type = GetShaderType(program_type); + params.disk_cache.SaveRaw( + ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b)); - ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)), - params.system.GPU().Maxwell3D()); - const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); + ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); // TODO(Rodrigo): Handle VertexA shaders // std::optional ir_b; - // if (!program_code_b.empty()) { - // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); + // if (!code_b.empty()) { + // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); // } - return std::shared_ptr( - new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), - std::move(program_code), std::move(program_code_b))); + return std::shared_ptr(new CachedShader( + params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { params.disk_cache.SaveRaw( - ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); + ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, params.system.GPU().KeplerCompute()); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); return std::shared_ptr(new CachedShader( - params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); + params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); } Shader CachedShader::CreateFromCache(const ShaderParameters& params, const UnspecializedShader& unspecialized) { - return std::shared_ptr(new CachedShader(params, unspecialized.program_type, + return std::shared_ptr(new CachedShader(params, unspecialized.type, unspecialized.entries, unspecialized.code, unspecialized.code_b)); } @@ -437,7 +409,7 @@ std::tuple CachedShader::GetHandle(const ProgramVariant& v const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); auto& program = entry->second; if (is_cache_miss) { - program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, + program = BuildShader(device, unique_identifier, shader_type, code, code_b, *curr_locker_variant->locker, variant); disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); @@ -469,7 +441,7 @@ bool CachedShader::EnsureValidLockerVariant() { if (!curr_locker_variant) { auto& new_variant = locker_variants.emplace_back(); new_variant = std::make_unique(); - new_variant->locker = MakeLocker(system, program_type); + new_variant->locker = MakeLocker(system, shader_type); curr_locker_variant = new_variant.get(); } return previous_variant == curr_locker_variant; @@ -537,10 +509,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } if (!shader) { - auto locker{MakeLocker(system, unspecialized.program_type)}; + auto locker{MakeLocker(system, unspecialized.type)}; FillLocker(*locker, usage); - shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, + shader = BuildShader(device, usage.unique_identifier, unspecialized.type, unspecialized.code, unspecialized.code_b, *locker, usage.variant, true); } @@ -645,7 +617,7 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders( const auto& raw{raws[i]}; const u64 unique_identifier{raw.GetUniqueIdentifier()}; const u64 calculated_hash{ - GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; + GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())}; if (unique_identifier != calculated_hash) { LOG_ERROR(Render_OpenGL, "Invalid hash in entry={:016x} (obtained hash={:016x}) - " @@ -656,9 +628,9 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders( } const u32 main_offset = - raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); - const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); + raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + ConstBufferLocker locker(raw.GetType()); + const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker); // TODO(Rodrigo): Handle VertexA shaders // std::optional ir_b; // if (raw.HasProgramA()) { @@ -667,9 +639,9 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders( UnspecializedShader unspecialized; unspecialized.entries = GLShader::GetEntries(ir); - unspecialized.program_type = raw.GetProgramType(); - unspecialized.code = raw.GetProgramCode(); - unspecialized.code_b = raw.GetProgramCodeB(); + unspecialized.type = raw.GetType(); + unspecialized.code = raw.GetCode(); + unspecialized.code_b = raw.GetCodeB(); unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); if (callback) { @@ -702,7 +674,8 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); } - const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); + const auto unique_identifier = GetUniqueIdentifier( + GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; const ShaderParameters params{system, disk_cache, precompiled_variants, device, @@ -730,7 +703,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { // No kernel found - create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; - const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; + const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; const ShaderParameters params{system, disk_cache, precompiled_variants, device, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 795b05a19..d23c8d6d4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -17,6 +17,7 @@ #include #include "common/common_types.h" +#include "video_core/engines/shader_type.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -47,7 +48,7 @@ using PrecompiledVariants = std::vector; struct UnspecializedShader { GLShader::ShaderEntries entries; - ProgramType program_type; + Tegra::Engines::ShaderType type; ProgramCode code; ProgramCode code_b; }; @@ -77,7 +78,7 @@ public: } std::size_t GetSizeInBytes() const override { - return program_code.size() * sizeof(u64); + return code.size() * sizeof(u64); } /// Gets the shader entries for the shader @@ -94,7 +95,7 @@ private: std::unordered_map programs; }; - explicit CachedShader(const ShaderParameters& params, ProgramType program_type, + explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type, GLShader::ShaderEntries entries, ProgramCode program_code, ProgramCode program_code_b); @@ -110,12 +111,12 @@ private: VAddr cpu_addr{}; u64 unique_identifier{}; - ProgramType program_type{}; + Tegra::Engines::ShaderType shader_type{}; GLShader::ShaderEntries entries; - ProgramCode program_code; - ProgramCode program_code_b; + ProgramCode code; + ProgramCode code_b; LockerVariant* curr_locker_variant = nullptr; std::vector> locker_variants; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index fe016c05c..caec565d1 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -16,6 +16,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -27,6 +28,7 @@ namespace OpenGL::GLShader { namespace { +using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; @@ -331,8 +333,8 @@ std::string FlowStackTopName(MetaStackClass stack) { return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); } -constexpr bool IsVertexShader(ProgramType stage) { - return stage == ProgramType::VertexA || stage == ProgramType::VertexB; +[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) { + return stage == ShaderType::Vertex; } class ASTDecompiler; @@ -340,7 +342,7 @@ class ExprDecompiler; class GLSLDecompiler final { public: - explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, + explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, std::string suffix) : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} @@ -427,7 +429,7 @@ private: } void DeclareGeometry() { - if (stage != ProgramType::Geometry) { + if (stage != ShaderType::Geometry) { return; } @@ -510,7 +512,7 @@ private: } void DeclareLocalMemory() { - if (stage == ProgramType::Compute) { + if (stage == ShaderType::Compute) { code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); code.AddLine("#endif"); @@ -575,12 +577,12 @@ private: const u32 location{GetGenericAttributeIndex(index)}; std::string name{GetInputAttribute(index)}; - if (stage == ProgramType::Geometry) { + if (stage == ShaderType::Geometry) { name = "gs_" + name + "[]"; } std::string suffix; - if (stage == ProgramType::Fragment) { + if (stage == ShaderType::Fragment) { const auto input_mode{header.ps.GetAttributeUse(location)}; if (skip_unused && input_mode == AttributeUse::Unused) { return; @@ -592,7 +594,7 @@ private: } void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { + if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { DeclareOutputAttribute(ToGenericAttribute(i)); } @@ -704,7 +706,7 @@ private: constexpr u32 element_stride = 4; const u32 address{generic_base + index * generic_stride + element * element_stride}; - const bool declared = stage != ProgramType::Fragment || + const bool declared = stage != ShaderType::Fragment || header.ps.GetAttributeUse(index) != AttributeUse::Unused; const std::string value = declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; @@ -796,7 +798,7 @@ private: } if (const auto abuf = std::get_if(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, + UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, "Physical attributes in geometry shaders are not implemented"); if (abuf->IsPhysicalBuffer()) { return {fmt::format("ReadPhysicalAttribute({})", @@ -891,7 +893,7 @@ private: Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { const auto GeometryPass = [&](std::string_view name) { - if (stage == ProgramType::Geometry && buffer) { + if (stage == ShaderType::Geometry && buffer) { // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games // set an 0x80000000 index for those and the shader fails to build. Find out why // this happens and what's its intent. @@ -903,11 +905,11 @@ private: switch (attribute) { case Attribute::Index::Position: switch (stage) { - case ProgramType::Geometry: + case ShaderType::Geometry: return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), GetSwizzle(element)), Type::Float}; - case ProgramType::Fragment: + case ShaderType::Fragment: return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), Type::Float}; default: @@ -941,7 +943,7 @@ private: return {"0", Type::Int}; case Attribute::Index::FrontFacing: // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ProgramType::Fragment); + ASSERT(stage == ShaderType::Fragment); switch (element) { case 3: return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; @@ -967,7 +969,7 @@ private: // be found in fragment shaders, so we disable precise there. There are vertex shaders that // also fail to build but nobody seems to care about those. // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment; + const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; std::string temporary = code.GenerateTemporary(); code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), @@ -1233,7 +1235,7 @@ private: fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; } else if (const auto smem = std::get_if(&*dest)) { - ASSERT(stage == ProgramType::Compute); + ASSERT(stage == ShaderType::Compute); target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; } else if (const auto gmem = std::get_if(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()).AsUint(); @@ -1801,7 +1803,7 @@ private: } void PreExit() { - if (stage != ProgramType::Fragment) { + if (stage != ShaderType::Fragment) { return; } const auto& used_registers = ir.GetRegisters(); @@ -1854,14 +1856,14 @@ private: } Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ProgramType::Geometry, + ASSERT_MSG(stage == ShaderType::Geometry, "EmitVertex is expected to be used in a geometry shader."); code.AddLine("EmitVertex();"); return {}; } Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ProgramType::Geometry, + ASSERT_MSG(stage == ShaderType::Geometry, "EndPrimitive is expected to be used in a geometry shader."); code.AddLine("EndPrimitive();"); return {}; @@ -2192,7 +2194,7 @@ private: const Device& device; const ShaderIR& ir; - const ProgramType stage; + const ShaderType stage; const std::string suffix; const Header header; @@ -2447,7 +2449,7 @@ const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); )"; } -std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, +std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage, const std::string& suffix) { GLSLDecompiler decompiler(device, ir, stage, suffix); decompiler.Decompile(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index b1e75e6cc..7876f48d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -10,6 +10,7 @@ #include #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -17,20 +18,8 @@ class ShaderIR; } namespace OpenGL { - class Device; - -enum class ProgramType : u32 { - VertexA = 0, - VertexB = 1, - TessellationControl = 2, - TessellationEval = 3, - Geometry = 4, - Fragment = 5, - Compute = 6 -}; - -} // namespace OpenGL +} namespace OpenGL::GLShader { @@ -94,6 +83,6 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); std::string GetCommonDeclarations(); std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ProgramType stage, const std::string& suffix); + Tegra::Engines::ShaderType stage, const std::string& suffix); } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index ccf530367..09f62c8c4 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include + #include #include "common/assert.h" @@ -12,16 +13,16 @@ #include "common/logging/log.h" #include "common/scm_rev.h" #include "common/zstd_compression.h" - #include "core/core.h" #include "core/hle/kernel/process.h" #include "core/settings.h" - +#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" namespace OpenGL { +using Tegra::Engines::ShaderType; using VideoCommon::Shader::BindlessSamplerMap; using VideoCommon::Shader::BoundSamplerMap; using VideoCommon::Shader::KeyMap; @@ -67,10 +68,10 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { } // Anonymous namespace -ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - ProgramCode program_code, ProgramCode program_code_b) - : unique_identifier{unique_identifier}, program_type{program_type}, - program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} +ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, + ProgramCode code_b) + : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move( + code_b)} {} ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; @@ -78,42 +79,39 @@ ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || - file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) { + file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { return false; } - u32 program_code_size{}; - u32 program_code_size_b{}; - if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) { + u32 code_size{}; + u32 code_size_b{}; + if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || + file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { return false; } - program_code.resize(program_code_size); - program_code_b.resize(program_code_size_b); + code.resize(code_size); + code_b.resize(code_size_b); - if (file.ReadArray(program_code.data(), program_code_size) != program_code_size) + if (file.ReadArray(code.data(), code_size) != code_size) return false; - if (HasProgramA() && - file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { + if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { return false; } return true; } bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { - if (file.WriteObject(unique_identifier) != 1 || - file.WriteObject(static_cast(program_type)) != 1 || - file.WriteObject(static_cast(program_code.size())) != 1 || - file.WriteObject(static_cast(program_code_b.size())) != 1) { + if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast(type)) != 1 || + file.WriteObject(static_cast(code.size())) != 1 || + file.WriteObject(static_cast(code_b.size())) != 1) { return false; } - if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) + if (file.WriteArray(code.data(), code.size()) != code.size()) return false; - if (HasProgramA() && - file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { + if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { return false; } return true; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 28689f6c7..917dbccdd 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -18,6 +18,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "core/file_sys/vfs_vector.h" +#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/shader/const_buffer_locker.h" @@ -154,8 +155,8 @@ namespace OpenGL { /// Describes a shader how it's used by the guest GPU class ShaderDiskCacheRaw { public: - explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - ProgramCode program_code, ProgramCode program_code_b = {}); + explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, + ProgramCode code, ProgramCode code_b = {}); ShaderDiskCacheRaw(); ~ShaderDiskCacheRaw(); @@ -168,27 +169,26 @@ public: } bool HasProgramA() const { - return program_type == ProgramType::VertexA; + return !code.empty() && !code_b.empty(); } - ProgramType GetProgramType() const { - return program_type; + Tegra::Engines::ShaderType GetType() const { + return type; } - const ProgramCode& GetProgramCode() const { - return program_code; + const ProgramCode& GetCode() const { + return code; } - const ProgramCode& GetProgramCodeB() const { - return program_code_b; + const ProgramCode& GetCodeB() const { + return code_b; } private: u64 unique_identifier{}; - ProgramType program_type{}; - - ProgramCode program_code; - ProgramCode program_code_b; + Tegra::Engines::ShaderType type{}; + ProgramCode code; + ProgramCode code_b; }; /// Contains an OpenGL dumped binary program diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index af17216bd..2f601d550 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -4,6 +4,7 @@ #include #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/shader/shader_ir.h" @@ -11,6 +12,7 @@ namespace OpenGL::GLShader { using Tegra::Engines::Maxwell3D; +using Tegra::Engines::ShaderType; using VideoCommon::Shader::CompileDepth; using VideoCommon::Shader::CompilerSettings; using VideoCommon::Shader::ProgramCode; @@ -24,10 +26,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { }; )"; - const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; - out += Decompile(device, ir, stage, "vertex"); + out += Decompile(device, ir, ShaderType::Vertex, "vertex"); if (ir_b) { - out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); + out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); } out += R"( @@ -49,7 +50,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { }; )"; - out += Decompile(device, ir, ProgramType::Geometry, "geometry"); + out += Decompile(device, ir, ShaderType::Geometry, "geometry"); out += R"( void main() { @@ -76,7 +77,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { }; )"; - out += Decompile(device, ir, ProgramType::Fragment, "fragment"); + out += Decompile(device, ir, ShaderType::Fragment, "fragment"); out += R"( void main() { @@ -88,7 +89,7 @@ void main() { std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { std::string out = GetCommonDeclarations(); - out += Decompile(device, ir, ProgramType::Compute, "compute"); + out += Decompile(device, ir, ShaderType::Compute, "compute"); out += R"( void main() { execute_compute(); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 463ed43ae..7f0eb6b74 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -13,6 +13,8 @@ namespace Vulkan::MaxwellToVK { +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + namespace Sampler { vk::Filter Filter(Tegra::Texture::TextureFilter filter) { @@ -196,17 +198,17 @@ std::pair SurfaceFormat(const VKDevice& device, FormatType for return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; } -vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) { +vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { switch (stage) { - case Maxwell::ShaderStage::Vertex: + case Tegra::Engines::ShaderType::Vertex: return vk::ShaderStageFlagBits::eVertex; - case Maxwell::ShaderStage::TesselationControl: + case Tegra::Engines::ShaderType::TesselationControl: return vk::ShaderStageFlagBits::eTessellationControl; - case Maxwell::ShaderStage::TesselationEval: + case Tegra::Engines::ShaderType::TesselationEval: return vk::ShaderStageFlagBits::eTessellationEvaluation; - case Maxwell::ShaderStage::Geometry: + case Tegra::Engines::ShaderType::Geometry: return vk::ShaderStageFlagBits::eGeometry; - case Maxwell::ShaderStage::Fragment: + case Tegra::Engines::ShaderType::Fragment: return vk::ShaderStageFlagBits::eFragment; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast(stage)); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 5b0ffd87a..904a32e01 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -32,7 +32,7 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar std::pair SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); -vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage); +vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 2850d5b59..80738d3d0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -17,6 +17,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/engines/shader_type.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/node.h" @@ -25,13 +26,13 @@ namespace Vulkan::VKShader { using Sirit::Id; +using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; using Tegra::Shader::AttributeUse; using Tegra::Shader::Register; using namespace VideoCommon::Shader; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; using Operation = const OperationNode&; // TODO(Rodrigo): Use rasterizer's value @@ -93,7 +94,7 @@ class ExprDecompiler; class SPIRVDecompiler : public Sirit::Module { public: - explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) + explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage) : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { AddCapability(spv::Capability::Shader); AddExtension("SPV_KHR_storage_buffer_storage_class"); @@ -256,21 +257,21 @@ private: } void DeclareVertex() { - if (stage != ShaderStage::Vertex) + if (stage != ShaderType::Vertex) return; DeclareVertexRedeclarations(); } void DeclareGeometry() { - if (stage != ShaderStage::Geometry) + if (stage != ShaderType::Geometry) return; UNIMPLEMENTED(); } void DeclareFragment() { - if (stage != ShaderStage::Fragment) + if (stage != ShaderType::Fragment) return; for (u32 rt = 0; rt < static_cast(frag_colors.size()); ++rt) { @@ -354,7 +355,7 @@ private: continue; } - UNIMPLEMENTED_IF(stage == ShaderStage::Geometry); + UNIMPLEMENTED_IF(stage == ShaderType::Geometry); const u32 location = GetGenericAttributeLocation(index); const Id id = OpVariable(t_in_float4, spv::StorageClass::Input); @@ -364,7 +365,7 @@ private: Decorate(id, spv::Decoration::Location, location); - if (stage != ShaderStage::Fragment) { + if (stage != ShaderType::Fragment) { continue; } switch (header.ps.GetAttributeUse(location)) { @@ -548,7 +549,7 @@ private: switch (attribute) { case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { + if (stage != ShaderType::Fragment) { UNIMPLEMENTED(); break; } else { @@ -561,7 +562,7 @@ private: // TODO(Subv): Find out what the values are for the first two elements when inside a // vertex shader, and what's the value of the fourth element when inside a Tess Eval // shader. - ASSERT(stage == ShaderStage::Vertex); + ASSERT(stage == ShaderType::Vertex); switch (element) { case 2: return BitcastFrom(Emit(OpLoad(t_uint, instance_index))); @@ -572,7 +573,7 @@ private: return Constant(t_float, 0); case Attribute::Index::FrontFacing: // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderStage::Fragment); + ASSERT(stage == ShaderType::Fragment); if (element == 3) { const Id is_front_facing = Emit(OpLoad(t_bool, front_facing)); const Id true_value = @@ -1075,7 +1076,7 @@ private: Id PreExit() { switch (stage) { - case ShaderStage::Vertex: { + case ShaderType::Vertex: { // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); @@ -1085,7 +1086,7 @@ private: Emit(OpStore(z_pointer, depth)); break; } - case ShaderStage::Fragment: { + case ShaderType::Fragment: { const auto SafeGetRegister = [&](u32 reg) { // TODO(Rodrigo): Replace with contains once C++20 releases if (const auto it = registers.find(reg); it != registers.end()) { @@ -1511,7 +1512,7 @@ private: const VKDevice& device; const ShaderIR& ir; - const ShaderStage stage; + const ShaderType stage; const Tegra::Shader::Header header; u64 conditional_nest_count{}; u64 inside_branch{}; @@ -1843,7 +1844,7 @@ void SPIRVDecompiler::DecompileAST() { } DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, - Maxwell::ShaderStage stage) { + ShaderType stage) { auto decompiler = std::make_unique(device, ir, stage); decompiler->Decompile(); return {std::move(decompiler), decompiler->GetShaderEntries()}; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f90541cc1..203fc00d0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -79,6 +79,6 @@ struct ShaderEntries { using DecompilerResult = std::pair, ShaderEntries>; DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, - Maxwell::ShaderStage stage); + Tegra::Engines::ShaderType stage); } // namespace Vulkan::VKShader diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index fe467608e..b65399f91 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/shader/const_buffer_locker.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 600e2f3c3..50a8ce42a 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/shader_type.h" namespace VideoCommon::Shader { @@ -20,7 +21,7 @@ using BindlessSamplerMap = * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader * compiler. with it, the shader can obtain required data from GPU state and store it for disk * shader compilation. - **/ + */ class ConstBufferLocker { public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); From 180417c51438e2c97b800f4b19e621dbc8288493 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 18 Nov 2019 21:38:15 -0300 Subject: [PATCH 08/15] gl_shader_cache: Remove dynamic BaseBinding specialization --- src/video_core/engines/maxwell_3d.h | 1 - src/video_core/engines/shader_type.h | 1 + src/video_core/renderer_opengl/gl_device.cpp | 42 +++++++++ src/video_core/renderer_opengl/gl_device.h | 21 ++++- .../renderer_opengl/gl_rasterizer.cpp | 94 +++++++++---------- .../renderer_opengl/gl_rasterizer.h | 9 +- .../renderer_opengl/gl_shader_cache.cpp | 44 ++------- .../renderer_opengl/gl_shader_cache.h | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 29 +++--- .../renderer_opengl/gl_shader_disk_cache.cpp | 5 +- .../renderer_opengl/gl_shader_disk_cache.h | 44 ++------- .../renderer_opengl/gl_shader_gen.cpp | 31 +++--- src/video_core/renderer_opengl/gl_state.cpp | 14 ++- src/video_core/renderer_opengl/gl_state.h | 5 +- src/video_core/renderer_opengl/utils.cpp | 32 ++----- src/video_core/renderer_opengl/utils.h | 18 ++-- 16 files changed, 200 insertions(+), 192 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 72994f4d2..c8dd362ab 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -63,7 +63,6 @@ public: static constexpr std::size_t NumVertexArrays = 32; static constexpr std::size_t NumVertexAttributes = 32; static constexpr std::size_t NumVaryings = 31; - static constexpr std::size_t NumTextureSamplers = 32; static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number static constexpr std::size_t NumClipDistances = 8; static constexpr std::size_t MaxShaderProgram = 6; diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h index 239196ba9..49ce5cde5 100644 --- a/src/video_core/engines/shader_type.h +++ b/src/video_core/engines/shader_type.h @@ -16,5 +16,6 @@ enum class ShaderType : u32 { Fragment = 4, Compute = 5, }; +static constexpr std::size_t MaxShaderTypes = 6; } // namespace Tegra::Engines diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b30d5be74..5cfa97fc2 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,6 +17,9 @@ namespace OpenGL { namespace { +// One uniform block is reserved for emulation purposes +constexpr u32 ReservedUniformBlocks = 1; + template T GetInteger(GLenum pname) { GLint temporary; @@ -48,6 +51,22 @@ bool HasExtension(const std::vector& images, std::string_view return std::find(images.begin(), images.end(), extension) != images.end(); } +constexpr Device::BaseBindings operator+(Device::BaseBindings lhs, Device::BaseBindings rhs) { + return Device::BaseBindings{lhs.uniform_buffer + rhs.uniform_buffer, + lhs.shader_storage_buffer + rhs.shader_storage_buffer, + lhs.sampler + rhs.sampler, lhs.image + rhs.image}; +} + +Device::BaseBindings BuildBaseBindings(GLenum uniform_blocks, GLenum shader_storage_blocks, + GLenum texture_image_units, GLenum image_uniforms) noexcept { + return Device::BaseBindings{ + GetInteger(uniform_blocks) - ReservedUniformBlocks, + GetInteger(shader_storage_blocks), + GetInteger(texture_image_units), + GetInteger(image_uniforms), + }; +} + } // Anonymous namespace Device::Device() { @@ -56,6 +75,29 @@ Device::Device() { const bool is_nvidia = vendor == "NVIDIA Corporation"; + // Reserve the first UBO for emulation bindings + base_bindings[0] = BaseBindings{ReservedUniformBlocks, 0, 0, 0}; + base_bindings[1] = base_bindings[0] + BuildBaseBindings(GL_MAX_VERTEX_UNIFORM_BLOCKS, + GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, + GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, + GL_MAX_VERTEX_IMAGE_UNIFORMS); + base_bindings[2] = + base_bindings[1] + BuildBaseBindings(GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, + GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, + GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS); + base_bindings[3] = + base_bindings[2] + BuildBaseBindings(GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, + GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS); + base_bindings[4] = base_bindings[3] + BuildBaseBindings(GL_MAX_GEOMETRY_UNIFORM_BLOCKS, + GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, + GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_GEOMETRY_IMAGE_UNIFORMS); + // Compute doesn't need any of that + base_bindings[5] = BaseBindings{0, 0, 0, 0}; + uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 6c86fe207..e7d3c48b0 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,14 +6,32 @@ #include #include "common/common_types.h" +#include "video_core/engines/shader_type.h" namespace OpenGL { -class Device { +static constexpr u32 EmulationUniformBlockBinding = 0; + +class Device final { public: + struct BaseBindings final { + u32 uniform_buffer{}; + u32 shader_storage_buffer{}; + u32 sampler{}; + u32 image{}; + }; + explicit Device(); explicit Device(std::nullptr_t); + const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { + return base_bindings[stage_index]; + } + + const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { + return GetBaseBindings(static_cast(shader_type)); + } + std::size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } @@ -67,6 +85,7 @@ private: static bool TestComponentIndexingBug(); static bool TestPreciseBug(); + std::array base_bindings; std::size_t uniform_buffer_alignment{}; std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8baa73ebf..5c5ad1f6c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -258,7 +258,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); - BaseBindings base_bindings; std::array clip_distances{}; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -277,25 +276,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } - GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu); - const auto [buffer, offset] = - buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - - // Bind the emulation info buffer - bind_ubo_pushbuffer.Push(buffer, offset, static_cast(sizeof(ubo))); - Shader shader{shader_cache.GetStageProgram(program)}; // Stage indices are 0 - 5 const std::size_t stage = index == 0 ? 0 : index - 1; SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader, base_bindings); - SetupDrawImages(stage, shader, base_bindings); + SetupDrawTextures(stage, shader); + SetupDrawImages(stage, shader); - const ProgramVariant variant(base_bindings, primitive_mode); - const auto [program_handle, next_bindings] = shader->GetHandle(variant); + const ProgramVariant variant(primitive_mode); + const auto program_handle = shader->GetHandle(variant); switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -326,8 +317,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // VertexB was combined with VertexA, so we skip the VertexB iteration ++index; } - - base_bindings = next_bindings; } SyncClipEnabled(clip_distances); @@ -612,8 +601,16 @@ void RasterizerOpenGL::DrawPrelude() { index_buffer_offset = SetupIndexBuffer(); // Prepare packed bindings. - bind_ubo_pushbuffer.Setup(0); - bind_ssbo_pushbuffer.Setup(0); + bind_ubo_pushbuffer.Setup(); + bind_ssbo_pushbuffer.Setup(); + + // Setup emulation uniform buffer. + GLShader::MaxwellUniformData ubo; + ubo.SetFromRegs(gpu); + const auto [buffer, offset] = + buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); + bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset, + static_cast(sizeof(ubo))); // Setup shaders and their used resources. texture_cache.GuardSamplers(true); @@ -754,7 +751,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z, launch_desc.shared_alloc, launch_desc.local_pos_alloc); - std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); + state.draw.shader_program = kernel->GetHandle(variant); state.draw.program_pipeline = 0; const std::size_t buffer_size = @@ -762,8 +759,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); - bind_ubo_pushbuffer.Setup(0); - bind_ssbo_pushbuffer.Setup(0); + bind_ubo_pushbuffer.Setup(); + bind_ssbo_pushbuffer.Setup(); SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); @@ -847,7 +844,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); if (params.pixel_format != pixel_format) { - LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); + LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); } screen_info.display_texture = surface->GetTexture(); @@ -858,17 +855,21 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); + const u32 base_binding = device.GetBaseBindings(stage_index).uniform_buffer; const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& shader_stage = stages[stage_index]; + for (const auto& entry : shader->GetShaderEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; - SetupConstBuffer(buffer, entry); + SetupConstBuffer(base_binding + entry.GetIndex(), buffer, entry); } } void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_UBO); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + + u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); @@ -876,15 +877,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { buffer.address = config.Address(); buffer.size = config.size; buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(buffer, entry); + SetupConstBuffer(binding++, buffer, entry); } } -void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, +void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, const GLShader::ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, + sizeof(float)); return; } @@ -895,18 +897,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, device.HasFastBufferSubData()); - bind_ubo_pushbuffer.Push(cbuf, offset, size); + bind_ubo_pushbuffer.Push(binding, cbuf, offset, size); } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + + u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read(addr)}; const auto size{memory_manager.Read(addr + 8)}; - SetupGlobalMemory(entry, gpu_addr, size); + SetupGlobalMemory(binding++, entry, gpu_addr, size); } } @@ -914,38 +918,35 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + + u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read(addr)}; const auto size{memory_manager.Read(addr + 8)}; - SetupGlobalMemory(entry, gpu_addr, size); + SetupGlobalMemory(binding++, entry, gpu_addr, size); } } -void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, +void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto [ssbo, buffer_offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); - bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); + bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast(size)); } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); const auto& entries = shader->GetShaderEntries().samplers; - ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), - "Exceeded the number of active textures."); - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = device.GetBaseBindings(stage_index).sampler; + for (const auto& entry : entries) { const auto shader_type = static_cast(stage_index); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); - SetupTexture(base_bindings.sampler + bindpoint, texture, entry); + SetupTexture(binding++, texture, entry); } } @@ -954,14 +955,10 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); const auto& entries = kernel->GetShaderEntries().samplers; - ASSERT_MSG(entries.size() <= std::size(state.textures), - "Exceeded the number of active textures."); - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = 0; + for (const auto& entry : entries) { const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); - SetupTexture(bindpoint, texture, entry); + SetupTexture(binding++, texture, entry); } } @@ -986,8 +983,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu texture.tic.w_source); } -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { const auto& maxwell3d = system.GPU().Maxwell3D(); const auto& entries = shader->GetShaderEntries().images; @@ -996,7 +992,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh const auto& entry = entries[bindpoint]; const auto shader_type = static_cast(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(base_bindings.image + bindpoint, tic, entry); + SetupImage(bindpoint, tic, entry); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6a2ce1586..0e47d71df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -89,7 +89,7 @@ private: void SetupComputeConstBuffers(const Shader& kernel); /// Configures a constant buffer. - void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, + void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, const GLShader::ConstBufferEntry& entry); /// Configures the current global memory entries to use for the draw command. @@ -99,15 +99,14 @@ private: void SetupComputeGlobalMemory(const Shader& kernel); /// Configures a constant buffer. - void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, + void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size); /// Syncs all the state, shaders, render targets and textures setting before a draw call. void DrawPrelude(); /// Configures the current textures to use for the draw command. - void SetupDrawTextures(std::size_t stage_index, const Shader& shader, - BaseBindings base_bindings); + void SetupDrawTextures(std::size_t stage_index, const Shader& shader); /// Configures the textures used in a compute shader. void SetupComputeTextures(const Shader& kernel); @@ -117,7 +116,7 @@ private: const GLShader::SamplerEntry& entry); /// Configures images in a graphics shader. - void SetupDrawImages(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings); + void SetupDrawImages(std::size_t stage_index, const Shader& shader); /// Configures images in a compute shader. void SetupComputeImages(const Shader& shader); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f474fb550..41ca005a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,28 +266,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp } source += '\n'; - auto base_bindings = variant.base_bindings; - if (!is_compute) { - source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); - } - - for (const auto& cbuf : entries.const_buffers) { - source += - fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); - } - for (const auto& gmem : entries.global_memory_entries) { - source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), - gmem.GetCbufOffset(), base_bindings.gmem++); - } - for (const auto& sampler : entries.samplers) { - source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), - base_bindings.sampler++); - } - for (const auto& image : entries.images) { - source += - fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); - } - if (shader_type == ShaderType::Geometry) { const auto [glsl_topology, debug_name, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); @@ -403,27 +381,21 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params, unspecialized.code_b)); } -std::tuple CachedShader::GetHandle(const ProgramVariant& variant) { +GLuint CachedShader::GetHandle(const ProgramVariant& variant) { EnsureValidLockerVariant(); const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); auto& program = entry->second; - if (is_cache_miss) { - program = BuildShader(device, unique_identifier, shader_type, code, code_b, - *curr_locker_variant->locker, variant); - disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); - - LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); + if (!is_cache_miss) { + return program->handle; } - auto base_bindings = variant.base_bindings; - base_bindings.cbuf += static_cast(entries.const_buffers.size()); - base_bindings.cbuf += STAGE_RESERVED_UBOS; - base_bindings.gmem += static_cast(entries.global_memory_entries.size()); - base_bindings.sampler += static_cast(entries.samplers.size()); - base_bindings.image += static_cast(entries.images.size()); + program = BuildShader(device, unique_identifier, shader_type, code, code_b, + *curr_locker_variant->locker, variant); + disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); - return {program->handle, base_bindings}; + LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); + return program->handle; } bool CachedShader::EnsureValidLockerVariant() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d23c8d6d4..7b1470db3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -87,7 +87,7 @@ public: } /// Gets the GL program handle for the shader - std::tuple GetHandle(const ProgramVariant& variant); + GLuint GetHandle(const ProgramVariant& variant); private: struct LockerVariant { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index caec565d1..5ad285c25 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -43,6 +43,9 @@ using namespace VideoCommon::Shader; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Operation = const OperationNode&; +class ASTDecompiler; +class ExprDecompiler; + enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; struct TextureAoffi {}; @@ -337,9 +340,6 @@ std::string FlowStackTopName(MetaStackClass stack) { return stage == ShaderType::Vertex; } -class ASTDecompiler; -class ExprDecompiler; - class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, @@ -621,7 +621,8 @@ private: void DeclareConstantBuffers() { for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, + const u32 binding = device.GetBaseBindings(stage).uniform_buffer + index; + code.AddLine("layout (std140, binding = {}) uniform {} {{", binding, GetConstBufferBlock(index)); code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); code.AddLine("}};"); @@ -630,6 +631,8 @@ private: } void DeclareGlobalMemory() { + u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; + for (const auto& gmem : ir.GetGlobalMemory()) { const auto& [base, usage] = gmem; @@ -642,8 +645,8 @@ private: qualifier += " writeonly"; } - code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", - base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); + code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, + GetGlobalMemoryBlock(base)); code.AddLine(" uint {}[];", GetGlobalMemory(base)); code.AddLine("}};"); code.AddNewLine(); @@ -653,9 +656,11 @@ private: void DeclareSamplers() { const auto& samplers = ir.GetSamplers(); for (const auto& sampler : samplers) { - const std::string name{GetSampler(sampler)}; - const std::string description{"layout (binding = SAMPLER_BINDING_" + - std::to_string(sampler.GetIndex()) + ") uniform"}; + const std::string name = GetSampler(sampler); + + const u32 binding = device.GetBaseBindings(stage).sampler + sampler.GetIndex(); + const std::string description = fmt::format("layout (binding = {}) uniform", binding); + std::string sampler_type = [&]() { if (sampler.IsBuffer()) { return "samplerBuffer"; @@ -732,10 +737,12 @@ private: qualifier += " writeonly"; } + const u32 binding = device.GetBaseBindings(stage).image + image.GetIndex(); + const char* format = image.IsAtomic() ? "r32ui, " : ""; const char* type_declaration = GetImageTypeDeclaration(image.GetType()); - code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format, - image.GetIndex(), qualifier, type_declaration, GetImage(image)); + code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding, + qualifier, type_declaration, GetImage(image)); } if (!images.empty()) { code.AddNewLine(); diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 09f62c8c4..cf874a09a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -53,11 +53,10 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 10; +constexpr u32 NativeVersion = 11; // Making sure sizes doesn't change by accident -static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 36); +static_assert(sizeof(ProgramVariant) == 20); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 917dbccdd..69a2fbdda 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -38,31 +38,13 @@ struct ShaderDiskCacheDump; using ProgramCode = std::vector; using ShaderDumpsMap = std::unordered_map; -/// Allocated bindings used by an OpenGL shader program -struct BaseBindings { - u32 cbuf{}; - u32 gmem{}; - u32 sampler{}; - u32 image{}; - - bool operator==(const BaseBindings& rhs) const noexcept { - return std::tie(cbuf, gmem, sampler, image) == - std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); - } - - bool operator!=(const BaseBindings& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::is_trivially_copyable_v); - /// Describes the different variants a program can be compiled with. struct ProgramVariant final { ProgramVariant() = default; /// Graphics constructor. - explicit constexpr ProgramVariant(BaseBindings base_bindings, GLenum primitive_mode) noexcept - : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} + explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept + : primitive_mode{primitive_mode} {} /// Compute constructor. explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, @@ -71,7 +53,6 @@ struct ProgramVariant final { shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} // Graphics specific parameters. - BaseBindings base_bindings{}; GLenum primitive_mode{}; // Compute specific parameters. @@ -82,10 +63,10 @@ struct ProgramVariant final { u32 local_memory_size{}; bool operator==(const ProgramVariant& rhs) const noexcept { - return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, - shared_memory_size, local_memory_size) == - std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, - rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size); + return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, + local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, + rhs.block_z, rhs.shared_memory_size, + rhs.local_memory_size); } bool operator!=(const ProgramVariant& rhs) const noexcept { @@ -117,21 +98,10 @@ struct ShaderDiskCacheUsage { namespace std { -template <> -struct hash { - std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { - return static_cast(bindings.cbuf) ^ - (static_cast(bindings.gmem) << 8) ^ - (static_cast(bindings.sampler) << 16) ^ - (static_cast(bindings.image) << 24); - } -}; - template <> struct hash { std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { - return std::hash{}(variant.base_bindings) ^ - (static_cast(variant.primitive_mode) << 6) ^ + return (static_cast(variant.primitive_mode) << 6) ^ static_cast(variant.block_x) ^ (static_cast(variant.block_y) << 32) ^ (static_cast(variant.block_z) << 48) ^ diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 2f601d550..296817efc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -2,9 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include + #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/shader/shader_ir.h" @@ -20,12 +24,13 @@ using VideoCommon::Shader::ShaderIR; std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { std::string out = GetCommonDeclarations(); - out += R"( -layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { + out += fmt::format(R"( +layout (std140, binding = {}) uniform vs_config {{ float y_direction; -}; +}}; -)"; +)", + EmulationUniformBlockBinding); out += Decompile(device, ir, ShaderType::Vertex, "vertex"); if (ir_b) { out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); @@ -44,12 +49,13 @@ void main() { std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { std::string out = GetCommonDeclarations(); - out += R"( -layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { + out += fmt::format(R"( +layout (std140, binding = {}) uniform gs_config {{ float y_direction; -}; +}}; -)"; +)", + EmulationUniformBlockBinding); out += Decompile(device, ir, ShaderType::Geometry, "geometry"); out += R"( @@ -62,7 +68,7 @@ void main() { std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { std::string out = GetCommonDeclarations(); - out += R"( + out += fmt::format(R"( layout (location = 0) out vec4 FragColor0; layout (location = 1) out vec4 FragColor1; layout (location = 2) out vec4 FragColor2; @@ -72,11 +78,12 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { +layout (std140, binding = {}) uniform fs_config {{ float y_direction; -}; +}}; -)"; +)", + EmulationUniformBlockBinding); out += Decompile(device, ir, ShaderType::Fragment, "fragment"); out += R"( diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index ccbe5912e..4cf3d0a8a 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -417,14 +417,20 @@ void OpenGLState::ApplyClipControl() { } void OpenGLState::ApplyTextures() { - if (const auto update = UpdateArray(cur_state.textures, textures)) { - glBindTextures(update->first, update->second, textures.data() + update->first); + const std::size_t size = std::size(textures); + for (std::size_t i = 0; i < size; ++i) { + if (UpdateValue(cur_state.textures[i], textures[i])) { + glBindTextureUnit(static_cast(i), textures[i]); + } } } void OpenGLState::ApplySamplers() { - if (const auto update = UpdateArray(cur_state.samplers, samplers)) { - glBindSamplers(update->first, update->second, samplers.data() + update->first); + const std::size_t size = std::size(samplers); + for (std::size_t i = 0; i < size; ++i) { + if (UpdateValue(cur_state.samplers[i], samplers[i])) { + glBindSampler(static_cast(i), samplers[i]); + } } } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index eaff22bda..fd53eb81a 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -96,8 +96,9 @@ public: GLenum operation = GL_COPY; } logic_op; - std::array textures = {}; - std::array samplers = {}; + static constexpr std::size_t NumSamplers = 32 * 5; + std::array textures = {}; + std::array samplers = {}; std::array images = {}; struct { diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index c504a2c1a..9770dda1c 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -3,7 +3,10 @@ // Refer to the license.txt file included. #include +#include + #include + #include #include "common/assert.h" @@ -48,34 +51,19 @@ BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{t BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; -void BindBuffersRangePushBuffer::Setup(GLuint first_) { - first = first_; - buffer_pointers.clear(); - offsets.clear(); - sizes.clear(); +void BindBuffersRangePushBuffer::Setup() { + entries.clear(); } -void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { - buffer_pointers.push_back(buffer); - offsets.push_back(offset); - sizes.push_back(size); +void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset, + GLsizeiptr size) { + entries.push_back(Entry{binding, buffer, offset, size}); } void BindBuffersRangePushBuffer::Bind() { - // Ensure sizes are valid. - const std::size_t count{buffer_pointers.size()}; - DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); - if (count == 0) { - return; + for (const Entry& entry : entries) { + glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size); } - - // Dereference buffers. - buffers.resize(count); - std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), - [](const GLuint* pointer) { return *pointer; }); - - glBindBuffersRange(target, first, static_cast(count), buffers.data(), offsets.data(), - sizes.data()); } void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 6c2b45546..d56153fe7 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -43,20 +43,22 @@ public: explicit BindBuffersRangePushBuffer(GLenum target); ~BindBuffersRangePushBuffer(); - void Setup(GLuint first_); + void Setup(); - void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); + void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size); void Bind(); private: - GLenum target{}; - GLuint first{}; - std::vector buffer_pointers; + struct Entry { + GLuint binding; + const GLuint* buffer; + GLintptr offset; + GLsizeiptr size; + }; - std::vector buffers; - std::vector offsets; - std::vector sizes; + GLenum target; + std::vector entries; }; void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); From f936b86c7c5a7db8dd9d7b5aed32eb9c47962d0d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 21 Nov 2019 14:21:27 -0300 Subject: [PATCH 09/15] gl_rasterizer: Add missing cbuf counter reset on compute --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5c5ad1f6c..f2494e0e2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -743,6 +743,8 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { + buffer_cache.Acquire(); + auto kernel = shader_cache.GetComputeKernel(code_addr); SetupComputeTextures(kernel); SetupComputeImages(kernel); From 36d9b409fc8793b617c4ce5525d8dc297cb4d579 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 21 Nov 2019 14:21:42 -0300 Subject: [PATCH 10/15] gl_shader_decompiler: Normalize cbuf bindings Stage and compute shaders were using a different binding counter. Normalize these. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ++-- .../renderer_opengl/gl_shader_decompiler.cpp | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f2494e0e2..de7b7ce93 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -857,13 +857,13 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); - const u32 base_binding = device.GetBaseBindings(stage_index).uniform_buffer; const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& shader_stage = stages[stage_index]; + u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; for (const auto& entry : shader->GetShaderEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; - SetupConstBuffer(base_binding + entry.GetIndex(), buffer, entry); + SetupConstBuffer(binding++, buffer, entry); } } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 5ad285c25..040370c83 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -619,10 +619,9 @@ private: } void DeclareConstantBuffers() { - for (const auto& entry : ir.GetConstantBuffers()) { - const auto [index, size] = entry; - const u32 binding = device.GetBaseBindings(stage).uniform_buffer + index; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding, + u32 binding = device.GetBaseBindings(stage).uniform_buffer; + for (const auto& [index, cbuf] : ir.GetConstantBuffers()) { + code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, GetConstBufferBlock(index)); code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); code.AddLine("}};"); @@ -632,10 +631,7 @@ private: void DeclareGlobalMemory() { u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - - for (const auto& gmem : ir.GetGlobalMemory()) { - const auto& [base, usage] = gmem; - + for (const auto& [base, usage] : ir.GetGlobalMemory()) { // Since we don't know how the shader will use the shader, hint the driver to disable as // much optimizations as possible std::string qualifier = "coherent volatile"; From e35b9597ef4ecf9d500462ef19510a36a564fd9d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 22 Nov 2019 04:34:14 -0300 Subject: [PATCH 11/15] gl_shader_decompiler: Normalize image bindings --- .../renderer_opengl/gl_rasterizer.cpp | 29 ++++++------------- .../renderer_opengl/gl_shader_decompiler.cpp | 20 +++++-------- src/video_core/renderer_opengl/gl_state.h | 3 +- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index de7b7ce93..fbb9bbac1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -940,12 +940,9 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemo void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_Texture); - const auto& gpu = system.GPU(); - const auto& maxwell3d = gpu.Maxwell3D(); - const auto& entries = shader->GetShaderEntries().samplers; - + const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).sampler; - for (const auto& entry : entries) { + for (const auto& entry : shader->GetShaderEntries().samplers) { const auto shader_type = static_cast(stage_index); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); SetupTexture(binding++, texture, entry); @@ -955,10 +952,8 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& compute = system.GPU().KeplerCompute(); - const auto& entries = kernel->GetShaderEntries().samplers; - u32 binding = 0; - for (const auto& entry : entries) { + for (const auto& entry : kernel->GetShaderEntries().samplers) { const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); SetupTexture(binding++, texture, entry); } @@ -987,26 +982,20 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { const auto& maxwell3d = system.GPU().Maxwell3D(); - const auto& entries = shader->GetShaderEntries().images; - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = device.GetBaseBindings(stage_index).image; + for (const auto& entry : shader->GetShaderEntries().images) { const auto shader_type = static_cast(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(bindpoint, tic, entry); + SetupImage(binding++, tic, entry); } } void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { const auto& compute = system.GPU().KeplerCompute(); - const auto& entries = shader->GetShaderEntries().images; - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = 0; + for (const auto& entry : shader->GetShaderEntries().images) { const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; - SetupImage(bindpoint, tic, entry); + SetupImage(binding++, tic, entry); } } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 040370c83..b17c4e703 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -650,12 +650,10 @@ private: } void DeclareSamplers() { - const auto& samplers = ir.GetSamplers(); - for (const auto& sampler : samplers) { + u32 binding = device.GetBaseBindings(stage).sampler; + for (const auto& sampler : ir.GetSamplers()) { const std::string name = GetSampler(sampler); - - const u32 binding = device.GetBaseBindings(stage).sampler + sampler.GetIndex(); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); + const std::string description = fmt::format("layout (binding = {}) uniform", binding++); std::string sampler_type = [&]() { if (sampler.IsBuffer()) { @@ -684,7 +682,7 @@ private: code.AddLine("{} {} {};", description, sampler_type, name); } - if (!samplers.empty()) { + if (!ir.GetSamplers().empty()) { code.AddNewLine(); } } @@ -724,8 +722,8 @@ private: } void DeclareImages() { - const auto& images{ir.GetImages()}; - for (const auto& image : images) { + u32 binding = device.GetBaseBindings(stage).image; + for (const auto& image : ir.GetImages()) { std::string qualifier = "coherent volatile"; if (image.IsRead() && !image.IsWritten()) { qualifier += " readonly"; @@ -733,14 +731,12 @@ private: qualifier += " writeonly"; } - const u32 binding = device.GetBaseBindings(stage).image + image.GetIndex(); - const char* format = image.IsAtomic() ? "r32ui, " : ""; const char* type_declaration = GetImageTypeDeclaration(image.GetType()); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding, + code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, qualifier, type_declaration, GetImage(image)); } - if (!images.empty()) { + if (!ir.GetImages().empty()) { code.AddNewLine(); } } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fd53eb81a..e53c2c5f2 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -97,9 +97,10 @@ public: } logic_op; static constexpr std::size_t NumSamplers = 32 * 5; + static constexpr std::size_t NumImages = 8 * 5; std::array textures = {}; std::array samplers = {}; - std::array images = {}; + std::array images = {}; struct { GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING From 894ad74b8776687bdf9699b53c75659fbc757941 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 22 Nov 2019 04:59:18 -0300 Subject: [PATCH 12/15] gl_shader_cache: Hack shared memory size The current shared memory size seems to be smaller than what the game actually uses. This makes Nvidia's driver consistently blow up; in the case of FE3H it made it explode on Qt's SwapBuffers while SDL2 worked just fine. For now keep this hack since it's still progress over the previous hardcoded shared memory size. --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 41ca005a1..370bdf052 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -279,8 +279,9 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp variant.block_x, variant.block_y, variant.block_z); if (variant.shared_memory_size > 0) { - source += fmt::format("shared uint smem[{}];", - Common::AlignUp(variant.shared_memory_size, 4) / 4); + // TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool + // avoids out of bound stores. Find out why shared memory size is being invalid. + source += fmt::format("shared uint smem[{}];", variant.shared_memory_size); } if (variant.local_memory_size > 0) { From 919ac2c4d3a81c94ae441f7298ebe4f3710a343f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 22 Nov 2019 21:12:31 -0300 Subject: [PATCH 13/15] gl_rasterizer: Disable compute shaders on Intel Intel's proprietary driver enters in a corrupt state when compute shaders are executed. For now, disable these. --- src/video_core/renderer_opengl/gl_device.cpp | 3 +++ src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 5cfa97fc2..3adf57c96 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -74,6 +74,7 @@ Device::Device() { const std::vector extensions = GetExtensions(); const bool is_nvidia = vendor == "NVIDIA Corporation"; + const bool is_intel = vendor == "Intel"; // Reserve the first UBO for emulation bindings base_bindings[0] = BaseBindings{ReservedUniformBlocks, 0, 0, 0}; @@ -110,6 +111,7 @@ Device::Device() { has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); has_precise_bug = TestPreciseBug(); + has_broken_compute = is_intel; has_fast_buffer_sub_data = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); @@ -127,6 +129,7 @@ Device::Device(std::nullptr_t) { has_image_load_formatted = true; has_variable_aoffi = true; has_component_indexing_bug = false; + has_broken_compute = false; has_precise_bug = false; } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e7d3c48b0..5433815b9 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -76,6 +76,10 @@ public: return has_precise_bug; } + bool HasBrokenCompute() const { + return has_broken_compute; + } + bool HasFastBufferSubData() const { return has_fast_buffer_sub_data; } @@ -97,6 +101,7 @@ private: bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; + bool has_broken_compute{}; bool has_fast_buffer_sub_data{}; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fbb9bbac1..f97ec06f0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -743,6 +743,10 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { + if (device.HasBrokenCompute()) { + return; + } + buffer_cache.Acquire(); auto kernel = shader_cache.GetComputeKernel(code_addr); From e3d7334be9c63e4aba7e90fec6c3bdedc743a785 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 22 Nov 2019 21:17:29 -0300 Subject: [PATCH 14/15] gl_state: Skip null texture binds glBindTextureUnit doesn't support null textures. Skip binding these. --- src/video_core/renderer_opengl/gl_state.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 4cf3d0a8a..39b3986d3 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -420,7 +420,11 @@ void OpenGLState::ApplyTextures() { const std::size_t size = std::size(textures); for (std::size_t i = 0; i < size; ++i) { if (UpdateValue(cur_state.textures[i], textures[i])) { - glBindTextureUnit(static_cast(i), textures[i]); + // BindTextureUnit doesn't support binding null textures, skip those binds. + // TODO(Rodrigo): Stop using null textures + if (textures[i] != 0) { + glBindTextureUnit(static_cast(i), textures[i]); + } } } } From dc2e83fa31a7bd3a7e1f1deb6f41cdb62be5f46e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 22 Nov 2019 21:22:01 -0300 Subject: [PATCH 15/15] gl_device: Reserve base bindings on limited devices SSBOs and other resources are limited per pipeline on Intel and AMD. Heuristically reserve resources per stage having in mind the reported OpenGL limits. --- src/video_core/renderer_opengl/gl_device.cpp | 112 +++++++++++++------ 1 file changed, 76 insertions(+), 36 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3adf57c96..a95bd4b2c 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include + #include #include "common/logging/log.h" @@ -20,6 +22,27 @@ namespace { // One uniform block is reserved for emulation purposes constexpr u32 ReservedUniformBlocks = 1; +constexpr u32 NumStages = 5; + +constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, + GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, + GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; + +constexpr std::array LimitSSBOs = { + GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, + GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, + GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; + +constexpr std::array LimitSamplers = { + GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_TEXTURE_IMAGE_UNITS}; + +constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, + GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, + GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, + GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; + template T GetInteger(GLenum pname) { GLint temporary; @@ -51,54 +74,71 @@ bool HasExtension(const std::vector& images, std::string_view return std::find(images.begin(), images.end(), extension) != images.end(); } -constexpr Device::BaseBindings operator+(Device::BaseBindings lhs, Device::BaseBindings rhs) { - return Device::BaseBindings{lhs.uniform_buffer + rhs.uniform_buffer, - lhs.shader_storage_buffer + rhs.shader_storage_buffer, - lhs.sampler + rhs.sampler, lhs.image + rhs.image}; +u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { + ASSERT(num >= amount); + if (limit) { + amount = std::min(amount, GetInteger(*limit)); + } + num -= amount; + return std::exchange(base, base + amount); } -Device::BaseBindings BuildBaseBindings(GLenum uniform_blocks, GLenum shader_storage_blocks, - GLenum texture_image_units, GLenum image_uniforms) noexcept { - return Device::BaseBindings{ - GetInteger(uniform_blocks) - ReservedUniformBlocks, - GetInteger(shader_storage_blocks), - GetInteger(texture_image_units), - GetInteger(image_uniforms), - }; +std::array BuildBaseBindings() noexcept { + std::array bindings; + + static std::array stage_swizzle = {0, 1, 2, 3, 4}; + const u32 total_ubos = GetInteger(GL_MAX_UNIFORM_BUFFER_BINDINGS); + const u32 total_ssbos = GetInteger(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); + const u32 total_samplers = GetInteger(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); + + u32 num_ubos = total_ubos - ReservedUniformBlocks; + u32 num_ssbos = total_ssbos; + u32 num_samplers = total_samplers; + + u32 base_ubo = ReservedUniformBlocks; + u32 base_ssbo = 0; + u32 base_samplers = 0; + + for (std::size_t i = 0; i < NumStages; ++i) { + const std::size_t stage = stage_swizzle[i]; + bindings[stage] = { + Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), + Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), + Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; + } + + u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); + u32 base_images = 0; + + // Reserve more image bindings on fragment and vertex stages. + bindings[4].image = + Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]); + bindings[0].image = + Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]); + + // Reserve the other image bindings. + const u32 total_extracted_images = num_images / (NumStages - 2); + for (std::size_t i = 2; i < NumStages; ++i) { + const std::size_t stage = stage_swizzle[i]; + bindings[stage].image = + Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); + } + + // Compute doesn't care about any of this. + bindings[5] = {0, 0, 0, 0}; + + return bindings; } } // Anonymous namespace -Device::Device() { +Device::Device() : base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast(glGetString(GL_VENDOR)); const std::vector extensions = GetExtensions(); const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_intel = vendor == "Intel"; - // Reserve the first UBO for emulation bindings - base_bindings[0] = BaseBindings{ReservedUniformBlocks, 0, 0, 0}; - base_bindings[1] = base_bindings[0] + BuildBaseBindings(GL_MAX_VERTEX_UNIFORM_BLOCKS, - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_VERTEX_IMAGE_UNIFORMS); - base_bindings[2] = - base_bindings[1] + BuildBaseBindings(GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, - GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS); - base_bindings[3] = - base_bindings[2] + BuildBaseBindings(GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS); - base_bindings[4] = base_bindings[3] + BuildBaseBindings(GL_MAX_GEOMETRY_UNIFORM_BLOCKS, - GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_IMAGE_UNIFORMS); - // Compute doesn't need any of that - base_bindings[5] = BaseBindings{0, 0, 0, 0}; - uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS);