From 862dc2b2b31b8353449f61dcc68d69255721f6c6 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Mon, 23 Aug 2021 19:52:07 -0400
Subject: [PATCH 1/3] structured_control_flow: Add DemoteCombinationPass

Some drivers misread data when demotes are interleaved in the program.
This change moves demote branches so they are checked at the end of the
program instead.

Fixes the "wireframe" issue in Pokemon SwSh on some drivers.
---
 .../maxwell/structured_control_flow.cpp      | 108 +++++++++++++++++-
 1 file changed, 107 insertions(+), 1 deletion(-)

diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 8b3e0a15c..f124dc8c0 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -660,6 +660,9 @@ public:
         IR::Block& first_block{*syntax_list.front().data.block};
         IR::IREmitter ir(first_block, first_block.begin());
         ir.Prologue();
+        if (uses_demote_to_helper) {
+            DemoteCombinationPass();
+        }
     }
 
 private:
@@ -809,7 +812,14 @@ private:
             }
             case StatementType::Return: {
                 ensure_block();
-                IR::IREmitter{*current_block}.Epilogue();
+                IR::Block* return_block{block_pool.Create(inst_pool)};
+                IR::IREmitter{*return_block}.Epilogue();
+                current_block->AddBranch(return_block);
+
+                auto& merge{syntax_list.emplace_back()};
+                merge.type = IR::AbstractSyntaxNode::Type::Block;
+                merge.data.block = return_block;
+
                 current_block = nullptr;
                 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
                 break;
@@ -824,6 +834,7 @@ private:
                 auto& merge{syntax_list.emplace_back()};
                 merge.type = IR::AbstractSyntaxNode::Type::Block;
                 merge.data.block = demote_block;
+                uses_demote_to_helper = true;
                 break;
             }
             case StatementType::Unreachable: {
@@ -855,11 +866,106 @@ private:
         return block_pool.Create(inst_pool);
     }
 
+    void DemoteCombinationPass() {
+        using Type = IR::AbstractSyntaxNode::Type;
+        std::vector<IR::Block*> demote_blocks;
+        std::vector<IR::U1> demote_conds;
+        u32 num_epilogues{};
+        for (const IR::AbstractSyntaxNode& node : syntax_list) {
+            if (node.type != Type::Block) {
+                continue;
+            }
+            for (const IR::Inst& inst : node.data.block->Instructions()) {
+                const IR::Opcode op{inst.GetOpcode()};
+                if (op == IR::Opcode::DemoteToHelperInvocation) {
+                    demote_blocks.push_back(node.data.block);
+                    break;
+                }
+                if (op == IR::Opcode::Epilogue) {
+                    ++num_epilogues;
+                }
+            }
+        }
+        if (demote_blocks.size() == 0) {
+            return;
+        }
+        if (num_epilogues > 1) {
+            LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
+            return;
+        }
+        s64 last_iterator_offset{};
+        auto& asl{syntax_list};
+        for (const IR::Block* demote_block : demote_blocks) {
+            const auto start_it{asl.begin() + last_iterator_offset};
+            auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+                return asn.type == Type::If && asn.data.if_node.body == demote_block;
+            })};
+            if (asl_it == asl.end()) {
+                // Demote without a conditional branch.
+                // No need to proceed since all fragment instances will be demoted regardless.
+ return; + } + const IR::Block* const end_if = asl_it->data.if_node.merge; + demote_conds.push_back(asl_it->data.if_node.cond); + last_iterator_offset = std::distance(asl.begin(), asl_it); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::Block && asn.data.block == demote_block; + }); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; + }); + asl_it = asl.erase(asl_it); + } + const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { + if (asn.type != Type::Block) { + return false; + } + for (const auto& inst : asn.data.block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return true; + } + } + return false; + }}; + const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; + const auto return_block_it{(reverse_it + 1).base()}; + + IR::IREmitter ir{*(return_block_it - 1)->data.block}; + IR::U1 cond(IR::Value(false)); + for (const auto& demote_cond : demote_conds) { + cond = ir.LogicalOr(cond, demote_cond); + } + cond.Inst()->DestructiveAddUsage(1); + + IR::AbstractSyntaxNode demote_if_node{}; + demote_if_node.type = Type::If; + demote_if_node.data.if_node.cond = cond; + demote_if_node.data.if_node.body = demote_blocks[0]; + demote_if_node.data.if_node.merge = return_block_it->data.block; + + IR::AbstractSyntaxNode demote_node{}; + demote_node.type = Type::Block; + demote_node.data.block = demote_blocks[0]; + + IR::AbstractSyntaxNode demote_endif_node{}; + demote_endif_node.type = Type::EndIf; + demote_endif_node.data.end_if.merge = return_block_it->data.block; + + asl.insert(return_block_it, demote_endif_node); + asl.insert(return_block_it, demote_node); + asl.insert(return_block_it, demote_if_node); + } + ObjectPool& stmt_pool; ObjectPool& inst_pool; ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; + bool uses_demote_to_helper{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 From 4fda7f1c821e1000210b1dfdf1074795b1aa9d96 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 23 Aug 2021 20:00:11 -0400 Subject: [PATCH 2/3] structured_control_flow: Conditionally invoke demote reorder pass This is only needed on select drivers when a fragment shader discards/demotes. 
---
 .../frontend/maxwell/structured_control_flow.cpp     | 10 ++++++----
 .../frontend/maxwell/structured_control_flow.h       |  9 ++++++---
 .../frontend/maxwell/translate_program.cpp           |  2 +-
 src/shader_recompiler/host_translate_info.h          |  5 +++--
 src/video_core/renderer_opengl/gl_device.h           |  4 ++++
 src/video_core/renderer_opengl/gl_shader_cache.cpp   |  1 +
 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp |  2 ++
 7 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index f124dc8c0..77d1cd0fc 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -20,6 +20,7 @@
 #include "shader_recompiler/frontend/maxwell/decode.h"
 #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
 #include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/object_pool.h"
 
 namespace Shader::Maxwell {
@@ -652,7 +653,7 @@ class TranslatePass {
 public:
     TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
                   ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
-                  IR::AbstractSyntaxList& syntax_list_)
+                  IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
         : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
           syntax_list{syntax_list_} {
         Visit(root_stmt, nullptr, nullptr);
@@ -660,7 +661,7 @@ public:
         IR::Block& first_block{*syntax_list.front().data.block};
         IR::IREmitter ir(first_block, first_block.begin());
         ir.Prologue();
-        if (uses_demote_to_helper) {
+        if (uses_demote_to_helper && host_info.needs_demote_reorder) {
             DemoteCombinationPass();
         }
     }
@@ -977,12 +978,13 @@ private:
 } // Anonymous namespace
 
 IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                                Environment& env, Flow::CFG& cfg) {
+                                Environment& env, Flow::CFG& cfg,
+                                const HostTranslateInfo& host_info) {
     ObjectPool<Statement> stmt_pool{64};
     GotoPass goto_pass{cfg, stmt_pool};
     Statement& root{goto_pass.RootStatement()};
     IR::AbstractSyntaxList syntax_list;
-    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
+    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};
     return syntax_list;
 }
 
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
index 88b083649..e38158da3 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -11,10 +11,13 @@
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/object_pool.h"
 
-namespace Shader::Maxwell {
+namespace Shader {
+struct HostTranslateInfo;
+namespace Maxwell {
 
 [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
                                               ObjectPool<IR::Block>& block_pool, Environment& env,
-                                              Flow::CFG& cfg);
+                                              Flow::CFG& cfg, const HostTranslateInfo& host_info);
 
-} // namespace Shader::Maxwell
+} // namespace Maxwell
+} // namespace Shader
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index c067d459c..012d55357 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
 IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
                              Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
     IR::Program program;
-    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
     program.blocks = GenerateBlocks(program.syntax_list);
     program.post_order_blocks = PostOrder(program.syntax_list.front());
     program.stage = env.ShaderStage();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 94a584219..96468b2e7 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -11,8 +11,9 @@ namespace Shader {
 
 /// Misc information about the host
 struct HostTranslateInfo {
-    bool support_float16{}; ///< True when the device supports 16-bit floats
-    bool support_int64{};   ///< True when the device supports 64-bit integers
+    bool support_float16{};      ///< True when the device supports 16-bit floats
+    bool support_int64{};        ///< True when the device supports 64-bit integers
+    bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
 };
 
 } // namespace Shader
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ee992aed4..de9e41659 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -156,6 +156,10 @@ public:
         return shader_backend;
     }
 
+    bool IsAmd() const {
+        return vendor_name == "ATI Technologies Inc.";
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1f4dda17e..b0e14182e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
       host_info{
           .support_float16 = false,
           .support_int64 = device.HasShaderInt64(),
+          .needs_demote_reorder = device.IsAmd(),
       } {
     if (use_asynchronous_shaders) {
         workers = CreateWorkers();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f316c4f92..31bfbcb06 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
     host_info = Shader::HostTranslateInfo{
         .support_float16 = device.IsFloat16Supported(),
         .support_int64 = device.IsShaderInt64Supported(),
+        .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
+                                driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
     };
 }

From 907dfbea71bbfd92290d1eff1d2f0f7a33b32dc1 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 25 Aug 2021 14:26:49 -0400
Subject: [PATCH 3/3] structured_control_flow: Skip reordering nested demote
 branches.

Nested demote branches add complexity when combining the condition if it
has not been initialized yet. Skip them for the time being.
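
As a rough illustration (GLSL-style and made up; the pass itself walks the
IR abstract syntax list, not GLSL), a demote branch nested inside another
branch is now left in place:

    #version 450
    #extension GL_EXT_demote_to_helper_invocation : require
    layout(location = 0) in vec2 uv;
    layout(location = 0) out vec4 color;

    void main() {
        if (uv.x < 0.5) {
            // The demote block below sits at branch_depth == 2 during the
            // syntax-list walk, so the pass skips it: combining its
            // condition into the end-of-program check would also require
            // ANDing in the enclosing condition (uv.x < 0.5).
            if (uv.y < 0.5) {
                demote;
            }
        }
        color = vec4(uv, 0.0, 1.0);
    }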
---
 .../frontend/maxwell/structured_control_flow.cpp           | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 77d1cd0fc..69eeaa3e6 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -872,10 +872,21 @@ private:
         std::vector<IR::Block*> demote_blocks;
         std::vector<IR::U1> demote_conds;
         u32 num_epilogues{};
+        u32 branch_depth{};
         for (const IR::AbstractSyntaxNode& node : syntax_list) {
+            if (node.type == Type::If) {
+                ++branch_depth;
+            }
+            if (node.type == Type::EndIf) {
+                --branch_depth;
+            }
             if (node.type != Type::Block) {
                 continue;
             }
+            if (branch_depth > 1) {
+                // Skip reordering nested demote branches.
+                continue;
+            }
             for (const IR::Inst& inst : node.data.block->Instructions()) {
                 const IR::Opcode op{inst.GetOpcode()};
                 if (op == IR::Opcode::DemoteToHelperInvocation) {