From 8af6e6a05207b1c9736bd80a89ec3aed1f96dfea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 24 Jun 2019 19:46:49 -0400 Subject: [PATCH 01/15] shader_ir: Implement a new shader scanner --- CMakeModules/GenerateSCMRev.cmake | 2 + src/common/CMakeLists.txt | 2 + src/video_core/CMakeLists.txt | 2 + src/video_core/shader/control_flow.cpp | 393 +++++++++++++++++++++++++ src/video_core/shader/control_flow.h | 55 ++++ src/video_core/shader/decode.cpp | 37 ++- 6 files changed, 476 insertions(+), 15 deletions(-) create mode 100644 src/video_core/shader/control_flow.cpp create mode 100644 src/video_core/shader/control_flow.h diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index dd65cfe422..abdc74428c 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -82,6 +82,8 @@ set(HASH_FILES "${VIDEO_CORE}/shader/decode/shift.cpp" "${VIDEO_CORE}/shader/decode/video.cpp" "${VIDEO_CORE}/shader/decode/xmad.cpp" + "${VIDEO_CORE}/shader/control_flow.cpp" + "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" "${VIDEO_CORE}/shader/node.h" "${VIDEO_CORE}/shader/node_helper.cpp" diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2554add28e..2b4266f297 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/shift.cpp" "${VIDEO_CORE}/shader/decode/video.cpp" "${VIDEO_CORE}/shader/decode/xmad.cpp" + "${VIDEO_CORE}/shader/control_flow.cpp" + "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" "${VIDEO_CORE}/shader/node.h" "${VIDEO_CORE}/shader/node_helper.cpp" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6839abe716..cd32c65d3b 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -103,6 +103,8 @@ add_library(video_core STATIC shader/decode/video.cpp shader/decode/xmad.cpp 
shader/decode/other.cpp + shader/control_flow.cpp + shader/control_flow.h shader/decode.cpp shader/node_helper.cpp shader/node_helper.h diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp new file mode 100644 index 0000000000..fcf22c7f25 --- /dev/null +++ b/src/video_core/shader/control_flow.cpp @@ -0,0 +1,393 @@ + +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/shader/control_flow.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +constexpr s32 unassigned_branch = -2; + +struct BlockBranchInfo { + Condition condition{}; + s32 address{exit_branch}; + bool kill{}; + bool is_sync{}; + bool is_brk{}; +}; + +struct BlockInfo { + BlockInfo() {} + u32 start{}; + u32 end{}; + bool visited{}; + BlockBranchInfo branch{}; + + bool IsInside(const u32 address) const { + return start <= address && address <= end; + } +}; + +struct Stamp { + Stamp() = default; + Stamp(u32 address, u32 target) : address{address}, target{target} {} + u32 address{}; + u32 target{}; + bool operator==(const Stamp& sb) const { + return std::tie(address, target) == std::tie(sb.address, sb.target); + } + bool operator<(const Stamp& sb) const { + return address < sb.address; + } + bool operator>(const Stamp& sb) const { + return address > sb.address; + } + bool operator<=(const Stamp& sb) const { + return address <= sb.address; + } + bool operator>=(const Stamp& sb) const { + return address >= sb.address; + } +}; + +struct CFGRebuildState { + explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) + : program_code{program_code}, program_size{program_size} { + // queries.clear(); + block_info.clear(); + labels.clear(); + visited_address.clear(); + ssy_labels.clear(); + pbk_labels.clear(); + inspect_queries.clear(); + } + + std::vector block_info{}; + 
std::list inspect_queries{}; + // std::list queries{}; + std::unordered_set visited_address{}; + std::unordered_set labels{}; + std::set ssy_labels; + std::set pbk_labels; + const ProgramCode& program_code; + const std::size_t program_size; +}; + +enum class BlockCollision : u32 { None = 0, Found = 1, Inside = 2 }; + +std::pair::iterator> TryGetBlock(CFGRebuildState& state, + u32 address) { + auto it = state.block_info.begin(); + while (it != state.block_info.end()) { + if (it->start == address) { + return {BlockCollision::Found, it}; + } + if (it->IsInside(address)) { + return {BlockCollision::Inside, it}; + } + it++; + } + return {BlockCollision::None, it}; +} + +struct ParseInfo { + BlockBranchInfo branch_info{}; + u32 end_address{}; +}; + +BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { + auto& it = state.block_info.emplace_back(); + it.start = start; + it.end = end; + state.visited_address.insert(start); + return ⁢ +} + +Pred GetPredicate(u32 index, bool negated) { + return static_cast(index + (negated ? 
8 : 0)); +} + +enum class ParseResult : u32 { + ControlCaught = 0, + BlockEnd = 1, + AbnormalFlow = 2, +}; + +ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) { + + u32 offset = static_cast(address); + u32 end_address = static_cast(state.program_size - 10U) * 8U; + + auto insert_label = ([](CFGRebuildState& state, u32 address) { + auto pair = state.labels.emplace(address); + if (pair.second) { + state.inspect_queries.push_back(address); + } + }); + + while (true) { + if (offset >= end_address) { + parse_info.branch_info.address = exit_branch; + break; + } + if (state.visited_address.count(offset) != 0) { + parse_info.branch_info.address = offset; + break; + } + const Instruction instr = {state.program_code[offset]}; + const auto opcode = OpCode::Decode(instr); + if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { + offset++; + continue; + } + + switch (opcode->get().GetId()) { + case OpCode::Id::EXIT: { + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + parse_info.branch_info.address = exit_branch; + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::BRA: { + if (instr.bra.constant_buffer != 0) { + return ParseResult::AbnormalFlow; + } + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } 
+ const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + u32 branch_offset = offset + instr.bra.GetBranchTarget(); + if (branch_offset == 0) { + parse_info.branch_info.address = exit_branch; + } else { + parse_info.branch_info.address = branch_offset; + } + insert_label(state, branch_offset); + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::SYNC: { + parse_info.branch_info.condition; + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + parse_info.branch_info.address = unassigned_branch; + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = true; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::BRK: { + parse_info.branch_info.condition; + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + parse_info.branch_info.address = unassigned_branch; + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = true; + 
parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::KIL: { + parse_info.branch_info.condition; + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + parse_info.branch_info.address = exit_branch; + parse_info.branch_info.kill = true; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::SSY: { + const u32 target = offset + instr.bra.GetBranchTarget(); + insert_label(state, target); + state.ssy_labels.emplace(offset, target); + break; + } + case OpCode::Id::PBK: { + const u32 target = offset + instr.bra.GetBranchTarget(); + insert_label(state, target); + state.pbk_labels.emplace(offset, target); + break; + } + default: + break; + } + + offset++; + } + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset - 1; + return ParseResult::BlockEnd; +} + +bool TryInspectAddress(CFGRebuildState& state) { + if (state.inspect_queries.empty()) { + return false; + } + u32 address = state.inspect_queries.front(); + state.inspect_queries.pop_front(); + auto search_result = TryGetBlock(state, address); + BlockInfo* block_info; + switch (search_result.first) { + case BlockCollision::Found: { + return true; + break; + } + case BlockCollision::Inside: { + // This case is the tricky one: + // We need to Split the block in 2 sepprate blocks + auto it = search_result.second; + block_info = CreateBlockInfo(state, address, it->end); + it->end = address - 1; + 
block_info->branch = it->branch; + BlockBranchInfo forward_branch{}; + forward_branch.address = address; + it->branch = forward_branch; + return true; + break; + } + default: + break; + } + ParseInfo parse_info; + ParseResult parse_result = ParseCode(state, address, parse_info); + if (parse_result == ParseResult::AbnormalFlow) { + // if it's the end of the program, end it safely + // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction + return false; + } + + block_info = CreateBlockInfo(state, address, parse_info.end_address); + block_info->branch = parse_info.branch_info; + if (parse_info.branch_info.condition.IsUnconditional()) { + return true; + } + + u32 fallthrough_address = parse_info.end_address + 1; + state.inspect_queries.push_front(fallthrough_address); + return true; +} + +bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, + ShaderCharacteristics& result_out) { + CFGRebuildState state{program_code, program_size}; + // Inspect Code and generate blocks + state.labels.clear(); + state.labels.emplace(start_address); + state.inspect_queries.push_back(start_address); + while (!state.inspect_queries.empty()) { + if (!TryInspectAddress(state)) { + return false; + } + } + std::sort(state.block_info.begin(), state.block_info.end(), + [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); + // Remove unvisited blocks + result_out.blocks.clear(); + result_out.decompilable = false; + result_out.start = start_address; + result_out.end = start_address; + for (auto& block : state.block_info) { + ShaderBlock new_block{}; + new_block.start = block.start; + new_block.end = block.end; + new_block.branch.cond = block.branch.condition; + new_block.branch.kills = block.branch.kill; + new_block.branch.address = block.branch.address; + result_out.end = std::max(result_out.end, block.end); + result_out.blocks.push_back(new_block); + } + if (result_out.decompilable) { + return true; + } + auto back = 
result_out.blocks.begin(); + auto next = std::next(back); + while (next != result_out.blocks.end()) { + if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { + back->end = next->end; + next = result_out.blocks.erase(next); + continue; + } + back = next; + next++; + } + return true; +} +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h new file mode 100644 index 0000000000..16736d57ac --- /dev/null +++ b/src/video_core/shader/control_flow.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include +#include +#include + +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::ConditionCode; +using Tegra::Shader::Pred; + +constexpr s32 exit_branch = -1; + +struct Condition { + Pred predicate{Pred::UnusedIndex}; + ConditionCode cc{ConditionCode::T}; + + bool IsUnconditional() const { + return (predicate == Pred::UnusedIndex) && (cc == ConditionCode::T); + } +}; + +struct ShaderBlock { + ShaderBlock() {} + ShaderBlock(const ShaderBlock& sb) = default; + u32 start{}; + u32 end{}; + struct Branch { + Condition cond{}; + bool kills{}; + s32 address{}; + bool operator==(const Branch& b) const { + return std::memcmp(this, &b, sizeof(Branch)) == 0; + } + } branch; + bool operator==(const ShaderBlock& sb) const { + return std::memcmp(this, &sb, sizeof(ShaderBlock)) == 0; + } +}; + +struct ShaderCharacteristics { + std::list blocks; + bool decompilable{}; + u32 start; + u32 end; +}; + +bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, + ShaderCharacteristics& result_out); + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2b..7f433c56b0 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include 
"video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/control_flow.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" @@ -51,25 +52,31 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - std::set labels; - const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); - if (exit_method != ExitMethod::AlwaysEnd) { - UNREACHABLE_MSG("Program does not always end"); - } - - if (labels.empty()) { - basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); + ShaderCharacteristics shader_info{}; + bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info); + if (can_proceed) { + coverage_begin = shader_info.start; + coverage_end = shader_info.end; + if (shader_info.decompilable) { + return; + } + // we can't decompile it, fallback to standard method + for (const auto& block : shader_info.blocks) { + basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); + } return; } + LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); - labels.insert(main_offset); - - for (const u32 label : labels) { - const auto next_it = labels.lower_bound(label + 1); - const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; - - basic_blocks.insert({label, DecodeRange(label, next_label)}); + // Now we need to deal with an undecompilable shader. We need to brute force + // a shader that captures every position. 
+ coverage_begin = shader_info.start; + const u32 shader_end = static_cast(MAX_PROGRAM_LENGTH); + coverage_end = shader_end; + for (u32 label = main_offset; label < shader_end; label++) { + basic_blocks.insert({label, DecodeRange(label, label + 1)}); } + return; } ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set& labels) { From c218ae4b022a9b47366e88441220fa6c66bcae4b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 24 Jun 2019 21:01:49 -0400 Subject: [PATCH 02/15] shader_ir: Remove the old scanner. --- src/video_core/shader/decode.cpp | 66 ------------------------------- src/video_core/shader/shader_ir.h | 11 ------ 2 files changed, 77 deletions(-) diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 7f433c56b0..65029d35e7 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -22,20 +22,6 @@ using Tegra::Shader::OpCode; namespace { -/// Merges exit method of two parallel branches. -constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { - if (a == ExitMethod::Undetermined) { - return b; - } - if (b == ExitMethod::Undetermined) { - return a; - } - if (a == b) { - return a; - } - return ExitMethod::Conditional; -} - /** * Returns whether the instruction at the specified offset is a 'sched' instruction. * Sched instructions always appear before a sequence of 3 instructions. 
@@ -79,58 +65,6 @@ void ShaderIR::Decode() { return; } -ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set& labels) { - const auto [iter, inserted] = - exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); - ExitMethod& exit_method = iter->second; - if (!inserted) - return exit_method; - - for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { - coverage_begin = std::min(coverage_begin, offset); - coverage_end = std::max(coverage_end, offset + 1); - - const Instruction instr = {program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode) - continue; - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - // The EXIT instruction can be predicated, which means that the shader can conditionally - // end on this instruction. We have to consider the case where the condition is not met - // and check the exit method of that other basic block. - using Tegra::Shader::Pred; - if (instr.pred.pred_index == static_cast(Pred::UnusedIndex)) { - return exit_method = ExitMethod::AlwaysEnd; - } else { - const ExitMethod not_met = Scan(offset + 1, end, labels); - return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); - } - } - case OpCode::Id::BRA: { - const u32 target = offset + instr.bra.GetBranchTarget(); - labels.insert(target); - const ExitMethod no_jmp = Scan(offset + 1, end, labels); - const ExitMethod jmp = Scan(target, end, labels); - return exit_method = ParallelExit(no_jmp, jmp); - } - case OpCode::Id::SSY: - case OpCode::Id::PBK: { - // The SSY and PBK use a similar encoding as the BRA instruction. - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer branching is not supported"); - const u32 target = offset + instr.bra.GetBranchTarget(); - labels.insert(target); - // Continue scanning for an exit method. 
- break; - } - default: - break; - } - } - return exit_method = ExitMethod::AlwaysReturn; -} - NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { NodeBlock basic_block; for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e225482081..e71462e024 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -26,14 +26,6 @@ using ProgramCode = std::vector; constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; -/// Describes the behaviour of code path of a given entry point and a return point. -enum class ExitMethod { - Undetermined, ///< Internal value. Only occur when analyzing JMP loop. - AlwaysReturn, ///< All code paths reach the return point. - Conditional, ///< Code path reaches the return point or an END instruction conditionally. - AlwaysEnd, ///< All code paths reach a END instruction. -}; - class ConstBuffer { public: explicit ConstBuffer(u32 max_offset, bool is_indirect) @@ -132,8 +124,6 @@ public: private: void Decode(); - ExitMethod Scan(u32 begin, u32 end, std::set& labels); - NodeBlock DecodeRange(u32 begin, u32 end); /** @@ -329,7 +319,6 @@ private: u32 coverage_begin{}; u32 coverage_end{}; - std::map, ExitMethod> exit_method_map; std::map basic_blocks; NodeBlock global_code; From 8a6fc529a968e007f01464abadd32f9b5eb0a26c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 24 Jun 2019 21:25:38 -0400 Subject: [PATCH 03/15] shader_ir: Implement BRX & BRA.CC --- src/video_core/engines/shader_bytecode.h | 16 +++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 9 ++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 9 ++++ src/video_core/shader/control_flow.cpp | 3 ++ src/video_core/shader/decode/other.cpp | 42 +++++++++++++++++-- src/video_core/shader/node.h | 1 + 6 files changed, 76 insertions(+), 4 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 
404d4f5aae..c3055602b9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1367,6 +1367,20 @@ union Instruction { } } bra; + union { + BitField<20, 24, u64> target; + BitField<5, 1, u64> constant_buffer; + + s32 GetBranchExtend() const { + // Sign extend the branch target offset + u32 mask = 1U << (24 - 1); + u32 value = static_cast(target); + // The branch offset is relative to the next instruction and is stored in bytes, so + // divide it by the size of an instruction and add 1 to it. + return static_cast((value ^ mask) - mask) / sizeof(Instruction) + 1; + } + } brx; + union { BitField<39, 1, u64> emit; // EmitVertex BitField<40, 1, u64> cut; // EndPrimitive @@ -1464,6 +1478,7 @@ public: BFE_IMM, BFI_IMM_R, BRA, + BRX, PBK, LD_A, LD_L, @@ -1738,6 +1753,7 @@ private: INST("111000101001----", Id::SSY, Type::Flow, "SSY"), INST("111000101010----", Id::PBK, Type::Flow, "PBK"), INST("111000100100----", Id::BRA, Type::Flow, "BRA"), + INST("111000100101----", Id::BRX, Type::Flow, "BRX"), INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), INST("111000110100---", Id::BRK, Type::Flow, "BRK"), INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 5f2f1510cd..cedfe30b1d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1555,6 +1555,14 @@ private: return {}; } + std::string BranchIndirect(Operation operation) { + const std::string op_a = VisitOperand(operation, 0, Type::Uint); + + code.AddLine("jmp_to = {};", op_a); + code.AddLine("break;"); + return {}; + } + std::string PushFlowStack(Operation operation) { const auto stack = std::get(operation.GetMeta()); const auto target = std::get_if(&*operation[0]); @@ -1789,6 +1797,7 @@ private: &GLSLDecompiler::ImageStore, &GLSLDecompiler::Branch, + 
&GLSLDecompiler::BranchIndirect, &GLSLDecompiler::PushFlowStack, &GLSLDecompiler::PopFlowStack, &GLSLDecompiler::Exit, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97ce214b18..1bb04607bc 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -949,6 +949,14 @@ private: return {}; } + Id BranchIndirect(Operation operation) { + const Id op_a = VisitOperand(operation, 0); + + Emit(OpStore(jmp_to, op_a)); + BranchingOp([&]() { Emit(OpBranch(continue_label)); }); + return {}; + } + Id PushFlowStack(Operation operation) { const auto target = std::get_if(&*operation[0]); ASSERT(target); @@ -1334,6 +1342,7 @@ private: &SPIRVDecompiler::ImageStore, &SPIRVDecompiler::Branch, + &SPIRVDecompiler::BranchIndirect, &SPIRVDecompiler::PushFlowStack, &SPIRVDecompiler::PopFlowStack, &SPIRVDecompiler::Exit, diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index fcf22c7f25..a9de8f8147 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -284,6 +284,9 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info state.pbk_labels.emplace(offset, target); break; } + case OpCode::Id::BRX: { + return ParseResult::AbnormalFlow; + } default: break; } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46a8ab82d..ed3c63781f 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -91,11 +91,45 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::BRA: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "BRA with constant buffers are not implemented"); + Node branch; + if (instr.bra.constant_buffer == 0) { + const u32 target = pc + instr.bra.GetBranchTarget(); + branch = 
Operation(OperationCode::Branch, Immediate(target)); + } else { + const u32 target = pc + 1; + const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, + true, PRECISE, op_a, Immediate(3)); + const Node operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); + branch = Operation(OperationCode::BranchIndirect, convert); + } - const u32 target = pc + instr.bra.GetBranchTarget(); - const Node branch = Operation(OperationCode::Branch, Immediate(target)); + const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; + if (cc != Tegra::Shader::ConditionCode::T) { + bb.push_back(Conditional(GetConditionCode(cc), {branch})); + } else { + bb.push_back(branch); + } + break; + } + case OpCode::Id::BRX: { + Node operand; + if (instr.brx.constant_buffer != 0) { + const s32 target = pc + 1; + const Node index = GetRegister(instr.gpr8); + const Node op_a = + GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, + true, PRECISE, op_a, Immediate(3)); + operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); + } else { + const s32 target = pc + instr.brx.GetBranchExtend(); + const Node op_a = GetRegister(instr.gpr8); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, + true, PRECISE, op_a, Immediate(3)); + operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); + } + const Node branch = Operation(OperationCode::BranchIndirect, operand); const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; if (cc != Tegra::Shader::ConditionCode::T) { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 0ac83fcf08..e468758a6d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -149,6 +149,7 @@ enum class OperationCode { ImageStore, 
/// (MetaImage, float[N] coords) -> void Branch, /// (uint branch_target) -> void + BranchIndirect,/// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void PopFlowStack, /// () -> void Exit, /// () -> void From 459fce3a8f26241ff2a68c323e75fb70e7e1ba79 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 07:57:32 -0400 Subject: [PATCH 04/15] shader_ir: propagate shader size to the IR --- .../renderer_opengl/gl_shader_cache.cpp | 22 +++++++++++++------ .../renderer_opengl/gl_shader_gen.cpp | 8 +++---- .../renderer_opengl/gl_shader_gen.h | 2 ++ src/video_core/shader/decode.cpp | 6 ++--- src/video_core/shader/shader_ir.cpp | 4 ++-- src/video_core/shader/shader_ir.h | 3 ++- 6 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f9b2b03a0a..5d76ee12db 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -129,9 +129,11 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { /// Hashes one (or two) program streams u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, - const ProgramCode& code_b) { - u64 unique_identifier = - Common::CityHash64(reinterpret_cast(code.data()), CalculateProgramSize(code)); + const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { + if (size_a == 0) { + size_a = CalculateProgramSize(code); + } + u64 unique_identifier = Common::CityHash64(reinterpret_cast(code.data()), size_a); if (program_type != Maxwell::ShaderProgram::VertexA) { return unique_identifier; } @@ -140,8 +142,11 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& std::size_t seed = 0; boost::hash_combine(seed, unique_identifier); - const u64 identifier_b = Common::CityHash64(reinterpret_cast(code_b.data()), - CalculateProgramSize(code_b)); + if 
(size_b == 0) { + size_b = CalculateProgramSize(code_b); + } + const u64 identifier_b = + Common::CityHash64(reinterpret_cast(code_b.data()), size_b); boost::hash_combine(seed, identifier_b); return static_cast(seed); } @@ -150,14 +155,17 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, ProgramCode program_code, ProgramCode program_code_b) { GLShader::ShaderSetup setup(program_code); + setup.program.size_a = CalculateProgramSize(program_code); + setup.program.size_b = 0; if (program_type == Maxwell::ShaderProgram::VertexA) { // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. // Conventional HW does not support this, so we combine VertexA and VertexB into one // stage here. setup.SetProgramB(program_code_b); + setup.program.size_b = CalculateProgramSize(program_code_b); } - setup.program.unique_identifier = - GetUniqueIdentifier(program_type, program_code, program_code_b); + setup.program.unique_identifier = GetUniqueIdentifier( + program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); switch (program_type) { case Maxwell::ShaderProgram::VertexA: diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9148629ec0..f9ee8429ee 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -29,14 +29,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { }; )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); out += program.first; if (setup.IsDualProgram()) { - const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); + const 
ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); ProgramResult program_b = Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); @@ -80,7 +80,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { }; )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); out += program.first; @@ -115,7 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { }; )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 0536c8a034..7cbc590f8f 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -27,6 +27,8 @@ struct ShaderSetup { ProgramCode code; ProgramCode code_b; // Used for dual vertex shaders u64 unique_identifier; + std::size_t size_a; + std::size_t size_b; } program; /// Used in scenarios where we have a dual vertex shaders diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 65029d35e7..09f55bd214 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); ShaderCharacteristics shader_info{}; - bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info); + bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); if (can_proceed) { coverage_begin = 
shader_info.start; coverage_end = shader_info.end; @@ -52,12 +52,12 @@ void ShaderIR::Decode() { } return; } - LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); + LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); // Now we need to deal with an undecompilable shader. We need to brute force // a shader that captures every position. coverage_begin = shader_info.start; - const u32 shader_end = static_cast(MAX_PROGRAM_LENGTH); + const u32 shader_end = static_cast(program_size / sizeof(u64)); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 11b545ccac..5994bfc4e9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) - : program_code{program_code}, main_offset{main_offset} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) + : program_code{program_code}, main_offset{main_offset}, program_size{size} { Decode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e71462e024..a67d4f390d 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -65,7 +65,7 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -316,6 +316,7 @@ private: const ProgramCode& program_code; const u32 main_offset; + const std::size_t program_size; u32 coverage_begin{}; u32 
coverage_end{}; From 926b80102f1c00675a9f3956258a066bfe0c3642 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 11:10:45 -0400 Subject: [PATCH 05/15] shader_ir: Decompile Flow Stack --- src/video_core/shader/control_flow.cpp | 167 +++++++++++++++++++++++-- src/video_core/shader/control_flow.h | 1 + src/video_core/shader/decode.cpp | 46 +++++++ src/video_core/shader/shader_ir.h | 3 + 4 files changed, 206 insertions(+), 11 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index a9de8f8147..3af4c61902 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include @@ -16,12 +16,80 @@ using Tegra::Shader::OpCode; constexpr s32 unassigned_branch = -2; +struct ControlStack { + std::array stack; + u32 index{}; + + ControlStack() {} + + ControlStack(const ControlStack& cp) { + index = cp.index; + std::memcpy(stack.data(), cp.stack.data(), index * sizeof(u32)); + } + + bool Compare(const ControlStack& cs) const { + if (index != cs.index) { + return false; + } + return std::memcmp(stack.data(), cs.stack.data(), index * sizeof(u32)) == 0; + } + + bool SoftCompare(const ControlStack& cs) const { + if (index == 0 || cs.index == 0) { + return index == cs.index; + } + return Top() == cs.Top(); + } + + u32 Size() const { + return index; + } + + u32 Top() const { + return stack[index - 1]; + } + + bool Push(u32 address) { + if (index >= 20) { + return false; + } + stack[index] = address; + index++; + return true; + } + + bool Pop() { + if (index == 0) { + return false; + } + index--; + return true; + } +}; + +struct Query { + Query() {} + Query(const Query& q) : address{q.address}, ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + u32 address; + ControlStack ssy_stack{}; + ControlStack pbk_stack{}; +}; + +struct BlockStack { + BlockStack() = default; + BlockStack(const BlockStack& b) : ssy_stack{b.ssy_stack}, 
pbk_stack{b.pbk_stack} {} + BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + ControlStack ssy_stack{}; + ControlStack pbk_stack{}; +}; + struct BlockBranchInfo { Condition condition{}; s32 address{exit_branch}; bool kill{}; bool is_sync{}; bool is_brk{}; + bool ignore{}; }; struct BlockInfo { @@ -64,19 +132,21 @@ struct CFGRebuildState { // queries.clear(); block_info.clear(); labels.clear(); - visited_address.clear(); + registered.clear(); ssy_labels.clear(); pbk_labels.clear(); inspect_queries.clear(); + queries.clear(); } std::vector block_info{}; std::list inspect_queries{}; - // std::list queries{}; - std::unordered_set visited_address{}; + std::list queries{}; + std::unordered_map registered{}; std::unordered_set labels{}; std::set ssy_labels; std::set pbk_labels; + std::unordered_map stacks{}; const ProgramCode& program_code; const std::size_t program_size; }; @@ -107,7 +177,8 @@ BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { auto& it = state.block_info.emplace_back(); it.start = start; it.end = end; - state.visited_address.insert(start); + u32 index = state.block_info.size() - 1; + state.registered.insert({start, index}); return ⁢ } @@ -136,10 +207,12 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info while (true) { if (offset >= end_address) { parse_info.branch_info.address = exit_branch; + parse_info.branch_info.ignore = false; break; } - if (state.visited_address.count(offset) != 0) { + if (state.registered.count(offset) != 0) { parse_info.branch_info.address = offset; + parse_info.branch_info.ignore = true; break; } const Instruction instr = {state.program_code[offset]}; @@ -168,6 +241,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = false; parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = false; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return 
ParseResult::ControlCaught; @@ -199,6 +273,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = false; parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = false; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -222,6 +297,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = false; parse_info.branch_info.is_sync = true; parse_info.branch_info.is_brk = false; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -245,6 +321,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = false; parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = true; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -268,6 +345,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = true; parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = false; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -322,6 +400,7 @@ bool TryInspectAddress(CFGRebuildState& state) { block_info->branch = it->branch; BlockBranchInfo forward_branch{}; forward_branch.address = address; + forward_branch.ignore = true; it->branch = forward_branch; return true; break; @@ -348,6 +427,58 @@ bool TryInspectAddress(CFGRebuildState& state) { return true; } +bool TryQuery(CFGRebuildState& state) { + auto gather_labels = ([](ControlStack& cc, std::set labels, BlockInfo& block) { + Stamp start{block.start, 0}; + Stamp end{block.end, 0}; + auto gather_start = labels.lower_bound(start); + auto gather_end = labels.upper_bound(end); + while (gather_start != gather_end) { + 
cc.Push(gather_start->target); + gather_start++; + } + }); + if (state.queries.empty()) { + return false; + } + Query& q = state.queries.front(); + u32 block_index = state.registered[q.address]; + BlockInfo& block = state.block_info[block_index]; + if (block.visited) { + BlockStack& stack = state.stacks[q.address]; + bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack); + state.queries.pop_front(); + return all_okay; + } + block.visited = true; + BlockStack bs{q}; + state.stacks[q.address] = bs; + Query q2(q); + state.queries.pop_front(); + gather_labels(q2.ssy_stack, state.ssy_labels, block); + gather_labels(q2.pbk_stack, state.pbk_labels, block); + if (!block.branch.condition.IsUnconditional()) { + q2.address = block.end + 1; + state.queries.push_back(q2); + } + Query conditional_query{q2}; + if (block.branch.is_sync) { + if (block.branch.address == unassigned_branch) { + block.branch.address = conditional_query.ssy_stack.Top(); + } + conditional_query.ssy_stack.Pop(); + } + if (block.branch.is_brk) { + if (block.branch.address == unassigned_branch) { + block.branch.address = conditional_query.pbk_stack.Top(); + } + conditional_query.pbk_stack.Pop(); + } + conditional_query.address = block.branch.address; + state.queries.push_back(conditional_query); + return true; +} + bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, ShaderCharacteristics& result_out) { CFGRebuildState state{program_code, program_size}; @@ -360,20 +491,34 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre return false; } } + // Decompile Stacks + Query start_query{}; + start_query.address = start_address; + state.queries.push_back(start_query); + bool decompiled = true; + while (!state.queries.empty()) { + if (!TryQuery(state)) { + decompiled = false; + break; + } + } + // Sort and organize results std::sort(state.block_info.begin(), state.block_info.end(), [](const BlockInfo& a, const 
BlockInfo& b) -> bool { return a.start < b.start; }); - // Remove unvisited blocks result_out.blocks.clear(); - result_out.decompilable = false; + result_out.decompilable = decompiled; result_out.start = start_address; result_out.end = start_address; for (auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; - new_block.branch.cond = block.branch.condition; - new_block.branch.kills = block.branch.kill; - new_block.branch.address = block.branch.address; + new_block.ignore_branch = block.branch.ignore; + if (!new_block.ignore_branch) { + new_block.branch.cond = block.branch.condition; + new_block.branch.kills = block.branch.kill; + new_block.branch.address = block.branch.address; + } result_out.end = std::max(result_out.end, block.end); result_out.blocks.push_back(new_block); } diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 16736d57ac..f5d37a231f 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -29,6 +29,7 @@ struct ShaderBlock { ShaderBlock(const ShaderBlock& sb) = default; u32 start{}; u32 end{}; + bool ignore_branch{}; struct Branch { Condition cond{}; bool kills{}; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 09f55bd214..1a74b70cb4 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -44,6 +44,17 @@ void ShaderIR::Decode() { coverage_begin = shader_info.start; coverage_end = shader_info.end; if (shader_info.decompilable) { + std::list& blocks = shader_info.blocks; + for (auto& block : blocks) { + NodeBlock nodes; + if (!block.ignore_branch) { + nodes = DecodeRange(block.start, block.end); + InsertControlFlow(nodes, block); + } else { + nodes = DecodeRange(block.start, block.end + 1); + } + basic_blocks.insert({block.start, nodes}); + } return; } // we can't decompile it, fallback to standard method @@ -73,6 +84,41 @@ NodeBlock 
ShaderIR::DecodeRange(u32 begin, u32 end) { return basic_block; } +void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { + auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { + Node result = n; + if (cond.cc != ConditionCode::T) { + result = Conditional(GetConditionCode(cond.cc), {result}); + } + if (cond.predicate != Pred::UnusedIndex) { + u32 pred = static_cast(cond.predicate); + bool is_neg = pred > 7; + if (is_neg) + pred -= 8; + result = Conditional(GetPredicate(pred, is_neg), {result}); + } + return result; + }); + if (block.branch.address < 0) { + if (block.branch.kills) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); +} + u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { // Ignore sched instructions when generating code. if (IsSchedInstruction(pc, main_offset)) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index a67d4f390d..a6729064b8 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -22,6 +22,8 @@ namespace VideoCommon::Shader { +struct ShaderBlock; + using ProgramCode = std::vector; constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; @@ -125,6 +127,7 @@ private: void Decode(); NodeBlock DecodeRange(u32 begin, u32 end); + void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); /** * Decodes a single instruction from Tegra to IR. 
From d5533b440c764093c04a4859b30fc78ddb0e0bbe Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 13:03:51 -0400 Subject: [PATCH 06/15] shader_ir: Unify blocks in decompiled shaders. --- .../renderer_opengl/gl_shader_decompiler.cpp | 10 ++-- src/video_core/shader/control_flow.cpp | 47 ++++++------------- src/video_core/shader/control_flow.h | 3 +- src/video_core/shader/decode.cpp | 45 +++++++++++++----- src/video_core/shader/decode/other.cpp | 30 +++++++++--- src/video_core/shader/node.h | 12 ++--- src/video_core/shader/shader_ir.h | 6 +++ 7 files changed, 90 insertions(+), 63 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index cedfe30b1d..bfc975a04d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -191,10 +191,12 @@ public: // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. 
- constexpr u32 FLOW_STACK_SIZE = 20; - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); + if (!ir.IsFlowStackDisabled()) { + constexpr u32 FLOW_STACK_SIZE = 20; + for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { + code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); + code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); + } } code.AddLine("while (true) {{"); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 3af4c61902..c99d95b574 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -104,28 +105,6 @@ struct BlockInfo { } }; -struct Stamp { - Stamp() = default; - Stamp(u32 address, u32 target) : address{address}, target{target} {} - u32 address{}; - u32 target{}; - bool operator==(const Stamp& sb) const { - return std::tie(address, target) == std::tie(sb.address, sb.target); - } - bool operator<(const Stamp& sb) const { - return address < sb.address; - } - bool operator>(const Stamp& sb) const { - return address > sb.address; - } - bool operator<=(const Stamp& sb) const { - return address <= sb.address; - } - bool operator>=(const Stamp& sb) const { - return address >= sb.address; - } -}; - struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) : program_code{program_code}, program_size{program_size} { @@ -144,8 +123,8 @@ struct CFGRebuildState { std::list queries{}; std::unordered_map registered{}; std::unordered_set labels{}; - std::set ssy_labels; - std::set pbk_labels; + std::map ssy_labels; + std::map pbk_labels; std::unordered_map stacks{}; const ProgramCode& program_code; const std::size_t program_size; @@ -393,7 +372,7 @@ bool 
TryInspectAddress(CFGRebuildState& state) { } case BlockCollision::Inside: { // This case is the tricky one: - // We need to Split the block in 2 sepprate blocks + // We need to Split the block in 2 separate blocks auto it = search_result.second; block_info = CreateBlockInfo(state, address, it->end); it->end = address - 1; @@ -428,13 +407,11 @@ bool TryInspectAddress(CFGRebuildState& state) { } bool TryQuery(CFGRebuildState& state) { - auto gather_labels = ([](ControlStack& cc, std::set labels, BlockInfo& block) { - Stamp start{block.start, 0}; - Stamp end{block.end, 0}; - auto gather_start = labels.lower_bound(start); - auto gather_end = labels.upper_bound(end); + auto gather_labels = ([](ControlStack& cc, std::map& labels, BlockInfo& block) { + auto gather_start = labels.lower_bound(block.start); + auto gather_end = labels.upper_bound(block.end); while (gather_start != gather_end) { - cc.Push(gather_start->target); + cc.Push(gather_start->second); gather_start++; } }); @@ -444,9 +421,13 @@ bool TryQuery(CFGRebuildState& state) { Query& q = state.queries.front(); u32 block_index = state.registered[q.address]; BlockInfo& block = state.block_info[block_index]; + // If the block is visited, check if the stacks match, else gather the ssy/pbk + // labels into the current stack and look if the branch at the end of the block + // consumes a label.
Schedule new queries accordingly if (block.visited) { BlockStack& stack = state.stacks[q.address]; - bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack); + bool all_okay = (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) && + (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack)); state.queries.pop_front(); return all_okay; } @@ -523,8 +504,10 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre result_out.blocks.push_back(new_block); } if (result_out.decompilable) { + result_out.labels = std::move(state.labels); return true; } + // If it's not decompilable, merge the unlabelled blocks together auto back = result_out.blocks.begin(); auto next = std::next(back); while (next != result_out.blocks.end()) { diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index f5d37a231f..4a2cd622c9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -48,6 +48,7 @@ struct ShaderCharacteristics { bool decompilable{}; u32 start; u32 end; + std::unordered_set labels{}; }; bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 1a74b70cb4..f9b1960daa 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -38,32 +38,47 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); + disable_flow_stack = false; ShaderCharacteristics shader_info{}; bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); if (can_proceed) { coverage_begin = shader_info.start; coverage_end = 
shader_info.end; if (shader_info.decompilable) { - std::list& blocks = shader_info.blocks; - for (auto& block : blocks) { - NodeBlock nodes; - if (!block.ignore_branch) { - nodes = DecodeRange(block.start, block.end); - InsertControlFlow(nodes, block); - } else { - nodes = DecodeRange(block.start, block.end + 1); + disable_flow_stack = true; + auto insert_block = ([this](NodeBlock& nodes, u32 label) { + if (label == exit_branch) { + return; + } + basic_blocks.insert({label, nodes}); + }); + std::list& blocks = shader_info.blocks; + NodeBlock current_block; + u32 current_label = exit_branch; + for (auto& block : blocks) { + if (shader_info.labels.count(block.start) != 0) { + insert_block(current_block, current_label); + current_block.clear(); + current_label = block.start; + } + if (!block.ignore_branch) { + DecodeRangeInner(current_block, block.start, block.end); + InsertControlFlow(current_block, block); + } else { + DecodeRangeInner(current_block, block.start, block.end + 1); } - basic_blocks.insert({block.start, nodes}); } + insert_block(current_block, current_label); return; } + LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); // we can't decompile it, fallback to standard method for (const auto& block : shader_info.blocks) { basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); } return; } - LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); + LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); // Now we need to deal with an undecompilable shader. We need to brute force // a shader that captures every position. @@ -78,12 +93,16 @@ void ShaderIR::Decode() { NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { NodeBlock basic_block; - for (u32 pc = begin; pc < (begin > end ? 
MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(basic_block, pc); - } + DecodeRangeInner(basic_block, begin, end); return basic_block; } +void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { + for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { + pc = DecodeInstr(bb, pc); + } +} + void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { Node result = n; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index ed3c63781f..42e3de02fc 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -98,9 +98,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { } else { const u32 target = pc + 1; const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, - true, PRECISE, op_a, Immediate(3)); - const Node operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, + PRECISE, op_a, Immediate(3)); + const Node operand = + Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); branch = Operation(OperationCode::BranchIndirect, convert); } @@ -119,14 +120,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Node index = GetRegister(instr.gpr8); const Node op_a = GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, - true, PRECISE, op_a, Immediate(3)); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, + PRECISE, op_a, Immediate(3)); operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); } else { const s32 target = pc + instr.brx.GetBranchExtend(); const Node op_a = GetRegister(instr.gpr8); - 
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, - true, PRECISE, op_a, Immediate(3)); + const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, + PRECISE, op_a, Immediate(3)); operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); } const Node branch = Operation(OperationCode::BranchIndirect, operand); @@ -143,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "Constant buffer flow is not supported"); + if (disable_flow_stack) { + break; + } + // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. const u32 target = pc + instr.bra.GetBranchTarget(); bb.push_back( @@ -153,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, "Constant buffer PBK is not supported"); + if (disable_flow_stack) { + break; + } + // PBK pushes to a stack the address where BRK will jump to. 
const u32 target = pc + instr.bra.GetBranchTarget(); bb.push_back( @@ -164,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", static_cast(cc)); + if (disable_flow_stack) { + break; + } + // The SYNC opcode jumps to the address previously set by the SSY opcode bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); break; @@ -172,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", static_cast(cc)); + if (disable_flow_stack) { + break; + } // The BRK opcode jumps to the address previously set by the PBK opcode bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index e468758a6d..7427ed896d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -148,12 +148,12 @@ enum class OperationCode { ImageStore, /// (MetaImage, float[N] coords) -> void - Branch, /// (uint branch_target) -> void - BranchIndirect,/// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void + Branch, /// (uint branch_target) -> void + BranchIndirect, /// (uint branch_target) -> void + PushFlowStack, /// (uint branch_target) -> void + PopFlowStack, /// () -> void + Exit, /// () -> void + Discard, /// () -> void EmitVertex, /// () -> void EndPrimitive, /// () -> void diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index a6729064b8..928ac7cb5c 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -123,10 +123,15 @@ public: return header; } + bool IsFlowStackDisabled() const { + return disable_flow_stack; + } + private: void 
Decode(); NodeBlock DecodeRange(u32 begin, u32 end); + void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); /** @@ -320,6 +325,7 @@ private: const ProgramCode& program_code; const u32 main_offset; const std::size_t program_size; + bool disable_flow_stack{}; u32 coverage_begin{}; u32 coverage_end{}; From 01b21ee1e8e7455dd84ee7f22d33426caaaafdb3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 20:15:40 -0400 Subject: [PATCH 07/15] shader_ir: Corrections, documenting and asserting control_flow --- src/video_core/shader/control_flow.cpp | 80 ++++++++++++-------------- src/video_core/shader/control_flow.h | 16 ++++-- src/video_core/shader/decode.cpp | 10 ++-- 3 files changed, 54 insertions(+), 52 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index c99d95b574..deef0cd3a8 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -1,3 +1,6 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. #include #include @@ -17,16 +20,18 @@ using Tegra::Shader::OpCode; constexpr s32 unassigned_branch = -2; +/*** + * 'ControlStack' represents a static stack of control jumps such as SSY and PBK + * stacks in Maxwell. 
+ ***/ struct ControlStack { - std::array stack; + static constexpr std::size_t stack_fixed_size = 20; + std::array stack{}; u32 index{}; ControlStack() {} - ControlStack(const ControlStack& cp) { - index = cp.index; - std::memcpy(stack.data(), cp.stack.data(), index * sizeof(u32)); - } + ControlStack(const ControlStack& cp) = default; bool Compare(const ControlStack& cs) const { if (index != cs.index) { @@ -35,6 +40,7 @@ struct ControlStack { return std::memcmp(stack.data(), cs.stack.data(), index * sizeof(u32)) == 0; } + /// This compare just compares the top of the stack against one another bool SoftCompare(const ControlStack& cs) const { if (index == 0 || cs.index == 0) { return index == cs.index; @@ -51,7 +57,7 @@ struct ControlStack { } bool Push(u32 address) { - if (index >= 20) { + if (index >= stack.size()) { return false; } stack[index] = address; @@ -70,21 +76,23 @@ struct ControlStack { struct Query { Query() {} - Query(const Query& q) : address{q.address}, ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - u32 address; + Query(const Query& q) = default; + u32 address{}; ControlStack ssy_stack{}; ControlStack pbk_stack{}; }; struct BlockStack { BlockStack() = default; - BlockStack(const BlockStack& b) : ssy_stack{b.ssy_stack}, pbk_stack{b.pbk_stack} {} + BlockStack(const BlockStack& b) = default; BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} ControlStack ssy_stack{}; ControlStack pbk_stack{}; }; struct BlockBranchInfo { + BlockBranchInfo() = default; + BlockBranchInfo(const BlockBranchInfo& b) = default; Condition condition{}; s32 address{exit_branch}; bool kill{}; @@ -94,7 +102,7 @@ struct BlockBranchInfo { }; struct BlockInfo { - BlockInfo() {} + BlockInfo() = default; u32 start{}; u32 end{}; bool visited{}; @@ -107,24 +115,15 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) - : program_code{program_code}, 
program_size{program_size} { - // queries.clear(); - block_info.clear(); - labels.clear(); - registered.clear(); - ssy_labels.clear(); - pbk_labels.clear(); - inspect_queries.clear(); - queries.clear(); - } + : program_code{program_code}, program_size{program_size} {} std::vector block_info{}; std::list inspect_queries{}; std::list queries{}; std::unordered_map registered{}; std::unordered_set labels{}; - std::map ssy_labels; - std::map pbk_labels; + std::map ssy_labels{}; + std::map pbk_labels{}; std::unordered_map stacks{}; const ProgramCode& program_code; const std::size_t program_size; @@ -156,7 +155,7 @@ BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { auto& it = state.block_info.emplace_back(); it.start = start; it.end = end; - u32 index = state.block_info.size() - 1; + const u32 index = static_cast(state.block_info.size() - 1); state.registered.insert({start, index}); return ⁢ } @@ -172,11 +171,10 @@ enum class ParseResult : u32 { }; ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) { - u32 offset = static_cast(address); - u32 end_address = static_cast(state.program_size - 10U) * 8U; + const u32 end_address = static_cast(state.program_size - 10U) * 8U; - auto insert_label = ([](CFGRebuildState& state, u32 address) { + const auto insert_label = ([](CFGRebuildState& state, u32 address) { auto pair = state.labels.emplace(address); if (pair.second) { state.inspect_queries.push_back(address); @@ -361,20 +359,18 @@ bool TryInspectAddress(CFGRebuildState& state) { if (state.inspect_queries.empty()) { return false; } - u32 address = state.inspect_queries.front(); + const u32 address = state.inspect_queries.front(); state.inspect_queries.pop_front(); - auto search_result = TryGetBlock(state, address); - BlockInfo* block_info; + const auto search_result = TryGetBlock(state, address); switch (search_result.first) { case BlockCollision::Found: { return true; - break; } case BlockCollision::Inside: { // This case is 
the tricky one: // We need to Split the block in 2 sepparate blocks auto it = search_result.second; - block_info = CreateBlockInfo(state, address, it->end); + BlockInfo* block_info = CreateBlockInfo(state, address, it->end); it->end = address - 1; block_info->branch = it->branch; BlockBranchInfo forward_branch{}; @@ -382,34 +378,32 @@ bool TryInspectAddress(CFGRebuildState& state) { forward_branch.ignore = true; it->branch = forward_branch; return true; - break; } default: break; } ParseInfo parse_info; - ParseResult parse_result = ParseCode(state, address, parse_info); + const ParseResult parse_result = ParseCode(state, address, parse_info); if (parse_result == ParseResult::AbnormalFlow) { - // if it's the end of the program, end it safely // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction return false; } - block_info = CreateBlockInfo(state, address, parse_info.end_address); + BlockInfo* block_info = CreateBlockInfo(state, address, parse_info.end_address); block_info->branch = parse_info.branch_info; if (parse_info.branch_info.condition.IsUnconditional()) { return true; } - u32 fallthrough_address = parse_info.end_address + 1; + const u32 fallthrough_address = parse_info.end_address + 1; state.inspect_queries.push_front(fallthrough_address); return true; } bool TryQuery(CFGRebuildState& state) { - auto gather_labels = ([](ControlStack& cc, std::map& labels, BlockInfo& block) { + const auto gather_labels = ([](ControlStack& cc, std::map& labels, BlockInfo& block) { auto gather_start = labels.lower_bound(block.start); - auto gather_end = labels.upper_bound(block.end); + const auto gather_end = labels.upper_bound(block.end); while (gather_start != gather_end) { cc.Push(gather_start->second); gather_start++; @@ -419,21 +413,21 @@ bool TryQuery(CFGRebuildState& state) { return false; } Query& q = state.queries.front(); - u32 block_index = state.registered[q.address]; + const u32 block_index = state.registered[q.address]; BlockInfo& block = 
state.block_info[block_index]; // If the block is visted, check if the stacks match, else gather the ssy/pbk // labels into the current stack and look if the branch at the end of the block // consumes a label. Schedule new queries accordingly if (block.visited) { BlockStack& stack = state.stacks[q.address]; - bool all_okay = (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) && - (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack)); + const bool all_okay = + (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) && + (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack)); state.queries.pop_front(); return all_okay; } block.visited = true; - BlockStack bs{q}; - state.stacks[q.address] = bs; + state.stacks[q.address] = BlockStack{q}; Query q2(q); state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 4a2cd622c9..4689b0c102 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -1,3 +1,7 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ #pragma once #include @@ -20,12 +24,15 @@ struct Condition { ConditionCode cc{ConditionCode::T}; bool IsUnconditional() const { - return (predicate == Pred::UnusedIndex) && (cc == ConditionCode::T); + return predicate == Pred::UnusedIndex && cc == ConditionCode::T; + } + bool operator==(const Condition& other) const { + return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); } }; struct ShaderBlock { - ShaderBlock() {} + ShaderBlock() = default; ShaderBlock(const ShaderBlock& sb) = default; u32 start{}; u32 end{}; @@ -35,11 +42,12 @@ struct ShaderBlock { bool kills{}; s32 address{}; bool operator==(const Branch& b) const { - return std::memcmp(this, &b, sizeof(Branch)) == 0; + return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); } } branch; bool operator==(const ShaderBlock& sb) const { - return std::memcmp(this, &sb, sizeof(ShaderBlock)) == 0; + return std::tie(start, end, ignore_branch, branch) == + std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); } }; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index f9b1960daa..b4a282cbd1 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -46,7 +46,7 @@ void ShaderIR::Decode() { coverage_end = shader_info.end; if (shader_info.decompilable) { disable_flow_stack = true; - auto insert_block = ([this](NodeBlock& nodes, u32 label) { + const auto insert_block = ([this](NodeBlock& nodes, u32 label) { if (label == exit_branch) { return; } @@ -88,7 +88,6 @@ void ShaderIR::Decode() { for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); } - return; } NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { @@ -104,16 +103,17 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { } void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { + const 
auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { Node result = n; if (cond.cc != ConditionCode::T) { result = Conditional(GetConditionCode(cond.cc), {result}); } if (cond.predicate != Pred::UnusedIndex) { u32 pred = static_cast(cond.predicate); - bool is_neg = pred > 7; - if (is_neg) + const bool is_neg = pred > 7; + if (is_neg) { pred -= 8; + } result = Conditional(GetPredicate(pred, is_neg), {result}); } return result; From d45fed303055fa699377bedcc3a7973bd03b7870 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 20:40:38 -0400 Subject: [PATCH 08/15] shader_ir: Remove unnecessary constructors and use optional for ScanFlow result --- src/video_core/shader/control_flow.cpp | 21 ++++++--------------- src/video_core/shader/control_flow.h | 14 ++++++-------- src/video_core/shader/decode.cpp | 10 +++++----- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index deef0cd3a8..6259ad5947 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -29,10 +29,6 @@ struct ControlStack { std::array stack{}; u32 index{}; - ControlStack() {} - - ControlStack(const ControlStack& cp) = default; - bool Compare(const ControlStack& cs) const { if (index != cs.index) { return false; @@ -75,8 +71,6 @@ struct ControlStack { }; struct Query { - Query() {} - Query(const Query& q) = default; u32 address{}; ControlStack ssy_stack{}; ControlStack pbk_stack{}; @@ -91,8 +85,6 @@ struct BlockStack { }; struct BlockBranchInfo { - BlockBranchInfo() = default; - BlockBranchInfo(const BlockBranchInfo& b) = default; Condition condition{}; s32 address{exit_branch}; bool kill{}; @@ -102,7 +94,6 @@ struct BlockBranchInfo { }; struct BlockInfo { - BlockInfo() = default; u32 start{}; u32 end{}; bool visited{}; @@ -454,8 +445,8 @@ bool TryQuery(CFGRebuildState& state) { return true; } -bool ScanFlow(const ProgramCode& program_code, 
u32 program_size, u32 start_address, - ShaderCharacteristics& result_out) { +std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, + u32 start_address) { CFGRebuildState state{program_code, program_size}; // Inspect Code and generate blocks state.labels.clear(); @@ -463,7 +454,7 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre state.inspect_queries.push_back(start_address); while (!state.inspect_queries.empty()) { if (!TryInspectAddress(state)) { - return false; + return {}; } } // Decompile Stacks @@ -480,7 +471,7 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre // Sort and organize results std::sort(state.block_info.begin(), state.block_info.end(), [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - result_out.blocks.clear(); + ShaderCharacteristics result_out{}; result_out.decompilable = decompiled; result_out.start = start_address; result_out.end = start_address; @@ -499,7 +490,7 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre } if (result_out.decompilable) { result_out.labels = std::move(state.labels); - return true; + return {result_out}; } // If it's not decompilable, merge the unlabelled blocks together auto back = result_out.blocks.begin(); @@ -513,6 +504,6 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre back = next; next++; } - return true; + return {result_out}; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 4689b0c102..5e8ea32716 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -32,8 +32,6 @@ struct Condition { }; struct ShaderBlock { - ShaderBlock() = default; - ShaderBlock(const ShaderBlock& sb) = default; u32 start{}; u32 end{}; bool ignore_branch{}; @@ -44,7 +42,7 @@ struct ShaderBlock { bool operator==(const Branch& b) const { 
return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); } - } branch; + } branch{}; bool operator==(const ShaderBlock& sb) const { return std::tie(start, end, ignore_branch, branch) == std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); @@ -52,14 +50,14 @@ struct ShaderBlock { }; struct ShaderCharacteristics { - std::list blocks; + std::list blocks{}; bool decompilable{}; - u32 start; - u32 end; + u32 start{}; + u32 end{}; std::unordered_set labels{}; }; -bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, - ShaderCharacteristics& result_out); +std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, + u32 start_address); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index b4a282cbd1..15cb33bbf3 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,9 +39,9 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - ShaderCharacteristics shader_info{}; - bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); - if (can_proceed) { + const auto info = ScanFlow(program_code, program_code.size(), main_offset); + if (info) { + const auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; if (shader_info.decompilable) { @@ -52,7 +52,7 @@ void ShaderIR::Decode() { } basic_blocks.insert({label, nodes}); }); - std::list& blocks = shader_info.blocks; + const auto& blocks = shader_info.blocks; NodeBlock current_block; u32 current_label = exit_branch; for (auto& block : blocks) { @@ -82,7 +82,7 @@ void ShaderIR::Decode() { // Now we need to deal with an undecompilable shader. We need to brute force // a shader that captures every position. 
- coverage_begin = shader_info.start; + coverage_begin = main_offset; const u32 shader_end = static_cast(program_size / sizeof(u64)); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { From cfb3db1a32975583b94a0df5f3ff0020254208c0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 20:56:04 -0400 Subject: [PATCH 09/15] shader_ir: Correct max sizing --- src/video_core/shader/control_flow.cpp | 2 +- src/video_core/shader/decode.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 6259ad5947..a26de67958 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -163,7 +163,7 @@ enum class ParseResult : u32 { ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) { u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_size - 10U) * 8U; + const u32 end_address = static_cast(state.program_size / 8U); const auto insert_label = ([](CFGRebuildState& state, u32 address) { auto pair = state.labels.emplace(address); diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 15cb33bbf3..b0bd6630fc 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - const auto info = ScanFlow(program_code, program_code.size(), main_offset); + const auto info = ScanFlow(program_code, program_size, main_offset); if (info) { const auto& shader_info = *info; coverage_begin = shader_info.start; From 34357b110c3f04f6b98ca586fd776b0df569b6d8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 26 Jun 2019 12:19:43 -0400 Subject: [PATCH 10/15] shader_ir: Correct parsing of scheduling instructions and correct sizing --- 
src/video_core/shader/control_flow.cpp | 41 ++++++++++++++++++-------- src/video_core/shader/decode.cpp | 2 +- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index a26de67958..1775dfd810 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -20,10 +20,10 @@ using Tegra::Shader::OpCode; constexpr s32 unassigned_branch = -2; -/*** +/** * 'ControlStack' represents a static stack of control jumps such as SSY and PBK * stacks in Maxwell. - ***/ + **/ struct ControlStack { static constexpr std::size_t stack_fixed_size = 20; std::array stack{}; @@ -105,9 +105,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) - : program_code{program_code}, program_size{program_size} {} + explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, + const u32 start) + : program_code{program_code}, program_size{program_size}, start{start} {} + u32 start{}; std::vector block_info{}; std::list inspect_queries{}; std::list queries{}; @@ -120,7 +122,7 @@ struct CFGRebuildState { const std::size_t program_size; }; -enum class BlockCollision : u32 { None = 0, Found = 1, Inside = 2 }; +enum class BlockCollision : u32 { None, Found, Inside }; std::pair::iterator> TryGetBlock(CFGRebuildState& state, u32 address) { @@ -155,15 +157,26 @@ Pred GetPredicate(u32 index, bool negated) { return static_cast(index + (negated ? 8 : 0)); } +/** + * Returns whether the instruction at the specified offset is a 'sched' instruction. + * Sched instructions always appear before a sequence of 3 instructions. 
+ */ +constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { + constexpr u32 SchedPeriod = 4; + u32 absolute_offset = offset - main_offset; + + return (absolute_offset % SchedPeriod) == 0; +} + enum class ParseResult : u32 { - ControlCaught = 0, - BlockEnd = 1, - AbnormalFlow = 2, + ControlCaught, + BlockEnd, + AbnormalFlow, }; ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) { u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_size / 8U); + const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); const auto insert_label = ([](CFGRebuildState& state, u32 address) { auto pair = state.labels.emplace(address); @@ -183,6 +196,10 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.ignore = true; break; } + if (IsSchedInstruction(offset, state.start)) { + offset++; + continue; + } const Instruction instr = {state.program_code[offset]}; const auto opcode = OpCode::Decode(instr); if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { @@ -447,11 +464,11 @@ bool TryQuery(CFGRebuildState& state) { std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address) { - CFGRebuildState state{program_code, program_size}; + CFGRebuildState state{program_code, program_size, start_address}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); - state.inspect_queries.push_back(start_address); + state.inspect_queries.push_back(state.start); while (!state.inspect_queries.empty()) { if (!TryInspectAddress(state)) { return {}; @@ -459,7 +476,7 @@ std::optional ScanFlow(const ProgramCode& program_code, u } // Decompile Stacks Query start_query{}; - start_query.address = start_address; + start_query.address = state.start; state.queries.push_back(start_query); bool decompiled = true; while (!state.queries.empty()) { diff --git a/src/video_core/shader/decode.cpp 
b/src/video_core/shader/decode.cpp index b0bd6630fc..07a154d770 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - const auto info = ScanFlow(program_code, program_size, main_offset); + const auto info = ScanFlow(program_code, MAX_PROGRAM_LENGTH * sizeof(u64), main_offset); if (info) { const auto& shader_info = *info; coverage_begin = shader_info.start; From e7a88f0ab32625c1422583ce63d0f8f20086f7c3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 26 Jun 2019 12:56:03 -0400 Subject: [PATCH 11/15] control_flow: Address feedback. --- src/video_core/shader/control_flow.cpp | 126 ++++++++----------------- 1 file changed, 37 insertions(+), 89 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 1775dfd810..7b424d65d8 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -20,68 +21,18 @@ using Tegra::Shader::OpCode; constexpr s32 unassigned_branch = -2; -/** - * 'ControlStack' represents a static stack of control jumps such as SSY and PBK - * stacks in Maxwell. 
- **/ -struct ControlStack { - static constexpr std::size_t stack_fixed_size = 20; - std::array stack{}; - u32 index{}; - - bool Compare(const ControlStack& cs) const { - if (index != cs.index) { - return false; - } - return std::memcmp(stack.data(), cs.stack.data(), index * sizeof(u32)) == 0; - } - - /// This compare just compares the top of the stack against one another - bool SoftCompare(const ControlStack& cs) const { - if (index == 0 || cs.index == 0) { - return index == cs.index; - } - return Top() == cs.Top(); - } - - u32 Size() const { - return index; - } - - u32 Top() const { - return stack[index - 1]; - } - - bool Push(u32 address) { - if (index >= stack.size()) { - return false; - } - stack[index] = address; - index++; - return true; - } - - bool Pop() { - if (index == 0) { - return false; - } - index--; - return true; - } -}; - struct Query { u32 address{}; - ControlStack ssy_stack{}; - ControlStack pbk_stack{}; + std::stack ssy_stack{}; + std::stack pbk_stack{}; }; struct BlockStack { BlockStack() = default; BlockStack(const BlockStack& b) = default; BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - ControlStack ssy_stack{}; - ControlStack pbk_stack{}; + std::stack ssy_stack{}; + std::stack pbk_stack{}; }; struct BlockBranchInfo { @@ -144,13 +95,13 @@ struct ParseInfo { u32 end_address{}; }; -BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { +BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { auto& it = state.block_info.emplace_back(); it.start = start; it.end = end; const u32 index = static_cast(state.block_info.size() - 1); state.registered.insert({start, index}); - return ⁢ + return it; } Pred GetPredicate(u32 index, bool negated) { @@ -174,16 +125,17 @@ enum class ParseResult : u32 { AbnormalFlow, }; -ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) { +std::pair ParseCode(CFGRebuildState& state, u32 address) { u32 offset = 
static_cast(address); const u32 end_address = static_cast(state.program_size / sizeof(Instruction)); + ParseInfo parse_info{}; - const auto insert_label = ([](CFGRebuildState& state, u32 address) { - auto pair = state.labels.emplace(address); + const auto insert_label = [](CFGRebuildState& state, u32 address) { + const auto pair = state.labels.emplace(address); if (pair.second) { state.inspect_queries.push_back(address); } - }); + }; while (true) { if (offset >= end_address) { @@ -229,11 +181,11 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.ignore = false; parse_info.end_address = offset; - return ParseResult::ControlCaught; + return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::BRA: { if (instr.bra.constant_buffer != 0) { - return ParseResult::AbnormalFlow; + return {ParseResult::AbnormalFlow, parse_info}; } const auto pred_index = static_cast(instr.pred.pred_index); parse_info.branch_info.condition.predicate = @@ -248,7 +200,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info offset++; continue; } - u32 branch_offset = offset + instr.bra.GetBranchTarget(); + const u32 branch_offset = offset + instr.bra.GetBranchTarget(); if (branch_offset == 0) { parse_info.branch_info.address = exit_branch; } else { @@ -261,10 +213,9 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.ignore = false; parse_info.end_address = offset; - return ParseResult::ControlCaught; + return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::SYNC: { - parse_info.branch_info.condition; const auto pred_index = static_cast(instr.pred.pred_index); parse_info.branch_info.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); @@ -285,10 +236,9 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.ignore = false; parse_info.end_address = offset; - return 
ParseResult::ControlCaught; + return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::BRK: { - parse_info.branch_info.condition; const auto pred_index = static_cast(instr.pred.pred_index); parse_info.branch_info.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); @@ -309,10 +259,9 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.ignore = false; parse_info.end_address = offset; - return ParseResult::ControlCaught; + return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::KIL: { - parse_info.branch_info.condition; const auto pred_index = static_cast(instr.pred.pred_index); parse_info.branch_info.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); @@ -333,7 +282,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.ignore = false; parse_info.end_address = offset; - return ParseResult::ControlCaught; + return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::SSY: { const u32 target = offset + instr.bra.GetBranchTarget(); @@ -348,7 +297,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info break; } case OpCode::Id::BRX: { - return ParseResult::AbnormalFlow; + return {ParseResult::AbnormalFlow, parse_info}; } default: break; @@ -360,7 +309,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = false; parse_info.end_address = offset - 1; - return ParseResult::BlockEnd; + return {ParseResult::BlockEnd, parse_info}; } bool TryInspectAddress(CFGRebuildState& state) { @@ -377,10 +326,10 @@ bool TryInspectAddress(CFGRebuildState& state) { case BlockCollision::Inside: { // This case is the tricky one: // We need to Split the block in 2 sepparate blocks - auto it = search_result.second; - BlockInfo* block_info = CreateBlockInfo(state, address, it->end); + const 
auto it = search_result.second; + BlockInfo& block_info = CreateBlockInfo(state, address, it->end); it->end = address - 1; - block_info->branch = it->branch; + block_info.branch = it->branch; BlockBranchInfo forward_branch{}; forward_branch.address = address; forward_branch.ignore = true; @@ -390,15 +339,14 @@ bool TryInspectAddress(CFGRebuildState& state) { default: break; } - ParseInfo parse_info; - const ParseResult parse_result = ParseCode(state, address, parse_info); + const auto [parse_result, parse_info] = ParseCode(state, address); if (parse_result == ParseResult::AbnormalFlow) { // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction return false; } - BlockInfo* block_info = CreateBlockInfo(state, address, parse_info.end_address); - block_info->branch = parse_info.branch_info; + BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); + block_info.branch = parse_info.branch_info; if (parse_info.branch_info.condition.IsUnconditional()) { return true; } @@ -409,14 +357,15 @@ bool TryInspectAddress(CFGRebuildState& state) { } bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = ([](ControlStack& cc, std::map& labels, BlockInfo& block) { + const auto gather_labels = [](std::stack& cc, std::map& labels, + BlockInfo& block) { auto gather_start = labels.lower_bound(block.start); const auto gather_end = labels.upper_bound(block.end); while (gather_start != gather_end) { - cc.Push(gather_start->second); + cc.push(gather_start->second); gather_start++; } - }); + }; if (state.queries.empty()) { return false; } @@ -428,9 +377,8 @@ bool TryQuery(CFGRebuildState& state) { // consumes a label. 
Schedule new queries accordingly if (block.visited) { BlockStack& stack = state.stacks[q.address]; - const bool all_okay = - (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) && - (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack)); + const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) && + (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack); state.queries.pop_front(); return all_okay; } @@ -447,15 +395,15 @@ bool TryQuery(CFGRebuildState& state) { Query conditional_query{q2}; if (block.branch.is_sync) { if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.ssy_stack.Top(); + block.branch.address = conditional_query.ssy_stack.top(); } - conditional_query.ssy_stack.Pop(); + conditional_query.ssy_stack.pop(); } if (block.branch.is_brk) { if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.pbk_stack.Top(); + block.branch.address = conditional_query.pbk_stack.top(); } - conditional_query.pbk_stack.Pop(); + conditional_query.pbk_stack.pop(); } conditional_query.address = block.branch.address; state.queries.push_back(conditional_query); From dc4a93594cd74eb1f663213d3b340a83dd95842e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 26 Jun 2019 13:16:13 -0400 Subject: [PATCH 12/15] control_flow: Assert shaders bigger than limit. 
--- src/video_core/shader/control_flow.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 7b424d65d8..bdf9d4dd43 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -139,6 +139,8 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) while (true) { if (offset >= end_address) { + // ASSERT_OR_EXECUTE can't be used, as it ignores the break + ASSERT_MSG(false, "Shader passed the current limit!"); parse_info.branch_info.address = exit_branch; parse_info.branch_info.ignore = false; break; From e7c6045a03584ac5cd93e9030cdf4f47867f9ee3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 27 Jun 2019 09:24:40 -0400 Subject: [PATCH 13/15] control_flow: Correct block breaking algorithm. --- src/video_core/shader/control_flow.cpp | 34 +++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index bdf9d4dd43..fdcc970ff8 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -75,19 +75,17 @@ struct CFGRebuildState { enum class BlockCollision : u32 { None, Found, Inside }; -std::pair::iterator> TryGetBlock(CFGRebuildState& state, - u32 address) { - auto it = state.block_info.begin(); - while (it != state.block_info.end()) { - if (it->start == address) { - return {BlockCollision::Found, it}; +std::pair TryGetBlock(CFGRebuildState& state, u32 address) { + const auto& blocks = state.block_info; + for (u32 index = 0; index < blocks.size(); index++) { + if (blocks[index].start == address) { + return {BlockCollision::Found, index}; } - if (it->IsInside(address)) { - return {BlockCollision::Inside, it}; + if (blocks[index].IsInside(address)) { + return {BlockCollision::Inside, index}; } - it++; } - return {BlockCollision::None, it}; + return {BlockCollision::None, -1}; } struct 
ParseInfo { @@ -318,24 +316,26 @@ bool TryInspectAddress(CFGRebuildState& state) { if (state.inspect_queries.empty()) { return false; } + const u32 address = state.inspect_queries.front(); state.inspect_queries.pop_front(); - const auto search_result = TryGetBlock(state, address); - switch (search_result.first) { + const auto [result, block_index] = TryGetBlock(state, address); + switch (result) { case BlockCollision::Found: { return true; } case BlockCollision::Inside: { // This case is the tricky one: // We need to Split the block in 2 sepparate blocks - const auto it = search_result.second; - BlockInfo& block_info = CreateBlockInfo(state, address, it->end); - it->end = address - 1; - block_info.branch = it->branch; + const u32 end = state.block_info[block_index].end; + BlockInfo& new_block = CreateBlockInfo(state, address, end); + BlockInfo& current_block = state.block_info[block_index]; + current_block.end = address - 1; + new_block.branch = current_block.branch; BlockBranchInfo forward_branch{}; forward_branch.address = address; forward_branch.ignore = true; - it->branch = forward_branch; + current_block.branch = forward_branch; return true; } default: From 2de764931141314357944627524767476d6d2cf9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 29 Jun 2019 14:01:44 -0400 Subject: [PATCH 14/15] shader_ir: limit exploration to best known program size.
--- src/video_core/shader/decode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 07a154d770..b0bd6630fc 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - const auto info = ScanFlow(program_code, MAX_PROGRAM_LENGTH * sizeof(u64), main_offset); + const auto info = ScanFlow(program_code, program_size, main_offset); if (info) { const auto& shader_info = *info; coverage_begin = shader_info.start; From f2549739d1166d9177bfff3a6af150266ba5309f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 5 Jul 2019 14:13:14 -0400 Subject: [PATCH 15/15] shader_ir: Add comments on missing instruction. Also shows Nvidia's address space on comments. --- src/video_core/shader/decode.cpp | 7 +++++-- src/video_core/shader/shader_ir.h | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index b0bd6630fc..29c8895c58 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -146,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); + const u32 nv_address = ConvertAddressToNvidiaSpace(pc); // Decoding failure if (!opcode) { UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); + bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", + nv_address, instr.value))); return pc + 1; } - bb.push_back( - Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); + bb.push_back(Comment( + fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); using Tegra::Shader::Pred; 
UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 928ac7cb5c..6145f0a707 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -127,6 +127,10 @@ public: return disable_flow_stack; } + u32 ConvertAddressToNvidiaSpace(const u32 address) const { + return (address - main_offset) * sizeof(Tegra::Shader::Instruction); + } + private: void Decode();