glsl: Address rest of feedback

master
ameerj 2021-06-15 17:23:57 +07:00
parent c5dfa0b630
commit d36f667bc0
11 changed files with 86 additions and 38 deletions

@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) {
} }
} }
std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
if (is_written && !is_read) {
return "writeonly ";
}
if (is_read && !is_written) {
return "readonly ";
}
return "";
}
std::string_view GetTessMode(TessPrimitive primitive) { std::string_view GetTessMode(TessPrimitive primitive) {
switch (primitive) { switch (primitive) {
case TessPrimitive::Triangles: case TessPrimitive::Triangles:
@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
const RuntimeInfo& runtime_info_) const RuntimeInfo& runtime_info_)
: info{program.info}, profile{profile_}, runtime_info{runtime_info_} { : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
if (profile.need_fastmath_off) {
header += "#pragma optionNV(fastmath off)\n"; header += "#pragma optionNV(fastmath off)\n";
}
SetupExtensions(); SetupExtensions();
stage = program.stage; stage = program.stage;
switch (program.stage) { switch (program.stage) {
@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
} }
for (size_t index = 0; index < info.stores_generics.size(); ++index) { for (size_t index = 0; index < info.stores_generics.size(); ++index) {
// TODO: Properly resolve attribute issues // TODO: Properly resolve attribute issues
if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { if (info.stores_generics[index] || StageInitializesVaryings()) {
DefineGenericOutput(index, program.invocations); DefineGenericOutput(index, program.invocations);
} }
} }
@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
DefineConstants(); DefineConstants();
} }
bool EmitContext::StageInitializesVaryings() const noexcept {
switch (stage) {
case Stage::VertexA:
case Stage::VertexB:
case Stage::Geometry:
return true;
default:
return false;
}
}
void EmitContext::SetupExtensions() { void EmitContext::SetupExtensions() {
if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
header += "#extension GL_EXT_texture_shadow_lod : enable\n"; header += "#extension GL_EXT_texture_shadow_lod : enable\n";
@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() {
header += "#extension GL_NV_shader_atomic_float : enable\n"; header += "#extension GL_NV_shader_atomic_float : enable\n";
} }
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
header += "#extension NV_shader_atomic_fp16_vector : enable\n"; header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
} }
if (info.uses_fp16) { if (info.uses_fp16) {
if (profile.support_gl_nv_gpu_shader_5) { if (profile.support_gl_nv_gpu_shader_5) {
@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() {
if (info.stores_viewport_mask && profile.support_viewport_mask) { if (info.stores_viewport_mask && profile.support_viewport_mask) {
header += "#extension GL_NV_viewport_array2 : enable\n"; header += "#extension GL_NV_viewport_array2 : enable\n";
} }
if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { if (info.uses_typeless_image_reads) {
header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
} }
if (info.uses_derivatives && profile.support_gl_derivative_control) { if (info.uses_derivatives && profile.support_gl_derivative_control) {
@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
"return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
"uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
} }
write_func += "}"; write_func += '}';
write_func_64 += "}"; write_func_64 += '}';
write_func_128 += "}"; write_func_128 += '}';
load_func += "return 0u;}"; load_func += "return 0u;}";
load_func_64 += "return uvec2(0);}"; load_func_64 += "return uvec2(0);}";
load_func_128 += "return uvec4(0);}"; load_func_128 += "return uvec4(0);}";
@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) {
for (const auto& desc : info.image_buffer_descriptors) { for (const auto& desc : info.image_buffer_descriptors) {
image_buffers.push_back({bindings.image, desc.count}); image_buffers.push_back({bindings.image, desc.count});
const auto format{ImageFormatString(desc.format)}; const auto format{ImageFormatString(desc.format)};
const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
format, bindings.image, array_decorator); bindings.image, format, qualifier, bindings.image, array_decorator);
bindings.image += desc.count; bindings.image += desc.count;
} }
images.reserve(info.image_descriptors.size()); images.reserve(info.image_descriptors.size());
@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) {
images.push_back({bindings.image, desc.count}); images.push_back({bindings.image, desc.count});
const auto format{ImageFormatString(desc.format)}; const auto format{ImageFormatString(desc.format)};
const auto image_type{ImageType(desc.type)}; const auto image_type{ImageType(desc.type)};
const auto qualifier{desc.is_written ? "" : "readonly "}; const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format,
qualifier, image_type, bindings.image, array_decorator); qualifier, image_type, bindings.image, array_decorator);

@ -136,6 +136,8 @@ public:
code += '\n'; code += '\n';
} }
[[nodiscard]] bool StageInitializesVaryings() const noexcept;
std::string header; std::string header;
std::string code; std::string code;
VarAlloc var_alloc; VarAlloc var_alloc;

@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value);
break; break;
case IR::Attribute::FogCoordinate: case IR::Attribute::FogCoordinate:
ctx.Add("gl_FogFragCoord.x={};", value); ctx.Add("gl_FogFragCoord={};", value);
break; break;
case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance1:

@ -10,6 +10,17 @@
#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLSL { namespace Shader::Backend::GLSL {
namespace {
void InitializeVaryings(EmitContext& ctx) {
ctx.Add("gl_Position=vec4(0,0,0,1);");
// TODO: Properly resolve attribute issues
for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) {
if (!ctx.info.stores_generics[index]) {
ctx.Add("out_attr{}=vec4(0,0,0,1);", index);
}
}
}
} // Anonymous namespace
void EmitPhi(EmitContext& ctx, IR::Inst& phi) { void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
const size_t num_args{phi.NumArgs()}; const size_t num_args{phi.NumArgs()};
@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
} }
void EmitPrologue(EmitContext& ctx) { void EmitPrologue(EmitContext& ctx) {
if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) { if (ctx.StageInitializesVaryings()) {
ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);"); InitializeVaryings(ctx);
// TODO: Properly resolve attribute issues
for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) {
if (!ctx.info.stores_generics[index]) {
ctx.Add("out_attr{}=vec4(0,0,0,1);", index);
}
}
} }
} }
@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {}
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream));
InitializeVaryings(ctx);
} }
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {

@ -312,11 +312,14 @@ public:
} }
u32 Add(const ImageBufferDescriptor& desc) { u32 Add(const ImageBufferDescriptor& desc) {
return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
desc.size_shift == existing.size_shift; desc.size_shift == existing.size_shift;
}); })};
image_buffer_descriptors[index].is_written |= desc.is_written;
image_buffer_descriptors[index].is_read |= desc.is_read;
return index;
} }
u32 Add(const TextureDescriptor& desc) { u32 Add(const TextureDescriptor& desc) {
@ -339,6 +342,7 @@ public:
desc.size_shift == existing.size_shift; desc.size_shift == existing.size_shift;
})}; })};
image_descriptors[index].is_written |= desc.is_written; image_descriptors[index].is_written |= desc.is_written;
image_descriptors[index].is_read |= desc.is_read;
return index; return index;
} }
@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) {
throw NotImplementedException("Unexpected separate sampler"); throw NotImplementedException("Unexpected separate sampler");
} }
const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead};
if (flags.type == TextureType::Buffer) { if (flags.type == TextureType::Buffer) {
index = descriptors.Add(ImageBufferDescriptor{ index = descriptors.Add(ImageBufferDescriptor{
.format = flags.image_format, .format = flags.image_format,
.is_written = is_written, .is_written = is_written,
.is_read = is_read,
.cbuf_index = cbuf.index, .cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset, .cbuf_offset = cbuf.offset,
.count = cbuf.count, .count = cbuf.count,
@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) {
.type = flags.type, .type = flags.type,
.format = flags.image_format, .format = flags.image_format,
.is_written = is_written, .is_written = is_written,
.is_read = is_read,
.cbuf_index = cbuf.index, .cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset, .cbuf_offset = cbuf.offset,
.count = cbuf.count, .count = cbuf.count,

@ -97,6 +97,8 @@ struct Profile {
/// Fragment outputs have to be declared even if they are not written to avoid undefined values. /// Fragment outputs have to be declared even if they are not written to avoid undefined values.
/// See Ori and the Blind Forest's main menu for reference. /// See Ori and the Blind Forest's main menu for reference.
bool need_declared_frag_colors{}; bool need_declared_frag_colors{};
/// Prevents fast math optimizations that may cause inaccuracies
bool need_fastmath_off{};
/// OpFClamp is broken and OpFMax + OpFMin should be used instead /// OpFClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{}; bool has_broken_spirv_clamp{};

@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDes
struct ImageBufferDescriptor { struct ImageBufferDescriptor {
ImageFormat format; ImageFormat format;
bool is_written; bool is_written;
bool is_read;
u32 cbuf_index; u32 cbuf_index;
u32 cbuf_offset; u32 cbuf_offset;
u32 count; u32 count;
@ -99,6 +100,7 @@ struct ImageDescriptor {
TextureType type; TextureType type;
ImageFormat format; ImageFormat format;
bool is_written; bool is_written;
bool is_read;
u32 cbuf_index; u32 cbuf_index;
u32 cbuf_offset; u32 cbuf_offset;
u32 count; u32 count;

@ -162,6 +162,7 @@ Device::Device() {
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants" // uniform buffers as "push constants"

@ -136,6 +136,10 @@ public:
return warp_size_potentially_larger_than_guest; return warp_size_potentially_larger_than_guest;
} }
bool NeedsFastmathOff() const {
return need_fastmath_off;
}
private: private:
static bool TestVariableAoffi(); static bool TestVariableAoffi();
static bool TestPreciseBug(); static bool TestPreciseBug();
@ -171,6 +175,7 @@ private:
bool has_amd_shader_half_float{}; bool has_amd_shader_half_float{};
bool has_sparse_texture_2{}; bool has_sparse_texture_2{};
bool warp_size_potentially_larger_than_guest{}; bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
}; };
} // namespace OpenGL } // namespace OpenGL

@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
std::ranges::transform(infos, stage_infos.begin(), std::ranges::transform(infos, stage_infos.begin(),
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable {
if (device.UseAssemblyShaders()) { if (!device.UseAssemblyShaders()) {
program.handle = glCreateProgram();
}
for (size_t stage = 0; stage < 5; ++stage) { for (size_t stage = 0; stage < 5; ++stage) {
const auto code{sources[stage]}; const auto code{sources[stage]};
if (code.empty()) { if (code.empty()) {
continue; continue;
} }
if (device.UseAssemblyShaders()) {
assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage));
enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage;
}
} else { } else {
program.handle = glCreateProgram();
for (size_t stage = 0; stage < 5; ++stage) {
const auto code{sources[stage]};
if (code.empty()) {
continue;
}
AttachShader(Stage(stage), program.handle, code); AttachShader(Stage(stage), program.handle, code);
} }
LinkProgram(program.handle);
} }
if (shader_notify) { if (!device.UseAssemblyShaders()) {
shader_notify->MarkShaderComplete(); LinkProgram(program.handle);
} }
u32 num_textures{}; u32 num_textures{};
u32 num_images{}; u32 num_images{};
@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
if (assembly_shaders && xfb_state) { if (assembly_shaders && xfb_state) {
GenerateTransformFeedbackState(*xfb_state); GenerateTransformFeedbackState(*xfb_state);
} }
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
is_built.store(true, std::memory_order_relaxed); is_built.store(true, std::memory_order_relaxed);
}}; }};
if (thread_worker) { if (thread_worker) {

@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.lower_left_origin_mode = true, .lower_left_origin_mode = true,
.need_declared_frag_colors = true, .need_declared_frag_colors = true,
.need_fastmath_off = device.NeedsFastmathOff(),
.has_broken_spirv_clamp = true, .has_broken_spirv_clamp = true,
.has_broken_unsigned_image_offsets = true, .has_broken_unsigned_image_offsets = true,