Merge pull request #5823 from SachinVin/dyn

Android: Backport easy stuff
master
SachinVin 2021-10-03 18:58:20 +07:00 committed by GitHub
commit 6183b5d76c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
43 changed files with 1052 additions and 365 deletions

@ -8,3 +8,21 @@
/build
/captures
.externalNativeBuild
# CXX compile cache
app/.cxx
# Google Services (e.g. APIs or Firebase)
google-services.json
# Freeline
freeline.py
freeline/
freeline_project_description.json
# fastlane
fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots
fastlane/test_output
fastlane/readme.md

@ -345,7 +345,6 @@ void Source::GenerateFrame() {
break;
case InterpolationMode::Polyphase:
// TODO(merry): Implement polyphase interpolation
LOG_DEBUG(Audio_DSP, "Polyphase interpolation unimplemented; falling back to linear");
AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier,
current_frame, frame_position);
break;

@ -8,11 +8,7 @@
#include <QString>
#include <QVBoxLayout>
#include "citra_qt/applets/mii_selector.h"
#include "common/file_util.h"
#include "common/string_util.h"
#include "core/file_sys/archive_extsavedata.h"
#include "core/file_sys/file_backend.h"
#include "core/hle/service/ptm/ptm.h"
QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_selector_)
: QDialog(parent), mii_selector(mii_selector_) {
@ -33,37 +29,9 @@ QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_sel
miis.push_back(HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data);
combobox->addItem(tr("Standard Mii"));
std::string nand_directory{FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)};
FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true);
auto archive_result = extdata_archive_factory.Open(Service::PTM::ptm_shared_extdata_id, 0);
if (archive_result.Succeeded()) {
auto archive = std::move(archive_result).Unwrap();
FileSys::Path file_path = "/CFL_DB.dat";
FileSys::Mode mode{};
mode.read_flag.Assign(1);
auto file_result = archive->OpenFile(file_path, mode);
if (file_result.Succeeded()) {
auto file = std::move(file_result).Unwrap();
u32 saved_miis_offset = 0x8;
// The Mii Maker has a 100 Mii limit on the 3ds
for (int i = 0; i < 100; ++i) {
HLE::Applets::MiiData mii;
std::array<u8, sizeof(mii)> mii_raw;
file->Read(saved_miis_offset, sizeof(mii), mii_raw.data());
std::memcpy(&mii, mii_raw.data(), sizeof(mii));
if (mii.mii_id != 0) {
std::string name = Common::UTF16BufferToUTF8(mii.mii_name);
miis.push_back(mii);
combobox->addItem(QString::fromStdString(name));
}
saved_miis_offset += sizeof(mii);
}
}
for (const auto& mii : Frontend::LoadMiis()) {
miis.push_back(mii);
combobox->addItem(QString::fromStdString(Common::UTF16BufferToUTF8(mii.mii_name)));
}
if (combobox->count() > static_cast<int>(config.initially_selected_mii_index)) {

@ -26,6 +26,10 @@
namespace Log {
Filter filter;
/// Replaces the namespace-scope `filter` object with a copy of `f`.
void SetGlobalFilter(const Filter& f) {
filter = f;
}
/**
* Static state as a singleton.
*/
@ -58,14 +62,6 @@ public:
backends.erase(it, backends.end());
}
const Filter& GetGlobalFilter() const {
return filter;
}
void SetGlobalFilter(const Filter& f) {
filter = f;
}
Backend* GetBackend(std::string_view backend_name) {
const auto it =
std::find_if(backends.begin(), backends.end(),
@ -144,6 +140,10 @@ void ColorConsoleBackend::Write(const Entry& entry) {
PrintColoredMessage(entry);
}
/// Hands the entry to PrintMessageToLogcat, which performs the formatting and output.
void LogcatBackend::Write(const Entry& entry) {
PrintMessageToLogcat(entry);
}
FileBackend::FileBackend(const std::string& filename) : bytes_written(0) {
if (FileUtil::Exists(filename + ".old.txt")) {
FileUtil::Delete(filename + ".old.txt");
@ -283,10 +283,6 @@ const char* GetLevelName(Level log_level) {
return "Invalid";
}
void SetGlobalFilter(const Filter& filter) {
Impl::Instance().SetGlobalFilter(filter);
}
/// Moves `backend` into the logging singleton by forwarding it to Impl::Instance().AddBackend().
void AddBackend(std::unique_ptr<Backend> backend) {
Impl::Instance().AddBackend(std::move(backend));
}
@ -303,10 +299,6 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
unsigned int line_num, const char* function, const char* format,
const fmt::format_args& args) {
auto& instance = Impl::Instance();
const auto& filter = instance.GetGlobalFilter();
if (!filter.CheckMessage(log_class, log_level))
return;
instance.PushEntry(log_class, log_level, filename, line_num, function,
fmt::vformat(format, args));
}

@ -14,8 +14,6 @@
namespace Log {
class Filter;
/**
* A log entry. Log entries are stored in a structured format to permit more varied output
* formatting on different frontends, as well as facilitating filtering and aggregation.
@ -83,6 +81,21 @@ public:
void Write(const Entry& entry) override;
};
/**
* Backend that writes to the Android logcat
*/
class LogcatBackend : public Backend {
public:
/// Identifier of this backend type; always the string "logcat".
static const char* Name() {
return "logcat";
}
/// Returns the same identifier as Name().
const char* GetName() const override {
return Name();
}
/// Outputs `entry`; defined out of line.
void Write(const Entry& entry) override;
};
/**
* Backend that writes to a file passed into the constructor
*/
@ -136,10 +149,4 @@ const char* GetLogClassName(Class log_class);
*/
const char* GetLevelName(Level log_level);
/**
* The global filter will prevent any messages from even being processed if they are filtered. Each
* backend can have a filter, but if the level is lower than the global filter, the backend will
* never get the message
*/
void SetGlobalFilter(const Filter& filter);
} // namespace Log

@ -9,43 +9,4 @@
#include <string_view>
#include "common/logging/log.h"
namespace Log {
/**
* Implements a log message filter which allows different log classes to have different minimum
* severity levels. The filter can be changed at runtime and can be parsed from a string to allow
* editing via the interface or loading from a configuration file.
*/
class Filter {
public:
/// Initializes the filter with all classes having `default_level` as the minimum level.
explicit Filter(Level default_level = Level::Info);
/// Resets the filter so that all classes have `level` as the minimum displayed level.
void ResetAll(Level level);
/// Sets the minimum level of `log_class` (and not of its subclasses) to `level`.
void SetClassLevel(Class log_class, Level level);
/**
* Parses a filter string and applies it to this filter.
*
* A filter string consists of a space-separated list of filter rules, each of the format
* `<class>:<level>`. `<class>` is a log class name, with subclasses separated using periods.
* `*` is allowed as a class name and will reset all filters to the specified level. `<level>`
* is a severity level name which will be set as the minimum logging level of the matched classes.
* Rules are applied left to right, with each rule overriding previous ones in the sequence.
*
* A few examples of filter rules:
* - `*:Info` -- Resets the level of all classes to Info.
* - `Service:Info` -- Sets the level of Service to Info.
* - `Service.FS:Trace` -- Sets the level of the Service.FS class to Trace.
*/
void ParseFilterString(std::string_view filter_view);
/// Matches class/level combination against the filter, returning true if it passed.
bool CheckMessage(Class log_class, Level level) const;
private:
std::array<Level, static_cast<std::size_t>(Class::Count)> class_levels;
};
} // namespace Log
namespace Log {} // namespace Log

@ -4,13 +4,14 @@
#pragma once
#include <array>
#include <fmt/format.h>
#include "common/common_types.h"
namespace Log {
// trims up to and including the last of ../, ..\, src/, src\ in a string
constexpr const char* TrimSourcePath(std::string_view source) {
inline const char* TrimSourcePath(std::string_view source) {
const auto rfind = [source](const std::string_view match) {
return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size());
};
@ -113,6 +114,47 @@ enum class Class : ClassType {
Count ///< Total number of logging classes
};
/**
* Implements a log message filter which allows different log classes to have different minimum
* severity levels. The filter can be changed at runtime and can be parsed from a string to allow
* editing via the interface or loading from a configuration file.
*/
class Filter {
public:
/// Initializes the filter with all classes having `default_level` as the minimum level.
explicit Filter(Level default_level = Level::Info);
/// Resets the filter so that all classes have `level` as the minimum displayed level.
void ResetAll(Level level);
/// Sets the minimum level of `log_class` (and not of its subclasses) to `level`.
void SetClassLevel(Class log_class, Level level);
/**
* Parses a filter string and applies it to this filter.
*
* A filter string consists of a space-separated list of filter rules, each of the format
* `<class>:<level>`. `<class>` is a log class name, with subclasses separated using periods.
* `*` is allowed as a class name and will reset all filters to the specified level. `<level>`
* is a severity level name which will be set as the minimum logging level of the matched classes.
* Rules are applied left to right, with each rule overriding previous ones in the sequence.
*
* A few examples of filter rules:
* - `*:Info` -- Resets the level of all classes to Info.
* - `Service:Info` -- Sets the level of Service to Info.
* - `Service.FS:Trace` -- Sets the level of the Service.FS class to Trace.
*/
void ParseFilterString(std::string_view filter_view);
/// Matches class/level combination against the filter, returning true if it passed.
bool CheckMessage(Class log_class, Level level) const;
private:
/// One minimum level per log class (Class::Count entries).
/// NOTE(review): presumably indexed by the integer value of `Class` -- confirm in the
/// implementation file.
std::array<Level, static_cast<std::size_t>(Class::Count)> class_levels;
};
/// Global filter. FmtLogMessage checks it before a message is formatted, so messages it rejects
/// are dropped before any further processing.
extern Filter filter;
/// Replaces the global filter with a copy of `f`.
void SetGlobalFilter(const Filter& f);
/// Logs a message to the global logger, using fmt
void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
unsigned int line_num, const char* function, const char* format,
@ -121,6 +163,9 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
/// Logs a message through FmtLogMessageImpl, but only when the global filter accepts the
/// class/level pair. Rejected messages never have their fmt argument pack built.
template <typename... Args>
void FmtLogMessage(Class log_class, Level log_level, const char* filename, unsigned int line_num,
                   const char* function, const char* format, const Args&... args) {
    if (filter.CheckMessage(log_class, log_level)) {
        FmtLogMessageImpl(log_class, log_level, filename, line_num, function, format,
                          fmt::make_format_args(args...));
    }
}

@ -34,13 +34,7 @@ std::string FormatLogMessage(const Entry& entry) {
void PrintMessage(const Entry& entry) {
const auto str = FormatLogMessage(entry).append(1, '\n');
#ifdef ANDROID
// Android's log level enum are offset by '2'
const int android_log_level = static_cast<int>(entry.log_level) + 2;
__android_log_print(android_log_level, "CitraNative", "%s", str.c_str());
#else
fputs(str.c_str(), stderr);
#endif
}
void PrintColoredMessage(const Entry& entry) {
@ -78,7 +72,7 @@ void PrintColoredMessage(const Entry& entry) {
}
SetConsoleTextAttribute(console_handle, color);
#elif !defined(ANDROID)
#else
#define ESC "\x1b"
const char* color = "";
switch (entry.log_level) {
@ -111,9 +105,40 @@ void PrintColoredMessage(const Entry& entry) {
#ifdef _WIN32
SetConsoleTextAttribute(console_handle, original_info.wAttributes);
#elif !defined(ANDROID)
#else
fputs(ESC "[0m", stderr);
#undef ESC
#endif
}
/**
 * Formats a log entry and submits it to the Android log under the "CitraNative" tag.
 * When ANDROID is not defined the body is compiled out and the function does nothing.
 *
 * @param entry Entry to print; its log_level selects the Android log priority.
 */
void PrintMessageToLogcat(const Entry& entry) {
#ifdef ANDROID
    const auto str = FormatLogMessage(entry);

    // Start from a defined priority: the switch below has no default label, so a log_level
    // outside the listed enumerators would otherwise leave this variable indeterminate when it
    // is passed to __android_log_print.
    android_LogPriority android_log_priority = ANDROID_LOG_INFO;
    switch (entry.log_level) {
    case Level::Trace:
        android_log_priority = ANDROID_LOG_VERBOSE;
        break;
    case Level::Debug:
        android_log_priority = ANDROID_LOG_DEBUG;
        break;
    case Level::Info:
        android_log_priority = ANDROID_LOG_INFO;
        break;
    case Level::Warning:
        android_log_priority = ANDROID_LOG_WARN;
        break;
    case Level::Error:
        android_log_priority = ANDROID_LOG_ERROR;
        break;
    case Level::Critical:
        android_log_priority = ANDROID_LOG_FATAL;
        break;
    case Level::Count:
        // Count is a sentinel, not a real severity.
        UNREACHABLE();
        break;
    }
    __android_log_print(android_log_priority, "CitraNative", "%s", str.c_str());
#endif
}
} // namespace Log

@ -17,4 +17,6 @@ std::string FormatLogMessage(const Entry& entry);
void PrintMessage(const Entry& entry);
/// Prints the same message as `PrintMessage`, but colored according to the severity level.
void PrintColoredMessage(const Entry& entry);
/// Formats and prints a log entry to the android logcat.
void PrintMessageToLogcat(const Entry& entry);
} // namespace Log

@ -108,8 +108,8 @@ add_library(core STATIC
frontend/framebuffer_layout.h
frontend/image_interface.h
frontend/input.h
frontend/mic.h
frontend/mic.cpp
frontend/mic.h
frontend/scope_acquire_context.cpp
frontend/scope_acquire_context.h
gdbstub/gdbstub.cpp

@ -953,6 +953,9 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
#define INC_PC(l) ptr += sizeof(arm_inst) + l
#define INC_PC_STUB ptr += sizeof(arm_inst)
#ifdef ANDROID
#define GDB_BP_CHECK
#else
#define GDB_BP_CHECK \
cpu->Cpsr &= ~(1 << 5); \
cpu->Cpsr |= cpu->TFlag << 5; \
@ -965,6 +968,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
goto END; \
} \
}
#endif
// GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a
// clunky switch statement.
@ -1652,11 +1656,13 @@ DISPATCH : {
goto END;
}
#ifndef ANDROID
// Find breakpoint if one exists within the block
if (GDBStub::IsConnected()) {
breakpoint_data =
GDBStub::GetNextBreakpointFromAddress(cpu->Reg[15], GDBStub::BreakpointType::Execute);
}
#endif
inst_base = (arm_inst*)&trans_cache_buf[ptr];
GOTO_NEXT_INST;

@ -182,13 +182,16 @@ void ARMul_State::ResetMPCoreCP15Registers() {
CP15[CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE] = 0x00000000;
CP15[CP15_TLB_DEBUG_CONTROL] = 0x00000000;
}
/// Breaks into the GDB stub when the stub server is enabled and a breakpoint of kind `type` is
/// registered for `address`. On Android builds the body is compiled out and this is a no-op.
static void CheckMemoryBreakpoint(u32 address, GDBStub::BreakpointType type) {
#ifndef ANDROID
    const bool breakpoint_hit =
        GDBStub::IsServerEnabled() && GDBStub::CheckBreakpoint(address, type);
    if (breakpoint_hit) {
        LOG_DEBUG(Debug, "Found memory breakpoint @ {:08x}", address);
        GDBStub::Break(true);
    }
#endif
}
u8 ARMul_State::ReadMemory8(u32 address) const {
CheckMemoryBreakpoint(address, GDBStub::BreakpointType::Read);

@ -2,7 +2,12 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/file_util.h"
#include "common/string_util.h"
#include "core/file_sys/archive_extsavedata.h"
#include "core/file_sys/file_backend.h"
#include "core/frontend/applets/mii_selector.h"
#include "core/hle/service/ptm/ptm.h"
namespace Frontend {
@ -10,6 +15,42 @@ void MiiSelector::Finalize(u32 return_code, HLE::Applets::MiiData mii) {
data = {return_code, mii};
}
/**
 * Loads the Miis stored in "/CFL_DB.dat" inside the PTM shared extdata archive of the user's NAND
 * directory.
 *
 * @return Every record whose mii_id is non-zero, in file order. The result is empty when the
 *         archive or the file cannot be opened.
 */
std::vector<HLE::Applets::MiiData> LoadMiis() {
    std::vector<HLE::Applets::MiiData> miis;

    std::string nand_directory{FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)};
    FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true);

    auto archive_result = extdata_archive_factory.Open(Service::PTM::ptm_shared_extdata_id, 0);
    if (!archive_result.Succeeded()) {
        return miis;
    }
    auto archive = std::move(archive_result).Unwrap();

    FileSys::Path file_path = "/CFL_DB.dat";
    FileSys::Mode mode{};
    mode.read_flag.Assign(1);

    auto file_result = archive->OpenFile(file_path, mode);
    if (!file_result.Succeeded()) {
        return miis;
    }
    auto file = std::move(file_result).Unwrap();

    // Records start at offset 0x8 in the file.
    u32 saved_miis_offset = 0x8;
    // The Mii Maker has a 100 Mii limit on the 3ds
    for (int i = 0; i < 100; ++i) {
        HLE::Applets::MiiData mii;
        // Zero-initialize the staging buffer: the result of Read is not inspected, so after a
        // short or failed read the untouched bytes must not be parsed as Mii data. A fully
        // zeroed record has mii_id == 0 and is skipped below.
        std::array<u8, sizeof(mii)> mii_raw{};
        file->Read(saved_miis_offset, sizeof(mii), mii_raw.data());
        std::memcpy(&mii, mii_raw.data(), sizeof(mii));
        if (mii.mii_id != 0) {
            miis.push_back(mii);
        }
        saved_miis_offset += sizeof(mii);
    }

    return miis;
}
void DefaultMiiSelector::Setup(const Frontend::MiiSelectorConfig& config) {
MiiSelector::Setup(config);
Finalize(0, HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data);

@ -50,6 +50,8 @@ protected:
MiiSelectorData data;
};
/// Returns the Miis available to a Mii selector frontend.
/// NOTE(review): the definition is not visible from this header -- confirm the data source there.
std::vector<HLE::Applets::MiiData> LoadMiis();
class DefaultMiiSelector final : public MiiSelector {
public:
void Setup(const MiiSelectorConfig& config) override;

@ -54,6 +54,8 @@ add_library(video_core STATIC
renderer_opengl/post_processing_opengl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
renderer_opengl/texture_downloader_es.cpp
renderer_opengl/texture_downloader_es.h
renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp
renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h
renderer_opengl/texture_filters/bicubic/bicubic.cpp
@ -99,11 +101,12 @@ add_library(video_core STATIC
)
set(SHADER_FILES
renderer_opengl/depth_to_color.frag
renderer_opengl/depth_to_color.vert
renderer_opengl/ds_to_color.frag
renderer_opengl/texture_filters/anime4k/refine.frag
renderer_opengl/texture_filters/anime4k/refine.vert
renderer_opengl/texture_filters/anime4k/x_gradient.frag
renderer_opengl/texture_filters/anime4k/y_gradient.frag
renderer_opengl/texture_filters/anime4k/y_gradient.vert
renderer_opengl/texture_filters/bicubic/bicubic.frag
renderer_opengl/texture_filters/scale_force/scale_force.frag
renderer_opengl/texture_filters/tex_coord.vert
@ -121,7 +124,7 @@ endforeach()
add_custom_target(shaders
BYPRODUCTS ${SHADER_HEADERS}
COMMAND cmake -P ${CMAKE_CURRENT_SOURCE_DIR}/generate_shaders.cmake
COMMAND "${CMAKE_COMMAND}" -P ${CMAKE_CURRENT_SOURCE_DIR}/generate_shaders.cmake
SOURCES ${SHADER_FILES}
)
add_dependencies(video_core shaders)

@ -0,0 +1,10 @@
//? #version 320 es
out highp uint color;
uniform highp sampler2D depth;
uniform int lod;
// Fetches the depth texel under this fragment at mip level `lod` and writes it to `color`,
// scaled from a floating-point value to an unsigned integer.
void main() {
// NOTE(review): (exp2(32.0) - 1.0) is presumably not exactly representable in a 32-bit float;
// confirm the conversion cannot exceed the uint range when the fetched depth is 1.0.
color = uint(texelFetch(depth, ivec2(gl_FragCoord.xy), lod).x * (exp2(32.0) - 1.0));
}

@ -1,12 +1,8 @@
//? #version 330
out vec2 input_max;
uniform sampler2D tex_size;
//? #version 320 es
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
input_max = textureSize(tex_size, 0) * 2 - 1;
}

@ -0,0 +1,9 @@
//? #version 320 es
#extension GL_ARM_shader_framebuffer_fetch_depth_stencil : enable
out highp uint color;
// Reads the current fragment's depth and stencil through the
// GL_ARM_shader_framebuffer_fetch_depth_stencil built-ins and packs them into one uint.
void main() {
// Depth scaled to 24 bits, then shifted up by 8 to leave the low byte free.
color = uint(gl_LastFragDepthARM * (exp2(24.0) - 1.0)) << 8;
// Stencil is OR-ed into the low bits (assumes the stencil value fits in 8 bits).
color |= uint(gl_LastFragStencilARM);
}

@ -220,9 +220,175 @@ private:
GLint d24s8_abgr_viewport_u_id;
};
/// Reinterprets a D24S8 texture as RGBA8 by drawing a four-vertex triangle strip covering the
/// viewport. The fragment shader fetches the depth and stencil values of the source texel and
/// repacks them as (stencil, depth bits 31..24, 23..16, 15..8) into the four output channels.
class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase {
public:
/// Compiles the shader program, caches its uniform locations, creates the VAO and decides
/// whether the stencil aspect is sampled through a texture view or an intermediate copy.
ShaderD24S8toRGBA8() {
// Vertex shader: emits a clip-space quad and passes the destination pixel coordinate on.
constexpr std::string_view vs_source = R"(
out vec2 dst_coord;
uniform mediump ivec2 dst_size;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
}
)";
// Fragment shader: maps the destination pixel to a source texel (re-linearizing the index
// when the source and destination sizes differ) and repacks depth/stencil into RGBA.
constexpr std::string_view fs_source = R"(
in mediump vec2 dst_coord;
out lowp vec4 frag_color;
uniform highp sampler2D depth;
uniform lowp usampler2D stencil;
uniform mediump ivec2 dst_size;
uniform mediump ivec2 src_size;
uniform mediump ivec2 src_offset;
void main() {
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
highp uvec4 components =
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
frag_color = vec4(components) / (exp2(8.0) - 1.0);
}
)";
program.Create(vs_source.data(), fs_source.data());
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
// Temporarily make the program current so the `stencil` sampler can be pointed at texture
// unit 1, then restore the previously bound program.
auto state = OpenGLState::GetCurState();
auto cur_program = state.draw.shader_program;
state.draw.shader_program = program.handle;
state.Apply();
glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1);
state.draw.shader_program = cur_program;
state.Apply();
// OES_texture_view doesn't seem to support D24S8 views, at least on adreno
// so instead it will do an intermediate copy before running through the shader
if (GLAD_GL_ARB_texture_view) {
texture_view_func = glTextureView;
} else {
LOG_INFO(Render_OpenGL,
"Texture views are unsupported, reinterpretation will do intermediate copy");
temp_tex.Create();
}
}
/// Draws the reinterpreted contents of `src_rect` in `src_tex` into `dst_rect` of `dst_tex`
/// using `draw_fb_handle` as the draw framebuffer. `read_fb_handle` is not used by this
/// implementation. The previous OpenGLState is re-applied when the function exits.
void Reinterpret(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint read_fb_handle,
GLuint dst_tex, const Common::Rectangle<u32>& dst_rect,
GLuint draw_fb_handle) override {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
// Unit 0 carries the source texture for the `depth` sampler.
state.texture_units[0].texture_2d = src_tex;
if (texture_view_func) {
// View path: a fresh texture object is created per call as a view of src_tex and released
// again at the end of this function.
temp_tex.Create();
glActiveTexture(GL_TEXTURE1);
texture_view_func(temp_tex.handle, GL_TEXTURE_2D, src_tex, GL_DEPTH24_STENCIL8, 0, 1, 0,
1);
// NOTE(review): no bind of temp_tex to unit 1 is visible before these two calls (it is
// bound by state.Apply() further down) -- confirm they affect the intended texture.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) {
// Copy path: the cached intermediate texture is too small for this source rectangle, so
// recreate it with storage of src_rect.right x src_rect.top texels.
temp_tex.Release();
temp_tex.Create();
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
temp_rect = src_rect;
}
// Unit 1 carries the view/copy that the `stencil` sampler reads.
state.texture_units[1].texture_2d = temp_tex.handle;
state.draw.draw_framebuffer = draw_fb_handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
glActiveTexture(GL_TEXTURE1);
if (!texture_view_func) {
// Without texture views, copy the source region into the intermediate texture first.
glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
src_rect.GetWidth(), src_rect.GetHeight(), 1);
}
// Make the texture on unit 1 return its stencil component when sampled.
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
// Render into dst_tex as the colour attachment, with no depth/stencil attachment.
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
if (texture_view_func) {
temp_tex.Release();
}
}
private:
/// glTextureView when GLAD reports ARB_texture_view, otherwise null (copy path is used).
decltype(glTextureView) texture_view_func = nullptr;
OGLProgram program{};
/// Cached uniform locations; -1 until looked up in the constructor.
GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
OGLVertexArray vao{};
/// Per-call texture view (view path) or cached intermediate copy target (copy path).
OGLTexture temp_tex{};
/// Source rectangle the intermediate texture was last allocated for (copy path only).
Common::Rectangle<u32> temp_rect{0, 0, 0, 0};
};
/// Reinterpreter that performs a direct glCopyImageSubData from the source to the destination
/// texture, without any shader pass.
class CopyImageSubData final : public FormatReinterpreterBase {
/// Copies a region of src_rect's width and height from (src_rect.left, src_rect.bottom) in
/// `src_tex` to (dst_rect.left, dst_rect.bottom) in `dst_tex`, at mip level 0. The framebuffer
/// handles are not used.
void Reinterpret(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint read_fb_handle,
GLuint dst_tex, const Common::Rectangle<u32>& dst_rect,
GLuint draw_fb_handle) override {
glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex,
GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(),
src_rect.GetHeight(), 1);
}
};
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
std::make_unique<PixelBufferD24S8toABGR>());
std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
if (vendor.find("NVIDIA") != vendor.npos) {
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
std::make_unique<CopyImageSubData>());
// Nvidia bends the spec and allows direct copies between color and depth formats
// might as well take advantage of it
LOG_INFO(Render_OpenGL, "Using glCopyImageSubData for D24S8 to RGBA8 reinterpretation");
} else if ((GLAD_GL_ARB_stencil_texturing && GLAD_GL_ARB_texture_storage) || GLES) {
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
std::make_unique<ShaderD24S8toRGBA8>());
LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation");
} else {
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
std::make_unique<PixelBufferD24S8toABGR>());
LOG_INFO(Render_OpenGL, "Using pbo for D24S8 to RGBA8 reinterpretation");
}
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGB5A1, PixelFormat::RGBA4},
std::make_unique<RGBA4toRGB5A1>());
}

@ -52,16 +52,17 @@ RasterizerOpenGL::RasterizerOpenGL()
: is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false),
texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
GLAD_GL_ARB_framebuffer_no_attachments;
allow_shadow = GLES || (GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
GLAD_GL_ARB_framebuffer_no_attachments);
if (!allow_shadow) {
LOG_WARNING(Render_OpenGL,
"Shadow might not be able to render because of unsupported OpenGL extensions.");
}
if (!GLAD_GL_ARB_copy_image) {
if (!GLAD_GL_ARB_copy_image && !GLES) {
LOG_WARNING(Render_OpenGL,
"ARB_copy_image not supported. Some games might produce artifacts.");
}
@ -149,11 +150,15 @@ RasterizerOpenGL::RasterizerOpenGL()
framebuffer.Create();
// Allocate and bind texture buffer lut textures
texture_buffer_lut_lf.Create();
texture_buffer_lut_rg.Create();
texture_buffer_lut_rgba.Create();
state.texture_buffer_lut_lf.texture_buffer = texture_buffer_lut_lf.handle;
state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle;
state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle;
state.Apply();
glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle());
glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle());
glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum());
@ -777,7 +782,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
}
OGLTexture temp_tex;
if (need_duplicate_texture && GLAD_GL_ARB_copy_image) {
if (need_duplicate_texture && (GLAD_GL_ARB_copy_image || GLES)) {
// The game is trying to use a surface as a texture and framebuffer at the same time
// which causes unpredictable behavior on the host.
// Making a copy to sample from eliminates this issue and seems to be fairly cheap.
@ -821,6 +826,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
// Sync the LUTs within the texture buffer
SyncAndUploadLUTs();
SyncAndUploadLUTsLF();
// Sync the uniform data
UploadUniforms(accelerate);
@ -942,6 +948,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Blending
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
if (GLES) {
// With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
SyncBlendEnabled();
break;
case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
@ -1062,6 +1072,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Logic op
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
if (GLES) {
// With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
SyncLogicOp();
break;
@ -1816,11 +1830,31 @@ void RasterizerOpenGL::SyncAlphaTest() {
}
void RasterizerOpenGL::SyncLogicOp() {
state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.framebuffer.output_merger.logic_op);
const auto& regs = Pica::g_state.regs;
state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
if (GLES) {
if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip
// color but allow depth write.
state.color_mask = {};
}
}
}
}
void RasterizerOpenGL::SyncColorWriteMask() {
const auto& regs = Pica::g_state.regs;
if (GLES) {
if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip
// color but allow depth write. Return early to avoid overwriting this.
return;
}
}
}
auto IsColorWriteEnabled = [&](u32 value) {
return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE
@ -2005,18 +2039,11 @@ void RasterizerOpenGL::SyncShadowTextureBias() {
}
}
void RasterizerOpenGL::SyncAndUploadLUTs() {
constexpr std::size_t max_size = sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler +
sizeof(GLvec2) * 128 + // fog
sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
sizeof(GLvec4) * 256 + // proctex
sizeof(GLvec4) * 256; // proctex diff
void RasterizerOpenGL::SyncAndUploadLUTsLF() {
constexpr std::size_t max_size =
sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(GLvec2) * 128; // fog
if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty &&
!uniform_block_data.proctex_noise_lut_dirty &&
!uniform_block_data.proctex_color_map_dirty &&
!uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
!uniform_block_data.proctex_diff_lut_dirty) {
if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) {
return;
}
@ -2024,8 +2051,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
GLintptr offset;
bool invalidate;
std::size_t bytes_used = 0;
glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4));
glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle());
std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(GLvec4));
// Sync the lighting luts
if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
@ -2050,8 +2077,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
uniform_block_data.lighting_lut_dirty[index] = false;
}
}
uniform_block_data.lighting_lut_dirty_any = false;
}
uniform_block_data.lighting_lut_dirty_any = false;
// Sync the fog lut
if (uniform_block_data.fog_lut_dirty || invalidate) {
@ -2073,6 +2100,28 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
uniform_block_data.fog_lut_dirty = false;
}
texture_lf_buffer.Unmap(bytes_used);
}
void RasterizerOpenGL::SyncAndUploadLUTs() {
constexpr std::size_t max_size = sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
sizeof(GLvec4) * 256 + // proctex
sizeof(GLvec4) * 256; // proctex diff
if (!uniform_block_data.proctex_noise_lut_dirty &&
!uniform_block_data.proctex_color_map_dirty &&
!uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
!uniform_block_data.proctex_diff_lut_dirty) {
return;
}
u8* buffer;
GLintptr offset;
bool invalidate;
std::size_t bytes_used = 0;
glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4));
// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used](
const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,

@ -233,6 +233,7 @@ private:
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
void SyncAndUploadLUTsLF();
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw);
@ -303,6 +304,7 @@ private:
OGLStreamBuffer uniform_buffer;
OGLStreamBuffer index_buffer;
OGLStreamBuffer texture_buffer;
OGLStreamBuffer texture_lf_buffer;
OGLFramebuffer framebuffer;
GLint uniform_buffer_alignment;
std::size_t uniform_size_aligned_vs;
@ -310,6 +312,7 @@ private:
SamplerInfo texture_cube_sampler;
OGLTexture texture_buffer_lut_lf;
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;

@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <bitset>
#include <cmath>
#include <cstring>
#include <iterator>
#include <memory>
@ -36,6 +37,7 @@
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/texture_downloader_es.h"
#include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
@ -64,13 +66,6 @@ static constexpr std::array<FormatTuple, 5> fb_format_tuples_oes = {{
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
}};
// GL format tuples for the depth surface formats, one slot per table index.
// Each entry is brace-initialized as {internal format, format, type}.
// Slot 1 is left value-initialized; presumably there is no depth format with that
// index -- TODO confirm against the PixelFormat enum.
static constexpr std::array<FormatTuple, 4> depth_format_tuples = {{
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
{},
{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
}};
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
if (type == SurfaceType::Color) {
@ -87,79 +82,6 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
return tex_tuple;
}
/**
 * OpenGL ES does not support glGetTexImage. Obtain the pixels by attaching the
 * texture to a framebuffer.
 * Originally from https://github.com/apitrace/apitrace/blob/master/retrace/glstate_images.cpp
 *
 * Reads back the texture currently bound to `target`. The GL state on entry is restored before
 * returning.
 *
 * @param target Texture target to read: GL_TEXTURE_2D, a cube map face, or GL_TEXTURE_3D_OES.
 *               Any other target returns after the buffer has been filled with 0x80.
 * @param level  Mipmap level to attach and read
 * @param format Pixel format passed to glReadPixels
 * @param type   Pixel type passed to glReadPixels
 * @param height Height of the region to read (note: height precedes width)
 * @param width  Width of the region to read
 * @param depth  Number of slices to read for 3D textures; unused for the other targets
 * @param pixels Destination buffer
 * @param size   Size of the destination buffer in bytes
 */
static void GetTexImageOES(GLenum target, GLint level, GLenum format, GLenum type, GLint height,
                           GLint width, GLint depth, GLubyte* pixels, std::size_t size) {
    // Pre-fill the destination so that a failed readback is recognisable
    memset(pixels, 0x80, size);

    OpenGLState cur_state = OpenGLState::GetCurState();
    OpenGLState state;

    GLenum texture_binding = GL_NONE;
    switch (target) {
    case GL_TEXTURE_2D:
        texture_binding = GL_TEXTURE_BINDING_2D;
        break;
    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
        texture_binding = GL_TEXTURE_BINDING_CUBE_MAP;
        break;
    case GL_TEXTURE_3D_OES:
        texture_binding = GL_TEXTURE_BINDING_3D_OES;
        // Without this break the 3D case fell through to the default and returned early,
        // leaving the 3D readback branch below unreachable.
        break;
    default:
        return;
    }

    // The binding has to be queried before `state` is applied
    GLint texture = 0;
    glGetIntegerv(texture_binding, &texture);
    if (!texture) {
        return;
    }

    OGLFramebuffer fbo;
    fbo.Create();

    state.draw.read_framebuffer = fbo.handle;
    state.Apply();

    switch (target) {
    case GL_TEXTURE_2D:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {
        // Attach with the requested target: a cube map texture must be attached through one of
        // its face targets, GL_TEXTURE_2D is only valid for 2D textures.
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, texture, level);
        GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER);
        if (status != GL_FRAMEBUFFER_COMPLETE) {
            LOG_DEBUG(Render_OpenGL, "Framebuffer is incomplete, status: {:X}", status);
        }
        glReadPixels(0, 0, width, height, format, type, pixels);
        break;
    }
    case GL_TEXTURE_3D_OES:
        // Read the volume one slice at a time.
        // NOTE(review): the slice stride assumes 4 bytes per pixel -- confirm for non-RGBA8 reads
        for (int i = 0; i < depth; i++) {
            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_3D,
                                   texture, level, i);
            glReadPixels(0, 0, width, height, format, type, pixels + 4 * i * width * height);
        }
        break;
    }

    cur_state.Apply();

    fbo.Release();
}
template <typename Map, typename Interval>
static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
@ -329,8 +251,14 @@ OGLTexture RasterizerCacheOpenGL::AllocateSurfaceTexture(const FormatTuple& form
cur_state.Apply();
glActiveTexture(GL_TEXTURE0);
glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
format_tuple.format, format_tuple.type, nullptr);
if (GL_ARB_texture_storage) {
// Allocate all possible mipmap levels upfront
auto levels = std::log2(std::max(width, height)) + 1;
glTexStorage2D(GL_TEXTURE_2D, levels, format_tuple.internal_format, width, height);
} else {
glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
format_tuple.format, format_tuple.type, nullptr);
}
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@ -352,17 +280,22 @@ static void AllocateTextureCube(GLuint texture, const FormatTuple& format_tuple,
cur_state.texture_cube_unit.texture_cube = texture;
cur_state.Apply();
glActiveTexture(TextureUnits::TextureCube.Enum());
for (auto faces : {
GL_TEXTURE_CUBE_MAP_POSITIVE_X,
GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
}) {
glTexImage2D(faces, 0, format_tuple.internal_format, width, width, 0, format_tuple.format,
format_tuple.type, nullptr);
if (GL_ARB_texture_storage) {
// Allocate all possible mipmap levels in case the game uses them later
auto levels = std::log2(width) + 1;
glTexStorage2D(GL_TEXTURE_CUBE_MAP, levels, format_tuple.internal_format, width, width);
} else {
for (auto faces : {
GL_TEXTURE_CUBE_MAP_POSITIVE_X,
GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
}) {
glTexImage2D(faces, 0, format_tuple.internal_format, width, width, 0,
format_tuple.format, format_tuple.type, nullptr);
}
}
// Restore previous texture bindings
@ -775,23 +708,28 @@ void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) {
LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path);
std::vector<u8> decoded_texture;
decoded_texture.resize(width * height * 4);
glBindTexture(GL_TEXTURE_2D, target_tex);
OpenGLState state = OpenGLState::GetCurState();
GLuint old_texture = state.texture_units[0].texture_2d;
state.Apply();
/*
GetTexImageOES is used even if not using OpenGL ES to work around a small issue that
happens if using custom textures with texture dumping at the same time.
Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a
higher quality 256x256 texture. If the 256x256 texture is displayed first and the 32x32
texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture will
appear in the corner of the 256x256 texture.
If texture dumping is enabled and the 32x32 is undumped, Citra will attempt to dump it.
Since the underlying OpenGL texture is still 256x256, Citra crashes because it thinks the
texture is only 32x32.
higher quality 256x256 texture. If the 256x256 texture is displayed first and the
32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture
will appear in the corner of the 256x256 texture. If texture dumping is enabled and
the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL
texture is still 256x256, Citra crashes because it thinks the texture is only 32x32.
GetTexImageOES conveniently only dumps the specified region, and works on both
desktop and ES.
*/
GetTexImageOES(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, height, width, 0,
&decoded_texture[0], decoded_texture.size());
glBindTexture(GL_TEXTURE_2D, 0);
// if the backend isn't OpenGL ES, this won't be initialized yet
if (!owner.texture_downloader_es)
owner.texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE,
height, width, &decoded_texture[0]);
state.texture_units[0].texture_2d = old_texture;
state.Apply();
Common::FlipRGBA8Texture(decoded_texture, width, height);
if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height))
LOG_ERROR(Render_OpenGL, "Failed to save decoded texture");
@ -901,8 +839,9 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect, GLuint read_fb_
MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint read_fb_handle,
GLuint draw_fb_handle) {
if (type == SurfaceType::Fill)
if (type == SurfaceType::Fill) {
return;
}
MICROPROFILE_SCOPE(OpenGL_TextureDL);
@ -941,9 +880,9 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint
glActiveTexture(GL_TEXTURE0);
if (GLES) {
GetTexImageOES(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect.GetHeight(),
rect.GetWidth(), 0, &gl_buffer[buffer_offset],
gl_buffer.size() - buffer_offset);
owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect.GetHeight(), rect.GetWidth(),
&gl_buffer[buffer_offset]);
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
}
@ -967,6 +906,20 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
texture.handle, 0);
}
switch (glCheckFramebufferStatus(GL_FRAMEBUFFER)) {
case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
LOG_WARNING(Render_OpenGL, "Framebuffer incomplete attachment");
break;
case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
LOG_WARNING(Render_OpenGL, "Framebuffer incomplete dimensions");
break;
case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
LOG_WARNING(Render_OpenGL, "Framebuffer incomplete missing attachment");
break;
case GL_FRAMEBUFFER_UNSUPPORTED:
LOG_WARNING(Render_OpenGL, "Framebuffer unsupported");
break;
}
glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
tuple.format, tuple.type, &gl_buffer[buffer_offset]);
@ -1083,13 +1036,18 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
texture_filterer = std::make_unique<TextureFilterer>(Settings::values.texture_filter_name,
resolution_scale_factor);
format_reinterpreter = std::make_unique<FormatReinterpreterOpenGL>();
if (GLES)
texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
read_framebuffer.Create();
draw_framebuffer.Create();
}
RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
#if !defined(ANDROID)
    // Clearing the cache (without flushing) is only needed when switching renderers. Android
    // does not support that, and the clear is costly on shutdown, so it is left out there.
    ClearAll(false);
#endif
}
MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
@ -1304,9 +1262,14 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::Texture::TextureInf
width = surface->GetScaledWidth();
height = surface->GetScaledHeight();
}
for (u32 level = surface->max_level + 1; level <= max_level; ++level) {
glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level,
height >> level, 0, format_tuple.format, format_tuple.type, nullptr);
// If we are using ARB_texture_storage then we've already allocated all of the mipmap
// levels
if (!GL_ARB_texture_storage) {
for (u32 level = surface->max_level + 1; level <= max_level; ++level) {
glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level,
height >> level, 0, format_tuple.format, format_tuple.type,
nullptr);
}
}
if (surface->is_custom || !texture_filterer->IsNull()) {
// TODO: proper mipmap support for custom textures
@ -1806,6 +1769,8 @@ void RasterizerCacheOpenGL::ClearAll(bool flush) {
}
void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) {
std::lock_guard lock{mutex};
if (size == 0)
return;
@ -1842,6 +1807,8 @@ void RasterizerCacheOpenGL::FlushAll() {
}
void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) {
std::lock_guard lock{mutex};
if (size == 0)
return;
@ -1917,6 +1884,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
}
void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
std::lock_guard lock{mutex};
if (surface->registered) {
return;
}
@ -1926,6 +1895,8 @@ void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
}
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
std::lock_guard lock{mutex};
if (!surface->registered) {
return;
}

@ -7,6 +7,7 @@
#include <array>
#include <list>
#include <memory>
#include <mutex>
#include <set>
#include <tuple>
#ifdef __GNUC__
@ -170,6 +171,8 @@ private:
bool valid = false;
};
class RasterizerCacheOpenGL;
struct CachedSurface : SurfaceParams, std::enable_shared_from_this<CachedSurface> {
CachedSurface(RasterizerCacheOpenGL& owner) : owner{owner} {}
~CachedSurface();
@ -266,6 +269,15 @@ struct CachedTextureCube {
std::shared_ptr<SurfaceWatcher> nz;
};
// GL format tuples for the depth surface formats, one slot per table index.
// Each entry is brace-initialized as {internal format, format, type}.
// Slot 1 is left value-initialized; presumably there is no depth format with that
// index -- TODO confirm against the PixelFormat enum.
static constexpr std::array<FormatTuple, 4> depth_format_tuples = {{
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
{},
{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
}};
class TextureDownloaderES;
class RasterizerCacheOpenGL : NonCopyable {
public:
RasterizerCacheOpenGL();
@ -365,11 +377,14 @@ private:
std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
std::recursive_mutex mutex;
public:
OGLTexture AllocateSurfaceTexture(const FormatTuple& format_tuple, u32 width, u32 height);
std::unique_ptr<TextureFilterer> texture_filterer;
std::unique_ptr<FormatReinterpreterOpenGL> format_reinterpreter;
std::unique_ptr<TextureDownloaderES> texture_downloader_es;
};
} // namespace OpenGL

@ -514,11 +514,21 @@ private:
}
case OpCode::Id::RCP: {
if (!sanitize_mul) {
// When accurate multiplication is OFF, NaN are not really handled. This is a
// workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
shader.AddLine("if ({}.x != 0.0)", src1);
}
SetDest(swizzle, dest_reg, fmt::format("(1.0 / {}.x)", src1), 4, 1);
break;
}
case OpCode::Id::RSQ: {
if (!sanitize_mul) {
// When accurate multiplication is OFF, NaN are not really handled. This is a
// workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
shader.AddLine("if ({}.x > 0.0)", src1);
}
SetDest(swizzle, dest_reg, fmt::format("inversesqrt({}.x)", src1), 4, 1);
break;
}
@ -807,6 +817,13 @@ private:
void Generate() {
if (sanitize_mul) {
#ifdef ANDROID
// Use a cheaper sanitize_mul on Android, as mobile GPUs struggle here
// This seems to be sufficient at least for Ocarina of Time and Attack on Titan accurate
// multiplication bugs
shader.AddLine(
"#define sanitize_mul(lhs, rhs) mix(lhs * rhs, vec4(0.0), isnan(lhs * rhs))");
#else
shader.AddLine("vec4 sanitize_mul(vec4 lhs, vec4 rhs) {{");
++shader.scope;
shader.AddLine("vec4 product = lhs * rhs;");
@ -814,6 +831,7 @@ private:
"isnan(lhs)), isnan(product));");
--shader.scope;
shader.AddLine("}}\n");
#endif
}
// Add declarations for registers

@ -102,7 +102,9 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_
out += R"(
out gl_PerVertex {
vec4 gl_Position;
#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
float gl_ClipDistance[2];
#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
};
)";
}
@ -127,6 +129,17 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
if (GLES) {
    // With GLES, we need this in the fragment shader to emulate logic operations.
    // Both fields are taken from the `regs` argument so the config is built from a single
    // register snapshot instead of mixing it with the global Pica state.
    state.alphablend_enable = regs.framebuffer.output_merger.alphablend_enable == 1;
    state.logic_op = regs.framebuffer.output_merger.logic_op;
} else {
    // We don't need these otherwise, reset them to avoid unnecessary shader generation
    state.alphablend_enable = {};
    state.logic_op = {};
}
// Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a
// shader uniform instead.
@ -607,13 +620,15 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
if (!IsPassThroughTevStage(stage)) {
const std::string index_name = std::to_string(index);
out += fmt::format("vec3 color_results_{}[3] = vec3[3](", index_name);
out += fmt::format("vec3 color_results_{}_1 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
out += ", ";
out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
out += ", ";
out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
out += ");\n";
out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, "
"color_results_{}_2, color_results_{}_3);\n",
index_name, index_name, index_name, index_name);
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
out += fmt::format("vec3 color_output_{} = byteround(", index_name);
@ -1216,14 +1231,21 @@ float ProcTexNoiseCoef(vec2 x) {
ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config,
bool separable_shader) {
const auto& state = config.state;
std::string out;
std::string out = R"(
if (GLES) {
out += R"(
#define ALLOW_SHADOW (defined(CITRA_GLES))
)";
} else {
out += R"(
#extension GL_ARB_shader_image_load_store : enable
#extension GL_ARB_shader_image_size : enable
#define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size))
)";
}
if (separable_shader) {
if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n";
}
@ -1244,6 +1266,7 @@ uniform sampler2D tex0;
uniform sampler2D tex1;
uniform sampler2D tex2;
uniform samplerCube tex_cube;
uniform samplerBuffer texture_buffer_lut_lf;
uniform samplerBuffer texture_buffer_lut_rg;
uniform samplerBuffer texture_buffer_lut_rgba;
@ -1267,7 +1290,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) {
}
float LookupLightingLUT(int lut_index, int index, float delta) {
vec2 entry = texelFetch(texture_buffer_lut_rg, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
return entry.r + entry.g * delta;
}
@ -1519,7 +1542,7 @@ vec4 secondary_fragment_color = vec4(0.0);
// Generate clamped fog factor from LUT for given fog index
out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"
"float fog_f = fog_index - fog_i;\n"
"vec2 fog_lut_entry = texelFetch(texture_buffer_lut_rg, int(fog_i) + "
"vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + "
"fog_lut_offset).rg;\n"
"float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"
"fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
@ -1537,8 +1560,8 @@ vec4 secondary_fragment_color = vec4(0.0);
if (state.shadow_rendering) {
out += R"(
#if ALLOW_SHADOW
uint d = uint(clamp(depth, 0.0, 1.0) * 0xFFFFFF);
uint s = uint(last_tex_env_out.g * 0xFF);
uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF));
uint s = uint(last_tex_env_out.g * float(0xFF));
ivec2 image_coord = ivec2(gl_FragCoord.xy);
uint old = imageLoad(shadow_buffer, image_coord).x;
@ -1567,6 +1590,32 @@ do {
out += "color = byteround(last_tex_env_out);\n";
}
// GLES has no glLogicOp, so the logic operations that do not need the destination colour are
// emulated by rewriting the fragment output. They only apply while alpha blending is disabled.
if (GLES) {
    if (!state.alphablend_enable) {
        switch (state.logic_op) {
        case FramebufferRegs::LogicOp::Clear:
            out += "color = vec4(0);\n";
            break;
        case FramebufferRegs::LogicOp::Set:
            out += "color = vec4(1);\n";
            break;
        case FramebufferRegs::LogicOp::Copy:
            // Take the color output as-is
            break;
        case FramebufferRegs::LogicOp::CopyInverted:
            // `color` is a float vec4 and GLSL bitwise operators only accept integer types, so
            // `~color` does not compile. On a normalized channel the bitwise inverse is 1 - c.
            out += "color = vec4(1.0) - color;\n";
            break;
        case FramebufferRegs::LogicOp::NoOp:
            // We need to discard the color, but not necessarily the depth. This is not possible
            // with fragment shader alone, so we emulate this behavior on GLES with glColorMask.
            break;
        default:
            LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<int>(state.logic_op));
            UNIMPLEMENTED();
        }
    }
}
out += '}';
return {std::move(out)};
@ -1574,7 +1623,7 @@ do {
ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) {
std::string out;
if (separable_shader) {
if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n";
}
@ -1617,8 +1666,8 @@ void main() {
std::optional<ShaderDecompiler::ProgramResult> GenerateVertexShader(
const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) {
std::string out = "";
if (separable_shader) {
std::string out;
if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n";
}
@ -1767,8 +1816,8 @@ void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) {
ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config,
bool separable_shader) {
std::string out = "";
if (separable_shader) {
std::string out;
if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
}

@ -61,6 +61,8 @@ struct PicaFSConfigState {
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::FramebufferRegs::LogicOp logic_op;
struct {
struct {

@ -123,6 +123,7 @@ static void SetShaderSamplerBindings(GLuint shader) {
SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube);
// Set the texture samplers to correspond to different lookup table texture units
SetShaderSamplerBinding(shader, "texture_buffer_lut_lf", TextureUnits::TextureBufferLUT_LF);
SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG);
SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA);
@ -176,7 +177,10 @@ public:
OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
program.Create(true, {shader.handle});
SetShaderUniformBlockBindings(program.handle);
SetShaderSamplerBindings(program.handle);
if (type == GL_FRAGMENT_SHADER) {
SetShaderSamplerBindings(program.handle);
}
}
}

@ -14,7 +14,7 @@
namespace OpenGL {
GLuint LoadShader(const char* source, GLenum type) {
const std::string version = GLES ? R"(#version 310 es
const std::string version = GLES ? R"(#version 320 es
#define CITRA_GLES

@ -12,11 +12,15 @@ namespace OpenGL {
// High precision may or may not be supported in GLES3. If it isn't, use medium precision instead.
static constexpr char fragment_shader_precision_OES[] = R"(
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
precision highp int;
precision highp float;
precision highp samplerBuffer;
precision highp uimage2D;
#else
precision mediump float;
precision mediump int;
precision mediump float;
precision mediump samplerBuffer;
precision mediump uimage2D;
#endif // GL_FRAGMENT_PRECISION_HIGH
)";

@ -58,6 +58,7 @@ OpenGLState::OpenGLState() {
texture_cube_unit.texture_cube = 0;
texture_cube_unit.sampler = 0;
texture_buffer_lut_lf.texture_buffer = 0;
texture_buffer_lut_rg.texture_buffer = 0;
texture_buffer_lut_rgba.texture_buffer = 0;
@ -169,10 +170,17 @@ void OpenGLState::Apply() const {
if (blend.enabled != cur_state.blend.enabled) {
if (blend.enabled) {
glEnable(GL_BLEND);
glDisable(GL_COLOR_LOGIC_OP);
} else {
glDisable(GL_BLEND);
glEnable(GL_COLOR_LOGIC_OP);
}
// GLES does not support glLogicOp
if (!GLES) {
if (blend.enabled) {
glDisable(GL_COLOR_LOGIC_OP);
} else {
glEnable(GL_COLOR_LOGIC_OP);
}
}
}
@ -196,13 +204,11 @@ void OpenGLState::Apply() const {
glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
}
// GLES3 does not support glLogicOp
// GLES does not support glLogicOp
if (!GLES) {
if (logic_op != cur_state.logic_op) {
glLogicOp(logic_op);
}
} else {
LOG_TRACE(Render_OpenGL, "glLogicOps are unimplemented...");
}
// Textures
@ -224,6 +230,12 @@ void OpenGLState::Apply() const {
glBindSampler(TextureUnits::TextureCube.id, texture_cube_unit.sampler);
}
// Texture buffer LUTs
if (texture_buffer_lut_lf.texture_buffer != cur_state.texture_buffer_lut_lf.texture_buffer) {
glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum());
glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_lf.texture_buffer);
}
// Texture buffer LUTs
if (texture_buffer_lut_rg.texture_buffer != cur_state.texture_buffer_lut_rg.texture_buffer) {
glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
@ -354,6 +366,8 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
}
if (texture_cube_unit.texture_cube == handle)
texture_cube_unit.texture_cube = 0;
if (texture_buffer_lut_lf.texture_buffer == handle)
texture_buffer_lut_lf.texture_buffer = 0;
if (texture_buffer_lut_rg.texture_buffer == handle)
texture_buffer_lut_rg.texture_buffer = 0;
if (texture_buffer_lut_rgba.texture_buffer == handle)

@ -22,7 +22,8 @@ constexpr TextureUnit PicaTexture(int unit) {
return TextureUnit{unit};
}
constexpr TextureUnit TextureCube{3};
constexpr TextureUnit TextureCube{6};
constexpr TextureUnit TextureBufferLUT_LF{3};
constexpr TextureUnit TextureBufferLUT_RG{4};
constexpr TextureUnit TextureBufferLUT_RGBA{5};
@ -101,6 +102,10 @@ public:
GLuint sampler; // GL_SAMPLER_BINDING
} texture_cube_unit;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} texture_buffer_lut_lf;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} texture_buffer_lut_rg;

@ -29,6 +29,7 @@
#include "core/tracer/recorder.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/post_processing_opengl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@ -39,7 +40,12 @@ namespace OpenGL {
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger
// number, but 9 swap textures at 60FPS presentation allows for 800% speed, so that's probably fine.
#ifdef ANDROID
// Reduce the size of the swap chain, since the UI only allows up to 200% speed.
constexpr std::size_t SWAP_CHAIN_SIZE = 6;
#else
constexpr std::size_t SWAP_CHAIN_SIZE = 9;
#endif
class OGLTextureMailboxException : public std::runtime_error {
public:
@ -96,7 +102,7 @@ public:
frame->color.Create();
state.renderbuffer = frame->color.handle;
state.Apply();
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, width, height);
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height);
// Recreate the FBO for the render target
frame->render.Release();
@ -1197,14 +1203,18 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
/// Initialize the renderer
VideoCore::ResultStatus RendererOpenGL::Init() {
#ifndef ANDROID
if (!gladLoadGL()) {
return VideoCore::ResultStatus::ErrorBelowGL33;
}
// Qualcomm has some spammy info messages that are marked as errors but not important
// https://developer.qualcomm.com/comment/11845
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr);
}
#endif
const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};

@ -0,0 +1,254 @@
// Copyright 2020 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <chrono>
#include <vector>
#include <fmt/chrono.h>
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/texture_downloader_es.h"
#include "shaders/depth_to_color.frag"
#include "shaders/depth_to_color.vert"
#include "shaders/ds_to_color.frag"
namespace OpenGL {
/**
 * Self test for the texture downloader.
 * Logs the fragment shader precision formats, then for each depth format uploads generated data,
 * reads it back through GetTexImage, and logs the elapsed time and every change in the
 * difference between the uploaded and downloaded values.
 */
void TextureDownloaderES::Test() {
    // Snapshot taken on entry and re-applied once every case has run
    const auto saved_state = OpenGLState::GetCurState();
    OpenGLState state;

    {
        GLint range[2];
        GLint precision;
#define PRECISION_TEST(type)                                                                       \
    glGetShaderPrecisionFormat(GL_FRAGMENT_SHADER, type, range, &precision);                       \
    LOG_INFO(Render_OpenGL, #type " range: [{}, {}], precision: {}", range[0], range[1], precision);
        PRECISION_TEST(GL_LOW_INT);
        PRECISION_TEST(GL_MEDIUM_INT);
        PRECISION_TEST(GL_HIGH_INT);
        PRECISION_TEST(GL_LOW_FLOAT);
        PRECISION_TEST(GL_MEDIUM_FLOAT);
        PRECISION_TEST(GL_HIGH_FLOAT);
#undef PRECISION_TEST
    }
    glActiveTexture(GL_TEXTURE0);

    // Uploads a generated tex_size x tex_size image in the given format and downloads it again
    const auto run_case = [this, &state](FormatTuple tuple, auto original_data,
                                         std::size_t tex_size, auto data_generator) {
        OGLTexture texture;
        texture.Create();
        state.texture_units[0].texture_2d = texture.handle;
        state.Apply();

        original_data.resize(tex_size * tex_size);
        std::size_t fill_idx = 0;
        for (auto& texel : original_data) {
            texel = data_generator(fill_idx);
            ++fill_idx;
        }
        glTexStorage2D(GL_TEXTURE_2D, 1, tuple.internal_format, tex_size, tex_size);
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, tex_size, tex_size, tuple.format, tuple.type,
                        original_data.data());

        decltype(original_data) new_data(original_data.size());
        // Finish outstanding GL work on both sides so only the download is timed
        glFinish();
        const auto start = std::chrono::high_resolution_clock::now();
        GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, tex_size, tex_size,
                    new_data.data());
        glFinish();
        const auto elapsed = std::chrono::high_resolution_clock::now() - start;
        LOG_INFO(Render_OpenGL, "test took {}",
                 std::chrono::duration<double, std::milli>(elapsed));

        int diff = 0;
        for (std::size_t idx = 0; idx < original_data.size(); ++idx) {
            const auto delta = new_data[idx] - original_data[idx];
            if (delta == diff) {
                continue;
            }
            diff = delta;
            // every time the error between the real and expected value changes, log it
            // some error is expected in D24 due to floating point precision
            LOG_WARNING(Render_OpenGL, "difference changed at {:#X}: {:#X} -> {:#X}", idx,
                        original_data[idx], new_data[idx]);
        }
    };

    LOG_INFO(Render_OpenGL, "GL_DEPTH24_STENCIL8 download test starting");
    run_case(depth_format_tuples[3], std::vector<u32>{}, 4096,
             [](std::size_t idx) { return static_cast<u32>((idx << 8) | (idx & 0xFF)); });

    LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT24 download test starting");
    run_case(depth_format_tuples[2], std::vector<u32>{}, 4096,
             [](std::size_t idx) { return static_cast<u32>(idx << 8); });

    LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT16 download test starting");
    run_case(depth_format_tuples[0], std::vector<u16>{}, 256,
             [](std::size_t idx) { return static_cast<u16>(idx); });

    saved_state.Apply();
}
/**
 * Creates the GL objects used to download depth textures: the conversion shaders, a
 * nearest-filter sampler, and two framebuffers backed by max_size x max_size renderbuffers
 * (R32UI and R16). The GL state on entry is restored before returning.
 *
 * @param enable_depth_stencil Whether to build the D24S8 conversion shader. When false, that
 *                             program is never created and is not bound or configured here.
 */
TextureDownloaderES::TextureDownloaderES(bool enable_depth_stencil) {
    vao.Create();
    read_fbo_generic.Create();

    depth32_fbo.Create();
    r32ui_renderbuffer.Create();
    depth16_fbo.Create();
    r16_renderbuffer.Create();

    const auto init_program = [](ConversionShader& converter, std::string_view frag) {
        converter.program.Create(depth_to_color_vert.data(), frag.data());
        converter.lod_location = glGetUniformLocation(converter.program.handle, "lod");
    };

    // xperia64: The depth stencil shader currently uses a GLES extension that is not supported
    // across all devices. Reportedly broken on Tegra devices and the Nexus 6P, so enabling it
    // can be toggled.
    if (enable_depth_stencil) {
        init_program(d24s8_r32ui_conversion_shader, ds_to_color_frag);
    }
    init_program(d24_r32ui_conversion_shader, depth_to_color_frag);
    init_program(d16_r16_conversion_shader, R"(
out highp float color;
uniform highp sampler2D depth;
uniform int lod;
void main(){
color = texelFetch(depth, ivec2(gl_FragCoord.xy), lod).x;
}
)");

    sampler.Create();
    glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    auto cur_state = OpenGLState::GetCurState();
    auto state = cur_state;

    // Only bind the D24S8 program if it was actually created above; otherwise the uniform
    // lookup and upload below would be issued against a program that does not exist.
    if (enable_depth_stencil) {
        state.draw.shader_program = d24s8_r32ui_conversion_shader.program.handle;
    }
    state.draw.draw_framebuffer = depth32_fbo.handle;
    state.renderbuffer = r32ui_renderbuffer.handle;
    state.Apply();
    glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, max_size, max_size);
    glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
                              r32ui_renderbuffer.handle);
    if (enable_depth_stencil) {
        // glUniform* acts on the program currently in use, which is the D24S8 program here
        glUniform1i(glGetUniformLocation(d24s8_r32ui_conversion_shader.program.handle, "depth"),
                    1);
    }

    state.draw.draw_framebuffer = depth16_fbo.handle;
    state.renderbuffer = r16_renderbuffer.handle;
    state.Apply();
    glRenderbufferStorage(GL_RENDERBUFFER, GL_R16, max_size, max_size);
    glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
                              r16_renderbuffer.handle);

    cur_state.Apply();
}
/**
 * OpenGL ES does not support glReadBuffer for depth/stencil formats
 * This gets around it by converting to a Red surface before downloading
 *
 * The source is the 2D texture bound to texture unit 0 in the current OpenGLState.
 * @param level  mip level passed to the conversion shader's "lod" uniform
 * @param format overwritten with the color format to use when reading the result back
 * @param type   selects the conversion shader; GL_UNSIGNED_INT_24_8 is rewritten to
 *               GL_UNSIGNED_INT
 * @param height height of the viewport to draw, must not exceed max_size
 * @param width  width of the viewport to draw, must not exceed max_size
 * @return handle of the framebuffer that holds the converted data
 */
GLuint TextureDownloaderES::ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type,
                                                GLint height, GLint width) {
    ASSERT(width <= max_size && height <= max_size);
    const OpenGLState cur_state = OpenGLState::GetCurState();
    OpenGLState state;
    state.texture_units[0] = {cur_state.texture_units[0].texture_2d, sampler.handle};
    state.draw.vertex_array = vao.handle;

    // Initialized so that no indeterminate pointer exists if the default case is ever passed
    const ConversionShader* converter = nullptr;
    switch (type) {
    case GL_UNSIGNED_SHORT:
        state.draw.draw_framebuffer = depth16_fbo.handle;
        converter = &d16_r16_conversion_shader;
        format = GL_RED;
        break;
    case GL_UNSIGNED_INT:
        state.draw.draw_framebuffer = depth32_fbo.handle;
        converter = &d24_r32ui_conversion_shader;
        format = GL_RED_INTEGER;
        break;
    case GL_UNSIGNED_INT_24_8:
        state.draw.draw_framebuffer = depth32_fbo.handle;
        converter = &d24s8_r32ui_conversion_shader;
        format = GL_RED_INTEGER;
        type = GL_UNSIGNED_INT;
        break;
    default:
        UNREACHABLE_MSG("Destination type not recognized");
    }

    state.draw.shader_program = converter->program.handle;
    state.viewport = {0, 0, width, height};
    state.Apply();

    if (converter->program.handle == d24s8_r32ui_conversion_shader.program.handle) {
        // TODO BreadFish64: the ARM framebuffer reading extension is probably not the most optimal
        // way to do this, search for another solution
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                               state.texture_units[0].texture_2d, level);
    }

    glUniform1i(converter->lod_location, level);
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    return state.draw.draw_framebuffer;
}
/**
 * OpenGL ES does not support glGetTexImage. Obtain the pixels by attaching the
 * texture to a framebuffer.
 * Originally from https://github.com/apitrace/apitrace/blob/master/retrace/glstate_images.cpp
 * Depth texture download assumes that the texture's format tuple matches what is found in
 * OpenGL::depth_format_tuples
 *
 * The texture is taken from the current OpenGLState: texture unit 0 for GL_TEXTURE_2D, the cube
 * unit for the cube map face targets. Any other target is logged and nothing is read.
 * Depth and depth-stencil formats currently return without writing to pixels.
 * The read framebuffer binding that was current on entry is restored after the read.
 */
void TextureDownloaderES::GetTexImage(GLenum target, GLuint level, GLenum format, GLenum type,
                                      GLint height, GLint width, void* pixels) {
    OpenGLState state = OpenGLState::GetCurState();
    GLuint texture = 0;
    const GLuint old_read_buffer = state.draw.read_framebuffer;

    switch (target) {
    case GL_TEXTURE_2D:
        texture = state.texture_units[0].texture_2d;
        break;
    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
        texture = state.texture_cube_unit.texture_cube;
        break;
    default:
        UNIMPLEMENTED_MSG("Unexpected target {:x}", target);
        // There is no texture to attach for an unknown target, so do not attempt the read
        return;
    }

    switch (format) {
    case GL_DEPTH_COMPONENT:
    case GL_DEPTH_STENCIL:
        // unfortunately, the accurate way is too slow for release
        // (it would read from the framebuffer returned by ConvertDepthToColor)
        return;
    default:
        state.draw.read_framebuffer = read_fbo_generic.handle;
        state.Apply();
        // The textarget has to name the cube map face when the texture is a cube map, so pass
        // the caller's target through instead of assuming GL_TEXTURE_2D
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, texture,
                               level);
    }

    GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER);
    if (status != GL_FRAMEBUFFER_COMPLETE) {
        LOG_DEBUG(Render_OpenGL, "Framebuffer is incomplete, status: {:X}", status);
    }
    glReadPixels(0, 0, width, height, format, type, pixels);

    state.draw.read_framebuffer = old_read_buffer;
    state.Apply();
}
} // namespace OpenGL

@ -0,0 +1,36 @@
// Copyright 2020 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class OpenGLState;
/// Downloads texture contents on OpenGL ES by reading them back through framebuffers.
class TextureDownloaderES {
private:
    /// Side length, in pixels, of the square conversion renderbuffers
    static constexpr u16 max_size = 1024;

    OGLVertexArray vao;
    /// Framebuffer that non-depth textures are attached to for reading
    OGLFramebuffer read_fbo_generic;
    /// Draw targets of the depth conversion passes
    OGLFramebuffer depth32_fbo;
    OGLFramebuffer depth16_fbo;
    /// Color storage attached to depth32_fbo and depth16_fbo respectively
    OGLRenderbuffer r32ui_renderbuffer;
    OGLRenderbuffer r16_renderbuffer;

    /// A conversion program together with the location of its "lod" uniform
    struct ConversionShader {
        OGLProgram program;
        GLint lod_location{-1};
    };
    ConversionShader d24_r32ui_conversion_shader;
    ConversionShader d16_r16_conversion_shader;
    ConversionShader d24s8_r32ui_conversion_shader;

    OGLSampler sampler;

    void Test();

    /// Draws the depth texture bound to unit 0 into a color framebuffer and returns its handle;
    /// format and type are updated to describe the converted data
    GLuint ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type, GLint height,
                               GLint width);

public:
    /// @param enable_depth_stencil whether to build the depth-stencil conversion program
    TextureDownloaderES(bool enable_depth_stencil);

    /// Replacement for glGetTexImage, which OpenGL ES does not provide
    void GetTexImage(GLenum target, GLuint level, GLenum format, const GLenum type, GLint height,
                     GLint width, void* pixels);
};
} // namespace OpenGL

@ -34,30 +34,14 @@
#include "video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h"
#include "shaders/refine.frag"
#include "shaders/refine.vert"
#include "shaders/tex_coord.vert"
#include "shaders/x_gradient.frag"
#include "shaders/y_gradient.frag"
#include "shaders/y_gradient.vert"
namespace OpenGL {
Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_factor) {
const OpenGLState cur_state = OpenGLState::GetCurState();
const auto setup_temp_tex = [this](TempTex& texture, GLint internal_format, GLint format) {
texture.fbo.Create();
texture.tex.Create();
state.draw.draw_framebuffer = texture.fbo.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_RECTANGLE, texture.tex.handle);
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, internal_format, 1024 * internal_scale_factor,
1024 * internal_scale_factor, 0, format, GL_HALF_FLOAT, nullptr);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE,
texture.tex.handle, 0);
};
setup_temp_tex(LUMAD, GL_R16F, GL_RED);
setup_temp_tex(XY, GL_RG16F, GL_RG);
vao.Create();
@ -65,17 +49,17 @@ Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_f
samplers[idx].Create();
state.texture_units[idx].sampler = samplers[idx].handle;
glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MIN_FILTER,
idx == 0 ? GL_LINEAR : GL_NEAREST);
idx != 2 ? GL_LINEAR : GL_NEAREST);
glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MAG_FILTER,
idx == 0 ? GL_LINEAR : GL_NEAREST);
idx != 2 ? GL_LINEAR : GL_NEAREST);
glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
state.draw.vertex_array = vao.handle;
gradient_x_program.Create(tex_coord_vert.data(), x_gradient_frag.data());
gradient_y_program.Create(y_gradient_vert.data(), y_gradient_frag.data());
refine_program.Create(refine_vert.data(), refine_frag.data());
gradient_y_program.Create(tex_coord_vert.data(), y_gradient_frag.data());
refine_program.Create(tex_coord_vert.data(), refine_frag.data());
state.draw.shader_program = gradient_y_program.handle;
state.Apply();
@ -84,8 +68,6 @@ Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_f
state.draw.shader_program = refine_program.handle;
state.Apply();
glUniform1i(glGetUniformLocation(refine_program.handle, "LUMAD"), 1);
glUniform1f(glGetUniformLocation(refine_program.handle, "final_scale"),
static_cast<GLfloat>(internal_scale_factor) / scale_factor);
cur_state.Apply();
}
@ -95,20 +77,48 @@ void Anime4kUltrafast::Filter(GLuint src_tex, const Common::Rectangle<u32>& src_
GLuint read_fb_handle, GLuint draw_fb_handle) {
const OpenGLState cur_state = OpenGLState::GetCurState();
// These will have handles from the previous texture that was filtered, reset them to avoid
// binding invalid textures.
state.texture_units[0].texture_2d = 0;
state.texture_units[1].texture_2d = 0;
state.texture_units[2].texture_2d = 0;
const auto setup_temp_tex = [this, &src_rect](GLint internal_format, GLint format) {
TempTex texture;
texture.fbo.Create();
texture.tex.Create();
state.texture_units[0].texture_2d = texture.tex.handle;
state.draw.draw_framebuffer = texture.fbo.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture.tex.handle);
if (GL_ARB_texture_storage) {
glTexStorage2D(GL_TEXTURE_2D, 1, internal_format,
src_rect.GetWidth() * internal_scale_factor,
src_rect.GetHeight() * internal_scale_factor);
} else {
glTexImage2D(
GL_TEXTURE_2D, 0, internal_format, src_rect.GetWidth() * internal_scale_factor,
src_rect.GetHeight() * internal_scale_factor, 0, format, GL_HALF_FLOAT, nullptr);
}
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
texture.tex.handle, 0);
return texture;
};
auto XY = setup_temp_tex(GL_RG16F, GL_RG);
auto LUMAD = setup_temp_tex(GL_R16F, GL_RED);
state.viewport = {static_cast<GLint>(src_rect.left * internal_scale_factor),
static_cast<GLint>(src_rect.bottom * internal_scale_factor),
static_cast<GLsizei>(src_rect.GetWidth() * internal_scale_factor),
static_cast<GLsizei>(src_rect.GetHeight() * internal_scale_factor)};
state.texture_units[0].texture_2d = src_tex;
state.texture_units[1].texture_2d = LUMAD.tex.handle;
state.texture_units[2].texture_2d = XY.tex.handle;
state.draw.draw_framebuffer = XY.fbo.handle;
state.draw.shader_program = gradient_x_program.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_RECTANGLE, LUMAD.tex.handle);
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_RECTANGLE, XY.tex.handle);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// gradient y pass

@ -30,8 +30,6 @@ private:
OGLTexture tex;
OGLFramebuffer fbo;
};
TempTex LUMAD;
TempTex XY;
std::array<OGLSampler, 3> samplers;

@ -1,14 +1,12 @@
//? #version 330
precision mediump float;
in vec2 tex_coord;
in vec2 input_max;
out vec4 frag_color;
uniform sampler2D HOOKED;
uniform sampler2DRect LUMAD;
uniform sampler2DRect LUMAG;
uniform float final_scale;
uniform sampler2D LUMAD;
const float LINE_DETECT_THRESHOLD = 0.4;
const float STRENGTH = 0.6;
@ -21,12 +19,12 @@ struct RGBAL {
};
vec4 getAverage(vec4 cc, vec4 a, vec4 b, vec4 c) {
return cc * (1 - STRENGTH) + ((a + b + c) / 3) * STRENGTH;
return cc * (1.0 - STRENGTH) + ((a + b + c) / 3.0) * STRENGTH;
}
#define GetRGBAL(offset) \
RGBAL(textureOffset(HOOKED, tex_coord, offset), \
texture(LUMAD, clamp((gl_FragCoord.xy + offset) * final_scale, vec2(0.0), input_max)).x)
#define GetRGBAL(x_offset, y_offset) \
RGBAL(textureLodOffset(HOOKED, tex_coord, 0.0, ivec2(x_offset, y_offset)), \
textureLodOffset(LUMAD, tex_coord, 0.0, ivec2(x_offset, y_offset)).x)
float min3v(float a, float b, float c) {
return min(min(a, b), c);
@ -37,23 +35,23 @@ float max3v(float a, float b, float c) {
}
vec4 Compute() {
RGBAL cc = GetRGBAL(ivec2(0));
RGBAL cc = GetRGBAL(0, 0);
if (cc.l > LINE_DETECT_THRESHOLD) {
return cc.c;
}
RGBAL tl = GetRGBAL(ivec2(-1, -1));
RGBAL t = GetRGBAL(ivec2(0, -1));
RGBAL tr = GetRGBAL(ivec2(1, -1));
RGBAL tl = GetRGBAL(-1, -1);
RGBAL t = GetRGBAL(0, -1);
RGBAL tr = GetRGBAL(1, -1);
RGBAL l = GetRGBAL(ivec2(-1, 0));
RGBAL l = GetRGBAL(-1, 0);
RGBAL r = GetRGBAL(ivec2(1, 0));
RGBAL r = GetRGBAL(1, 0);
RGBAL bl = GetRGBAL(ivec2(-1, 1));
RGBAL b = GetRGBAL(ivec2(0, 1));
RGBAL br = GetRGBAL(ivec2(1, 1));
RGBAL bl = GetRGBAL(-1, 1);
RGBAL b = GetRGBAL(0, 1);
RGBAL br = GetRGBAL(1, 1);
// Kernel 0 and 4
float maxDark = max3v(br.l, b.l, bl.l);

@ -1,14 +0,0 @@
//? #version 330
out vec2 tex_coord;
out vec2 input_max;
uniform sampler2D HOOKED;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0;
input_max = textureSize(HOOKED, 0) * 2.0 - 1.0;
}

@ -1,4 +1,6 @@
//? #version 330
precision mediump float;
in vec2 tex_coord;
out vec2 frag_color;
@ -7,7 +9,7 @@ uniform sampler2D tex_input;
const vec3 K = vec3(0.2627, 0.6780, 0.0593);
// TODO: improve handling of alpha channel
#define GetLum(xoffset) dot(K, textureOffset(tex_input, tex_coord, ivec2(xoffset, 0)).rgb)
#define GetLum(xoffset) dot(K, textureLodOffset(tex_input, tex_coord, 0.0, ivec2(xoffset, 0)).rgb)
void main() {
float l = GetLum(-1);

@ -1,16 +1,18 @@
//? #version 330
in vec2 input_max;
precision mediump float;
in vec2 tex_coord;
out float frag_color;
uniform sampler2DRect tex_input;
uniform sampler2D tex_input;
void main() {
vec2 t = texture(tex_input, min(gl_FragCoord.xy + vec2(0.0, 1.0), input_max)).xy;
vec2 c = texture(tex_input, gl_FragCoord.xy).xy;
vec2 b = texture(tex_input, max(gl_FragCoord.xy - vec2(0.0, 1.0), vec2(0.0))).xy;
vec2 t = textureLodOffset(tex_input, tex_coord, 0.0, ivec2(0, 1)).xy;
vec2 c = textureLod(tex_input, tex_coord, 0.0).xy;
vec2 b = textureLodOffset(tex_input, tex_coord, 0.0, ivec2(0, -1)).xy;
vec2 grad = vec2(t.x + 2 * c.x + b.x, b.y - t.y);
vec2 grad = vec2(t.x + 2.0 * c.x + b.x, b.y - t.y);
frag_color = 1 - length(grad);
frag_color = 1.0 - length(grad);
}

@ -1,4 +1,6 @@
//? #version 330
precision mediump float;
in vec2 tex_coord;
out vec4 frag_color;
@ -18,7 +20,7 @@ vec4 cubic(float v) {
vec4 textureBicubic(sampler2D sampler, vec2 texCoords) {
vec2 texSize = textureSize(sampler, 0);
vec2 texSize = vec2(textureSize(sampler, 0));
vec2 invTexSize = 1.0 / texSize;
texCoords = texCoords * texSize - 0.5;

@ -1,4 +1,6 @@
//? #version 330
precision mediump float;
in vec2 tex_coord;
in vec2 source_size;
in vec2 output_size;
@ -6,7 +8,7 @@ in vec2 output_size;
out vec4 frag_color;
uniform sampler2D tex;
uniform float scale;
uniform lowp float scale;
const int BLEND_NONE = 0;
const int BLEND_NORMAL = 1;
@ -42,12 +44,12 @@ float GetLeftRatio(vec2 center, vec2 origin, vec2 direction) {
return smoothstep(-sqrt(2.0) / 2.0, sqrt(2.0) / 2.0, v);
}
vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5);
vec2 coord = tex_coord - pos / source_size;
#define P(x, y) textureOffset(tex, coord, ivec2(x, y))
void main() {
vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5);
vec2 coord = tex_coord - pos / source_size;
//---------------------------------------
// Input Pixel Mapping: -|x|x|x|-
// x|A|B|C|x
@ -142,15 +144,15 @@ void main() {
(IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) &&
IsPixEqual(F, C) && !IsPixEqual(E, I))));
vec2 origin = vec2(0.0, 1.0 / sqrt(2.0));
ivec2 direction = ivec2(1, -1);
vec2 direction = vec2(1.0, -1.0);
if (doLineBlend) {
bool haveShallowLine =
(STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && E != G && D != G;
bool haveSteepLine =
(STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && E != C && B != C;
origin = haveShallowLine ? vec2(0.0, 0.25) : vec2(0.0, 0.5);
direction.x += haveShallowLine ? 1 : 0;
direction.y -= haveSteepLine ? 1 : 0;
direction.x += haveShallowLine ? 1.0 : 0.0;
direction.y -= haveSteepLine ? 1.0 : 0.0;
}
vec4 blendPix = mix(H, F, step(ColorDist(E, F), ColorDist(E, H)));
res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));
@ -169,15 +171,15 @@ void main() {
(IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) &&
IsPixEqual(H, I) && !IsPixEqual(E, G))));
vec2 origin = vec2(-1.0 / sqrt(2.0), 0.0);
ivec2 direction = ivec2(1, 1);
vec2 direction = vec2(1.0, 1.0);
if (doLineBlend) {
bool haveShallowLine =
(STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && E != A && B != A;
bool haveSteepLine =
(STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && E != I && F != I;
origin = haveShallowLine ? vec2(-0.25, 0.0) : vec2(-0.5, 0.0);
direction.y += haveShallowLine ? 1 : 0;
direction.x += haveSteepLine ? 1 : 0;
direction.y += haveShallowLine ? 1.0 : 0.0;
direction.x += haveSteepLine ? 1.0 : 0.0;
}
origin = origin;
direction = direction;
@ -198,15 +200,15 @@ void main() {
(IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) &&
IsPixEqual(B, A) && !IsPixEqual(E, C))));
vec2 origin = vec2(1.0 / sqrt(2.0), 0.0);
ivec2 direction = ivec2(-1, -1);
vec2 direction = vec2(-1.0, -1.0);
if (doLineBlend) {
bool haveShallowLine =
(STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && E != I && H != I;
bool haveSteepLine =
(STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && E != A && D != A;
origin = haveShallowLine ? vec2(0.25, 0.0) : vec2(0.5, 0.0);
direction.y -= haveShallowLine ? 1 : 0;
direction.x -= haveSteepLine ? 1 : 0;
direction.y -= haveShallowLine ? 1.0 : 0.0;
direction.x -= haveSteepLine ? 1.0 : 0.0;
}
vec4 blendPix = mix(F, B, step(ColorDist(E, B), ColorDist(E, F)));
res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));
@ -225,15 +227,15 @@ void main() {
(IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) &&
IsPixEqual(D, G) && !IsPixEqual(E, A))));
vec2 origin = vec2(0.0, -1.0 / sqrt(2.0));
ivec2 direction = ivec2(-1, 1);
vec2 direction = vec2(-1.0, 1.0);
if (doLineBlend) {
bool haveShallowLine =
(STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && E != C && F != C;
bool haveSteepLine =
(STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && E != G && H != G;
origin = haveShallowLine ? vec2(0.0, -0.25) : vec2(0.0, -0.5);
direction.x -= haveShallowLine ? 1 : 0;
direction.y += haveSteepLine ? 1 : 0;
direction.x -= haveShallowLine ? 1.0 : 0.0;
direction.y += haveSteepLine ? 1.0 : 0.0;
}
vec4 blendPix = mix(D, B, step(ColorDist(E, B), ColorDist(E, D)));
res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));

@ -4,7 +4,7 @@ out vec2 source_size;
out vec2 output_size;
uniform sampler2D tex;
uniform float scale;
uniform lowp float scale;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
@ -12,6 +12,6 @@ const vec2 vertices[4] =
void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0;
source_size = textureSize(tex, 0);
source_size = vec2(textureSize(tex, 0));
output_size = source_size * scale;
}