Merge pull request #5823 from SachinVin/dyn

Android: Backport easy stuff
master
SachinVin 2021-10-03 18:58:20 +07:00 committed by GitHub
commit 6183b5d76c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
43 changed files with 1052 additions and 365 deletions

@ -8,3 +8,21 @@
/build /build
/captures /captures
.externalNativeBuild .externalNativeBuild
# CXX compile cache
app/.cxx
# Google Services (e.g. APIs or Firebase)
google-services.json
# Freeline
freeline.py
freeline/
freeline_project_description.json
# fastlane
fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots
fastlane/test_output
fastlane/readme.md

@ -345,7 +345,6 @@ void Source::GenerateFrame() {
break; break;
case InterpolationMode::Polyphase: case InterpolationMode::Polyphase:
// TODO(merry): Implement polyphase interpolation // TODO(merry): Implement polyphase interpolation
LOG_DEBUG(Audio_DSP, "Polyphase interpolation unimplemented; falling back to linear");
AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier, AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier,
current_frame, frame_position); current_frame, frame_position);
break; break;

@ -8,11 +8,7 @@
#include <QString> #include <QString>
#include <QVBoxLayout> #include <QVBoxLayout>
#include "citra_qt/applets/mii_selector.h" #include "citra_qt/applets/mii_selector.h"
#include "common/file_util.h"
#include "common/string_util.h" #include "common/string_util.h"
#include "core/file_sys/archive_extsavedata.h"
#include "core/file_sys/file_backend.h"
#include "core/hle/service/ptm/ptm.h"
QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_selector_) QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_selector_)
: QDialog(parent), mii_selector(mii_selector_) { : QDialog(parent), mii_selector(mii_selector_) {
@ -33,37 +29,9 @@ QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_sel
miis.push_back(HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data); miis.push_back(HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data);
combobox->addItem(tr("Standard Mii")); combobox->addItem(tr("Standard Mii"));
for (const auto& mii : Frontend::LoadMiis()) {
std::string nand_directory{FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)};
FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true);
auto archive_result = extdata_archive_factory.Open(Service::PTM::ptm_shared_extdata_id, 0);
if (archive_result.Succeeded()) {
auto archive = std::move(archive_result).Unwrap();
FileSys::Path file_path = "/CFL_DB.dat";
FileSys::Mode mode{};
mode.read_flag.Assign(1);
auto file_result = archive->OpenFile(file_path, mode);
if (file_result.Succeeded()) {
auto file = std::move(file_result).Unwrap();
u32 saved_miis_offset = 0x8;
// The Mii Maker has a 100 Mii limit on the 3ds
for (int i = 0; i < 100; ++i) {
HLE::Applets::MiiData mii;
std::array<u8, sizeof(mii)> mii_raw;
file->Read(saved_miis_offset, sizeof(mii), mii_raw.data());
std::memcpy(&mii, mii_raw.data(), sizeof(mii));
if (mii.mii_id != 0) {
std::string name = Common::UTF16BufferToUTF8(mii.mii_name);
miis.push_back(mii); miis.push_back(mii);
combobox->addItem(QString::fromStdString(name)); combobox->addItem(QString::fromStdString(Common::UTF16BufferToUTF8(mii.mii_name)));
}
saved_miis_offset += sizeof(mii);
}
}
} }
if (combobox->count() > static_cast<int>(config.initially_selected_mii_index)) { if (combobox->count() > static_cast<int>(config.initially_selected_mii_index)) {

@ -26,6 +26,10 @@
namespace Log { namespace Log {
/// Global log filter object; overwritten as a whole by SetGlobalFilter.
Filter filter;
/// Replaces the global filter with a copy of `f`.
void SetGlobalFilter(const Filter& f) {
filter = f;
}
/** /**
* Static state as a singleton. * Static state as a singleton.
*/ */
@ -58,14 +62,6 @@ public:
backends.erase(it, backends.end()); backends.erase(it, backends.end());
} }
const Filter& GetGlobalFilter() const {
return filter;
}
void SetGlobalFilter(const Filter& f) {
filter = f;
}
Backend* GetBackend(std::string_view backend_name) { Backend* GetBackend(std::string_view backend_name) {
const auto it = const auto it =
std::find_if(backends.begin(), backends.end(), std::find_if(backends.begin(), backends.end(),
@ -144,6 +140,10 @@ void ColorConsoleBackend::Write(const Entry& entry) {
PrintColoredMessage(entry); PrintColoredMessage(entry);
} }
/// Hands the entry to PrintMessageToLogcat; this backend does no work of its own.
void LogcatBackend::Write(const Entry& entry) {
PrintMessageToLogcat(entry);
}
FileBackend::FileBackend(const std::string& filename) : bytes_written(0) { FileBackend::FileBackend(const std::string& filename) : bytes_written(0) {
if (FileUtil::Exists(filename + ".old.txt")) { if (FileUtil::Exists(filename + ".old.txt")) {
FileUtil::Delete(filename + ".old.txt"); FileUtil::Delete(filename + ".old.txt");
@ -283,10 +283,6 @@ const char* GetLevelName(Level log_level) {
return "Invalid"; return "Invalid";
} }
void SetGlobalFilter(const Filter& filter) {
Impl::Instance().SetGlobalFilter(filter);
}
void AddBackend(std::unique_ptr<Backend> backend) { void AddBackend(std::unique_ptr<Backend> backend) {
Impl::Instance().AddBackend(std::move(backend)); Impl::Instance().AddBackend(std::move(backend));
} }
@ -303,10 +299,6 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
unsigned int line_num, const char* function, const char* format, unsigned int line_num, const char* function, const char* format,
const fmt::format_args& args) { const fmt::format_args& args) {
auto& instance = Impl::Instance(); auto& instance = Impl::Instance();
const auto& filter = instance.GetGlobalFilter();
if (!filter.CheckMessage(log_class, log_level))
return;
instance.PushEntry(log_class, log_level, filename, line_num, function, instance.PushEntry(log_class, log_level, filename, line_num, function,
fmt::vformat(format, args)); fmt::vformat(format, args));
} }

@ -14,8 +14,6 @@
namespace Log { namespace Log {
class Filter;
/** /**
* A log entry. Log entries are stored in a structured format to permit more varied output * A log entry. Log entries are stored in a structured format to permit more varied output
* formatting on different frontends, as well as facilitating filtering and aggregation. * formatting on different frontends, as well as facilitating filtering and aggregation.
@ -83,6 +81,21 @@ public:
void Write(const Entry& entry) override; void Write(const Entry& entry) override;
}; };
/**
* Backend that writes to the Android logcat
*/
class LogcatBackend : public Backend {
public:
/// Identifier string of this backend type ("logcat").
static const char* Name() {
return "logcat";
}
/// Returns the same identifier as Name().
const char* GetName() const override {
return Name();
}
/// Writes one log entry; the definition lives out of line.
void Write(const Entry& entry) override;
};
/** /**
* Backend that writes to a file passed into the constructor * Backend that writes to a file passed into the constructor
*/ */
@ -136,10 +149,4 @@ const char* GetLogClassName(Class log_class);
*/ */
const char* GetLevelName(Level log_level); const char* GetLevelName(Level log_level);
/**
* The global filter will prevent any messages from even being processed if they are filtered. Each
* backend can have a filter, but if the level is lower than the global filter, the backend will
* never get the message
*/
void SetGlobalFilter(const Filter& filter);
} // namespace Log } // namespace Log

@ -9,43 +9,4 @@
#include <string_view> #include <string_view>
#include "common/logging/log.h" #include "common/logging/log.h"
namespace Log { namespace Log {} // namespace Log
/**
* Implements a log message filter which allows different log classes to have different minimum
* severity levels. The filter can be changed at runtime and can be parsed from a string to allow
* editing via the interface or loading from a configuration file.
*/
class Filter {
public:
/// Initializes the filter with all classes having `default_level` as the minimum level.
explicit Filter(Level default_level = Level::Info);
/// Resets the filter so that all classes have `level` as the minimum displayed level.
void ResetAll(Level level);
/// Sets the minimum level of `log_class` (and not of its subclasses) to `level`.
void SetClassLevel(Class log_class, Level level);
/**
* Parses a filter string and applies it to this filter.
*
* A filter string consists of a space-separated list of filter rules, each of the format
* `<class>:<level>`. `<class>` is a log class name, with subclasses separated using periods.
* `*` is allowed as a class name and will reset all filters to the specified level. `<level>`
* a severity level name which will be set as the minimum logging level of the matched classes.
* Rules are applied left to right, with each rule overriding previous ones in the sequence.
*
* A few examples of filter rules:
* - `*:Info` -- Resets the level of all classes to Info.
* - `Service:Info` -- Sets the level of Service to Info.
* - `Service.FS:Trace` -- Sets the level of the Service.FS class to Trace.
*/
void ParseFilterString(std::string_view filter_view);
/// Matches class/level combination against the filter, returning true if it passed.
bool CheckMessage(Class log_class, Level level) const;
private:
std::array<Level, static_cast<std::size_t>(Class::Count)> class_levels;
};
} // namespace Log

@ -4,13 +4,14 @@
#pragma once #pragma once
#include <array>
#include <fmt/format.h> #include <fmt/format.h>
#include "common/common_types.h" #include "common/common_types.h"
namespace Log { namespace Log {
// trims up to and including the last of ../, ..\, src/, src\ in a string // trims up to and including the last of ../, ..\, src/, src\ in a string
constexpr const char* TrimSourcePath(std::string_view source) { inline const char* TrimSourcePath(std::string_view source) {
const auto rfind = [source](const std::string_view match) { const auto rfind = [source](const std::string_view match) {
return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size()); return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size());
}; };
@ -113,6 +114,47 @@ enum class Class : ClassType {
Count ///< Total number of logging classes Count ///< Total number of logging classes
}; };
/**
* Implements a log message filter which allows different log classes to have different minimum
* severity levels. The filter can be changed at runtime and can be parsed from a string to allow
* editing via the interface or loading from a configuration file.
*/
class Filter {
public:
/// Initializes the filter with all classes having `default_level` as the minimum level.
explicit Filter(Level default_level = Level::Info);
/// Resets the filter so that all classes have `level` as the minimum displayed level.
void ResetAll(Level level);
/// Sets the minimum level of `log_class` (and not of its subclasses) to `level`.
void SetClassLevel(Class log_class, Level level);
/**
* Parses a filter string and applies it to this filter.
*
* A filter string consists of a space-separated list of filter rules, each of the format
* `<class>:<level>`. `<class>` is a log class name, with subclasses separated using periods.
* `*` is allowed as a class name and will reset all filters to the specified level. `<level>`
* is a severity level name which will be set as the minimum logging level of the matched classes.
* Rules are applied left to right, with each rule overriding previous ones in the sequence.
*
* A few examples of filter rules:
* - `*:Info` -- Resets the level of all classes to Info.
* - `Service:Info` -- Sets the level of Service to Info.
* - `Service.FS:Trace` -- Sets the level of the Service.FS class to Trace.
*/
void ParseFilterString(std::string_view filter_view);
/// Matches class/level combination against the filter, returning true if it passed.
bool CheckMessage(Class log_class, Level level) const;
private:
/// One stored Level per log class; the array is sized by Class::Count.
std::array<Level, static_cast<std::size_t>(Class::Count)> class_levels;
};
/// Global filter instance. FmtLogMessage checks each message against it before forwarding the
/// message to FmtLogMessageImpl.
extern Filter filter;
/// Replaces the global filter with `f`.
void SetGlobalFilter(const Filter& f);
/// Logs a message to the global logger, using fmt /// Logs a message to the global logger, using fmt
void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
unsigned int line_num, const char* function, const char* format, unsigned int line_num, const char* function, const char* format,
@ -121,6 +163,9 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
template <typename... Args> template <typename... Args>
void FmtLogMessage(Class log_class, Level log_level, const char* filename, unsigned int line_num, void FmtLogMessage(Class log_class, Level log_level, const char* filename, unsigned int line_num,
const char* function, const char* format, const Args&... args) { const char* function, const char* format, const Args&... args) {
if (!filter.CheckMessage(log_class, log_level))
return;
FmtLogMessageImpl(log_class, log_level, filename, line_num, function, format, FmtLogMessageImpl(log_class, log_level, filename, line_num, function, format,
fmt::make_format_args(args...)); fmt::make_format_args(args...));
} }

@ -34,13 +34,7 @@ std::string FormatLogMessage(const Entry& entry) {
void PrintMessage(const Entry& entry) { void PrintMessage(const Entry& entry) {
const auto str = FormatLogMessage(entry).append(1, '\n'); const auto str = FormatLogMessage(entry).append(1, '\n');
#ifdef ANDROID
// Android's log level enum are offset by '2'
const int android_log_level = static_cast<int>(entry.log_level) + 2;
__android_log_print(android_log_level, "CitraNative", "%s", str.c_str());
#else
fputs(str.c_str(), stderr); fputs(str.c_str(), stderr);
#endif
} }
void PrintColoredMessage(const Entry& entry) { void PrintColoredMessage(const Entry& entry) {
@ -78,7 +72,7 @@ void PrintColoredMessage(const Entry& entry) {
} }
SetConsoleTextAttribute(console_handle, color); SetConsoleTextAttribute(console_handle, color);
#elif !defined(ANDROID) #else
#define ESC "\x1b" #define ESC "\x1b"
const char* color = ""; const char* color = "";
switch (entry.log_level) { switch (entry.log_level) {
@ -111,9 +105,40 @@ void PrintColoredMessage(const Entry& entry) {
#ifdef _WIN32 #ifdef _WIN32
SetConsoleTextAttribute(console_handle, original_info.wAttributes); SetConsoleTextAttribute(console_handle, original_info.wAttributes);
#elif !defined(ANDROID) #else
fputs(ESC "[0m", stderr); fputs(ESC "[0m", stderr);
#undef ESC #undef ESC
#endif #endif
} }
/**
 * Formats `entry` and sends it to the Android logcat under the "CitraNative" tag, translating the
 * entry's log level into the matching Android priority. Does nothing on non-Android builds.
 *
 * @param entry Log entry to format and emit.
 */
void PrintMessageToLogcat(const Entry& entry) {
#ifdef ANDROID
    const auto str = FormatLogMessage(entry);

    // Start from a defined priority: the switch below has no default label, so a level that
    // matches none of the cases would otherwise hand an indeterminate value to logcat.
    android_LogPriority android_log_priority = ANDROID_LOG_INFO;
    switch (entry.log_level) {
    case Level::Trace:
        android_log_priority = ANDROID_LOG_VERBOSE;
        break;
    case Level::Debug:
        android_log_priority = ANDROID_LOG_DEBUG;
        break;
    case Level::Info:
        android_log_priority = ANDROID_LOG_INFO;
        break;
    case Level::Warning:
        android_log_priority = ANDROID_LOG_WARN;
        break;
    case Level::Error:
        android_log_priority = ANDROID_LOG_ERROR;
        break;
    case Level::Critical:
        android_log_priority = ANDROID_LOG_FATAL;
        break;
    case Level::Count:
        // Count is a sentinel, not a real severity.
        UNREACHABLE();
    }
    __android_log_print(android_log_priority, "CitraNative", "%s", str.c_str());
#endif
}
} // namespace Log } // namespace Log

@ -17,4 +17,6 @@ std::string FormatLogMessage(const Entry& entry);
void PrintMessage(const Entry& entry); void PrintMessage(const Entry& entry);
/// Prints the same message as `PrintMessage`, but colored according to the severity level. /// Prints the same message as `PrintMessage`, but colored according to the severity level.
void PrintColoredMessage(const Entry& entry); void PrintColoredMessage(const Entry& entry);
/// Formats and prints a log entry to the android logcat.
void PrintMessageToLogcat(const Entry& entry);
} // namespace Log } // namespace Log

@ -108,8 +108,8 @@ add_library(core STATIC
frontend/framebuffer_layout.h frontend/framebuffer_layout.h
frontend/image_interface.h frontend/image_interface.h
frontend/input.h frontend/input.h
frontend/mic.h
frontend/mic.cpp frontend/mic.cpp
frontend/mic.h
frontend/scope_acquire_context.cpp frontend/scope_acquire_context.cpp
frontend/scope_acquire_context.h frontend/scope_acquire_context.h
gdbstub/gdbstub.cpp gdbstub/gdbstub.cpp

@ -953,6 +953,9 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
#define INC_PC(l) ptr += sizeof(arm_inst) + l #define INC_PC(l) ptr += sizeof(arm_inst) + l
#define INC_PC_STUB ptr += sizeof(arm_inst) #define INC_PC_STUB ptr += sizeof(arm_inst)
#ifdef ANDROID
#define GDB_BP_CHECK
#else
#define GDB_BP_CHECK \ #define GDB_BP_CHECK \
cpu->Cpsr &= ~(1 << 5); \ cpu->Cpsr &= ~(1 << 5); \
cpu->Cpsr |= cpu->TFlag << 5; \ cpu->Cpsr |= cpu->TFlag << 5; \
@ -965,6 +968,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
goto END; \ goto END; \
} \ } \
} }
#endif
// GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a // GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a
// clunky switch statement. // clunky switch statement.
@ -1652,11 +1656,13 @@ DISPATCH : {
goto END; goto END;
} }
#ifndef ANDROID
// Find breakpoint if one exists within the block // Find breakpoint if one exists within the block
if (GDBStub::IsConnected()) { if (GDBStub::IsConnected()) {
breakpoint_data = breakpoint_data =
GDBStub::GetNextBreakpointFromAddress(cpu->Reg[15], GDBStub::BreakpointType::Execute); GDBStub::GetNextBreakpointFromAddress(cpu->Reg[15], GDBStub::BreakpointType::Execute);
} }
#endif
inst_base = (arm_inst*)&trans_cache_buf[ptr]; inst_base = (arm_inst*)&trans_cache_buf[ptr];
GOTO_NEXT_INST; GOTO_NEXT_INST;

@ -182,13 +182,16 @@ void ARMul_State::ResetMPCoreCP15Registers() {
CP15[CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE] = 0x00000000; CP15[CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE] = 0x00000000;
CP15[CP15_TLB_DEBUG_CONTROL] = 0x00000000; CP15[CP15_TLB_DEBUG_CONTROL] = 0x00000000;
} }
#ifdef ANDROID
static void CheckMemoryBreakpoint(u32 address, GDBStub::BreakpointType type) {}
#else
static void CheckMemoryBreakpoint(u32 address, GDBStub::BreakpointType type) { static void CheckMemoryBreakpoint(u32 address, GDBStub::BreakpointType type) {
if (GDBStub::IsServerEnabled() && GDBStub::CheckBreakpoint(address, type)) { if (GDBStub::IsServerEnabled() && GDBStub::CheckBreakpoint(address, type)) {
LOG_DEBUG(Debug, "Found memory breakpoint @ {:08x}", address); LOG_DEBUG(Debug, "Found memory breakpoint @ {:08x}", address);
GDBStub::Break(true); GDBStub::Break(true);
} }
} }
#endif
u8 ARMul_State::ReadMemory8(u32 address) const { u8 ARMul_State::ReadMemory8(u32 address) const {
CheckMemoryBreakpoint(address, GDBStub::BreakpointType::Read); CheckMemoryBreakpoint(address, GDBStub::BreakpointType::Read);

@ -2,7 +2,12 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "common/file_util.h"
#include "common/string_util.h"
#include "core/file_sys/archive_extsavedata.h"
#include "core/file_sys/file_backend.h"
#include "core/frontend/applets/mii_selector.h" #include "core/frontend/applets/mii_selector.h"
#include "core/hle/service/ptm/ptm.h"
namespace Frontend { namespace Frontend {
@ -10,6 +15,42 @@ void MiiSelector::Finalize(u32 return_code, HLE::Applets::MiiData mii) {
data = {return_code, mii}; data = {return_code, mii};
} }
/**
 * Loads the Miis stored in `/CFL_DB.dat` inside the PTM shared extdata archive under the user's
 * NAND directory.
 *
 * @return Every record among the first 100 slots whose `mii_id` is non-zero, in file order. The
 *         list is empty when the archive or the file cannot be opened.
 */
std::vector<HLE::Applets::MiiData> LoadMiis() {
    std::vector<HLE::Applets::MiiData> miis;

    std::string nand_directory{FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)};
    FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true);

    auto archive_result = extdata_archive_factory.Open(Service::PTM::ptm_shared_extdata_id, 0);
    if (archive_result.Succeeded()) {
        auto archive = std::move(archive_result).Unwrap();

        FileSys::Path file_path = "/CFL_DB.dat";
        FileSys::Mode mode{};
        mode.read_flag.Assign(1);

        auto file_result = archive->OpenFile(file_path, mode);
        if (file_result.Succeeded()) {
            auto file = std::move(file_result).Unwrap();

            // Records start at this offset in the file and are laid out back to back.
            u32 saved_miis_offset = 0x8;
            // The Mii Maker has a 100 Mii limit on the 3ds
            for (int i = 0; i < 100; ++i) {
                HLE::Applets::MiiData mii;
                // Zero-fill the staging buffer: the result of Read is not inspected, so any bytes
                // a failed or short read leaves untouched must be zero rather than indeterminate.
                std::array<u8, sizeof(mii)> mii_raw{};
                file->Read(saved_miis_offset, sizeof(mii), mii_raw.data());
                std::memcpy(&mii, mii_raw.data(), sizeof(mii));
                // A zero id marks a slot that is skipped.
                if (mii.mii_id != 0) {
                    miis.push_back(mii);
                }
                saved_miis_offset += sizeof(mii);
            }
        }
    }

    return miis;
}
void DefaultMiiSelector::Setup(const Frontend::MiiSelectorConfig& config) { void DefaultMiiSelector::Setup(const Frontend::MiiSelectorConfig& config) {
MiiSelector::Setup(config); MiiSelector::Setup(config);
Finalize(0, HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data); Finalize(0, HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data);

@ -50,6 +50,8 @@ protected:
MiiSelectorData data; MiiSelectorData data;
}; };
std::vector<HLE::Applets::MiiData> LoadMiis();
class DefaultMiiSelector final : public MiiSelector { class DefaultMiiSelector final : public MiiSelector {
public: public:
void Setup(const MiiSelectorConfig& config) override; void Setup(const MiiSelectorConfig& config) override;

@ -54,6 +54,8 @@ add_library(video_core STATIC
renderer_opengl/post_processing_opengl.h renderer_opengl/post_processing_opengl.h
renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h renderer_opengl/renderer_opengl.h
renderer_opengl/texture_downloader_es.cpp
renderer_opengl/texture_downloader_es.h
renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp
renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h
renderer_opengl/texture_filters/bicubic/bicubic.cpp renderer_opengl/texture_filters/bicubic/bicubic.cpp
@ -99,11 +101,12 @@ add_library(video_core STATIC
) )
set(SHADER_FILES set(SHADER_FILES
renderer_opengl/depth_to_color.frag
renderer_opengl/depth_to_color.vert
renderer_opengl/ds_to_color.frag
renderer_opengl/texture_filters/anime4k/refine.frag renderer_opengl/texture_filters/anime4k/refine.frag
renderer_opengl/texture_filters/anime4k/refine.vert
renderer_opengl/texture_filters/anime4k/x_gradient.frag renderer_opengl/texture_filters/anime4k/x_gradient.frag
renderer_opengl/texture_filters/anime4k/y_gradient.frag renderer_opengl/texture_filters/anime4k/y_gradient.frag
renderer_opengl/texture_filters/anime4k/y_gradient.vert
renderer_opengl/texture_filters/bicubic/bicubic.frag renderer_opengl/texture_filters/bicubic/bicubic.frag
renderer_opengl/texture_filters/scale_force/scale_force.frag renderer_opengl/texture_filters/scale_force/scale_force.frag
renderer_opengl/texture_filters/tex_coord.vert renderer_opengl/texture_filters/tex_coord.vert
@ -121,7 +124,7 @@ endforeach()
add_custom_target(shaders add_custom_target(shaders
BYPRODUCTS ${SHADER_HEADERS} BYPRODUCTS ${SHADER_HEADERS}
COMMAND cmake -P ${CMAKE_CURRENT_SOURCE_DIR}/generate_shaders.cmake COMMAND "${CMAKE_COMMAND}" -P ${CMAKE_CURRENT_SOURCE_DIR}/generate_shaders.cmake
SOURCES ${SHADER_FILES} SOURCES ${SHADER_FILES}
) )
add_dependencies(video_core shaders) add_dependencies(video_core shaders)

@ -0,0 +1,10 @@
//? #version 320 es
// Writes the depth value of the texel under the current fragment as an unsigned integer.
out highp uint color;
// Depth texture to read from.
uniform highp sampler2D depth;
// Mip level of `depth` to fetch.
uniform int lod;
void main() {
// Fetch the texel at this fragment's coordinate, scale it by (2^32 - 1) and convert to uint.
// NOTE(review): 2^32 - 1 is not exactly representable in a 32-bit float -- confirm the result
// for a depth of exactly 1.0.
color = uint(texelFetch(depth, ivec2(gl_FragCoord.xy), lod).x * (exp2(32.0) - 1.0));
}

@ -1,12 +1,8 @@
//? #version 330 //? #version 320 es
out vec2 input_max;
uniform sampler2D tex_size;
const vec2 vertices[4] = const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
void main() { void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
input_max = textureSize(tex_size, 0) * 2 - 1;
} }

@ -0,0 +1,9 @@
//? #version 320 es
#extension GL_ARM_shader_framebuffer_fetch_depth_stencil : enable
// Packs the gl_LastFragDepthARM / gl_LastFragStencilARM values into one unsigned integer:
// depth scaled by (2^24 - 1) and shifted into the upper bits, stencil OR-ed into the low 8 bits.
out highp uint color;
void main() {
// Depth goes first, shifted left by 8 to leave room for the stencil byte.
color = uint(gl_LastFragDepthARM * (exp2(24.0) - 1.0)) << 8;
color |= uint(gl_LastFragStencilARM);
}

@ -220,9 +220,175 @@ private:
GLint d24s8_abgr_viewport_u_id; GLint d24s8_abgr_viewport_u_id;
}; };
/**
* Reinterpreter that draws a four-vertex quad and, in its fragment shader, samples a depth/stencil
* source texture and writes the stencil value plus the three upper bytes of the scaled depth value
* as the four components of the destination colour.
*/
class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase {
public:
/// Builds the shader program, looks up its uniforms, creates the vertex array and decides whether
/// texture views or an intermediate copy will be used for the stencil read.
ShaderD24S8toRGBA8() {
// Vertex shader: emits the quad corners and the matching destination pixel coordinate.
constexpr std::string_view vs_source = R"(
out vec2 dst_coord;
uniform mediump ivec2 dst_size;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
}
)";
// Fragment shader: maps the destination pixel to a source texel (re-deriving the coordinate from
// a linear index when source and destination sizes differ), then packs stencil and depth bytes
// into the output colour.
constexpr std::string_view fs_source = R"(
in mediump vec2 dst_coord;
out lowp vec4 frag_color;
uniform highp sampler2D depth;
uniform lowp usampler2D stencil;
uniform mediump ivec2 dst_size;
uniform mediump ivec2 src_size;
uniform mediump ivec2 src_offset;
void main() {
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
highp uvec4 components =
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
frag_color = vec4(components) / (exp2(8.0) - 1.0);
}
)";
program.Create(vs_source.data(), fs_source.data());
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
// Temporarily make this program current so the `stencil` sampler uniform can be set to texture
// unit 1, then put the previously selected program back.
auto state = OpenGLState::GetCurState();
auto cur_program = state.draw.shader_program;
state.draw.shader_program = program.handle;
state.Apply();
glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1);
state.draw.shader_program = cur_program;
state.Apply();
// OES_texture_view doesn't seem to support D24S8 views, at least on adreno
// so instead it will do an intermediate copy before running through the shader
if (GLAD_GL_ARB_texture_view) {
texture_view_func = glTextureView;
} else {
LOG_INFO(Render_OpenGL,
"Texture views are unsupported, reinterpretation will do intermediate copy");
temp_tex.Create();
}
}
/**
* Draws the reinterpreted contents of `src_tex` into `dst_tex`.
*
* @param src_tex Source texture, bound to texture unit 0 for the depth read.
* @param src_rect Source region; supplies the `src_size` and `src_offset` uniforms.
* @param read_fb_handle Not used by this implementation.
* @param dst_tex Texture attached as colour attachment 0 of the draw framebuffer.
* @param dst_rect Destination region; supplies the viewport and the `dst_size` uniform.
* @param draw_fb_handle Framebuffer selected as the draw framebuffer for the pass.
*/
void Reinterpret(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint read_fb_handle,
GLuint dst_tex, const Common::Rectangle<u32>& dst_rect,
GLuint draw_fb_handle) override {
// Capture the current state and re-apply it when this scope is left (via SCOPE_EXIT).
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.texture_units[0].texture_2d = src_tex;
if (texture_view_func) {
// View path: create a fresh texture name and make it a D24S8 view of level 0 / layer 0 of
// src_tex.
temp_tex.Create();
glActiveTexture(GL_TEXTURE1);
texture_view_func(temp_tex.handle, GL_TEXTURE_2D, src_tex, GL_DEPTH24_STENCIL8, 0, 1, 0,
1);
// NOTE(review): temp_tex is only assigned to unit 1 through `state` further down, so unless
// Create() binds it these parameters land on whatever GL_TEXTURE_2D is currently bound to
// unit 1 -- confirm this is intended.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) {
// Copy path: (re)allocate the intermediate texture only when src_rect extends past the
// extent recorded in temp_rect.
temp_tex.Release();
temp_tex.Create();
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
temp_rect = src_rect;
}
state.texture_units[1].texture_2d = temp_tex.handle;
state.draw.draw_framebuffer = draw_fb_handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
glActiveTexture(GL_TEXTURE1);
if (!texture_view_func) {
// Without views, copy the source region into the intermediate texture first.
glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
src_rect.GetWidth(), src_rect.GetHeight(), 1);
}
// Sample the stencil component (rather than depth) from the texture on unit 1.
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
// Render into dst_tex as colour attachment 0 and detach any depth/stencil attachment.
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
if (texture_view_func) {
// The per-call view texture is no longer needed.
temp_tex.Release();
}
}
private:
// Set to glTextureView when GLAD_GL_ARB_texture_view is reported at construction; otherwise
// stays null and the intermediate-copy path is used.
decltype(glTextureView) texture_view_func = nullptr;
OGLProgram program{};
// Uniform locations queried from `program` in the constructor.
GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
OGLVertexArray vao{};
// Per-call view of the source (view path) or persistent intermediate copy (copy path).
OGLTexture temp_tex{};
// src_rect recorded when temp_tex storage was last allocated on the copy path.
Common::Rectangle<u32> temp_rect{0, 0, 0, 0};
};
/// Reinterpreter that performs the conversion with a single glCopyImageSubData call from the
/// source texture to the destination texture.
class CopyImageSubData final : public FormatReinterpreterBase {
/// Copies a src_rect-sized region at mip level 0 from `src_tex` (starting at src_rect's left /
/// bottom) into `dst_tex` (starting at dst_rect's left / bottom). The framebuffer handles are not
/// used.
void Reinterpret(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint read_fb_handle,
GLuint dst_tex, const Common::Rectangle<u32>& dst_rect,
GLuint draw_fb_handle) override {
glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex,
GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(),
src_rect.GetHeight(), 1);
}
};
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
if (vendor.find("NVIDIA") != vendor.npos) {
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
std::make_unique<CopyImageSubData>());
// Nvidia bends the spec and allows direct copies between color and depth formats
// might as well take advantage of it
LOG_INFO(Render_OpenGL, "Using glCopyImageSubData for D24S8 to RGBA8 reinterpretation");
} else if ((GLAD_GL_ARB_stencil_texturing && GLAD_GL_ARB_texture_storage) || GLES) {
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
std::make_unique<ShaderD24S8toRGBA8>());
LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation");
} else {
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
std::make_unique<PixelBufferD24S8toABGR>()); std::make_unique<PixelBufferD24S8toABGR>());
LOG_INFO(Render_OpenGL, "Using pbo for D24S8 to RGBA8 reinterpretation");
}
reinterpreters.emplace(PixelFormatPair{PixelFormat::RGB5A1, PixelFormat::RGBA4}, reinterpreters.emplace(PixelFormatPair{PixelFormat::RGB5A1, PixelFormat::RGBA4},
std::make_unique<RGBA4toRGB5A1>()); std::make_unique<RGBA4toRGB5A1>());
} }

@ -52,16 +52,17 @@ RasterizerOpenGL::RasterizerOpenGL()
: is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) { texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false),
texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && allow_shadow = GLES || (GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
GLAD_GL_ARB_framebuffer_no_attachments; GLAD_GL_ARB_framebuffer_no_attachments);
if (!allow_shadow) { if (!allow_shadow) {
LOG_WARNING(Render_OpenGL, LOG_WARNING(Render_OpenGL,
"Shadow might not be able to render because of unsupported OpenGL extensions."); "Shadow might not be able to render because of unsupported OpenGL extensions.");
} }
if (!GLAD_GL_ARB_copy_image) { if (!GLAD_GL_ARB_copy_image && !GLES) {
LOG_WARNING(Render_OpenGL, LOG_WARNING(Render_OpenGL,
"ARB_copy_image not supported. Some games might produce artifacts."); "ARB_copy_image not supported. Some games might produce artifacts.");
} }
@ -149,11 +150,15 @@ RasterizerOpenGL::RasterizerOpenGL()
framebuffer.Create(); framebuffer.Create();
// Allocate and bind texture buffer lut textures // Allocate and bind texture buffer lut textures
texture_buffer_lut_lf.Create();
texture_buffer_lut_rg.Create(); texture_buffer_lut_rg.Create();
texture_buffer_lut_rgba.Create(); texture_buffer_lut_rgba.Create();
state.texture_buffer_lut_lf.texture_buffer = texture_buffer_lut_lf.handle;
state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle; state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle;
state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle; state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle;
state.Apply(); state.Apply();
glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle());
glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle()); glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle());
glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum());
@ -777,7 +782,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
} }
OGLTexture temp_tex; OGLTexture temp_tex;
if (need_duplicate_texture && GLAD_GL_ARB_copy_image) { if (need_duplicate_texture && (GLAD_GL_ARB_copy_image || GLES)) {
// The game is trying to use a surface as a texture and framebuffer at the same time // The game is trying to use a surface as a texture and framebuffer at the same time
// which causes unpredictable behavior on the host. // which causes unpredictable behavior on the host.
// Making a copy to sample from eliminates this issue and seems to be fairly cheap. // Making a copy to sample from eliminates this issue and seems to be fairly cheap.
@ -821,6 +826,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
// Sync the LUTs within the texture buffer // Sync the LUTs within the texture buffer
SyncAndUploadLUTs(); SyncAndUploadLUTs();
SyncAndUploadLUTsLF();
// Sync the uniform data // Sync the uniform data
UploadUniforms(accelerate); UploadUniforms(accelerate);
@ -942,6 +948,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Blending // Blending
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
if (GLES) {
// With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
SyncBlendEnabled(); SyncBlendEnabled();
break; break;
case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
@ -1062,6 +1072,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Logic op // Logic op
case PICA_REG_INDEX(framebuffer.output_merger.logic_op): case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
if (GLES) {
// With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
SyncLogicOp(); SyncLogicOp();
break; break;
@ -1816,11 +1830,31 @@ void RasterizerOpenGL::SyncAlphaTest() {
} }
void RasterizerOpenGL::SyncLogicOp() { void RasterizerOpenGL::SyncLogicOp() {
state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.framebuffer.output_merger.logic_op); const auto& regs = Pica::g_state.regs;
state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
if (GLES) {
if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip
// color but allow depth write.
state.color_mask = {};
}
}
}
} }
void RasterizerOpenGL::SyncColorWriteMask() { void RasterizerOpenGL::SyncColorWriteMask() {
const auto& regs = Pica::g_state.regs; const auto& regs = Pica::g_state.regs;
if (GLES) {
if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip
// color but allow depth write. Return early to avoid overwriting this.
return;
}
}
}
auto IsColorWriteEnabled = [&](u32 value) { auto IsColorWriteEnabled = [&](u32 value) {
return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE
@ -2005,18 +2039,11 @@ void RasterizerOpenGL::SyncShadowTextureBias() {
} }
} }
void RasterizerOpenGL::SyncAndUploadLUTs() { void RasterizerOpenGL::SyncAndUploadLUTsLF() {
constexpr std::size_t max_size = sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + constexpr std::size_t max_size =
sizeof(GLvec2) * 128 + // fog sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(GLvec2) * 128; // fog
sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
sizeof(GLvec4) * 256 + // proctex
sizeof(GLvec4) * 256; // proctex diff
if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty && if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) {
!uniform_block_data.proctex_noise_lut_dirty &&
!uniform_block_data.proctex_color_map_dirty &&
!uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
!uniform_block_data.proctex_diff_lut_dirty) {
return; return;
} }
@ -2024,8 +2051,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
GLintptr offset; GLintptr offset;
bool invalidate; bool invalidate;
std::size_t bytes_used = 0; std::size_t bytes_used = 0;
glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle()); glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle());
std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4)); std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(GLvec4));
// Sync the lighting luts // Sync the lighting luts
if (uniform_block_data.lighting_lut_dirty_any || invalidate) { if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
@ -2050,8 +2077,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
uniform_block_data.lighting_lut_dirty[index] = false; uniform_block_data.lighting_lut_dirty[index] = false;
} }
} }
}
uniform_block_data.lighting_lut_dirty_any = false; uniform_block_data.lighting_lut_dirty_any = false;
}
// Sync the fog lut // Sync the fog lut
if (uniform_block_data.fog_lut_dirty || invalidate) { if (uniform_block_data.fog_lut_dirty || invalidate) {
@ -2073,6 +2100,28 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
uniform_block_data.fog_lut_dirty = false; uniform_block_data.fog_lut_dirty = false;
} }
texture_lf_buffer.Unmap(bytes_used);
}
void RasterizerOpenGL::SyncAndUploadLUTs() {
constexpr std::size_t max_size = sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
sizeof(GLvec4) * 256 + // proctex
sizeof(GLvec4) * 256; // proctex diff
if (!uniform_block_data.proctex_noise_lut_dirty &&
!uniform_block_data.proctex_color_map_dirty &&
!uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
!uniform_block_data.proctex_diff_lut_dirty) {
return;
}
u8* buffer;
GLintptr offset;
bool invalidate;
std::size_t bytes_used = 0;
glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4));
// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used]( auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used](
const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut, const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,

@ -233,6 +233,7 @@ private:
/// Syncs and uploads the proctex LUTs (noise, color map, alpha map, LUT and diff LUT)
void SyncAndUploadLUTs(); void SyncAndUploadLUTs();
/// Syncs and uploads the lighting and fog LUTs
void SyncAndUploadLUTsLF();
/// Upload the uniform blocks to the uniform buffer object /// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw); void UploadUniforms(bool accelerate_draw);
@ -303,6 +304,7 @@ private:
OGLStreamBuffer uniform_buffer; OGLStreamBuffer uniform_buffer;
OGLStreamBuffer index_buffer; OGLStreamBuffer index_buffer;
OGLStreamBuffer texture_buffer; OGLStreamBuffer texture_buffer;
OGLStreamBuffer texture_lf_buffer;
OGLFramebuffer framebuffer; OGLFramebuffer framebuffer;
GLint uniform_buffer_alignment; GLint uniform_buffer_alignment;
std::size_t uniform_size_aligned_vs; std::size_t uniform_size_aligned_vs;
@ -310,6 +312,7 @@ private:
SamplerInfo texture_cube_sampler; SamplerInfo texture_cube_sampler;
OGLTexture texture_buffer_lut_lf;
OGLTexture texture_buffer_lut_rg; OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba; OGLTexture texture_buffer_lut_rgba;

@ -6,6 +6,7 @@
#include <array> #include <array>
#include <atomic> #include <atomic>
#include <bitset> #include <bitset>
#include <cmath>
#include <cstring> #include <cstring>
#include <iterator> #include <iterator>
#include <memory> #include <memory>
@ -36,6 +37,7 @@
#include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/texture_downloader_es.h"
#include "video_core/renderer_opengl/texture_filters/texture_filterer.h" #include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
#include "video_core/utils.h" #include "video_core/utils.h"
#include "video_core/video_core.h" #include "video_core/video_core.h"
@ -64,13 +66,6 @@ static constexpr std::array<FormatTuple, 5> fb_format_tuples_oes = {{
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
}}; }};
// GL format table for the depth/stencil pixel formats. Each entry's trailing comment names
// the format it describes; the second slot is left value-initialized (no format listed).
// NOTE(review): entries look like {internal format, format, type} - confirm against FormatTuple.
static constexpr std::array<FormatTuple, 4> depth_format_tuples = {{
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
{},
{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
}};
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
if (type == SurfaceType::Color) { if (type == SurfaceType::Color) {
@ -87,79 +82,6 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
return tex_tuple; return tex_tuple;
} }
/**
 * OpenGL ES does not support glGetTexImage. Obtain the pixels by attaching the
 * texture to a framebuffer.
 * Originally from https://github.com/apitrace/apitrace/blob/master/retrace/glstate_images.cpp
 *
 * Reads back the texture currently bound to `target` on the active texture unit. The
 * previous OpenGLState is re-applied before returning.
 *
 * @param target Texture target (2D, a cube-map face, or 3D OES); other targets are ignored
 * @param level  Mipmap level to read
 * @param format Pixel format passed to glReadPixels
 * @param type   Pixel type passed to glReadPixels
 * @param height Height of the region to read (note: height precedes width)
 * @param width  Width of the region to read
 * @param depth  Number of slices to read for 3D textures
 * @param pixels Destination buffer; the first `size` bytes are pre-filled with 0x80
 * @param size   Size of `pixels` in bytes
 */
static void GetTexImageOES(GLenum target, GLint level, GLenum format, GLenum type, GLint height,
                           GLint width, GLint depth, GLubyte* pixels, std::size_t size) {
    memset(pixels, 0x80, size);

    OpenGLState cur_state = OpenGLState::GetCurState();
    OpenGLState state;

    GLenum texture_binding = GL_NONE;
    switch (target) {
    case GL_TEXTURE_2D:
        texture_binding = GL_TEXTURE_BINDING_2D;
        break;
    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
        texture_binding = GL_TEXTURE_BINDING_CUBE_MAP;
        break;
    case GL_TEXTURE_3D_OES:
        texture_binding = GL_TEXTURE_BINDING_3D_OES;
        // Without this break the case fell through to `default` and returned, which made
        // the 3D read-back path below unreachable.
        break;
    default:
        return;
    }

    GLint texture = 0;
    glGetIntegerv(texture_binding, &texture);
    if (!texture) {
        return;
    }

    OGLFramebuffer fbo;
    fbo.Create();
    state.draw.read_framebuffer = fbo.handle;
    state.Apply();

    switch (target) {
    case GL_TEXTURE_2D:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {
        // Attach using the caller's target: a cube-map texture must be attached through one
        // of its face targets, not GL_TEXTURE_2D. For 2D textures this is unchanged.
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, texture,
                               level);
        GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER);
        if (status != GL_FRAMEBUFFER_COMPLETE) {
            LOG_DEBUG(Render_OpenGL, "Framebuffer is incomplete, status: {:X}", status);
        }
        glReadPixels(0, 0, width, height, format, type, pixels);
        break;
    }
    case GL_TEXTURE_3D_OES:
        // One slice per iteration.
        // NOTE(review): the destination stride assumes 4 bytes per pixel - confirm for
        // the format/type combinations callers use.
        for (int i = 0; i < depth; i++) {
            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_3D,
                                   texture, level, i);
            glReadPixels(0, 0, width, height, format, type, pixels + 4 * i * width * height);
        }
        break;
    }

    cur_state.Apply();

    fbo.Release();
}
template <typename Map, typename Interval> template <typename Map, typename Interval>
static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval)); return boost::make_iterator_range(map.equal_range(interval));
@ -329,8 +251,14 @@ OGLTexture RasterizerCacheOpenGL::AllocateSurfaceTexture(const FormatTuple& form
cur_state.Apply(); cur_state.Apply();
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
if (GL_ARB_texture_storage) {
// Allocate all possible mipmap levels upfront
auto levels = std::log2(std::max(width, height)) + 1;
glTexStorage2D(GL_TEXTURE_2D, levels, format_tuple.internal_format, width, height);
} else {
glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
format_tuple.format, format_tuple.type, nullptr); format_tuple.format, format_tuple.type, nullptr);
}
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@ -352,7 +280,11 @@ static void AllocateTextureCube(GLuint texture, const FormatTuple& format_tuple,
cur_state.texture_cube_unit.texture_cube = texture; cur_state.texture_cube_unit.texture_cube = texture;
cur_state.Apply(); cur_state.Apply();
glActiveTexture(TextureUnits::TextureCube.Enum()); glActiveTexture(TextureUnits::TextureCube.Enum());
if (GL_ARB_texture_storage) {
// Allocate all possible mipmap levels in case the game uses them later
auto levels = std::log2(width) + 1;
glTexStorage2D(GL_TEXTURE_CUBE_MAP, levels, format_tuple.internal_format, width, width);
} else {
for (auto faces : { for (auto faces : {
GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_X,
GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
@ -361,8 +293,9 @@ static void AllocateTextureCube(GLuint texture, const FormatTuple& format_tuple,
GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
}) { }) {
glTexImage2D(faces, 0, format_tuple.internal_format, width, width, 0, format_tuple.format, glTexImage2D(faces, 0, format_tuple.internal_format, width, width, 0,
format_tuple.type, nullptr); format_tuple.format, format_tuple.type, nullptr);
}
} }
// Restore previous texture bindings // Restore previous texture bindings
@ -775,23 +708,28 @@ void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) {
LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path); LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path);
std::vector<u8> decoded_texture; std::vector<u8> decoded_texture;
decoded_texture.resize(width * height * 4); decoded_texture.resize(width * height * 4);
glBindTexture(GL_TEXTURE_2D, target_tex); OpenGLState state = OpenGLState::GetCurState();
GLuint old_texture = state.texture_units[0].texture_2d;
state.Apply();
/* /*
GetTexImageOES is used even if not using OpenGL ES to work around a small issue that GetTexImageOES is used even if not using OpenGL ES to work around a small issue that
happens if using custom textures with texture dumping at the same time. happens if using custom textures with texture dumping at the same time.
Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a
higher quality 256x256 texture. If the 256x256 texture is displayed first and the 32x32 higher quality 256x256 texture. If the 256x256 texture is displayed first and the
texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture will 32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture
appear in the corner of the 256x256 texture. will appear in the corner of the 256x256 texture. If texture dumping is enabled and
If texture dumping is enabled and the 32x32 is undumped, Citra will attempt to dump it. the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL
Since the underlying OpenGL texture is still 256x256, Citra crashes because it thinks the texture is still 256x256, Citra crashes because it thinks the texture is only 32x32.
texture is only 32x32.
GetTexImageOES conveniently only dumps the specified region, and works on both GetTexImageOES conveniently only dumps the specified region, and works on both
desktop and ES. desktop and ES.
*/ */
GetTexImageOES(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, height, width, 0, // if the backend isn't OpenGL ES, this won't be initialized yet
&decoded_texture[0], decoded_texture.size()); if (!owner.texture_downloader_es)
glBindTexture(GL_TEXTURE_2D, 0); owner.texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE,
height, width, &decoded_texture[0]);
state.texture_units[0].texture_2d = old_texture;
state.Apply();
Common::FlipRGBA8Texture(decoded_texture, width, height); Common::FlipRGBA8Texture(decoded_texture, width, height);
if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height)) if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height))
LOG_ERROR(Render_OpenGL, "Failed to save decoded texture"); LOG_ERROR(Render_OpenGL, "Failed to save decoded texture");
@ -901,8 +839,9 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect, GLuint read_fb_
MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint read_fb_handle, void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint read_fb_handle,
GLuint draw_fb_handle) { GLuint draw_fb_handle) {
if (type == SurfaceType::Fill) if (type == SurfaceType::Fill) {
return; return;
}
MICROPROFILE_SCOPE(OpenGL_TextureDL); MICROPROFILE_SCOPE(OpenGL_TextureDL);
@ -941,9 +880,9 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
if (GLES) { if (GLES) {
GetTexImageOES(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect.GetHeight(), owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect.GetWidth(), 0, &gl_buffer[buffer_offset], rect.GetHeight(), rect.GetWidth(),
gl_buffer.size() - buffer_offset); &gl_buffer[buffer_offset]);
} else { } else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
} }
@ -967,6 +906,20 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
texture.handle, 0); texture.handle, 0);
} }
switch (glCheckFramebufferStatus(GL_FRAMEBUFFER)) {
case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
LOG_WARNING(Render_OpenGL, "Framebuffer incomplete attachment");
break;
case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
LOG_WARNING(Render_OpenGL, "Framebuffer incomplete dimensions");
break;
case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
LOG_WARNING(Render_OpenGL, "Framebuffer incomplete missing attachment");
break;
case GL_FRAMEBUFFER_UNSUPPORTED:
LOG_WARNING(Render_OpenGL, "Framebuffer unsupported");
break;
}
glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
tuple.format, tuple.type, &gl_buffer[buffer_offset]); tuple.format, tuple.type, &gl_buffer[buffer_offset]);
@ -1083,13 +1036,18 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
texture_filterer = std::make_unique<TextureFilterer>(Settings::values.texture_filter_name, texture_filterer = std::make_unique<TextureFilterer>(Settings::values.texture_filter_name,
resolution_scale_factor); resolution_scale_factor);
format_reinterpreter = std::make_unique<FormatReinterpreterOpenGL>(); format_reinterpreter = std::make_unique<FormatReinterpreterOpenGL>();
if (GLES)
texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
read_framebuffer.Create(); read_framebuffer.Create();
draw_framebuffer.Create(); draw_framebuffer.Create();
} }
/// Destroys the cache. Outside of Android builds every cached entry is cleared without
/// flushing; on Android that step is compiled out.
RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
#if !defined(ANDROID)
    // Clearing exists to support switching renderers. Android does not support that, and
    // the clear is costly on shutdown, so it is skipped there.
    ClearAll(false);
#endif
}
MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
@ -1304,9 +1262,14 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::Texture::TextureInf
width = surface->GetScaledWidth(); width = surface->GetScaledWidth();
height = surface->GetScaledHeight(); height = surface->GetScaledHeight();
} }
// If we are using ARB_texture_storage then we've already allocated all of the mipmap
// levels
if (!GL_ARB_texture_storage) {
for (u32 level = surface->max_level + 1; level <= max_level; ++level) { for (u32 level = surface->max_level + 1; level <= max_level; ++level) {
glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level, glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level,
height >> level, 0, format_tuple.format, format_tuple.type, nullptr); height >> level, 0, format_tuple.format, format_tuple.type,
nullptr);
}
} }
if (surface->is_custom || !texture_filterer->IsNull()) { if (surface->is_custom || !texture_filterer->IsNull()) {
// TODO: proper mipmap support for custom textures // TODO: proper mipmap support for custom textures
@ -1806,6 +1769,8 @@ void RasterizerCacheOpenGL::ClearAll(bool flush) {
} }
void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) {
std::lock_guard lock{mutex};
if (size == 0) if (size == 0)
return; return;
@ -1842,6 +1807,8 @@ void RasterizerCacheOpenGL::FlushAll() {
} }
void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) {
std::lock_guard lock{mutex};
if (size == 0) if (size == 0)
return; return;
@ -1917,6 +1884,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
} }
void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
std::lock_guard lock{mutex};
if (surface->registered) { if (surface->registered) {
return; return;
} }
@ -1926,6 +1895,8 @@ void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
} }
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
std::lock_guard lock{mutex};
if (!surface->registered) { if (!surface->registered) {
return; return;
} }

@ -7,6 +7,7 @@
#include <array> #include <array>
#include <list> #include <list>
#include <memory> #include <memory>
#include <mutex>
#include <set> #include <set>
#include <tuple> #include <tuple>
#ifdef __GNUC__ #ifdef __GNUC__
@ -170,6 +171,8 @@ private:
bool valid = false; bool valid = false;
}; };
class RasterizerCacheOpenGL;
struct CachedSurface : SurfaceParams, std::enable_shared_from_this<CachedSurface> { struct CachedSurface : SurfaceParams, std::enable_shared_from_this<CachedSurface> {
CachedSurface(RasterizerCacheOpenGL& owner) : owner{owner} {} CachedSurface(RasterizerCacheOpenGL& owner) : owner{owner} {}
~CachedSurface(); ~CachedSurface();
@ -266,6 +269,15 @@ struct CachedTextureCube {
std::shared_ptr<SurfaceWatcher> nz; std::shared_ptr<SurfaceWatcher> nz;
}; };
// GL format table for the depth/stencil pixel formats. Each entry's trailing comment names
// the format it describes; the second slot is left value-initialized (no format listed).
// NOTE(review): entries look like {internal format, format, type} - confirm against FormatTuple.
// Declared `inline` so that every translation unit including this header shares a single
// definition instead of getting its own internal-linkage copy.
inline constexpr std::array<FormatTuple, 4> depth_format_tuples = {{
    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
    {},
    {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT},   // D24
    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
}};
class TextureDownloaderES;
class RasterizerCacheOpenGL : NonCopyable { class RasterizerCacheOpenGL : NonCopyable {
public: public:
RasterizerCacheOpenGL(); RasterizerCacheOpenGL();
@ -365,11 +377,14 @@ private:
std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache; std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
std::recursive_mutex mutex;
public: public:
OGLTexture AllocateSurfaceTexture(const FormatTuple& format_tuple, u32 width, u32 height); OGLTexture AllocateSurfaceTexture(const FormatTuple& format_tuple, u32 width, u32 height);
std::unique_ptr<TextureFilterer> texture_filterer; std::unique_ptr<TextureFilterer> texture_filterer;
std::unique_ptr<FormatReinterpreterOpenGL> format_reinterpreter; std::unique_ptr<FormatReinterpreterOpenGL> format_reinterpreter;
std::unique_ptr<TextureDownloaderES> texture_downloader_es;
}; };
} // namespace OpenGL } // namespace OpenGL

@ -514,11 +514,21 @@ private:
} }
case OpCode::Id::RCP: { case OpCode::Id::RCP: {
if (!sanitize_mul) {
// When accurate multiplication is OFF, NaN are not really handled. This is a
// workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
shader.AddLine("if ({}.x != 0.0)", src1);
}
SetDest(swizzle, dest_reg, fmt::format("(1.0 / {}.x)", src1), 4, 1); SetDest(swizzle, dest_reg, fmt::format("(1.0 / {}.x)", src1), 4, 1);
break; break;
} }
case OpCode::Id::RSQ: { case OpCode::Id::RSQ: {
if (!sanitize_mul) {
// When accurate multiplication is OFF, NaN are not really handled. This is a
// workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
shader.AddLine("if ({}.x > 0.0)", src1);
}
SetDest(swizzle, dest_reg, fmt::format("inversesqrt({}.x)", src1), 4, 1); SetDest(swizzle, dest_reg, fmt::format("inversesqrt({}.x)", src1), 4, 1);
break; break;
} }
@ -807,6 +817,13 @@ private:
void Generate() { void Generate() {
if (sanitize_mul) { if (sanitize_mul) {
#ifdef ANDROID
// Use a cheaper sanitize_mul on Android, as mobile GPUs struggle here
// This seems to be sufficient at least for Ocarina of Time and Attack on Titan accurate
// multiplication bugs
shader.AddLine(
"#define sanitize_mul(lhs, rhs) mix(lhs * rhs, vec4(0.0), isnan(lhs * rhs))");
#else
shader.AddLine("vec4 sanitize_mul(vec4 lhs, vec4 rhs) {{"); shader.AddLine("vec4 sanitize_mul(vec4 lhs, vec4 rhs) {{");
++shader.scope; ++shader.scope;
shader.AddLine("vec4 product = lhs * rhs;"); shader.AddLine("vec4 product = lhs * rhs;");
@ -814,6 +831,7 @@ private:
"isnan(lhs)), isnan(product));"); "isnan(lhs)), isnan(product));");
--shader.scope; --shader.scope;
shader.AddLine("}}\n"); shader.AddLine("}}\n");
#endif
} }
// Add declarations for registers // Add declarations for registers

@ -102,7 +102,9 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_
out += R"( out += R"(
out gl_PerVertex { out gl_PerVertex {
vec4 gl_Position; vec4 gl_Position;
#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
float gl_ClipDistance[2]; float gl_ClipDistance[2];
#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
}; };
)"; )";
} }
@ -127,6 +129,17 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0; state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
if (GLES) {
// With GLES, we need this in the fragment shader to emulate logic operations
state.alphablend_enable =
Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1;
state.logic_op = regs.framebuffer.output_merger.logic_op;
} else {
// We don't need these otherwise, reset them to avoid unnecessary shader generation
state.alphablend_enable = {};
state.logic_op = {};
}
// Copy relevant tev stages fields. // Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a // We don't sync const_color here because of the high variance, it is a
// shader uniform instead. // shader uniform instead.
@ -607,13 +620,15 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
if (!IsPassThroughTevStage(stage)) { if (!IsPassThroughTevStage(stage)) {
const std::string index_name = std::to_string(index); const std::string index_name = std::to_string(index);
out += fmt::format("vec3 color_results_{}[3] = vec3[3](", index_name); out += fmt::format("vec3 color_results_{}_1 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
out += ", "; out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
out += ", "; out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
out += ");\n"; out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, "
"color_results_{}_2, color_results_{}_3);\n",
index_name, index_name, index_name, index_name);
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision // Round the output of each TEV stage to maintain the PICA's 8 bits of precision
out += fmt::format("vec3 color_output_{} = byteround(", index_name); out += fmt::format("vec3 color_output_{} = byteround(", index_name);
@ -1216,14 +1231,21 @@ float ProcTexNoiseCoef(vec2 x) {
ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config, ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config,
bool separable_shader) { bool separable_shader) {
const auto& state = config.state; const auto& state = config.state;
std::string out;
std::string out = R"( if (GLES) {
out += R"(
#define ALLOW_SHADOW (defined(CITRA_GLES))
)";
} else {
out += R"(
#extension GL_ARB_shader_image_load_store : enable #extension GL_ARB_shader_image_load_store : enable
#extension GL_ARB_shader_image_size : enable #extension GL_ARB_shader_image_size : enable
#define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size)) #define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size))
)"; )";
}
if (separable_shader) { if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n";
} }
@ -1244,6 +1266,7 @@ uniform sampler2D tex0;
uniform sampler2D tex1; uniform sampler2D tex1;
uniform sampler2D tex2; uniform sampler2D tex2;
uniform samplerCube tex_cube; uniform samplerCube tex_cube;
uniform samplerBuffer texture_buffer_lut_lf;
uniform samplerBuffer texture_buffer_lut_rg; uniform samplerBuffer texture_buffer_lut_rg;
uniform samplerBuffer texture_buffer_lut_rgba; uniform samplerBuffer texture_buffer_lut_rgba;
@ -1267,7 +1290,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) {
} }
float LookupLightingLUT(int lut_index, int index, float delta) { float LookupLightingLUT(int lut_index, int index, float delta) {
vec2 entry = texelFetch(texture_buffer_lut_rg, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
return entry.r + entry.g * delta; return entry.r + entry.g * delta;
} }
@ -1519,7 +1542,7 @@ vec4 secondary_fragment_color = vec4(0.0);
// Generate clamped fog factor from LUT for given fog index // Generate clamped fog factor from LUT for given fog index
out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n" out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"
"float fog_f = fog_index - fog_i;\n" "float fog_f = fog_index - fog_i;\n"
"vec2 fog_lut_entry = texelFetch(texture_buffer_lut_rg, int(fog_i) + " "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + "
"fog_lut_offset).rg;\n" "fog_lut_offset).rg;\n"
"float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n" "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"
"fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
@ -1537,8 +1560,8 @@ vec4 secondary_fragment_color = vec4(0.0);
if (state.shadow_rendering) { if (state.shadow_rendering) {
out += R"( out += R"(
#if ALLOW_SHADOW #if ALLOW_SHADOW
uint d = uint(clamp(depth, 0.0, 1.0) * 0xFFFFFF); uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF));
uint s = uint(last_tex_env_out.g * 0xFF); uint s = uint(last_tex_env_out.g * float(0xFF));
ivec2 image_coord = ivec2(gl_FragCoord.xy); ivec2 image_coord = ivec2(gl_FragCoord.xy);
uint old = imageLoad(shadow_buffer, image_coord).x; uint old = imageLoad(shadow_buffer, image_coord).x;
@ -1567,6 +1590,32 @@ do {
out += "color = byteround(last_tex_env_out);\n"; out += "color = byteround(last_tex_env_out);\n";
} }
if (GLES) {
if (!state.alphablend_enable) {
switch (state.logic_op) {
case FramebufferRegs::LogicOp::Clear:
out += "color = vec4(0);\n";
break;
case FramebufferRegs::LogicOp::Set:
out += "color = vec4(1);\n";
break;
case FramebufferRegs::LogicOp::Copy:
// Take the color output as-is
break;
case FramebufferRegs::LogicOp::CopyInverted:
out += "color = ~color;\n";
break;
case FramebufferRegs::LogicOp::NoOp:
// We need to discard the color, but not necessarily the depth. This is not possible
// with fragment shader alone, so we emulate this behavior on GLES with glColorMask.
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<int>(state.logic_op));
UNIMPLEMENTED();
}
}
}
out += '}'; out += '}';
return {std::move(out)}; return {std::move(out)};
@ -1574,7 +1623,7 @@ do {
ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) { ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) {
std::string out; std::string out;
if (separable_shader) { if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n";
} }
@ -1617,8 +1666,8 @@ void main() {
std::optional<ShaderDecompiler::ProgramResult> GenerateVertexShader( std::optional<ShaderDecompiler::ProgramResult> GenerateVertexShader(
const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) { const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) {
std::string out = ""; std::string out;
if (separable_shader) { if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n";
} }
@ -1767,8 +1816,8 @@ void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) {
ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config, ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config,
bool separable_shader) { bool separable_shader) {
std::string out = ""; std::string out;
if (separable_shader) { if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
} }

@ -61,6 +61,8 @@ struct PicaFSConfigState {
Pica::RasterizerRegs::DepthBuffering depthmap_enable; Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode; Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip; bool fog_flip;
bool alphablend_enable;
Pica::FramebufferRegs::LogicOp logic_op;
struct { struct {
struct { struct {

@ -123,6 +123,7 @@ static void SetShaderSamplerBindings(GLuint shader) {
SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube); SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube);
// Set the texture samplers to correspond to different lookup table texture units // Set the texture samplers to correspond to different lookup table texture units
SetShaderSamplerBinding(shader, "texture_buffer_lut_lf", TextureUnits::TextureBufferLUT_LF);
SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG);
SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA);
@ -176,9 +177,12 @@ public:
OGLProgram& program = boost::get<OGLProgram>(shader_or_program); OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
program.Create(true, {shader.handle}); program.Create(true, {shader.handle});
SetShaderUniformBlockBindings(program.handle); SetShaderUniformBlockBindings(program.handle);
if (type == GL_FRAGMENT_SHADER) {
SetShaderSamplerBindings(program.handle); SetShaderSamplerBindings(program.handle);
} }
} }
}
GLuint GetHandle() const { GLuint GetHandle() const {
if (shader_or_program.which() == 0) { if (shader_or_program.which() == 0) {

@ -14,7 +14,7 @@
namespace OpenGL { namespace OpenGL {
GLuint LoadShader(const char* source, GLenum type) { GLuint LoadShader(const char* source, GLenum type) {
const std::string version = GLES ? R"(#version 310 es const std::string version = GLES ? R"(#version 320 es
#define CITRA_GLES #define CITRA_GLES

@ -12,11 +12,15 @@ namespace OpenGL {
// High precision may or may not supported in GLES3. If it isn't, use medium precision instead. // High precision may or may not supported in GLES3. If it isn't, use medium precision instead.
static constexpr char fragment_shader_precision_OES[] = R"( static constexpr char fragment_shader_precision_OES[] = R"(
#ifdef GL_FRAGMENT_PRECISION_HIGH #ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp int;
precision highp float; precision highp float;
precision highp samplerBuffer; precision highp samplerBuffer;
precision highp uimage2D;
#else #else
precision mediump int;
precision mediump float; precision mediump float;
precision mediump samplerBuffer; precision mediump samplerBuffer;
precision mediump uimage2D;
#endif // GL_FRAGMENT_PRECISION_HIGH #endif // GL_FRAGMENT_PRECISION_HIGH
)"; )";

@ -58,6 +58,7 @@ OpenGLState::OpenGLState() {
texture_cube_unit.texture_cube = 0; texture_cube_unit.texture_cube = 0;
texture_cube_unit.sampler = 0; texture_cube_unit.sampler = 0;
texture_buffer_lut_lf.texture_buffer = 0;
texture_buffer_lut_rg.texture_buffer = 0; texture_buffer_lut_rg.texture_buffer = 0;
texture_buffer_lut_rgba.texture_buffer = 0; texture_buffer_lut_rgba.texture_buffer = 0;
@ -169,12 +170,19 @@ void OpenGLState::Apply() const {
if (blend.enabled != cur_state.blend.enabled) { if (blend.enabled != cur_state.blend.enabled) {
if (blend.enabled) { if (blend.enabled) {
glEnable(GL_BLEND); glEnable(GL_BLEND);
glDisable(GL_COLOR_LOGIC_OP);
} else { } else {
glDisable(GL_BLEND); glDisable(GL_BLEND);
}
// GLES does not support glLogicOp
if (!GLES) {
if (blend.enabled) {
glDisable(GL_COLOR_LOGIC_OP);
} else {
glEnable(GL_COLOR_LOGIC_OP); glEnable(GL_COLOR_LOGIC_OP);
} }
} }
}
if (blend.color.red != cur_state.blend.color.red || if (blend.color.red != cur_state.blend.color.red ||
blend.color.green != cur_state.blend.color.green || blend.color.green != cur_state.blend.color.green ||
@ -196,13 +204,11 @@ void OpenGLState::Apply() const {
glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
} }
// GLES3 does not support glLogicOp // GLES does not support glLogicOp
if (!GLES) { if (!GLES) {
if (logic_op != cur_state.logic_op) { if (logic_op != cur_state.logic_op) {
glLogicOp(logic_op); glLogicOp(logic_op);
} }
} else {
LOG_TRACE(Render_OpenGL, "glLogicOps are unimplemented...");
} }
// Textures // Textures
@ -224,6 +230,12 @@ void OpenGLState::Apply() const {
glBindSampler(TextureUnits::TextureCube.id, texture_cube_unit.sampler); glBindSampler(TextureUnits::TextureCube.id, texture_cube_unit.sampler);
} }
// Texture buffer LUTs
if (texture_buffer_lut_lf.texture_buffer != cur_state.texture_buffer_lut_lf.texture_buffer) {
glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum());
glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_lf.texture_buffer);
}
// Texture buffer LUTs // Texture buffer LUTs
if (texture_buffer_lut_rg.texture_buffer != cur_state.texture_buffer_lut_rg.texture_buffer) { if (texture_buffer_lut_rg.texture_buffer != cur_state.texture_buffer_lut_rg.texture_buffer) {
glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
@ -354,6 +366,8 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
} }
if (texture_cube_unit.texture_cube == handle) if (texture_cube_unit.texture_cube == handle)
texture_cube_unit.texture_cube = 0; texture_cube_unit.texture_cube = 0;
if (texture_buffer_lut_lf.texture_buffer == handle)
texture_buffer_lut_lf.texture_buffer = 0;
if (texture_buffer_lut_rg.texture_buffer == handle) if (texture_buffer_lut_rg.texture_buffer == handle)
texture_buffer_lut_rg.texture_buffer = 0; texture_buffer_lut_rg.texture_buffer = 0;
if (texture_buffer_lut_rgba.texture_buffer == handle) if (texture_buffer_lut_rgba.texture_buffer == handle)

@ -22,7 +22,8 @@ constexpr TextureUnit PicaTexture(int unit) {
return TextureUnit{unit}; return TextureUnit{unit};
} }
constexpr TextureUnit TextureCube{3}; constexpr TextureUnit TextureCube{6};
constexpr TextureUnit TextureBufferLUT_LF{3};
constexpr TextureUnit TextureBufferLUT_RG{4}; constexpr TextureUnit TextureBufferLUT_RG{4};
constexpr TextureUnit TextureBufferLUT_RGBA{5}; constexpr TextureUnit TextureBufferLUT_RGBA{5};
@ -101,6 +102,10 @@ public:
GLuint sampler; // GL_SAMPLER_BINDING GLuint sampler; // GL_SAMPLER_BINDING
} texture_cube_unit; } texture_cube_unit;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} texture_buffer_lut_lf;
struct { struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} texture_buffer_lut_rg; } texture_buffer_lut_rg;

@ -29,6 +29,7 @@
#include "core/tracer/recorder.h" #include "core/tracer/recorder.h"
#include "video_core/debug_utils/debug_utils.h" #include "video_core/debug_utils/debug_utils.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/post_processing_opengl.h" #include "video_core/renderer_opengl/post_processing_opengl.h"
#include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h"
@ -39,7 +40,12 @@ namespace OpenGL {
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger // to wait on available presentation frames. There doesn't seem to be much of a downside to a larger
// number but 9 swap textures at 60FPS presentation allows for 800% speed so thats probably fine // number but 9 swap textures at 60FPS presentation allows for 800% speed so thats probably fine
#ifdef ANDROID
// Reduce the size of swap_chain, since the UI only allows up to 200% speed.
constexpr std::size_t SWAP_CHAIN_SIZE = 6;
#else
constexpr std::size_t SWAP_CHAIN_SIZE = 9; constexpr std::size_t SWAP_CHAIN_SIZE = 9;
#endif
class OGLTextureMailboxException : public std::runtime_error { class OGLTextureMailboxException : public std::runtime_error {
public: public:
@ -96,7 +102,7 @@ public:
frame->color.Create(); frame->color.Create();
state.renderbuffer = frame->color.handle; state.renderbuffer = frame->color.handle;
state.Apply(); state.Apply();
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, width, height); glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height);
// Recreate the FBO for the render target // Recreate the FBO for the render target
frame->render.Release(); frame->render.Release();
@ -1197,14 +1203,18 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
/// Initialize the renderer /// Initialize the renderer
VideoCore::ResultStatus RendererOpenGL::Init() { VideoCore::ResultStatus RendererOpenGL::Init() {
#ifndef ANDROID
if (!gladLoadGL()) { if (!gladLoadGL()) {
return VideoCore::ResultStatus::ErrorBelowGL33; return VideoCore::ResultStatus::ErrorBelowGL33;
} }
// Qualcomm has some spammy info messages that are marked as errors but not important
// https://developer.qualcomm.com/comment/11845
if (GLAD_GL_KHR_debug) { if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT); glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr); glDebugMessageCallback(DebugHandler, nullptr);
} }
#endif
const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};

@ -0,0 +1,254 @@
// Copyright 2020 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <chrono>
#include <vector>
#include <fmt/chrono.h>
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/texture_downloader_es.h"
#include "shaders/depth_to_color.frag"
#include "shaders/depth_to_color.vert"
#include "shaders/ds_to_color.frag"
namespace OpenGL {
/**
 * Self tests for the texture downloader.
 *
 * Logs the fragment shader precision formats reported by the driver, then for three entries of
 * depth_format_tuples uploads generated data into a fresh texture, reads it back through
 * GetTexImage, logs how long the read-back took and logs every index at which the difference
 * between the downloaded and the uploaded value changes.
 * The OpenGLState that was current on entry is re-applied before returning.
 *
 * NOTE(review): GetTexImage returns early for GL_DEPTH_COMPONENT / GL_DEPTH_STENCIL formats; if
 * these tuples use those formats, new_data is never filled in — confirm.
 * NOTE(review): two of the tests use 4096x4096 textures while ConvertDepthToColor asserts
 * width/height <= max_size — confirm before re-enabling the depth conversion path.
 */
void TextureDownloaderES::Test() {
// Remember the caller's state so it can be restored at the end
auto cur_state = OpenGLState::GetCurState();
OpenGLState state;
{
GLint range[2];
GLint precision;
// Queries one fragment shader precision format and logs its range and precision
#define PRECISION_TEST(type) \
glGetShaderPrecisionFormat(GL_FRAGMENT_SHADER, type, range, &precision); \
LOG_INFO(Render_OpenGL, #type " range: [{}, {}], precision: {}", range[0], range[1], precision);
PRECISION_TEST(GL_LOW_INT);
PRECISION_TEST(GL_MEDIUM_INT);
PRECISION_TEST(GL_HIGH_INT);
PRECISION_TEST(GL_LOW_FLOAT);
PRECISION_TEST(GL_MEDIUM_FLOAT);
PRECISION_TEST(GL_HIGH_FLOAT);
#undef PRECISION_TEST
}
glActiveTexture(GL_TEXTURE0);
// Runs one upload/download round trip.
// tuple:          GL internal format / format / type used for the texture
// original_data:  empty container whose element type selects the texel type; resized here
// tex_size:       edge length of the square test texture
// data_generator: callable mapping a texel index to its test value
const auto test = [this, &state](FormatTuple tuple, auto original_data, std::size_t tex_size,
auto data_generator) {
OGLTexture texture;
texture.Create();
// Bind the new texture on unit 0, which is where GetTexImage looks for GL_TEXTURE_2D
state.texture_units[0].texture_2d = texture.handle;
state.Apply();
original_data.resize(tex_size * tex_size);
for (std::size_t idx = 0; idx < original_data.size(); ++idx)
original_data[idx] = data_generator(idx);
glTexStorage2D(GL_TEXTURE_2D, 1, tuple.internal_format, tex_size, tex_size);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, tex_size, tex_size, tuple.format, tuple.type,
original_data.data());
// Destination buffer with the same element type and size as the uploaded data
decltype(original_data) new_data(original_data.size());
// glFinish before and after so the measured interval only covers the download
glFinish();
auto start = std::chrono::high_resolution_clock::now();
GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, tex_size, tex_size,
new_data.data());
glFinish();
auto time = std::chrono::high_resolution_clock::now() - start;
LOG_INFO(Render_OpenGL, "test took {}", std::chrono::duration<double, std::milli>(time));
// Last observed (downloaded - uploaded) difference; only changes are reported
int diff = 0;
for (std::size_t idx = 0; idx < original_data.size(); ++idx)
if (new_data[idx] - original_data[idx] != diff) {
diff = new_data[idx] - original_data[idx];
// every time the error between the real and expected value changes, log it
// some error is expected in D24 due to floating point precision
LOG_WARNING(Render_OpenGL, "difference changed at {:#X}: {:#X} -> {:#X}", idx,
original_data[idx], new_data[idx]);
}
};
LOG_INFO(Render_OpenGL, "GL_DEPTH24_STENCIL8 download test starting");
test(depth_format_tuples[3], std::vector<u32>{}, 4096,
[](std::size_t idx) { return static_cast<u32>((idx << 8) | (idx & 0xFF)); });
LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT24 download test starting");
test(depth_format_tuples[2], std::vector<u32>{}, 4096,
[](std::size_t idx) { return static_cast<u32>(idx << 8); });
LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT16 download test starting");
test(depth_format_tuples[0], std::vector<u16>{}, 256,
[](std::size_t idx) { return static_cast<u16>(idx); });
// Restore the state that was current when the test started
cur_state.Apply();
}
/**
 * Creates the GL objects used to download textures on OpenGL ES: the vertex array, the generic
 * read framebuffer, the two depth conversion framebuffers with their colour renderbuffers, the
 * conversion shader programs and a nearest-filtering sampler.
 * The OpenGLState that was current on entry is re-applied before returning.
 *
 * @param enable_depth_stencil When true, the depth+stencil conversion shader is also built and
 *                             its "depth" sampler uniform is initialised. When false, that
 *                             program is left uncreated and no uniform calls are issued for it.
 */
TextureDownloaderES::TextureDownloaderES(bool enable_depth_stencil) {
    vao.Create();
    read_fbo_generic.Create();

    depth32_fbo.Create();
    r32ui_renderbuffer.Create();
    depth16_fbo.Create();
    r16_renderbuffer.Create();

    // Builds a conversion program from the shared vertex shader and the given fragment shader
    // and caches the location of its "lod" uniform.
    const auto init_program = [](ConversionShader& converter, std::string_view frag) {
        converter.program.Create(depth_to_color_vert.data(), frag.data());
        converter.lod_location = glGetUniformLocation(converter.program.handle, "lod");
    };

    // xperia64: The depth stencil shader currently uses a GLES extension that is not supported
    // across all devices. Reportedly broken on Tegra devices and the Nexus 6P, so enabling it
    // can be toggled
    if (enable_depth_stencil) {
        init_program(d24s8_r32ui_conversion_shader, ds_to_color_frag);
    }
    init_program(d24_r32ui_conversion_shader, depth_to_color_frag);
    init_program(d16_r16_conversion_shader, R"(
out highp float color;
uniform highp sampler2D depth;
uniform int lod;
void main(){
color = texelFetch(depth, ivec2(gl_FragCoord.xy), lod).x;
}
)");

    sampler.Create();
    glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    auto cur_state = OpenGLState::GetCurState();
    auto state = cur_state;

    // 32-bit target: R32UI renderbuffer attached as the colour attachment of depth32_fbo
    state.draw.shader_program = d24s8_r32ui_conversion_shader.program.handle;
    state.draw.draw_framebuffer = depth32_fbo.handle;
    state.renderbuffer = r32ui_renderbuffer.handle;
    state.Apply();
    glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, max_size, max_size);
    glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
                              r32ui_renderbuffer.handle);
    if (enable_depth_stencil) {
        // Only touch the depth+stencil program when it was actually created above; querying a
        // uniform location on a program that was never built raises a GL error.
        // glUniform1i acts on the program made current by the Apply() call above.
        glUniform1i(glGetUniformLocation(d24s8_r32ui_conversion_shader.program.handle, "depth"),
                    1);
    }

    // 16-bit target: R16 renderbuffer attached as the colour attachment of depth16_fbo
    state.draw.draw_framebuffer = depth16_fbo.handle;
    state.renderbuffer = r16_renderbuffer.handle;
    state.Apply();
    glRenderbufferStorage(GL_RENDERBUFFER, GL_R16, max_size, max_size);
    glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
                              r16_renderbuffer.handle);

    // Restore the state that was current when construction started
    cur_state.Apply();
}
/**
 * OpenGL ES does not support glReadBuffer for depth/stencil formats
 * This gets around it by converting to a Red surface before downloading
 *
 * Draws the 2D texture tracked on texture unit 0 into one of the conversion framebuffers using
 * the conversion shader selected by `type`.
 *
 * @param level  Mipmap level, passed to the shader through its "lod" uniform.
 * @param format In/out: overwritten with the format of the converted data
 *               (GL_RED or GL_RED_INTEGER).
 * @param type   In/out: selects the conversion; GL_UNSIGNED_INT_24_8 is rewritten to
 *               GL_UNSIGNED_INT, the other handled values are left unchanged.
 * @param height Height of the region to convert; must not exceed max_size.
 * @param width  Width of the region to convert; must not exceed max_size.
 * @return Handle of the framebuffer that was drawn into.
 *
 * The OpenGLState applied here is not restored by this function.
 */
GLuint TextureDownloaderES::ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type,
GLint height, GLint width) {
// The conversion renderbuffers are allocated as max_size x max_size
ASSERT(width <= max_size && height <= max_size);
const OpenGLState cur_state = OpenGLState::GetCurState();
// Start from a default state that only carries over the source texture on unit 0,
// paired with this object's nearest-filtering sampler
OpenGLState state;
state.texture_units[0] = {cur_state.texture_units[0].texture_2d, sampler.handle};
state.draw.vertex_array = vao.handle;
// NOTE(review): texture_view is never Create()d in this function — see the check below
OGLTexture texture_view;
// NOTE(review): left unset if `type` hits the default case below — confirm that
// UNREACHABLE_MSG never returns
const ConversionShader* converter;
// Pick the target framebuffer, shader and resulting read-back format from the source type
switch (type) {
case GL_UNSIGNED_SHORT:
state.draw.draw_framebuffer = depth16_fbo.handle;
converter = &d16_r16_conversion_shader;
format = GL_RED;
break;
case GL_UNSIGNED_INT:
state.draw.draw_framebuffer = depth32_fbo.handle;
converter = &d24_r32ui_conversion_shader;
format = GL_RED_INTEGER;
break;
case GL_UNSIGNED_INT_24_8:
state.draw.draw_framebuffer = depth32_fbo.handle;
converter = &d24s8_r32ui_conversion_shader;
format = GL_RED_INTEGER;
type = GL_UNSIGNED_INT;
break;
default:
UNREACHABLE_MSG("Destination type not recognized");
}
state.draw.shader_program = converter->program.handle;
state.viewport = {0, 0, width, height};
state.Apply();
// The depth+stencil shader additionally needs the source texture attached to the
// draw framebuffer as its depth-stencil attachment
if (converter->program.handle == d24s8_r32ui_conversion_shader.program.handle) {
// TODO BreadFish64: the ARM framebuffer reading extension is probably not the most optimal
// way to do this, search for another solution
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
state.texture_units[0].texture_2d, level);
}
glUniform1i(converter->lod_location, level);
// Four vertices as a triangle strip; positions presumably come from the vertex shader
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// NOTE(review): texture_view is never created above, so this branch looks unreachable —
// confirm against OGLTexture's default handle value
if (texture_view.handle) {
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT);
}
return state.draw.draw_framebuffer;
}
/**
 * OpenGL ES does not support glGetTexImage. Obtain the pixels by attaching the
 * texture to a framebuffer and reading it back with glReadPixels.
 * Originally from https://github.com/apitrace/apitrace/blob/master/retrace/glstate_images.cpp
 * Depth texture download assumes that the texture's format tuple matches what is found in
 * OpenGL::depth_format_tuples
 *
 * The texture that is read is the one tracked by the current OpenGLState: texture unit 0 for
 * GL_TEXTURE_2D, the cube texture unit for the cube map face targets. The read framebuffer
 * binding that was current on entry is restored before returning.
 *
 * @param target GL_TEXTURE_2D or one of the GL_TEXTURE_CUBE_MAP_* face targets.
 * @param level  Mipmap level to read.
 * @param format Pixel format passed to glReadPixels. GL_DEPTH_COMPONENT and GL_DEPTH_STENCIL
 *               currently return without writing to `pixels`.
 * @param type   Pixel data type passed to glReadPixels.
 * @param height Height of the region to read (note: height comes before width).
 * @param width  Width of the region to read.
 * @param pixels Destination buffer for the downloaded data.
 */
void TextureDownloaderES::GetTexImage(GLenum target, GLuint level, GLenum format, GLenum type,
                                      GLint height, GLint width, void* pixels) {
    OpenGLState state = OpenGLState::GetCurState();
    // Initialised so the unexpected-target path below never attaches an indeterminate name
    GLuint texture = 0;
    const GLuint old_read_buffer = state.draw.read_framebuffer;
    switch (target) {
    case GL_TEXTURE_2D:
        texture = state.texture_units[0].texture_2d;
        break;
    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
        texture = state.texture_cube_unit.texture_cube;
        break;
    default:
        UNIMPLEMENTED_MSG("Unexpected target {:x}", target);
    }

    switch (format) {
    case GL_DEPTH_COMPONENT:
    case GL_DEPTH_STENCIL:
        // unfortunately, the accurate way is too slow for release
        return;
        // Intentionally unreachable: kept so the accurate path can be re-enabled by removing
        // the return above.
        state.draw.read_framebuffer = ConvertDepthToColor(level, format, type, height, width);
        state.Apply();
        break;
    default:
        state.draw.read_framebuffer = read_fbo_generic.handle;
        state.Apply();
        // Attach using the caller's target: for a cube map the textarget must be the face
        // being read, GL_TEXTURE_2D is only valid for 2D textures.
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, texture,
                               level);
    }

    GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER);
    if (status != GL_FRAMEBUFFER_COMPLETE) {
        // Only logged; the read below is still attempted
        LOG_DEBUG(Render_OpenGL, "Framebuffer is incomplete, status: {:X}", status);
    }
    glReadPixels(0, 0, width, height, format, type, pixels);

    // Put back the read framebuffer that was bound when the function was entered
    state.draw.read_framebuffer = old_read_buffer;
    state.Apply();
}
} // namespace OpenGL

@ -0,0 +1,36 @@
// Copyright 2020 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class OpenGLState;
class TextureDownloaderES {
static constexpr u16 max_size = 1024;
OGLVertexArray vao;
OGLFramebuffer read_fbo_generic;
OGLFramebuffer depth32_fbo, depth16_fbo;
OGLRenderbuffer r32ui_renderbuffer, r16_renderbuffer;
struct ConversionShader {
OGLProgram program;
GLint lod_location{-1};
} d24_r32ui_conversion_shader, d16_r16_conversion_shader, d24s8_r32ui_conversion_shader;
OGLSampler sampler;
void Test();
GLuint ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type, GLint height,
GLint width);
public:
TextureDownloaderES(bool enable_depth_stencil);
void GetTexImage(GLenum target, GLuint level, GLenum format, const GLenum type, GLint height,
GLint width, void* pixels);
};
} // namespace OpenGL

@ -34,30 +34,14 @@
#include "video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h" #include "video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h"
#include "shaders/refine.frag" #include "shaders/refine.frag"
#include "shaders/refine.vert"
#include "shaders/tex_coord.vert" #include "shaders/tex_coord.vert"
#include "shaders/x_gradient.frag" #include "shaders/x_gradient.frag"
#include "shaders/y_gradient.frag" #include "shaders/y_gradient.frag"
#include "shaders/y_gradient.vert"
namespace OpenGL { namespace OpenGL {
Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_factor) { Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_factor) {
const OpenGLState cur_state = OpenGLState::GetCurState(); const OpenGLState cur_state = OpenGLState::GetCurState();
const auto setup_temp_tex = [this](TempTex& texture, GLint internal_format, GLint format) {
texture.fbo.Create();
texture.tex.Create();
state.draw.draw_framebuffer = texture.fbo.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_RECTANGLE, texture.tex.handle);
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, internal_format, 1024 * internal_scale_factor,
1024 * internal_scale_factor, 0, format, GL_HALF_FLOAT, nullptr);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE,
texture.tex.handle, 0);
};
setup_temp_tex(LUMAD, GL_R16F, GL_RED);
setup_temp_tex(XY, GL_RG16F, GL_RG);
vao.Create(); vao.Create();
@ -65,17 +49,17 @@ Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_f
samplers[idx].Create(); samplers[idx].Create();
state.texture_units[idx].sampler = samplers[idx].handle; state.texture_units[idx].sampler = samplers[idx].handle;
glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MIN_FILTER, glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MIN_FILTER,
idx == 0 ? GL_LINEAR : GL_NEAREST); idx != 2 ? GL_LINEAR : GL_NEAREST);
glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MAG_FILTER, glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MAG_FILTER,
idx == 0 ? GL_LINEAR : GL_NEAREST); idx != 2 ? GL_LINEAR : GL_NEAREST);
glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
} }
state.draw.vertex_array = vao.handle; state.draw.vertex_array = vao.handle;
gradient_x_program.Create(tex_coord_vert.data(), x_gradient_frag.data()); gradient_x_program.Create(tex_coord_vert.data(), x_gradient_frag.data());
gradient_y_program.Create(y_gradient_vert.data(), y_gradient_frag.data()); gradient_y_program.Create(tex_coord_vert.data(), y_gradient_frag.data());
refine_program.Create(refine_vert.data(), refine_frag.data()); refine_program.Create(tex_coord_vert.data(), refine_frag.data());
state.draw.shader_program = gradient_y_program.handle; state.draw.shader_program = gradient_y_program.handle;
state.Apply(); state.Apply();
@ -84,8 +68,6 @@ Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_f
state.draw.shader_program = refine_program.handle; state.draw.shader_program = refine_program.handle;
state.Apply(); state.Apply();
glUniform1i(glGetUniformLocation(refine_program.handle, "LUMAD"), 1); glUniform1i(glGetUniformLocation(refine_program.handle, "LUMAD"), 1);
glUniform1f(glGetUniformLocation(refine_program.handle, "final_scale"),
static_cast<GLfloat>(internal_scale_factor) / scale_factor);
cur_state.Apply(); cur_state.Apply();
} }
@ -95,20 +77,48 @@ void Anime4kUltrafast::Filter(GLuint src_tex, const Common::Rectangle<u32>& src_
GLuint read_fb_handle, GLuint draw_fb_handle) { GLuint read_fb_handle, GLuint draw_fb_handle) {
const OpenGLState cur_state = OpenGLState::GetCurState(); const OpenGLState cur_state = OpenGLState::GetCurState();
// These will have handles from the previous texture that was filtered, reset them to avoid
// binding invalid textures.
state.texture_units[0].texture_2d = 0;
state.texture_units[1].texture_2d = 0;
state.texture_units[2].texture_2d = 0;
const auto setup_temp_tex = [this, &src_rect](GLint internal_format, GLint format) {
TempTex texture;
texture.fbo.Create();
texture.tex.Create();
state.texture_units[0].texture_2d = texture.tex.handle;
state.draw.draw_framebuffer = texture.fbo.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture.tex.handle);
if (GL_ARB_texture_storage) {
glTexStorage2D(GL_TEXTURE_2D, 1, internal_format,
src_rect.GetWidth() * internal_scale_factor,
src_rect.GetHeight() * internal_scale_factor);
} else {
glTexImage2D(
GL_TEXTURE_2D, 0, internal_format, src_rect.GetWidth() * internal_scale_factor,
src_rect.GetHeight() * internal_scale_factor, 0, format, GL_HALF_FLOAT, nullptr);
}
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
texture.tex.handle, 0);
return texture;
};
auto XY = setup_temp_tex(GL_RG16F, GL_RG);
auto LUMAD = setup_temp_tex(GL_R16F, GL_RED);
state.viewport = {static_cast<GLint>(src_rect.left * internal_scale_factor), state.viewport = {static_cast<GLint>(src_rect.left * internal_scale_factor),
static_cast<GLint>(src_rect.bottom * internal_scale_factor), static_cast<GLint>(src_rect.bottom * internal_scale_factor),
static_cast<GLsizei>(src_rect.GetWidth() * internal_scale_factor), static_cast<GLsizei>(src_rect.GetWidth() * internal_scale_factor),
static_cast<GLsizei>(src_rect.GetHeight() * internal_scale_factor)}; static_cast<GLsizei>(src_rect.GetHeight() * internal_scale_factor)};
state.texture_units[0].texture_2d = src_tex; state.texture_units[0].texture_2d = src_tex;
state.texture_units[1].texture_2d = LUMAD.tex.handle;
state.texture_units[2].texture_2d = XY.tex.handle;
state.draw.draw_framebuffer = XY.fbo.handle; state.draw.draw_framebuffer = XY.fbo.handle;
state.draw.shader_program = gradient_x_program.handle; state.draw.shader_program = gradient_x_program.handle;
state.Apply(); state.Apply();
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_RECTANGLE, LUMAD.tex.handle);
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_RECTANGLE, XY.tex.handle);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// gradient y pass // gradient y pass

@ -30,8 +30,6 @@ private:
OGLTexture tex; OGLTexture tex;
OGLFramebuffer fbo; OGLFramebuffer fbo;
}; };
TempTex LUMAD;
TempTex XY;
std::array<OGLSampler, 3> samplers; std::array<OGLSampler, 3> samplers;

@ -1,14 +1,12 @@
//? #version 330 //? #version 330
precision mediump float;
in vec2 tex_coord; in vec2 tex_coord;
in vec2 input_max;
out vec4 frag_color; out vec4 frag_color;
uniform sampler2D HOOKED; uniform sampler2D HOOKED;
uniform sampler2DRect LUMAD; uniform sampler2D LUMAD;
uniform sampler2DRect LUMAG;
uniform float final_scale;
const float LINE_DETECT_THRESHOLD = 0.4; const float LINE_DETECT_THRESHOLD = 0.4;
const float STRENGTH = 0.6; const float STRENGTH = 0.6;
@ -21,12 +19,12 @@ struct RGBAL {
}; };
vec4 getAverage(vec4 cc, vec4 a, vec4 b, vec4 c) { vec4 getAverage(vec4 cc, vec4 a, vec4 b, vec4 c) {
return cc * (1 - STRENGTH) + ((a + b + c) / 3) * STRENGTH; return cc * (1.0 - STRENGTH) + ((a + b + c) / 3.0) * STRENGTH;
} }
#define GetRGBAL(offset) \ #define GetRGBAL(x_offset, y_offset) \
RGBAL(textureOffset(HOOKED, tex_coord, offset), \ RGBAL(textureLodOffset(HOOKED, tex_coord, 0.0, ivec2(x_offset, y_offset)), \
texture(LUMAD, clamp((gl_FragCoord.xy + offset) * final_scale, vec2(0.0), input_max)).x) textureLodOffset(LUMAD, tex_coord, 0.0, ivec2(x_offset, y_offset)).x)
float min3v(float a, float b, float c) { float min3v(float a, float b, float c) {
return min(min(a, b), c); return min(min(a, b), c);
@ -37,23 +35,23 @@ float max3v(float a, float b, float c) {
} }
vec4 Compute() { vec4 Compute() {
RGBAL cc = GetRGBAL(ivec2(0)); RGBAL cc = GetRGBAL(0, 0);
if (cc.l > LINE_DETECT_THRESHOLD) { if (cc.l > LINE_DETECT_THRESHOLD) {
return cc.c; return cc.c;
} }
RGBAL tl = GetRGBAL(ivec2(-1, -1)); RGBAL tl = GetRGBAL(-1, -1);
RGBAL t = GetRGBAL(ivec2(0, -1)); RGBAL t = GetRGBAL(0, -1);
RGBAL tr = GetRGBAL(ivec2(1, -1)); RGBAL tr = GetRGBAL(1, -1);
RGBAL l = GetRGBAL(ivec2(-1, 0)); RGBAL l = GetRGBAL(-1, 0);
RGBAL r = GetRGBAL(ivec2(1, 0)); RGBAL r = GetRGBAL(1, 0);
RGBAL bl = GetRGBAL(ivec2(-1, 1)); RGBAL bl = GetRGBAL(-1, 1);
RGBAL b = GetRGBAL(ivec2(0, 1)); RGBAL b = GetRGBAL(0, 1);
RGBAL br = GetRGBAL(ivec2(1, 1)); RGBAL br = GetRGBAL(1, 1);
// Kernel 0 and 4 // Kernel 0 and 4
float maxDark = max3v(br.l, b.l, bl.l); float maxDark = max3v(br.l, b.l, bl.l);

@ -1,14 +0,0 @@
//? #version 330
out vec2 tex_coord;
out vec2 input_max;
uniform sampler2D HOOKED;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0;
input_max = textureSize(HOOKED, 0) * 2.0 - 1.0;
}

@ -1,4 +1,6 @@
//? #version 330 //? #version 330
precision mediump float;
in vec2 tex_coord; in vec2 tex_coord;
out vec2 frag_color; out vec2 frag_color;
@ -7,7 +9,7 @@ uniform sampler2D tex_input;
const vec3 K = vec3(0.2627, 0.6780, 0.0593); const vec3 K = vec3(0.2627, 0.6780, 0.0593);
// TODO: improve handling of alpha channel // TODO: improve handling of alpha channel
#define GetLum(xoffset) dot(K, textureOffset(tex_input, tex_coord, ivec2(xoffset, 0)).rgb) #define GetLum(xoffset) dot(K, textureLodOffset(tex_input, tex_coord, 0.0, ivec2(xoffset, 0)).rgb)
void main() { void main() {
float l = GetLum(-1); float l = GetLum(-1);

@ -1,16 +1,18 @@
//? #version 330 //? #version 330
in vec2 input_max; precision mediump float;
in vec2 tex_coord;
out float frag_color; out float frag_color;
uniform sampler2DRect tex_input; uniform sampler2D tex_input;
void main() { void main() {
vec2 t = texture(tex_input, min(gl_FragCoord.xy + vec2(0.0, 1.0), input_max)).xy; vec2 t = textureLodOffset(tex_input, tex_coord, 0.0, ivec2(0, 1)).xy;
vec2 c = texture(tex_input, gl_FragCoord.xy).xy; vec2 c = textureLod(tex_input, tex_coord, 0.0).xy;
vec2 b = texture(tex_input, max(gl_FragCoord.xy - vec2(0.0, 1.0), vec2(0.0))).xy; vec2 b = textureLodOffset(tex_input, tex_coord, 0.0, ivec2(0, -1)).xy;
vec2 grad = vec2(t.x + 2 * c.x + b.x, b.y - t.y); vec2 grad = vec2(t.x + 2.0 * c.x + b.x, b.y - t.y);
frag_color = 1 - length(grad); frag_color = 1.0 - length(grad);
} }

@ -1,4 +1,6 @@
//? #version 330 //? #version 330
precision mediump float;
in vec2 tex_coord; in vec2 tex_coord;
out vec4 frag_color; out vec4 frag_color;
@ -18,7 +20,7 @@ vec4 cubic(float v) {
vec4 textureBicubic(sampler2D sampler, vec2 texCoords) { vec4 textureBicubic(sampler2D sampler, vec2 texCoords) {
vec2 texSize = textureSize(sampler, 0); vec2 texSize = vec2(textureSize(sampler, 0));
vec2 invTexSize = 1.0 / texSize; vec2 invTexSize = 1.0 / texSize;
texCoords = texCoords * texSize - 0.5; texCoords = texCoords * texSize - 0.5;

@ -1,4 +1,6 @@
//? #version 330 //? #version 330
precision mediump float;
in vec2 tex_coord; in vec2 tex_coord;
in vec2 source_size; in vec2 source_size;
in vec2 output_size; in vec2 output_size;
@ -6,7 +8,7 @@ in vec2 output_size;
out vec4 frag_color; out vec4 frag_color;
uniform sampler2D tex; uniform sampler2D tex;
uniform float scale; uniform lowp float scale;
const int BLEND_NONE = 0; const int BLEND_NONE = 0;
const int BLEND_NORMAL = 1; const int BLEND_NORMAL = 1;
@ -42,12 +44,12 @@ float GetLeftRatio(vec2 center, vec2 origin, vec2 direction) {
return smoothstep(-sqrt(2.0) / 2.0, sqrt(2.0) / 2.0, v); return smoothstep(-sqrt(2.0) / 2.0, sqrt(2.0) / 2.0, v);
} }
vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5);
vec2 coord = tex_coord - pos / source_size;
#define P(x, y) textureOffset(tex, coord, ivec2(x, y)) #define P(x, y) textureOffset(tex, coord, ivec2(x, y))
void main() { void main() {
vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5);
vec2 coord = tex_coord - pos / source_size;
//--------------------------------------- //---------------------------------------
// Input Pixel Mapping: -|x|x|x|- // Input Pixel Mapping: -|x|x|x|-
// x|A|B|C|x // x|A|B|C|x
@ -142,15 +144,15 @@ void main() {
(IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) && (IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) &&
IsPixEqual(F, C) && !IsPixEqual(E, I)))); IsPixEqual(F, C) && !IsPixEqual(E, I))));
vec2 origin = vec2(0.0, 1.0 / sqrt(2.0)); vec2 origin = vec2(0.0, 1.0 / sqrt(2.0));
ivec2 direction = ivec2(1, -1); vec2 direction = vec2(1.0, -1.0);
if (doLineBlend) { if (doLineBlend) {
bool haveShallowLine = bool haveShallowLine =
(STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && E != G && D != G; (STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && E != G && D != G;
bool haveSteepLine = bool haveSteepLine =
(STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && E != C && B != C; (STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && E != C && B != C;
origin = haveShallowLine ? vec2(0.0, 0.25) : vec2(0.0, 0.5); origin = haveShallowLine ? vec2(0.0, 0.25) : vec2(0.0, 0.5);
direction.x += haveShallowLine ? 1 : 0; direction.x += haveShallowLine ? 1.0 : 0.0;
direction.y -= haveSteepLine ? 1 : 0; direction.y -= haveSteepLine ? 1.0 : 0.0;
} }
vec4 blendPix = mix(H, F, step(ColorDist(E, F), ColorDist(E, H))); vec4 blendPix = mix(H, F, step(ColorDist(E, F), ColorDist(E, H)));
res = mix(res, blendPix, GetLeftRatio(pos, origin, direction)); res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));
@ -169,15 +171,15 @@ void main() {
(IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) && (IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) &&
IsPixEqual(H, I) && !IsPixEqual(E, G)))); IsPixEqual(H, I) && !IsPixEqual(E, G))));
vec2 origin = vec2(-1.0 / sqrt(2.0), 0.0); vec2 origin = vec2(-1.0 / sqrt(2.0), 0.0);
ivec2 direction = ivec2(1, 1); vec2 direction = vec2(1.0, 1.0);
if (doLineBlend) { if (doLineBlend) {
bool haveShallowLine = bool haveShallowLine =
(STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && E != A && B != A; (STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && E != A && B != A;
bool haveSteepLine = bool haveSteepLine =
(STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && E != I && F != I; (STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && E != I && F != I;
origin = haveShallowLine ? vec2(-0.25, 0.0) : vec2(-0.5, 0.0); origin = haveShallowLine ? vec2(-0.25, 0.0) : vec2(-0.5, 0.0);
direction.y += haveShallowLine ? 1 : 0; direction.y += haveShallowLine ? 1.0 : 0.0;
direction.x += haveSteepLine ? 1 : 0; direction.x += haveSteepLine ? 1.0 : 0.0;
} }
origin = origin; origin = origin;
direction = direction; direction = direction;
@ -198,15 +200,15 @@ void main() {
(IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) && (IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) &&
IsPixEqual(B, A) && !IsPixEqual(E, C)))); IsPixEqual(B, A) && !IsPixEqual(E, C))));
vec2 origin = vec2(1.0 / sqrt(2.0), 0.0); vec2 origin = vec2(1.0 / sqrt(2.0), 0.0);
ivec2 direction = ivec2(-1, -1); vec2 direction = vec2(-1.0, -1.0);
if (doLineBlend) { if (doLineBlend) {
bool haveShallowLine = bool haveShallowLine =
(STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && E != I && H != I; (STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && E != I && H != I;
bool haveSteepLine = bool haveSteepLine =
(STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && E != A && D != A; (STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && E != A && D != A;
origin = haveShallowLine ? vec2(0.25, 0.0) : vec2(0.5, 0.0); origin = haveShallowLine ? vec2(0.25, 0.0) : vec2(0.5, 0.0);
direction.y -= haveShallowLine ? 1 : 0; direction.y -= haveShallowLine ? 1.0 : 0.0;
direction.x -= haveSteepLine ? 1 : 0; direction.x -= haveSteepLine ? 1.0 : 0.0;
} }
vec4 blendPix = mix(F, B, step(ColorDist(E, B), ColorDist(E, F))); vec4 blendPix = mix(F, B, step(ColorDist(E, B), ColorDist(E, F)));
res = mix(res, blendPix, GetLeftRatio(pos, origin, direction)); res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));
@ -225,15 +227,15 @@ void main() {
(IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) && (IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) &&
IsPixEqual(D, G) && !IsPixEqual(E, A)))); IsPixEqual(D, G) && !IsPixEqual(E, A))));
vec2 origin = vec2(0.0, -1.0 / sqrt(2.0)); vec2 origin = vec2(0.0, -1.0 / sqrt(2.0));
ivec2 direction = ivec2(-1, 1); vec2 direction = vec2(-1.0, 1.0);
if (doLineBlend) { if (doLineBlend) {
bool haveShallowLine = bool haveShallowLine =
(STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && E != C && F != C; (STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && E != C && F != C;
bool haveSteepLine = bool haveSteepLine =
(STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && E != G && H != G; (STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && E != G && H != G;
origin = haveShallowLine ? vec2(0.0, -0.25) : vec2(0.0, -0.5); origin = haveShallowLine ? vec2(0.0, -0.25) : vec2(0.0, -0.5);
direction.x -= haveShallowLine ? 1 : 0; direction.x -= haveShallowLine ? 1.0 : 0.0;
direction.y += haveSteepLine ? 1 : 0; direction.y += haveSteepLine ? 1.0 : 0.0;
} }
vec4 blendPix = mix(D, B, step(ColorDist(E, B), ColorDist(E, D))); vec4 blendPix = mix(D, B, step(ColorDist(E, B), ColorDist(E, D)));
res = mix(res, blendPix, GetLeftRatio(pos, origin, direction)); res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));

@ -4,7 +4,7 @@ out vec2 source_size;
out vec2 output_size; out vec2 output_size;
uniform sampler2D tex; uniform sampler2D tex;
uniform float scale; uniform lowp float scale;
const vec2 vertices[4] = const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
@ -12,6 +12,6 @@ const vec2 vertices[4] =
void main() { void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0; tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0;
source_size = textureSize(tex, 0); source_size = vec2(textureSize(tex, 0));
output_size = source_size * scale; output_size = source_size * scale;
} }