|
|
|
@ -18,254 +18,19 @@
|
|
|
|
|
#include "video_core/debug_utils/debug_utils.h"
|
|
|
|
|
#include "video_core/pica_state.h"
|
|
|
|
|
#include "video_core/pica_types.h"
|
|
|
|
|
#include "video_core/rasterizer.h"
|
|
|
|
|
#include "video_core/regs_framebuffer.h"
|
|
|
|
|
#include "video_core/regs_rasterizer.h"
|
|
|
|
|
#include "video_core/regs_texturing.h"
|
|
|
|
|
#include "video_core/shader/shader.h"
|
|
|
|
|
#include "video_core/swrasterizer/framebuffer.h"
|
|
|
|
|
#include "video_core/swrasterizer/rasterizer.h"
|
|
|
|
|
#include "video_core/swrasterizer/texturing.h"
|
|
|
|
|
#include "video_core/texture/texture_decode.h"
|
|
|
|
|
#include "video_core/utils.h"
|
|
|
|
|
|
|
|
|
|
namespace Pica {
|
|
|
|
|
|
|
|
|
|
namespace Rasterizer {
|
|
|
|
|
|
|
|
|
|
// Stores `color` at position (x, y) of the currently configured color buffer, encoded according
// to the framebuffer's color format. An unrecognized format is logged and hits UNIMPLEMENTED().
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
    const auto& fb = g_state.regs.framebuffer.framebuffer;
    const PAddr buffer_addr = fb.GetColorBufferPhysicalAddress();

    // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
    // NOTE: The framebuffer height register contains the actual FB height minus one.
    y = fb.height - y;

    const u32 pixel_size =
        GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(fb.color_format.Value()));

    // y with its three low bits cleared, i.e. rounded down to a multiple of 8
    const u32 band_y = y & ~7;
    const u32 band_offset = band_y * fb.width * pixel_size;
    const u32 pixel_offset = VideoCore::GetMortonOffset(x, y, pixel_size) + band_offset;
    u8* const target = Memory::GetPhysicalPointer(buffer_addr) + pixel_offset;

    switch (fb.color_format) {
    case FramebufferRegs::ColorFormat::RGBA8:
        Color::EncodeRGBA8(color, target);
        return;

    case FramebufferRegs::ColorFormat::RGB8:
        Color::EncodeRGB8(color, target);
        return;

    case FramebufferRegs::ColorFormat::RGB5A1:
        Color::EncodeRGB5A1(color, target);
        return;

    case FramebufferRegs::ColorFormat::RGB565:
        Color::EncodeRGB565(color, target);
        return;

    case FramebufferRegs::ColorFormat::RGBA4:
        Color::EncodeRGBA4(color, target);
        return;

    default:
        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
                     fb.color_format.Value());
        UNIMPLEMENTED();
    }
}
|
|
|
|
|
|
|
|
|
|
// Reads the color stored at position (x, y) of the currently configured color buffer and decodes
// it according to the framebuffer's color format. An unrecognized format is logged, hits
// UNIMPLEMENTED() and yields {0, 0, 0, 0}.
static const Math::Vec4<u8> GetPixel(int x, int y) {
    const auto& fb = g_state.regs.framebuffer.framebuffer;
    const PAddr buffer_addr = fb.GetColorBufferPhysicalAddress();

    // Same vertical flip as applied when writing pixels
    y = fb.height - y;

    const u32 pixel_size =
        GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(fb.color_format.Value()));

    // y with its three low bits cleared, i.e. rounded down to a multiple of 8
    const u32 band_y = y & ~7;
    const u32 band_offset = band_y * fb.width * pixel_size;
    const u32 pixel_offset = VideoCore::GetMortonOffset(x, y, pixel_size) + band_offset;
    u8* const source = Memory::GetPhysicalPointer(buffer_addr) + pixel_offset;

    switch (fb.color_format) {
    case FramebufferRegs::ColorFormat::RGBA8:
        return Color::DecodeRGBA8(source);

    case FramebufferRegs::ColorFormat::RGB8:
        return Color::DecodeRGB8(source);

    case FramebufferRegs::ColorFormat::RGB5A1:
        return Color::DecodeRGB5A1(source);

    case FramebufferRegs::ColorFormat::RGB565:
        return Color::DecodeRGB565(source);

    case FramebufferRegs::ColorFormat::RGBA4:
        return Color::DecodeRGBA4(source);

    default:
        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
                     fb.color_format.Value());
        UNIMPLEMENTED();
        break;
    }

    // Only reached for an unrecognized color format
    return {0, 0, 0, 0};
}
|
|
|
|
|
|
|
|
|
|
// Reads the depth value stored at position (x, y) of the currently configured depth buffer.
// For D24S8 only the x component of the decoded pair is returned. An unrecognized depth format
// is logged, hits UNIMPLEMENTED() and yields 0.
static u32 GetDepth(int x, int y) {
    const auto& fb = g_state.regs.framebuffer.framebuffer;
    const PAddr buffer_addr = fb.GetDepthBufferPhysicalAddress();
    u8* const buffer_base = Memory::GetPhysicalPointer(buffer_addr);

    // Same vertical flip as used for the color buffer
    y = fb.height - y;

    // y with its three low bits cleared, i.e. rounded down to a multiple of 8
    const u32 band_y = y & ~7;
    const u32 pixel_size = FramebufferRegs::BytesPerDepthPixel(fb.depth_format);
    const u32 row_bytes = fb.width * pixel_size;

    const u32 pixel_offset = VideoCore::GetMortonOffset(x, y, pixel_size) + band_y * row_bytes;
    u8* const source = buffer_base + pixel_offset;

    switch (fb.depth_format) {
    case FramebufferRegs::DepthFormat::D16:
        return Color::DecodeD16(source);
    case FramebufferRegs::DepthFormat::D24:
        return Color::DecodeD24(source);
    case FramebufferRegs::DepthFormat::D24S8:
        return Color::DecodeD24S8(source).x;
    default:
        LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", fb.depth_format);
        UNIMPLEMENTED();
        return 0;
    }
}
|
|
|
|
|
|
|
|
|
|
// Reads the stencil value stored at position (x, y) of the currently configured depth buffer.
// Only D24S8 is handled (the y component of the decoded pair is returned); for any other format
// a warning is logged and 0 is returned.
static u8 GetStencil(int x, int y) {
    const auto& fb = g_state.regs.framebuffer.framebuffer;
    const PAddr buffer_addr = fb.GetDepthBufferPhysicalAddress();
    u8* const buffer_base = Memory::GetPhysicalPointer(buffer_addr);

    // Same vertical flip as used for the color buffer
    y = fb.height - y;

    // y with its three low bits cleared, i.e. rounded down to a multiple of 8
    const u32 band_y = y & ~7;
    const u32 pixel_size = Pica::FramebufferRegs::BytesPerDepthPixel(fb.depth_format);
    const u32 row_bytes = fb.width * pixel_size;

    const u32 pixel_offset = VideoCore::GetMortonOffset(x, y, pixel_size) + band_y * row_bytes;
    u8* const source = buffer_base + pixel_offset;

    if (fb.depth_format == FramebufferRegs::DepthFormat::D24S8) {
        return Color::DecodeD24S8(source).y;
    }

    LOG_WARNING(
        HW_GPU,
        "GetStencil called for function which doesn't have a stencil component (format %u)",
        fb.depth_format);
    return 0;
}
|
|
|
|
|
|
|
|
|
|
static void SetDepth(int x, int y, u32 value) {
|
|
|
|
|
const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
|
|
|
|
|
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
|
|
|
|
u8* depth_buffer = Memory::GetPhysicalPointer(addr);
|
|
|
|
|
|
|
|
|
|
y = framebuffer.height - y;
|
|
|
|
|
|
|
|
|
|
const u32 coarse_y = y & ~7;
|
|
|
|
|
u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
|
|
|
|
|
u32 stride = framebuffer.width * bytes_per_pixel;
|
|
|
|
|
|
|
|
|
|
u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
|
|
|
|
u8* dst_pixel = depth_buffer + dst_offset;
|
|
|
|
|
|
|
|
|
|
switch (framebuffer.depth_format) {
|
|
|
|
|
case FramebufferRegs::DepthFormat::D16:
|
|
|
|
|
Color::EncodeD16(value, dst_pixel);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case FramebufferRegs::DepthFormat::D24:
|
|
|
|
|
Color::EncodeD24(value, dst_pixel);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case FramebufferRegs::DepthFormat::D24S8:
|
|
|
|
|
Color::EncodeD24X8(value, dst_pixel);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
|
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void SetStencil(int x, int y, u8 value) {
|
|
|
|
|
const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
|
|
|
|
|
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
|
|
|
|
u8* depth_buffer = Memory::GetPhysicalPointer(addr);
|
|
|
|
|
|
|
|
|
|
y = framebuffer.height - y;
|
|
|
|
|
|
|
|
|
|
const u32 coarse_y = y & ~7;
|
|
|
|
|
u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
|
|
|
|
|
u32 stride = framebuffer.width * bytes_per_pixel;
|
|
|
|
|
|
|
|
|
|
u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
|
|
|
|
u8* dst_pixel = depth_buffer + dst_offset;
|
|
|
|
|
|
|
|
|
|
switch (framebuffer.depth_format) {
|
|
|
|
|
case Pica::FramebufferRegs::DepthFormat::D16:
|
|
|
|
|
case Pica::FramebufferRegs::DepthFormat::D24:
|
|
|
|
|
// Nothing to do
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case Pica::FramebufferRegs::DepthFormat::D24S8:
|
|
|
|
|
Color::EncodeX24S8(value, dst_pixel);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
|
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Computes the new stencil value produced by applying `action` to `old_stencil`, with `ref`
// used by the Replace action. An unrecognized action is logged, hits UNIMPLEMENTED() and
// yields 0.
static u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) {
    using Action = FramebufferRegs::StencilAction;

    switch (action) {
    case Action::Keep:
        return old_stencil;

    case Action::Zero:
        return 0;

    case Action::Replace:
        return ref;

    case Action::Increment:
        // Saturated increment: 255 stays at 255
        return static_cast<u8>(old_stencil < 255 ? old_stencil + 1 : 255);

    case Action::Decrement:
        // Saturated decrement: 0 stays at 0
        return static_cast<u8>(old_stencil > 0 ? old_stencil - 1 : 0);

    case Action::Invert:
        return static_cast<u8>(~old_stencil);

    case Action::IncrementWrap:
        // Truncation to u8 makes 255 wrap around to 0
        return static_cast<u8>(old_stencil + 1);

    case Action::DecrementWrap:
        // Truncation to u8 makes 0 wrap around to 255
        return static_cast<u8>(old_stencil - 1);

    default:
        LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
        UNIMPLEMENTED();
        return 0;
    }
}
|
|
|
|
|
|
|
|
|
|
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
|
|
|
|
|
struct Fix12P4 {
|
|
|
|
|
Fix12P4() {}
|
|
|
|
@ -539,34 +304,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
|
|
|
|
|
int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height)))
|
|
|
|
|
.ToFloat32();
|
|
|
|
|
|
|
|
|
|
static auto GetWrappedTexCoord = [](TexturingRegs::TextureConfig::WrapMode mode,
|
|
|
|
|
int val, unsigned size) {
|
|
|
|
|
switch (mode) {
|
|
|
|
|
case TexturingRegs::TextureConfig::ClampToEdge:
|
|
|
|
|
val = std::max(val, 0);
|
|
|
|
|
val = std::min(val, (int)size - 1);
|
|
|
|
|
return val;
|
|
|
|
|
|
|
|
|
|
case TexturingRegs::TextureConfig::ClampToBorder:
|
|
|
|
|
return val;
|
|
|
|
|
|
|
|
|
|
case TexturingRegs::TextureConfig::Repeat:
|
|
|
|
|
return (int)((unsigned)val % size);
|
|
|
|
|
|
|
|
|
|
case TexturingRegs::TextureConfig::MirroredRepeat: {
|
|
|
|
|
unsigned int coord = ((unsigned)val % (2 * size));
|
|
|
|
|
if (coord >= size)
|
|
|
|
|
coord = 2 * size - 1 - coord;
|
|
|
|
|
return (int)coord;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
|
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder &&
|
|
|
|
|
(s < 0 || static_cast<u32>(s) >= texture.config.width)) ||
|
|
|
|
|
(texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder &&
|
|
|
|
@ -615,9 +352,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
|
|
|
|
|
++tev_stage_index) {
|
|
|
|
|
const auto& tev_stage = tev_stages[tev_stage_index];
|
|
|
|
|
using Source = TexturingRegs::TevStageConfig::Source;
|
|
|
|
|
using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier;
|
|
|
|
|
using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier;
|
|
|
|
|
using Operation = TexturingRegs::TevStageConfig::Operation;
|
|
|
|
|
|
|
|
|
|
auto GetSource = [&](Source source) -> Math::Vec4<u8> {
|
|
|
|
|
switch (source) {
|
|
|
|
@ -657,187 +391,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static auto GetColorModifier = [](ColorModifier factor,
|
|
|
|
|
const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
|
|
|
|
|
switch (factor) {
|
|
|
|
|
case ColorModifier::SourceColor:
|
|
|
|
|
return values.rgb();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::OneMinusSourceColor:
|
|
|
|
|
return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::SourceAlpha:
|
|
|
|
|
return values.aaa();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::OneMinusSourceAlpha:
|
|
|
|
|
return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::SourceRed:
|
|
|
|
|
return values.rrr();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::OneMinusSourceRed:
|
|
|
|
|
return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::SourceGreen:
|
|
|
|
|
return values.ggg();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::OneMinusSourceGreen:
|
|
|
|
|
return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::SourceBlue:
|
|
|
|
|
return values.bbb();
|
|
|
|
|
|
|
|
|
|
case ColorModifier::OneMinusSourceBlue:
|
|
|
|
|
return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static auto GetAlphaModifier = [](AlphaModifier factor,
|
|
|
|
|
const Math::Vec4<u8>& values) -> u8 {
|
|
|
|
|
switch (factor) {
|
|
|
|
|
case AlphaModifier::SourceAlpha:
|
|
|
|
|
return values.a();
|
|
|
|
|
|
|
|
|
|
case AlphaModifier::OneMinusSourceAlpha:
|
|
|
|
|
return 255 - values.a();
|
|
|
|
|
|
|
|
|
|
case AlphaModifier::SourceRed:
|
|
|
|
|
return values.r();
|
|
|
|
|
|
|
|
|
|
case AlphaModifier::OneMinusSourceRed:
|
|
|
|
|
return 255 - values.r();
|
|
|
|
|
|
|
|
|
|
case AlphaModifier::SourceGreen:
|
|
|
|
|
return values.g();
|
|
|
|
|
|
|
|
|
|
case AlphaModifier::OneMinusSourceGreen:
|
|
|
|
|
return 255 - values.g();
|
|
|
|
|
|
|
|
|
|
case AlphaModifier::SourceBlue:
|
|
|
|
|
return values.b();
|
|
|
|
|
|
|
|
|
|
case AlphaModifier::OneMinusSourceBlue:
|
|
|
|
|
return 255 - values.b();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static auto ColorCombine = [](Operation op,
|
|
|
|
|
const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
|
|
|
|
|
switch (op) {
|
|
|
|
|
case Operation::Replace:
|
|
|
|
|
return input[0];
|
|
|
|
|
|
|
|
|
|
case Operation::Modulate:
|
|
|
|
|
return ((input[0] * input[1]) / 255).Cast<u8>();
|
|
|
|
|
|
|
|
|
|
case Operation::Add: {
|
|
|
|
|
auto result = input[0] + input[1];
|
|
|
|
|
result.r() = std::min(255, result.r());
|
|
|
|
|
result.g() = std::min(255, result.g());
|
|
|
|
|
result.b() = std::min(255, result.b());
|
|
|
|
|
return result.Cast<u8>();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case Operation::AddSigned: {
|
|
|
|
|
// TODO(bunnei): Verify that the color conversion from (float) 0.5f to
|
|
|
|
|
// (byte) 128 is correct
|
|
|
|
|
auto result = input[0].Cast<int>() + input[1].Cast<int>() -
|
|
|
|
|
Math::MakeVec<int>(128, 128, 128);
|
|
|
|
|
result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
|
|
|
|
|
result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
|
|
|
|
|
result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
|
|
|
|
|
return result.Cast<u8>();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case Operation::Lerp:
|
|
|
|
|
return ((input[0] * input[2] +
|
|
|
|
|
input[1] *
|
|
|
|
|
(Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
|
|
|
|
|
255)
|
|
|
|
|
.Cast<u8>();
|
|
|
|
|
|
|
|
|
|
case Operation::Subtract: {
|
|
|
|
|
auto result = input[0].Cast<int>() - input[1].Cast<int>();
|
|
|
|
|
result.r() = std::max(0, result.r());
|
|
|
|
|
result.g() = std::max(0, result.g());
|
|
|
|
|
result.b() = std::max(0, result.b());
|
|
|
|
|
return result.Cast<u8>();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case Operation::MultiplyThenAdd: {
|
|
|
|
|
auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
|
|
|
|
|
result.r() = std::min(255, result.r());
|
|
|
|
|
result.g() = std::min(255, result.g());
|
|
|
|
|
result.b() = std::min(255, result.b());
|
|
|
|
|
return result.Cast<u8>();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case Operation::AddThenMultiply: {
|
|
|
|
|
auto result = input[0] + input[1];
|
|
|
|
|
result.r() = std::min(255, result.r());
|
|
|
|
|
result.g() = std::min(255, result.g());
|
|
|
|
|
result.b() = std::min(255, result.b());
|
|
|
|
|
result = (result * input[2].Cast<int>()) / 255;
|
|
|
|
|
return result.Cast<u8>();
|
|
|
|
|
}
|
|
|
|
|
case Operation::Dot3_RGB: {
|
|
|
|
|
// Not fully accurate.
|
|
|
|
|
// Worst case scenario seems to yield a +/-3 error
|
|
|
|
|
// Some HW results indicate that the per-component computation can't have a
|
|
|
|
|
// higher precision than 1/256,
|
|
|
|
|
// while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb(
|
|
|
|
|
// (0x80,g0,b0),(0x80,g1,b1) ) give different results
|
|
|
|
|
int result =
|
|
|
|
|
((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
|
|
|
|
|
((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
|
|
|
|
|
((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
|
|
|
|
|
result = std::max(0, std::min(255, result));
|
|
|
|
|
return {(u8)result, (u8)result, (u8)result};
|
|
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
|
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
|
return {0, 0, 0};
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Combines up to three alpha inputs according to the combiner operation `op` and returns the
// resulting alpha value. An unrecognized operation is logged, hits UNIMPLEMENTED() and
// yields 0.
static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 {
    // Widen up front; all arithmetic below is carried out in int
    const int a = input[0];
    const int b = input[1];
    const int c = input[2];

    switch (op) {
    case Operation::Replace:
        return input[0];

    case Operation::Modulate:
        return static_cast<u8>(a * b / 255);

    case Operation::Add:
        return static_cast<u8>(std::min(255, a + b));

    case Operation::AddSigned: {
        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
        // (byte) 128 is correct
        const int biased = a + b - 128;
        return static_cast<u8>(MathUtil::Clamp<int>(biased, 0, 255));
    }

    case Operation::Lerp:
        // a weighted by c, b weighted by (255 - c)
        return static_cast<u8>((a * c + b * (255 - c)) / 255);

    case Operation::Subtract:
        return static_cast<u8>(std::max(0, a - b));

    case Operation::MultiplyThenAdd:
        return static_cast<u8>(std::min(255, (a * b + 255 * c) / 255));

    case Operation::AddThenMultiply:
        // The sum is saturated before it is scaled by the third input
        return static_cast<u8>((std::min(255, (a + b)) * c) / 255);

    default:
        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op);
        UNIMPLEMENTED();
        return 0;
    }
};
|
|
|
|
|
|
|
|
|
|
// color combiner
|
|
|
|
|
// NOTE: Not sure if the alpha combiner might use the color output of the previous
|
|
|
|
|
// stage as input. Hence, we currently don't directly write the result to
|
|
|
|
@ -1152,56 +705,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
|
|
|
|
|
return combiner_output[channel];
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static auto EvaluateBlendEquation = [](
|
|
|
|
|
const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
|
|
|
|
|
const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
|
|
|
|
|
FramebufferRegs::BlendEquation equation) {
|
|
|
|
|
|
|
|
|
|
Math::Vec4<int> result;
|
|
|
|
|
|
|
|
|
|
auto src_result = (src * srcfactor).Cast<int>();
|
|
|
|
|
auto dst_result = (dest * destfactor).Cast<int>();
|
|
|
|
|
|
|
|
|
|
switch (equation) {
|
|
|
|
|
case FramebufferRegs::BlendEquation::Add:
|
|
|
|
|
result = (src_result + dst_result) / 255;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case FramebufferRegs::BlendEquation::Subtract:
|
|
|
|
|
result = (src_result - dst_result) / 255;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case FramebufferRegs::BlendEquation::ReverseSubtract:
|
|
|
|
|
result = (dst_result - src_result) / 255;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
// TODO: How do these two actually work?
|
|
|
|
|
// OpenGL doesn't include the blend factors in the min/max computations,
|
|
|
|
|
// but is this what the 3DS actually does?
|
|
|
|
|
case FramebufferRegs::BlendEquation::Min:
|
|
|
|
|
result.r() = std::min(src.r(), dest.r());
|
|
|
|
|
result.g() = std::min(src.g(), dest.g());
|
|
|
|
|
result.b() = std::min(src.b(), dest.b());
|
|
|
|
|
result.a() = std::min(src.a(), dest.a());
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case FramebufferRegs::BlendEquation::Max:
|
|
|
|
|
result.r() = std::max(src.r(), dest.r());
|
|
|
|
|
result.g() = std::max(src.g(), dest.g());
|
|
|
|
|
result.b() = std::max(src.b(), dest.b());
|
|
|
|
|
result.a() = std::max(src.a(), dest.a());
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation);
|
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Math::Vec4<u8>(
|
|
|
|
|
MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
|
|
|
|
|
MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
|
|
|
|
|
LookupFactor(1, params.factor_source_rgb),
|
|
|
|
|
LookupFactor(2, params.factor_source_rgb),
|
|
|
|
@ -1218,58 +721,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
|
|
|
|
|
dstfactor, params.blend_equation_a)
|
|
|
|
|
.a();
|
|
|
|
|
} else {
|
|
|
|
|
// Applies the bitwise logic operation `op` to one source channel `src` and one destination
// channel `dest` and returns the resulting 8-bit value.
// A value outside the listed enumerators is logged, hits UNIMPLEMENTED() and yields 0;
// previously control flowed off the end of the lambda, which is undefined behavior.
static auto LogicOp = [](u8 src, u8 dest, FramebufferRegs::LogicOp op) -> u8 {
    switch (op) {
    case FramebufferRegs::LogicOp::Clear:
        return 0;

    case FramebufferRegs::LogicOp::And:
        return src & dest;

    case FramebufferRegs::LogicOp::AndReverse:
        return src & ~dest;

    case FramebufferRegs::LogicOp::Copy:
        return src;

    case FramebufferRegs::LogicOp::Set:
        return 255;

    case FramebufferRegs::LogicOp::CopyInverted:
        return ~src;

    case FramebufferRegs::LogicOp::NoOp:
        return dest;

    case FramebufferRegs::LogicOp::Invert:
        return ~dest;

    case FramebufferRegs::LogicOp::Nand:
        return ~(src & dest);

    case FramebufferRegs::LogicOp::Or:
        return src | dest;

    case FramebufferRegs::LogicOp::Nor:
        return ~(src | dest);

    case FramebufferRegs::LogicOp::Xor:
        return src ^ dest;

    case FramebufferRegs::LogicOp::Equiv:
        return ~(src ^ dest);

    case FramebufferRegs::LogicOp::AndInverted:
        return ~src & dest;

    case FramebufferRegs::LogicOp::OrReverse:
        return src | ~dest;

    case FramebufferRegs::LogicOp::OrInverted:
        return ~src | dest;
    }

    // Kept outside the switch (instead of a `default:` label) so that the compiler can still
    // warn about enumerators that are not handled above.
    LOG_CRITICAL(HW_GPU, "Unknown logic op %x", static_cast<int>(op));
    UNIMPLEMENTED();
    return 0;
};
|
|
|
|
|
|
|
|
|
|
blend_output =
|
|
|
|
|
Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
|
|
|
|
|
LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),
|