|
|
@ -24,72 +24,74 @@ namespace Pica {
|
|
|
|
namespace Rasterizer {
|
|
|
|
namespace Rasterizer {
|
|
|
|
|
|
|
|
|
|
|
|
// Stores one pixel of the color framebuffer: locates the destination bytes for (x, y) and
// hands `color` plus that pointer to the Color::Encode* routine matching the configured format.
//
// NOTE(review): this text is a side-by-side diff rendering, not plain source. Every statement
// appears twice: first the pre-change form (reading `registers.framebuffer`), then the
// post-change form (reading the local `framebuffer` reference bound to
// `g_state.regs.framebuffer`). The `|` lines are diff-table filler, not code.
//
// x, y   pixel coordinates; y is replaced by (framebuffer.height - y) before addressing
// color  4-component u8 value passed unchanged to the encoder
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
|
|
|
|
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
|
|
|
|
const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
|
|
|
|
const auto& framebuffer = g_state.regs.framebuffer;
|
|
|
|
|
|
|
|
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Similarly to textures, the render framebuffer is laid out from bottom to top, too.
|
|
|
|
// Similarly to textures, the render framebuffer is laid out from bottom to top, too.
|
|
|
|
// NOTE: The framebuffer height register contains the actual FB height minus one.
|
|
|
|
// NOTE: The framebuffer height register contains the actual FB height minus one.
|
|
|
|
y = (registers.framebuffer.height - y);
|
|
|
|
y = framebuffer.height - y;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Clearing the low three bits rounds y down to a multiple of 8; the offset below adds
// coarse_y whole rows (width * bytes_per_pixel each) to what GetMortonOffset returns.
// NOTE(review): presumably GetMortonOffset covers the position inside an 8-row tile - confirm.
const u32 coarse_y = y & ~7;
|
|
|
|
const u32 coarse_y = y & ~7;
|
|
|
|
u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
|
|
|
|
u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
|
|
|
|
u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
|
|
|
|
u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel;
|
|
|
|
u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
|
|
|
|
u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// One case per supported color format; each forwards to the encoder of the same name.
switch (registers.framebuffer.color_format) {
|
|
|
|
switch (framebuffer.color_format) {
|
|
|
|
case Pica::Regs::ColorFormat::RGBA8:
|
|
|
|
case Regs::ColorFormat::RGBA8:
|
|
|
|
Color::EncodeRGBA8(color, dst_pixel);
|
|
|
|
Color::EncodeRGBA8(color, dst_pixel);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case Pica::Regs::ColorFormat::RGB8:
|
|
|
|
case Regs::ColorFormat::RGB8:
|
|
|
|
Color::EncodeRGB8(color, dst_pixel);
|
|
|
|
Color::EncodeRGB8(color, dst_pixel);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case Pica::Regs::ColorFormat::RGB5A1:
|
|
|
|
case Regs::ColorFormat::RGB5A1:
|
|
|
|
Color::EncodeRGB5A1(color, dst_pixel);
|
|
|
|
Color::EncodeRGB5A1(color, dst_pixel);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case Pica::Regs::ColorFormat::RGB565:
|
|
|
|
case Regs::ColorFormat::RGB565:
|
|
|
|
Color::EncodeRGB565(color, dst_pixel);
|
|
|
|
Color::EncodeRGB565(color, dst_pixel);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case Pica::Regs::ColorFormat::RGBA4:
|
|
|
|
case Regs::ColorFormat::RGBA4:
|
|
|
|
Color::EncodeRGBA4(color, dst_pixel);
|
|
|
|
Color::EncodeRGBA4(color, dst_pixel);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Any other format value: log it and hit UNIMPLEMENTED(); no encoder is called.
default:
|
|
|
|
default:
|
|
|
|
LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
|
|
|
|
LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value());
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static const Math::Vec4<u8> GetPixel(int x, int y) {
|
|
|
|
static const Math::Vec4<u8> GetPixel(int x, int y) {
|
|
|
|
const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
|
|
|
|
const auto& framebuffer = g_state.regs.framebuffer;
|
|
|
|
|
|
|
|
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
|
|
|
|
|
|
|
|
|
|
|
y = (registers.framebuffer.height - y);
|
|
|
|
y = framebuffer.height - y;
|
|
|
|
|
|
|
|
|
|
|
|
const u32 coarse_y = y & ~7;
|
|
|
|
const u32 coarse_y = y & ~7;
|
|
|
|
u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
|
|
|
|
u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
|
|
|
|
u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
|
|
|
|
u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel;
|
|
|
|
u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
|
|
|
|
u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
|
|
|
|
|
|
|
|
|
|
|
|
switch (registers.framebuffer.color_format) {
|
|
|
|
switch (framebuffer.color_format) {
|
|
|
|
case Pica::Regs::ColorFormat::RGBA8:
|
|
|
|
case Regs::ColorFormat::RGBA8:
|
|
|
|
return Color::DecodeRGBA8(src_pixel);
|
|
|
|
return Color::DecodeRGBA8(src_pixel);
|
|
|
|
|
|
|
|
|
|
|
|
case Pica::Regs::ColorFormat::RGB8:
|
|
|
|
case Regs::ColorFormat::RGB8:
|
|
|
|
return Color::DecodeRGB8(src_pixel);
|
|
|
|
return Color::DecodeRGB8(src_pixel);
|
|
|
|
|
|
|
|
|
|
|
|
case Pica::Regs::ColorFormat::RGB5A1:
|
|
|
|
case Regs::ColorFormat::RGB5A1:
|
|
|
|
return Color::DecodeRGB5A1(src_pixel);
|
|
|
|
return Color::DecodeRGB5A1(src_pixel);
|
|
|
|
|
|
|
|
|
|
|
|
case Pica::Regs::ColorFormat::RGB565:
|
|
|
|
case Regs::ColorFormat::RGB565:
|
|
|
|
return Color::DecodeRGB565(src_pixel);
|
|
|
|
return Color::DecodeRGB565(src_pixel);
|
|
|
|
|
|
|
|
|
|
|
|
case Pica::Regs::ColorFormat::RGBA4:
|
|
|
|
case Regs::ColorFormat::RGBA4:
|
|
|
|
return Color::DecodeRGBA4(src_pixel);
|
|
|
|
return Color::DecodeRGBA4(src_pixel);
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
|
|
|
|
LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value());
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -97,58 +99,60 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Reads the depth value stored for pixel (x, y): locates the source bytes in the depth buffer
// and returns what the Color::Decode* routine for the configured depth format produces.
//
// NOTE(review): this text is a side-by-side diff rendering, not plain source. Every statement
// appears twice: first the pre-change form (reading `registers.framebuffer`), then the
// post-change form (reading the local `framebuffer` reference bound to
// `g_state.regs.framebuffer`). The `|` lines are diff-table filler, not code.
//
// x, y     pixel coordinates; y is replaced by (framebuffer.height - y) before addressing
// returns  the decoded depth as u32, or 0 on the default (unhandled format) path
static u32 GetDepth(int x, int y) {
|
|
|
|
static u32 GetDepth(int x, int y) {
|
|
|
|
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
|
|
|
|
const auto& framebuffer = g_state.regs.framebuffer;
|
|
|
|
|
|
|
|
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
|
|
|
u8* depth_buffer = Memory::GetPhysicalPointer(addr);
|
|
|
|
u8* depth_buffer = Memory::GetPhysicalPointer(addr);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Same vertical flip that DrawPixel applies to the color buffer.
y = (registers.framebuffer.height - y);
|
|
|
|
y = framebuffer.height - y;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Clearing the low three bits rounds y down to a multiple of 8; coarse_y * stride below
// skips that many whole rows.
const u32 coarse_y = y & ~7;
|
|
|
|
const u32 coarse_y = y & ~7;
|
|
|
|
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
|
|
|
|
u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format);
|
|
|
|
// Bytes in one full-width row of depth pixels.
u32 stride = registers.framebuffer.width * bytes_per_pixel;
|
|
|
|
u32 stride = framebuffer.width * bytes_per_pixel;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
|
|
|
u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
|
|
|
u8* src_pixel = depth_buffer + src_offset;
|
|
|
|
u8* src_pixel = depth_buffer + src_offset;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// One case per supported depth format; every case returns directly.
switch (registers.framebuffer.depth_format) {
|
|
|
|
switch (framebuffer.depth_format) {
|
|
|
|
case Pica::Regs::DepthFormat::D16:
|
|
|
|
case Regs::DepthFormat::D16:
|
|
|
|
return Color::DecodeD16(src_pixel);
|
|
|
|
return Color::DecodeD16(src_pixel);
|
|
|
|
case Pica::Regs::DepthFormat::D24:
|
|
|
|
case Regs::DepthFormat::D24:
|
|
|
|
return Color::DecodeD24(src_pixel);
|
|
|
|
return Color::DecodeD24(src_pixel);
|
|
|
|
case Pica::Regs::DepthFormat::D24S8:
|
|
|
|
case Regs::DepthFormat::D24S8:
|
|
|
|
// Only the .x member of the decoded value is returned.
// NOTE(review): presumably .x is the depth part and stencil lives in another member - confirm.
return Color::DecodeD24S8(src_pixel).x;
|
|
|
|
return Color::DecodeD24S8(src_pixel).x;
|
|
|
|
// Any other format value: log it, hit UNIMPLEMENTED(), then fall back to returning 0.
default:
|
|
|
|
default:
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
return 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void SetDepth(int x, int y, u32 value) {
|
|
|
|
static void SetDepth(int x, int y, u32 value) {
|
|
|
|
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
|
|
|
|
const auto& framebuffer = g_state.regs.framebuffer;
|
|
|
|
|
|
|
|
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
|
|
|
u8* depth_buffer = Memory::GetPhysicalPointer(addr);
|
|
|
|
u8* depth_buffer = Memory::GetPhysicalPointer(addr);
|
|
|
|
|
|
|
|
|
|
|
|
y = (registers.framebuffer.height - y);
|
|
|
|
y = framebuffer.height - y;
|
|
|
|
|
|
|
|
|
|
|
|
const u32 coarse_y = y & ~7;
|
|
|
|
const u32 coarse_y = y & ~7;
|
|
|
|
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
|
|
|
|
u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format);
|
|
|
|
u32 stride = registers.framebuffer.width * bytes_per_pixel;
|
|
|
|
u32 stride = framebuffer.width * bytes_per_pixel;
|
|
|
|
|
|
|
|
|
|
|
|
u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
|
|
|
u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
|
|
|
u8* dst_pixel = depth_buffer + dst_offset;
|
|
|
|
u8* dst_pixel = depth_buffer + dst_offset;
|
|
|
|
|
|
|
|
|
|
|
|
switch (registers.framebuffer.depth_format) {
|
|
|
|
switch (framebuffer.depth_format) {
|
|
|
|
case Pica::Regs::DepthFormat::D16:
|
|
|
|
case Regs::DepthFormat::D16:
|
|
|
|
Color::EncodeD16(value, dst_pixel);
|
|
|
|
Color::EncodeD16(value, dst_pixel);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case Pica::Regs::DepthFormat::D24:
|
|
|
|
case Regs::DepthFormat::D24:
|
|
|
|
Color::EncodeD24(value, dst_pixel);
|
|
|
|
Color::EncodeD24(value, dst_pixel);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case Pica::Regs::DepthFormat::D24S8:
|
|
|
|
case Regs::DepthFormat::D24S8:
|
|
|
|
// TODO(Subv): Implement the stencil buffer
|
|
|
|
// TODO(Subv): Implement the stencil buffer
|
|
|
|
Color::EncodeD24S8(value, 0, dst_pixel);
|
|
|
|
Color::EncodeD24S8(value, 0, dst_pixel);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -200,6 +204,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
const VertexShader::OutputVertex& v2,
|
|
|
|
const VertexShader::OutputVertex& v2,
|
|
|
|
bool reversed = false)
|
|
|
|
bool reversed = false)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
|
|
|
|
const auto& regs = g_state.regs;
|
|
|
|
Common::Profiling::ScopeTimer timer(rasterization_category);
|
|
|
|
Common::Profiling::ScopeTimer timer(rasterization_category);
|
|
|
|
|
|
|
|
|
|
|
|
// vertex positions in rasterizer coordinates
|
|
|
|
// vertex positions in rasterizer coordinates
|
|
|
@ -216,14 +221,14 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
ScreenToRasterizerCoordinates(v1.screenpos),
|
|
|
|
ScreenToRasterizerCoordinates(v1.screenpos),
|
|
|
|
ScreenToRasterizerCoordinates(v2.screenpos) };
|
|
|
|
ScreenToRasterizerCoordinates(v2.screenpos) };
|
|
|
|
|
|
|
|
|
|
|
|
if (registers.cull_mode == Regs::CullMode::KeepAll) {
|
|
|
|
if (regs.cull_mode == Regs::CullMode::KeepAll) {
|
|
|
|
// Make sure we always end up with a triangle wound counter-clockwise
|
|
|
|
// Make sure we always end up with a triangle wound counter-clockwise
|
|
|
|
if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
|
|
|
|
if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
|
|
|
|
ProcessTriangleInternal(v0, v2, v1, true);
|
|
|
|
ProcessTriangleInternal(v0, v2, v1, true);
|
|
|
|
return;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) {
|
|
|
|
if (!reversed && regs.cull_mode == Regs::CullMode::KeepClockWise) {
|
|
|
|
// Reverse vertex order and use the CCW code path.
|
|
|
|
// Reverse vertex order and use the CCW code path.
|
|
|
|
ProcessTriangleInternal(v0, v2, v1, true);
|
|
|
|
ProcessTriangleInternal(v0, v2, v1, true);
|
|
|
|
return;
|
|
|
|
return;
|
|
|
@ -268,8 +273,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
|
|
|
|
|
|
|
|
auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
|
|
|
|
auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
|
|
|
|
|
|
|
|
|
|
|
|
auto textures = registers.GetTextures();
|
|
|
|
auto textures = regs.GetTextures();
|
|
|
|
auto tev_stages = registers.GetTevStages();
|
|
|
|
auto tev_stages = regs.GetTevStages();
|
|
|
|
|
|
|
|
|
|
|
|
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
|
|
|
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
|
|
|
// TODO: Not sure if looping through x first might be faster
|
|
|
|
// TODO: Not sure if looping through x first might be faster
|
|
|
@ -384,8 +389,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
// analogously.
|
|
|
|
// analogously.
|
|
|
|
Math::Vec4<u8> combiner_output;
|
|
|
|
Math::Vec4<u8> combiner_output;
|
|
|
|
Math::Vec4<u8> combiner_buffer = {
|
|
|
|
Math::Vec4<u8> combiner_buffer = {
|
|
|
|
registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g,
|
|
|
|
regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g,
|
|
|
|
registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a
|
|
|
|
regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
|
|
|
|
for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
|
|
|
@ -609,51 +614,52 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
|
|
|
|
combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
|
|
|
|
combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
|
|
|
|
combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
|
|
|
|
|
|
|
|
|
|
|
|
if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
|
|
|
|
if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
|
|
|
|
combiner_buffer.r() = combiner_output.r();
|
|
|
|
combiner_buffer.r() = combiner_output.r();
|
|
|
|
combiner_buffer.g() = combiner_output.g();
|
|
|
|
combiner_buffer.g() = combiner_output.g();
|
|
|
|
combiner_buffer.b() = combiner_output.b();
|
|
|
|
combiner_buffer.b() = combiner_output.b();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
|
|
|
|
if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
|
|
|
|
combiner_buffer.a() = combiner_output.a();
|
|
|
|
combiner_buffer.a() = combiner_output.a();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (registers.output_merger.alpha_test.enable) {
|
|
|
|
const auto& output_merger = regs.output_merger;
|
|
|
|
|
|
|
|
if (output_merger.alpha_test.enable) {
|
|
|
|
bool pass = false;
|
|
|
|
bool pass = false;
|
|
|
|
|
|
|
|
|
|
|
|
switch (registers.output_merger.alpha_test.func) {
|
|
|
|
switch (output_merger.alpha_test.func) {
|
|
|
|
case registers.output_merger.Never:
|
|
|
|
case Regs::CompareFunc::Never:
|
|
|
|
pass = false;
|
|
|
|
pass = false;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.Always:
|
|
|
|
case Regs::CompareFunc::Always:
|
|
|
|
pass = true;
|
|
|
|
pass = true;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.Equal:
|
|
|
|
case Regs::CompareFunc::Equal:
|
|
|
|
pass = combiner_output.a() == registers.output_merger.alpha_test.ref;
|
|
|
|
pass = combiner_output.a() == output_merger.alpha_test.ref;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.NotEqual:
|
|
|
|
case Regs::CompareFunc::NotEqual:
|
|
|
|
pass = combiner_output.a() != registers.output_merger.alpha_test.ref;
|
|
|
|
pass = combiner_output.a() != output_merger.alpha_test.ref;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.LessThan:
|
|
|
|
case Regs::CompareFunc::LessThan:
|
|
|
|
pass = combiner_output.a() < registers.output_merger.alpha_test.ref;
|
|
|
|
pass = combiner_output.a() < output_merger.alpha_test.ref;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.LessThanOrEqual:
|
|
|
|
case Regs::CompareFunc::LessThanOrEqual:
|
|
|
|
pass = combiner_output.a() <= registers.output_merger.alpha_test.ref;
|
|
|
|
pass = combiner_output.a() <= output_merger.alpha_test.ref;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.GreaterThan:
|
|
|
|
case Regs::CompareFunc::GreaterThan:
|
|
|
|
pass = combiner_output.a() > registers.output_merger.alpha_test.ref;
|
|
|
|
pass = combiner_output.a() > output_merger.alpha_test.ref;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.GreaterThanOrEqual:
|
|
|
|
case Regs::CompareFunc::GreaterThanOrEqual:
|
|
|
|
pass = combiner_output.a() >= registers.output_merger.alpha_test.ref;
|
|
|
|
pass = combiner_output.a() >= output_merger.alpha_test.ref;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -662,8 +668,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
|
|
|
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
|
|
|
if (registers.output_merger.depth_test_enable) {
|
|
|
|
if (output_merger.depth_test_enable) {
|
|
|
|
unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format);
|
|
|
|
unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
|
|
|
|
u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 +
|
|
|
|
u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 +
|
|
|
|
v1.screenpos[2].ToFloat32() * w1 +
|
|
|
|
v1.screenpos[2].ToFloat32() * w1 +
|
|
|
|
v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
|
|
|
|
v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
|
|
|
@ -671,36 +677,36 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
|
|
|
|
|
|
|
|
bool pass = false;
|
|
|
|
bool pass = false;
|
|
|
|
|
|
|
|
|
|
|
|
switch (registers.output_merger.depth_test_func) {
|
|
|
|
switch (output_merger.depth_test_func) {
|
|
|
|
case registers.output_merger.Never:
|
|
|
|
case Regs::CompareFunc::Never:
|
|
|
|
pass = false;
|
|
|
|
pass = false;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.Always:
|
|
|
|
case Regs::CompareFunc::Always:
|
|
|
|
pass = true;
|
|
|
|
pass = true;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.Equal:
|
|
|
|
case Regs::CompareFunc::Equal:
|
|
|
|
pass = z == ref_z;
|
|
|
|
pass = z == ref_z;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.NotEqual:
|
|
|
|
case Regs::CompareFunc::NotEqual:
|
|
|
|
pass = z != ref_z;
|
|
|
|
pass = z != ref_z;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.LessThan:
|
|
|
|
case Regs::CompareFunc::LessThan:
|
|
|
|
pass = z < ref_z;
|
|
|
|
pass = z < ref_z;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.LessThanOrEqual:
|
|
|
|
case Regs::CompareFunc::LessThanOrEqual:
|
|
|
|
pass = z <= ref_z;
|
|
|
|
pass = z <= ref_z;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.GreaterThan:
|
|
|
|
case Regs::CompareFunc::GreaterThan:
|
|
|
|
pass = z > ref_z;
|
|
|
|
pass = z > ref_z;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case registers.output_merger.GreaterThanOrEqual:
|
|
|
|
case Regs::CompareFunc::GreaterThanOrEqual:
|
|
|
|
pass = z >= ref_z;
|
|
|
|
pass = z >= ref_z;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -708,59 +714,59 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
if (!pass)
|
|
|
|
if (!pass)
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
|
|
if (registers.output_merger.depth_write_enable)
|
|
|
|
if (output_merger.depth_write_enable)
|
|
|
|
SetDepth(x >> 4, y >> 4, z);
|
|
|
|
SetDepth(x >> 4, y >> 4, z);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
auto dest = GetPixel(x >> 4, y >> 4);
|
|
|
|
auto dest = GetPixel(x >> 4, y >> 4);
|
|
|
|
Math::Vec4<u8> blend_output = combiner_output;
|
|
|
|
Math::Vec4<u8> blend_output = combiner_output;
|
|
|
|
|
|
|
|
|
|
|
|
if (registers.output_merger.alphablend_enable) {
|
|
|
|
if (output_merger.alphablend_enable) {
|
|
|
|
auto params = registers.output_merger.alpha_blending;
|
|
|
|
auto params = output_merger.alpha_blending;
|
|
|
|
|
|
|
|
|
|
|
|
auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
|
|
|
|
auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> {
|
|
|
|
switch (factor) {
|
|
|
|
switch (factor) {
|
|
|
|
case params.Zero:
|
|
|
|
case Regs::BlendFactor::Zero :
|
|
|
|
return Math::Vec3<u8>(0, 0, 0);
|
|
|
|
return Math::Vec3<u8>(0, 0, 0);
|
|
|
|
|
|
|
|
|
|
|
|
case params.One:
|
|
|
|
case Regs::BlendFactor::One :
|
|
|
|
return Math::Vec3<u8>(255, 255, 255);
|
|
|
|
return Math::Vec3<u8>(255, 255, 255);
|
|
|
|
|
|
|
|
|
|
|
|
case params.SourceColor:
|
|
|
|
case Regs::BlendFactor::SourceColor:
|
|
|
|
return combiner_output.rgb();
|
|
|
|
return combiner_output.rgb();
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusSourceColor:
|
|
|
|
case Regs::BlendFactor::OneMinusSourceColor:
|
|
|
|
return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b());
|
|
|
|
return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b());
|
|
|
|
|
|
|
|
|
|
|
|
case params.DestColor:
|
|
|
|
case Regs::BlendFactor::DestColor:
|
|
|
|
return dest.rgb();
|
|
|
|
return dest.rgb();
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusDestColor:
|
|
|
|
case Regs::BlendFactor::OneMinusDestColor:
|
|
|
|
return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b());
|
|
|
|
return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b());
|
|
|
|
|
|
|
|
|
|
|
|
case params.SourceAlpha:
|
|
|
|
case Regs::BlendFactor::SourceAlpha:
|
|
|
|
return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a());
|
|
|
|
return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a());
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusSourceAlpha:
|
|
|
|
case Regs::BlendFactor::OneMinusSourceAlpha:
|
|
|
|
return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a());
|
|
|
|
return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a());
|
|
|
|
|
|
|
|
|
|
|
|
case params.DestAlpha:
|
|
|
|
case Regs::BlendFactor::DestAlpha:
|
|
|
|
return Math::Vec3<u8>(dest.a(), dest.a(), dest.a());
|
|
|
|
return Math::Vec3<u8>(dest.a(), dest.a(), dest.a());
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusDestAlpha:
|
|
|
|
case Regs::BlendFactor::OneMinusDestAlpha:
|
|
|
|
return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a());
|
|
|
|
return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a());
|
|
|
|
|
|
|
|
|
|
|
|
case params.ConstantColor:
|
|
|
|
case Regs::BlendFactor::ConstantColor:
|
|
|
|
return Math::Vec3<u8>(registers.output_merger.blend_const.r, registers.output_merger.blend_const.g, registers.output_merger.blend_const.b);
|
|
|
|
return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b);
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusConstantColor:
|
|
|
|
case Regs::BlendFactor::OneMinusConstantColor:
|
|
|
|
return Math::Vec3<u8>(255 - registers.output_merger.blend_const.r, 255 - registers.output_merger.blend_const.g, 255 - registers.output_merger.blend_const.b);
|
|
|
|
return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b);
|
|
|
|
|
|
|
|
|
|
|
|
case params.ConstantAlpha:
|
|
|
|
case Regs::BlendFactor::ConstantAlpha:
|
|
|
|
return Math::Vec3<u8>(registers.output_merger.blend_const.a, registers.output_merger.blend_const.a, registers.output_merger.blend_const.a);
|
|
|
|
return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a);
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusConstantAlpha:
|
|
|
|
case Regs::BlendFactor::OneMinusConstantAlpha:
|
|
|
|
return Math::Vec3<u8>(255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a);
|
|
|
|
return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a);
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
|
|
|
@ -769,31 +775,31 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
|
|
|
|
auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 {
|
|
|
|
switch (factor) {
|
|
|
|
switch (factor) {
|
|
|
|
case params.Zero:
|
|
|
|
case Regs::BlendFactor::Zero:
|
|
|
|
return 0;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
|
|
case params.One:
|
|
|
|
case Regs::BlendFactor::One:
|
|
|
|
return 255;
|
|
|
|
return 255;
|
|
|
|
|
|
|
|
|
|
|
|
case params.SourceAlpha:
|
|
|
|
case Regs::BlendFactor::SourceAlpha:
|
|
|
|
return combiner_output.a();
|
|
|
|
return combiner_output.a();
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusSourceAlpha:
|
|
|
|
case Regs::BlendFactor::OneMinusSourceAlpha:
|
|
|
|
return 255 - combiner_output.a();
|
|
|
|
return 255 - combiner_output.a();
|
|
|
|
|
|
|
|
|
|
|
|
case params.DestAlpha:
|
|
|
|
case Regs::BlendFactor::DestAlpha:
|
|
|
|
return dest.a();
|
|
|
|
return dest.a();
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusDestAlpha:
|
|
|
|
case Regs::BlendFactor::OneMinusDestAlpha:
|
|
|
|
return 255 - dest.a();
|
|
|
|
return 255 - dest.a();
|
|
|
|
|
|
|
|
|
|
|
|
case params.ConstantAlpha:
|
|
|
|
case Regs::BlendFactor::ConstantAlpha:
|
|
|
|
return registers.output_merger.blend_const.a;
|
|
|
|
return output_merger.blend_const.a;
|
|
|
|
|
|
|
|
|
|
|
|
case params.OneMinusConstantAlpha:
|
|
|
|
case Regs::BlendFactor::OneMinusConstantAlpha:
|
|
|
|
return 255 - registers.output_merger.blend_const.a;
|
|
|
|
return 255 - output_merger.blend_const.a;
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
|
|
|
|
LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
|
|
|
@ -802,7 +808,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
using BlendEquation = decltype(params)::BlendEquation;
|
|
|
|
using BlendEquation = Regs::BlendEquation;
|
|
|
|
static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
|
|
|
|
static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
|
|
|
|
const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
|
|
|
|
const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
|
|
|
|
BlendEquation equation) {
|
|
|
|
BlendEquation equation) {
|
|
|
@ -812,29 +818,29 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
auto dst_result = (dest * destfactor).Cast<int>();
|
|
|
|
auto dst_result = (dest * destfactor).Cast<int>();
|
|
|
|
|
|
|
|
|
|
|
|
switch (equation) {
|
|
|
|
switch (equation) {
|
|
|
|
case BlendEquation::Add:
|
|
|
|
case Regs::BlendEquation::Add:
|
|
|
|
result = (src_result + dst_result) / 255;
|
|
|
|
result = (src_result + dst_result) / 255;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case BlendEquation::Subtract:
|
|
|
|
case Regs::BlendEquation::Subtract:
|
|
|
|
result = (src_result - dst_result) / 255;
|
|
|
|
result = (src_result - dst_result) / 255;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case BlendEquation::ReverseSubtract:
|
|
|
|
case Regs::BlendEquation::ReverseSubtract:
|
|
|
|
result = (dst_result - src_result) / 255;
|
|
|
|
result = (dst_result - src_result) / 255;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: How do these two actually work?
|
|
|
|
// TODO: How do these two actually work?
|
|
|
|
// OpenGL doesn't include the blend factors in the min/max computations,
|
|
|
|
// OpenGL doesn't include the blend factors in the min/max computations,
|
|
|
|
// but is this what the 3DS actually does?
|
|
|
|
// but is this what the 3DS actually does?
|
|
|
|
case BlendEquation::Min:
|
|
|
|
case Regs::BlendEquation::Min:
|
|
|
|
result.r() = std::min(src.r(), dest.r());
|
|
|
|
result.r() = std::min(src.r(), dest.r());
|
|
|
|
result.g() = std::min(src.g(), dest.g());
|
|
|
|
result.g() = std::min(src.g(), dest.g());
|
|
|
|
result.b() = std::min(src.b(), dest.b());
|
|
|
|
result.b() = std::min(src.b(), dest.b());
|
|
|
|
result.a() = std::min(src.a(), dest.a());
|
|
|
|
result.a() = std::min(src.a(), dest.a());
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case BlendEquation::Max:
|
|
|
|
case Regs::BlendEquation::Max:
|
|
|
|
result.r() = std::max(src.r(), dest.r());
|
|
|
|
result.r() = std::max(src.r(), dest.r());
|
|
|
|
result.g() = std::max(src.g(), dest.g());
|
|
|
|
result.g() = std::max(src.g(), dest.g());
|
|
|
|
result.b() = std::max(src.b(), dest.b());
|
|
|
|
result.b() = std::max(src.b(), dest.b());
|
|
|
@ -860,15 +866,15 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
|
|
|
blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
|
|
|
|
blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
|
|
|
|
blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
|
|
|
|
blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op);
|
|
|
|
LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op);
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const Math::Vec4<u8> result = {
|
|
|
|
const Math::Vec4<u8> result = {
|
|
|
|
registers.output_merger.red_enable ? blend_output.r() : dest.r(),
|
|
|
|
output_merger.red_enable ? blend_output.r() : dest.r(),
|
|
|
|
registers.output_merger.green_enable ? blend_output.g() : dest.g(),
|
|
|
|
output_merger.green_enable ? blend_output.g() : dest.g(),
|
|
|
|
registers.output_merger.blue_enable ? blend_output.b() : dest.b(),
|
|
|
|
output_merger.blue_enable ? blend_output.b() : dest.b(),
|
|
|
|
registers.output_merger.alpha_enable ? blend_output.a() : dest.a()
|
|
|
|
output_merger.alpha_enable ? blend_output.a() : dest.a()
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
DrawPixel(x >> 4, y >> 4, result);
|
|
|
|
DrawPixel(x >> 4, y >> 4, result);
|
|
|
|