From e24717bca04a51fe185e5dbbb4918e31c923e8fa Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Sat, 28 Jan 2017 15:59:36 -0800
Subject: [PATCH 1/5] VideoCore: Move software rasterizer files to
 sub-directory

---
 src/video_core/CMakeLists.txt                      | 12 ++++++------
 src/video_core/renderer_base.cpp                   |  2 +-
 src/video_core/{ => swrasterizer}/clipper.cpp      |  4 ++--
 src/video_core/{ => swrasterizer}/clipper.h        |  0
 src/video_core/{ => swrasterizer}/rasterizer.cpp   |  2 +-
 src/video_core/{ => swrasterizer}/rasterizer.h     |  0
 src/video_core/{ => swrasterizer}/swrasterizer.cpp |  4 ++--
 src/video_core/{ => swrasterizer}/swrasterizer.h   |  0
 8 files changed, 12 insertions(+), 12 deletions(-)
 rename src/video_core/{ => swrasterizer}/clipper.cpp (98%)
 rename src/video_core/{ => swrasterizer}/clipper.h (100%)
 rename src/video_core/{ => swrasterizer}/rasterizer.cpp (99%)
 rename src/video_core/{ => swrasterizer}/rasterizer.h (100%)
 rename src/video_core/{ => swrasterizer}/swrasterizer.cpp (81%)
 rename src/video_core/{ => swrasterizer}/swrasterizer.h (100%)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 11bc61e14..962228680 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,10 +1,8 @@
 set(SRCS
-            clipper.cpp
             command_processor.cpp
             debug_utils/debug_utils.cpp
             pica.cpp
             primitive_assembly.cpp
-            rasterizer.cpp
             regs.cpp
             renderer_base.cpp
             renderer_opengl/gl_rasterizer.cpp
@@ -15,7 +13,9 @@ set(SRCS
             renderer_opengl/renderer_opengl.cpp
             shader/shader.cpp
             shader/shader_interpreter.cpp
-            swrasterizer.cpp
+            swrasterizer/clipper.cpp
+            swrasterizer/rasterizer.cpp
+            swrasterizer/swrasterizer.cpp
             texture/etc1.cpp
             texture/texture_decode.cpp
             vertex_loader.cpp
@@ -23,7 +23,6 @@ set(SRCS
             )
 
 set(HEADERS
-            clipper.h
             command_processor.h
             debug_utils/debug_utils.h
             gpu_debugger.h
@@ -31,7 +30,6 @@ set(HEADERS
             pica_state.h
             pica_types.h
             primitive_assembly.h
-            rasterizer.h
             rasterizer_interface.h
             regs.h
             regs_framebuffer.h
@@ -52,7 +50,9 @@ set(HEADERS
             shader/debug_data.h
             shader/shader.h
             shader/shader_interpreter.h
-            swrasterizer.h
+            swrasterizer/clipper.h
+            swrasterizer/rasterizer.h
+            swrasterizer/swrasterizer.h
             texture/etc1.h
             texture/texture_decode.h
             utils.h
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index fd38175b3..f6ece5c4b 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -6,7 +6,7 @@
 #include <memory>
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/swrasterizer.h"
+#include "video_core/swrasterizer/swrasterizer.h"
 #include "video_core/video_core.h"
 
 void RendererBase::RefreshRasterizerSetting() {
diff --git a/src/video_core/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp
similarity index 98%
rename from src/video_core/clipper.cpp
rename to src/video_core/swrasterizer/clipper.cpp
index 1e8e751ba..2d80822d9 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/swrasterizer/clipper.cpp
@@ -11,11 +11,11 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/vector_math.h"
-#include "video_core/clipper.h"
 #include "video_core/pica_state.h"
 #include "video_core/pica_types.h"
-#include "video_core/rasterizer.h"
 #include "video_core/shader/shader.h"
+#include "video_core/swrasterizer/clipper.h"
+#include "video_core/swrasterizer/rasterizer.h"
 
 using Pica::Rasterizer::Vertex;
 
diff --git a/src/video_core/clipper.h b/src/video_core/swrasterizer/clipper.h
similarity index 100%
rename from src/video_core/clipper.h
rename to src/video_core/swrasterizer/clipper.h
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
similarity index 99%
rename from src/video_core/rasterizer.cpp
rename to src/video_core/swrasterizer/rasterizer.cpp
index 83a08ebd7..17ba59144 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -18,11 +18,11 @@
 #include "video_core/debug_utils/debug_utils.h"
 #include "video_core/pica_state.h"
 #include "video_core/pica_types.h"
-#include "video_core/rasterizer.h"
 #include "video_core/regs_framebuffer.h"
 #include "video_core/regs_rasterizer.h"
 #include "video_core/regs_texturing.h"
 #include "video_core/shader/shader.h"
+#include "video_core/swrasterizer/rasterizer.h"
 #include "video_core/texture/texture_decode.h"
 #include "video_core/utils.h"
 
diff --git a/src/video_core/rasterizer.h b/src/video_core/swrasterizer/rasterizer.h
similarity index 100%
rename from src/video_core/rasterizer.h
rename to src/video_core/swrasterizer/rasterizer.h
diff --git a/src/video_core/swrasterizer.cpp b/src/video_core/swrasterizer/swrasterizer.cpp
similarity index 81%
rename from src/video_core/swrasterizer.cpp
rename to src/video_core/swrasterizer/swrasterizer.cpp
index 9cd21f72b..402b705dd 100644
--- a/src/video_core/swrasterizer.cpp
+++ b/src/video_core/swrasterizer/swrasterizer.cpp
@@ -2,8 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "video_core/clipper.h"
-#include "video_core/swrasterizer.h"
+#include "video_core/swrasterizer/clipper.h"
+#include "video_core/swrasterizer/swrasterizer.h"
 
 namespace VideoCore {
 
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer/swrasterizer.h
similarity index 100%
rename from src/video_core/swrasterizer.h
rename to src/video_core/swrasterizer/swrasterizer.h

From e1ad7d69b981c7ed249ba7efd4287d712db3747d Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Sun, 29 Jan 2017 17:43:09 -0800
Subject: [PATCH 2/5] SWRasterizer: Move framebuffer operation functions to
 their own file

---
 src/video_core/CMakeLists.txt               |   2 +
 src/video_core/swrasterizer/framebuffer.cpp | 259 ++++++++++++++++++++
 src/video_core/swrasterizer/framebuffer.h   |  23 ++
 src/video_core/swrasterizer/rasterizer.cpp  | 237 +-----------------
 4 files changed, 285 insertions(+), 236 deletions(-)
 create mode 100644 src/video_core/swrasterizer/framebuffer.cpp
 create mode 100644 src/video_core/swrasterizer/framebuffer.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 962228680..479edfff4 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -14,6 +14,7 @@ set(SRCS
             shader/shader.cpp
             shader/shader_interpreter.cpp
             swrasterizer/clipper.cpp
+            swrasterizer/framebuffer.cpp
             swrasterizer/rasterizer.cpp
             swrasterizer/swrasterizer.cpp
             texture/etc1.cpp
@@ -51,6 +52,7 @@ set(HEADERS
             shader/shader.h
             shader/shader_interpreter.h
             swrasterizer/clipper.h
+            swrasterizer/framebuffer.h
             swrasterizer/rasterizer.h
             swrasterizer/swrasterizer.h
             texture/etc1.h
diff --git a/src/video_core/swrasterizer/framebuffer.cpp b/src/video_core/swrasterizer/framebuffer.cpp
new file mode 100644
index 000000000..4b31eda89
--- /dev/null
+++ b/src/video_core/swrasterizer/framebuffer.cpp
@@ -0,0 +1,259 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include "common/assert.h"
+#include "common/color.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "common/vector_math.h"
+#include "core/hw/gpu.h"
+#include "core/memory.h"
+#include "video_core/pica_state.h"
+#include "video_core/regs_framebuffer.h"
+#include "video_core/swrasterizer/framebuffer.h"
+#include "video_core/utils.h"
+
+namespace Pica {
+namespace Rasterizer {
+
+void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { // Encodes color at pixel (x, y).
+    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
+    const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
+
+    // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
+    // NOTE: The framebuffer height register contains the actual FB height minus one.
+    y = framebuffer.height - y;
+
+    const u32 coarse_y = y & ~7; // y rounded down to a multiple of 8
+    u32 bytes_per_pixel =
+        GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
+    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
+                     coarse_y * framebuffer.width * bytes_per_pixel; // bytes in coarse_y rows
+    u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
+
+    switch (framebuffer.color_format) { // pick the encoder matching the configured format
+    case FramebufferRegs::ColorFormat::RGBA8:
+        Color::EncodeRGBA8(color, dst_pixel);
+        break;
+
+    case FramebufferRegs::ColorFormat::RGB8:
+        Color::EncodeRGB8(color, dst_pixel);
+        break;
+
+    case FramebufferRegs::ColorFormat::RGB5A1:
+        Color::EncodeRGB5A1(color, dst_pixel);
+        break;
+
+    case FramebufferRegs::ColorFormat::RGB565:
+        Color::EncodeRGB565(color, dst_pixel);
+        break;
+
+    case FramebufferRegs::ColorFormat::RGBA4:
+        Color::EncodeRGBA4(color, dst_pixel);
+        break;
+
+    default: // unrecognized format: nothing is written, only logged
+        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
+                     framebuffer.color_format.Value());
+        UNIMPLEMENTED();
+    }
+}
+
+const Math::Vec4<u8> GetPixel(int x, int y) { // Decodes the color-buffer pixel at (x, y).
+    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
+    const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
+
+    y = framebuffer.height - y; // same bottom-to-top flip as in DrawPixel
+
+    const u32 coarse_y = y & ~7; // y rounded down to a multiple of 8
+    u32 bytes_per_pixel =
+        GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
+                     coarse_y * framebuffer.width * bytes_per_pixel; // bytes in coarse_y rows
+    u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
+
+    switch (framebuffer.color_format) { // every known format returns directly from its case
+    case FramebufferRegs::ColorFormat::RGBA8:
+        return Color::DecodeRGBA8(src_pixel);
+
+    case FramebufferRegs::ColorFormat::RGB8:
+        return Color::DecodeRGB8(src_pixel);
+
+    case FramebufferRegs::ColorFormat::RGB5A1:
+        return Color::DecodeRGB5A1(src_pixel);
+
+    case FramebufferRegs::ColorFormat::RGB565:
+        return Color::DecodeRGB565(src_pixel);
+
+    case FramebufferRegs::ColorFormat::RGBA4:
+        return Color::DecodeRGBA4(src_pixel);
+
+    default: // unrecognized format: log, then fall out of the switch
+        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
+                     framebuffer.color_format.Value());
+        UNIMPLEMENTED();
+    }
+
+    return {0, 0, 0, 0}; // only reachable from the default case above
+}
+
+u32 GetDepth(int x, int y) { // Reads the depth value stored for pixel (x, y).
+    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+    y = framebuffer.height - y; // same y flip as the color-buffer accessors
+
+    const u32 coarse_y = y & ~7; // y rounded down to a multiple of 8
+    u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
+    u32 stride = framebuffer.width * bytes_per_pixel;
+
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* src_pixel = depth_buffer + src_offset;
+
+    switch (framebuffer.depth_format) {
+    case FramebufferRegs::DepthFormat::D16:
+        return Color::DecodeD16(src_pixel);
+    case FramebufferRegs::DepthFormat::D24:
+        return Color::DecodeD24(src_pixel);
+    case FramebufferRegs::DepthFormat::D24S8:
+        return Color::DecodeD24S8(src_pixel).x; // .x is the depth part; GetStencil reads .y
+    default: // NOTE(review): depth_format is logged without .Value(), unlike color_format
+        LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
+        UNIMPLEMENTED();
+        return 0;
+    }
+}
+
+u8 GetStencil(int x, int y) { // Reads the stencil value for pixel (x, y); 0 unless D24S8.
+    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+    y = framebuffer.height - y; // same y flip as the color-buffer accessors
+
+    const u32 coarse_y = y & ~7; // y rounded down to a multiple of 8
+    u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
+    u32 stride = framebuffer.width * bytes_per_pixel;
+
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* src_pixel = depth_buffer + src_offset;
+
+    switch (framebuffer.depth_format) {
+    case FramebufferRegs::DepthFormat::D24S8:
+        return Color::DecodeD24S8(src_pixel).y; // .y is the stencil part; GetDepth reads .x
+
+    default: // any other format only warns and reports a stencil of 0
+        LOG_WARNING( // NOTE(review): message says "function"; "format" is probably meant
+            HW_GPU,
+            "GetStencil called for function which doesn't have a stencil component (format %u)",
+            framebuffer.depth_format);
+        return 0;
+    }
+}
+
+void SetDepth(int x, int y, u32 value) { // Stores depth value for pixel (x, y).
+    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+    y = framebuffer.height - y; // same y flip as the color-buffer accessors
+
+    const u32 coarse_y = y & ~7; // y rounded down to a multiple of 8
+    u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
+    u32 stride = framebuffer.width * bytes_per_pixel;
+
+    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* dst_pixel = depth_buffer + dst_offset;
+
+    switch (framebuffer.depth_format) { // pick the encoder matching the depth format
+    case FramebufferRegs::DepthFormat::D16:
+        Color::EncodeD16(value, dst_pixel);
+        break;
+
+    case FramebufferRegs::DepthFormat::D24:
+        Color::EncodeD24(value, dst_pixel);
+        break;
+
+    case FramebufferRegs::DepthFormat::D24S8:
+        Color::EncodeD24X8(value, dst_pixel); // NOTE(review): presumably keeps stencil - confirm
+        break;
+
+    default: // unrecognized format: nothing is written, only logged
+        LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
+        UNIMPLEMENTED();
+        break;
+    }
+}
+
+void SetStencil(int x, int y, u8 value) { // Stores stencil value for pixel (x, y) if D24S8.
+    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+    y = framebuffer.height - y; // same y flip as the color-buffer accessors
+
+    const u32 coarse_y = y & ~7; // y rounded down to a multiple of 8
+    u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
+    u32 stride = framebuffer.width * bytes_per_pixel;
+
+    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* dst_pixel = depth_buffer + dst_offset;
+
+    switch (framebuffer.depth_format) {
+    case Pica::FramebufferRegs::DepthFormat::D16:
+    case Pica::FramebufferRegs::DepthFormat::D24:
+        // Nothing to do
+        break;
+
+    case Pica::FramebufferRegs::DepthFormat::D24S8:
+        Color::EncodeX24S8(value, dst_pixel); // NOTE(review): presumably keeps depth - confirm
+        break;
+
+    default: // unrecognized format: nothing is written, only logged
+        LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
+        UNIMPLEMENTED();
+        break;
+    }
+}
+
+u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) {
+    switch (action) { // returns the stencil value that results from applying action
+    case FramebufferRegs::StencilAction::Keep:
+        return old_stencil;
+
+    case FramebufferRegs::StencilAction::Zero:
+        return 0;
+
+    case FramebufferRegs::StencilAction::Replace:
+        return ref;
+
+    case FramebufferRegs::StencilAction::Increment:
+        // Saturated increment
+        return std::min<u8>(old_stencil, 254) + 1; // clamp first, so 255 stays 255
+
+    case FramebufferRegs::StencilAction::Decrement:
+        // Saturated decrement
+        return std::max<u8>(old_stencil, 1) - 1; // clamp first, so 0 stays 0
+
+    case FramebufferRegs::StencilAction::Invert:
+        return ~old_stencil; // bitwise NOT, truncated back to u8 by the return type
+
+    case FramebufferRegs::StencilAction::IncrementWrap:
+        return old_stencil + 1; // conversion to u8 wraps 255 around to 0
+
+    case FramebufferRegs::StencilAction::DecrementWrap:
+        return old_stencil - 1; // conversion to u8 wraps 0 around to 255
+
+    default:
+        LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
+        UNIMPLEMENTED();
+        return 0;
+    }
+}
+
+} // namespace Rasterizer
+} // namespace Pica
diff --git a/src/video_core/swrasterizer/framebuffer.h b/src/video_core/swrasterizer/framebuffer.h
new file mode 100644
index 000000000..220f7013b
--- /dev/null
+++ b/src/video_core/swrasterizer/framebuffer.h
@@ -0,0 +1,23 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/vector_math.h"
+#include "video_core/regs_framebuffer.h"
+
+namespace Pica {
+namespace Rasterizer {
+
+void DrawPixel(int x, int y, const Math::Vec4<u8>& color); // encode color into the color buffer
+const Math::Vec4<u8> GetPixel(int x, int y); // decode the color-buffer pixel at (x, y)
+u32 GetDepth(int x, int y); // depth value at (x, y); 0 for an unhandled depth format
+u8 GetStencil(int x, int y); // stencil value at (x, y); 0 unless the depth format is D24S8
+void SetDepth(int x, int y, u32 value); // store a depth value at (x, y)
+void SetStencil(int x, int y, u8 value); // store a stencil value; no-op for D16 and D24
+u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref);
+
+} // namespace Rasterizer
+} // namespace Pica
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index 17ba59144..cb11338b7 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -22,6 +22,7 @@
 #include "video_core/regs_rasterizer.h"
 #include "video_core/regs_texturing.h"
 #include "video_core/shader/shader.h"
+#include "video_core/swrasterizer/framebuffer.h"
 #include "video_core/swrasterizer/rasterizer.h"
 #include "video_core/texture/texture_decode.h"
 #include "video_core/utils.h"
@@ -30,242 +31,6 @@ namespace Pica {
 
 namespace Rasterizer {
 
-static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
-    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
-    const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
-
-    // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
-    // NOTE: The framebuffer height register contains the actual FB height minus one.
-    y = framebuffer.height - y;
-
-    const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel =
-        GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
-    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
-                     coarse_y * framebuffer.width * bytes_per_pixel;
-    u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
-
-    switch (framebuffer.color_format) {
-    case FramebufferRegs::ColorFormat::RGBA8:
-        Color::EncodeRGBA8(color, dst_pixel);
-        break;
-
-    case FramebufferRegs::ColorFormat::RGB8:
-        Color::EncodeRGB8(color, dst_pixel);
-        break;
-
-    case FramebufferRegs::ColorFormat::RGB5A1:
-        Color::EncodeRGB5A1(color, dst_pixel);
-        break;
-
-    case FramebufferRegs::ColorFormat::RGB565:
-        Color::EncodeRGB565(color, dst_pixel);
-        break;
-
-    case FramebufferRegs::ColorFormat::RGBA4:
-        Color::EncodeRGBA4(color, dst_pixel);
-        break;
-
-    default:
-        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
-                     framebuffer.color_format.Value());
-        UNIMPLEMENTED();
-    }
-}
-
-static const Math::Vec4<u8> GetPixel(int x, int y) {
-    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
-    const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
-
-    y = framebuffer.height - y;
-
-    const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel =
-        GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
-    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
-                     coarse_y * framebuffer.width * bytes_per_pixel;
-    u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
-
-    switch (framebuffer.color_format) {
-    case FramebufferRegs::ColorFormat::RGBA8:
-        return Color::DecodeRGBA8(src_pixel);
-
-    case FramebufferRegs::ColorFormat::RGB8:
-        return Color::DecodeRGB8(src_pixel);
-
-    case FramebufferRegs::ColorFormat::RGB5A1:
-        return Color::DecodeRGB5A1(src_pixel);
-
-    case FramebufferRegs::ColorFormat::RGB565:
-        return Color::DecodeRGB565(src_pixel);
-
-    case FramebufferRegs::ColorFormat::RGBA4:
-        return Color::DecodeRGBA4(src_pixel);
-
-    default:
-        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
-                     framebuffer.color_format.Value());
-        UNIMPLEMENTED();
-    }
-
-    return {0, 0, 0, 0};
-}
-
-static u32 GetDepth(int x, int y) {
-    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
-    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
-    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
-
-    y = framebuffer.height - y;
-
-    const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
-    u32 stride = framebuffer.width * bytes_per_pixel;
-
-    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
-    u8* src_pixel = depth_buffer + src_offset;
-
-    switch (framebuffer.depth_format) {
-    case FramebufferRegs::DepthFormat::D16:
-        return Color::DecodeD16(src_pixel);
-    case FramebufferRegs::DepthFormat::D24:
-        return Color::DecodeD24(src_pixel);
-    case FramebufferRegs::DepthFormat::D24S8:
-        return Color::DecodeD24S8(src_pixel).x;
-    default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
-        UNIMPLEMENTED();
-        return 0;
-    }
-}
-
-static u8 GetStencil(int x, int y) {
-    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
-    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
-    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
-
-    y = framebuffer.height - y;
-
-    const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
-    u32 stride = framebuffer.width * bytes_per_pixel;
-
-    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
-    u8* src_pixel = depth_buffer + src_offset;
-
-    switch (framebuffer.depth_format) {
-    case FramebufferRegs::DepthFormat::D24S8:
-        return Color::DecodeD24S8(src_pixel).y;
-
-    default:
-        LOG_WARNING(
-            HW_GPU,
-            "GetStencil called for function which doesn't have a stencil component (format %u)",
-            framebuffer.depth_format);
-        return 0;
-    }
-}
-
-static void SetDepth(int x, int y, u32 value) {
-    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
-    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
-    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
-
-    y = framebuffer.height - y;
-
-    const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
-    u32 stride = framebuffer.width * bytes_per_pixel;
-
-    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
-    u8* dst_pixel = depth_buffer + dst_offset;
-
-    switch (framebuffer.depth_format) {
-    case FramebufferRegs::DepthFormat::D16:
-        Color::EncodeD16(value, dst_pixel);
-        break;
-
-    case FramebufferRegs::DepthFormat::D24:
-        Color::EncodeD24(value, dst_pixel);
-        break;
-
-    case FramebufferRegs::DepthFormat::D24S8:
-        Color::EncodeD24X8(value, dst_pixel);
-        break;
-
-    default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
-        UNIMPLEMENTED();
-        break;
-    }
-}
-
-static void SetStencil(int x, int y, u8 value) {
-    const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
-    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
-    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
-
-    y = framebuffer.height - y;
-
-    const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
-    u32 stride = framebuffer.width * bytes_per_pixel;
-
-    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
-    u8* dst_pixel = depth_buffer + dst_offset;
-
-    switch (framebuffer.depth_format) {
-    case Pica::FramebufferRegs::DepthFormat::D16:
-    case Pica::FramebufferRegs::DepthFormat::D24:
-        // Nothing to do
-        break;
-
-    case Pica::FramebufferRegs::DepthFormat::D24S8:
-        Color::EncodeX24S8(value, dst_pixel);
-        break;
-
-    default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
-        UNIMPLEMENTED();
-        break;
-    }
-}
-
-static u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) {
-    switch (action) {
-    case FramebufferRegs::StencilAction::Keep:
-        return old_stencil;
-
-    case FramebufferRegs::StencilAction::Zero:
-        return 0;
-
-    case FramebufferRegs::StencilAction::Replace:
-        return ref;
-
-    case FramebufferRegs::StencilAction::Increment:
-        // Saturated increment
-        return std::min<u8>(old_stencil, 254) + 1;
-
-    case FramebufferRegs::StencilAction::Decrement:
-        // Saturated decrement
-        return std::max<u8>(old_stencil, 1) - 1;
-
-    case FramebufferRegs::StencilAction::Invert:
-        return ~old_stencil;
-
-    case FramebufferRegs::StencilAction::IncrementWrap:
-        return old_stencil + 1;
-
-    case FramebufferRegs::StencilAction::DecrementWrap:
-        return old_stencil - 1;
-
-    default:
-        LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
-        UNIMPLEMENTED();
-        return 0;
-    }
-}
-
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 struct Fix12P4 {
     Fix12P4() {}

From f9026e8a7a53073340f7188f433f81fe84a16976 Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Sun, 29 Jan 2017 18:10:26 -0800
Subject: [PATCH 3/5] SWRasterizer: Convert large no-capture lambdas to
 standalone functions

---
 src/video_core/swrasterizer/rasterizer.cpp | 625 ++++++++++-----------
 1 file changed, 310 insertions(+), 315 deletions(-)

diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index cb11338b7..7044a6136 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -28,9 +28,318 @@
 #include "video_core/utils.h"
 
 namespace Pica {
-
 namespace Rasterizer {
 
+using TevStageConfig = TexturingRegs::TevStageConfig;
+
+static int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) {
+    switch (mode) {
+    case TexturingRegs::TextureConfig::ClampToEdge:
+        val = std::max(val, 0);
+        val = std::min(val, (int)size - 1);
+        return val;
+
+    case TexturingRegs::TextureConfig::ClampToBorder:
+        return val;
+
+    case TexturingRegs::TextureConfig::Repeat:
+        return (int)((unsigned)val % size);
+
+    case TexturingRegs::TextureConfig::MirroredRepeat: {
+        unsigned int coord = ((unsigned)val % (2 * size));
+        if (coord >= size)
+            coord = 2 * size - 1 - coord;
+        return (int)coord;
+    }
+
+    default:
+        LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
+        UNIMPLEMENTED();
+        return 0;
+    }
+};
+
+static Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor,
+                                       const Math::Vec4<u8>& values) {
+    using ColorModifier = TevStageConfig::ColorModifier;
+
+    switch (factor) {
+    case ColorModifier::SourceColor:
+        return values.rgb();
+
+    case ColorModifier::OneMinusSourceColor:
+        return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
+
+    case ColorModifier::SourceAlpha:
+        return values.aaa();
+
+    case ColorModifier::OneMinusSourceAlpha:
+        return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>();
+
+    case ColorModifier::SourceRed:
+        return values.rrr();
+
+    case ColorModifier::OneMinusSourceRed:
+        return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>();
+
+    case ColorModifier::SourceGreen:
+        return values.ggg();
+
+    case ColorModifier::OneMinusSourceGreen:
+        return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>();
+
+    case ColorModifier::SourceBlue:
+        return values.bbb();
+
+    case ColorModifier::OneMinusSourceBlue:
+        return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
+    }
+};
+
+static u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) {
+    using AlphaModifier = TevStageConfig::AlphaModifier;
+
+    switch (factor) {
+    case AlphaModifier::SourceAlpha:
+        return values.a();
+
+    case AlphaModifier::OneMinusSourceAlpha:
+        return 255 - values.a();
+
+    case AlphaModifier::SourceRed:
+        return values.r();
+
+    case AlphaModifier::OneMinusSourceRed:
+        return 255 - values.r();
+
+    case AlphaModifier::SourceGreen:
+        return values.g();
+
+    case AlphaModifier::OneMinusSourceGreen:
+        return 255 - values.g();
+
+    case AlphaModifier::SourceBlue:
+        return values.b();
+
+    case AlphaModifier::OneMinusSourceBlue:
+        return 255 - values.b();
+    }
+};
+
+static Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) {
+    using Operation = TevStageConfig::Operation;
+
+    switch (op) {
+    case Operation::Replace:
+        return input[0];
+
+    case Operation::Modulate:
+        return ((input[0] * input[1]) / 255).Cast<u8>();
+
+    case Operation::Add: {
+        auto result = input[0] + input[1];
+        result.r() = std::min(255, result.r());
+        result.g() = std::min(255, result.g());
+        result.b() = std::min(255, result.b());
+        return result.Cast<u8>();
+    }
+
+    case Operation::AddSigned: {
+        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
+        // (byte) 128 is correct
+        auto result =
+            input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128);
+        result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
+        result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
+        result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
+        return result.Cast<u8>();
+    }
+
+    case Operation::Lerp:
+        return ((input[0] * input[2] +
+                 input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
+                255)
+            .Cast<u8>();
+
+    case Operation::Subtract: {
+        auto result = input[0].Cast<int>() - input[1].Cast<int>();
+        result.r() = std::max(0, result.r());
+        result.g() = std::max(0, result.g());
+        result.b() = std::max(0, result.b());
+        return result.Cast<u8>();
+    }
+
+    case Operation::MultiplyThenAdd: {
+        auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
+        result.r() = std::min(255, result.r());
+        result.g() = std::min(255, result.g());
+        result.b() = std::min(255, result.b());
+        return result.Cast<u8>();
+    }
+
+    case Operation::AddThenMultiply: {
+        auto result = input[0] + input[1];
+        result.r() = std::min(255, result.r());
+        result.g() = std::min(255, result.g());
+        result.b() = std::min(255, result.b());
+        result = (result * input[2].Cast<int>()) / 255;
+        return result.Cast<u8>();
+    }
+    case Operation::Dot3_RGB: {
+        // Not fully accurate.  Worst case scenario seems to yield a +/-3 error.  Some HW results
+        // indicate that the per-component computation can't have a higher precision than 1/256,
+        // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
+        // different results.
+        int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
+                     ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
+                     ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
+        result = std::max(0, std::min(255, result));
+        return {(u8)result, (u8)result, (u8)result};
+    }
+    default:
+        LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
+        UNIMPLEMENTED();
+        return {0, 0, 0};
+    }
+};
+
+static u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) {
+    switch (op) {
+        using Operation = TevStageConfig::Operation;
+    case Operation::Replace:
+        return input[0];
+
+    case Operation::Modulate:
+        return input[0] * input[1] / 255;
+
+    case Operation::Add:
+        return std::min(255, input[0] + input[1]);
+
+    case Operation::AddSigned: {
+        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
+        auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
+        return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
+    }
+
+    case Operation::Lerp:
+        return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
+
+    case Operation::Subtract:
+        return std::max(0, (int)input[0] - (int)input[1]);
+
+    case Operation::MultiplyThenAdd:
+        return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255);
+
+    case Operation::AddThenMultiply:
+        return (std::min(255, (input[0] + input[1])) * input[2]) / 255;
+
+    default:
+        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op);
+        UNIMPLEMENTED();
+        return 0;
+    }
+};
+
+static Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src,
+                                            const Math::Vec4<u8>& srcfactor,
+                                            const Math::Vec4<u8>& dest,
+                                            const Math::Vec4<u8>& destfactor,
+                                            FramebufferRegs::BlendEquation equation) {
+    Math::Vec4<int> result;
+
+    auto src_result = (src * srcfactor).Cast<int>();
+    auto dst_result = (dest * destfactor).Cast<int>();
+
+    switch (equation) {
+    case FramebufferRegs::BlendEquation::Add:
+        result = (src_result + dst_result) / 255;
+        break;
+
+    case FramebufferRegs::BlendEquation::Subtract:
+        result = (src_result - dst_result) / 255;
+        break;
+
+    case FramebufferRegs::BlendEquation::ReverseSubtract:
+        result = (dst_result - src_result) / 255;
+        break;
+
+    // TODO: How do these two actually work?  OpenGL doesn't include the blend factors in the
+    //       min/max computations, but is this what the 3DS actually does?
+    case FramebufferRegs::BlendEquation::Min:
+        result.r() = std::min(src.r(), dest.r());
+        result.g() = std::min(src.g(), dest.g());
+        result.b() = std::min(src.b(), dest.b());
+        result.a() = std::min(src.a(), dest.a());
+        break;
+
+    case FramebufferRegs::BlendEquation::Max:
+        result.r() = std::max(src.r(), dest.r());
+        result.g() = std::max(src.g(), dest.g());
+        result.b() = std::max(src.b(), dest.b());
+        result.a() = std::max(src.a(), dest.a());
+        break;
+
+    default:
+        LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation);
+        UNIMPLEMENTED();
+    }
+
+    return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
+                          MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
+};
+
+static u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) {
+    switch (op) {
+    case FramebufferRegs::LogicOp::Clear:
+        return 0;
+
+    case FramebufferRegs::LogicOp::And:
+        return src & dest;
+
+    case FramebufferRegs::LogicOp::AndReverse:
+        return src & ~dest;
+
+    case FramebufferRegs::LogicOp::Copy:
+        return src;
+
+    case FramebufferRegs::LogicOp::Set:
+        return 255;
+
+    case FramebufferRegs::LogicOp::CopyInverted:
+        return ~src;
+
+    case FramebufferRegs::LogicOp::NoOp:
+        return dest;
+
+    case FramebufferRegs::LogicOp::Invert:
+        return ~dest;
+
+    case FramebufferRegs::LogicOp::Nand:
+        return ~(src & dest);
+
+    case FramebufferRegs::LogicOp::Or:
+        return src | dest;
+
+    case FramebufferRegs::LogicOp::Nor:
+        return ~(src | dest);
+
+    case FramebufferRegs::LogicOp::Xor:
+        return src ^ dest;
+
+    case FramebufferRegs::LogicOp::Equiv:
+        return ~(src ^ dest);
+
+    case FramebufferRegs::LogicOp::AndInverted:
+        return ~src & dest;
+
+    case FramebufferRegs::LogicOp::OrReverse:
+        return src | ~dest;
+
+    case FramebufferRegs::LogicOp::OrInverted:
+        return ~src | dest;
+    }
+};
+
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 struct Fix12P4 {
     Fix12P4() {}
@@ -304,34 +613,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height)))
                             .ToFloat32();
 
-                static auto GetWrappedTexCoord = [](TexturingRegs::TextureConfig::WrapMode mode,
-                                                    int val, unsigned size) {
-                    switch (mode) {
-                    case TexturingRegs::TextureConfig::ClampToEdge:
-                        val = std::max(val, 0);
-                        val = std::min(val, (int)size - 1);
-                        return val;
-
-                    case TexturingRegs::TextureConfig::ClampToBorder:
-                        return val;
-
-                    case TexturingRegs::TextureConfig::Repeat:
-                        return (int)((unsigned)val % size);
-
-                    case TexturingRegs::TextureConfig::MirroredRepeat: {
-                        unsigned int coord = ((unsigned)val % (2 * size));
-                        if (coord >= size)
-                            coord = 2 * size - 1 - coord;
-                        return (int)coord;
-                    }
-
-                    default:
-                        LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
-                        UNIMPLEMENTED();
-                        return 0;
-                    }
-                };
-
                 if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder &&
                      (s < 0 || static_cast<u32>(s) >= texture.config.width)) ||
                     (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder &&
@@ -380,9 +661,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                  ++tev_stage_index) {
                 const auto& tev_stage = tev_stages[tev_stage_index];
                 using Source = TexturingRegs::TevStageConfig::Source;
-                using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier;
-                using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier;
-                using Operation = TexturingRegs::TevStageConfig::Operation;
 
                 auto GetSource = [&](Source source) -> Math::Vec4<u8> {
                     switch (source) {
@@ -422,187 +700,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                     }
                 };
 
-                static auto GetColorModifier = [](ColorModifier factor,
-                                                  const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
-                    switch (factor) {
-                    case ColorModifier::SourceColor:
-                        return values.rgb();
-
-                    case ColorModifier::OneMinusSourceColor:
-                        return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
-
-                    case ColorModifier::SourceAlpha:
-                        return values.aaa();
-
-                    case ColorModifier::OneMinusSourceAlpha:
-                        return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>();
-
-                    case ColorModifier::SourceRed:
-                        return values.rrr();
-
-                    case ColorModifier::OneMinusSourceRed:
-                        return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>();
-
-                    case ColorModifier::SourceGreen:
-                        return values.ggg();
-
-                    case ColorModifier::OneMinusSourceGreen:
-                        return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>();
-
-                    case ColorModifier::SourceBlue:
-                        return values.bbb();
-
-                    case ColorModifier::OneMinusSourceBlue:
-                        return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
-                    }
-                };
-
-                static auto GetAlphaModifier = [](AlphaModifier factor,
-                                                  const Math::Vec4<u8>& values) -> u8 {
-                    switch (factor) {
-                    case AlphaModifier::SourceAlpha:
-                        return values.a();
-
-                    case AlphaModifier::OneMinusSourceAlpha:
-                        return 255 - values.a();
-
-                    case AlphaModifier::SourceRed:
-                        return values.r();
-
-                    case AlphaModifier::OneMinusSourceRed:
-                        return 255 - values.r();
-
-                    case AlphaModifier::SourceGreen:
-                        return values.g();
-
-                    case AlphaModifier::OneMinusSourceGreen:
-                        return 255 - values.g();
-
-                    case AlphaModifier::SourceBlue:
-                        return values.b();
-
-                    case AlphaModifier::OneMinusSourceBlue:
-                        return 255 - values.b();
-                    }
-                };
-
-                static auto ColorCombine = [](Operation op,
-                                              const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
-                    switch (op) {
-                    case Operation::Replace:
-                        return input[0];
-
-                    case Operation::Modulate:
-                        return ((input[0] * input[1]) / 255).Cast<u8>();
-
-                    case Operation::Add: {
-                        auto result = input[0] + input[1];
-                        result.r() = std::min(255, result.r());
-                        result.g() = std::min(255, result.g());
-                        result.b() = std::min(255, result.b());
-                        return result.Cast<u8>();
-                    }
-
-                    case Operation::AddSigned: {
-                        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
-                        // (byte) 128 is correct
-                        auto result = input[0].Cast<int>() + input[1].Cast<int>() -
-                                      Math::MakeVec<int>(128, 128, 128);
-                        result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
-                        result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
-                        result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
-                        return result.Cast<u8>();
-                    }
-
-                    case Operation::Lerp:
-                        return ((input[0] * input[2] +
-                                 input[1] *
-                                     (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
-                                255)
-                            .Cast<u8>();
-
-                    case Operation::Subtract: {
-                        auto result = input[0].Cast<int>() - input[1].Cast<int>();
-                        result.r() = std::max(0, result.r());
-                        result.g() = std::max(0, result.g());
-                        result.b() = std::max(0, result.b());
-                        return result.Cast<u8>();
-                    }
-
-                    case Operation::MultiplyThenAdd: {
-                        auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
-                        result.r() = std::min(255, result.r());
-                        result.g() = std::min(255, result.g());
-                        result.b() = std::min(255, result.b());
-                        return result.Cast<u8>();
-                    }
-
-                    case Operation::AddThenMultiply: {
-                        auto result = input[0] + input[1];
-                        result.r() = std::min(255, result.r());
-                        result.g() = std::min(255, result.g());
-                        result.b() = std::min(255, result.b());
-                        result = (result * input[2].Cast<int>()) / 255;
-                        return result.Cast<u8>();
-                    }
-                    case Operation::Dot3_RGB: {
-                        // Not fully accurate.
-                        // Worst case scenario seems to yield a +/-3 error
-                        // Some HW results indicate that the per-component computation can't have a
-                        // higher precision than 1/256,
-                        // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb(
-                        // (0x80,g0,b0),(0x80,g1,b1) ) give different results
-                        int result =
-                            ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
-                            ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
-                            ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
-                        result = std::max(0, std::min(255, result));
-                        return {(u8)result, (u8)result, (u8)result};
-                    }
-                    default:
-                        LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
-                        UNIMPLEMENTED();
-                        return {0, 0, 0};
-                    }
-                };
-
-                static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 {
-                    switch (op) {
-                    case Operation::Replace:
-                        return input[0];
-
-                    case Operation::Modulate:
-                        return input[0] * input[1] / 255;
-
-                    case Operation::Add:
-                        return std::min(255, input[0] + input[1]);
-
-                    case Operation::AddSigned: {
-                        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
-                        // (byte) 128 is correct
-                        auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
-                        return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
-                    }
-
-                    case Operation::Lerp:
-                        return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
-
-                    case Operation::Subtract:
-                        return std::max(0, (int)input[0] - (int)input[1]);
-
-                    case Operation::MultiplyThenAdd:
-                        return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255);
-
-                    case Operation::AddThenMultiply:
-                        return (std::min(255, (input[0] + input[1])) * input[2]) / 255;
-
-                    default:
-                        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op);
-                        UNIMPLEMENTED();
-                        return 0;
-                    }
-                };
-
                 // color combiner
                 // NOTE: Not sure if the alpha combiner might use the color output of the previous
                 //       stage as input. Hence, we currently don't directly write the result to
@@ -917,56 +1014,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                     return combiner_output[channel];
                 };
 
-                static auto EvaluateBlendEquation = [](
-                    const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
-                    const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
-                    FramebufferRegs::BlendEquation equation) {
-
-                    Math::Vec4<int> result;
-
-                    auto src_result = (src * srcfactor).Cast<int>();
-                    auto dst_result = (dest * destfactor).Cast<int>();
-
-                    switch (equation) {
-                    case FramebufferRegs::BlendEquation::Add:
-                        result = (src_result + dst_result) / 255;
-                        break;
-
-                    case FramebufferRegs::BlendEquation::Subtract:
-                        result = (src_result - dst_result) / 255;
-                        break;
-
-                    case FramebufferRegs::BlendEquation::ReverseSubtract:
-                        result = (dst_result - src_result) / 255;
-                        break;
-
-                    // TODO: How do these two actually work?
-                    //       OpenGL doesn't include the blend factors in the min/max computations,
-                    //       but is this what the 3DS actually does?
-                    case FramebufferRegs::BlendEquation::Min:
-                        result.r() = std::min(src.r(), dest.r());
-                        result.g() = std::min(src.g(), dest.g());
-                        result.b() = std::min(src.b(), dest.b());
-                        result.a() = std::min(src.a(), dest.a());
-                        break;
-
-                    case FramebufferRegs::BlendEquation::Max:
-                        result.r() = std::max(src.r(), dest.r());
-                        result.g() = std::max(src.g(), dest.g());
-                        result.b() = std::max(src.b(), dest.b());
-                        result.a() = std::max(src.a(), dest.a());
-                        break;
-
-                    default:
-                        LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation);
-                        UNIMPLEMENTED();
-                    }
-
-                    return Math::Vec4<u8>(
-                        MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
-                        MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
-                };
-
                 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
                                                LookupFactor(1, params.factor_source_rgb),
                                                LookupFactor(2, params.factor_source_rgb),
@@ -983,58 +1030,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                                                          dstfactor, params.blend_equation_a)
                                        .a();
             } else {
-                static auto LogicOp = [](u8 src, u8 dest, FramebufferRegs::LogicOp op) -> u8 {
-                    switch (op) {
-                    case FramebufferRegs::LogicOp::Clear:
-                        return 0;
-
-                    case FramebufferRegs::LogicOp::And:
-                        return src & dest;
-
-                    case FramebufferRegs::LogicOp::AndReverse:
-                        return src & ~dest;
-
-                    case FramebufferRegs::LogicOp::Copy:
-                        return src;
-
-                    case FramebufferRegs::LogicOp::Set:
-                        return 255;
-
-                    case FramebufferRegs::LogicOp::CopyInverted:
-                        return ~src;
-
-                    case FramebufferRegs::LogicOp::NoOp:
-                        return dest;
-
-                    case FramebufferRegs::LogicOp::Invert:
-                        return ~dest;
-
-                    case FramebufferRegs::LogicOp::Nand:
-                        return ~(src & dest);
-
-                    case FramebufferRegs::LogicOp::Or:
-                        return src | dest;
-
-                    case FramebufferRegs::LogicOp::Nor:
-                        return ~(src | dest);
-
-                    case FramebufferRegs::LogicOp::Xor:
-                        return src ^ dest;
-
-                    case FramebufferRegs::LogicOp::Equiv:
-                        return ~(src ^ dest);
-
-                    case FramebufferRegs::LogicOp::AndInverted:
-                        return ~src & dest;
-
-                    case FramebufferRegs::LogicOp::OrReverse:
-                        return src | ~dest;
-
-                    case FramebufferRegs::LogicOp::OrInverted:
-                        return ~src | dest;
-                    }
-                };
-
                 blend_output =
                     Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
                                   LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),

From 1683cb0ec908a214cd45a5c481d930f07c811ff2 Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Sun, 29 Jan 2017 19:22:19 -0800
Subject: [PATCH 4/5] SWRasterizer: Move texturing functions to their own file

---
 src/video_core/CMakeLists.txt              |   2 +
 src/video_core/swrasterizer/rasterizer.cpp | 211 +------------------
 src/video_core/swrasterizer/texturing.cpp  | 228 +++++++++++++++++++++
 src/video_core/swrasterizer/texturing.h    |  28 +++
 4 files changed, 259 insertions(+), 210 deletions(-)
 create mode 100644 src/video_core/swrasterizer/texturing.cpp
 create mode 100644 src/video_core/swrasterizer/texturing.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 479edfff4..5317719e8 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,7 @@ set(SRCS
             swrasterizer/framebuffer.cpp
             swrasterizer/rasterizer.cpp
             swrasterizer/swrasterizer.cpp
+            swrasterizer/texturing.cpp
             texture/etc1.cpp
             texture/texture_decode.cpp
             vertex_loader.cpp
@@ -55,6 +56,7 @@ set(HEADERS
             swrasterizer/framebuffer.h
             swrasterizer/rasterizer.h
             swrasterizer/swrasterizer.h
+            swrasterizer/texturing.h
             texture/etc1.h
             texture/texture_decode.h
             utils.h
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index 7044a6136..0fd842abe 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -24,222 +24,13 @@
 #include "video_core/shader/shader.h"
 #include "video_core/swrasterizer/framebuffer.h"
 #include "video_core/swrasterizer/rasterizer.h"
+#include "video_core/swrasterizer/texturing.h"
 #include "video_core/texture/texture_decode.h"
 #include "video_core/utils.h"
 
 namespace Pica {
 namespace Rasterizer {
 
-using TevStageConfig = TexturingRegs::TevStageConfig;
-
-static int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) {
-    switch (mode) {
-    case TexturingRegs::TextureConfig::ClampToEdge:
-        val = std::max(val, 0);
-        val = std::min(val, (int)size - 1);
-        return val;
-
-    case TexturingRegs::TextureConfig::ClampToBorder:
-        return val;
-
-    case TexturingRegs::TextureConfig::Repeat:
-        return (int)((unsigned)val % size);
-
-    case TexturingRegs::TextureConfig::MirroredRepeat: {
-        unsigned int coord = ((unsigned)val % (2 * size));
-        if (coord >= size)
-            coord = 2 * size - 1 - coord;
-        return (int)coord;
-    }
-
-    default:
-        LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
-        UNIMPLEMENTED();
-        return 0;
-    }
-};
-
-static Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor,
-                                       const Math::Vec4<u8>& values) {
-    using ColorModifier = TevStageConfig::ColorModifier;
-
-    switch (factor) {
-    case ColorModifier::SourceColor:
-        return values.rgb();
-
-    case ColorModifier::OneMinusSourceColor:
-        return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
-
-    case ColorModifier::SourceAlpha:
-        return values.aaa();
-
-    case ColorModifier::OneMinusSourceAlpha:
-        return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>();
-
-    case ColorModifier::SourceRed:
-        return values.rrr();
-
-    case ColorModifier::OneMinusSourceRed:
-        return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>();
-
-    case ColorModifier::SourceGreen:
-        return values.ggg();
-
-    case ColorModifier::OneMinusSourceGreen:
-        return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>();
-
-    case ColorModifier::SourceBlue:
-        return values.bbb();
-
-    case ColorModifier::OneMinusSourceBlue:
-        return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
-    }
-};
-
-static u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) {
-    using AlphaModifier = TevStageConfig::AlphaModifier;
-
-    switch (factor) {
-    case AlphaModifier::SourceAlpha:
-        return values.a();
-
-    case AlphaModifier::OneMinusSourceAlpha:
-        return 255 - values.a();
-
-    case AlphaModifier::SourceRed:
-        return values.r();
-
-    case AlphaModifier::OneMinusSourceRed:
-        return 255 - values.r();
-
-    case AlphaModifier::SourceGreen:
-        return values.g();
-
-    case AlphaModifier::OneMinusSourceGreen:
-        return 255 - values.g();
-
-    case AlphaModifier::SourceBlue:
-        return values.b();
-
-    case AlphaModifier::OneMinusSourceBlue:
-        return 255 - values.b();
-    }
-};
-
-static Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) {
-    using Operation = TevStageConfig::Operation;
-
-    switch (op) {
-    case Operation::Replace:
-        return input[0];
-
-    case Operation::Modulate:
-        return ((input[0] * input[1]) / 255).Cast<u8>();
-
-    case Operation::Add: {
-        auto result = input[0] + input[1];
-        result.r() = std::min(255, result.r());
-        result.g() = std::min(255, result.g());
-        result.b() = std::min(255, result.b());
-        return result.Cast<u8>();
-    }
-
-    case Operation::AddSigned: {
-        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
-        // (byte) 128 is correct
-        auto result =
-            input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128);
-        result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
-        result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
-        result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
-        return result.Cast<u8>();
-    }
-
-    case Operation::Lerp:
-        return ((input[0] * input[2] +
-                 input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
-                255)
-            .Cast<u8>();
-
-    case Operation::Subtract: {
-        auto result = input[0].Cast<int>() - input[1].Cast<int>();
-        result.r() = std::max(0, result.r());
-        result.g() = std::max(0, result.g());
-        result.b() = std::max(0, result.b());
-        return result.Cast<u8>();
-    }
-
-    case Operation::MultiplyThenAdd: {
-        auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
-        result.r() = std::min(255, result.r());
-        result.g() = std::min(255, result.g());
-        result.b() = std::min(255, result.b());
-        return result.Cast<u8>();
-    }
-
-    case Operation::AddThenMultiply: {
-        auto result = input[0] + input[1];
-        result.r() = std::min(255, result.r());
-        result.g() = std::min(255, result.g());
-        result.b() = std::min(255, result.b());
-        result = (result * input[2].Cast<int>()) / 255;
-        return result.Cast<u8>();
-    }
-    case Operation::Dot3_RGB: {
-        // Not fully accurate.  Worst case scenario seems to yield a +/-3 error.  Some HW results
-        // indicate that the per-component computation can't have a higher precision than 1/256,
-        // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
-        // different results.
-        int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
-                     ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
-                     ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
-        result = std::max(0, std::min(255, result));
-        return {(u8)result, (u8)result, (u8)result};
-    }
-    default:
-        LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
-        UNIMPLEMENTED();
-        return {0, 0, 0};
-    }
-};
-
-static u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) {
-    switch (op) {
-        using Operation = TevStageConfig::Operation;
-    case Operation::Replace:
-        return input[0];
-
-    case Operation::Modulate:
-        return input[0] * input[1] / 255;
-
-    case Operation::Add:
-        return std::min(255, input[0] + input[1]);
-
-    case Operation::AddSigned: {
-        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
-        auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
-        return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
-    }
-
-    case Operation::Lerp:
-        return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
-
-    case Operation::Subtract:
-        return std::max(0, (int)input[0] - (int)input[1]);
-
-    case Operation::MultiplyThenAdd:
-        return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255);
-
-    case Operation::AddThenMultiply:
-        return (std::min(255, (input[0] + input[1])) * input[2]) / 255;
-
-    default:
-        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op);
-        UNIMPLEMENTED();
-        return 0;
-    }
-};
-
 static Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src,
                                             const Math::Vec4<u8>& srcfactor,
                                             const Math::Vec4<u8>& dest,
diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp
new file mode 100644
index 000000000..eb18e4ba4
--- /dev/null
+++ b/src/video_core/swrasterizer/texturing.cpp
@@ -0,0 +1,257 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "common/vector_math.h"
+#include "video_core/regs_texturing.h"
+#include "video_core/swrasterizer/texturing.h"
+
+namespace Pica {
+namespace Rasterizer {
+
+using TevStageConfig = TexturingRegs::TevStageConfig;
+
+/// Applies the wrap mode `mode` to the integer texture coordinate `val`.
+///
+/// ClampToEdge limits the result to [0, size - 1]. ClampToBorder returns `val` unchanged, so the
+/// result may lie outside that range. Repeat and MirroredRepeat treat `val` as unsigned and fold
+/// it back into [0, size - 1]. An unrecognized mode is logged and yields 0.
+int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) {
+    switch (mode) {
+    case TexturingRegs::TextureConfig::ClampToEdge:
+        val = std::max(val, 0);
+        val = std::min(val, (int)size - 1);
+        return val;
+
+    case TexturingRegs::TextureConfig::ClampToBorder:
+        return val;
+
+    case TexturingRegs::TextureConfig::Repeat:
+        return (int)((unsigned)val % size);
+
+    case TexturingRegs::TextureConfig::MirroredRepeat: {
+        // Reduce to one mirrored period of 2 * size, then reflect its upper half.
+        unsigned int coord = ((unsigned)val % (2 * size));
+        if (coord >= size)
+            coord = 2 * size - 1 - coord;
+        return (int)coord;
+    }
+
+    default:
+        LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
+        UNIMPLEMENTED();
+        return 0;
+    }
+}
+
+/// Selects the RGB operand described by `factor` from `values`: its RGB channels or one channel
+/// replicated three times, either as-is or inverted (255 - x). A `factor` matching none of the
+/// named modifiers is logged and yields {0, 0, 0}.
+Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor,
+                                const Math::Vec4<u8>& values) {
+    using ColorModifier = TevStageConfig::ColorModifier;
+
+    switch (factor) {
+    case ColorModifier::SourceColor:
+        return values.rgb();
+
+    case ColorModifier::OneMinusSourceColor:
+        return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
+
+    case ColorModifier::SourceAlpha:
+        return values.aaa();
+
+    case ColorModifier::OneMinusSourceAlpha:
+        return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>();
+
+    case ColorModifier::SourceRed:
+        return values.rrr();
+
+    case ColorModifier::OneMinusSourceRed:
+        return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>();
+
+    case ColorModifier::SourceGreen:
+        return values.ggg();
+
+    case ColorModifier::OneMinusSourceGreen:
+        return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>();
+
+    case ColorModifier::SourceBlue:
+        return values.bbb();
+
+    case ColorModifier::OneMinusSourceBlue:
+        return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
+    }
+
+    // No case matched: return a defined value instead of flowing off the end of the function.
+    LOG_ERROR(HW_GPU, "Unknown color modifier %x", (int)factor);
+    UNIMPLEMENTED();
+    return {0, 0, 0};
+}
+
+/// Selects the scalar operand described by `factor` from `values`: one channel, either as-is or
+/// inverted (255 - x). A `factor` matching none of the named modifiers is logged and yields 0.
+u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) {
+    using AlphaModifier = TevStageConfig::AlphaModifier;
+
+    switch (factor) {
+    case AlphaModifier::SourceAlpha:
+        return values.a();
+
+    case AlphaModifier::OneMinusSourceAlpha:
+        return 255 - values.a();
+
+    case AlphaModifier::SourceRed:
+        return values.r();
+
+    case AlphaModifier::OneMinusSourceRed:
+        return 255 - values.r();
+
+    case AlphaModifier::SourceGreen:
+        return values.g();
+
+    case AlphaModifier::OneMinusSourceGreen:
+        return 255 - values.g();
+
+    case AlphaModifier::SourceBlue:
+        return values.b();
+
+    case AlphaModifier::OneMinusSourceBlue:
+        return 255 - values.b();
+    }
+
+    // No case matched: return a defined value instead of flowing off the end of the function.
+    LOG_ERROR(HW_GPU, "Unknown alpha modifier %x", (int)factor);
+    UNIMPLEMENTED();
+    return 0;
+}
+
+/// Combines the RGB operands in `input` using the TEV operation `op`; which of the three operands
+/// are read depends on `op`. Dot3_RGB writes one scalar to all three channels. An unrecognized
+/// operation is logged and yields {0, 0, 0}.
+Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) {
+    using Operation = TevStageConfig::Operation;
+
+    switch (op) {
+    case Operation::Replace:
+        return input[0];
+
+    case Operation::Modulate:
+        return ((input[0] * input[1]) / 255).Cast<u8>();
+
+    case Operation::Add: {
+        auto result = input[0] + input[1];
+        result.r() = std::min(255, result.r());
+        result.g() = std::min(255, result.g());
+        result.b() = std::min(255, result.b());
+        return result.Cast<u8>();
+    }
+
+    case Operation::AddSigned: {
+        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
+        // (byte) 128 is correct
+        auto result =
+            input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128);
+        result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
+        result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
+        result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
+        return result.Cast<u8>();
+    }
+
+    case Operation::Lerp:
+        return ((input[0] * input[2] +
+                 input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
+                255)
+            .Cast<u8>();
+
+    case Operation::Subtract: {
+        auto result = input[0].Cast<int>() - input[1].Cast<int>();
+        result.r() = std::max(0, result.r());
+        result.g() = std::max(0, result.g());
+        result.b() = std::max(0, result.b());
+        return result.Cast<u8>();
+    }
+
+    case Operation::MultiplyThenAdd: {
+        auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
+        result.r() = std::min(255, result.r());
+        result.g() = std::min(255, result.g());
+        result.b() = std::min(255, result.b());
+        return result.Cast<u8>();
+    }
+
+    case Operation::AddThenMultiply: {
+        auto result = input[0] + input[1];
+        result.r() = std::min(255, result.r());
+        result.g() = std::min(255, result.g());
+        result.b() = std::min(255, result.b());
+        result = (result * input[2].Cast<int>()) / 255;
+        return result.Cast<u8>();
+    }
+    case Operation::Dot3_RGB: {
+        // Not fully accurate.  Worst case scenario seems to yield a +/-3 error.  Some HW results
+        // indicate that the per-component computation can't have a higher precision than 1/256,
+        // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
+        // different results.
+        int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
+                     ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
+                     ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
+        result = std::max(0, std::min(255, result));
+        return {(u8)result, (u8)result, (u8)result};
+    }
+    default:
+        LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
+        UNIMPLEMENTED();
+        return {0, 0, 0};
+    }
+}
+
+/// Scalar counterpart of ColorCombine for the alpha channel. Dot3_RGB has no case here, so it is
+/// logged and yields 0 like any other unrecognized operation.
+u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) {
+    using Operation = TevStageConfig::Operation;
+
+    switch (op) {
+    case Operation::Replace:
+        return input[0];
+
+    case Operation::Modulate:
+        return input[0] * input[1] / 255;
+
+    case Operation::Add:
+        return std::min(255, input[0] + input[1]);
+
+    case Operation::AddSigned: {
+        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
+        auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
+        return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
+    }
+
+    case Operation::Lerp:
+        return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
+
+    case Operation::Subtract:
+        return std::max(0, (int)input[0] - (int)input[1]);
+
+    case Operation::MultiplyThenAdd:
+        return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255);
+
+    case Operation::AddThenMultiply:
+        return (std::min(255, (input[0] + input[1])) * input[2]) / 255;
+
+    default:
+        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op);
+        UNIMPLEMENTED();
+        return 0;
+    }
+}
+
+} // namespace Rasterizer
+} // namespace Pica
diff --git a/src/video_core/swrasterizer/texturing.h b/src/video_core/swrasterizer/texturing.h
new file mode 100644
index 000000000..24f74a5a3
--- /dev/null
+++ b/src/video_core/swrasterizer/texturing.h
@@ -0,0 +1,38 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include "common/common_types.h"
+#include "common/vector_math.h"
+#include "video_core/regs_texturing.h"
+
+namespace Pica {
+namespace Rasterizer {
+
+/// Wraps the integer texture coordinate `val` according to `mode`. `size` sets the upper clamp
+/// bound for ClampToEdge and the fold length for Repeat and MirroredRepeat.
+int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size);
+
+/// Selects the RGB operand described by `factor` from `values`: its RGB channels or one channel
+/// replicated three times, either as-is or inverted (255 - x).
+Math::Vec3<u8> GetColorModifier(TexturingRegs::TevStageConfig::ColorModifier factor,
+                                const Math::Vec4<u8>& values);
+
+/// Selects one channel of `values`, either as-is or inverted (255 - x), as described by `factor`.
+u8 GetAlphaModifier(TexturingRegs::TevStageConfig::AlphaModifier factor,
+                    const Math::Vec4<u8>& values);
+
+/// Combines the RGB operands in `input` using the TEV operation `op`; which of the three operands
+/// are read depends on `op`.
+Math::Vec3<u8> ColorCombine(TexturingRegs::TevStageConfig::Operation op,
+                            const Math::Vec3<u8> input[3]);
+
+/// Scalar counterpart of ColorCombine, combining the three values in `input` using `op`.
+u8 AlphaCombine(TexturingRegs::TevStageConfig::Operation op, const std::array<u8, 3>& input);
+
+} // namespace Rasterizer
+} // namespace Pica

From 426fda1d524b17acd10d962a03af872a85342eca Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Sun, 29 Jan 2017 19:25:48 -0800
Subject: [PATCH 5/5] SWRasterizer: Move more framebuffer functions to file

---
 src/video_core/swrasterizer/framebuffer.cpp |  99 +++++++++++++++++++
 src/video_core/swrasterizer/framebuffer.h   |   6 ++
 src/video_core/swrasterizer/rasterizer.cpp  | 100 --------------------
 3 files changed, 105 insertions(+), 100 deletions(-)

diff --git a/src/video_core/swrasterizer/framebuffer.cpp b/src/video_core/swrasterizer/framebuffer.cpp
index 4b31eda89..7de3aac75 100644
--- a/src/video_core/swrasterizer/framebuffer.cpp
+++ b/src/video_core/swrasterizer/framebuffer.cpp
@@ -8,6 +8,7 @@
 #include "common/color.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "common/math_util.h"
 #include "common/vector_math.h"
 #include "core/hw/gpu.h"
 #include "core/memory.h"
@@ -255,5 +256,117 @@ u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u
     }
 }
 
+/// Blends `src` with `dest` using `equation`.
+///
+/// Add, Subtract and ReverseSubtract combine the per-channel products src * srcfactor and
+/// dest * destfactor and divide by 255. Min and Max compare `src` and `dest` directly and ignore
+/// both factors. Every channel of the result is clamped to [0, 255]. An unrecognized equation is
+/// logged and yields all zeros.
+Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
+                                     const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
+                                     FramebufferRegs::BlendEquation equation) {
+    // Zero-initialized so that the unknown-equation path below never reads indeterminate values.
+    Math::Vec4<int> result(0, 0, 0, 0);
+
+    auto src_result = (src * srcfactor).Cast<int>();
+    auto dst_result = (dest * destfactor).Cast<int>();
+
+    switch (equation) {
+    case FramebufferRegs::BlendEquation::Add:
+        result = (src_result + dst_result) / 255;
+        break;
+
+    case FramebufferRegs::BlendEquation::Subtract:
+        result = (src_result - dst_result) / 255;
+        break;
+
+    case FramebufferRegs::BlendEquation::ReverseSubtract:
+        result = (dst_result - src_result) / 255;
+        break;
+
+    // TODO: How do these two actually work?  OpenGL doesn't include the blend factors in the
+    //       min/max computations, but is this what the 3DS actually does?
+    case FramebufferRegs::BlendEquation::Min:
+        result.r() = std::min(src.r(), dest.r());
+        result.g() = std::min(src.g(), dest.g());
+        result.b() = std::min(src.b(), dest.b());
+        result.a() = std::min(src.a(), dest.a());
+        break;
+
+    case FramebufferRegs::BlendEquation::Max:
+        result.r() = std::max(src.r(), dest.r());
+        result.g() = std::max(src.g(), dest.g());
+        result.b() = std::max(src.b(), dest.b());
+        result.a() = std::max(src.a(), dest.a());
+        break;
+
+    default:
+        LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", static_cast<u32>(equation));
+        UNIMPLEMENTED();
+    }
+
+    return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
+                          MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
+}
+
+/// Applies the bitwise logic operation `op` to `src` and `dest`, truncating the result to 8 bits.
+/// A value of `op` matching none of the named operations is logged and yields 0.
+u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) {
+    switch (op) {
+    case FramebufferRegs::LogicOp::Clear:
+        return 0;
+
+    case FramebufferRegs::LogicOp::And:
+        return src & dest;
+
+    case FramebufferRegs::LogicOp::AndReverse:
+        return src & ~dest;
+
+    case FramebufferRegs::LogicOp::Copy:
+        return src;
+
+    case FramebufferRegs::LogicOp::Set:
+        return 255;
+
+    case FramebufferRegs::LogicOp::CopyInverted:
+        return ~src;
+
+    case FramebufferRegs::LogicOp::NoOp:
+        return dest;
+
+    case FramebufferRegs::LogicOp::Invert:
+        return ~dest;
+
+    case FramebufferRegs::LogicOp::Nand:
+        return ~(src & dest);
+
+    case FramebufferRegs::LogicOp::Or:
+        return src | dest;
+
+    case FramebufferRegs::LogicOp::Nor:
+        return ~(src | dest);
+
+    case FramebufferRegs::LogicOp::Xor:
+        return src ^ dest;
+
+    case FramebufferRegs::LogicOp::Equiv:
+        return ~(src ^ dest);
+
+    case FramebufferRegs::LogicOp::AndInverted:
+        return ~src & dest;
+
+    case FramebufferRegs::LogicOp::OrReverse:
+        return src | ~dest;
+
+    case FramebufferRegs::LogicOp::OrInverted:
+        return ~src | dest;
+    }
+
+    // No case matched: return a defined value instead of flowing off the end of the function.
+    LOG_CRITICAL(HW_GPU, "Unknown logic op %x", static_cast<u32>(op));
+    UNIMPLEMENTED();
+    return 0;
+}
+
 } // namespace Rasterizer
 } // namespace Pica
diff --git a/src/video_core/swrasterizer/framebuffer.h b/src/video_core/swrasterizer/framebuffer.h
index 220f7013b..4a32a4979 100644
--- a/src/video_core/swrasterizer/framebuffer.h
+++ b/src/video_core/swrasterizer/framebuffer.h
@@ -19,5 +19,11 @@ void SetDepth(int x, int y, u32 value);
 void SetStencil(int x, int y, u8 value);
 u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref);
 
+Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
+                                     const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
+                                     FramebufferRegs::BlendEquation equation);
+
+u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op);
+
 } // namespace Rasterizer
 } // namespace Pica
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index 0fd842abe..7557fcb89 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -31,106 +31,6 @@
 namespace Pica {
 namespace Rasterizer {
 
-static Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src,
-                                            const Math::Vec4<u8>& srcfactor,
-                                            const Math::Vec4<u8>& dest,
-                                            const Math::Vec4<u8>& destfactor,
-                                            FramebufferRegs::BlendEquation equation) {
-    Math::Vec4<int> result;
-
-    auto src_result = (src * srcfactor).Cast<int>();
-    auto dst_result = (dest * destfactor).Cast<int>();
-
-    switch (equation) {
-    case FramebufferRegs::BlendEquation::Add:
-        result = (src_result + dst_result) / 255;
-        break;
-
-    case FramebufferRegs::BlendEquation::Subtract:
-        result = (src_result - dst_result) / 255;
-        break;
-
-    case FramebufferRegs::BlendEquation::ReverseSubtract:
-        result = (dst_result - src_result) / 255;
-        break;
-
-    // TODO: How do these two actually work?  OpenGL doesn't include the blend factors in the
-    //       min/max computations, but is this what the 3DS actually does?
-    case FramebufferRegs::BlendEquation::Min:
-        result.r() = std::min(src.r(), dest.r());
-        result.g() = std::min(src.g(), dest.g());
-        result.b() = std::min(src.b(), dest.b());
-        result.a() = std::min(src.a(), dest.a());
-        break;
-
-    case FramebufferRegs::BlendEquation::Max:
-        result.r() = std::max(src.r(), dest.r());
-        result.g() = std::max(src.g(), dest.g());
-        result.b() = std::max(src.b(), dest.b());
-        result.a() = std::max(src.a(), dest.a());
-        break;
-
-    default:
-        LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation);
-        UNIMPLEMENTED();
-    }
-
-    return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
-                          MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
-};
-
-static u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) {
-    switch (op) {
-    case FramebufferRegs::LogicOp::Clear:
-        return 0;
-
-    case FramebufferRegs::LogicOp::And:
-        return src & dest;
-
-    case FramebufferRegs::LogicOp::AndReverse:
-        return src & ~dest;
-
-    case FramebufferRegs::LogicOp::Copy:
-        return src;
-
-    case FramebufferRegs::LogicOp::Set:
-        return 255;
-
-    case FramebufferRegs::LogicOp::CopyInverted:
-        return ~src;
-
-    case FramebufferRegs::LogicOp::NoOp:
-        return dest;
-
-    case FramebufferRegs::LogicOp::Invert:
-        return ~dest;
-
-    case FramebufferRegs::LogicOp::Nand:
-        return ~(src & dest);
-
-    case FramebufferRegs::LogicOp::Or:
-        return src | dest;
-
-    case FramebufferRegs::LogicOp::Nor:
-        return ~(src | dest);
-
-    case FramebufferRegs::LogicOp::Xor:
-        return src ^ dest;
-
-    case FramebufferRegs::LogicOp::Equiv:
-        return ~(src ^ dest);
-
-    case FramebufferRegs::LogicOp::AndInverted:
-        return ~src & dest;
-
-    case FramebufferRegs::LogicOp::OrReverse:
-        return src | ~dest;
-
-    case FramebufferRegs::LogicOp::OrInverted:
-        return ~src | dest;
-    }
-};
-
 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
 struct Fix12P4 {
     Fix12P4() {}