From 656de23d93882adc774e24e81a278913f461ad64 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 19:46:37 -0400 Subject: [PATCH 01/25] renderer: Create rasterizer and cleanup. --- src/video_core/renderer_base.cpp | 7 ++++++- src/video_core/renderer_base.h | 7 ++++++- src/video_core/renderer_opengl/gl_rasterizer_cache.h | 4 ++-- src/video_core/video_core.h | 2 ++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 51e1d45f9..30075b23c 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -5,6 +5,11 @@ #include #include #include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/video_core.h" -void RendererBase::RefreshRasterizerSetting() {} +void RendererBase::RefreshRasterizerSetting() { + if (rasterizer == nullptr) { + rasterizer = std::make_unique(); + } +} diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 2aba50eda..532e5b37c 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -8,6 +8,7 @@ #include #include "common/assert.h" #include "common/common_types.h" +#include "video_core/rasterizer_interface.h" class EmuWindow; @@ -74,12 +75,16 @@ public: return m_current_frame; } + VideoCore::RasterizerInterface* Rasterizer() const { + return rasterizer.get(); + } + void RefreshRasterizerSetting(); protected: + std::unique_ptr rasterizer; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer private: - bool opengl_rasterizer_active = false; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 17ce0fee7..828e62852 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -211,8 +211,8 @@ struct SurfaceParams { MathUtil::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; MathUtil::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; - PAddr addr = 0; - PAddr end = 0; + VAddr addr = 0; + VAddr end = 0; u64 size = 0; u32 width = 0; diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 1fd90b9d0..37da62436 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -15,6 +15,8 @@ class RendererBase; namespace VideoCore { +enum class Renderer { Software, OpenGL }; + extern std::unique_ptr g_renderer; ///< Renderer plugin extern EmuWindow* g_emu_window; ///< Emu window From 3a6604e8fa07ed68362f884a7d15797e5e67b791 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 19:47:28 -0400 Subject: [PATCH 02/25] maxwell_3d: Add some format decodings and string helper functions. --- src/video_core/engines/maxwell_3d.h | 110 +++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 3 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index aab282b77..69ed56338 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -7,6 +7,7 @@ #include #include #include +#include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -62,6 +63,107 @@ public: Fragment = 4, }; + enum class VertexSize : u32 { + Size_32_32_32_32 = 0x01, + Size_32_32_32 = 0x02, + Size_16_16_16_16 = 0x03, + Size_32_32 = 0x04, + Size_16_16_16 = 0x05, + Size_8_8_8_8 = 0x0a, + Size_16_16 = 0x0f, + Size_32 = 0x12, + Size_8_8_8 = 0x13, + Size_8_8 = 0x18, + Size_16 = 0x1b, + Size_8 = 0x1d, + Size_10_10_10_2 = 0x30, + Size_11_11_10 = 0x31, + }; + + static std::string VertexSizeToString(VertexSize vertex_size) { + switch (vertex_size) { + case VertexSize::Size_32_32_32_32: + return "32_32_32_32"; + case VertexSize::Size_32_32_32: + return "32_32_32"; + case VertexSize::Size_16_16_16_16: + return "16_16_16_16"; + case VertexSize::Size_32_32: + return "32_32"; + case VertexSize::Size_16_16_16: + return "16_16_16"; + case VertexSize::Size_8_8_8_8: + return "8_8_8_8"; + case VertexSize::Size_16_16: + return "16_16"; + case VertexSize::Size_32: + return "32"; + case VertexSize::Size_8_8_8: + return "8_8_8"; + case VertexSize::Size_8_8: + return "8_8"; + case VertexSize::Size_16: + return "16"; + case VertexSize::Size_8: + return "8"; + case VertexSize::Size_10_10_10_2: + return "10_10_10_2"; + case VertexSize::Size_11_11_10: + return "11_11_10"; + } + UNIMPLEMENTED(); + return {}; + } + + enum class VertexType : u32 { + SignedNorm = 1, + UnsignedNorm = 2, + SignedInt = 3, + UnsignedInt = 4, + UnsignedScaled = 5, + SignedScaled = 6, + Float = 7, + }; + + static std::string VertexTypeToString(VertexType vertex_type) { + switch (vertex_type) { + case VertexType::SignedNorm: + return "SignedNorm"; + case VertexType::UnsignedNorm: + return "UnsignedNorm"; + case VertexType::SignedInt: + return "SignedInt"; + case VertexType::UnsignedInt: + return "UnsignedInt"; + case VertexType::UnsignedScaled: + return "UnsignedScaled"; + case VertexType::SignedScaled: + return "SignedScaled"; + case VertexType::Float: + return "Float"; + } + UNIMPLEMENTED(); + return {}; + } + + enum class PrimitiveTopology : u32 { + Points = 0x0, + Lines = 0x1, + LineLoop = 0x2, + LineStrip = 0x3, + Triangles = 0x4, + TriangleStrip = 0x5, + TriangleFan = 0x6, + Quads = 0x7, + QuadStrip = 0x8, + Polygon = 0x9, + LinesAdjacency = 0xa, + LineStripAdjacency = 0xb, + TrianglesAdjacency = 0xc, + TriangleStripAdjacency = 0xd, + Patches = 0xe, + }; + union { struct { INSERT_PADDING_WORDS(0x200); @@ -112,8 +214,8 @@ public: BitField<0, 5, u32> buffer; BitField<6, 1, u32> constant; BitField<7, 14, u32> offset; - BitField<21, 6, u32> size; - BitField<27, 3, u32> type; + BitField<21, 6, VertexSize> size; + BitField<27, 3, VertexType> type; BitField<31, 1, u32> bgra; } vertex_attrib_format[NumVertexAttributes]; @@ -163,13 +265,15 @@ public: } } code_address; INSERT_PADDING_WORDS(1); + struct { u32 vertex_end_gl; union { u32 vertex_begin_gl; - BitField<0, 16, u32> topology; + BitField<0, 16, PrimitiveTopology> topology; }; } draw; + INSERT_PADDING_WORDS(0x139); struct { u32 query_address_high; From 7c3a26383927fa6ac523c330be0be82a4a5b9b5e Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 19:48:20 -0400 Subject: [PATCH 03/25] gpu: Expose Maxwell3D engine. --- src/video_core/gpu.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2a9064ba3..f9a725dee 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -36,6 +36,10 @@ public: std::unique_ptr memory_manager; + Engines::Maxwell3D& Maxwell3D() { + return *maxwell_3d; + } + private: static constexpr u32 InvalidGraphMacroEntry = 0xFFFFFFFF; From f707c2dac473c8971eccfd31d1b71281a039d95c Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 20:00:41 -0400 Subject: [PATCH 04/25] gl_rasterizer: Add a simple passthrough shader in lieu of shader generation. --- .../renderer_opengl/gl_rasterizer.cpp | 59 ++++++++++++++++++- .../renderer_opengl/gl_rasterizer.h | 14 ++++- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 24cfff229..8b08de011 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -54,6 +54,8 @@ static void SetShaderUniformBlockBindings(GLuint shader) { } RasterizerOpenGL::RasterizerOpenGL() { + shader_dirty = true; + has_ARB_buffer_storage = false; has_ARB_direct_state_access = false; has_ARB_separate_shader_objects = false; @@ -106,8 +108,6 @@ RasterizerOpenGL::RasterizerOpenGL() { state.draw.vertex_buffer = stream_buffer->GetHandle(); pipeline.Create(); - vs_input_index_min = 0; - vs_input_index_max = 0; state.draw.program_pipeline = pipeline.handle; state.draw.shader_program = 0; state.draw.vertex_array = hw_vao.handle; @@ -233,7 +233,60 @@ bool RasterizerOpenGL::AccelerateDisplay(const void* config, PAddr framebuffer_a } void RasterizerOpenGL::SetShader() { - UNIMPLEMENTED(); + // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to + // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell + // shaders. + + static constexpr char vertex_shader[] = R"( +#version 150 core + +in vec2 vert_position; +in vec2 vert_tex_coord; +out vec2 frag_tex_coord; + +void main() { + // Multiply input position by the rotscale part of the matrix and then manually translate by + // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector + // to `vec3(vert_position.xy, 1.0)` + gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); + frag_tex_coord = vert_tex_coord; +} +)"; + + static constexpr char fragment_shader[] = R"( +#version 150 core + +in vec2 frag_tex_coord; +out vec4 color; + +uniform sampler2D color_texture; + +void main() { + color = vec4(1.0, 0.0, 0.0, 1.0); +} +)"; + + if (current_shader) { + return; + } + + LOG_ERROR(HW_GPU, "Emulated shaders are not supported! Using a passthrough shader."); + + current_shader = &test_shader; + if (has_ARB_separate_shader_objects) { + test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); + glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); + } else { + ASSERT_MSG(false, "Unimplemented"); + } + + state.draw.shader_program = test_shader.shader.handle; + state.Apply(); + + if (has_ARB_separate_shader_objects) { + state.draw.shader_program = 0; + state.Apply(); + } } void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 893fc530f..7a68480d9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -42,6 +42,12 @@ public: ScreenInfo& screen_info) override; bool AccelerateDrawBatch(bool is_indexed) override; + /// OpenGL shader generated for a given Maxwell register state + struct MaxwellShader { + /// OpenGL shader resource + OGLShader shader; + }; + struct VertexShader { OGLShader shader; }; @@ -117,6 +123,12 @@ private: RasterizerCacheOpenGL res_cache; + /// Shader used for test renderering - to be removed once we have emulated shaders + MaxwellShader test_shader{}; + + const MaxwellShader* current_shader{}; + bool shader_dirty{}; + struct { UniformData data; bool dirty; @@ -136,8 +148,6 @@ private: static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; std::unique_ptr stream_buffer; - GLint vs_input_index_min; - GLint vs_input_index_max; GLsizeiptr vs_input_size; void AnalyzeVertexArray(bool is_indexed); From c6362543d46de63d276874e79715f05dcdfea8e5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 20:19:34 -0400 Subject: [PATCH 05/25] gl_rasterizer: Replace a bunch of UNIMPLEMENTED with ASSERT. --- .../renderer_opengl/gl_rasterizer.cpp | 30 +++++++++---------- .../renderer_opengl/gl_rasterizer_cache.cpp | 10 +++---- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8b08de011..0aed4b048 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -120,7 +120,7 @@ RasterizerOpenGL::RasterizerOpenGL() { glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); } else { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } accelerate_draw = AccelDraw::Disabled; @@ -167,12 +167,12 @@ void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_ void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_FS); - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { if (!has_ARB_separate_shader_objects) { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); return false; } @@ -212,23 +212,23 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { MICROPROFILE_SCOPE(OpenGL_Blits); - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); return true; } bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); return true; } bool RasterizerOpenGL::AccelerateFill(const void* config) { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); return true; } bool RasterizerOpenGL::AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); return true; } @@ -290,33 +290,33 @@ void main() { } void RasterizerOpenGL::SyncClipEnabled() { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } void RasterizerOpenGL::SyncClipCoef() { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } void RasterizerOpenGL::SyncCullMode() { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } void RasterizerOpenGL::SyncDepthScale() { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } void RasterizerOpenGL::SyncDepthOffset() { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } void RasterizerOpenGL::SyncBlendEnabled() { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } void RasterizerOpenGL::SyncBlendFuncs() { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } void RasterizerOpenGL::SyncBlendColor() { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 884637ca5..939391639 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -290,7 +290,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec static bool FillSurface(const Surface& surface, const u8* fill_data, const MathUtil::Rectangle& fill_rect, GLuint draw_fb_handle) { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); return true; } @@ -557,7 +557,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { load_end - load_start); } else { if (type == SurfaceType::Texture) { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); } else { morton_to_gl_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, load_start, load_end); @@ -1102,7 +1102,7 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& } Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); return {}; } @@ -1113,7 +1113,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( } Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { - UNIMPLEMENTED(); + ASSERT_MSG(false, "Unimplemented"); return {}; } @@ -1357,5 +1357,5 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { } void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u64 size, int delta) { - UNIMPLEMENTED(); + // ASSERT_MSG(false, "Unimplemented"); } From bfe45774f16e958bf34ed0d58a1d31e2325df47d Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 21:04:30 -0400 Subject: [PATCH 06/25] video_core: Move FramebufferInfo to FramebufferConfig in GPU. --- .../service/nvdrv/devices/nvdisp_disp0.cpp | 6 +- src/video_core/gpu.h | 29 ++++++++++ src/video_core/rasterizer_interface.h | 4 +- src/video_core/renderer_base.h | 33 +---------- .../renderer_opengl/gl_rasterizer.cpp | 5 +- .../renderer_opengl/gl_rasterizer.h | 4 +- .../renderer_opengl/renderer_opengl.cpp | 56 ++++++++++--------- .../renderer_opengl/renderer_opengl.h | 9 +-- 8 files changed, 77 insertions(+), 69 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 94530724e..7cd1d9306 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -26,14 +26,14 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 "Drawing from address %lx offset %08X Width %u Height %u Stride %u Format %u", addr, offset, width, height, stride, format); - using PixelFormat = RendererBase::FramebufferInfo::PixelFormat; + using PixelFormat = Tegra::FramebufferConfig::PixelFormat; using Flags = NVFlinger::BufferQueue::BufferTransformFlags; const bool flip_vertical = static_cast(transform) & static_cast(Flags::FlipV); - const RendererBase::FramebufferInfo framebuffer_info{ + const Tegra::FramebufferConfig framebuffer{ addr, offset, width, height, stride, static_cast(format), flip_vertical}; Core::System::GetInstance().perf_stats.EndGameFrame(); - VideoCore::g_renderer->SwapBuffers(framebuffer_info); + VideoCore::g_renderer->SwapBuffers(framebuffer); } } // namespace Devices diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index f9a725dee..f3c5e366a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -12,6 +12,35 @@ namespace Tegra { +/** + * Struct describing framebuffer configuration + */ +struct FramebufferConfig { + enum class PixelFormat : u32 { + ABGR8 = 1, + }; + + /** + * Returns the number of bytes per pixel. + */ + static u32 BytesPerPixel(PixelFormat format) { + switch (format) { + case PixelFormat::ABGR8: + return 4; + } + + UNREACHABLE(); + } + + VAddr address; + u32 offset; + u32 width; + u32 height; + u32 stride; + PixelFormat pixel_format; + bool flip_vertical; +}; + namespace Engines { class Fermi2D; class Maxwell3D; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 6c7bd0826..966e25f34 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "video_core/gpu.h" struct ScreenInfo; @@ -49,7 +50,8 @@ public: } /// Attempt to use a faster method to display the framebuffer to screen - virtual bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, + virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, + PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; } diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 532e5b37c..89a960eaf 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -8,6 +8,7 @@ #include #include "common/assert.h" #include "common/common_types.h" +#include "video_core/gpu.h" #include "video_core/rasterizer_interface.h" class EmuWindow; @@ -17,40 +18,10 @@ public: /// Used to reference a framebuffer enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture }; - /** - * Struct describing framebuffer metadata - * TODO(bunnei): This struct belongs in the GPU code, but we don't have a good place for it yet. - */ - struct FramebufferInfo { - enum class PixelFormat : u32 { - ABGR8 = 1, - }; - - /** - * Returns the number of bytes per pixel. - */ - static u32 BytesPerPixel(PixelFormat format) { - switch (format) { - case PixelFormat::ABGR8: - return 4; - } - - UNREACHABLE(); - } - - VAddr address; - u32 offset; - u32 width; - u32 height; - u32 stride; - PixelFormat pixel_format; - bool flip_vertical; - }; - virtual ~RendererBase() {} /// Swap buffers (render frame) - virtual void SwapBuffers(boost::optional framebuffer_info) = 0; + virtual void SwapBuffers(boost::optional framebuffer) = 0; /** * Set the emulator window to use for renderer diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0aed4b048..abc6607b0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -226,8 +226,9 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { return true; } -bool RasterizerOpenGL::AccelerateDisplay(const void* config, PAddr framebuffer_addr, - u32 pixel_stride, ScreenInfo& screen_info) { +bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, + PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { ASSERT_MSG(false, "Unimplemented"); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 7a68480d9..8f213404d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -38,8 +38,8 @@ public: bool AccelerateDisplayTransfer(const void* config) override; bool AccelerateTextureCopy(const void* config) override; bool AccelerateFill(const void* config) override; - bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, - ScreenInfo& screen_info) override; + bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, PAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info) override; bool AccelerateDrawBatch(bool is_indexed) override; /// OpenGL shader generated for a given Maxwell register state diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 65d38ade5..2ea5e91e3 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -98,22 +98,22 @@ RendererOpenGL::RendererOpenGL() = default; RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) -void RendererOpenGL::SwapBuffers(boost::optional framebuffer_info) { +void RendererOpenGL::SwapBuffers(boost::optional framebuffer) { // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.Apply(); - if (framebuffer_info != boost::none) { - // If framebuffer_info is provided, reload it from memory to a texture - if (screen_info.texture.width != (GLsizei)framebuffer_info->width || - screen_info.texture.height != (GLsizei)framebuffer_info->height || - screen_info.texture.pixel_format != framebuffer_info->pixel_format) { + if (framebuffer != boost::none) { + // If framebuffer is provided, reload it from memory to a texture + if (screen_info.texture.width != (GLsizei)framebuffer->width || + screen_info.texture.height != (GLsizei)framebuffer->height || + screen_info.texture.pixel_format != framebuffer->pixel_format) { // Reallocate texture if the framebuffer size has changed. // This is expected to not happen very often and hence should not be a // performance problem. - ConfigureFramebufferTexture(screen_info.texture, *framebuffer_info); + ConfigureFramebufferTexture(screen_info.texture, *framebuffer); } - LoadFBToScreenInfo(*framebuffer_info, screen_info); + LoadFBToScreenInfo(*framebuffer, screen_info); } DrawScreens(); @@ -245,43 +245,47 @@ static void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 /** * Loads framebuffer from emulated memory into the active OpenGL texture. */ -void RendererOpenGL::LoadFBToScreenInfo(const FramebufferInfo& framebuffer_info, +void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer, ScreenInfo& screen_info) { - const u32 bpp{FramebufferInfo::BytesPerPixel(framebuffer_info.pixel_format)}; - const u32 size_in_bytes{framebuffer_info.stride * framebuffer_info.height * bpp}; + const u32 bpp{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)}; + const u32 size_in_bytes{framebuffer.stride * framebuffer.height * bpp}; + const VAddr framebuffer_addr{framebuffer.address}; + const size_t pixel_stride{framebuffer.stride / bpp}; - MortonCopyPixels128(framebuffer_info.width, framebuffer_info.height, bpp, 4, - Memory::GetPointer(framebuffer_info.address), gl_framebuffer_data.data(), - true); + // OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately + ASSERT(pixel_stride * bpp == framebuffer.stride); + + MortonCopyPixels128(framebuffer.width, framebuffer.height, bpp, 4, + Memory::GetPointer(framebuffer.address), gl_framebuffer_data.data(), true); LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%llx(%dx%d), fmt %x", size_in_bytes, - framebuffer_info.address, framebuffer_info.width, framebuffer_info.height, - (int)framebuffer_info.pixel_format); + framebuffer.address, framebuffer.width, framebuffer.height, + (int)framebuffer.pixel_format); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default // only allows rows to have a memory alignement of 4. - ASSERT(framebuffer_info.stride % 4 == 0); + ASSERT(framebuffer.stride % 4 == 0); - framebuffer_flip_vertical = framebuffer_info.flip_vertical; + framebuffer_flip_vertical = framebuffer.flip_vertical; // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; screen_info.display_texcoords = MathUtil::Rectangle(0.f, 0.f, 1.f, 1.f); - // Memory::RasterizerFlushRegion(framebuffer_info.address, size_in_bytes); + Rasterizer()->FlushRegion(framebuffer.address, size_in_bytes); state.texture_units[0].texture_2d = screen_info.texture.resource.handle; state.Apply(); glActiveTexture(GL_TEXTURE0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)framebuffer_info.stride); + glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)framebuffer.stride); // Update existing texture // TODO: Test what happens on hardware when you change the framebuffer dimensions so that // they differ from the LCD resolution. // TODO: Applications could theoretically crash Citra here by specifying too large // framebuffer sizes. We should make sure that this cannot happen. - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer_info.width, framebuffer_info.height, + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, screen_info.texture.gl_format, screen_info.texture.gl_type, gl_framebuffer_data.data()); @@ -372,14 +376,14 @@ void RendererOpenGL::InitOpenGLObjects() { } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, - const FramebufferInfo& framebuffer_info) { + const Tegra::FramebufferConfig& framebuffer) { - texture.width = framebuffer_info.width; - texture.height = framebuffer_info.height; + texture.width = framebuffer.width; + texture.height = framebuffer.height; GLint internal_format; - switch (framebuffer_info.pixel_format) { - case FramebufferInfo::PixelFormat::ABGR8: + switch (framebuffer.pixel_format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: // Use RGBA8 and swap in the fragment shader internal_format = GL_RGBA; texture.gl_format = GL_RGBA; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 05bb3c5cf..bd7c2510f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -21,7 +21,7 @@ struct TextureInfo { GLsizei height; GLenum gl_format; GLenum gl_type; - RendererBase::FramebufferInfo::PixelFormat pixel_format; + Tegra::FramebufferConfig::PixelFormat pixel_format; }; /// Structure used for storing information about the display target for each 3DS screen @@ -37,7 +37,7 @@ public: ~RendererOpenGL() override; /// Swap buffers (render frame) - void SwapBuffers(boost::optional framebuffer_info) override; + void SwapBuffers(boost::optional framebuffer) override; /** * Set the emulator window to use for renderer @@ -53,13 +53,14 @@ public: private: void InitOpenGLObjects(); - void ConfigureFramebufferTexture(TextureInfo& texture, const FramebufferInfo& framebuffer_info); + void ConfigureFramebufferTexture(TextureInfo& texture, + const Tegra::FramebufferConfig& framebuffer); void DrawScreens(); void DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, float h); void UpdateFramerate(); // Loads framebuffer from emulated memory into the display information structure - void LoadFBToScreenInfo(const FramebufferInfo& framebuffer_info, ScreenInfo& screen_info); + void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer, ScreenInfo& screen_info); // Fills active OpenGL texture with the given RGBA color. void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); From 8a250de987404034a4cf1a09f244c40947b4be9b Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 21:13:46 -0400 Subject: [PATCH 07/25] video_core: Remove usage of PAddr and replace with VAddr. --- src/video_core/rasterizer_interface.h | 8 ++--- .../renderer_opengl/gl_rasterizer.cpp | 8 ++--- .../renderer_opengl/gl_rasterizer.h | 8 ++--- .../renderer_opengl/gl_rasterizer_cache.cpp | 36 +++++++++---------- .../renderer_opengl/gl_rasterizer_cache.h | 18 +++++----- 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 966e25f34..6514d7ded 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -25,14 +25,14 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory - virtual void FlushRegion(PAddr addr, u32 size) = 0; + virtual void FlushRegion(VAddr addr, u32 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(PAddr addr, u32 size) = 0; + virtual void InvalidateRegion(VAddr addr, u32 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// and invalidated - virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u32 size) = 0; /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 virtual bool AccelerateDisplayTransfer(const void* config) { @@ -51,7 +51,7 @@ public: /// Attempt to use a faster method to display the framebuffer to screen virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, - PAddr framebuffer_addr, u32 pixel_stride, + VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index abc6607b0..20e192ec9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -194,17 +194,17 @@ void RasterizerOpenGL::FlushAll() { res_cache.FlushAll(); } -void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size, nullptr); } -void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); res_cache.InvalidateRegion(addr, size, nullptr); @@ -227,7 +227,7 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { } bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, - PAddr framebuffer_addr, u32 pixel_stride, + VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { ASSERT_MSG(false, "Unimplemented"); return true; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 8f213404d..f5c7b1162 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -32,13 +32,13 @@ public: void DrawTriangles() override; void NotifyMaxwellRegisterChanged(u32 id) override; void FlushAll() override; - void FlushRegion(PAddr addr, u32 size) override; - void InvalidateRegion(PAddr addr, u32 size) override; - void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + void FlushRegion(VAddr addr, u32 size) override; + void InvalidateRegion(VAddr addr, u32 size) override; + void FlushAndInvalidateRegion(VAddr addr, u32 size) override; bool AccelerateDisplayTransfer(const void* config) override; bool AccelerateTextureCopy(const void* config) override; bool AccelerateFill(const void* config) override; - bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, PAddr framebuffer_addr, + bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override; bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 939391639..7ef08980f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -107,7 +107,7 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { } template -static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { +static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; constexpr u32 tile_size = bytes_per_pixel * 64; @@ -115,9 +115,9 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; - const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); - const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); - const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); + const VAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); + const VAddr aligned_start = base + Common::AlignUp(start - base, tile_size); + const VAddr aligned_end = base + Common::AlignDown(end - base, tile_size); ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); @@ -136,7 +136,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr } }; - u8* tile_buffer = Memory::GetPhysicalPointer(start); + u8* tile_buffer = Memory::GetPointer(start); if (start < aligned_start && !morton_to_gl) { std::array tmp_buf; @@ -162,7 +162,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr } } -static constexpr std::array morton_to_gl_fns = { +static constexpr std::array morton_to_gl_fns = { MortonCopy, // 0 MortonCopy, // 1 MortonCopy, // 2 @@ -183,7 +183,7 @@ static constexpr std::array mo MortonCopy // 17 }; -static constexpr std::array gl_to_morton_fns = { +static constexpr std::array gl_to_morton_fns = { MortonCopy, // 0 MortonCopy, // 1 MortonCopy, // 2 @@ -298,9 +298,9 @@ SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; const u32 tiled_size = is_tiled ? 8 : 1; const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - PAddr aligned_start = + VAddr aligned_start = addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - PAddr aligned_end = + VAddr aligned_end = addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); if (aligned_end - aligned_start > stride_tiled_bytes) { @@ -527,10 +527,10 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { +void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { ASSERT(type != SurfaceType::Fill); - const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); + const u8* const texture_src_data = Memory::GetPointer(addr); if (texture_src_data == nullptr) return; @@ -549,7 +549,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); ASSERT(load_start >= addr && load_end <= end); - const u32 start_offset = load_start - addr; + const u64 start_offset = load_start - addr; if (!is_tiled) { ASSERT(type == SurfaceType::Color); @@ -566,8 +566,8 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { - u8* const dst_buffer = Memory::GetPhysicalPointer(addr); +void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { + u8* const dst_buffer = Memory::GetPointer(addr); if (dst_buffer == nullptr) return; @@ -1167,7 +1167,7 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, } } -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u64 size) { +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) { if (size == 0) return; @@ -1227,7 +1227,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, } } -void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surface) { +void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) { if (size == 0) return; @@ -1263,7 +1263,7 @@ void RasterizerCacheOpenGL::FlushAll() { FlushRegion(0, 0xFFFFFFFF); } -void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner) { +void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) { if (size == 0) return; @@ -1356,6 +1356,6 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); } -void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u64 size, int delta) { +void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { // ASSERT_MSG(false, "Unimplemented"); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 828e62852..2172a9d24 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -28,9 +28,9 @@ struct CachedSurface; using Surface = std::shared_ptr; using SurfaceSet = std::set; -using SurfaceRegions = boost::icl::interval_set; -using SurfaceMap = boost::icl::interval_map; -using SurfaceCache = boost::icl::interval_map; +using SurfaceRegions = boost::icl::interval_set; +using SurfaceMap = boost::icl::interval_map; +using SurfaceCache = boost::icl::interval_map; using SurfaceInterval = SurfaceCache::interval_type; static_assert(std::is_same() && @@ -258,8 +258,8 @@ struct CachedSurface : SurfaceParams { size_t gl_buffer_size = 0; // Read/Write data in 3DS memory to/from gl_buffer - void LoadGLBuffer(PAddr load_start, PAddr load_end); - void FlushGLBuffer(PAddr flush_start, PAddr flush_end); + void LoadGLBuffer(VAddr load_start, VAddr load_end); + void FlushGLBuffer(VAddr flush_start, VAddr flush_end); // Upload/Download data in gl_buffer in/to this surface's texture void UploadGLTexture(const MathUtil::Rectangle& rect, GLuint read_fb_handle, @@ -307,10 +307,10 @@ public: SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(PAddr addr, u64 size, Surface flush_surface = nullptr); + void FlushRegion(VAddr addr, u64 size, Surface flush_surface = nullptr); /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) - void InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner); + void InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner); /// Flush all cached resources tracked by this cache manager void FlushAll(); @@ -319,7 +319,7 @@ private: void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, PAddr addr, u64 size); + void ValidateSurface(const Surface& surface, VAddr addr, u64 size); /// Create a new surface Surface CreateSurface(const SurfaceParams& params); @@ -331,7 +331,7 @@ private: void UnregisterSurface(const Surface& surface); /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(PAddr addr, u64 size, int delta); + void UpdatePagesCachedCount(VAddr addr, u64 size, int delta); SurfaceCache surface_cache; PageMap cached_pages; From 740310113b626cc1918213dd569d3a9a644d9280 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 22:52:40 -0400 Subject: [PATCH 08/25] video_core: Move MortonCopyPixels128 to utils header. --- .../renderer_opengl/renderer_opengl.cpp | 112 +----------------- src/video_core/utils.h | 112 ++++++++++++++++++ 2 files changed, 113 insertions(+), 111 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2ea5e91e3..a65270222 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -20,6 +20,7 @@ #include "core/settings.h" #include "core/tracer/recorder.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/utils.h" #include "video_core/video_core.h" static const char vertex_shader[] = R"( @@ -131,117 +132,6 @@ void RendererOpenGL::SwapBuffers(boost::optional Date: Thu, 22 Mar 2018 22:54:04 -0400 Subject: [PATCH 09/25] gl_rasterizer_cache: LoadGLBuffer should do a morton copy. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7ef08980f..175f329e3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -530,7 +530,7 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { ASSERT(type != SurfaceType::Fill); - const u8* const texture_src_data = Memory::GetPointer(addr); + u8* texture_src_data = Memory::GetPointer(addr); if (texture_src_data == nullptr) return; @@ -539,13 +539,6 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { gl_buffer.reset(new u8[gl_buffer_size]); } - // TODO: Should probably be done in ::Memory:: and check for other regions too - if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) - load_end = Memory::VRAM_VADDR_END; - - if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) - load_start = Memory::VRAM_VADDR; - MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); ASSERT(load_start >= addr && load_end <= end); @@ -553,15 +546,11 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { if (!is_tiled) { ASSERT(type == SurfaceType::Color); - std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, - load_end - load_start); + VideoCore::MortonCopyPixels128(width, height, GetFormatBpp(), 4, + texture_src_data + start_offset, &gl_buffer[start_offset], + true); } else { - if (type == SurfaceType::Texture) { - ASSERT_MSG(false, "Unimplemented"); - } else { - morton_to_gl_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, - load_start, load_end); - } + ASSERT_MSG(false, "Unimplemented"); } } From 63d3924b5bb5dd17f1de9dfe3a357df293fc113d Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 22:56:41 -0400 Subject: [PATCH 10/25] memory: Port RasterizerFlushVirtualRegion from Citra. --- src/core/memory.cpp | 39 +++++++++++++++++++++++++++++++++++++++ src/core/memory.h | 20 +++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 4e34d8334..8a83de904 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -42,6 +42,9 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa LOG_DEBUG(HW_Memory, "Mapping %p onto %016" PRIX64 "-%016" PRIX64, memory, base * PAGE_SIZE, (base + size) * PAGE_SIZE); + RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, + FlushMode::FlushAndInvalidate); + VAddr end = base + size; while (base != end) { ASSERT_MSG(base < PAGE_TABLE_NUM_ENTRIES, "out of range mapping at %016" PRIX64, base); @@ -293,6 +296,42 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } +void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { + // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be + // null here + if (VideoCore::g_renderer == nullptr) { + return; + } + + VAddr end = start + size; + + auto CheckRegion = [&](VAddr region_start, VAddr region_end) { + if (start >= region_end || end <= region_start) { + // No overlap with region + return; + } + + VAddr overlap_start = std::max(start, region_start); + VAddr overlap_end = std::min(end, region_end); + u32 overlap_size = overlap_end - overlap_start; + + auto* rasterizer = VideoCore::g_renderer->Rasterizer(); + switch (mode) { + case FlushMode::Flush: + rasterizer->FlushRegion(region_start, overlap_size); + break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(region_start, overlap_size); + break; + case FlushMode::FlushAndInvalidate: + rasterizer->FlushAndInvalidateRegion(region_start, overlap_size); + break; + } + }; + + CheckRegion(HEAP_VADDR, HEAP_VADDR_END); +} + u8 Read8(const VAddr addr) { return Read(addr); } diff --git a/src/core/memory.h b/src/core/memory.h index f406cc848..1c7232115 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -36,7 +36,10 @@ enum class PageType : u8 { Unmapped, /// Page is mapped to regular memory. This is the only type you can get pointers to. Memory, - /// Page is mapped to a memory hook, which intercepts read and write requests. + /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and + /// invalidation + RasterizerCachedMemory, + /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. Special, }; @@ -253,4 +256,19 @@ boost::optional PhysicalToVirtualAddress(PAddr addr); */ u8* GetPhysicalPointer(PAddr address); +enum class FlushMode { + /// Write back modified surfaces to RAM + Flush, + /// Remove region from the cache + Invalidate, + /// Write back modified surfaces to RAM, and also remove them from the cache + FlushAndInvalidate, +}; + +/** + * Flushes and invalidates any externally cached rasterizer resources touching the given virtual + * address region. + */ +void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode); + } // namespace Memory From f61b9f7338b0f3667f605a399c6a1501e621b3a0 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 23:01:57 -0400 Subject: [PATCH 11/25] LoadGLBuffer: Use bytes_per_pixel, not bits. --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 175f329e3..19fb2333a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -546,7 +546,8 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { if (!is_tiled) { ASSERT(type == SurfaceType::Color); - VideoCore::MortonCopyPixels128(width, height, GetFormatBpp(), 4, + const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, texture_src_data + start_offset, &gl_buffer[start_offset], true); } else { From a0b1235f82b2632651cb817f8216cc9af37759a2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 23:06:54 -0400 Subject: [PATCH 12/25] gl_rasterizer: Implement AccelerateDisplay method from Citra. --- .../renderer_opengl/gl_rasterizer.cpp | 34 ++++++++++++++++++- .../renderer_opengl/gl_rasterizer_cache.h | 12 ++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 20e192ec9..b51614c25 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -229,7 +229,39 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { - ASSERT_MSG(false, "Unimplemented"); + if (framebuffer_addr == 0) { + return false; + } + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + + SurfaceParams src_params; + src_params.addr = framebuffer_addr; + src_params.width = std::min(framebuffer.width, pixel_stride); + src_params.height = framebuffer.height; + src_params.stride = pixel_stride; + src_params.is_tiled = false; + src_params.pixel_format = + SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); + src_params.UpdateParams(); + + MathUtil::Rectangle src_rect; + Surface src_surface; + std::tie(src_surface, src_rect) = + res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + + if (src_surface == nullptr) { + return false; + } + + u32 scaled_width = src_surface->GetScaledWidth(); + u32 scaled_height = src_surface->GetScaledHeight(); + + screen_info.display_texcoords = MathUtil::Rectangle( + (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + + screen_info.display_texture = src_surface->texture.handle; + return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 2172a9d24..14f3cdc38 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -22,6 +22,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/gpu.h" #include "video_core/renderer_opengl/gl_resource_manager.h" struct CachedSurface; @@ -115,6 +116,15 @@ struct SurfaceParams { return GetFormatBpp(pixel_format); } + static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { + switch (format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: + return PixelFormat::RGBA8; + default: + UNREACHABLE(); + } + } + static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { SurfaceType a_type = GetFormatType(pixel_format_a); SurfaceType b_type = GetFormatType(pixel_format_b); @@ -257,7 +267,7 @@ struct CachedSurface : SurfaceParams { std::unique_ptr gl_buffer; size_t gl_buffer_size = 0; - // Read/Write data in 3DS memory to/from gl_buffer + // Read/Write data in Switch memory to/from gl_buffer void LoadGLBuffer(VAddr load_start, VAddr load_end); void FlushGLBuffer(VAddr flush_start, VAddr flush_end); From e12c2cf8c692bf7581dda96c601e30c2a969086c Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 23:18:04 -0400 Subject: [PATCH 13/25] nvdisp_disp0: Always flush and invalidate framebuffer region. - Workaround for texture forwarding until we have a better place. --- src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 7cd1d9306..db030a8e2 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -33,6 +33,13 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 addr, offset, width, height, stride, static_cast(format), flip_vertical}; Core::System::GetInstance().perf_stats.EndGameFrame(); + + // TODO(bunnei): The framebuffer region should only be flushed and invalidated if it is written + // to, not every frame. When we find the right place for this, the below line can be removed. + Memory::RasterizerFlushVirtualRegion(framebuffer.address, + framebuffer.width * framebuffer.height * 4, + Memory::FlushMode::FlushAndInvalidate); + VideoCore::g_renderer->SwapBuffers(framebuffer); } From c2c55e0811bf6314047c0b907157c84cad14981f Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 22 Mar 2018 23:28:37 -0400 Subject: [PATCH 14/25] renderer_opengl: Use accelerated framebuffer load with LoadFBToScreenInfo. --- .../renderer_opengl/renderer_opengl.cpp | 56 +++++++++---------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a65270222..047389fee 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -140,49 +140,43 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf const u32 bpp{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)}; const u32 size_in_bytes{framebuffer.stride * framebuffer.height * bpp}; const VAddr framebuffer_addr{framebuffer.address}; - const size_t pixel_stride{framebuffer.stride / bpp}; - - // OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately - ASSERT(pixel_stride * bpp == framebuffer.stride); - - MortonCopyPixels128(framebuffer.width, framebuffer.height, bpp, 4, - Memory::GetPointer(framebuffer.address), gl_framebuffer_data.data(), true); - - LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%llx(%dx%d), fmt %x", size_in_bytes, - framebuffer.address, framebuffer.width, framebuffer.height, - (int)framebuffer.pixel_format); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default // only allows rows to have a memory alignement of 4. ASSERT(framebuffer.stride % 4 == 0); - framebuffer_flip_vertical = framebuffer.flip_vertical; + if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride, + screen_info)) { + // Reset the screen info's display texture to its own permanent texture + screen_info.display_texture = screen_info.texture.resource.handle; + screen_info.display_texcoords = MathUtil::Rectangle(0.f, 0.f, 1.f, 1.f); - // Reset the screen info's display texture to its own permanent texture - screen_info.display_texture = screen_info.texture.resource.handle; - screen_info.display_texcoords = MathUtil::Rectangle(0.f, 0.f, 1.f, 1.f); + Rasterizer()->FlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); - Rasterizer()->FlushRegion(framebuffer.address, size_in_bytes); + VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bpp, 4, + Memory::GetPointer(framebuffer.address), + gl_framebuffer_data.data(), true); - state.texture_units[0].texture_2d = screen_info.texture.resource.handle; - state.Apply(); + state.texture_units[0].texture_2d = screen_info.texture.resource.handle; + state.Apply(); - glActiveTexture(GL_TEXTURE0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)framebuffer.stride); + glActiveTexture(GL_TEXTURE0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(framebuffer.stride)); - // Update existing texture - // TODO: Test what happens on hardware when you change the framebuffer dimensions so that - // they differ from the LCD resolution. - // TODO: Applications could theoretically crash Citra here by specifying too large - // framebuffer sizes. We should make sure that this cannot happen. - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, - screen_info.texture.gl_format, screen_info.texture.gl_type, - gl_framebuffer_data.data()); + // Update existing texture + // TODO: Test what happens on hardware when you change the framebuffer dimensions so that + // they differ from the LCD resolution. + // TODO: Applications could theoretically crash yuzu here by specifying too large + // framebuffer sizes. We should make sure that this cannot happen. + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, + screen_info.texture.gl_format, screen_info.texture.gl_type, + gl_framebuffer_data.data()); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - state.texture_units[0].texture_2d = 0; - state.Apply(); + state.texture_units[0].texture_2d = 0; + state.Apply(); + } } /** From ec4e1a3685d458147ac76f4cf53ea86632d0debd Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 14:58:27 -0400 Subject: [PATCH 15/25] renderer_opengl: Better handling of framebuffer transform flags. --- .../hle/service/nvdrv/devices/nvdisp_disp0.cpp | 4 +--- src/core/hle/service/nvflinger/buffer_queue.h | 2 ++ src/video_core/gpu.h | 5 ++++- .../renderer_opengl/renderer_opengl.cpp | 18 ++++++++++++++++-- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index db030a8e2..f6c2b24a8 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -27,10 +27,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 offset, width, height, stride, format); using PixelFormat = Tegra::FramebufferConfig::PixelFormat; - using Flags = NVFlinger::BufferQueue::BufferTransformFlags; - const bool flip_vertical = static_cast(transform) & static_cast(Flags::FlipV); const Tegra::FramebufferConfig framebuffer{ - addr, offset, width, height, stride, static_cast(format), flip_vertical}; + addr, offset, width, height, stride, static_cast(format), transform}; Core::System::GetInstance().perf_stats.EndGameFrame(); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 686eadca7..1de5767cb 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -47,6 +47,8 @@ public: ~BufferQueue() = default; enum class BufferTransformFlags : u32 { + /// No transform flags are set + Unset = 0x00, /// Flip source image horizontally (around the vertical axis) FlipH = 0x01, /// Flip source image vertically (around the horizontal axis) diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index f3c5e366a..206b3e05e 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -8,6 +8,7 @@ #include #include #include "common/common_types.h" +#include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/memory_manager.h" namespace Tegra { @@ -38,7 +39,9 @@ struct FramebufferConfig { u32 height; u32 stride; PixelFormat pixel_format; - bool flip_vertical; + + using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; + TransformFlags transform_flags; }; namespace Engines { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 047389fee..ef63cbcf0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -141,6 +141,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf const u32 size_in_bytes{framebuffer.stride * framebuffer.height * bpp}; const VAddr framebuffer_addr{framebuffer.address}; + // Framebuffer orientation handling + framebuffer_transform_flags = framebuffer.transform_flags; + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default // only allows rows to have a memory alignement of 4. ASSERT(framebuffer.stride % 4 == 0); @@ -292,8 +295,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, float h) { const auto& texcoords = screen_info.display_texcoords; - const auto& left = framebuffer_flip_vertical ? texcoords.right : texcoords.left; - const auto& right = framebuffer_flip_vertical ? texcoords.left : texcoords.right; + auto left = texcoords.left; + auto right = texcoords.right; + if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset) + if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) { + // Flip the framebuffer vertically + left = texcoords.right; + right = texcoords.left; + } else { + // Other transformations are unsupported + LOG_CRITICAL(HW_GPU, "unsupported framebuffer_transform_flags=%d", + framebuffer_transform_flags); + UNIMPLEMENTED(); + } std::array vertices = {{ ScreenRectVertex(x, y, texcoords.top, right), From cdf541fb5b70da538577932353ed15fac65aef13 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 14:59:14 -0400 Subject: [PATCH 16/25] renderer_opengl: Add framebuffer_transform_flags member variable. --- src/video_core/renderer_opengl/renderer_opengl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index bd7c2510f..29516baf4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -88,6 +88,6 @@ private: GLuint attrib_position; GLuint attrib_tex_coord; - /// Flips the framebuffer vertically when true - bool framebuffer_flip_vertical; + /// Used for transforming the framebuffer orientation + Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; }; From 11047d7fd511fd9ae6130da7bc824fefa6fb64c1 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 15:01:45 -0400 Subject: [PATCH 17/25] rasterizer: Flush and invalidate regions should be 64-bit. --- src/core/memory.cpp | 4 ++-- src/core/memory.h | 2 +- src/video_core/rasterizer_interface.h | 6 +++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 +++--- src/video_core/renderer_opengl/gl_rasterizer.h | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 8a83de904..d8aab7090 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -296,7 +296,7 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } -void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { +void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here if (VideoCore::g_renderer == nullptr) { @@ -313,7 +313,7 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) { VAddr overlap_start = std::max(start, region_start); VAddr overlap_end = std::min(end, region_end); - u32 overlap_size = overlap_end - overlap_start; + u64 overlap_size = overlap_end - overlap_start; auto* rasterizer = VideoCore::g_renderer->Rasterizer(); switch (mode) { diff --git a/src/core/memory.h b/src/core/memory.h index 1c7232115..3e2c3f23d 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -269,6 +269,6 @@ enum class FlushMode { * Flushes and invalidates any externally cached rasterizer resources touching the given virtual * address region. */ -void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode); +void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode); } // namespace Memory diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 6514d7ded..a493e1d60 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -25,14 +25,14 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory - virtual void FlushRegion(VAddr addr, u32 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(VAddr addr, u32 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory /// and invalidated - virtual void FlushAndInvalidateRegion(VAddr addr, u32 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 virtual bool AccelerateDisplayTransfer(const void* config) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b51614c25..09828e48d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -194,17 +194,17 @@ void RasterizerOpenGL::FlushAll() { res_cache.FlushAll(); } -void RasterizerOpenGL::FlushRegion(VAddr addr, u32 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(VAddr addr, u32 size) { +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size, nullptr); } -void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u32 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); res_cache.InvalidateRegion(addr, size, nullptr); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f5c7b1162..b387f383b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -32,9 +32,9 @@ public: void DrawTriangles() override; void NotifyMaxwellRegisterChanged(u32 id) override; void FlushAll() override; - void FlushRegion(VAddr addr, u32 size) override; - void InvalidateRegion(VAddr addr, u32 size) override; - void FlushAndInvalidateRegion(VAddr addr, u32 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; bool AccelerateDisplayTransfer(const void* config) override; bool AccelerateTextureCopy(const void* config) override; bool AccelerateFill(const void* config) override; From 1a158dfcd660058d2f87cb6eb4662861db203386 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 15:10:02 -0400 Subject: [PATCH 18/25] memory: RasterizerFlushVirtualRegion should also check process image region. --- src/core/memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index d8aab7090..fd5a57022 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -329,6 +329,7 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { } }; + CheckRegion(PROCESS_IMAGE_VADDR, PROCESS_IMAGE_VADDR_END); CheckRegion(HEAP_VADDR, HEAP_VADDR_END); } From b36b627d4d117cf667f697ade45c2fc3dd6a1f66 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 15:25:16 -0400 Subject: [PATCH 19/25] RasterizerCacheOpenGL: FlushAll should flush full memory region. --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 19fb2333a..f54c5aff3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1250,7 +1250,7 @@ void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surf } void RasterizerCacheOpenGL::FlushAll() { - FlushRegion(0, 0xFFFFFFFF); + FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); } void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) { From b5f3e7951b69fe689408f4560c83860302c540e5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 15:46:21 -0400 Subject: [PATCH 20/25] memory: Fix typo in RasterizerFlushVirtualRegion. --- src/core/memory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index fd5a57022..0eca4e76e 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -318,13 +318,13 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { auto* rasterizer = VideoCore::g_renderer->Rasterizer(); switch (mode) { case FlushMode::Flush: - rasterizer->FlushRegion(region_start, overlap_size); + rasterizer->FlushRegion(overlap_start, overlap_size); break; case FlushMode::Invalidate: - rasterizer->InvalidateRegion(region_start, overlap_size); + rasterizer->InvalidateRegion(overlap_start, overlap_size); break; case FlushMode::FlushAndInvalidate: - rasterizer->FlushAndInvalidateRegion(region_start, overlap_size); + rasterizer->FlushAndInvalidateRegion(overlap_start, overlap_size); break; } }; From 054393917e99d307eea0aabc78c0c6e5e709b2c7 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 15:49:04 -0400 Subject: [PATCH 21/25] renderer_opengl: Fixes for properly flushing & rendering the framebuffer. --- .../hle/service/nvdrv/devices/nvdisp_disp0.cpp | 6 ------ .../renderer_opengl/renderer_opengl.cpp | 18 ++++++++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index f6c2b24a8..87b3a2d74 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -32,12 +32,6 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 Core::System::GetInstance().perf_stats.EndGameFrame(); - // TODO(bunnei): The framebuffer region should only be flushed and invalidated if it is written - // to, not every frame. When we find the right place for this, the below line can be removed. - Memory::RasterizerFlushVirtualRegion(framebuffer.address, - framebuffer.width * framebuffer.height * 4, - Memory::FlushMode::FlushAndInvalidate); - VideoCore::g_renderer->SwapBuffers(framebuffer); } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index ef63cbcf0..4628f6db0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -137,9 +137,15 @@ void RendererOpenGL::SwapBuffers(boost::optional(0.f, 0.f, 1.f, 1.f); - Rasterizer()->FlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); + Rasterizer()->FlushRegion(framebuffer_addr, size_in_bytes); - VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bpp, 4, - Memory::GetPointer(framebuffer.address), + VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, + Memory::GetPointer(framebuffer_addr), gl_framebuffer_data.data(), true); state.texture_units[0].texture_2d = screen_info.texture.resource.handle; From 0f8401906b9616f853bb8baf43424dc354f736e2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 15:52:14 -0400 Subject: [PATCH 22/25] renderer_opengl: Only invalidate the framebuffer region, not flush. --- src/video_core/renderer_opengl/renderer_opengl.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 4628f6db0..1a24855d7 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -141,11 +141,10 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - // TODO(bunnei): The framebuffer region should only be flushed and invalidated if it is - // written to, not every frame. When we find the right place for this, the below line can be - // removed. + // TODO(bunnei): The framebuffer region should only be invalidated if it is written to, not + // every frame. When we find the right place for this, the below line can be removed. Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, - Memory::FlushMode::FlushAndInvalidate); + Memory::FlushMode::Invalidate); // Framebuffer orientation handling framebuffer_transform_flags = framebuffer.transform_flags; From b7da9d5a5410fc55c24ed49d9ed24c593b10e9e5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 16:54:20 -0400 Subject: [PATCH 23/25] gl_rasterizer_cache: Add missing include for vm_manager. --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f54c5aff3..78fa7c051 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -22,6 +22,7 @@ #include "common/scope_exit.h" #include "common/vector_math.h" #include "core/frontend/emu_window.h" +#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" From 4ed54738fc7777781213cd160cb0f4255a5c7c26 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 22:24:16 -0400 Subject: [PATCH 24/25] gl_rasterizer: Log warning instead of sync'ing unimplemented funcs. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 09828e48d..12a1ab09b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -127,13 +127,7 @@ RasterizerOpenGL::RasterizerOpenGL() { glEnable(GL_BLEND); - // Sync fixed function OpenGL state - SyncClipEnabled(); - SyncClipCoef(); - SyncCullMode(); - SyncBlendEnabled(); - SyncBlendFuncs(); - SyncBlendColor(); + LOG_WARNING(HW_GPU, "Sync fixed function OpenGL state here when ready"); } RasterizerOpenGL::~RasterizerOpenGL() { From d561e4acc8bd3f9a7d677fe713ab0a748ff7dd9c Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 23 Mar 2018 22:27:53 -0400 Subject: [PATCH 25/25] gl_rasterizer: Fake render in green, because it's cooler. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 12a1ab09b..286491b73 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -289,7 +289,7 @@ out vec4 color; uniform sampler2D color_texture; void main() { - color = vec4(1.0, 0.0, 0.0, 1.0); + color = vec4(1.0, 0.0, 1.0, 0.0); } )";