From bc930754cc9437ddd86e7d246b3eb4302540896a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:57:16 -0400 Subject: [PATCH] Implement Texture Cache V2 --- .../renderer_opengl/gl_rasterizer.cpp | 69 ++- .../renderer_opengl/gl_texture_cache.cpp | 286 ++++------- .../renderer_opengl/gl_texture_cache.h | 117 ++--- src/video_core/renderer_opengl/utils.cpp | 23 +- src/video_core/renderer_opengl/utils.h | 6 +- src/video_core/texture_cache/texture_cache.h | 462 ++++++++++++++---- 6 files changed, 575 insertions(+), 388 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 027e9d293c..482d0428c9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -83,10 +83,10 @@ struct FramebufferCacheKey { bool stencil_enable = false; std::array color_attachments{}; - std::array colors{}; + std::array colors{}; u32 colors_count = 0; - CachedSurfaceView* zeta = nullptr; + View zeta = nullptr; auto Tie() const { return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, @@ -115,6 +115,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind RasterizerOpenGL::~RasterizerOpenGL() {} +void RasterizerOpenGL::InitMemoryMananger(Tegra::MemoryManager& memory_manager) { + texture_cache.InitMemoryMananger(memory_manager); +} + void RasterizerOpenGL::CheckExtensions() { if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { LOG_WARNING( @@ -474,9 +478,11 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( } current_framebuffer_config_state = fb_config_state; - CachedSurfaceView* depth_surface{}; + View depth_surface{}; if (using_depth_fb) { depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); + } else { + texture_cache.SetEmptyDepthBuffer(); } UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); @@ -489,38 +495,41 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( if (using_color_fb) { if (single_color_target) { // Used when just a single color attachment is enabled, e.g. for clearing a color buffer - CachedSurfaceView* color_surface{ + View color_surface{ texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)}; if (color_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. - color_surface->MarkAsModified(true); + texture_cache.MarkColorBufferInUse(*single_color_target); // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().GetSrgbConversion(); + state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; } fbkey.is_single_buffer = true; fbkey.color_attachments[0] = GL_COLOR_ATTACHMENT0 + static_cast(*single_color_target); fbkey.colors[0] = color_surface; + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + if (index != *single_color_target) { + texture_cache.SetEmptyColorBuffer(index); + } + } } else { // Multiple color attachments are enabled for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - CachedSurfaceView* color_surface{ - texture_cache.GetColorBufferSurface(index, preserve_contents)}; + View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)}; if (color_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even // if the shader doesn't actually write to it. - color_surface->MarkAsModified(true); + texture_cache.MarkColorBufferInUse(index); // Enable sRGB only for supported formats // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().GetSrgbConversion(); + color_surface->GetSurfaceParams().srgb_conversion; } fbkey.color_attachments[index] = @@ -538,11 +547,11 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( if (depth_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. - depth_surface->MarkAsModified(true); + texture_cache.MarkDepthBufferInUse(); fbkey.zeta = depth_surface; - fbkey.stencil_enable = regs.stencil_enable && depth_surface->GetSurfaceParams().GetType() == - SurfaceType::DepthStencil; + fbkey.stencil_enable = regs.stencil_enable && + depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } SetupCachedFramebuffer(fbkey, current_state); @@ -728,11 +737,27 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { buffer_cache.InvalidateRegion(addr, size); } +void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + if (!addr || !size) { + return; + } + texture_cache.InvalidateRegionEx(gpu_addr, size); + shader_cache.InvalidateRegion(addr, size); + global_cache.InvalidateRegion(addr, size); + buffer_cache.InvalidateRegion(addr, size); +} + void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } +void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { + FlushRegion(addr, size); + InvalidateRegionEx(gpu_addr, addr, size); +} + bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, @@ -740,7 +765,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs MICROPROFILE_SCOPE(OpenGL_Blits); const auto src_surface{texture_cache.GetFermiSurface(src)}; const auto dst_surface{texture_cache.GetFermiSurface(dst)}; - blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); + // blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); return true; } @@ -762,10 +787,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, const auto& params{surface->GetSurfaceParams()}; const auto& pixel_format{ VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; - ASSERT_MSG(params.GetWidth() == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.GetHeight() == config.height, "Framebuffer height is different"); + ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - if (params.GetPixelFormat() != pixel_format) { + if (params.pixel_format != pixel_format) { LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); } @@ -860,10 +885,10 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); - if (const auto surface{texture_cache.GetTextureSurface(texture)}; surface) { - state.texture_units[current_bindpoint].texture = surface->GetTexture( - entry.GetType(), entry.IsArray(), texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); + if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { + view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, + texture.tic.w_source); + state.texture_units[current_bindpoint].texture = view->GetTexture(); } else { // Can occur when texture addr is null or its memory is unmapped/invalid state.texture_units[current_bindpoint].texture = 0; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index f7c2f46aa4..871608f6dc 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -4,7 +4,9 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/microprofile.h" #include "common/scope_exit.h" +#include "core/core.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" @@ -22,6 +24,9 @@ using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; +MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); + namespace { struct FormatTuple { @@ -129,8 +134,8 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType compon return format; } -GLenum GetTextureTarget(const SurfaceParams& params) { - switch (params.GetTarget()) { +GLenum GetTextureTarget(const SurfaceTarget& target) { + switch (target) { case SurfaceTarget::Texture1D: return GL_TEXTURE_1D; case SurfaceTarget::Texture2D: @@ -175,8 +180,8 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.GetNumLevels() - 1); - if (params.GetNumLevels() == 1) { + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); + if (params.num_levels == 1) { glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); } } @@ -185,21 +190,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte OGLTexture texture; texture.Create(target); - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth()); + glTextureStorage1D(texture.handle, params.num_levels, internal_format, params.width); break; case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth(), params.GetHeight()); + glTextureStorage2D(texture.handle, params.num_levels, internal_format, params.width, + params.height); break; case SurfaceTarget::Texture3D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth(), params.GetHeight(), params.GetDepth()); + glTextureStorage3D(texture.handle, params.num_levels, internal_format, params.width, + params.height, params.depth); break; default: UNREACHABLE(); @@ -212,54 +216,72 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte } // Anonymous namespace -CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params) - : VideoCommon::SurfaceBase{texture_cache, params} { - const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; +CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) + : VideoCommon::SurfaceBase(gpu_addr, params) { + const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)}; internal_format = tuple.internal_format; format = tuple.format; type = tuple.type; is_compressed = tuple.compressed; - target = GetTextureTarget(params); + target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format); + DecorateSurfaceName(); + ViewParams main{}; + main.num_levels = params.num_levels; + main.base_level = 0; + main.base_layer = 0; + main.num_layers = params.is_layered ? params.depth : 1; + main.target = params.target; + main_view = CreateView(main); + main_view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); } -CachedSurface::~CachedSurface() = default; +CachedSurface::~CachedSurface() { + views.clear(); + main_view = nullptr; +} + +void CachedSurface::DownloadTexture(std::vector& staging_buffer) { + LOG_CRITICAL(Render_OpenGL, "Flushing"); + MICROPROFILE_SCOPE(OpenGL_Texture_Download); -void CachedSurface::DownloadTexture() { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { + for (u32 level = 0; level < params.num_levels; ++level) { glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); if (is_compressed) { glGetCompressedTextureImage(texture.handle, level, static_cast(params.GetHostMipmapSize(level)), - GetStagingBufferLevelData(level)); + staging_buffer.data() + mip_offset); } else { glGetTextureImage(texture.handle, level, format, type, static_cast(params.GetHostMipmapSize(level)), - GetStagingBufferLevelData(level)); + staging_buffer.data() + mip_offset); } } } -void CachedSurface::UploadTexture() { +void CachedSurface::UploadTexture(std::vector& staging_buffer) { + MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - UploadTextureMipmap(level); + for (u32 level = 0; level < params.num_levels; ++level) { + UploadTextureMipmap(level, staging_buffer); } } -void CachedSurface::UploadTextureMipmap(u32 level) { +void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - u8* buffer{GetStagingBufferLevelData(level)}; + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture2D: glCompressedTextureSubImage2D(texture.handle, level, 0, 0, static_cast(params.GetMipWidth(level)), @@ -277,7 +299,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { break; case SurfaceTarget::TextureCubemap: { const std::size_t layer_size{params.GetHostLayerSize(level)}; - for (std::size_t face = 0; face < params.GetDepth(); ++face) { + for (std::size_t face = 0; face < params.depth; ++face) { glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), static_cast(params.GetMipWidth(level)), static_cast(params.GetMipHeight(level)), 1, @@ -291,7 +313,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { UNREACHABLE(); } } else { - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, buffer); @@ -310,7 +332,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { static_cast(params.GetMipDepth(level)), format, type, buffer); break; case SurfaceTarget::TextureCubemap: - for (std::size_t face = 0; face < params.GetDepth(); ++face) { + for (std::size_t face = 0; face < params.depth; ++face) { glTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), params.GetMipWidth(level), params.GetMipHeight(level), 1, format, type, buffer); @@ -324,61 +346,57 @@ void CachedSurface::UploadTextureMipmap(u32 level) { } void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), - params.GetTarget() == SurfaceTarget::Texture3D ? "3D" : ""); + LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); } -std::unique_ptr CachedSurface::CreateView(const ViewKey& view_key) { - return std::make_unique(*this, view_key); +void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { + LabelGLObject(GL_TEXTURE, texture_view.texture.handle, gpu_addr, prefix); } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, ViewKey key) - : surface{surface}, key{key}, params{surface.GetSurfaceParams()} {} +View CachedSurface::CreateView(const ViewParams& view_key) { + auto view = std::make_shared(*this, view_key); + views[view_key] = view; + view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); + return view; +} + +CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params) + : VideoCommon::ViewBase(params), surface{surface} { + target = GetTextureTarget(params.target); + texture_view = CreateTextureView(); +} CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment) const { - ASSERT(key.num_layers == 1 && key.num_levels == 1); + ASSERT(params.num_layers == 1 && params.num_levels == 1); - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), key.base_level); + glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, target, + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), key.base_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, target, + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), - key.base_level, key.base_layer); + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, target, + params.base_level, params.base_layer); break; default: UNIMPLEMENTED(); } } -GLuint CachedSurfaceView::GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, - SwizzleSource x_source, SwizzleSource y_source, +void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, SwizzleSource w_source) { - const auto [texture_view, target] = GetTextureView(texture_type, is_array); - if (texture_view.get().texture.handle == 0) { - texture_view.get() = std::move(CreateTextureView(target)); - } - ApplySwizzle(texture_view, x_source, y_source, z_source, w_source); - return texture_view.get().texture.handle; -} - -void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_source, - SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - const std::array swizzle = {x_source, y_source, z_source, w_source}; - if (swizzle == texture_view.swizzle) { + u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); + if (swizzle == texture_view.swizzle) return; - } const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; @@ -386,146 +404,48 @@ void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_ texture_view.swizzle = swizzle; } -CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView(GLenum target) const { +CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView() const { + const auto& owner_params = surface.GetSurfaceParams(); TextureView texture_view; - glGenTextures(1, &texture_view.texture.handle); + texture_view.texture.Create(); const GLuint handle{texture_view.texture.handle}; - const FormatTuple& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; + const FormatTuple& tuple{ + GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; - glTextureView(handle, target, surface.texture.handle, tuple.internal_format, key.base_level, - key.num_levels, key.base_layer, key.num_layers); - ApplyTextureDefaults(params, handle); + glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, + params.num_levels, params.base_layer, params.num_layers); + + ApplyTextureDefaults(owner_params, handle); + + u32 swizzle = + EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); + texture_view.swizzle = swizzle; return texture_view; } -std::pair, GLenum> -CachedSurfaceView::GetTextureView(Tegra::Shader::TextureType texture_type, bool is_array) { - using Pair = std::pair, GLenum>; - switch (texture_type) { - case Tegra::Shader::TextureType::Texture1D: - return is_array ? Pair{texture_view_1d_array, GL_TEXTURE_1D_ARRAY} - : Pair{texture_view_1d, GL_TEXTURE_1D}; - case Tegra::Shader::TextureType::Texture2D: - return is_array ? Pair{texture_view_2d_array, GL_TEXTURE_2D_ARRAY} - : Pair{texture_view_2d, GL_TEXTURE_2D}; - case Tegra::Shader::TextureType::Texture3D: - ASSERT(!is_array); - return {texture_view_3d, GL_TEXTURE_3D}; - case Tegra::Shader::TextureType::TextureCube: - return is_array ? Pair{texture_view_cube_array, GL_TEXTURE_CUBE_MAP_ARRAY} - : Pair{texture_view_cube, GL_TEXTURE_CUBE_MAP}; - } - UNREACHABLE(); -} - TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : TextureCacheBase{system, rasterizer} {} TextureCacheOpenGL::~TextureCacheOpenGL() = default; -CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, - const SurfaceParams& new_params, - bool preserve_contents, - const std::vector& overlaps) { - if (overlaps.size() > 1) { - return TryCopyAsViews(gpu_addr, cpu_addr, host_ptr, new_params, overlaps); - } - - const auto& old_surface{overlaps[0]}; - const auto& old_params{old_surface->GetSurfaceParams()}; - if (old_params.GetTarget() == new_params.GetTarget() && - old_params.GetDepth() == new_params.GetDepth() && old_params.GetDepth() == 1 && - old_params.GetNumLevels() == new_params.GetNumLevels() && - old_params.GetPixelFormat() == new_params.GetPixelFormat()) { - return SurfaceCopy(gpu_addr, cpu_addr, host_ptr, new_params, old_surface, old_params); - } - - return nullptr; +Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { + return std::make_shared(gpu_addr, params); } -CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - const Surface& old_surface, - const SurfaceParams& old_params) { - const auto new_surface{GetUncachedSurface(new_params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); - - const u32 min_width{ - std::max(old_params.GetDefaultBlockWidth(), new_params.GetDefaultBlockWidth())}; - const u32 min_height{ - std::max(old_params.GetDefaultBlockHeight(), new_params.GetDefaultBlockHeight())}; - for (u32 level = 0; level < old_params.GetNumLevels(); ++level) { - const u32 width{std::min(old_params.GetMipWidth(level), new_params.GetMipWidth(level))}; - const u32 height{std::min(old_params.GetMipHeight(level), new_params.GetMipHeight(level))}; - if (width < min_width || height < min_height) { - // Avoid copies that are too small to be handled in OpenGL - break; - } - glCopyImageSubData(old_surface->GetTexture(), old_surface->GetTarget(), level, 0, 0, 0, - new_surface->GetTexture(), new_surface->GetTarget(), level, 0, 0, 0, - width, height, 1); - } - - new_surface->MarkAsModified(true); - - // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(gpu_addr, new_params); -} - -CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, const SurfaceParams& new_params, - const std::vector& overlaps) { - if (new_params.GetTarget() == SurfaceTarget::Texture1D || - new_params.GetTarget() == SurfaceTarget::Texture1DArray || - new_params.GetTarget() == SurfaceTarget::Texture3D) { - // Non-2D textures are not handled at the moment in this fast path. - return nullptr; - } - - const auto new_surface{GetUncachedSurface(new_params)}; - // TODO(Rodrigo): Move this down - Register(new_surface, gpu_addr, cpu_addr, host_ptr); - - // TODO(Rodrigo): Find a way to avoid heap allocations here. - std::vector views; - views.reserve(overlaps.size()); - for (const auto& overlap : overlaps) { - const auto view{ - new_surface->TryGetView(overlap->GetGpuAddr(), overlap->GetSurfaceParams())}; - if (!view) { - // TODO(Rodrigo): Remove this - Unregister(new_surface); - return nullptr; - } - views.push_back(view); - } - - // TODO(Rodrigo): It's possible that these method leaves some unloaded textures if the data has - // been uploaded to guest memory but not used as a surface previously. - for (std::size_t i = 0; i < overlaps.size(); ++i) { - const auto& overlap{overlaps[i]}; - const auto& view{views[i]}; - for (u32 overlap_level = 0; overlap_level < view->GetNumLevels(); ++overlap_level) { - const u32 super_level{view->GetBaseLevel() + overlap_level}; - glCopyImageSubData(overlap->GetTexture(), overlap->GetTarget(), overlap_level, 0, 0, 0, - new_surface->GetTexture(), new_surface->GetTarget(), super_level, 0, - 0, view->GetBaseLayer(), view->GetWidth(), view->GetHeight(), - view->GetNumLayers()); - } - } - - new_surface->MarkAsModified(true); - - // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(gpu_addr, new_params); -} - -Surface TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { - return std::make_unique(*this, params); +void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, + const VideoCommon::CopyParams& copy_params) { + const auto src_handle = src_surface->GetTexture(); + const auto src_target = src_surface->GetTarget(); + const auto dst_handle = dst_surface->GetTexture(); + const auto dst_target = dst_surface->GetTarget(); + glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, + copy_params.source_y, copy_params.source_z, dst_handle, dst_target, + copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, + copy_params.dest_z, copy_params.width, copy_params.height, + copy_params.depth); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index c65e371536..1722c1bbc7 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -19,24 +19,25 @@ namespace OpenGL { using VideoCommon::SurfaceParams; -using VideoCommon::ViewKey; +using VideoCommon::ViewParams; class CachedSurfaceView; class CachedSurface; class TextureCacheOpenGL; using Surface = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCache; +using View = std::shared_ptr; +using TextureCacheBase = VideoCommon::TextureCache; -class CachedSurface final : public VideoCommon::SurfaceBase { +class CachedSurface final : public VideoCommon::SurfaceBase { friend CachedSurfaceView; public: - explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params); + explicit CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); - void UploadTexture(); - void DownloadTexture(); + void UploadTexture(std::vector& staging_buffer) override; + void DownloadTexture(std::vector& staging_buffer) override; GLenum GetTarget() const { return target; @@ -49,99 +50,79 @@ public: protected: void DecorateSurfaceName(); - std::unique_ptr CreateView(const ViewKey& view_key); + View CreateView(const ViewParams& view_key) override; private: - void UploadTextureMipmap(u32 level); + void UploadTextureMipmap(u32 level, std::vector& staging_buffer); GLenum internal_format{}; GLenum format{}; GLenum type{}; bool is_compressed{}; GLenum target{}; + u32 view_count{}; OGLTexture texture; }; -class CachedSurfaceView final { +class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(CachedSurface& surface, ViewKey key); + explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params); ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment) const; - GLuint GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, - Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - void MarkAsModified(bool is_modified) { - surface.MarkAsModified(is_modified); + GLuint GetTexture() { + return texture_view.texture.handle; } const SurfaceParams& GetSurfaceParams() const { - return params; + return surface.GetSurfaceParams(); } u32 GetWidth() const { - return params.GetMipWidth(GetBaseLevel()); + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipWidth(params.base_level); } u32 GetHeight() const { - return params.GetMipHeight(GetBaseLevel()); + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipHeight(params.base_level); } u32 GetDepth() const { - return params.GetMipDepth(GetBaseLevel()); + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipDepth(params.base_level); } - u32 GetBaseLayer() const { - return key.base_layer; - } - - u32 GetNumLayers() const { - return key.num_layers; - } - - u32 GetBaseLevel() const { - return key.base_level; - } - - u32 GetNumLevels() const { - return key.num_levels; - } - -private: - struct TextureView { - OGLTexture texture; - std::array swizzle{ - Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, - Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A}; - }; - - void ApplySwizzle(TextureView& texture_view, Tegra::Texture::SwizzleSource x_source, + void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source); - TextureView CreateTextureView(GLenum target) const; + void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); - std::pair, GLenum> GetTextureView( - Tegra::Shader::TextureType texture_type, bool is_array); +private: + struct TextureView { + OGLTextureView texture; + u32 swizzle; + }; + + u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source) const { + return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | + (static_cast(z_source) << 8) | static_cast(w_source); + } + + TextureView CreateTextureView() const; CachedSurface& surface; - const ViewKey key; - const SurfaceParams params; + GLenum target{}; - TextureView texture_view_1d; - TextureView texture_view_1d_array; - TextureView texture_view_2d; - TextureView texture_view_2d_array; - TextureView texture_view_3d; - TextureView texture_view_cube; - TextureView texture_view_cube_array; + TextureView texture_view; }; class TextureCacheOpenGL final : public TextureCacheBase { @@ -150,21 +131,9 @@ public: ~TextureCacheOpenGL(); protected: - CachedSurfaceView* TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - bool preserve_contents, - const std::vector& overlaps); - - Surface CreateSurface(const SurfaceParams& params); - -private: - CachedSurfaceView* SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, const Surface& old_surface, - const SurfaceParams& old_params); - - CachedSurfaceView* TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - const std::vector& overlaps); + Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; + void ImageCopy(Surface src_surface, Surface dst_surface, + const VideoCommon::CopyParams& copy_params) override; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 5994c0c616..a9fa539a5e 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -56,8 +56,7 @@ SurfaceBlitter::SurfaceBlitter() { SurfaceBlitter::~SurfaceBlitter() = default; -void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, - const Common::Rectangle& src_rect, +void SurfaceBlitter::Blit(View src, View dst, const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) const { const auto& src_params{src->GetSurfaceParams()}; const auto& dst_params{dst->GetSurfaceParams()}; @@ -72,17 +71,13 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, u32 buffers{}; - UNIMPLEMENTED_IF(src_params.GetTarget() != SurfaceTarget::Texture2D); - UNIMPLEMENTED_IF(dst_params.GetTarget() != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); - const auto GetTexture = [](CachedSurfaceView* view) { - return view->GetTexture(TextureType::Texture2D, false, SwizzleSource::R, SwizzleSource::G, - SwizzleSource::B, SwizzleSource::A); - }; - const GLuint src_texture{GetTexture(src)}; - const GLuint dst_texture{GetTexture(dst)}; + const GLuint src_texture{src->GetTexture()}; + const GLuint dst_texture{dst->GetTexture()}; - if (src_params.GetType() == SurfaceType::ColorTexture) { + if (src_params.type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_texture, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, @@ -94,7 +89,7 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, 0); buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.GetType() == SurfaceType::Depth) { + } else if (src_params.type == SurfaceType::Depth) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, 0); @@ -106,7 +101,7 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.GetType() == SurfaceType::DepthStencil) { + } else if (src_params.type == SurfaceType::DepthStencil) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_texture, 0); @@ -148,4 +143,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_vie glObjectLabel(identifier, handle, -1, static_cast(object_label.c_str())); } -} // namespace OpenGL \ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index e7726d14eb..8977d2383f 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -39,8 +39,8 @@ public: explicit SurfaceBlitter(); ~SurfaceBlitter(); - void Blit(CachedSurfaceView* src, CachedSurfaceView* dst, - const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) const; + void Blit(View src, View dst, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) const; private: OGLFramebuffer src_framebuffer; @@ -49,4 +49,4 @@ private: void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); -} // namespace OpenGL \ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c5c01957a1..eb0d9bc101 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -22,6 +22,7 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" +#include "video_core/texture_cache/copy_params.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" @@ -40,32 +41,42 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; + template class TextureCache { - using IntervalMap = boost::icl::interval_map>>; + using IntervalMap = boost::icl::interval_map>; using IntervalType = typename IntervalMap::interval_type; public: + void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { + this->memory_manager = &memory_manager; + } + void InvalidateRegion(CacheAddr addr, std::size_t size) { for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (!surface->IsRegistered()) { - // Skip duplicates - continue; - } Unregister(surface); } } - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { + Unregister(surface); + } + } + + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(gpu_addr, params, true); + const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; + return GetSurface(gpu_addr, params, true).second; } - TView* GetDepthBufferSurface(bool preserve_contents) { + TView GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { @@ -75,36 +86,75 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(false); + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(true); + return surface_view.second; } - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { + TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + SetEmptyColorBuffer(index); return {}; } - auto& memory_manager{system.GPU().MemoryManager()}; - const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; - const auto gpu_addr{config.Address() + - config.base_layer * config.layer_stride * sizeof(u32)}; + const auto& config{regs.rt[index]}; + const auto gpu_addr{config.Address()}; if (!gpu_addr) { + SetEmptyColorBuffer(index); return {}; } - return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(false); + render_targets[index].target = surface_view.first; + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(true); + return surface_view.second; } - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), - true); + void MarkColorBufferInUse(std::size_t index) { + if (render_targets[index].target) + render_targets[index].target->MarkAsModified(true, Tick()); } - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + void MarkDepthBufferInUse() { + if (depth_buffer.target) + depth_buffer.target->MarkAsModified(true, Tick()); + } + + void SetEmptyDepthBuffer() { + if (depth_buffer.target != nullptr) { + depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; + } + } + + void SetEmptyColorBuffer(std::size_t index) { + if (render_targets[index].target != nullptr) { + render_targets[index].target->MarkAsProtected(false); + std::memset(&render_targets[index].config, sizeof(RenderTargetConfig), 0); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; + } + } + + TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).second; + } + + TSurface TryFindFramebufferSurface(const u8* host_ptr) const { const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; return it != registered_surfaces.end() ? *it->second.begin() : nullptr; } @@ -115,126 +165,334 @@ public: protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer} { + for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + SetEmptyColorBuffer(i); + } + SetEmptyDepthBuffer(); + } ~TextureCache() = default; - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; + virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + const CopyParams& copy_params) = 0; - void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr) { - surface->Register(gpu_addr, cpu_addr, host_ptr); - registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + void Register(TSurface surface) { + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const std::size_t size = surface->GetSizeInBytes(); + const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); + if (!host_ptr || !cpu_addr) { + LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", + gpu_addr); + return; + } + surface->SetHostPtr(host_ptr); + surface->SetCpuAddr(*cpu_addr); + registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + RegisterInnerCache(surface); + surface->MarkAsRegistered(true); } - void Unregister(std::shared_ptr surface) { - registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); - surface->Unregister(); + void Unregister(TSurface surface) { + if (surface->IsProtected()) + return; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + const void* host_ptr = surface->GetHostPtr(); + const std::size_t size = surface->GetSizeInBytes(); + const VAddr cpu_addr = surface->GetCpuAddr(); + registered_surfaces.erase(GetInterval(host_ptr, size)); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + UnregisterInnerCache(surface); + surface->MarkAsRegistered(false); + ReserveSurface(surface->GetSurfaceParams(), surface); } - std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { - if (const auto surface = TryGetReservedSurface(params); surface) + TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) { + surface->SetGpuAddr(gpu_addr); return surface; + } // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(params)}; - ReserveSurface(params, new_surface); + auto new_surface{CreateSurface(gpu_addr, params)}; return new_surface; } Core::System& system; private: - TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - DEBUG_ASSERT(cpu_addr); + enum class RecycleStrategy : u32 { + Ignore = 0, + Flush = 1, + BufferCopy = 3, + }; - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool untopological) { + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } + // 3D Textures decision + if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; + } + for (auto s : overlaps) { + const auto& s_params = s->GetSurfaceParams(); + if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; + } + } + return RecycleStrategy::Ignore; + } + + std::pair RecycleSurface(std::vector& overlaps, + const SurfaceParams& params, const GPUVAddr gpu_addr, + const u8* host_ptr, const bool preserve_contents, + const bool untopological) { + for (auto surface : overlaps) { + Unregister(surface); + } + RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation + ? PickStrategy(overlaps, params, gpu_addr, untopological) + : RecycleStrategy::Flush; + switch (strategy) { + case RecycleStrategy::Ignore: { + return InitializeSurface(gpu_addr, params, preserve_contents); + } + case RecycleStrategy::Flush: { + std::sort(overlaps.begin(), overlaps.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (auto surface : overlaps) { + FlushSurface(surface); + } + return InitializeSurface(gpu_addr, params, preserve_contents); + } + default: { + UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + } + } + + std::pair RebuildMirage(TSurface current_surface, + const SurfaceParams& params) { + const auto gpu_addr = current_surface->GetGpuAddr(); + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + std::vector bricks = current_surface->BreakDown(); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } + Unregister(current_surface); + Register(new_surface); + return {new_surface, new_surface->GetMainView()}; + } + + std::pair ManageStructuralMatch(TSurface current_surface, + const SurfaceParams& params) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + return RebuildMirage(current_surface, params); + } + const bool matches_target = current_surface->MatchTarget(params.target); + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + } + + std::optional> ReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr, + const u8* host_ptr) { + if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + return {}; + } + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + for (auto surface : overlaps) { + const SurfaceParams& src_params = surface->GetSurfaceParams(); + if (src_params.is_layered || src_params.num_levels > 1) { + // We send this cases to recycle as they are more complex to handle + return {}; + } + const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + if (!mipmap_layer) { + return {}; + } + const u32 layer = (*mipmap_layer).first; + const u32 mipmap = (*mipmap_layer).second; + if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + return {}; + } + // Now we got all the data set up + CopyParams copy_params{}; + const u32 dst_width = params.GetMipWidth(mipmap); + const u32 dst_height = params.GetMipHeight(mipmap); + copy_params.width = std::min(src_params.width, dst_width); + copy_params.height = std::min(src_params.height, dst_height); + copy_params.depth = 1; + copy_params.source_level = 0; + copy_params.dest_level = mipmap; + copy_params.source_z = 0; + copy_params.dest_z = layer; + ImageCopy(surface, new_surface, copy_params); + } + for (auto surface : overlaps) { + Unregister(surface); + } + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; + } + + std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + + const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; - auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; + const std::size_t candidate_size = params.GetGuestSizeInBytes(); + auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; if (overlaps.empty()) { - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + + for (auto surface : overlaps) { + if (!surface->MatchesTopology(params)) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + true); + } } if (overlaps.size() == 1) { - if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return view; + TSurface current_surface = overlaps[0]; + if (current_surface->MatchesStructure(params) && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + return ManageStructuralMatch(current_surface, params); } - } - - const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, - preserve_contents, overlaps)}; - - if (!fast_view) { - std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { - return lhs->GetModificationTick() < rhs->GetModificationTick(); - }); - } - - for (const auto& surface : overlaps) { - if (!fast_view) { - // Flush even when we don't care about the contents, to preserve memory not - // written by the new surface. - FlushSurface(surface); + if (current_surface->GetSizeInBytes() <= candidate_size) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); } - Unregister(surface); + std::optional view = current_surface->EmplaceView(params, gpu_addr); + if (view.has_value()) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); + } + return {current_surface, *view}; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + } else { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); } - if (fast_view) { - return fast_view; - } - - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } - TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents) { - const auto new_surface{GetUncachedSurface(params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); + std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + auto new_surface{GetUncachedSurface(gpu_addr, params)}; + Register(new_surface); if (preserve_contents) { LoadSurface(new_surface); } - return new_surface->GetView(gpu_addr, params); + return {new_surface, new_surface->GetMainView()}; } - void LoadSurface(const std::shared_ptr& surface) { - surface->LoadBuffer(); - surface->UploadTexture(); - surface->MarkAsModified(false); + void LoadSurface(const TSurface& surface) { + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_buffer); + surface->UploadTexture(staging_buffer); + surface->MarkAsModified(false, Tick()); } - void FlushSurface(const std::shared_ptr& surface) { + void FlushSurface(const TSurface& surface) { if (!surface->IsModified()) { return; } - surface->DownloadTexture(); - surface->FlushBuffer(); + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->DownloadTexture(staging_buffer); + surface->FlushBuffer(staging_buffer); + surface->MarkAsModified(false, Tick()); } - std::vector> GetSurfacesInRegion(CacheAddr cache_addr, - std::size_t size) const { + std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { if (size == 0) { return {}; } const IntervalType interval{cache_addr, cache_addr + size}; - std::vector> surfaces; + std::vector surfaces; for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - surfaces.push_back(*pair.second.begin()); + for (auto& s : pair.second) { + if (!s || !s->IsRegistered()) { + continue; + } + surfaces.push_back(s); + } } return surfaces; } - void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + void RegisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].push_back(surface); + start++; + } + } + + void UnregisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].remove(surface); + start++; + } + } + + std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + if (size == 0) { + return {}; + } + const GPUVAddr gpu_addr_end = gpu_addr + size; + GPUVAddr start = gpu_addr >> inner_cache_page_bits; + const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + std::vector surfaces; + while (start <= end) { + std::list& list = inner_cache[start]; + for (auto& s : list) { + if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + s->MarkAsPicked(true); + surfaces.push_back(s); + } + } + start++; + } + for (auto& s : surfaces) { + s->MarkAsPicked(false); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, TSurface surface) { surface_reserve[params].push_back(std::move(surface)); } - std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + TSurface TryGetReservedSurface(const SurfaceParams& params) { auto search{surface_reserve.find(params)}; if (search == surface_reserve.end()) { return {}; @@ -247,21 +505,41 @@ private: return {}; } - IntervalType GetSurfaceInterval(std::shared_ptr surface) const { - return IntervalType::right_open(surface->GetCacheAddr(), - surface->GetCacheAddr() + surface->GetSizeInBytes()); + IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { + const CacheAddr addr = ToCacheAddr(host_ptr); + return IntervalType::right_open(addr, addr + size); } + struct RenderInfo { + RenderTargetConfig config; + TSurface target; + TView view; + }; + + struct DepthBufferInfo { + TSurface target; + TView view; + }; + VideoCore::RasterizerInterface& rasterizer; + Tegra::MemoryManager* memory_manager; u64 ticks{}; IntervalMap registered_surfaces; + static constexpr u64 inner_cache_page_bits{20}; + static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; + std::unordered_map> inner_cache; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map>> surface_reserve; + std::unordered_map> surface_reserve; + std::array render_targets; + DepthBufferInfo depth_buffer; + + std::vector staging_buffer; }; } // namespace VideoCommon