texture_cache: Remove execution context copies from the texture cache

This is done to simplify the OpenGL implementation, it is needed for
Vulkan.
merge-requests/60/head
ReinUsesLisp 2019-04-25 13:41:57 +07:00
parent fa59a7b4d8
commit 6c410104f4
7 changed files with 59 additions and 168 deletions

@ -9,7 +9,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/texture_cache/texture_cache_contextless.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/textures/convert.h"
#include "video_core/textures/texture.h"
@ -18,6 +18,10 @@ namespace OpenGL {
using Tegra::Texture::SwizzleSource;
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::ComponentType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
namespace {
struct FormatTuple {
@ -209,8 +213,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
} // Anonymous namespace
CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params)
: VideoCommon::SurfaceBaseContextless<TextureCacheOpenGL, CachedSurfaceView>{texture_cache,
params} {
: VideoCommon::SurfaceBase<TextureCacheOpenGL, CachedSurfaceView>{texture_cache, params} {
const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())};
internal_format = tuple.internal_format;
format = tuple.format;
@ -222,7 +225,7 @@ CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfacePar
CachedSurface::~CachedSurface() = default;
void CachedSurface::DownloadTextureImpl() {
void CachedSurface::DownloadTexture() {
// TODO(Rodrigo): Optimize alignment
glPixelStorei(GL_PACK_ALIGNMENT, 1);
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
@ -241,7 +244,7 @@ void CachedSurface::DownloadTextureImpl() {
}
}
void CachedSurface::UploadTextureImpl() {
void CachedSurface::UploadTexture() {
SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
for (u32 level = 0; level < params.GetNumLevels(); ++level) {
UploadTextureMipmap(level);
@ -321,7 +324,8 @@ void CachedSurface::UploadTextureMipmap(u32 level) {
}
void CachedSurface::DecorateSurfaceName() {
LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr());
LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(),
params.GetTarget() == SurfaceTarget::Texture3D ? "3D" : "");
}
std::unique_ptr<CachedSurfaceView> CachedSurface::CreateView(const ViewKey& view_key) {

@ -14,32 +14,30 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/texture_cache/texture_cache_contextless.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
using VideoCommon::SurfaceParams;
using VideoCommon::ViewKey;
using VideoCore::Surface::ComponentType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
class CachedSurfaceView;
class CachedSurface;
class TextureCacheOpenGL;
using Surface = std::shared_ptr<CachedSurface>;
using TextureCacheBase = VideoCommon::TextureCacheContextless<CachedSurface, CachedSurfaceView>;
using TextureCacheBase = VideoCommon::TextureCache<CachedSurface, CachedSurfaceView>;
class CachedSurface final
: public VideoCommon::SurfaceBaseContextless<TextureCacheOpenGL, CachedSurfaceView> {
class CachedSurface final : public VideoCommon::SurfaceBase<TextureCacheOpenGL, CachedSurfaceView> {
friend CachedSurfaceView;
public:
explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params);
~CachedSurface();
void UploadTexture();
void DownloadTexture();
GLenum GetTarget() const {
return target;
}
@ -53,9 +51,6 @@ protected:
std::unique_ptr<CachedSurfaceView> CreateView(const ViewKey& view_key);
void UploadTextureImpl();
void DownloadTextureImpl();
private:
void UploadTextureMipmap(u32 level);

@ -18,7 +18,9 @@ namespace OpenGL {
using Tegra::Shader::TextureType;
using Tegra::Texture::SwizzleSource;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}

@ -53,8 +53,8 @@ void SurfaceBaseImpl::LoadBuffer() {
ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}",
params.GetBlockWidth(), static_cast<u32>(params.GetTarget()));
for (u32 level = 0; level < params.GetNumLevels(); ++level) {
u8* const buffer{GetStagingBufferLevelData(level)};
SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level);
SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
GetStagingBufferLevelData(level), level);
}
} else {
ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented");
@ -89,8 +89,8 @@ void SurfaceBaseImpl::FlushBuffer() {
ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}",
params.GetBlockWidth());
for (u32 level = 0; level < params.GetNumLevels(); ++level) {
u8* const buffer = GetStagingBufferLevelData(level);
SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level);
SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params,
GetStagingBufferLevelData(level), level);
}
} else {
UNIMPLEMENTED();

@ -89,14 +89,12 @@ private:
std::vector<u8> staging_buffer;
};
template <typename TTextureCache, typename TView, typename TExecutionContext>
template <typename TTextureCache, typename TView>
class SurfaceBase : public SurfaceBaseImpl {
static_assert(std::is_trivially_copyable_v<TExecutionContext>);
public:
virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;
virtual void UploadTexture() = 0;
virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0;
virtual void DownloadTexture() = 0;
TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) {
if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) {

@ -8,7 +8,6 @@
#include <memory>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <boost/icl/interval_map.hpp>
@ -41,11 +40,8 @@ class RasterizerInterface;
namespace VideoCommon {
template <typename TSurface, typename TView, typename TExecutionContext>
template <typename TSurface, typename TView>
class TextureCache {
static_assert(std::is_trivially_copyable_v<TExecutionContext>);
using ResultType = std::tuple<TView*, TExecutionContext>;
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<std::shared_ptr<TSurface>>>;
using IntervalType = typename IntervalMap::interval_type;
@ -60,37 +56,35 @@ public:
}
}
ResultType GetTextureSurface(TExecutionContext exctx,
const Tegra::Texture::FullTextureInfo& config) {
TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
const auto gpu_addr{config.tic.Address()};
if (!gpu_addr) {
return {{}, exctx};
return {};
}
const auto params{SurfaceParams::CreateForTexture(system, config)};
return GetSurfaceView(exctx, gpu_addr, params, true);
return GetSurfaceView(gpu_addr, params, true);
}
ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
TView* GetDepthBufferSurface(bool preserve_contents) {
const auto& regs{system.GPU().Maxwell3D().regs};
const auto gpu_addr{regs.zeta.Address()};
if (!gpu_addr || !regs.zeta_enable) {
return {{}, exctx};
return {};
}
const auto depth_params{SurfaceParams::CreateForDepthBuffer(
system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents);
return GetSurfaceView(gpu_addr, depth_params, preserve_contents);
}
ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
bool preserve_contents) {
TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) {
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
const auto& regs{system.GPU().Maxwell3D().regs};
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
return {{}, exctx};
return {};
}
auto& memory_manager{system.GPU().MemoryManager()};
@ -98,17 +92,16 @@ public:
const auto gpu_addr{config.Address() +
config.base_layer * config.layer_stride * sizeof(u32)};
if (!gpu_addr) {
return {{}, exctx};
return {};
}
return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents);
}
ResultType GetFermiSurface(TExecutionContext exctx,
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
return GetSurfaceView(exctx, config.Address(),
SurfaceParams::CreateForFermiCopySurface(config), true);
TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) {
return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config),
true);
}
std::shared_ptr<TSurface> TryFindFramebufferSurface(const u8* host_ptr) const {
@ -126,10 +119,9 @@ protected:
~TextureCache() = default;
virtual ResultType TryFastGetSurfaceView(
TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
const SurfaceParams& params, bool preserve_contents,
const std::vector<std::shared_ptr<TSurface>>& overlaps) = 0;
virtual TView* TryFastGetSurfaceView(
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params,
bool preserve_contents, const std::vector<std::shared_ptr<TSurface>>& overlaps) = 0;
virtual std::shared_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;
@ -158,8 +150,7 @@ protected:
Core::System& system;
private:
ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr,
const SurfaceParams& params, bool preserve_contents) {
TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
DEBUG_ASSERT(cpu_addr);
@ -168,18 +159,17 @@ private:
const auto cache_addr{ToCacheAddr(host_ptr)};
auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
if (overlaps.empty()) {
return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents);
return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents);
}
if (overlaps.size() == 1) {
if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) {
return {view, exctx};
return view;
}
}
TView* fast_view;
std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr,
params, preserve_contents, overlaps);
const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params,
preserve_contents, overlaps)};
if (!fast_view) {
std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) {
@ -191,44 +181,39 @@ private:
if (!fast_view) {
// Flush even when we don't care about the contents, to preserve memory not
// written by the new surface.
exctx = FlushSurface(exctx, surface);
FlushSurface(surface);
}
Unregister(surface);
}
if (fast_view) {
return {fast_view, exctx};
return fast_view;
}
return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents);
return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents);
}
ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr,
u8* host_ptr, const SurfaceParams& params, bool preserve_contents) {
TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
const SurfaceParams& params, bool preserve_contents) {
const auto new_surface{GetUncachedSurface(params)};
Register(new_surface, gpu_addr, cpu_addr, host_ptr);
if (preserve_contents) {
exctx = LoadSurface(exctx, new_surface);
LoadSurface(new_surface);
}
return {new_surface->GetView(gpu_addr, params), exctx};
return new_surface->GetView(gpu_addr, params);
}
TExecutionContext LoadSurface(TExecutionContext exctx,
const std::shared_ptr<TSurface>& surface) {
void LoadSurface(const std::shared_ptr<TSurface>& surface) {
surface->LoadBuffer();
exctx = surface->UploadTexture(exctx);
surface->UploadTexture();
surface->MarkAsModified(false);
return exctx;
}
TExecutionContext FlushSurface(TExecutionContext exctx,
const std::shared_ptr<TSurface>& surface) {
void FlushSurface(const std::shared_ptr<TSurface>& surface) {
if (!surface->IsModified()) {
return exctx;
return;
}
exctx = surface->DownloadTexture(exctx);
surface->DownloadTexture();
surface->FlushBuffer();
return exctx;
}
std::vector<std::shared_ptr<TSurface>> GetSurfacesInRegion(CacheAddr cache_addr,

@ -1,93 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
struct DummyExecutionContext {};
template <typename TSurface, typename TView>
class TextureCacheContextless : protected TextureCache<TSurface, TView, DummyExecutionContext> {
using Base = TextureCache<TSurface, TView, DummyExecutionContext>;
public:
void InvalidateRegion(CacheAddr addr, std::size_t size) {
Base::InvalidateRegion(addr, size);
}
TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
return RemoveContext(Base::GetTextureSurface({}, config));
}
TView* GetDepthBufferSurface(bool preserve_contents) {
return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents));
}
TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) {
return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents));
}
TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) {
return RemoveContext(Base::GetFermiSurface({}, config));
}
std::shared_ptr<TSurface> TryFindFramebufferSurface(const u8* host_ptr) const {
return Base::TryFindFramebufferSurface(host_ptr);
}
u64 Tick() {
return Base::Tick();
}
protected:
explicit TextureCacheContextless(Core::System& system,
VideoCore::RasterizerInterface& rasterizer)
: TextureCache<TSurface, TView, DummyExecutionContext>{system, rasterizer} {}
virtual TView* TryFastGetSurfaceView(
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params,
bool preserve_contents, const std::vector<std::shared_ptr<TSurface>>& overlaps) = 0;
private:
std::tuple<TView*, DummyExecutionContext> TryFastGetSurfaceView(
DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
const SurfaceParams& params, bool preserve_contents,
const std::vector<std::shared_ptr<TSurface>>& overlaps) {
return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents,
overlaps),
{}};
}
TView* RemoveContext(std::tuple<TView*, DummyExecutionContext> return_value) {
const auto [view, exctx] = return_value;
return view;
}
};
template <typename TTextureCache, typename TView>
class SurfaceBaseContextless : public SurfaceBase<TTextureCache, TView, DummyExecutionContext> {
public:
DummyExecutionContext DownloadTexture(DummyExecutionContext) {
DownloadTextureImpl();
return {};
}
DummyExecutionContext UploadTexture(DummyExecutionContext) {
UploadTextureImpl();
return {};
}
protected:
explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params)
: SurfaceBase<TTextureCache, TView, DummyExecutionContext>{texture_cache, params} {}
virtual void DownloadTextureImpl() = 0;
virtual void UploadTextureImpl() = 0;
};
} // namespace VideoCommon