Merge pull request #1322 from bunnei/tex-cubemap

gl_rasterizer_cache: Implement cubemap textures.
merge-requests/60/head
bunnei 2018-09-30 21:19:00 +07:00 committed by GitHub
commit 8391048a83
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 357 additions and 131 deletions

@ -738,7 +738,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
} }
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture); Surface surface = res_cache.GetTextureSurface(texture, entry);
if (surface != nullptr) { if (surface != nullptr) {
state.texture_units[current_bindpoint].texture = surface->Texture().handle; state.texture_units[current_bindpoint].texture = surface->Texture().handle;
state.texture_units[current_bindpoint].target = surface->Target(); state.texture_units[current_bindpoint].target = surface->Target();

@ -41,7 +41,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
} }
/*static*/ SurfaceParams SurfaceParams::CreateForTexture( /*static*/ SurfaceParams SurfaceParams::CreateForTexture(
const Tegra::Texture::FullTextureInfo& config) { const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
SurfaceParams params{}; SurfaceParams params{};
params.addr = TryGetCpuAddr(config.tic.Address()); params.addr = TryGetCpuAddr(config.tic.Address());
params.is_tiled = config.tic.IsTiled(); params.is_tiled = config.tic.IsTiled();
@ -60,9 +60,23 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
case SurfaceTarget::Texture2D: case SurfaceTarget::Texture2D:
params.depth = 1; params.depth = 1;
break; break;
case SurfaceTarget::TextureCubemap:
params.depth = config.tic.Depth() * 6;
break;
case SurfaceTarget::Texture3D: case SurfaceTarget::Texture3D:
params.depth = config.tic.Depth();
break;
case SurfaceTarget::Texture2DArray: case SurfaceTarget::Texture2DArray:
params.depth = config.tic.Depth(); params.depth = config.tic.Depth();
if (!entry.IsArray()) {
// TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of
// one, but sample the texture in the shader as if it were not an array texture. This
// probably is valid on hardware, but we still need to write a test to confirm this. In
// emulation, the workaround here is to continue to treat this as a Texture2D. An
// example game that does this is Super Mario Odyssey (in Cloud Kingdom).
ASSERT(params.depth == 1);
params.target = SurfaceTarget::Texture2D;
}
break; break;
default: default:
LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target)); LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
@ -71,7 +85,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
break; break;
} }
params.size_in_bytes = params.SizeInBytes(); params.size_in_bytes_total = params.SizeInBytesTotal();
params.size_in_bytes_2d = params.SizeInBytes2D();
params.max_mip_level = config.tic.max_mip_level + 1;
params.rt = {};
return params; return params;
} }
@ -89,7 +107,16 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.unaligned_height = config.height; params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D; params.target = SurfaceTarget::Texture2D;
params.depth = 1; params.depth = 1;
params.size_in_bytes = params.SizeInBytes(); params.size_in_bytes_total = params.SizeInBytesTotal();
params.size_in_bytes_2d = params.SizeInBytes2D();
params.max_mip_level = 0;
// Render target specific parameters, not used for caching
params.rt.index = static_cast<u32>(index);
params.rt.array_mode = config.array_mode;
params.rt.layer_stride = config.layer_stride;
params.rt.base_layer = config.base_layer;
return params; return params;
} }
@ -108,7 +135,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.unaligned_height = zeta_height; params.unaligned_height = zeta_height;
params.target = SurfaceTarget::Texture2D; params.target = SurfaceTarget::Texture2D;
params.depth = 1; params.depth = 1;
params.size_in_bytes = params.SizeInBytes(); params.size_in_bytes_total = params.SizeInBytesTotal();
params.size_in_bytes_2d = params.SizeInBytes2D();
params.max_mip_level = 0;
params.rt = {};
return params; return params;
} }
@ -400,9 +431,13 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
// clang-format on // clang-format on
}; };
static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
GLuint read_fb_handle, GLuint draw_fb_handle) { GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
OpenGLState prev_state{OpenGLState::GetCurState()}; OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({ prev_state.Apply(); }); SCOPE_EXIT({ prev_state.Apply(); });
@ -413,47 +448,203 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
u32 buffers{}; u32 buffers{};
if (type == SurfaceType::ColorTexture) { if (src_params.type == SurfaceType::ColorTexture) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, switch (src_params.target) {
0); case SurfaceParams::SurfaceTarget::Texture2D:
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
0); GL_TEXTURE_2D, src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
glFramebufferTexture2D(
GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
src_surface->Texture().handle, 0);
glFramebufferTexture2D(
GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
break;
case SurfaceParams::SurfaceTarget::Texture2DArray:
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
src_surface->Texture().handle, 0, 0);
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
SurfaceTargetToGL(src_params.target),
src_surface->Texture().handle, 0, 0);
glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
SurfaceTargetToGL(src_params.target), 0, 0, 0);
break;
default:
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
GL_TEXTURE_2D, src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
}
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, switch (dst_params.target) {
0); case SurfaceParams::SurfaceTarget::Texture2D:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
0); GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
dst_surface->Texture().handle, 0);
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
break;
case SurfaceParams::SurfaceTarget::Texture2DArray:
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
dst_surface->Texture().handle, 0, 0);
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
SurfaceTargetToGL(dst_params.target),
dst_surface->Texture().handle, 0, 0);
glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
SurfaceTargetToGL(dst_params.target), 0, 0, 0);
break;
default:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
}
buffers = GL_COLOR_BUFFER_BIT; buffers = GL_COLOR_BUFFER_BIT;
} else if (type == SurfaceType::Depth) { } else if (src_params.type == SurfaceType::Depth) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
buffers = GL_DEPTH_BUFFER_BIT; buffers = GL_DEPTH_BUFFER_BIT;
} else if (type == SurfaceType::DepthStencil) { } else if (src_params.type == SurfaceType::DepthStencil) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
src_tex, 0); src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
dst_tex, 0); dst_surface->Texture().handle, 0);
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
} }
glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, const auto& rect{src_params.GetRect()};
dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom,
rect.right, rect.top, buffers,
buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
return true; return true;
} }
static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
GLuint copy_pbo_handle, GLenum src_attachment = 0,
GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
ASSERT_MSG(dst_attachment == 0, "Unimplemented");
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
std::size_t buffer_size =
std::max(src_params.size_in_bytes_total, dst_params.size_in_bytes_total);
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
if (source_format.compressed) {
glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
static_cast<GLsizei>(src_params.size_in_bytes_total), nullptr);
} else {
glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
source_format.type, static_cast<GLsizei>(src_params.size_in_bytes_total),
nullptr);
}
// If the new texture is bigger than the previous one, we need to fill in the rest with data
// from the CPU.
if (src_params.size_in_bytes_total < dst_params.size_in_bytes_total) {
// Upload the rest of the memory.
if (dst_params.is_tiled) {
// TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
// of the data in this case. Games like Super Mario Odyssey seem to hit this case
// when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
// but it doesn't clear it beforehand, the texture is already full of zeros.
LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
"reinterpretation but the texture is tiled.");
}
std::size_t remaining_size =
dst_params.size_in_bytes_total - src_params.size_in_bytes_total;
std::vector<u8> data(remaining_size);
Memory::ReadBlock(dst_params.addr + src_params.size_in_bytes_total, data.data(),
data.size());
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes_total, remaining_size,
data.data());
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
const GLsizei width{static_cast<GLsizei>(
std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))};
const GLsizei height{static_cast<GLsizei>(
std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))};
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
if (dest_format.compressed) {
LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
UNREACHABLE();
} else {
switch (dst_params.target) {
case SurfaceParams::SurfaceTarget::Texture1D:
glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
dest_format.type, nullptr);
break;
case SurfaceParams::SurfaceTarget::Texture2D:
glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
dest_format.format, dest_format.type, nullptr);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray:
glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
static_cast<GLsizei>(dst_params.depth), dest_format.format,
dest_format.type, nullptr);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
static_cast<GLint>(cubemap_face), width, height, 1,
dest_format.format, dest_format.type, nullptr);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(dst_params.target));
UNREACHABLE();
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
}
CachedSurface::CachedSurface(const SurfaceParams& params) CachedSurface::CachedSurface(const SurfaceParams& params)
: params(params), gl_target(SurfaceTargetToGL(params.target)) { : params(params), gl_target(SurfaceTargetToGL(params.target)) {
texture.Create(); texture.Create();
@ -481,6 +672,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
rect.GetWidth()); rect.GetWidth());
break; break;
case SurfaceParams::SurfaceTarget::Texture2D: case SurfaceParams::SurfaceTarget::Texture2D:
case SurfaceParams::SurfaceTarget::TextureCubemap:
glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight()); rect.GetWidth(), rect.GetHeight());
break; break;
@ -585,29 +777,39 @@ void CachedSurface::LoadGLBuffer() {
const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
const u32 copy_size = params.width * params.height * bytes_per_pixel; const u32 copy_size = params.width * params.height * bytes_per_pixel;
const std::size_t total_size = copy_size * params.depth;
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
if (params.is_tiled) { if (params.is_tiled) {
gl_buffer.resize(total_size);
// TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
// this for 3D textures, etc. // this for 3D textures, etc.
switch (params.target) { switch (params.target) {
case SurfaceParams::SurfaceTarget::Texture2D: case SurfaceParams::SurfaceTarget::Texture2D:
// Pass impl. to the fallback code below // Pass impl. to the fallback code below
break; break;
case SurfaceParams::SurfaceTarget::Texture2DArray:
case SurfaceParams::SurfaceTarget::TextureCubemap:
for (std::size_t index = 0; index < params.depth; ++index) {
const std::size_t offset{index * copy_size};
morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
params.width, params.block_height, params.height, gl_buffer.data() + offset,
copy_size, params.addr + offset);
}
break;
default: default:
LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}", LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",
static_cast<u32>(params.target)); static_cast<u32>(params.target));
UNREACHABLE(); UNREACHABLE();
} }
gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);
morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
params.width, params.block_height, params.height, gl_buffer.data(), copy_size, params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
params.addr); params.addr);
} else { } else {
const u8* const texture_src_data_end{texture_src_data + const u8* const texture_src_data_end{texture_src_data + total_size};
(static_cast<std::size_t>(params.depth) * copy_size)};
gl_buffer.assign(texture_src_data, texture_src_data_end); gl_buffer.assign(texture_src_data, texture_src_data_end);
} }
@ -634,7 +836,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
// Load data from memory to the surface // Load data from memory to the surface
const GLint x0 = static_cast<GLint>(rect.left); const GLint x0 = static_cast<GLint>(rect.left);
const GLint y0 = static_cast<GLint>(rect.bottom); const GLint y0 = static_cast<GLint>(rect.bottom);
const std::size_t buffer_offset = std::size_t buffer_offset =
static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width + static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
static_cast<std::size_t>(x0)) * static_cast<std::size_t>(x0)) *
GetGLBytesPerPixel(params.pixel_format); GetGLBytesPerPixel(params.pixel_format);
@ -663,15 +865,25 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
glCompressedTexImage2D( glCompressedTexImage2D(
SurfaceTargetToGL(params.target), 0, tuple.internal_format, SurfaceTargetToGL(params.target), 0, tuple.internal_format,
static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
static_cast<GLsizei>(params.size_in_bytes), &gl_buffer[buffer_offset]); static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
break; break;
case SurfaceParams::SurfaceTarget::Texture3D: case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray: case SurfaceParams::SurfaceTarget::Texture2DArray:
glCompressedTexImage3D( glCompressedTexImage3D(
SurfaceTargetToGL(params.target), 0, tuple.internal_format, SurfaceTargetToGL(params.target), 0, tuple.internal_format,
static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
static_cast<GLsizei>(params.depth), 0, static_cast<GLsizei>(params.size_in_bytes), static_cast<GLsizei>(params.depth), 0,
static_cast<GLsizei>(params.size_in_bytes_total), &gl_buffer[buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
for (std::size_t face = 0; face < params.depth; ++face) {
glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
0, tuple.internal_format, static_cast<GLsizei>(params.width),
static_cast<GLsizei>(params.height), 0,
static_cast<GLsizei>(params.size_in_bytes_2d),
&gl_buffer[buffer_offset]); &gl_buffer[buffer_offset]);
buffer_offset += params.size_in_bytes_2d;
}
break; break;
default: default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@ -679,8 +891,8 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
UNREACHABLE(); UNREACHABLE();
glCompressedTexImage2D( glCompressedTexImage2D(
GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width), GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes), static_cast<GLsizei>(params.height), 0,
&gl_buffer[buffer_offset]); static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
} }
} else { } else {
@ -703,6 +915,15 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format, static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
tuple.type, &gl_buffer[buffer_offset]); tuple.type, &gl_buffer[buffer_offset]);
break; break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
for (std::size_t face = 0; face < params.depth; ++face) {
glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0,
y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[buffer_offset]);
buffer_offset += params.size_in_bytes_2d;
}
break;
default: default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target)); static_cast<u32>(params.target));
@ -722,8 +943,9 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
copy_pbo.Create(); copy_pbo.Create();
} }
Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
return GetSurface(SurfaceParams::CreateForTexture(config)); const GLShader::SamplerEntry& entry) {
return GetSurface(SurfaceParams::CreateForTexture(config, entry));
} }
Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
@ -811,99 +1033,70 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
return surface; return surface;
} }
Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
const SurfaceParams& new_params) { const SurfaceParams& new_params) {
// Verify surface is compatible for blitting // Verify surface is compatible for blitting
const auto& params{surface->GetSurfaceParams()}; auto old_params{old_surface->GetSurfaceParams()};
// Get a new surface with the new parameters, and blit the previous surface to it // Get a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{GetUncachedSurface(new_params)}; Surface new_surface{GetUncachedSurface(new_params)};
if (params.pixel_format == new_params.pixel_format ||
!Settings::values.use_accurate_framebuffers) {
// If the format is the same, just do a framebuffer blit. This is significantly faster than // If the format is the same, just do a framebuffer blit. This is significantly faster than
// using PBOs. This is also likely less accurate, as textures will be converted rather than // using PBOs. This is also likely less accurate, as textures will be converted rather than
// reinterpreted. // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate
// surface copy, where pixels are reinterpreted as a new format (without conversion). This
// code path uses OpenGL PBOs and is quite slow.
const bool is_blit{old_params.pixel_format == new_params.pixel_format ||
!Settings::values.use_accurate_framebuffers};
BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
params.GetRect(), params.type, read_framebuffer.handle,
draw_framebuffer.handle);
} else {
// When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
// where pixels are reinterpreted as a new format (without conversion). This code path uses
// OpenGL PBOs and is quite slow.
auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
if (source_format.compressed) {
glGetCompressedTextureImage(surface->Texture().handle, 0,
static_cast<GLsizei>(params.SizeInBytes()), nullptr);
} else {
glGetTextureImage(surface->Texture().handle, 0, source_format.format,
source_format.type, static_cast<GLsizei>(params.SizeInBytes()),
nullptr);
}
// If the new texture is bigger than the previous one, we need to fill in the rest with data
// from the CPU.
if (params.SizeInBytes() < new_params.SizeInBytes()) {
// Upload the rest of the memory.
if (new_params.is_tiled) {
// TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
// of the data in this case. Games like Super Mario Odyssey seem to hit this case
// when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
// but it doesn't clear it beforehand, the texture is already full of zeros.
LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
"reinterpretation but the texture is tiled.");
}
std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
std::vector<u8> data(remaining_size);
Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
data.data());
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
const auto& dest_rect{new_params.GetRect()};
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo.handle);
if (dest_format.compressed) {
LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
UNREACHABLE();
} else {
switch (new_params.target) { switch (new_params.target) {
case SurfaceParams::SurfaceTarget::Texture1D:
glTextureSubImage1D(new_surface->Texture().handle, 0, 0,
static_cast<GLsizei>(dest_rect.GetWidth()), dest_format.format,
dest_format.type, nullptr);
break;
case SurfaceParams::SurfaceTarget::Texture2D: case SurfaceParams::SurfaceTarget::Texture2D:
glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0, if (is_blit) {
static_cast<GLsizei>(dest_rect.GetWidth()), BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);
static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, } else {
dest_format.type, nullptr); CopySurface(old_surface, new_surface, copy_pbo.handle);
}
break; break;
case SurfaceParams::SurfaceTarget::Texture3D: case SurfaceParams::SurfaceTarget::TextureCubemap: {
case SurfaceParams::SurfaceTarget::Texture2DArray: if (old_params.rt.array_mode != 1) {
glTextureSubImage3D(new_surface->Texture().handle, 0, 0, 0, 0, // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this
static_cast<GLsizei>(dest_rect.GetWidth()), // yet (array rendering used as a cubemap texture).
static_cast<GLsizei>(dest_rect.GetHeight()), LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode);
static_cast<GLsizei>(new_params.depth), dest_format.format, UNREACHABLE();
dest_format.type, nullptr); return new_surface;
}
// This seems to be used for render-to-cubemap texture
ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected");
ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected");
ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented");
// TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels.
// Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild.
const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)};
for (std::size_t index = 0; index < new_params.depth; ++index) {
Surface face_surface{TryGetReservedSurface(old_params)};
ASSERT_MSG(face_surface, "Unexpected");
if (is_blit) {
BlitSurface(face_surface, new_surface, read_framebuffer.handle,
draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index,
new_params.rt.index, index);
} else {
CopySurface(face_surface, new_surface, copy_pbo.handle,
face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index);
}
old_params.addr += byte_stride;
}
break; break;
}
default: default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target)); static_cast<u32>(new_params.target));
UNREACHABLE(); UNREACHABLE();
} }
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
return new_surface; return new_surface;
} }

@ -9,12 +9,14 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "common/alignment.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/hash.h" #include "common/hash.h"
#include "common/math_util.h" #include "common/math_util.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/textures/texture.h" #include "video_core/textures/texture.h"
namespace OpenGL { namespace OpenGL {
@ -126,6 +128,8 @@ struct SurfaceParams {
case Tegra::Texture::TextureType::Texture2D: case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap: case Tegra::Texture::TextureType::Texture2DNoMipmap:
return SurfaceTarget::Texture2D; return SurfaceTarget::Texture2D;
case Tegra::Texture::TextureType::TextureCubemap:
return SurfaceTarget::TextureCubemap;
case Tegra::Texture::TextureType::Texture1DArray: case Tegra::Texture::TextureType::Texture1DArray:
return SurfaceTarget::Texture1DArray; return SurfaceTarget::Texture1DArray;
case Tegra::Texture::TextureType::Texture2DArray: case Tegra::Texture::TextureType::Texture2DArray:
@ -689,17 +693,23 @@ struct SurfaceParams {
/// Returns the rectangle corresponding to this surface /// Returns the rectangle corresponding to this surface
MathUtil::Rectangle<u32> GetRect() const; MathUtil::Rectangle<u32> GetRect() const;
/// Returns the size of this surface in bytes, adjusted for compression /// Returns the size of this surface as a 2D texture in bytes, adjusted for compression
std::size_t SizeInBytes() const { std::size_t SizeInBytes2D() const {
const u32 compression_factor{GetCompressionFactor(pixel_format)}; const u32 compression_factor{GetCompressionFactor(pixel_format)};
ASSERT(width % compression_factor == 0); ASSERT(width % compression_factor == 0);
ASSERT(height % compression_factor == 0); ASSERT(height % compression_factor == 0);
return (width / compression_factor) * (height / compression_factor) * return (width / compression_factor) * (height / compression_factor) *
GetFormatBpp(pixel_format) * depth / CHAR_BIT; GetFormatBpp(pixel_format) / CHAR_BIT;
}
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesTotal() const {
return SizeInBytes2D() * depth;
} }
/// Creates SurfaceParams from a texture configuration /// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
/// Creates SurfaceParams from a framebuffer configuration /// Creates SurfaceParams from a framebuffer configuration
static SurfaceParams CreateForFramebuffer(std::size_t index); static SurfaceParams CreateForFramebuffer(std::size_t index);
@ -711,8 +721,9 @@ struct SurfaceParams {
/// Checks if surfaces are compatible for caching /// Checks if surfaces are compatible for caching
bool IsCompatibleSurface(const SurfaceParams& other) const { bool IsCompatibleSurface(const SurfaceParams& other) const {
return std::tie(pixel_format, type, width, height) == return std::tie(pixel_format, type, width, height, target, depth) ==
std::tie(other.pixel_format, other.type, other.width, other.height); std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
other.depth);
} }
VAddr addr; VAddr addr;
@ -725,8 +736,18 @@ struct SurfaceParams {
u32 height; u32 height;
u32 depth; u32 depth;
u32 unaligned_height; u32 unaligned_height;
std::size_t size_in_bytes; std::size_t size_in_bytes_total;
std::size_t size_in_bytes_2d;
SurfaceTarget target; SurfaceTarget target;
u32 max_mip_level;
// Render target specific parameters, not used in caching
struct {
u32 index;
u32 array_mode;
u32 layer_stride;
u32 base_layer;
} rt;
}; };
}; // namespace OpenGL }; // namespace OpenGL
@ -736,6 +757,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
SurfaceReserveKey res; SurfaceReserveKey res;
res.state = params; res.state = params;
res.state.rt = {}; // Ignore rt config in caching
return res; return res;
} }
}; };
@ -759,7 +781,7 @@ public:
} }
std::size_t GetSizeInBytes() const { std::size_t GetSizeInBytes() const {
return params.size_in_bytes; return params.size_in_bytes_total;
} }
const OGLTexture& Texture() const { const OGLTexture& Texture() const {
@ -800,7 +822,8 @@ public:
RasterizerCacheOpenGL(); RasterizerCacheOpenGL();
/// Get a surface based on the texture configuration /// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
/// Get the depth surface based on the framebuffer configuration /// Get the depth surface based on the framebuffer configuration
Surface GetDepthBufferSurface(bool preserve_contents); Surface GetDepthBufferSurface(bool preserve_contents);
@ -822,7 +845,7 @@ private:
Surface GetUncachedSurface(const SurfaceParams& params); Surface GetUncachedSurface(const SurfaceParams& params);
/// Recreates a surface with new parameters /// Recreates a surface with new parameters
Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);
/// Reserves a unique surface that can be reused later /// Reserves a unique surface that can be reused later
void ReserveSurface(const Surface& surface); void ReserveSurface(const Surface& surface);

@ -2000,6 +2000,14 @@ private:
} }
break; break;
} }
case Tegra::Shader::TextureType::TextureCube: {
ASSERT_MSG(!is_array, "Unimplemented");
std::string x = regs.GetRegisterAsFloat(instr.gpr8);
std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
std::string z = regs.GetRegisterAsFloat(instr.gpr20);
coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
break;
}
default: default:
LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
static_cast<u32>(texture_type)); static_cast<u32>(texture_type));

@ -165,6 +165,8 @@ struct TICEntry {
// High 16 bits of the pitch value // High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high; BitField<0, 16, u32> pitch_high;
BitField<28, 4, u32> max_mip_level;
}; };
union { union {
BitField<0, 16, u32> width_minus_1; BitField<0, 16, u32> width_minus_1;