Merge pull request #2244 from bunnei/gpu-mem-refactor

video_core: Refactor to use MemoryManager interface for all memory access.
master
bunnei 2019-03-16 21:59:45 +07:00 committed by GitHub
commit 2392e146b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 196 additions and 189 deletions

@ -55,12 +55,9 @@ bool DmaPusher::Step() {
} }
// Push buffer non-empty, read a word // Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
command_headers.resize(command_list_header.size); command_headers.resize(command_list_header.size);
gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32)); command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) { for (const CommandHeader& command_header : command_headers) {

@ -41,18 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
const GPUVAddr address = regs.dest.Address();
const auto dest_address =
memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
ASSERT_MSG(dest_address, "Invalid GPU address");
// We have to invalidate the destination region to evict any outdated surfaces from the cache. // We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain // We do this before actually writing the new data because the destination address might
// a dirty surface that will have to be written back to memory. // contain a dirty surface that will have to be written back to memory.
system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)), const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
sizeof(u32)); rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
memory_manager.Write32(address, data);
Memory::Write32(*dest_address, data);
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++; state.write_offset++;

@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
} }
void Maxwell3D::ProcessQueryGet() { void Maxwell3D::ProcessQueryGet() {
GPUVAddr sequence_address = regs.query.QueryAddress(); const GPUVAddr sequence_address{regs.query.QueryAddress()};
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
// VAddr before writing. // VAddr before writing.
const auto address = memory_manager.GpuToCpuAddress(sequence_address);
ASSERT_MSG(address, "Invalid GPU address");
// TODO(Subv): Support the other query units. // TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
// Write the current query sequence to the sequence address. // Write the current query sequence to the sequence address.
// TODO(Subv): Find out what happens if you use a long query type but mark it as a short // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
// query. // query.
Memory::Write32(*address, sequence); memory_manager.Write32(sequence_address, sequence);
} else { } else {
// Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
// GPU, this command may actually take a while to complete in real hardware due to GPU // GPU, this command may actually take a while to complete in real hardware due to GPU
@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
query_result.value = result; query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = system.CoreTiming().GetTicks(); query_result.timestamp = system.CoreTiming().GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result)); memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
} }
dirty_flags.OnMemoryWrite(); dirty_flags.OnMemoryWrite();
break; break;
@ -393,12 +391,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
// Don't allow writing past the end of the buffer. // Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
ASSERT_MSG(address, "Invalid GPU address");
u8* ptr{Memory::GetPointer(*address)}; u8* ptr{memory_manager.GetPointer(address)};
rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
std::memcpy(ptr, &value, sizeof(u32)); memory_manager.Write32(address, value);
dirty_flags.OnMemoryWrite(); dirty_flags.OnMemoryWrite();
@ -407,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
} }
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
const GPUVAddr tic_base_address = regs.tic.TICAddress(); const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
Texture::TICEntry tic_entry; Texture::TICEntry tic_entry;
Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch, tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@ -432,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
} }
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
const GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
Texture::TSCEntry tsc_entry; Texture::TSCEntry tsc_entry;
Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry; return tsc_entry;
} }
@ -458,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
const auto address = memory_manager.GpuToCpuAddress(current_texture); const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
ASSERT_MSG(address, "Invalid GPU address");
const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
Texture::FullTextureInfo tex_info{}; Texture::FullTextureInfo tex_info{};
// TODO(Subv): Use the shader to determine which textures are actually accessed. // TODO(Subv): Use the shader to determine which textures are actually accessed.
@ -496,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
Texture::FullTextureInfo tex_info{}; Texture::FullTextureInfo tex_info{};
tex_info.index = static_cast<u32>(offset); tex_info.index = static_cast<u32>(offset);

@ -43,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
const GPUVAddr source = regs.src_address.Address(); const GPUVAddr source = regs.src_address.Address();
const GPUVAddr dest = regs.dst_address.Address(); const GPUVAddr dest = regs.dst_address.Address();
const auto source_cpu = memory_manager.GpuToCpuAddress(source);
const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
ASSERT_MSG(source_cpu, "Invalid source GPU address");
ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
// TODO(Subv): Perform more research and implement all features of this engine. // TODO(Subv): Perform more research and implement all features of this engine.
ASSERT(regs.exec.enable_swizzle == 0); ASSERT(regs.exec.enable_swizzle == 0);
ASSERT(regs.exec.query_mode == Regs::QueryMode::None); ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@ -70,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
// y_count). // y_count).
if (!regs.exec.enable_2d) { if (!regs.exec.enable_2d) {
Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count); memory_manager.CopyBlock(dest, source, regs.x_count);
return; return;
} }
@ -79,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
// rectangle. There is no need to manually flush/invalidate the regions because // rectangle. There is no need to manually flush/invalidate the regions because
// CopyBlock does that for us. // CopyBlock does that for us.
for (u32 line = 0; line < regs.y_count; ++line) { for (u32 line = 0; line < regs.y_count; ++line) {
const VAddr source_line = *source_cpu + line * regs.src_pitch; const GPUVAddr source_line = source + line * regs.src_pitch;
const VAddr dest_line = *dest_cpu + line * regs.dst_pitch; const GPUVAddr dest_line = dest + line * regs.dst_pitch;
Memory::CopyBlock(dest_line, source_line, regs.x_count); memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
} }
return; return;
} }
@ -90,17 +85,18 @@ void MaxwellDMA::HandleCopy() {
const std::size_t copy_size = regs.x_count * regs.y_count; const std::size_t copy_size = regs.x_count * regs.y_count;
auto source_ptr{memory_manager.GetPointer(source)};
auto dst_ptr{memory_manager.GetPointer(dest)};
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying. // copying.
Core::System::GetInstance().Renderer().Rasterizer().FlushRegion( rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the // We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address // cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory. // might contain a dirty surface that will have to be written back to memory.
Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion( rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
}; };
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@ -113,8 +109,8 @@ void MaxwellDMA::HandleCopy() {
copy_size * src_bytes_per_pixel); copy_size * src_bytes_per_pixel);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, *source_cpu, regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
*dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.pos_y); regs.src_params.pos_y);
} else { } else {
ASSERT(regs.dst_params.size_z == 1); ASSERT(regs.dst_params.size_z == 1);
@ -127,7 +123,7 @@ void MaxwellDMA::HandleCopy() {
// If the input is linear and the output is tiled, swizzle the input and copy it over. // If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight()); src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
} }
} }

@ -274,7 +274,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
const auto op = const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) { if (op == GpuSemaphoreOperation::WriteLong) {
auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
struct Block { struct Block {
u32 sequence; u32 sequence;
u32 zeros = 0; u32 zeros = 0;
@ -286,11 +285,9 @@ void GPU::ProcessSemaphoreTriggerMethod() {
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming // CoreTiming
block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
Memory::WriteBlock(*address, &block, sizeof(block)); memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
} else { } else {
const auto address = const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())};
memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
const u32 word = Memory::Read32(*address);
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal && (op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) || static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@ -317,13 +314,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
} }
void GPU::ProcessSemaphoreRelease() { void GPU::ProcessSemaphoreRelease() {
const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
Memory::Write32(*address, regs.semaphore_release);
} }
void GPU::ProcessSemaphoreAcquire() { void GPU::ProcessSemaphoreAcquire() {
const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress());
const u32 word = Memory::Read32(*address);
const auto value = regs.semaphore_acquire; const auto value = regs.semaphore_acquire;
if (word != value) { if (word != value) {
regs.acquire_active = true; regs.acquire_active = true;

@ -5,6 +5,7 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/memory.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
namespace Tegra { namespace Tegra {
@ -162,15 +163,51 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
return base_addr + (gpu_addr & PAGE_MASK); return base_addr + (gpu_addr & PAGE_MASK);
} }
std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { u8 MemoryManager::Read8(GPUVAddr addr) {
std::vector<GPUVAddr> results; return Memory::Read8(*GpuToCpuAddress(addr));
for (const auto& region : mapped_regions) {
if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
const u64 offset{cpu_addr - region.cpu_addr};
results.push_back(region.gpu_addr + offset);
} }
u16 MemoryManager::Read16(GPUVAddr addr) {
return Memory::Read16(*GpuToCpuAddress(addr));
} }
return results;
// Reads a 32-bit value from the GPU virtual address `addr`.
// The address is translated to a CPU virtual address and the read is forwarded to Memory::Read32.
u32 MemoryManager::Read32(GPUVAddr addr) {
    const auto cpu_addr{GpuToCpuAddress(addr)};
    // Dereferencing an empty optional is undefined behaviour, so fail loudly on a bad address.
    ASSERT_MSG(cpu_addr, "Invalid GPU address");
    return Memory::Read32(*cpu_addr);
}
// Reads a 64-bit value from the GPU virtual address `addr`.
// The address is translated to a CPU virtual address and the read is forwarded to Memory::Read64.
u64 MemoryManager::Read64(GPUVAddr addr) {
    const auto cpu_addr{GpuToCpuAddress(addr)};
    // Dereferencing an empty optional is undefined behaviour, so fail loudly on a bad address.
    ASSERT_MSG(cpu_addr, "Invalid GPU address");
    return Memory::Read64(*cpu_addr);
}
// Writes the 8-bit value `data` to the GPU virtual address `addr`.
// The address is translated to a CPU virtual address and the write is forwarded to Memory::Write8.
void MemoryManager::Write8(GPUVAddr addr, u8 data) {
    const auto cpu_addr{GpuToCpuAddress(addr)};
    // Dereferencing an empty optional is undefined behaviour, so fail loudly on a bad address.
    ASSERT_MSG(cpu_addr, "Invalid GPU address");
    Memory::Write8(*cpu_addr, data);
}
// Writes the 16-bit value `data` to the GPU virtual address `addr`.
// The address is translated to a CPU virtual address and the write is forwarded to
// Memory::Write16.
void MemoryManager::Write16(GPUVAddr addr, u16 data) {
    const auto cpu_addr{GpuToCpuAddress(addr)};
    // Dereferencing an empty optional is undefined behaviour, so fail loudly on a bad address.
    ASSERT_MSG(cpu_addr, "Invalid GPU address");
    Memory::Write16(*cpu_addr, data);
}
// Writes the 32-bit value `data` to the GPU virtual address `addr`.
// The address is translated to a CPU virtual address and the write is forwarded to
// Memory::Write32.
void MemoryManager::Write32(GPUVAddr addr, u32 data) {
    const auto cpu_addr{GpuToCpuAddress(addr)};
    // Dereferencing an empty optional is undefined behaviour, so fail loudly on a bad address.
    ASSERT_MSG(cpu_addr, "Invalid GPU address");
    Memory::Write32(*cpu_addr, data);
}
// Writes the 64-bit value `data` to the GPU virtual address `addr`.
// The address is translated to a CPU virtual address and the write is forwarded to
// Memory::Write64.
void MemoryManager::Write64(GPUVAddr addr, u64 data) {
    const auto cpu_addr{GpuToCpuAddress(addr)};
    // Dereferencing an empty optional is undefined behaviour, so fail loudly on a bad address.
    ASSERT_MSG(cpu_addr, "Invalid GPU address");
    Memory::Write64(*cpu_addr, data);
}
// Returns the pointer that Memory::GetPointer yields for the CPU address backing the GPU
// virtual address `addr`, or nullptr when `addr` has no translation.
u8* MemoryManager::GetPointer(GPUVAddr addr) {
    const auto cpu_addr{GpuToCpuAddress(addr)};
    if (!cpu_addr) {
        // Some callers probe addresses that may be unmapped (surface parameter setup previously
        // fell back to address 0), so report failure instead of dereferencing an empty optional.
        return nullptr;
    }
    return Memory::GetPointer(*cpu_addr);
}
// Copies `size` bytes starting at the GPU virtual address `src_addr` into `dest_buffer`.
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
    // Nothing to copy; also avoids translating an address that would never be read.
    if (size == 0) {
        return;
    }

    const u8* const src_ptr{GetPointer(src_addr)};
    // memcpy with a null source is undefined behaviour even for small sizes.
    ASSERT_MSG(src_ptr != nullptr, "Invalid GPU address");

    // NOTE(review): a single memcpy assumes the whole range is contiguous in host memory -
    // confirm for ranges that span more than one GPU page.
    std::memcpy(dest_buffer, src_ptr, size);
}
// Copies `size` bytes from `src_buffer` to the GPU virtual address `dest_addr`.
void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
    // Nothing to copy; also avoids translating an address that would never be written.
    if (size == 0) {
        return;
    }

    u8* const dest_ptr{GetPointer(dest_addr)};
    // memcpy with a null destination is undefined behaviour even for small sizes.
    ASSERT_MSG(dest_ptr != nullptr, "Invalid GPU address");

    // NOTE(review): a single memcpy assumes the whole range is contiguous in host memory -
    // confirm for ranges that span more than one GPU page.
    std::memcpy(dest_ptr, src_buffer, size);
}
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
} }
VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {

@ -27,12 +27,27 @@ public:
GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
GPUVAddr GetRegionEnd(GPUVAddr region_start) const; GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
static constexpr u64 PAGE_BITS = 16; static constexpr u64 PAGE_BITS = 16;
static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
// Fixed-width reads. `addr` is a GPU virtual address; it is translated to a CPU virtual address
// and the access is forwarded to the matching Memory::ReadN function.
u8 Read8(GPUVAddr addr);
u16 Read16(GPUVAddr addr);
u32 Read32(GPUVAddr addr);
u64 Read64(GPUVAddr addr);
// Fixed-width writes. `addr` is a GPU virtual address; it is translated to a CPU virtual address
// and `data` is forwarded to the matching Memory::WriteN function.
void Write8(GPUVAddr addr, u8 data);
void Write16(GPUVAddr addr, u16 data);
void Write32(GPUVAddr addr, u32 data);
void Write64(GPUVAddr addr, u64 data);
// Returns the pointer Memory::GetPointer yields for the CPU address backing the GPU virtual
// address `vaddr`.
u8* GetPointer(GPUVAddr vaddr);
// Bulk copies between a caller-supplied buffer and GPU-addressed memory. `size` is in bytes.
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
private: private:
enum class PageStatus : u64 { enum class PageStatus : u64 {
Unmapped = 0xFFFFFFFFFFFFFFFFULL, Unmapped = 0xFFFFFFFFFFFFFFFFULL,

@ -6,7 +6,6 @@
#include <cstring> #include <cstring>
#include "common/assert.h" #include "common/assert.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "core/memory.h"
#include "video_core/morton.h" #include "video_core/morton.h"
#include "video_core/surface.h" #include "video_core/surface.h"
#include "video_core/textures/decoders.h" #include "video_core/textures/decoders.h"
@ -16,12 +15,12 @@ namespace VideoCore {
using Surface::GetBytesPerPixel; using Surface::GetBytesPerPixel;
using Surface::PixelFormat; using Surface::PixelFormat;
using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, VAddr); using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
template <bool morton_to_linear, PixelFormat format> template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
u32 tile_width_spacing, u8* buffer, VAddr addr) { u32 tile_width_spacing, u8* buffer, u8* addr) {
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@ -34,9 +33,9 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
stride, height, depth, block_height, block_depth, stride, height, depth, block_height, block_depth,
tile_width_spacing); tile_width_spacing);
} else { } else {
Tegra::Texture::CopySwizzledData( Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
(stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, (height + tile_size_y - 1) / tile_size_y, depth,
depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false, bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
block_height, block_depth, tile_width_spacing); block_height, block_depth, tile_width_spacing);
} }
} }
@ -282,7 +281,7 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, VAddr addr) { u8* buffer, u8* addr) {
GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
tile_width_spacing, buffer, addr); tile_width_spacing, buffer, addr);
} }

@ -13,7 +13,7 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, VAddr addr); u8* buffer, u8* addr);
void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data); u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);

@ -24,14 +24,12 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
std::size_t alignment, bool cache) { std::size_t alignment, bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size. // Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games. // TODO: Figure out which size is the best for given games.
cache &= size >= 2048; cache &= size >= 2048;
const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
if (cache) { if (cache) {
auto entry = TryGet(host_ptr); auto entry = TryGet(host_ptr);
if (entry) { if (entry) {
@ -54,8 +52,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
buffer_offset += size; buffer_offset += size;
if (cache) { if (cache) {
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, auto entry = std::make_shared<CachedBufferEntry>(
alignment, host_ptr); *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
Register(entry); Register(entry);
} }

@ -7,7 +7,6 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h" #include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h"
@ -39,7 +38,7 @@ void CachedGlobalRegion::Reload(u32 size_) {
glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
} }
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
const auto search{reserve.find(addr)}; const auto search{reserve.find(addr)};
if (search == reserve.end()) { if (search == reserve.end()) {
return {}; return {};
@ -47,11 +46,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
return search->second; return search->second;
} }
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) { GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size,
GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; u8* host_ptr) {
GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
if (!region) { if (!region) {
// No reserved surface available, create a new one and reserve it // No reserved surface available, create a new one and reserve it
region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr); auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
ReserveGlobalRegion(region); ReserveGlobalRegion(region);
} }
region->Reload(size); region->Reload(size);
@ -59,7 +61,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
} }
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
reserve.insert_or_assign(region->GetCpuAddr(), std::move(region)); reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
} }
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@ -70,23 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
auto& gpu{Core::System::GetInstance().GPU()}; auto& gpu{Core::System::GetInstance().GPU()};
const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; auto& memory_manager{gpu.MemoryManager()};
const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
ASSERT(cbuf_addr); global_region.GetCbufOffset()};
const auto actual_addr{memory_manager.Read64(addr)};
const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); const auto size{memory_manager.Read32(addr + 8)};
const auto size = Memory::Read32(*cbuf_addr + 8);
const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
ASSERT(actual_addr);
// Look up global region in the cache based on address // Look up global region in the cache based on address
const auto& host_ptr{Memory::GetPointer(*actual_addr)}; const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
GlobalRegion region{TryGet(host_ptr)}; GlobalRegion region{TryGet(host_ptr)};
if (!region) { if (!region) {
// No global region found - create a new one // No global region found - create a new one
region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr); region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
Register(region); Register(region);
} }

@ -65,11 +65,11 @@ public:
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private: private:
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr); GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr);
void ReserveGlobalRegion(GlobalRegion region); void ReserveGlobalRegion(GlobalRegion region);
std::unordered_map<VAddr, GlobalRegion> reserve; std::unordered_map<CacheAddr, GlobalRegion> reserve;
}; };
} // namespace OpenGL } // namespace OpenGL

@ -46,10 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; const u8* source{memory_manager.GetPointer(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
const u8* source{Memory::GetPointer(*cpu_addr)};
for (u32 primitive = 0; primitive < count / 4; ++primitive) { for (u32 primitive = 0; primitive < count / 4; ++primitive) {
for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {

@ -57,11 +57,9 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
addr = cpu_addr ? *cpu_addr : 0;
gpu_addr = gpu_addr_; gpu_addr = gpu_addr_;
host_ptr = Memory::GetPointer(addr); host_ptr = memory_manager.GetPointer(gpu_addr_);
size_in_bytes = SizeInBytesRaw(); size_in_bytes = SizeInBytesRaw();
if (IsPixelFormatASTC(pixel_format)) { if (IsPixelFormatASTC(pixel_format)) {
@ -447,7 +445,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
gl_buffer.data() + offset_gl, params.addr + offset); gl_buffer.data() + offset_gl, params.host_ptr + offset);
offset += layer_size; offset += layer_size;
offset_gl += gl_size; offset_gl += gl_size;
} }
@ -456,7 +454,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
gl_buffer.data(), params.addr + offset); gl_buffer.data(), params.host_ptr + offset);
} }
} }
@ -514,9 +512,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
"reinterpretation but the texture is tiled."); "reinterpretation but the texture is tiled.");
} }
const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
} }
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@ -604,7 +602,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
ApplyTextureDefaults(texture.handle, params.max_mip_level); ApplyTextureDefaults(texture.handle, params.max_mip_level);
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
// Clamp size to mapped GPU memory region // Clamp size to mapped GPU memory region
// TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@ -617,6 +615,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
cached_size_in_bytes = max_size; cached_size_in_bytes = max_size;
} }
cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr);
} }
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@ -925,7 +925,7 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
} }
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
if (params.addr == 0 || params.height * params.width == 0) { if (params.gpu_addr == 0 || params.height * params.width == 0) {
return {}; return {};
} }
@ -979,14 +979,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
const Surface& dst_surface) { const Surface& dst_surface) {
const auto& init_params{src_surface->GetSurfaceParams()}; const auto& init_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()};
VAddr address = init_params.addr; auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const std::size_t layer_size = dst_params.LayerMemorySize(); Tegra::GPUVAddr address{init_params.gpu_addr};
const std::size_t layer_size{dst_params.LayerMemorySize()};
for (u32 layer = 0; layer < dst_params.depth; layer++) { for (u32 layer = 0; layer < dst_params.depth; layer++) {
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
const Surface& copy = TryGet(Memory::GetPointer(sub_address)); const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
if (!copy) if (!copy) {
continue; continue;
}
const auto& src_params{copy->GetSurfaceParams()}; const auto& src_params{copy->GetSurfaceParams()};
const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@ -1242,9 +1244,10 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
return {}; return {};
} }
static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params,
const std::size_t size = params.LayerMemorySize(); u32 mipmap) {
VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); const std::size_t size{params.LayerMemorySize()};
Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
for (u32 i = 0; i < params.depth; i++) { for (u32 i = 0; i < params.depth; i++) {
if (start == addr) { if (start == addr) {
return {i}; return {i};
@ -1266,7 +1269,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
src_params.height == dst_params.MipHeight(*level) && src_params.height == dst_params.MipHeight(*level) &&
src_params.block_height >= dst_params.MipBlockHeight(*level)) { src_params.block_height >= dst_params.MipBlockHeight(*level)) {
const std::optional<u32> slot = const std::optional<u32> slot =
TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level); TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
if (slot.has_value()) { if (slot.has_value()) {
glCopyImageSubData(render_surface->Texture().handle, glCopyImageSubData(render_surface->Texture().handle,
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,

@ -296,7 +296,6 @@ struct SurfaceParams {
bool is_array; bool is_array;
bool srgb_conversion; bool srgb_conversion;
// Parameters used for caching // Parameters used for caching
VAddr addr;
u8* host_ptr; u8* host_ptr;
Tegra::GPUVAddr gpu_addr; Tegra::GPUVAddr gpu_addr;
std::size_t size_in_bytes; std::size_t size_in_bytes;
@ -349,7 +348,7 @@ public:
explicit CachedSurface(const SurfaceParams& params); explicit CachedSurface(const SurfaceParams& params);
VAddr GetCpuAddr() const override { VAddr GetCpuAddr() const override {
return params.addr; return cpu_addr;
} }
std::size_t GetSizeInBytes() const override { std::size_t GetSizeInBytes() const override {
@ -433,6 +432,7 @@ private:
std::size_t memory_size; std::size_t memory_size;
bool reinterpreted = false; bool reinterpreted = false;
bool must_reload = false; bool must_reload = false;
VAddr cpu_addr{};
}; };
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {

@ -32,13 +32,10 @@ struct UnspecializedShader {
namespace { namespace {
/// Gets the address for the specified shader stage program /// Gets the address for the specified shader stage program
VAddr GetShaderAddress(Maxwell::ShaderProgram program) { Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + return gpu.regs.code_address.CodeAddress() + shader_config.offset;
shader_config.offset);
ASSERT_MSG(address, "Invalid GPU address");
return *address;
} }
/// Gets the shader program code from memory for the specified address /// Gets the shader program code from memory for the specified address
@ -214,11 +211,11 @@ std::set<GLenum> GetSupportedFormats() {
} // namespace } // namespace
CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs, const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
: host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier}, : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
program_type{program_type}, disk_cache{disk_cache}, program_type{program_type}, disk_cache{disk_cache},
precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
@ -244,11 +241,11 @@ CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
disk_cache.SaveRaw(raw); disk_cache.SaveRaw(raw);
} }
CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs, const PrecompiledPrograms& precompiled_programs,
GLShader::ProgramResult result, u8* host_ptr) GLShader::ProgramResult result, u8* host_ptr)
: guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type}, : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type},
disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
host_ptr} { host_ptr} {
@ -273,7 +270,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
} }
LabelGLObject(GL_PROGRAM, program->handle, guest_addr); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
} }
handle = program->handle; handle = program->handle;
@ -325,7 +322,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
} }
LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name); LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
return target_program->handle; return target_program->handle;
}; };
@ -488,31 +485,31 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
return last_shaders[static_cast<u32>(program)]; return last_shaders[static_cast<u32>(program)];
} }
const VAddr program_addr{GetShaderAddress(program)}; auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
// Look up shader in the cache based on address // Look up shader in the cache based on address
const auto& host_ptr{Memory::GetPointer(program_addr)}; const auto& host_ptr{memory_manager.GetPointer(program_addr)};
Shader shader{TryGet(host_ptr)}; Shader shader{TryGet(host_ptr)};
if (!shader) { if (!shader) {
// No shader found - create a new one // No shader found - create a new one
const auto& host_ptr{Memory::GetPointer(program_addr)};
ProgramCode program_code{GetShaderCode(host_ptr)}; ProgramCode program_code{GetShaderCode(host_ptr)};
ProgramCode program_code_b; ProgramCode program_code_b;
if (program == Maxwell::ShaderProgram::VertexA) { if (program == Maxwell::ShaderProgram::VertexA) {
program_code_b = GetShaderCode( program_code_b = GetShaderCode(
Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
} }
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
const auto found = precompiled_shaders.find(unique_identifier); const auto found = precompiled_shaders.find(unique_identifier);
if (found != precompiled_shaders.end()) { if (found != precompiled_shaders.end()) {
shader = shader =
std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
precompiled_programs, found->second, host_ptr); precompiled_programs, found->second, host_ptr);
} else { } else {
shader = std::make_shared<CachedShader>( shader = std::make_shared<CachedShader>(
program_addr, unique_identifier, program, disk_cache, precompiled_programs, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
std::move(program_code), std::move(program_code_b), host_ptr); std::move(program_code), std::move(program_code_b), host_ptr);
} }
Register(shader); Register(shader);

@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
class CachedShader final : public RasterizerCacheObject { class CachedShader final : public RasterizerCacheObject {
public: public:
explicit CachedShader(VAddr guest_addr, u64 unique_identifier, explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs, const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
explicit CachedShader(VAddr guest_addr, u64 unique_identifier, explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs, const PrecompiledPrograms& precompiled_programs,
GLShader::ProgramResult result, u8* host_ptr); GLShader::ProgramResult result, u8* host_ptr);
VAddr GetCpuAddr() const override { VAddr GetCpuAddr() const override {
return guest_addr; return cpu_addr;
} }
std::size_t GetSizeInBytes() const override { std::size_t GetSizeInBytes() const override {
@ -92,7 +92,7 @@ private:
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
u8* host_ptr{}; u8* host_ptr{};
VAddr guest_addr{}; VAddr cpu_addr{};
u64 unique_identifier{}; u64 unique_identifier{};
Maxwell::ShaderProgram program_type{}; Maxwell::ShaderProgram program_type{};
ShaderDiskCacheOpenGL& disk_cache; ShaderDiskCacheOpenGL& disk_cache;

@ -6,7 +6,6 @@
#include <cstring> #include <cstring>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "core/memory.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/textures/decoders.h" #include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h" #include "video_core/textures/texture.h"
@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
} }
} }
void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
u32 block_depth, u32 width_spacing) { u32 block_depth, u32 width_spacing) {
CopySwizzledData((width + tile_size_x - 1) / tile_size_x, CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
(height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
block_height, block_depth, width_spacing); width_spacing);
} }
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 width, u32 height, u32 depth, u32 block_height,
u32 block_height, u32 block_depth, u32 width_spacing) { u32 block_depth, u32 width_spacing) {
std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
width, height, depth, block_height, block_depth, width_spacing); width, height, depth, block_height, block_depth, width_spacing);
@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
} }
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
u32 block_height) {
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
gob_size_x}; gob_size_x};
for (u32 line = 0; line < subrect_height; ++line) { for (u32 line = 0; line < subrect_height; ++line) {
@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
const u32 gob_address = const u32 gob_address =
gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
const VAddr dest_addr = swizzled_data + swizzled_offset; u8* dest_addr = swizzled_data + swizzled_offset;
Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel); std::memcpy(dest_addr, source_line, bytes_per_pixel);
} }
} }
} }
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
u32 block_height, u32 offset_x, u32 offset_y) { u32 offset_x, u32 offset_y) {
for (u32 line = 0; line < subrect_height; ++line) { for (u32 line = 0; line < subrect_height; ++line) {
const u32 y2 = line + offset_y; const u32 y2 = line + offset_y;
const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
const u32 x2 = (x + offset_x) * bytes_per_pixel; const u32 x2 = (x + offset_x) * bytes_per_pixel;
const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
const VAddr source_addr = swizzled_data + swizzled_offset; u8* source_addr = swizzled_data + swizzled_offset;
Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel); std::memcpy(dest_line, source_addr, bytes_per_pixel);
} }
} }
} }

@ -17,14 +17,14 @@ inline std::size_t GetGOBSize() {
} }
/// Unswizzles a swizzled texture without changing its format. /// Unswizzles a swizzled texture without changing its format.
void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight, u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
/// Unswizzles a swizzled texture without changing its format. /// Unswizzles a swizzled texture without changing its format.
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight, u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight, u32 block_depth = TICEntry::DefaultBlockHeight,
u32 width_spacing = 0); u32 width_spacing = 0);
@ -44,12 +44,11 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
/// Copies an untiled subrectangle into a tiled surface. /// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
u32 block_height);
/// Copies a tiled subrectangle into a linear surface. /// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
u32 block_height, u32 offset_x, u32 offset_y); u32 offset_x, u32 offset_y);
} // namespace Tegra::Texture } // namespace Tegra::Texture

@ -383,13 +383,12 @@ void GraphicsSurfaceWidget::OnUpdate() {
// TODO: Implement a good way to visualize alpha components! // TODO: Implement a good way to visualize alpha components!
QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
// TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles. // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
// Needs to be fixed if we plan to use this feature more, otherwise we may remove it. // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
auto unswizzled_data = Tegra::Texture::UnswizzleTexture( auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
*address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width, gpu.MemoryManager().GetPointer(surface_address), 1, 1,
surface_height, 1U); Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U);
auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
surface_width, surface_height); surface_width, surface_height);