|
|
@ -6,11 +6,13 @@
|
|
|
|
#include "common/alignment.h"
|
|
|
|
#include "common/alignment.h"
|
|
|
|
#include "common/assert.h"
|
|
|
|
#include "common/assert.h"
|
|
|
|
#include "common/logging/log.h"
|
|
|
|
#include "common/logging/log.h"
|
|
|
|
|
|
|
|
#include "common/settings.h"
|
|
|
|
#include "core/core.h"
|
|
|
|
#include "core/core.h"
|
|
|
|
#include "core/device_memory.h"
|
|
|
|
#include "core/device_memory.h"
|
|
|
|
#include "core/hle/kernel/k_page_table.h"
|
|
|
|
#include "core/hle/kernel/k_page_table.h"
|
|
|
|
#include "core/hle/kernel/k_process.h"
|
|
|
|
#include "core/hle/kernel/k_process.h"
|
|
|
|
#include "core/memory.h"
|
|
|
|
#include "core/memory.h"
|
|
|
|
|
|
|
|
#include "video_core/invalidation_accumulator.h"
|
|
|
|
#include "video_core/memory_manager.h"
|
|
|
|
#include "video_core/memory_manager.h"
|
|
|
|
#include "video_core/rasterizer_interface.h"
|
|
|
|
#include "video_core/rasterizer_interface.h"
|
|
|
|
#include "video_core/renderer_base.h"
|
|
|
|
#include "video_core/renderer_base.h"
|
|
|
@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
|
|
|
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
|
|
|
|
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
|
|
|
|
page_bits != big_page_bits ? page_bits : 0},
|
|
|
|
page_bits != big_page_bits ? page_bits : 0},
|
|
|
|
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
|
|
|
|
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
|
|
|
|
1, std::memory_order_acq_rel)} {
|
|
|
|
1, std::memory_order_acq_rel)},
|
|
|
|
|
|
|
|
accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
|
|
|
|
address_space_size = 1ULL << address_space_bits;
|
|
|
|
address_space_size = 1ULL << address_space_bits;
|
|
|
|
page_size = 1ULL << page_bits;
|
|
|
|
page_size = 1ULL << page_bits;
|
|
|
|
page_mask = page_size - 1ULL;
|
|
|
|
page_mask = page_size - 1ULL;
|
|
|
@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
|
|
|
big_page_table_cpu.resize(big_page_table_size);
|
|
|
|
big_page_table_cpu.resize(big_page_table_size);
|
|
|
|
big_page_continous.resize(big_page_table_size / continous_bits, 0);
|
|
|
|
big_page_continous.resize(big_page_table_size / continous_bits, 0);
|
|
|
|
entries.resize(page_table_size / 32, 0);
|
|
|
|
entries.resize(page_table_size / 32, 0);
|
|
|
|
|
|
|
|
if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
|
|
|
|
|
|
|
|
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
fastmem_arena = nullptr;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
MemoryManager::~MemoryManager() = default;
|
|
|
|
MemoryManager::~MemoryManager() = default;
|
|
|
@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
|
|
|
if (size == 0) {
|
|
|
|
if (size == 0) {
|
|
|
|
return;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
|
|
|
|
GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
|
|
|
|
|
|
|
|
|
|
|
|
for (const auto& [map_addr, map_size] : submapped_ranges) {
|
|
|
|
for (const auto& [map_addr, map_size] : page_stash) {
|
|
|
|
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
|
|
|
rasterizer->UnmapMemory(map_addr, map_size);
|
|
|
|
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
|
|
|
|
|
|
|
|
ASSERT(cpu_addr);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rasterizer->UnmapMemory(*cpu_addr, map_size);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
page_stash.clear();
|
|
|
|
|
|
|
|
|
|
|
|
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
|
|
|
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
|
|
|
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
|
|
|
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
|
|
@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <bool is_safe>
|
|
|
|
template <bool is_safe, bool use_fastmem>
|
|
|
|
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
|
|
|
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
|
|
|
[[maybe_unused]] VideoCommon::CacheType which) const {
|
|
|
|
[[maybe_unused]] VideoCommon::CacheType which) const {
|
|
|
|
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
|
|
|
|
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
|
|
|
@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
|
|
|
|
if constexpr (is_safe) {
|
|
|
|
if constexpr (is_safe) {
|
|
|
|
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
|
|
|
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if constexpr (use_fastmem) {
|
|
|
|
|
|
|
|
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
u8* physical = memory.GetPointer(cpu_addr_base);
|
|
|
|
u8* physical = memory.GetPointer(cpu_addr_base);
|
|
|
|
std::memcpy(dest_buffer, physical, copy_amount);
|
|
|
|
std::memcpy(dest_buffer, physical, copy_amount);
|
|
|
|
|
|
|
|
}
|
|
|
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
|
|
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
|
|
|
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
|
|
@ -379,12 +388,16 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
|
|
|
|
if constexpr (is_safe) {
|
|
|
|
if constexpr (is_safe) {
|
|
|
|
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
|
|
|
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if constexpr (use_fastmem) {
|
|
|
|
|
|
|
|
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
if (!IsBigPageContinous(page_index)) [[unlikely]] {
|
|
|
|
if (!IsBigPageContinous(page_index)) [[unlikely]] {
|
|
|
|
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
|
|
|
|
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
u8* physical = memory.GetPointer(cpu_addr_base);
|
|
|
|
u8* physical = memory.GetPointer(cpu_addr_base);
|
|
|
|
std::memcpy(dest_buffer, physical, copy_amount);
|
|
|
|
std::memcpy(dest_buffer, physical, copy_amount);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
|
|
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
auto read_short_pages = [&](std::size_t page_index, std::size_t offset,
|
|
|
|
auto read_short_pages = [&](std::size_t page_index, std::size_t offset,
|
|
|
@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
|
|
|
|
|
|
|
|
|
|
|
|
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
|
|
|
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
|
|
|
VideoCommon::CacheType which) const {
|
|
|
|
VideoCommon::CacheType which) const {
|
|
|
|
ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which);
|
|
|
|
if (fastmem_arena) [[likely]] {
|
|
|
|
|
|
|
|
ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
|
|
|
|
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
|
|
|
|
const std::size_t size) const {
|
|
|
|
const std::size_t size) const {
|
|
|
|
ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
|
|
|
|
if (fastmem_arena) [[likely]] {
|
|
|
|
|
|
|
|
ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <bool is_safe>
|
|
|
|
template <bool is_safe>
|
|
|
@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
|
|
|
|
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
|
|
|
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
|
|
|
|
|
|
|
|
std::size_t size) {
|
|
|
|
|
|
|
|
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
|
|
|
|
|
|
|
accumulator->Add(gpu_dest_addr, size);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
|
|
|
|
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
|
|
|
|
VideoCommon::CacheType which) const {
|
|
|
|
VideoCommon::CacheType which) const {
|
|
|
|
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
|
|
|
|
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
|
|
|
@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
|
|
|
|
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
|
|
|
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
|
|
|
GPUVAddr gpu_addr, std::size_t size) const {
|
|
|
|
GPUVAddr gpu_addr, std::size_t size) const {
|
|
|
|
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
|
|
|
|
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
|
|
|
|
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
|
|
|
|
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <bool is_gpu_address>
|
|
|
|
|
|
|
|
void MemoryManager::GetSubmappedRangeImpl(
|
|
|
|
|
|
|
|
GPUVAddr gpu_addr, std::size_t size,
|
|
|
|
|
|
|
|
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
|
|
|
|
|
|
|
result) const {
|
|
|
|
|
|
|
|
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
|
|
|
|
|
|
|
|
last_segment{};
|
|
|
|
std::optional<VAddr> old_page_addr{};
|
|
|
|
std::optional<VAddr> old_page_addr{};
|
|
|
|
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
|
|
|
|
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
|
|
|
|
[[maybe_unused]] std::size_t offset,
|
|
|
|
[[maybe_unused]] std::size_t offset,
|
|
|
@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
|
|
|
}
|
|
|
|
}
|
|
|
|
old_page_addr = {cpu_addr_base + copy_amount};
|
|
|
|
old_page_addr = {cpu_addr_base + copy_amount};
|
|
|
|
if (!last_segment) {
|
|
|
|
if (!last_segment) {
|
|
|
|
|
|
|
|
if constexpr (is_gpu_address) {
|
|
|
|
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
|
|
|
|
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
|
|
|
|
last_segment = {new_base_addr, copy_amount};
|
|
|
|
last_segment = {new_base_addr, copy_amount};
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
last_segment = {cpu_addr_base, copy_amount};
|
|
|
|
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
last_segment->second += copy_amount;
|
|
|
|
last_segment->second += copy_amount;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
|
|
|
}
|
|
|
|
}
|
|
|
|
old_page_addr = {cpu_addr_base + copy_amount};
|
|
|
|
old_page_addr = {cpu_addr_base + copy_amount};
|
|
|
|
if (!last_segment) {
|
|
|
|
if (!last_segment) {
|
|
|
|
|
|
|
|
if constexpr (is_gpu_address) {
|
|
|
|
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
|
|
|
|
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
|
|
|
|
last_segment = {new_base_addr, copy_amount};
|
|
|
|
last_segment = {new_base_addr, copy_amount};
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
last_segment = {cpu_addr_base, copy_amount};
|
|
|
|
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
last_segment->second += copy_amount;
|
|
|
|
last_segment->second += copy_amount;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
|
|
|
};
|
|
|
|
};
|
|
|
|
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
|
|
|
|
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
|
|
|
|
split(0, 0, 0);
|
|
|
|
split(0, 0, 0);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void MemoryManager::FlushCaching() {
|
|
|
|
|
|
|
|
if (!accumulator->AnyAccumulated()) {
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
accumulator->Callback([this](GPUVAddr addr, size_t size) {
|
|
|
|
|
|
|
|
GetSubmappedRangeImpl<false>(addr, size, page_stash);
|
|
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
rasterizer->InnerInvalidation(page_stash);
|
|
|
|
|
|
|
|
page_stash.clear();
|
|
|
|
|
|
|
|
accumulator->Clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace Tegra
|
|
|
|
} // namespace Tegra
|
|
|
|