mirror of https://git.suyu.dev/suyu/suyu
Merge pull request #12579 from FernandoS27/smmu
Core: Implement Device Mapping & GPU SMMUmerge-requests/60/head
commit
8bd10473d6
@ -0,0 +1,211 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "common/virtual_buffer.h"
|
||||
|
||||
namespace Core {
|
||||
|
||||
constexpr size_t DEVICE_PAGEBITS = 12ULL;
|
||||
constexpr size_t DEVICE_PAGESIZE = 1ULL << DEVICE_PAGEBITS;
|
||||
constexpr size_t DEVICE_PAGEMASK = DEVICE_PAGESIZE - 1ULL;
|
||||
|
||||
class DeviceMemory;
|
||||
|
||||
namespace Memory {
|
||||
class Memory;
|
||||
}
|
||||
|
||||
template <typename DTraits>
|
||||
struct DeviceMemoryManagerAllocator;
|
||||
|
||||
struct Asid {
|
||||
size_t id;
|
||||
};
|
||||
|
||||
template <typename Traits>
|
||||
class DeviceMemoryManager {
|
||||
using DeviceInterface = typename Traits::DeviceInterface;
|
||||
using DeviceMethods = typename Traits::DeviceMethods;
|
||||
|
||||
public:
|
||||
DeviceMemoryManager(const DeviceMemory& device_memory);
|
||||
~DeviceMemoryManager();
|
||||
|
||||
void BindInterface(DeviceInterface* device_inter);
|
||||
|
||||
DAddr Allocate(size_t size);
|
||||
void AllocateFixed(DAddr start, size_t size);
|
||||
void Free(DAddr start, size_t size);
|
||||
|
||||
void Map(DAddr address, VAddr virtual_address, size_t size, Asid asid, bool track = false);
|
||||
|
||||
void Unmap(DAddr address, size_t size);
|
||||
|
||||
void TrackContinuityImpl(DAddr address, VAddr virtual_address, size_t size, Asid asid);
|
||||
void TrackContinuity(DAddr address, VAddr virtual_address, size_t size, Asid asid) {
|
||||
std::scoped_lock lk(mapping_guard);
|
||||
TrackContinuityImpl(address, virtual_address, size, asid);
|
||||
}
|
||||
|
||||
// Write / Read
|
||||
template <typename T>
|
||||
T* GetPointer(DAddr address);
|
||||
|
||||
template <typename T>
|
||||
const T* GetPointer(DAddr address) const;
|
||||
|
||||
template <typename Func>
|
||||
void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
|
||||
DAddr subbits = static_cast<DAddr>(address & page_mask);
|
||||
const u32 base = compressed_device_addr[(address >> page_bits)];
|
||||
if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] {
|
||||
const DAddr d_address = (static_cast<DAddr>(base) << page_bits) + subbits;
|
||||
operation(d_address);
|
||||
return;
|
||||
}
|
||||
InnerGatherDeviceAddresses(buffer, address);
|
||||
for (u32 value : buffer) {
|
||||
operation((static_cast<DAddr>(value) << page_bits) + subbits);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
|
||||
PAddr address = GetRawPhysicalAddr<u8>(p);
|
||||
ApplyOpOnPAddr(address, buffer, operation);
|
||||
}
|
||||
|
||||
PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
|
||||
PAddr subbits = static_cast<PAddr>(address & page_mask);
|
||||
auto paddr = compressed_physical_ptr[(address >> page_bits)];
|
||||
if (paddr == 0) {
|
||||
return 0;
|
||||
}
|
||||
return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Write(DAddr address, T value);
|
||||
|
||||
template <typename T>
|
||||
T Read(DAddr address) const;
|
||||
|
||||
u8* GetSpan(const DAddr src_addr, const std::size_t size);
|
||||
const u8* GetSpan(const DAddr src_addr, const std::size_t size) const;
|
||||
|
||||
void ReadBlock(DAddr address, void* dest_pointer, size_t size);
|
||||
void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
|
||||
void WriteBlock(DAddr address, const void* src_pointer, size_t size);
|
||||
void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
|
||||
|
||||
Asid RegisterProcess(Memory::Memory* memory);
|
||||
void UnregisterProcess(Asid id);
|
||||
|
||||
void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta);
|
||||
|
||||
static constexpr size_t AS_BITS = Traits::device_virtual_bits;
|
||||
|
||||
private:
|
||||
static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
|
||||
static constexpr size_t device_as_size = 1ULL << device_virtual_bits;
|
||||
static constexpr size_t physical_min_bits = 32;
|
||||
static constexpr size_t physical_max_bits = 33;
|
||||
static constexpr size_t page_bits = 12;
|
||||
static constexpr size_t page_size = 1ULL << page_bits;
|
||||
static constexpr size_t page_mask = page_size - 1ULL;
|
||||
static constexpr u32 physical_address_base = 1U << page_bits;
|
||||
static constexpr u32 MULTI_FLAG_BITS = 31;
|
||||
static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS;
|
||||
static constexpr u32 MULTI_MASK = ~MULTI_FLAG;
|
||||
|
||||
template <typename T>
|
||||
T* GetPointerFromRaw(PAddr addr) {
|
||||
return reinterpret_cast<T*>(physical_base + addr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const T* GetPointerFromRaw(PAddr addr) const {
|
||||
return reinterpret_cast<T*>(physical_base + addr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
PAddr GetRawPhysicalAddr(const T* ptr) const {
|
||||
return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) - physical_base);
|
||||
}
|
||||
|
||||
void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
|
||||
auto increment);
|
||||
|
||||
void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address);
|
||||
|
||||
std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
|
||||
|
||||
const uintptr_t physical_base;
|
||||
DeviceInterface* device_inter;
|
||||
Common::VirtualBuffer<u32> compressed_physical_ptr;
|
||||
Common::VirtualBuffer<u32> compressed_device_addr;
|
||||
Common::VirtualBuffer<u32> continuity_tracker;
|
||||
|
||||
// Process memory interfaces
|
||||
|
||||
std::deque<size_t> id_pool;
|
||||
std::deque<Memory::Memory*> registered_processes;
|
||||
|
||||
// Memory protection management
|
||||
|
||||
static constexpr size_t guest_max_as_bits = 39;
|
||||
static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits;
|
||||
static constexpr size_t guest_mask = guest_as_size - 1ULL;
|
||||
static constexpr size_t asid_start_bit = guest_max_as_bits;
|
||||
|
||||
std::pair<Asid, VAddr> ExtractCPUBacking(size_t page_index) {
|
||||
auto content = cpu_backing_address[page_index];
|
||||
const VAddr address = content & guest_mask;
|
||||
const Asid asid{static_cast<size_t>(content >> asid_start_bit)};
|
||||
return std::make_pair(asid, address);
|
||||
}
|
||||
|
||||
void InsertCPUBacking(size_t page_index, VAddr address, Asid asid) {
|
||||
cpu_backing_address[page_index] = address | (asid.id << asid_start_bit);
|
||||
}
|
||||
|
||||
Common::VirtualBuffer<VAddr> cpu_backing_address;
|
||||
static constexpr size_t subentries = 8 / sizeof(u8);
|
||||
static constexpr size_t subentries_mask = subentries - 1;
|
||||
class CounterEntry final {
|
||||
public:
|
||||
CounterEntry() = default;
|
||||
|
||||
std::atomic_uint8_t& Count(std::size_t page) {
|
||||
return values[page & subentries_mask];
|
||||
}
|
||||
|
||||
const std::atomic_uint8_t& Count(std::size_t page) const {
|
||||
return values[page & subentries_mask];
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<std::atomic_uint8_t, subentries> values{};
|
||||
};
|
||||
static_assert(sizeof(CounterEntry) == subentries * sizeof(u8),
|
||||
"CounterEntry should be 8 bytes!");
|
||||
|
||||
static constexpr size_t num_counter_entries =
|
||||
(1ULL << (device_virtual_bits - page_bits)) / subentries;
|
||||
using CachedPages = std::array<CounterEntry, num_counter_entries>;
|
||||
std::unique_ptr<CachedPages> cached_pages;
|
||||
std::mutex counter_guard;
|
||||
std::mutex mapping_guard;
|
||||
};
|
||||
|
||||
} // namespace Core
|
@ -0,0 +1,582 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/address_space.h"
|
||||
#include "common/address_space.inc"
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/device_memory.h"
|
||||
#include "core/device_memory_manager.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
namespace Core {
|
||||
|
||||
namespace {
|
||||
|
||||
class MultiAddressContainer {
|
||||
public:
|
||||
MultiAddressContainer() = default;
|
||||
~MultiAddressContainer() = default;
|
||||
|
||||
void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
|
||||
buffer.resize(8);
|
||||
buffer.resize(0);
|
||||
size_t index = 0;
|
||||
const auto add_value = [&](u32 value) {
|
||||
buffer[index] = value;
|
||||
index++;
|
||||
buffer.resize(index);
|
||||
};
|
||||
|
||||
u32 iter_entry = start_entry;
|
||||
Entry* current = &storage[iter_entry - 1];
|
||||
add_value(current->value);
|
||||
while (current->next_entry != 0) {
|
||||
iter_entry = current->next_entry;
|
||||
current = &storage[iter_entry - 1];
|
||||
add_value(current->value);
|
||||
}
|
||||
}
|
||||
|
||||
u32 Register(u32 value) {
|
||||
return RegisterImplementation(value);
|
||||
}
|
||||
|
||||
void Register(u32 value, u32 start_entry) {
|
||||
auto entry_id = RegisterImplementation(value);
|
||||
u32 iter_entry = start_entry;
|
||||
Entry* current = &storage[iter_entry - 1];
|
||||
while (current->next_entry != 0) {
|
||||
iter_entry = current->next_entry;
|
||||
current = &storage[iter_entry - 1];
|
||||
}
|
||||
current->next_entry = entry_id;
|
||||
}
|
||||
|
||||
std::pair<bool, u32> Unregister(u32 value, u32 start_entry) {
|
||||
u32 iter_entry = start_entry;
|
||||
Entry* previous{};
|
||||
Entry* current = &storage[iter_entry - 1];
|
||||
Entry* next{};
|
||||
bool more_than_one_remaining = false;
|
||||
u32 result_start{start_entry};
|
||||
size_t count = 0;
|
||||
while (current->value != value) {
|
||||
count++;
|
||||
previous = current;
|
||||
iter_entry = current->next_entry;
|
||||
current = &storage[iter_entry - 1];
|
||||
}
|
||||
// Find next
|
||||
u32 next_entry = current->next_entry;
|
||||
if (next_entry != 0) {
|
||||
next = &storage[next_entry - 1];
|
||||
more_than_one_remaining = next->next_entry != 0 || previous != nullptr;
|
||||
}
|
||||
if (previous) {
|
||||
previous->next_entry = next_entry;
|
||||
} else {
|
||||
result_start = next_entry;
|
||||
}
|
||||
free_entries.emplace_back(iter_entry);
|
||||
return std::make_pair(more_than_one_remaining || count > 1, result_start);
|
||||
}
|
||||
|
||||
u32 ReleaseEntry(u32 start_entry) {
|
||||
Entry* current = &storage[start_entry - 1];
|
||||
free_entries.emplace_back(start_entry);
|
||||
return current->value;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 RegisterImplementation(u32 value) {
|
||||
auto entry_id = GetNewEntry();
|
||||
auto& entry = storage[entry_id - 1];
|
||||
entry.next_entry = 0;
|
||||
entry.value = value;
|
||||
return entry_id;
|
||||
}
|
||||
u32 GetNewEntry() {
|
||||
if (!free_entries.empty()) {
|
||||
u32 result = free_entries.front();
|
||||
free_entries.pop_front();
|
||||
return result;
|
||||
}
|
||||
storage.emplace_back();
|
||||
u32 new_entry = static_cast<u32>(storage.size());
|
||||
return new_entry;
|
||||
}
|
||||
|
||||
struct Entry {
|
||||
u32 next_entry{};
|
||||
u32 value{};
|
||||
};
|
||||
|
||||
std::deque<Entry> storage;
|
||||
std::deque<u32> free_entries;
|
||||
};
|
||||
|
||||
struct EmptyAllocator {
|
||||
EmptyAllocator([[maybe_unused]] DAddr address) {}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename DTraits>
|
||||
struct DeviceMemoryManagerAllocator {
|
||||
static constexpr size_t device_virtual_bits = DTraits::device_virtual_bits;
|
||||
static constexpr DAddr first_address = 1ULL << Memory::YUZU_PAGEBITS;
|
||||
static constexpr DAddr max_device_area = 1ULL << device_virtual_bits;
|
||||
|
||||
DeviceMemoryManagerAllocator() : main_allocator(first_address) {}
|
||||
|
||||
Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
|
||||
MultiAddressContainer multi_dev_address;
|
||||
|
||||
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
|
||||
template <bool pin_area>
|
||||
[[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
|
||||
return addr >= 0 && addr + size <= max_device_area;
|
||||
}
|
||||
|
||||
DAddr Allocate(size_t size) {
|
||||
return main_allocator.Allocate(size);
|
||||
}
|
||||
|
||||
void AllocateFixed(DAddr b_address, size_t b_size) {
|
||||
main_allocator.AllocateFixed(b_address, b_size);
|
||||
}
|
||||
|
||||
void Free(DAddr b_address, size_t b_size) {
|
||||
main_allocator.Free(b_address, b_size);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Traits>
|
||||
DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_)
|
||||
: physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())},
|
||||
device_inter{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
|
||||
compressed_device_addr(1ULL << ((Settings::values.memory_layout_mode.GetValue() ==
|
||||
Settings::MemoryLayout::Memory_4Gb
|
||||
? physical_min_bits
|
||||
: physical_max_bits) -
|
||||
Memory::YUZU_PAGEBITS)),
|
||||
continuity_tracker(device_as_size >> Memory::YUZU_PAGEBITS),
|
||||
cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
|
||||
impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
|
||||
cached_pages = std::make_unique<CachedPages>();
|
||||
|
||||
const size_t total_virtual = device_as_size >> Memory::YUZU_PAGEBITS;
|
||||
for (size_t i = 0; i < total_virtual; i++) {
|
||||
compressed_physical_ptr[i] = 0;
|
||||
continuity_tracker[i] = 1;
|
||||
cpu_backing_address[i] = 0;
|
||||
}
|
||||
const size_t total_phys = 1ULL << ((Settings::values.memory_layout_mode.GetValue() ==
|
||||
Settings::MemoryLayout::Memory_4Gb
|
||||
? physical_min_bits
|
||||
: physical_max_bits) -
|
||||
Memory::YUZU_PAGEBITS);
|
||||
for (size_t i = 0; i < total_phys; i++) {
|
||||
compressed_device_addr[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
DeviceMemoryManager<Traits>::~DeviceMemoryManager() = default;
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::BindInterface(DeviceInterface* device_inter_) {
|
||||
device_inter = device_inter_;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
DAddr DeviceMemoryManager<Traits>::Allocate(size_t size) {
|
||||
return impl->Allocate(size);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::AllocateFixed(DAddr start, size_t size) {
|
||||
return impl->AllocateFixed(start, size);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
|
||||
impl->Free(start, size);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
|
||||
Asid asid, bool track) {
|
||||
Core::Memory::Memory* process_memory = registered_processes[asid.id];
|
||||
size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
|
||||
size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
|
||||
std::scoped_lock lk(mapping_guard);
|
||||
for (size_t i = 0; i < num_pages; i++) {
|
||||
const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE;
|
||||
auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress));
|
||||
if (ptr == nullptr) [[unlikely]] {
|
||||
compressed_physical_ptr[start_page_d + i] = 0;
|
||||
continue;
|
||||
}
|
||||
auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
|
||||
compressed_physical_ptr[start_page_d + i] = phys_addr;
|
||||
InsertCPUBacking(start_page_d + i, new_vaddress, asid);
|
||||
const u32 base_dev = compressed_device_addr[phys_addr - 1U];
|
||||
const u32 new_dev = static_cast<u32>(start_page_d + i);
|
||||
if (base_dev == 0) [[likely]] {
|
||||
compressed_device_addr[phys_addr - 1U] = new_dev;
|
||||
continue;
|
||||
}
|
||||
u32 start_id = base_dev & MULTI_MASK;
|
||||
if ((base_dev >> MULTI_FLAG_BITS) == 0) {
|
||||
start_id = impl->multi_dev_address.Register(base_dev);
|
||||
compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id;
|
||||
}
|
||||
impl->multi_dev_address.Register(new_dev, start_id);
|
||||
}
|
||||
if (track) {
|
||||
TrackContinuityImpl(address, virtual_address, size, asid);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
|
||||
size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
|
||||
size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
|
||||
device_inter->InvalidateRegion(address, size);
|
||||
std::scoped_lock lk(mapping_guard);
|
||||
for (size_t i = 0; i < num_pages; i++) {
|
||||
auto phys_addr = compressed_physical_ptr[start_page_d + i];
|
||||
compressed_physical_ptr[start_page_d + i] = 0;
|
||||
cpu_backing_address[start_page_d + i] = 0;
|
||||
if (phys_addr != 0) [[likely]] {
|
||||
const u32 base_dev = compressed_device_addr[phys_addr - 1U];
|
||||
if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] {
|
||||
compressed_device_addr[phys_addr - 1] = 0;
|
||||
continue;
|
||||
}
|
||||
const auto [more_entries, new_start] = impl->multi_dev_address.Unregister(
|
||||
static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK);
|
||||
if (!more_entries) {
|
||||
compressed_device_addr[phys_addr - 1] =
|
||||
impl->multi_dev_address.ReleaseEntry(new_start);
|
||||
continue;
|
||||
}
|
||||
compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
|
||||
}
|
||||
}
|
||||
}
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::TrackContinuityImpl(DAddr address, VAddr virtual_address,
|
||||
size_t size, Asid asid) {
|
||||
Core::Memory::Memory* process_memory = registered_processes[asid.id];
|
||||
size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
|
||||
size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
|
||||
uintptr_t last_ptr = 0;
|
||||
size_t page_count = 1;
|
||||
for (size_t i = num_pages; i > 0; i--) {
|
||||
size_t index = i - 1;
|
||||
const VAddr new_vaddress = virtual_address + index * Memory::YUZU_PAGESIZE;
|
||||
const uintptr_t new_ptr = reinterpret_cast<uintptr_t>(
|
||||
process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress)));
|
||||
if (new_ptr + page_size == last_ptr) {
|
||||
page_count++;
|
||||
} else {
|
||||
page_count = 1;
|
||||
}
|
||||
last_ptr = new_ptr;
|
||||
continuity_tracker[start_page_d + index] = static_cast<u32>(page_count);
|
||||
}
|
||||
}
|
||||
template <typename Traits>
|
||||
u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) {
|
||||
size_t page_index = src_addr >> page_bits;
|
||||
size_t subbits = src_addr & page_mask;
|
||||
if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) {
|
||||
return GetPointer<u8>(src_addr);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
const u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) const {
|
||||
size_t page_index = src_addr >> page_bits;
|
||||
size_t subbits = src_addr & page_mask;
|
||||
if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) {
|
||||
return GetPointer<u8>(src_addr);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
|
||||
PAddr address) {
|
||||
size_t phys_addr = address >> page_bits;
|
||||
std::scoped_lock lk(mapping_guard);
|
||||
u32 backing = compressed_device_addr[phys_addr];
|
||||
if ((backing >> MULTI_FLAG_BITS) != 0) {
|
||||
impl->multi_dev_address.GatherValues(backing & MULTI_MASK, buffer);
|
||||
return;
|
||||
}
|
||||
buffer.resize(1);
|
||||
buffer[0] = backing;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
template <typename T>
|
||||
T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
|
||||
const size_t index = address >> Memory::YUZU_PAGEBITS;
|
||||
const size_t offset = address & Memory::YUZU_PAGEMASK;
|
||||
auto phys_addr = compressed_physical_ptr[index];
|
||||
if (phys_addr == 0) [[unlikely]] {
|
||||
return nullptr;
|
||||
}
|
||||
return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) +
|
||||
offset);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
template <typename T>
|
||||
const T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) const {
|
||||
const size_t index = address >> Memory::YUZU_PAGEBITS;
|
||||
const size_t offset = address & Memory::YUZU_PAGEMASK;
|
||||
auto phys_addr = compressed_physical_ptr[index];
|
||||
if (phys_addr == 0) [[unlikely]] {
|
||||
return nullptr;
|
||||
}
|
||||
return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) +
|
||||
offset);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
template <typename T>
|
||||
void DeviceMemoryManager<Traits>::Write(DAddr address, T value) {
|
||||
T* ptr = GetPointer<T>(address);
|
||||
if (!ptr) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
std::memcpy(ptr, &value, sizeof(T));
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
template <typename T>
|
||||
T DeviceMemoryManager<Traits>::Read(DAddr address) const {
|
||||
const T* ptr = GetPointer<T>(address);
|
||||
T result{};
|
||||
if (!ptr) [[unlikely]] {
|
||||
return result;
|
||||
}
|
||||
std::memcpy(&result, ptr, sizeof(T));
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto on_unmapped,
|
||||
auto on_memory, auto increment) {
|
||||
std::size_t remaining_size = size;
|
||||
std::size_t page_index = addr >> Memory::YUZU_PAGEBITS;
|
||||
std::size_t page_offset = addr & Memory::YUZU_PAGEMASK;
|
||||
|
||||
while (remaining_size) {
|
||||
const size_t next_pages = static_cast<std::size_t>(continuity_tracker[page_index]);
|
||||
const std::size_t copy_amount =
|
||||
std::min((next_pages << Memory::YUZU_PAGEBITS) - page_offset, remaining_size);
|
||||
const auto current_vaddr =
|
||||
static_cast<u64>((page_index << Memory::YUZU_PAGEBITS) + page_offset);
|
||||
SCOPE_EXIT({
|
||||
page_index += next_pages;
|
||||
page_offset = 0;
|
||||
increment(copy_amount);
|
||||
remaining_size -= copy_amount;
|
||||
});
|
||||
|
||||
auto phys_addr = compressed_physical_ptr[page_index];
|
||||
if (phys_addr == 0) {
|
||||
on_unmapped(copy_amount, current_vaddr);
|
||||
continue;
|
||||
}
|
||||
auto* mem_ptr = GetPointerFromRaw<u8>(
|
||||
(static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) + page_offset);
|
||||
on_memory(copy_amount, mem_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) {
|
||||
device_inter->FlushRegion(address, size);
|
||||
WalkBlock(
|
||||
address, size,
|
||||
[&](size_t copy_amount, DAddr current_vaddr) {
|
||||
LOG_ERROR(
|
||||
HW_Memory,
|
||||
"Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
||||
current_vaddr, address, size);
|
||||
std::memset(dest_pointer, 0, copy_amount);
|
||||
},
|
||||
[&](size_t copy_amount, const u8* const src_ptr) {
|
||||
std::memcpy(dest_pointer, src_ptr, copy_amount);
|
||||
},
|
||||
[&](const std::size_t copy_amount) {
|
||||
dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
|
||||
});
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) {
|
||||
WalkBlock(
|
||||
address, size,
|
||||
[&](size_t copy_amount, DAddr current_vaddr) {
|
||||
LOG_ERROR(
|
||||
HW_Memory,
|
||||
"Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
||||
current_vaddr, address, size);
|
||||
},
|
||||
[&](size_t copy_amount, u8* const dst_ptr) {
|
||||
std::memcpy(dst_ptr, src_pointer, copy_amount);
|
||||
},
|
||||
[&](const std::size_t copy_amount) {
|
||||
src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
|
||||
});
|
||||
device_inter->InvalidateRegion(address, size);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) {
|
||||
WalkBlock(
|
||||
address, size,
|
||||
[&](size_t copy_amount, DAddr current_vaddr) {
|
||||
LOG_ERROR(
|
||||
HW_Memory,
|
||||
"Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
||||
current_vaddr, address, size);
|
||||
std::memset(dest_pointer, 0, copy_amount);
|
||||
},
|
||||
[&](size_t copy_amount, const u8* const src_ptr) {
|
||||
std::memcpy(dest_pointer, src_ptr, copy_amount);
|
||||
},
|
||||
[&](const std::size_t copy_amount) {
|
||||
dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
|
||||
});
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
|
||||
size_t size) {
|
||||
WalkBlock(
|
||||
address, size,
|
||||
[&](size_t copy_amount, DAddr current_vaddr) {
|
||||
LOG_ERROR(
|
||||
HW_Memory,
|
||||
"Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
|
||||
current_vaddr, address, size);
|
||||
},
|
||||
[&](size_t copy_amount, u8* const dst_ptr) {
|
||||
std::memcpy(dst_ptr, src_pointer, copy_amount);
|
||||
},
|
||||
[&](const std::size_t copy_amount) {
|
||||
src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
|
||||
});
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
Asid DeviceMemoryManager<Traits>::RegisterProcess(Memory::Memory* memory_device_inter) {
|
||||
size_t new_id{};
|
||||
if (!id_pool.empty()) {
|
||||
new_id = id_pool.front();
|
||||
id_pool.pop_front();
|
||||
registered_processes[new_id] = memory_device_inter;
|
||||
} else {
|
||||
registered_processes.emplace_back(memory_device_inter);
|
||||
new_id = registered_processes.size() - 1U;
|
||||
}
|
||||
return Asid{new_id};
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) {
|
||||
registered_processes[asid.id] = nullptr;
|
||||
id_pool.push_front(asid.id);
|
||||
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
|
||||
std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock);
|
||||
const auto Lock = [&] {
|
||||
if (!lk) {
|
||||
lk.lock();
|
||||
}
|
||||
};
|
||||
u64 uncache_begin = 0;
|
||||
u64 cache_begin = 0;
|
||||
u64 uncache_bytes = 0;
|
||||
u64 cache_bytes = 0;
|
||||
const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
|
||||
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
|
||||
size_t page = addr >> Memory::YUZU_PAGEBITS;
|
||||
auto [asid, base_vaddress] = ExtractCPUBacking(page);
|
||||
size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
|
||||
auto* memory_device_inter = registered_processes[asid.id];
|
||||
for (; page != page_end; ++page) {
|
||||
std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page);
|
||||
|
||||
if (delta > 0) {
|
||||
ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(),
|
||||
"Count may overflow!");
|
||||
} else if (delta < 0) {
|
||||
ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
|
||||
} else {
|
||||
ASSERT_MSG(false, "Delta must be non-zero!");
|
||||
}
|
||||
|
||||
// Adds or subtracts 1, as count is a unsigned 8-bit value
|
||||
count.fetch_add(static_cast<u8>(delta), std::memory_order_release);
|
||||
|
||||
// Assume delta is either -1 or 1
|
||||
if (count.load(std::memory_order::relaxed) == 0) {
|
||||
if (uncache_bytes == 0) {
|
||||
uncache_begin = vpage;
|
||||
}
|
||||
uncache_bytes += Memory::YUZU_PAGESIZE;
|
||||
} else if (uncache_bytes > 0) {
|
||||
Lock();
|
||||
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
|
||||
uncache_bytes, false);
|
||||
uncache_bytes = 0;
|
||||
}
|
||||
if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
|
||||
if (cache_bytes == 0) {
|
||||
cache_begin = vpage;
|
||||
}
|
||||
cache_bytes += Memory::YUZU_PAGESIZE;
|
||||
} else if (cache_bytes > 0) {
|
||||
Lock();
|
||||
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
|
||||
true);
|
||||
cache_bytes = 0;
|
||||
}
|
||||
vpage++;
|
||||
}
|
||||
if (uncache_bytes > 0) {
|
||||
Lock();
|
||||
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
|
||||
false);
|
||||
}
|
||||
if (cache_bytes > 0) {
|
||||
Lock();
|
||||
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Core
|
@ -0,0 +1,214 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
|
||||
namespace Core::Memory {
|
||||
|
||||
enum GuestMemoryFlags : u32 {
|
||||
Read = 1 << 0,
|
||||
Write = 1 << 1,
|
||||
Safe = 1 << 2,
|
||||
Cached = 1 << 3,
|
||||
|
||||
SafeRead = Read | Safe,
|
||||
SafeWrite = Write | Safe,
|
||||
SafeReadWrite = SafeRead | SafeWrite,
|
||||
SafeReadCachedWrite = SafeReadWrite | Cached,
|
||||
|
||||
UnsafeRead = Read,
|
||||
UnsafeWrite = Write,
|
||||
UnsafeReadWrite = UnsafeRead | UnsafeWrite,
|
||||
UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
|
||||
};
|
||||
|
||||
namespace {
|
||||
template <typename M, typename T, GuestMemoryFlags FLAGS>
|
||||
class GuestMemory {
|
||||
using iterator = T*;
|
||||
using const_iterator = const T*;
|
||||
using value_type = T;
|
||||
using element_type = T;
|
||||
using iterator_category = std::contiguous_iterator_tag;
|
||||
|
||||
public:
|
||||
GuestMemory() = delete;
|
||||
explicit GuestMemory(M& memory, u64 addr, std::size_t size,
|
||||
Common::ScratchBuffer<T>* backup = nullptr)
|
||||
: m_memory{memory}, m_addr{addr}, m_size{size} {
|
||||
static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Read) {
|
||||
Read(addr, size, backup);
|
||||
}
|
||||
}
|
||||
|
||||
~GuestMemory() = default;
|
||||
|
||||
T* data() noexcept {
|
||||
return m_data_span.data();
|
||||
}
|
||||
|
||||
const T* data() const noexcept {
|
||||
return m_data_span.data();
|
||||
}
|
||||
|
||||
size_t size() const noexcept {
|
||||
return m_size;
|
||||
}
|
||||
|
||||
size_t size_bytes() const noexcept {
|
||||
return this->size() * sizeof(T);
|
||||
}
|
||||
|
||||
[[nodiscard]] T* begin() noexcept {
|
||||
return this->data();
|
||||
}
|
||||
|
||||
[[nodiscard]] const T* begin() const noexcept {
|
||||
return this->data();
|
||||
}
|
||||
|
||||
[[nodiscard]] T* end() noexcept {
|
||||
return this->data() + this->size();
|
||||
}
|
||||
|
||||
[[nodiscard]] const T* end() const noexcept {
|
||||
return this->data() + this->size();
|
||||
}
|
||||
|
||||
T& operator[](size_t index) noexcept {
|
||||
return m_data_span[index];
|
||||
}
|
||||
|
||||
const T& operator[](size_t index) const noexcept {
|
||||
return m_data_span[index];
|
||||
}
|
||||
|
||||
void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
|
||||
m_addr = addr;
|
||||
m_size = size;
|
||||
m_addr_changed = true;
|
||||
}
|
||||
|
||||
std::span<T> Read(u64 addr, std::size_t size,
|
||||
Common::ScratchBuffer<T>* backup = nullptr) noexcept {
|
||||
m_addr = addr;
|
||||
m_size = size;
|
||||
if (m_size == 0) {
|
||||
m_is_data_copy = true;
|
||||
return {};
|
||||
}
|
||||
|
||||
if (this->TrySetSpan()) {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
m_memory.FlushRegion(m_addr, this->size_bytes());
|
||||
}
|
||||
} else {
|
||||
if (backup) {
|
||||
backup->resize_destructive(this->size());
|
||||
m_data_span = *backup;
|
||||
} else {
|
||||
m_data_copy.resize(this->size());
|
||||
m_data_span = std::span(m_data_copy);
|
||||
}
|
||||
m_is_data_copy = true;
|
||||
m_span_valid = true;
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
|
||||
} else {
|
||||
m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
|
||||
}
|
||||
}
|
||||
return m_data_span;
|
||||
}
|
||||
|
||||
void Write(std::span<T> write_data) noexcept {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
|
||||
m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
|
||||
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
|
||||
} else {
|
||||
m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
bool TrySetSpan() noexcept {
|
||||
if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
|
||||
m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
|
||||
m_span_valid = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool IsDataCopy() const noexcept {
|
||||
return m_is_data_copy;
|
||||
}
|
||||
|
||||
bool AddressChanged() const noexcept {
|
||||
return m_addr_changed;
|
||||
}
|
||||
|
||||
M& m_memory;
|
||||
u64 m_addr{};
|
||||
size_t m_size{};
|
||||
std::span<T> m_data_span{};
|
||||
std::vector<T> m_data_copy{};
|
||||
bool m_span_valid{false};
|
||||
bool m_is_data_copy{false};
|
||||
bool m_addr_changed{false};
|
||||
};
|
||||
|
||||
template <typename M, typename T, GuestMemoryFlags FLAGS>
|
||||
class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
|
||||
public:
|
||||
GuestMemoryScoped() = delete;
|
||||
explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
|
||||
Common::ScratchBuffer<T>* backup = nullptr)
|
||||
: GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
|
||||
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
|
||||
if (!this->TrySetSpan()) {
|
||||
if (backup) {
|
||||
this->m_data_span = *backup;
|
||||
this->m_span_valid = true;
|
||||
this->m_is_data_copy = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~GuestMemoryScoped() {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Write) {
|
||||
if (this->size() == 0) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->AddressChanged() || this->IsDataCopy()) {
|
||||
ASSERT(this->m_span_valid);
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
|
||||
this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
|
||||
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
|
||||
} else {
|
||||
this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
|
||||
}
|
||||
} else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
|
||||
(FLAGS & GuestMemoryFlags::Cached)) {
|
||||
this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
} // namespace Core::Memory
|
@ -0,0 +1,175 @@
|
||||
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#define BOOST_NO_MT
|
||||
#include <boost/pool/detail/mutex.hpp>
|
||||
#undef BOOST_NO_MT
|
||||
#include <boost/icl/interval.hpp>
|
||||
#include <boost/icl/interval_base_set.hpp>
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include <boost/icl/split_interval_map.hpp>
|
||||
#include <boost/pool/pool.hpp>
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
#include <boost/pool/poolfwd.hpp>
|
||||
|
||||
#include "core/hle/service/nvdrv/core/heap_mapper.h"
|
||||
#include "video_core/host1x/host1x.h"
|
||||
|
||||
namespace boost {
|
||||
template <typename T>
|
||||
class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
|
||||
}
|
||||
|
||||
namespace Service::Nvidia::NvCore {
|
||||
|
||||
using IntervalCompare = std::less<DAddr>;
|
||||
using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
|
||||
using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
|
||||
using IntervalSet = boost::icl::interval_set<DAddr>;
|
||||
using IntervalType = typename IntervalSet::interval_type;
|
||||
|
||||
template <typename Type>
|
||||
struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
|
||||
// types
|
||||
typedef counter_add_functor<Type> type;
|
||||
typedef boost::icl::identity_based_inplace_combine<Type> base_type;
|
||||
|
||||
// public member functions
|
||||
void operator()(Type& current, const Type& added) const {
|
||||
current += added;
|
||||
if (current < base_type::identity_element()) {
|
||||
current = base_type::identity_element();
|
||||
}
|
||||
}
|
||||
|
||||
// public static functions
|
||||
static void version(Type&){};
|
||||
};
|
||||
|
||||
using OverlapCombine = counter_add_functor<int>;
|
||||
using OverlapSection = boost::icl::inter_section<int>;
|
||||
using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
|
||||
|
||||
struct HeapMapper::HeapMapperInternal {
|
||||
HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {}
|
||||
~HeapMapperInternal() = default;
|
||||
|
||||
template <typename Func>
|
||||
void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
|
||||
Func&& func) {
|
||||
const DAddr start_address = cpu_addr;
|
||||
const DAddr end_address = start_address + size;
|
||||
const IntervalType search_interval{start_address, end_address};
|
||||
auto it = current_range.lower_bound(search_interval);
|
||||
if (it == current_range.end()) {
|
||||
return;
|
||||
}
|
||||
auto end_it = current_range.upper_bound(search_interval);
|
||||
for (; it != end_it; it++) {
|
||||
auto& inter = it->first;
|
||||
DAddr inter_addr_end = inter.upper();
|
||||
DAddr inter_addr = inter.lower();
|
||||
if (inter_addr_end > end_address) {
|
||||
inter_addr_end = end_address;
|
||||
}
|
||||
if (inter_addr < start_address) {
|
||||
inter_addr = start_address;
|
||||
}
|
||||
func(inter_addr, inter_addr_end, it->second);
|
||||
}
|
||||
}
|
||||
|
||||
void RemoveEachInOverlapCounter(OverlapCounter& current_range,
|
||||
const IntervalType search_interval, int subtract_value) {
|
||||
bool any_removals = false;
|
||||
current_range.add(std::make_pair(search_interval, subtract_value));
|
||||
do {
|
||||
any_removals = false;
|
||||
auto it = current_range.lower_bound(search_interval);
|
||||
if (it == current_range.end()) {
|
||||
return;
|
||||
}
|
||||
auto end_it = current_range.upper_bound(search_interval);
|
||||
for (; it != end_it; it++) {
|
||||
if (it->second <= 0) {
|
||||
any_removals = true;
|
||||
current_range.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (any_removals);
|
||||
}
|
||||
|
||||
IntervalSet base_set;
|
||||
OverlapCounter mapping_overlaps;
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory;
|
||||
std::mutex guard;
|
||||
};
|
||||
|
||||
HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid,
|
||||
Tegra::Host1x::Host1x& host1x)
|
||||
: m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_asid{asid} {
|
||||
m_internal = std::make_unique<HeapMapperInternal>(host1x);
|
||||
}
|
||||
|
||||
HeapMapper::~HeapMapper() {
|
||||
m_internal->device_memory.Unmap(m_daddress, m_size);
|
||||
}
|
||||
|
||||
DAddr HeapMapper::Map(VAddr start, size_t size) {
|
||||
std::scoped_lock lk(m_internal->guard);
|
||||
m_internal->base_set.clear();
|
||||
const IntervalType interval{start, start + size};
|
||||
m_internal->base_set.insert(interval);
|
||||
m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size,
|
||||
[this](VAddr start_addr, VAddr end_addr, int) {
|
||||
const IntervalType other{start_addr, end_addr};
|
||||
m_internal->base_set.subtract(other);
|
||||
});
|
||||
if (!m_internal->base_set.empty()) {
|
||||
auto it = m_internal->base_set.begin();
|
||||
auto end_it = m_internal->base_set.end();
|
||||
for (; it != end_it; it++) {
|
||||
const VAddr inter_addr_end = it->upper();
|
||||
const VAddr inter_addr = it->lower();
|
||||
const size_t offset = inter_addr - m_vaddress;
|
||||
const size_t sub_size = inter_addr_end - inter_addr;
|
||||
m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, sub_size,
|
||||
m_asid);
|
||||
}
|
||||
}
|
||||
m_internal->mapping_overlaps += std::make_pair(interval, 1);
|
||||
m_internal->base_set.clear();
|
||||
return m_daddress + (start - m_vaddress);
|
||||
}
|
||||
|
||||
void HeapMapper::Unmap(VAddr start, size_t size) {
|
||||
std::scoped_lock lk(m_internal->guard);
|
||||
m_internal->base_set.clear();
|
||||
m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size,
|
||||
[this](VAddr start_addr, VAddr end_addr, int value) {
|
||||
if (value <= 1) {
|
||||
const IntervalType other{start_addr, end_addr};
|
||||
m_internal->base_set.insert(other);
|
||||
}
|
||||
});
|
||||
if (!m_internal->base_set.empty()) {
|
||||
auto it = m_internal->base_set.begin();
|
||||
auto end_it = m_internal->base_set.end();
|
||||
for (; it != end_it; it++) {
|
||||
const VAddr inter_addr_end = it->upper();
|
||||
const VAddr inter_addr = it->lower();
|
||||
const size_t offset = inter_addr - m_vaddress;
|
||||
const size_t sub_size = inter_addr_end - inter_addr;
|
||||
m_internal->device_memory.Unmap(m_daddress + offset, sub_size);
|
||||
}
|
||||
}
|
||||
const IntervalType to_remove{start, start + size};
|
||||
m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1);
|
||||
m_internal->base_set.clear();
|
||||
}
|
||||
|
||||
} // namespace Service::Nvidia::NvCore
|
@ -0,0 +1,49 @@
|
||||
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/device_memory_manager.h"
|
||||
|
||||
namespace Tegra::Host1x {
|
||||
class Host1x;
|
||||
} // namespace Tegra::Host1x
|
||||
|
||||
namespace Service::Nvidia::NvCore {
|
||||
|
||||
class HeapMapper {
|
||||
public:
|
||||
HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid,
|
||||
Tegra::Host1x::Host1x& host1x);
|
||||
~HeapMapper();
|
||||
|
||||
bool IsInBounds(VAddr start, size_t size) const {
|
||||
VAddr end = start + size;
|
||||
return start >= m_vaddress && end <= (m_vaddress + m_size);
|
||||
}
|
||||
|
||||
DAddr Map(VAddr start, size_t size);
|
||||
|
||||
void Unmap(VAddr start, size_t size);
|
||||
|
||||
DAddr GetRegionStart() const {
|
||||
return m_daddress;
|
||||
}
|
||||
|
||||
size_t GetRegionSize() const {
|
||||
return m_size;
|
||||
}
|
||||
|
||||
private:
|
||||
struct HeapMapperInternal;
|
||||
VAddr m_vaddress;
|
||||
DAddr m_daddress;
|
||||
size_t m_size;
|
||||
Core::Asid m_asid;
|
||||
std::unique_ptr<HeapMapperInternal> m_internal;
|
||||
};
|
||||
|
||||
} // namespace Service::Nvidia::NvCore
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,30 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "core/guest_memory.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace Tegra::Memory {
|
||||
|
||||
using GuestMemoryFlags = Core::Memory::GuestMemoryFlags;
|
||||
|
||||
template <typename T, GuestMemoryFlags FLAGS>
|
||||
using DeviceGuestMemory = Core::Memory::GuestMemory<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>;
|
||||
template <typename T, GuestMemoryFlags FLAGS>
|
||||
using DeviceGuestMemoryScoped =
|
||||
Core::Memory::GuestMemoryScoped<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>;
|
||||
template <typename T, GuestMemoryFlags FLAGS>
|
||||
using GpuGuestMemory = Core::Memory::GuestMemory<Tegra::MemoryManager, T, FLAGS>;
|
||||
template <typename T, GuestMemoryFlags FLAGS>
|
||||
using GpuGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
|
||||
|
||||
} // namespace Tegra::Memory
|
@ -0,0 +1,32 @@
|
||||
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "core/device_memory_manager.inc"
|
||||
#include "video_core/host1x/gpu_device_memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
struct MaxwellDeviceMethods {
|
||||
static inline void MarkRegionCaching(Core::Memory::Memory* interface, VAddr address,
|
||||
size_t size, bool caching) {
|
||||
interface->RasterizerMarkRegionCached(address, size, caching);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
||||
template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>;
|
||||
template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>;
|
||||
|
||||
template const u8* Tegra::MaxwellDeviceMemoryManager::GetPointer<u8>(DAddr addr) const;
|
||||
template u8* Tegra::MaxwellDeviceMemoryManager::GetPointer<u8>(DAddr addr);
|
||||
|
||||
template u8 Tegra::MaxwellDeviceMemoryManager::Read<u8>(DAddr addr) const;
|
||||
template u16 Tegra::MaxwellDeviceMemoryManager::Read<u16>(DAddr addr) const;
|
||||
template u32 Tegra::MaxwellDeviceMemoryManager::Read<u32>(DAddr addr) const;
|
||||
template u64 Tegra::MaxwellDeviceMemoryManager::Read<u64>(DAddr addr) const;
|
||||
template void Tegra::MaxwellDeviceMemoryManager::Write<u8>(DAddr addr, u8 data);
|
||||
template void Tegra::MaxwellDeviceMemoryManager::Write<u16>(DAddr addr, u16 data);
|
||||
template void Tegra::MaxwellDeviceMemoryManager::Write<u32>(DAddr addr, u32 data);
|
||||
template void Tegra::MaxwellDeviceMemoryManager::Write<u64>(DAddr addr, u64 data);
|
@ -0,0 +1,24 @@
|
||||
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/device_memory_manager.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
struct MaxwellDeviceMethods;
|
||||
|
||||
struct MaxwellDeviceTraits {
|
||||
static constexpr size_t device_virtual_bits = 34;
|
||||
using DeviceInterface = typename VideoCore::RasterizerInterface;
|
||||
using DeviceMethods = MaxwellDeviceMethods;
|
||||
};
|
||||
|
||||
using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
|
||||
|
||||
} // namespace Tegra
|
@ -1,72 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
using namespace Core::Memory;
|
||||
|
||||
RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_)
|
||||
: cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {}
|
||||
|
||||
RasterizerAccelerated::~RasterizerAccelerated() = default;
|
||||
|
||||
void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
|
||||
u64 uncache_begin = 0;
|
||||
u64 cache_begin = 0;
|
||||
u64 uncache_bytes = 0;
|
||||
u64 cache_bytes = 0;
|
||||
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE);
|
||||
for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) {
|
||||
std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page);
|
||||
|
||||
if (delta > 0) {
|
||||
ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!");
|
||||
} else if (delta < 0) {
|
||||
ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
|
||||
} else {
|
||||
ASSERT_MSG(false, "Delta must be non-zero!");
|
||||
}
|
||||
|
||||
// Adds or subtracts 1, as count is a unsigned 8-bit value
|
||||
count.fetch_add(static_cast<u16>(delta), std::memory_order_release);
|
||||
|
||||
// Assume delta is either -1 or 1
|
||||
if (count.load(std::memory_order::relaxed) == 0) {
|
||||
if (uncache_bytes == 0) {
|
||||
uncache_begin = page;
|
||||
}
|
||||
uncache_bytes += YUZU_PAGESIZE;
|
||||
} else if (uncache_bytes > 0) {
|
||||
cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes,
|
||||
false);
|
||||
uncache_bytes = 0;
|
||||
}
|
||||
if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
|
||||
if (cache_bytes == 0) {
|
||||
cache_begin = page;
|
||||
}
|
||||
cache_bytes += YUZU_PAGESIZE;
|
||||
} else if (cache_bytes > 0) {
|
||||
cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
|
||||
cache_bytes = 0;
|
||||
}
|
||||
}
|
||||
if (uncache_bytes > 0) {
|
||||
cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false);
|
||||
}
|
||||
if (cache_bytes > 0) {
|
||||
cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
@ -1,49 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Core::Memory {
|
||||
class Memory;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface.
|
||||
class RasterizerAccelerated : public RasterizerInterface {
|
||||
public:
|
||||
explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_);
|
||||
~RasterizerAccelerated() override;
|
||||
|
||||
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
|
||||
|
||||
private:
|
||||
class CacheEntry final {
|
||||
public:
|
||||
CacheEntry() = default;
|
||||
|
||||
std::atomic_uint16_t& Count(std::size_t page) {
|
||||
return values[page & 3];
|
||||
}
|
||||
|
||||
const std::atomic_uint16_t& Count(std::size_t page) const {
|
||||
return values[page & 3];
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<std::atomic_uint16_t, 4> values{};
|
||||
};
|
||||
static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!");
|
||||
|
||||
using CachedPages = std::array<CacheEntry, 0x2000000>;
|
||||
std::unique_ptr<CachedPages> cached_pages;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue