NVDRV: Remake ASGPU

master
Fernando Sahmkow 2021-11-14 20:55:52 +07:00
parent c6ea0c650e
commit feb49c822d
8 changed files with 882 additions and 239 deletions

@ -17,6 +17,8 @@ endif ()
include(GenerateSCMRev) include(GenerateSCMRev)
add_library(common STATIC add_library(common STATIC
address_space.cpp
address_space.h
algorithm.h algorithm.h
alignment.h alignment.h
announce_multiplayer_room.h announce_multiplayer_room.h

@ -0,0 +1,11 @@
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Licensed under GPLv3 or any later version
// Refer to the license.txt file included.
#include "common/address_space.inc"
namespace Common {
template class Common::FlatAllocator<u32, 0, 32>;
}

@ -0,0 +1,134 @@
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Licensed under GPLv3 or any later version
// Refer to the license.txt file included.
#pragma once
#include <concepts>
#include <functional>
#include <mutex>
#include <vector>
#include "common/common_types.h"
namespace Common {
/**
 * @brief Constraint satisfied when VaType is an unsigned type that has at least AddressSpaceBits
 * bits of storage (sizeof(VaType) * 8)
 */
template <typename VaType, size_t AddressSpaceBits>
concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits;
//! Empty placeholder type, used as the default ExtraBlockInfo of FlatAddressSpaceMap
struct EmptyStruct {};
/**
 * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
 * @tparam VaType Unsigned type used for virtual addresses and sizes
 * @tparam UnmappedVa VA value stored in a default-constructed (invalid) block
 * @tparam PaType Type used for the physical side of a mapping
 * @tparam UnmappedPa PA value that marks a block as unmapped
 * @tparam PaContigSplit When true, splitting a mapped block offsets the tail's PA by the distance
 * from the block start; when false the PA is copied to the tail unchanged
 * @tparam AddressSpaceBits Width of the address space in bits, must fit in VaType
 * @tparam ExtraBlockInfo Optional per-block payload carried alongside each mapping
 */
template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,
          bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct>
requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAddressSpaceMap {
private:
    std::function<void(VaType, VaType)>
        unmapCallback{}; //!< Callback invoked with (virt, size) when the mappings in a region
                         //!< have changed

protected:
    /**
     * @brief Represents a block of memory in the AS, the physical mapping is contiguous until
     * another block with a different phys address is hit
     */
    struct Block {
        VaType virt{UnmappedVa}; //!< VA of the block
        PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block
                                 //!< is encountered
        [[no_unique_address]] ExtraBlockInfo extraInfo;

        Block() = default;

        Block(VaType virt, PaType phys, ExtraBlockInfo extraInfo)
            : virt(virt), phys(phys), extraInfo(extraInfo) {}

        //! @return true if the block's VA differs from UnmappedVa
        constexpr bool Valid() const {
            return virt != UnmappedVa;
        }

        //! @return true if the block's PA differs from UnmappedPa
        constexpr bool Mapped() const {
            return phys != UnmappedPa;
        }

        //! @return true if the block's PA equals UnmappedPa
        constexpr bool Unmapped() const {
            return phys == UnmappedPa;
        }

        //! Orders a block against a bare VA so std::lower_bound can search `blocks` by address
        bool operator<(const VaType& pVirt) const {
            return virt < pVirt;
        }
    };

    std::mutex blockMutex;              //!< Guards `blocks`; taken by Map() and Unmap()
    std::vector<Block> blocks{Block{}}; //!< Blocks sorted by VA, seeded with one default block

    /**
     * @brief Maps a PA range into the given AS region
     * @note blockMutex MUST be locked when calling this
     */
    void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo);

    /**
     * @brief Unmaps the given range and merges it with other unmapped regions
     * @note blockMutex MUST be locked when calling this
     */
    void UnmapLocked(VaType virt, VaType size);

public:
    // Evaluates to 2^AddressSpaceBits - 1; it is built from two halves rather than a single
    // shift by AddressSpaceBits (presumably so that a 64-bit address space does not shift a
    // 64-bit value by its full width)
    static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) +
                                      ((1ULL << (AddressSpaceBits - 1)) -
                                       1)}; //!< The maximum VA that this AS can technically reach

    VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS

    FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});

    FlatAddressSpaceMap() = default;

    /**
     * @brief Locks blockMutex and maps the PA range into the given AS region
     */
    void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) {
        std::scoped_lock lock(blockMutex);
        MapLocked(virt, phys, size, extraInfo);
    }

    /**
     * @brief Locks blockMutex and unmaps the given AS region
     */
    void Unmap(VaType virt, VaType size) {
        std::scoped_lock lock(blockMutex);
        UnmapLocked(virt, size);
    }
};
/**
 * @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an
 * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
 * @note The underlying map uses bool as its PA type: true marks a region as allocated and false
 * (the UnmappedPa value) marks it as free
 */
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>
requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAllocator
: public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
private:
using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once
//!< this reaches the AS limit the slower allocation path will be
//!< used
public:
VaType vaStart; //!< The base VA of the allocator, no allocations will be below this
/**
 * @param vaStart Base VA of the allocator, also the starting point of the linear pass
 * @param vaLimit Soft upper limit on allocated VAs, forwarded to the underlying map
 */
FlatAllocator(VaType vaStart, VaType vaLimit = Base::VaMaximum);
/**
 * @brief Allocates a region in the AS of the given size and returns its address
 * @return The VA of the allocated region, or a value-initialised VaType if the AS is full
 */
VaType Allocate(VaType size);
/**
 * @brief Marks the given region in the AS as allocated
 */
void AllocateFixed(VaType virt, VaType size);
/**
 * @brief Frees an AS region so it can be used again
 */
void Free(VaType virt, VaType size);
};
} // namespace Common

@ -0,0 +1,338 @@
// SPDX-License-Identifier: GPLv3 or later
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "common/address_space.h"
#include "common/assert.h"
// Helper macros that spell out the template header and qualified class name needed to define
// members of the class templates from address_space.h outside of their class bodies.
// Usage: MAP_MEMBER(void)::MapLocked(...) { ... }

// Prefix for a FlatAddressSpaceMap member function returning `returnType`
#define MAP_MEMBER(returnType) \
template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \
bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \
requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap< \
VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
// Prefix for a FlatAddressSpaceMap constructor (no return type)
#define MAP_MEMBER_CONST() \
template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \
bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \
requires AddressSpaceValid<VaType, AddressSpaceBits> FlatAddressSpaceMap< \
VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
// NOTE(review): FlatMemoryManager is not declared in the visible address_space.h and this macro
// is not used in this file - looks like a leftover from an earlier class name, confirm before
// relying on it
#define MM_MEMBER(returnType) \
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \
FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
// Prefix for a FlatAllocator member function returning `returnType`
#define ALLOC_MEMBER(returnType) \
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \
FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
// Prefix for a FlatAllocator constructor (no return type)
#define ALLOC_MEMBER_CONST() \
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
requires AddressSpaceValid<VaType, AddressSpaceBits> \
FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
namespace Common {
/**
 * @brief Constructs a map with a soft VA limit and an optional callback that is invoked whenever
 * the mappings of a region change
 * @param vaLimit Soft upper bound for VAs, must not exceed VaMaximum
 * @param unmapCallback Callable receiving (virt, size) of the changed region, may be empty
 */
MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit,
                                        std::function<void(VaType, VaType)> unmapCallback)
    : unmapCallback(std::move(unmapCallback)), vaLimit(vaLimit) {
    // The soft limit can never lie beyond what the address space width can express
    const bool limitRepresentable{vaLimit <= VaMaximum};
    if (!limitRepresentable) {
        UNREACHABLE_MSG("Invalid VA limit!");
    }
}
/**
 * @brief Maps [virt, virt + size) to phys, overwriting whatever blocks previously covered the range
 * @param virt Start VA of the region
 * @param phys PA stored in the block that starts the region
 * @param size Size of the region, virt + size must not exceed vaLimit
 * @param extraInfo Payload stored in the block that starts the region
 * @note blockMutex MUST be locked when calling this
 * @note unmapCallback (if set) is invoked with (virt, size) once the block list has been updated
 */
MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo) {
    VaType virtEnd{virt + size};

    if (virtEnd > vaLimit)
        UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}",
                        virtEnd, vaLimit);

    // First block whose VA is >= virtEnd, i.e. the first block not touched by this mapping
    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
    if (blockEndSuccessor == blocks.begin())
        UNREACHABLE_MSG("Trying to map a block before the VA start: virtEnd: 0x{:X}", virtEnd);

    auto blockEndPredecessor{std::prev(blockEndSuccessor)};

    if (blockEndSuccessor != blocks.end()) {
        // We have blocks in front of us, if one is directly in front then we don't have to add a
        // tail
        if (blockEndSuccessor->virt != virtEnd) {
            PaType tailPhys{[&]() -> PaType {
                if constexpr (!PaContigSplit) {
                    return blockEndPredecessor
                        ->phys; // Always propagate unmapped regions rather than calculating offset
                } else {
                    if (blockEndPredecessor->Unmapped())
                        return blockEndPredecessor->phys; // Always propagate unmapped regions
                                                          // rather than calculating offset
                    else
                        return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
                }
            }()};

            if (blockEndPredecessor->virt >= virt) {
                // If this block's start would be overlapped by the map then reuse it as a tail
                // block; its extraInfo already describes the tail so it is left untouched
                blockEndPredecessor->virt = virtEnd;
                blockEndPredecessor->phys = tailPhys;

                // No longer predecessor anymore
                blockEndSuccessor = blockEndPredecessor--;
            } else {
                // Else insert a new one and we're done
                blocks.insert(blockEndSuccessor,
                              {Block(virt, phys, extraInfo),
                               Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)});
                if (unmapCallback)
                    unmapCallback(virt, size);

                return;
            }
        }
    } else {
        // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped
        // chunk
        if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) {
            // Move the unmapped block start backwards
            blockEndPredecessor->virt = virtEnd;

            // No longer predecessor anymore
            blockEndSuccessor = blockEndPredecessor--;
        } else {
            // Else insert a new one and we're done
            blocks.insert(blockEndSuccessor,
                          {Block(virt, phys, extraInfo), Block(virtEnd, UnmappedPa, {})});
            if (unmapCallback)
                unmapCallback(virt, size);

            return;
        }
    }

    auto blockStartSuccessor{blockEndSuccessor};

    // Walk the block vector to find the start successor as this is more efficient than another
    // binary search in most scenarios
    while (std::prev(blockStartSuccessor)->virt >= virt)
        blockStartSuccessor--;

    // Check that the start successor is either the end block or something in between
    if (blockStartSuccessor->virt > virtEnd) {
        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt);
    } else if (blockStartSuccessor->virt == virtEnd) {
        // We need to create a new block as there are none spare that we would overwrite
        blocks.insert(blockStartSuccessor, Block(virt, phys, extraInfo));
    } else {
        // Erase overwritten blocks
        if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
            blocks.erase(eraseStart, blockEndSuccessor);

        // Reuse a block that would otherwise be overwritten as a start block
        blockStartSuccessor->virt = virt;
        blockStartSuccessor->phys = phys;
        blockStartSuccessor->extraInfo = extraInfo;
    }

    if (unmapCallback)
        unmapCallback(virt, size);
}
/**
 * @brief Unmaps [virt, virt + size) and merges the result with neighbouring unmapped regions
 * @param virt Start VA of the region
 * @param size Size of the region, virt + size must not exceed vaLimit
 * @note blockMutex MUST be locked when calling this
 * @note unmapCallback (if set) is invoked with (virt, size) once the block list has been updated
 */
MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
    VaType virtEnd{virt + size};

    if (virtEnd > vaLimit)
        UNREACHABLE_MSG(
            "Trying to unmap a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", virtEnd,
            vaLimit);

    // First block whose VA is >= virtEnd, i.e. the first block not touched by this unmap
    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
    if (blockEndSuccessor == blocks.begin())
        UNREACHABLE_MSG("Trying to unmap a block before the VA start: virtEnd: 0x{:X}", virtEnd);

    auto blockEndPredecessor{std::prev(blockEndSuccessor)};

    // Steps backwards from iter until it reaches the last block that starts before virt
    auto walkBackToPredecessor{[&](auto iter) {
        while (iter->virt >= virt)
            iter--;

        return iter;
    }};

    // Removes every block inside the region when the region is already terminated by the unmapped
    // block `unmappedEnd`
    auto eraseBlocksWithEndUnmapped{[&](auto unmappedEnd) {
        auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)};
        auto blockStartSuccessor{std::next(blockStartPredecessor)};

        auto eraseEnd{[&]() {
            if (blockStartPredecessor->Unmapped()) {
                // If the start predecessor is unmapped then we can erase everything in our region
                // and be done
                return std::next(unmappedEnd);
            } else {
                // Else reuse the end predecessor as the start of our unmapped region then erase all
                // up to it
                unmappedEnd->virt = virt;
                return unmappedEnd;
            }
        }()};

        // We can't have two unmapped regions after each other
        if (eraseEnd != blocks.end() &&
            (eraseEnd == blockStartSuccessor ||
             (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped())))
            UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!");

        blocks.erase(blockStartSuccessor, eraseEnd);
    }};

    // We can avoid any splitting logic if these are the case
    if (blockEndPredecessor->Unmapped()) {
        if (blockEndPredecessor->virt > virt)
            eraseBlocksWithEndUnmapped(blockEndPredecessor);

        if (unmapCallback)
            unmapCallback(virt, size);

        return; // The region is unmapped, bail out early
    } else if (blockEndSuccessor != blocks.end() && blockEndSuccessor->virt == virtEnd &&
               blockEndSuccessor->Unmapped()) {
        // The end iterator must be ruled out first as it cannot be dereferenced
        eraseBlocksWithEndUnmapped(blockEndSuccessor);

        if (unmapCallback)
            unmapCallback(virt, size);

        return; // The region is unmapped here and doesn't need splitting, bail out early
    } else if (blockEndSuccessor == blocks.end()) {
        // This should never happen as the end should always follow an unmapped block
        UNREACHABLE_MSG("Unexpected Memory Manager state!");
    } else if (blockEndSuccessor->virt != virtEnd) {
        // If one block is directly in front then we don't have to add a tail

        // The previous block is mapped so we will need to add a tail with an offset
        PaType tailPhys{[&]() {
            if constexpr (PaContigSplit)
                return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
            else
                return blockEndPredecessor->phys;
        }()};

        if (blockEndPredecessor->virt >= virt) {
            // If this block's start would be overlapped by the unmap then reuse it as a tail block
            blockEndPredecessor->virt = virtEnd;
            blockEndPredecessor->phys = tailPhys;

            // No longer predecessor anymore
            blockEndSuccessor = blockEndPredecessor--;
        } else {
            blocks.insert(blockEndSuccessor,
                          {Block(virt, UnmappedPa, {}),
                           Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)});
            if (unmapCallback)
                unmapCallback(virt, size);

            return; // The previous block is mapped and ends before
        }
    }

    // Walk the block vector to find the start predecessor as this is more efficient than another
    // binary search in most scenarios
    auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)};
    auto blockStartSuccessor{std::next(blockStartPredecessor)};

    if (blockStartSuccessor->virt > virtEnd) {
        UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt);
    } else if (blockStartSuccessor->virt == virtEnd) {
        // There are no blocks between the start and the end that would let us skip inserting a new
        // one for head

        // The previous block may be unmapped, if so we don't need to insert any unmaps after it
        if (blockStartPredecessor->Mapped())
            blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, {}));
    } else if (blockStartPredecessor->Unmapped()) {
        // If the previous block is unmapped
        blocks.erase(blockStartSuccessor, blockEndPredecessor);
    } else {
        // Erase overwritten blocks, skipping the first one as we have written the unmapped start
        // block there
        if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
            blocks.erase(eraseStart, blockEndSuccessor);

        // Add in the unmapped block header
        blockStartSuccessor->virt = virt;
        blockStartSuccessor->phys = UnmappedPa;
    }

    if (unmapCallback)
        unmapCallback(virt, size);
}
ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart, VaType vaLimit)
: Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {}
/**
 * @brief Allocates a region of the given size, first by extending the linear allocation cursor and,
 * if that fails, by scanning the block list for a free gap that is large enough
 * @param size Size of the region to allocate
 * @return The VA of the allocated region, or a value-initialised VaType if the AS is full
 * @note Takes blockMutex for the duration of the call
 */
ALLOC_MEMBER(VaType)::Allocate(VaType size) {
    std::scoped_lock lock(this->blockMutex);

    VaType allocStart{UnmappedVa};
    VaType allocEnd{currentLinearAllocEnd + size};

    // Avoid searching backwards in the address space if possible
    // (the first comparison rejects an allocEnd that wrapped around)
    if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) {
        auto allocEndSuccessor{
            std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)};
        if (allocEndSuccessor == this->blocks.begin())
            UNREACHABLE_MSG("First block in AS map is invalid!");

        auto allocEndPredecessor{std::prev(allocEndSuccessor)};
        if (allocEndPredecessor->virt <= currentLinearAllocEnd) {
            allocStart = currentLinearAllocEnd;
        } else {
            // Skip over fixed any mappings in front of us
            while (allocEndSuccessor != this->blocks.end()) {
                if (allocEndSuccessor->virt - allocEndPredecessor->virt < size ||
                    allocEndPredecessor->Mapped()) {
                    allocStart = allocEndPredecessor->virt;
                    break;
                }

                allocEndPredecessor = allocEndSuccessor++;

                // Use the VA limit to calculate if we can fit in the final block since it has no
                // successor
                if (allocEndSuccessor == this->blocks.end()) {
                    allocEnd = allocEndPredecessor->virt + size;

                    if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit)
                        allocStart = allocEndPredecessor->virt;
                }
            }
        }
    }

    if (allocStart != UnmappedVa) {
        currentLinearAllocEnd = allocStart + size;
    } else { // If linear allocation overflows the AS then find a gap
        if (this->blocks.size() <= 2)
            UNREACHABLE_MSG("Unexpected allocator state!");

        auto searchPredecessor{this->blocks.begin()};
        auto searchSuccessor{std::next(searchPredecessor)};

        // The usable part of a gap never begins below vaStart; without this clamp the leading
        // unmapped block could yield an allocation below the allocator's base VA
        auto gapStart{[&](auto block) -> VaType {
            return block->virt < vaStart ? vaStart : block->virt;
        }};

        // A gap is usable when it is unmapped and spans at least `size` from its clamped start
        auto gapFits{[&](auto predecessor, auto successor) {
            if (predecessor->Mapped())
                return false;

            const VaType start{gapStart(predecessor)};
            return successor->virt >= start && successor->virt - start >= size;
        }};

        while (searchSuccessor != this->blocks.end() &&
               !gapFits(searchPredecessor, searchSuccessor)) {
            searchPredecessor = searchSuccessor++;
        }

        if (searchSuccessor != this->blocks.end())
            allocStart = gapStart(searchPredecessor);
        else
            return {}; // AS is full
    }

    this->MapLocked(allocStart, true, size, {});
    return allocStart;
}
/**
 * @brief Marks [virt, virt + size) as allocated by mapping it with a PA of true
 */
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
    Base::Map(virt, true, size);
}
/**
 * @brief Returns [virt, virt + size) to the allocator by unmapping it in the underlying map
 */
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
    Base::Unmap(virt, size);
}
} // namespace Common

@ -6,6 +6,7 @@
#include <cstring> #include <cstring>
#include <utility> #include <utility>
#include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h" #include "core/core.h"
@ -21,8 +22,8 @@
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core) nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
: nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{},
gmmu{std::make_shared<Tegra::MemoryManager>(system)} {} gmmu{} {}
nvhost_as_gpu::~nvhost_as_gpu() = default; nvhost_as_gpu::~nvhost_as_gpu() = default;
NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@ -89,12 +90,49 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
IoctlAllocAsEx params{}; IoctlAllocAsEx params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size); LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size);
if (params.big_page_size == 0) {
params.big_page_size = DEFAULT_BIG_PAGE_SIZE; std::scoped_lock lock(mutex);
if (vm.initialised) {
UNREACHABLE_MSG("Cannot initialise an address space twice!");
return NvResult::InvalidState;
} }
big_page_size = params.big_page_size; if (params.big_page_size) {
if (!std::has_single_bit(params.big_page_size)) {
LOG_ERROR(Service_NVDRV, "Non power-of-2 big page size: 0x{:X}!", params.big_page_size);
return NvResult::BadValue;
}
if (!(params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES)) {
LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size);
return NvResult::BadValue;
}
vm.big_page_size = params.big_page_size;
vm.big_page_size_bits = static_cast<u32>(std::countr_zero(params.big_page_size));
vm.va_range_start = params.big_page_size << VM::VA_START_SHIFT;
}
// If this is unspecified then default values should be used
if (params.va_range_start) {
vm.va_range_start = params.va_range_start;
vm.va_range_split = params.va_range_split;
vm.va_range_end = params.va_range_end;
}
const u64 start_pages{vm.va_range_start >> VM::PAGE_SIZE_BITS};
const u64 end_pages{vm.va_range_split >> VM::PAGE_SIZE_BITS};
vm.small_page_allocator = std::make_shared<VM::Allocator>(start_pages, end_pages);
const u64 start_big_pages{vm.va_range_split >> vm.big_page_size_bits};
const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits};
vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS);
vm.initialised = true;
return NvResult::Success; return NvResult::Success;
} }
@ -106,21 +144,73 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages,
params.page_size, params.flags); params.page_size, params.flags);
const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; std::scoped_lock lock(mutex);
if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
params.offset = *(gmmu->AllocateFixed(params.offset, size)); if (!vm.initialised) {
} else { return NvResult::BadValue;
params.offset = gmmu->Allocate(size, params.align);
} }
auto result = NvResult::Success; if (params.page_size != VM::YUZU_PAGESIZE && params.page_size != vm.big_page_size) {
if (!params.offset) { return NvResult::BadValue;
LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size);
result = NvResult::InsufficientMemory;
} }
if (params.page_size != vm.big_page_size &&
((params.flags & MappingFlags::Sparse) != MappingFlags::None)) {
UNIMPLEMENTED_MSG("Sparse small pages are not implemented!");
return NvResult::NotImplemented;
}
const u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
: vm.big_page_size_bits};
auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
: *vm.big_page_allocator};
if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
allocator.AllocateFixed(static_cast<u32>(params.offset >> page_size_bits), params.pages);
} else {
params.offset = static_cast<u64>(allocator.Allocate(params.pages)) << page_size_bits;
if (!params.offset) {
UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!");
return NvResult::InsufficientMemory;
}
}
u64 size{static_cast<u64>(params.pages) * params.page_size};
if ((params.flags & MappingFlags::Sparse) != MappingFlags::None) {
gmmu->MapSparse(params.offset, size);
}
allocation_map[params.offset] = {
.size = size,
.page_size = params.page_size,
.sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
};
std::memcpy(output.data(), &params, output.size()); std::memcpy(output.data(), &params, output.size());
return result; return NvResult::Success;
}
void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
auto mapping{mapping_map.at(offset)};
if (!mapping->fixed) {
auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
static_cast<u32>(mapping->size >> page_size_bits));
}
// Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
// Only FreeSpace can unmap them fully
if (mapping->sparse_alloc)
gmmu->MapSparse(offset, mapping->size);
else
gmmu->Unmap(offset, mapping->size);
mapping_map.erase(offset);
} }
NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) { NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) {
@ -130,7 +220,40 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset, LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
params.pages, params.page_size); params.pages, params.page_size);
gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size); std::scoped_lock lock(mutex);
if (!vm.initialised) {
return NvResult::BadValue;
}
try {
auto allocation{allocation_map[params.offset]};
if (allocation.page_size != params.page_size ||
allocation.size != (static_cast<u64>(params.pages) * params.page_size)) {
return NvResult::BadValue;
}
for (const auto& mapping : allocation.mappings) {
FreeMappingLocked(mapping->offset);
}
// Unset sparse flag if required
if (allocation.sparse) {
gmmu->Unmap(params.offset, allocation.size);
}
auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
: *vm.big_page_allocator};
u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
: vm.big_page_size_bits};
allocator.Free(static_cast<u32>(params.offset >> page_size_bits),
static_cast<u32>(allocation.size >> page_size_bits));
allocation_map.erase(params.offset);
} catch ([[maybe_unused]] const std::out_of_range& e) {
return NvResult::BadValue;
}
std::memcpy(output.data(), &params, output.size()); std::memcpy(output.data(), &params, output.size());
return NvResult::Success; return NvResult::Success;
@ -141,43 +264,51 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
auto result = NvResult::Success;
std::vector<IoctlRemapEntry> entries(num_entries); std::vector<IoctlRemapEntry> entries(num_entries);
std::memcpy(entries.data(), input.data(), input.size()); std::memcpy(entries.data(), input.data(), input.size());
std::scoped_lock lock(mutex);
if (!vm.initialised) {
return NvResult::BadValue;
}
for (const auto& entry : entries) { for (const auto& entry : entries) {
LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", GPUVAddr virtual_address{static_cast<u64>(entry.as_offset_big_pages)
entry.offset, entry.nvmap_handle, entry.pages); << vm.big_page_size_bits};
u64 size{static_cast<u64>(entry.big_pages) << vm.big_page_size_bits};
if (entry.nvmap_handle == 0) { auto alloc{allocation_map.upper_bound(virtual_address)};
// If nvmap handle is null, we should unmap instead.
const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; if (alloc-- == allocation_map.begin() ||
const auto size{static_cast<u64>(entry.pages) << 0x10}; (virtual_address - alloc->first) + size > alloc->second.size) {
gmmu->Unmap(offset, size); LOG_WARNING(Service_NVDRV, "Cannot remap into an unallocated region!");
continue; return NvResult::BadValue;
} }
const auto object{nvmap.GetHandle(entry.nvmap_handle)}; if (!alloc->second.sparse) {
if (!object) { LOG_WARNING(Service_NVDRV, "Cannot remap a non-sparse mapping!");
LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle); return NvResult::BadValue;
result = NvResult::InvalidState;
break;
} }
const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; if (!entry.handle) {
const auto size{static_cast<u64>(entry.pages) << 0x10}; gmmu->MapSparse(virtual_address, size);
const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10}; } else {
const auto addr{gmmu->Map(object->address + map_offset, offset, size)}; auto handle{nvmap.GetHandle(entry.handle)};
if (!handle) {
return NvResult::BadValue;
}
if (!addr) { VAddr cpu_address{static_cast<VAddr>(
LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!"); handle->address +
result = NvResult::InvalidState; (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
break;
gmmu->Map(virtual_address, cpu_address, size);
} }
} }
std::memcpy(output.data(), entries.data(), output.size()); std::memcpy(output.data(), entries.data(), output.size());
return result; return NvResult::Success;
} }
NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
@ -187,75 +318,96 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
LOG_DEBUG(Service_NVDRV, LOG_DEBUG(Service_NVDRV,
"called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}"
", offset={}", ", offset={}",
params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size, params.flags, params.handle, params.buffer_offset, params.mapping_size,
params.offset); params.offset);
if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) { std::scoped_lock lock(mutex);
if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) { if (!vm.initialised) {
LOG_CRITICAL(Service_NVDRV, return NvResult::BadValue;
"remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, " }
"mapping_size = {}, offset={}",
params.flags, params.nvmap_handle, params.buffer_offset,
params.mapping_size, params.offset);
std::memcpy(output.data(), &params, output.size()); // Remaps a subregion of an existing mapping to a different PA
return NvResult::InvalidState; if ((params.flags & MappingFlags::Remap) != MappingFlags::None) {
try {
auto mapping{mapping_map.at(params.offset)};
if (mapping->size < params.mapping_size) {
LOG_WARNING(Service_NVDRV,
"Cannot remap a partially mapped GPU address space region: 0x{:X}",
params.offset);
return NvResult::BadValue;
} }
std::memcpy(output.data(), &params, output.size()); u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
return NvResult::Success; VAddr cpu_address{mapping->ptr + params.buffer_offset};
} else {
LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset);
std::memcpy(output.data(), &params, output.size()); gmmu->Map(gpu_address, cpu_address, params.mapping_size);
return NvResult::InvalidState;
return NvResult::Success;
} catch ([[maybe_unused]] const std::out_of_range& e) {
LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}",
params.offset);
return NvResult::BadValue;
} }
} }
const auto object{nvmap.GetHandle(params.nvmap_handle)}; auto handle{nvmap.GetHandle(params.handle)};
if (!object) { if (!handle) {
LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle); return NvResult::BadValue;
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
} }
// The real nvservices doesn't make a distinction between handles and ids, and VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
// object can only have one handle and it will be the same as its id. Assert that this is the u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
// case to prevent unexpected behavior.
ASSERT(object->id == params.nvmap_handle);
u64 page_size{params.page_size}; if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
if (!page_size) { auto alloc{allocation_map.upper_bound(params.offset)};
page_size = object->align;
}
const auto physical_address{object->address + params.buffer_offset}; if (alloc-- == allocation_map.begin() ||
u64 size{params.mapping_size}; (params.offset - alloc->first) + size > alloc->second.size) {
if (!size) { UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!");
size = object->size; return NvResult::BadValue;
} }
const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None}; gmmu->Map(params.offset, cpu_address, size);
if (is_alloc) {
params.offset = gmmu->MapAllocate(physical_address, size, page_size); auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false,
alloc->second.sparse)};
alloc->second.mappings.push_back(mapping);
mapping_map[params.offset] = mapping;
} else { } else {
params.offset = gmmu->Map(physical_address, params.offset, size); bool big_page{[&]() {
} if (Common::IsAligned(handle->align, vm.big_page_size))
return true;
else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
return false;
else {
UNREACHABLE();
return false;
}
}()};
auto result = NvResult::Success; auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
if (!params.offset) { u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size); u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
result = NvResult::InvalidState;
} else { params.offset = static_cast<u64>(allocator.Allocate(
AddBufferMap(params.offset, size, physical_address, is_alloc); static_cast<u32>(Common::AlignUp(size, page_size) >> page_size_bits)))
<< page_size_bits;
if (!params.offset) {
UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!");
return NvResult::InsufficientMemory;
}
gmmu->Map(params.offset, cpu_address, size);
auto mapping{
std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
mapping_map[params.offset] = mapping;
} }
std::memcpy(output.data(), &params, output.size()); std::memcpy(output.data(), &params, output.size());
return result; return NvResult::Success;
} }
NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
@ -264,13 +416,36 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
if (const auto size{RemoveBufferMap(params.offset)}; size) { std::scoped_lock lock(mutex);
gmmu->Unmap(params.offset, *size);
} else { if (!vm.initialised) {
LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset); return NvResult::BadValue;
}
try {
auto mapping{mapping_map.at(params.offset)};
if (!mapping->fixed) {
auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
static_cast<u32>(mapping->size >> page_size_bits));
}
// Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
// Only FreeSpace can unmap them fully
if (mapping->sparse_alloc) {
gmmu->MapSparse(params.offset, mapping->size);
} else {
gmmu->Unmap(params.offset, mapping->size);
}
mapping_map.erase(params.offset);
} catch ([[maybe_unused]] const std::out_of_range& e) {
LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
} }
std::memcpy(output.data(), &params, output.size());
return NvResult::Success; return NvResult::Success;
} }
@ -284,28 +459,37 @@ NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8
return NvResult::Success; return NvResult::Success;
} }
/**
 * @brief Fills `params` with the two VA regions of this address space.
 *
 * `regions[0]` describes the small-page region and `regions[1]` the big-page region. The
 * allocators work in units of pages, so their start/limit values are page indices that have to be
 * shifted back up by the matching page-size bit count to obtain a byte offset.
 *
 * @param params Ioctl structure to fill in; `buf_size` is forced to the size of both regions.
 */
void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
    params.buf_size = 2 * sizeof(VaRegion);

    const auto& small_allocator{*vm.small_page_allocator};
    const auto& big_allocator{*vm.big_page_allocator};

    // Widen the page index to 64 bits *before* shifting: the allocator's VA type may be narrower
    // than the byte address (VM::Allocator is instantiated with a 32-bit VA type), in which case
    // shifting first would silently wrap for any region starting at or above 4 GiB.
    params.regions = std::array<VaRegion, 2>{
        VaRegion{
            .offset = static_cast<u64>(small_allocator.vaStart) << VM::PAGE_SIZE_BITS,
            .page_size = VM::YUZU_PAGESIZE,
            .pages = small_allocator.vaLimit - small_allocator.vaStart,
        },
        VaRegion{
            .offset = static_cast<u64>(big_allocator.vaStart) << vm.big_page_size_bits,
            .page_size = vm.big_page_size,
            .pages = big_allocator.vaLimit - big_allocator.vaStart,
        },
    };
}
NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) { NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetVaRegions params{}; IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size); params.buf_size);
params.buf_size = 0x30; std::scoped_lock lock(mutex);
params.small = IoctlVaRegion{ if (!vm.initialised) {
.offset = 0x04000000, return NvResult::BadValue;
.page_size = DEFAULT_SMALL_PAGE_SIZE, }
.pages = 0x3fbfff,
};
params.big = IoctlVaRegion{ GetVARegionsImpl(params);
.offset = 0x04000000,
.page_size = big_page_size,
.pages = 0x1bffff,
};
// TODO(ogniK): This probably can stay stubbed but should add support way way later
std::memcpy(output.data(), &params, output.size()); std::memcpy(output.data(), &params, output.size());
return NvResult::Success; return NvResult::Success;
@ -316,64 +500,24 @@ NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u
IoctlGetVaRegions params{}; IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size()); std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size); params.buf_size);
params.buf_size = 0x30; std::scoped_lock lock(mutex);
params.small = IoctlVaRegion{ if (!vm.initialised) {
.offset = 0x04000000, return NvResult::BadValue;
.page_size = 0x1000, }
.pages = 0x3fbfff,
};
params.big = IoctlVaRegion{ GetVARegionsImpl(params);
.offset = 0x04000000,
.page_size = big_page_size,
.pages = 0x1bffff,
};
// TODO(ogniK): This probably can stay stubbed but should add support way way later
std::memcpy(output.data(), &params, output.size()); std::memcpy(output.data(), &params, output.size());
std::memcpy(inline_output.data(), &params.small, sizeof(IoctlVaRegion)); std::memcpy(inline_output.data(), &params.regions[0], sizeof(VaRegion));
std::memcpy(inline_output.data() + sizeof(IoctlVaRegion), &params.big, sizeof(IoctlVaRegion)); std::memcpy(inline_output.data() + sizeof(VaRegion), &params.regions[1], sizeof(VaRegion));
return NvResult::Success; return NvResult::Success;
} }
/**
 * @brief Looks up the buffer mapping whose address range contains `gpu_addr`.
 *
 * @param gpu_addr GPU virtual address to search for.
 * @return A copy of the first mapping (in key order) with StartAddr() <= gpu_addr < EndAddr(),
 *         or std::nullopt when no such mapping exists.
 */
std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const {
    // Entries keyed above gpu_addr are never inspected; upper_bound marks where to stop.
    const auto stop{buffer_mappings.upper_bound(gpu_addr)};

    auto cursor{buffer_mappings.begin()};
    while (cursor != stop) {
        const auto& candidate{cursor->second};
        const bool starts_at_or_before{candidate.StartAddr() <= gpu_addr};
        const bool ends_after{gpu_addr < candidate.EndAddr()};
        if (starts_at_or_before && ends_after) {
            return candidate;
        }
        ++cursor;
    }

    return std::nullopt;
}
/**
 * @brief Records a buffer mapping keyed by its GPU address, replacing any entry already stored
 *        under the same key.
 *
 * @param gpu_addr     GPU virtual address the mapping starts at (also the map key).
 * @param size         Size of the mapping in bytes.
 * @param cpu_addr     CPU address associated with the mapping.
 * @param is_allocated Stored with the mapping and reported back by BufferMap::IsAllocated().
 */
void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
                                 bool is_allocated) {
    BufferMap& slot{buffer_mappings[gpu_addr]};
    slot = BufferMap{gpu_addr, size, cpu_addr, is_allocated};
}
/**
 * @brief Drops the buffer mapping stored under exactly `gpu_addr`.
 *
 * @param gpu_addr Key of the mapping to remove.
 * @return std::nullopt when no mapping is keyed at `gpu_addr`; otherwise the mapping's size if it
 *         was flagged as allocated, or 0 if it was not.
 */
std::optional<std::size_t> nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) {
    const auto entry{buffer_mappings.find(gpu_addr)};
    if (entry == buffer_mappings.end()) {
        return std::nullopt;
    }

    // Read the size before erasing; mappings not flagged as allocated report a size of zero.
    const std::size_t reported_size{entry->second.IsAllocated() ? entry->second.Size() : 0};
    buffer_mappings.erase(entry);
    return reported_size;
}
Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) { Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) {
LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id); LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id);
return nullptr; return nullptr;

@ -5,14 +5,19 @@
#pragma once #pragma once
#include <bit>
#include <list>
#include <map> #include <map>
#include <memory> #include <memory>
#include <mutex>
#include <optional> #include <optional>
#include <vector> #include <vector>
#include "common/address_space.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/swap.h" #include "common/swap.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Tegra { namespace Tegra {
@ -30,17 +35,13 @@ class NvMap;
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
constexpr u32 DEFAULT_BIG_PAGE_SIZE = 1 << 16; enum class MappingFlags : u32 {
constexpr u32 DEFAULT_SMALL_PAGE_SIZE = 1 << 12; None = 0,
Fixed = 1 << 0,
class nvmap; Sparse = 1 << 1,
Remap = 1 << 8,
enum class AddressSpaceFlags : u32 {
None = 0x0,
FixedOffset = 0x1,
Remap = 0x100,
}; };
DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags); DECLARE_ENUM_FLAG_OPERATORS(MappingFlags);
class nvhost_as_gpu final : public nvdevice { class nvhost_as_gpu final : public nvdevice {
public: public:
@ -59,46 +60,15 @@ public:
Kernel::KEvent* QueryEvent(u32 event_id) override; Kernel::KEvent* QueryEvent(u32 event_id) override;
private: struct VaRegion {
class BufferMap final { u64 offset;
public: u32 page_size;
constexpr BufferMap() = default; u32 _pad0_;
u64 pages;
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_)
: start_addr{start_addr_}, end_addr{start_addr_ + size_} {}
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_,
bool is_allocated_)
: start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_},
is_allocated{is_allocated_} {}
constexpr VAddr StartAddr() const {
return start_addr;
}
constexpr VAddr EndAddr() const {
return end_addr;
}
constexpr std::size_t Size() const {
return end_addr - start_addr;
}
constexpr VAddr CpuAddr() const {
return cpu_addr;
}
constexpr bool IsAllocated() const {
return is_allocated;
}
private:
GPUVAddr start_addr{};
GPUVAddr end_addr{};
VAddr cpu_addr{};
bool is_allocated{};
}; };
static_assert(sizeof(VaRegion) == 0x18);
private:
struct IoctlAllocAsEx { struct IoctlAllocAsEx {
u32_le flags{}; // usually passes 1 u32_le flags{}; // usually passes 1
s32_le as_fd{}; // ignored; passes 0 s32_le as_fd{}; // ignored; passes 0
@ -113,7 +83,7 @@ private:
struct IoctlAllocSpace { struct IoctlAllocSpace {
u32_le pages{}; u32_le pages{};
u32_le page_size{}; u32_le page_size{};
AddressSpaceFlags flags{}; MappingFlags flags{};
INSERT_PADDING_WORDS(1); INSERT_PADDING_WORDS(1);
union { union {
u64_le offset; u64_le offset;
@ -130,19 +100,19 @@ private:
static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size"); static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size");
struct IoctlRemapEntry { struct IoctlRemapEntry {
u16_le flags{}; u16 flags;
u16_le kind{}; u16 kind;
u32_le nvmap_handle{}; NvCore::NvMap::Handle::Id handle;
u32_le map_offset{}; u32 handle_offset_big_pages;
u32_le offset{}; u32 as_offset_big_pages;
u32_le pages{}; u32 big_pages;
}; };
static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size"); static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size");
struct IoctlMapBufferEx { struct IoctlMapBufferEx {
AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable MappingFlags flags{}; // bit0: fixed_offset, bit2: cacheable
u32_le kind{}; // -1 is default u32_le kind{}; // -1 is default
u32_le nvmap_handle{}; NvCore::NvMap::Handle::Id handle;
u32_le page_size{}; // 0 means don't care u32_le page_size{}; // 0 means don't care
s64_le buffer_offset{}; s64_le buffer_offset{};
u64_le mapping_size{}; u64_le mapping_size{};
@ -160,27 +130,15 @@ private:
}; };
static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size"); static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size");
struct IoctlVaRegion {
u64_le offset{};
u32_le page_size{};
INSERT_PADDING_WORDS(1);
u64_le pages{};
};
static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
struct IoctlGetVaRegions { struct IoctlGetVaRegions {
u64_le buf_addr{}; // (contained output user ptr on linux, ignored) u64_le buf_addr{}; // (contained output user ptr on linux, ignored)
u32_le buf_size{}; // forced to 2*sizeof(struct va_region) u32_le buf_size{}; // forced to 2*sizeof(struct va_region)
u32_le reserved{}; u32_le reserved{};
IoctlVaRegion small{}; std::array<VaRegion, 2> regions{};
IoctlVaRegion big{};
}; };
static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2, static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
"IoctlGetVaRegions is incorrect size"); "IoctlGetVaRegions is incorrect size");
s32 channel{};
u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output); NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output);
NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output); NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output); NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output);
@ -189,23 +147,74 @@ private:
NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output); NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output);
NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output); NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
void GetVARegionsImpl(IoctlGetVaRegions& params);
NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output, NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output); std::vector<u8>& inline_output);
std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const; void FreeMappingLocked(u64 offset);
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
Module& module; Module& module;
NvCore::Container& container; NvCore::Container& container;
NvCore::NvMap& nvmap; NvCore::NvMap& nvmap;
struct Mapping {
VAddr ptr;
u64 offset;
u64 size;
bool fixed;
bool big_page; // Only valid if fixed == false
bool sparse_alloc;
Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_)
: ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_),
sparse_alloc(sparse_alloc_) {}
};
/**
 * @brief A region of the GPU address space reserved up front, into which fixed mappings are
 * later placed (stored in allocation_map, keyed by the region's base address)
 */
struct Allocation {
u64 size; //!< Extent of the region; fixed mappings are bounds-checked against it
std::list<std::shared_ptr<Mapping>> mappings; //!< Fixed mappings placed inside this region
u32 page_size; //!< Page size associated with the region (NOTE(review): presumably the page size requested at allocation time, confirm against AllocateSpace)
bool sparse; //!< Copied into Mapping::sparse_alloc for every mapping placed in this region
};
std::map<u64, std::shared_ptr<Mapping>>
mapping_map; //!< This maps the base addresses of mapped buffers to their total sizes and
//!< mapping type, this is needed as what was originally a single buffer may
//!< have been split into multiple GPU side buffers with the remap flag.
std::map<u64, Allocation> allocation_map; //!< Holds allocations created by AllocSpace from
//!< which fixed buffers can be mapped into
std::mutex mutex; //!< Locks all AS operations
/**
 * @brief Page-size parameters, VA range bounds and page allocators of this address space
 */
struct VM {
static constexpr u32 YUZU_PAGESIZE{0x1000}; //!< Small page size in bytes
static constexpr u32 PAGE_SIZE_BITS{std::countr_zero(YUZU_PAGESIZE)}; //!< log2 of YUZU_PAGESIZE
static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000}; //!< NOTE(review): looks like a bitmask of the accepted big page sizes (0x10000 | 0x20000), confirm against its user
static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000}; //!< Big page size in bytes used until overridden
u32 big_page_size{DEFAULT_BIG_PAGE_SIZE}; //!< Big page size in bytes currently in effect
u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)}; //!< log2 of big_page_size
static constexpr u32 VA_START_SHIFT{10}; //!< Shift applied to the big page size to derive the default start of the VA range
static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34}; //!< Default value of va_range_split
static constexpr u64 DEFAULT_VA_RANGE{1ULL << 37}; //!< Default value of va_range_end
u64 va_range_start{DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT}; //!< Lowest address of the VA range
u64 va_range_split{DEFAULT_VA_SPLIT}; //!< NOTE(review): presumably the boundary between the small-page and big-page regions, confirm against AllocAsEx
u64 va_range_end{DEFAULT_VA_RANGE}; //!< Upper bound of the VA range
using Allocator = Common::FlatAllocator<u32, 0, 32>; //!< Allocator working on 32-bit page indices rather than byte addresses
std::unique_ptr<Allocator> big_page_allocator; //!< Hands out big-page indices
std::shared_ptr<Allocator>
small_page_allocator; //!< Shared as this is also used by nvhost::GpuChannel
bool initialised{}; //!< Checked by the ioctl handlers, which bail out with BadValue while it is false
} vm;
std::shared_ptr<Tegra::MemoryManager> gmmu; std::shared_ptr<Tegra::MemoryManager> gmmu;
// This is expected to be ordered, therefore we must use a map, not unordered_map // s32 channel{};
std::map<GPUVAddr, BufferMap> buffer_mappings; // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
}; };
} // namespace Service::Nvidia::Devices } // namespace Service::Nvidia::Devices

@ -71,18 +71,22 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
rasterizer = rasterizer_; rasterizer = rasterizer_;
} }
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size); return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
} }
/**
 * Runs the page table operation with EntryType::Reserved for `size` bytes at `gpu_addr`.
 *
 * @param gpu_addr GPU virtual address the operation is applied to.
 * @param size     Size forwarded to the page table operation.
 * @return Whatever the page table operation returns (NOTE(review): presumably `gpu_addr`, as with
 *         Map - confirm against PageTableOp).
 */
GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) {
    // A reserved range carries no CPU address, hence the literal 0 in the cpu_addr slot.
    const GPUVAddr mapped_base = PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
    return mapped_base;
}
GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
return Map(cpu_addr, *FindFreeRange(size, align), size); return Map(*FindFreeRange(size, align), cpu_addr, size);
} }
GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true); const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
ASSERT(gpu_addr); ASSERT(gpu_addr);
return Map(cpu_addr, *gpu_addr, size); return Map(*gpu_addr, cpu_addr, size);
} }
void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {

@ -88,7 +88,8 @@ public:
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
std::size_t size) const; std::size_t size) const;
[[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size);
GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
[[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
[[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);