Merge pull request #8467 from FernandoS27/yfc-rel-1
Project yuzu Fried Chicken (Y.F.C.) Part 1

commit 1effa578f1
@@ -0,0 +1,10 @@

// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later

#include "common/address_space.inc"

namespace Common {

template class Common::FlatAllocator<u32, 0, 32>;

}
@@ -0,0 +1,150 @@

// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <concepts>
#include <functional>
#include <mutex>
#include <vector>

#include "common/common_types.h"

namespace Common {
template <typename VaType, size_t AddressSpaceBits>
concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits;

struct EmptyStruct {};

/**
 * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
 */
template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,
          bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct>
requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAddressSpaceMap {
public:
    /// The maximum VA that this AS can technically reach
    static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) +
                                      ((1ULL << (AddressSpaceBits - 1)) - 1)};

    explicit FlatAddressSpaceMap(VaType va_limit,
                                 std::function<void(VaType, VaType)> unmap_callback = {});

    FlatAddressSpaceMap() = default;

    void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info = {}) {
        std::scoped_lock lock(block_mutex);
        MapLocked(virt, phys, size, extra_info);
    }

    void Unmap(VaType virt, VaType size) {
        std::scoped_lock lock(block_mutex);
        UnmapLocked(virt, size);
    }

    VaType GetVALimit() const {
        return va_limit;
    }

protected:
    /**
     * @brief Represents a block of memory in the AS, the physical mapping is contiguous until
     * another block with a different phys address is hit
     */
    struct Block {
        /// VA of the block
        VaType virt{UnmappedVa};
        /// PA of the block, will increase 1-1 with VA until a new block is encountered
        PaType phys{UnmappedPa};
        [[no_unique_address]] ExtraBlockInfo extra_info;

        Block() = default;

        Block(VaType virt_, PaType phys_, ExtraBlockInfo extra_info_)
            : virt(virt_), phys(phys_), extra_info(extra_info_) {}

        bool Valid() const {
            return virt != UnmappedVa;
        }

        bool Mapped() const {
            return phys != UnmappedPa;
        }

        bool Unmapped() const {
            return phys == UnmappedPa;
        }

        bool operator<(const VaType& p_virt) const {
            return virt < p_virt;
        }
    };

    /**
     * @brief Maps a PA range into the given AS region
     * @note block_mutex MUST be locked when calling this
     */
    void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info);

    /**
     * @brief Unmaps the given range and merges it with other unmapped regions
     * @note block_mutex MUST be locked when calling this
     */
    void UnmapLocked(VaType virt, VaType size);

    std::mutex block_mutex;
    std::vector<Block> blocks{Block{}};

    /// A soft limit on the maximum VA of the AS
    VaType va_limit{VaMaximum};

private:
    /// Callback called when the mappings in a region have changed
    std::function<void(VaType, VaType)> unmap_callback{};
};

/**
 * @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an
 * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
 */
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>
requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAllocator
    : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
private:
    using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;

public:
    explicit FlatAllocator(VaType virt_start, VaType va_limit = Base::VaMaximum);

    /**
     * @brief Allocates a region in the AS of the given size and returns its address
     */
    VaType Allocate(VaType size);

    /**
     * @brief Marks the given region in the AS as allocated
     */
    void AllocateFixed(VaType virt, VaType size);

    /**
     * @brief Frees an AS region so it can be used again
     */
    void Free(VaType virt, VaType size);

    VaType GetVAStart() const {
        return virt_start;
    }

private:
    /// The base VA of the allocator, no allocations will be below this
    VaType virt_start;

    /**
     * The end address for the initial linear allocation pass
     * Once this reaches the AS limit the slower allocation path will be used
     */
    VaType current_linear_alloc_end;
};
} // namespace Common
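A minimal usage sketch for the allocator above, assuming the u32/0/32 instantiation that address_space.cpp explicitly provides; the base address and sizes here are illustrative:

#include "common/address_space.h"

void AllocatorSketch() {
    constexpr u32 kStart{0x1000}; // illustrative base VA
    Common::FlatAllocator<u32, 0, 32> allocator{kStart};

    const u32 dynamic = allocator.Allocate(0x2000); // fast linear pass
    allocator.AllocateFixed(0x40000, 0x1000);       // reserve a fixed region
    allocator.Free(dynamic, 0x2000);                // make the region reusable
}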
@@ -0,0 +1,366 @@

// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later

#include <algorithm>

#include "common/address_space.h"
#include "common/assert.h"

#define MAP_MEMBER(returnType)                                                                     \
    template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,              \
              bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo>                \
    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<           \
        VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
#define MAP_MEMBER_CONST()                                                                         \
    template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,              \
              bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo>                \
    requires AddressSpaceValid<VaType, AddressSpaceBits> FlatAddressSpaceMap<                      \
        VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>

#define MM_MEMBER(returnType)                                                                      \
    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType                                \
    FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>

#define ALLOC_MEMBER(returnType)                                                                   \
    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType                                \
    FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
#define ALLOC_MEMBER_CONST()                                                                       \
    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
    requires AddressSpaceValid<VaType, AddressSpaceBits>                                           \
    FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>

namespace Common {
MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType va_limit_,
                                        std::function<void(VaType, VaType)> unmap_callback_)
    : va_limit{va_limit_}, unmap_callback{std::move(unmap_callback_)} {
    if (va_limit > VaMaximum) {
        ASSERT_MSG(false, "Invalid VA limit!");
    }
}

MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info) {
    VaType virt_end{virt + size};

    if (virt_end > va_limit) {
        ASSERT_MSG(false,
                   "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}",
                   virt_end, va_limit);
    }

    auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
    if (block_end_successor == blocks.begin()) {
        ASSERT_MSG(false, "Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end);
    }

    auto block_end_predecessor{std::prev(block_end_successor)};

    if (block_end_successor != blocks.end()) {
        // We have blocks in front of us, if one is directly in front then we don't have to add a
        // tail
        if (block_end_successor->virt != virt_end) {
            PaType tailPhys{[&]() -> PaType {
                if constexpr (!PaContigSplit) {
                    // Always propagate unmapped regions rather than calculating offset
                    return block_end_predecessor->phys;
                } else {
                    if (block_end_predecessor->Unmapped()) {
                        // Always propagate unmapped regions rather than calculating offset
                        return block_end_predecessor->phys;
                    } else {
                        return block_end_predecessor->phys + virt_end - block_end_predecessor->virt;
                    }
                }
            }()};

            if (block_end_predecessor->virt >= virt) {
                // If this block's start would be overlapped by the map then reuse it as a tail
                // block, keeping its existing extra_info
                block_end_predecessor->virt = virt_end;
                block_end_predecessor->phys = tailPhys;

                // No longer predecessor anymore
                block_end_successor = block_end_predecessor--;
            } else {
                // Else insert a new one and we're done
                blocks.insert(block_end_successor,
                              {Block(virt, phys, extra_info),
                               Block(virt_end, tailPhys, block_end_predecessor->extra_info)});
                if (unmap_callback) {
                    unmap_callback(virt, size);
                }

                return;
            }
        }
    } else {
        // block_end_predecessor will always be unmapped as blocks has to be terminated by an
        // unmapped chunk
        if (block_end_predecessor != blocks.begin() && block_end_predecessor->virt >= virt) {
            // Move the start of the unmapped terminator block forward to the end of the new
            // mapping
            block_end_predecessor->virt = virt_end;

            // No longer predecessor anymore
            block_end_successor = block_end_predecessor--;
        } else {
            // Else insert a new one and we're done
            blocks.insert(block_end_successor,
                          {Block(virt, phys, extra_info), Block(virt_end, UnmappedPa, {})});
            if (unmap_callback) {
                unmap_callback(virt, size);
            }

            return;
        }
    }

    auto block_start_successor{block_end_successor};

    // Walk the block vector to find the start successor as this is more efficient than another
    // binary search in most scenarios
    while (std::prev(block_start_successor)->virt >= virt) {
        block_start_successor--;
    }

    // Check that the start successor is either the end block or something in between
    if (block_start_successor->virt > virt_end) {
        ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
    } else if (block_start_successor->virt == virt_end) {
        // We need to create a new block as there are none spare that we would overwrite
        blocks.insert(block_start_successor, Block(virt, phys, extra_info));
    } else {
        // Erase overwritten blocks
        if (auto eraseStart{std::next(block_start_successor)}; eraseStart != block_end_successor) {
            blocks.erase(eraseStart, block_end_successor);
        }

        // Reuse a block that would otherwise be overwritten as a start block
        block_start_successor->virt = virt;
        block_start_successor->phys = phys;
        block_start_successor->extra_info = extra_info;
    }

    if (unmap_callback) {
        unmap_callback(virt, size);
    }
}

MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
    VaType virt_end{virt + size};

    if (virt_end > va_limit) {
        ASSERT_MSG(false,
                   "Trying to unmap a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}",
                   virt_end, va_limit);
    }

    auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
    if (block_end_successor == blocks.begin()) {
        ASSERT_MSG(false, "Trying to unmap a block before the VA start: virt_end: 0x{:X}",
                   virt_end);
    }

    auto block_end_predecessor{std::prev(block_end_successor)};

    auto walk_back_to_predecessor{[&](auto iter) {
        while (iter->virt >= virt) {
            iter--;
        }

        return iter;
    }};

    auto erase_blocks_with_end_unmapped{[&](auto unmappedEnd) {
        auto block_start_predecessor{walk_back_to_predecessor(unmappedEnd)};
        auto block_start_successor{std::next(block_start_predecessor)};

        auto eraseEnd{[&]() {
            if (block_start_predecessor->Unmapped()) {
                // If the start predecessor is unmapped then we can erase everything in our region
                // and be done
                return std::next(unmappedEnd);
            } else {
                // Else reuse the end predecessor as the start of our unmapped region then erase all
                // up to it
                unmappedEnd->virt = virt;
                return unmappedEnd;
            }
        }()};

        // We can't have two unmapped regions after each other
        if (eraseEnd != blocks.end() &&
            (eraseEnd == block_start_successor ||
             (block_start_predecessor->Unmapped() && eraseEnd->Unmapped()))) {
            ASSERT_MSG(false, "Multiple contiguous unmapped regions are unsupported!");
        }

        blocks.erase(block_start_successor, eraseEnd);
    }};

    // We can avoid any splitting logic if these are the case
    if (block_end_predecessor->Unmapped()) {
        if (block_end_predecessor->virt > virt) {
            erase_blocks_with_end_unmapped(block_end_predecessor);
        }

        if (unmap_callback) {
            unmap_callback(virt, size);
        }

        return; // The region is unmapped, bail out early
    } else if (block_end_successor->virt == virt_end && block_end_successor->Unmapped()) {
        erase_blocks_with_end_unmapped(block_end_successor);

        if (unmap_callback) {
            unmap_callback(virt, size);
        }

        return; // The region is unmapped here and doesn't need splitting, bail out early
    } else if (block_end_successor == blocks.end()) {
        // This should never happen as the end should always follow an unmapped block
        ASSERT_MSG(false, "Unexpected Memory Manager state!");
    } else if (block_end_successor->virt != virt_end) {
        // If one block is directly in front then we don't have to add a tail

        // The previous block is mapped so we will need to add a tail with an offset
        PaType tailPhys{[&]() {
            if constexpr (PaContigSplit) {
                return block_end_predecessor->phys + virt_end - block_end_predecessor->virt;
            } else {
                return block_end_predecessor->phys;
            }
        }()};

        if (block_end_predecessor->virt >= virt) {
            // If this block's start would be overlapped by the unmap then reuse it as a tail block
            block_end_predecessor->virt = virt_end;
            block_end_predecessor->phys = tailPhys;

            // No longer predecessor anymore
            block_end_successor = block_end_predecessor--;
        } else {
            blocks.insert(block_end_successor,
                          {Block(virt, UnmappedPa, {}),
                           Block(virt_end, tailPhys, block_end_predecessor->extra_info)});
            if (unmap_callback) {
                unmap_callback(virt, size);
            }

            // The previous block is mapped and ends before
            return;
        }
    }

    // Walk the block vector to find the start predecessor as this is more efficient than another
    // binary search in most scenarios
    auto block_start_predecessor{walk_back_to_predecessor(block_end_successor)};
    auto block_start_successor{std::next(block_start_predecessor)};

    if (block_start_successor->virt > virt_end) {
        ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
    } else if (block_start_successor->virt == virt_end) {
        // There are no blocks between the start and the end that would let us skip inserting a new
        // one for head

        // The previous block may be unmapped, if so we don't need to insert any unmaps after it
        if (block_start_predecessor->Mapped()) {
            blocks.insert(block_start_successor, Block(virt, UnmappedPa, {}));
        }
    } else if (block_start_predecessor->Unmapped()) {
        // If the previous block is unmapped
        blocks.erase(block_start_successor, block_end_predecessor);
    } else {
        // Erase overwritten blocks, skipping the first one as we have written the unmapped start
        // block there
        if (auto eraseStart{std::next(block_start_successor)}; eraseStart != block_end_successor) {
            blocks.erase(eraseStart, block_end_successor);
        }

        // Add in the unmapped block header
        block_start_successor->virt = virt;
        block_start_successor->phys = UnmappedPa;
    }

    if (unmap_callback) {
        unmap_callback(virt, size);
    }
}

ALLOC_MEMBER_CONST()::FlatAllocator(VaType virt_start_, VaType va_limit_)
    : Base{va_limit_}, virt_start{virt_start_}, current_linear_alloc_end{virt_start_} {}

ALLOC_MEMBER(VaType)::Allocate(VaType size) {
    std::scoped_lock lock(this->block_mutex);

    VaType alloc_start{UnmappedVa};
    VaType alloc_end{current_linear_alloc_end + size};

    // Avoid searching backwards in the address space if possible
    if (alloc_end >= current_linear_alloc_end && alloc_end <= this->va_limit) {
        auto alloc_end_successor{
            std::lower_bound(this->blocks.begin(), this->blocks.end(), alloc_end)};
        if (alloc_end_successor == this->blocks.begin()) {
            ASSERT_MSG(false, "First block in AS map is invalid!");
        }

        auto alloc_end_predecessor{std::prev(alloc_end_successor)};
        if (alloc_end_predecessor->virt <= current_linear_alloc_end) {
            alloc_start = current_linear_alloc_end;
        } else {
            // Skip over any fixed mappings in front of us
            while (alloc_end_successor != this->blocks.end()) {
                if (alloc_end_successor->virt - alloc_end_predecessor->virt < size ||
                    alloc_end_predecessor->Mapped()) {
                    alloc_start = alloc_end_predecessor->virt;
                    break;
                }

                alloc_end_predecessor = alloc_end_successor++;

                // Use the VA limit to calculate if we can fit in the final block since it has no
                // successor
                if (alloc_end_successor == this->blocks.end()) {
                    alloc_end = alloc_end_predecessor->virt + size;

                    if (alloc_end >= alloc_end_predecessor->virt && alloc_end <= this->va_limit) {
                        alloc_start = alloc_end_predecessor->virt;
                    }
                }
            }
        }
    }

    if (alloc_start != UnmappedVa) {
        current_linear_alloc_end = alloc_start + size;
    } else { // If linear allocation overflows the AS then find a gap
        if (this->blocks.size() <= 2) {
            ASSERT_MSG(false, "Unexpected allocator state!");
        }

        auto search_predecessor{this->blocks.begin()};
        auto search_successor{std::next(search_predecessor)};

        while (search_successor != this->blocks.end() &&
               (search_successor->virt - search_predecessor->virt < size ||
                search_predecessor->Mapped())) {
            search_predecessor = search_successor++;
        }

        if (search_successor != this->blocks.end()) {
            alloc_start = search_predecessor->virt;
        } else {
            return {}; // AS is full
        }
    }

    this->MapLocked(alloc_start, true, size, {});
    return alloc_start;
}

ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
    this->Map(virt, true, size);
}

ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
    this->Unmap(virt, size);
}
} // namespace Common
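A sketch of the block semantics the implementation above maintains; instantiating the template directly like this assumes including the .inc so the member definitions are visible (the commit itself only instantiates FlatAllocator<u32, 0, 32>):

#include "common/address_space.inc"

void BlockSemanticsSketch() {
    // 32-bit VA and PA, 0 meaning "unmapped", with contiguous-PA splitting
    Common::FlatAddressSpaceMap<u32, 0, u32, 0, true, 32> as{0x10000000};

    as.Map(0x1000, 0x80000000, 0x3000); // one mapped block plus an unmapped tail
    as.Unmap(0x2000, 0x1000);           // splits it; the tail PA keeps its offset
}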
@@ -0,0 +1,9 @@

// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/multi_level_page_table.inc"

namespace Common {
template class Common::MultiLevelPageTable<u64>;
template class Common::MultiLevelPageTable<u32>;
} // namespace Common
@@ -0,0 +1,78 @@

// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <type_traits>
#include <utility>
#include <vector>

#include "common/common_types.h"

namespace Common {

template <typename BaseAddr>
class MultiLevelPageTable final {
public:
    constexpr MultiLevelPageTable() = default;
    explicit MultiLevelPageTable(std::size_t address_space_bits, std::size_t first_level_bits,
                                 std::size_t page_bits);

    ~MultiLevelPageTable() noexcept;

    MultiLevelPageTable(const MultiLevelPageTable&) = delete;
    MultiLevelPageTable& operator=(const MultiLevelPageTable&) = delete;

    MultiLevelPageTable(MultiLevelPageTable&& other) noexcept
        : address_space_bits{std::exchange(other.address_space_bits, 0)},
          first_level_bits{std::exchange(other.first_level_bits, 0)},
          page_bits{std::exchange(other.page_bits, 0)},
          first_level_shift{std::exchange(other.first_level_shift, 0)},
          first_level_chunk_size{std::exchange(other.first_level_chunk_size, 0)},
          alloc_size{std::exchange(other.alloc_size, 0)},
          first_level_map{std::move(other.first_level_map)},
          base_ptr{std::exchange(other.base_ptr, nullptr)} {}

    MultiLevelPageTable& operator=(MultiLevelPageTable&& other) noexcept {
        address_space_bits = std::exchange(other.address_space_bits, 0);
        first_level_bits = std::exchange(other.first_level_bits, 0);
        page_bits = std::exchange(other.page_bits, 0);
        first_level_shift = std::exchange(other.first_level_shift, 0);
        first_level_chunk_size = std::exchange(other.first_level_chunk_size, 0);
        alloc_size = std::exchange(other.alloc_size, 0);
        first_level_map = std::move(other.first_level_map);
        base_ptr = std::exchange(other.base_ptr, nullptr);
        return *this;
    }

    void ReserveRange(u64 start, std::size_t size);

    [[nodiscard]] const BaseAddr& operator[](std::size_t index) const {
        return base_ptr[index];
    }

    [[nodiscard]] BaseAddr& operator[](std::size_t index) {
        return base_ptr[index];
    }

    [[nodiscard]] BaseAddr* data() {
        return base_ptr;
    }

    [[nodiscard]] const BaseAddr* data() const {
        return base_ptr;
    }

private:
    void AllocateLevel(u64 level);

    std::size_t address_space_bits{};
    std::size_t first_level_bits{};
    std::size_t page_bits{};
    std::size_t first_level_shift{};
    std::size_t first_level_chunk_size{};
    std::size_t alloc_size{};
    std::vector<void*> first_level_map{};
    BaseAddr* base_ptr{};
};

} // namespace Common
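A usage sketch for the table above, with illustrative parameters: a 34-bit address space split by a 10-bit first level with 12-bit (4 KiB) pages gives one BaseAddr entry per page:

#include "common/multi_level_page_table.h"

void PageTableSketch() {
    Common::MultiLevelPageTable<u64> table{34, 10, 12};

    // Commit backing for the covered first-level chunks (on Windows; the POSIX
    // path maps the whole range upfront)
    table.ReserveRange(0, 1ULL << 24);
    table[0x1234] = 0xCAFE; // entries are indexed by page number
}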
@@ -0,0 +1,84 @@

// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif

#include "common/assert.h"
#include "common/multi_level_page_table.h"

namespace Common {

template <typename BaseAddr>
MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bits_,
                                                   std::size_t first_level_bits_,
                                                   std::size_t page_bits_)
    : address_space_bits{address_space_bits_},
      first_level_bits{first_level_bits_}, page_bits{page_bits_} {
    if (page_bits == 0) {
        return;
    }
    first_level_shift = address_space_bits - first_level_bits;
    first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr);
    alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr);
    std::size_t first_level_size = 1ULL << first_level_bits;
    first_level_map.resize(first_level_size, nullptr);
#ifdef _WIN32
    void* base{VirtualAlloc(nullptr, alloc_size, MEM_RESERVE, PAGE_READWRITE)};
#else
    void* base{mmap(nullptr, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1,
                    0)};

    if (base == MAP_FAILED) {
        base = nullptr;
    }
#endif

    ASSERT(base);
    base_ptr = reinterpret_cast<BaseAddr*>(base);
}

template <typename BaseAddr>
MultiLevelPageTable<BaseAddr>::~MultiLevelPageTable() noexcept {
    if (!base_ptr) {
        return;
    }
#ifdef _WIN32
    ASSERT(VirtualFree(base_ptr, 0, MEM_RELEASE));
#else
    ASSERT(munmap(base_ptr, alloc_size) == 0);
#endif
}

template <typename BaseAddr>
void MultiLevelPageTable<BaseAddr>::ReserveRange(u64 start, std::size_t size) {
    const u64 new_start = start >> first_level_shift;
    const u64 new_end = (start + size) >> first_level_shift;
    for (u64 i = new_start; i <= new_end; i++) {
        if (!first_level_map[i]) {
            AllocateLevel(i);
        }
    }
}

template <typename BaseAddr>
void MultiLevelPageTable<BaseAddr>::AllocateLevel(u64 level) {
    void* ptr = reinterpret_cast<char*>(base_ptr) + level * first_level_chunk_size;
#ifdef _WIN32
    void* base{VirtualAlloc(ptr, first_level_chunk_size, MEM_COMMIT, PAGE_READWRITE)};
#else
    void* base{mmap(ptr, first_level_chunk_size, PROT_READ | PROT_WRITE,
                    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)};

    if (base == MAP_FAILED) {
        base = nullptr;
    }
#endif
    ASSERT(base);

    first_level_map[level] = base;
}

} // namespace Common
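A standalone sketch of the reserve-then-commit idiom the constructor and AllocateLevel rely on, shown with the Windows API since that path makes the two steps explicit; error handling is elided for brevity:

#ifdef _WIN32
#include <windows.h>

void ReserveCommitSketch() {
    const size_t total = size_t{1} << 30; // reserve 1 GiB of address space
    const size_t chunk = size_t{1} << 20; // commit 1 MiB where it is needed
    char* base = static_cast<char*>(VirtualAlloc(nullptr, total, MEM_RESERVE, PAGE_READWRITE));

    // Committing backs this chunk with actual pages; the rest of the range
    // stays reservation-only and consumes no memory
    VirtualAlloc(base, chunk, MEM_COMMIT, PAGE_READWRITE);
    base[0] = 1; // touching the committed chunk is now safe

    VirtualFree(base, 0, MEM_RELEASE);
}
#endif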
@@ -1,32 +0,0 @@

// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "core/core.h"
#include "core/core_timing.h"
#include "core/hardware_interrupt_manager.h"
#include "core/hle/service/nvdrv/nvdrv_interface.h"
#include "core/hle/service/sm/sm.h"

namespace Core::Hardware {

InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) {
    gpu_interrupt_event = Core::Timing::CreateEvent(
        "GPUInterrupt",
        [this](std::uintptr_t message, u64 time,
               std::chrono::nanoseconds) -> std::optional<std::chrono::nanoseconds> {
            auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv");
            const u32 syncpt = static_cast<u32>(message >> 32);
            const u32 value = static_cast<u32>(message);
            nvdrv->SignalGPUInterruptSyncpt(syncpt, value);
            return std::nullopt;
        });
}

InterruptManager::~InterruptManager() = default;

void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
    const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value;
    system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{10}, gpu_interrupt_event, msg);
}

} // namespace Core::Hardware
@@ -1,32 +0,0 @@

// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <memory>

#include "common/common_types.h"

namespace Core {
class System;
}

namespace Core::Timing {
struct EventType;
}

namespace Core::Hardware {

class InterruptManager {
public:
    explicit InterruptManager(Core::System& system);
    ~InterruptManager();

    void GPUInterruptSyncpt(u32 syncpoint_id, u32 value);

private:
    Core::System& system;
    std::shared_ptr<Core::Timing::EventType> gpu_interrupt_event;
};

} // namespace Core::Hardware
@@ -0,0 +1,50 @@

// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later

#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "video_core/host1x/host1x.h"

namespace Service::Nvidia::NvCore {

struct ContainerImpl {
    explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_)
        : file{host1x_}, manager{host1x_}, device_file_data{} {}
    NvMap file;
    SyncpointManager manager;
    Container::Host1xDeviceFileData device_file_data;
};

Container::Container(Tegra::Host1x::Host1x& host1x_) {
    impl = std::make_unique<ContainerImpl>(host1x_);
}

Container::~Container() = default;

NvMap& Container::GetNvMapFile() {
    return impl->file;
}

const NvMap& Container::GetNvMapFile() const {
    return impl->file;
}

Container::Host1xDeviceFileData& Container::Host1xDeviceFile() {
    return impl->device_file_data;
}

const Container::Host1xDeviceFileData& Container::Host1xDeviceFile() const {
    return impl->device_file_data;
}

SyncpointManager& Container::GetSyncpointManager() {
    return impl->manager;
}

const SyncpointManager& Container::GetSyncpointManager() const {
    return impl->manager;
}

} // namespace Service::Nvidia::NvCore
@@ -0,0 +1,52 @@

// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <deque>
#include <memory>
#include <unordered_map>

#include "core/hle/service/nvdrv/nvdata.h"

namespace Tegra::Host1x {
class Host1x;
} // namespace Tegra::Host1x

namespace Service::Nvidia::NvCore {

class NvMap;
class SyncpointManager;

struct ContainerImpl;

class Container {
public:
    explicit Container(Tegra::Host1x::Host1x& host1x);
    ~Container();

    NvMap& GetNvMapFile();

    const NvMap& GetNvMapFile() const;

    SyncpointManager& GetSyncpointManager();

    const SyncpointManager& GetSyncpointManager() const;

    struct Host1xDeviceFileData {
        std::unordered_map<DeviceFD, u32> fd_to_id{};
        std::deque<u32> syncpts_accumulated{};
        u32 nvdec_next_id{};
        u32 vic_next_id{};
    };

    Host1xDeviceFileData& Host1xDeviceFile();

    const Host1xDeviceFileData& Host1xDeviceFile() const;

private:
    std::unique_ptr<ContainerImpl> impl;
};

} // namespace Service::Nvidia::NvCore
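A hypothetical wiring sketch for the container above; only the Container API itself comes from this commit, the function and its name are illustrative:

#include "core/hle/service/nvdrv/core/container.h"

void ContainerSketch(Tegra::Host1x::Host1x& host1x) {
    Service::Nvidia::NvCore::Container container{host1x};

    auto& nvmap = container.GetNvMapFile();             // shared handle table
    auto& syncpoints = container.GetSyncpointManager(); // shared syncpoint state
    auto& host1x_file = container.Host1xDeviceFile();   // nvdec/vic bookkeeping
    (void)nvmap;
    (void)syncpoints;
    (void)host1x_file;
}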
@@ -0,0 +1,272 @@

// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later

#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/memory.h"
#include "video_core/host1x/host1x.h"

using Core::Memory::YUZU_PAGESIZE;

namespace Service::Nvidia::NvCore {
NvMap::Handle::Handle(u64 size_, Id id_)
    : size(size_), aligned_size(size), orig_size(size), id(id_) {
    flags.raw = 0;
}

NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) {
    std::scoped_lock lock(mutex);

    // Handles cannot be allocated twice
    if (allocated) {
        return NvResult::AccessDenied;
    }

    flags = pFlags;
    kind = pKind;
    align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign;

    // This flag is only applicable for handles with an address passed
    if (pAddress) {
        flags.keep_uncached_after_free.Assign(0);
    } else {
        LOG_CRITICAL(Service_NVDRV,
                     "Mapping nvmap handles without a CPU side address is unimplemented!");
    }

    size = Common::AlignUp(size, YUZU_PAGESIZE);
    aligned_size = Common::AlignUp(size, align);
    address = pAddress;
    allocated = true;

    return NvResult::Success;
}

NvResult NvMap::Handle::Duplicate(bool internal_session) {
    std::scoped_lock lock(mutex);
    // Unallocated handles cannot be duplicated as duplication requires memory accounting (in HOS)
    if (!allocated) [[unlikely]] {
        return NvResult::BadValue;
    }

    // If we internally use FromId the duplication tracking of handles won't work accurately due to
    // us not implementing per-process handle refs.
    if (internal_session) {
        internal_dupes++;
    } else {
        dupes++;
    }

    return NvResult::Success;
}

NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {}

void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
    std::scoped_lock lock(handles_lock);

    handles.emplace(handle_description->id, std::move(handle_description));
}

void NvMap::UnmapHandle(Handle& handle_description) {
    // Remove pending unmap queue entry if needed
    if (handle_description.unmap_queue_entry) {
        unmap_queue.erase(*handle_description.unmap_queue_entry);
        handle_description.unmap_queue_entry.reset();
    }

    // Free and unmap the handle from the SMMU
    host1x.MemoryManager().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address),
                                 handle_description.aligned_size);
    host1x.Allocator().Free(handle_description.pin_virt_address,
                            static_cast<u32>(handle_description.aligned_size));
    handle_description.pin_virt_address = 0;
}

bool NvMap::TryRemoveHandle(const Handle& handle_description) {
    // No dupes left, we can remove from handle map
    if (handle_description.dupes == 0 && handle_description.internal_dupes == 0) {
        std::scoped_lock lock(handles_lock);

        auto it{handles.find(handle_description.id)};
        if (it != handles.end()) {
            handles.erase(it);
        }

        return true;
    } else {
        return false;
    }
}

NvResult NvMap::CreateHandle(u64 size, std::shared_ptr<NvMap::Handle>& result_out) {
    if (!size) [[unlikely]] {
        return NvResult::BadValue;
    }

    u32 id{next_handle_id.fetch_add(HandleIdIncrement, std::memory_order_relaxed)};
    auto handle_description{std::make_shared<Handle>(size, id)};
    AddHandle(handle_description);

    result_out = handle_description;
    return NvResult::Success;
}

std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) {
    std::scoped_lock lock(handles_lock);
    try {
        return handles.at(handle);
    } catch (std::out_of_range&) {
        return nullptr;
    }
}

VAddr NvMap::GetHandleAddress(Handle::Id handle) {
    std::scoped_lock lock(handles_lock);
    try {
        return handles.at(handle)->address;
    } catch (std::out_of_range&) {
        return 0;
    }
}

u32 NvMap::PinHandle(NvMap::Handle::Id handle) {
    auto handle_description{GetHandle(handle)};
    if (!handle_description) [[unlikely]] {
        return 0;
    }

    std::scoped_lock lock(handle_description->mutex);
    if (!handle_description->pins) {
        // If we're in the unmap queue we can just remove ourselves and return since we're already
        // mapped
        {
            // Lock now to prevent our queue entry from being removed for allocation in-between the
            // following check and erase
            std::scoped_lock queueLock(unmap_queue_lock);
            if (handle_description->unmap_queue_entry) {
                unmap_queue.erase(*handle_description->unmap_queue_entry);
                handle_description->unmap_queue_entry.reset();

                handle_description->pins++;
                return handle_description->pin_virt_address;
            }
        }

        // If not then allocate some space and map it
        u32 address{};
        auto& smmu_allocator = host1x.Allocator();
        auto& smmu_memory_manager = host1x.MemoryManager();
        while (!(address =
                     smmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size)))) {
            // Free handles until the allocation succeeds
            std::scoped_lock queueLock(unmap_queue_lock);
            if (auto freeHandleDesc{unmap_queue.front()}) {
                // Handles in the unmap queue are guaranteed not to be pinned so don't bother
                // checking if they are before unmapping
                std::scoped_lock freeLock(freeHandleDesc->mutex);
                if (freeHandleDesc->pin_virt_address) {
                    UnmapHandle(*freeHandleDesc);
                }
            } else {
                LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
            }
        }

        smmu_memory_manager.Map(static_cast<GPUVAddr>(address), handle_description->address,
                                handle_description->aligned_size);
        handle_description->pin_virt_address = address;
    }

    handle_description->pins++;
    return handle_description->pin_virt_address;
}

void NvMap::UnpinHandle(Handle::Id handle) {
    auto handle_description{GetHandle(handle)};
    if (!handle_description) {
        return;
    }

    std::scoped_lock lock(handle_description->mutex);
    if (--handle_description->pins < 0) {
        LOG_WARNING(Service_NVDRV, "Pin count imbalance detected!");
    } else if (!handle_description->pins) {
        std::scoped_lock queueLock(unmap_queue_lock);

        // Add to the unmap queue allowing this handle's memory to be freed if needed
        unmap_queue.push_back(handle_description);
        handle_description->unmap_queue_entry = std::prev(unmap_queue.end());
    }
}

void NvMap::DuplicateHandle(Handle::Id handle, bool internal_session) {
    auto handle_description{GetHandle(handle)};
    if (!handle_description) {
        LOG_CRITICAL(Service_NVDRV, "Unregistered handle!");
        return;
    }

    auto result = handle_description->Duplicate(internal_session);
    if (result != NvResult::Success) {
        LOG_CRITICAL(Service_NVDRV, "Could not duplicate handle!");
    }
}

std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool internal_session) {
    std::weak_ptr<Handle> hWeak{GetHandle(handle)};
    FreeInfo freeInfo;

    // We use a weak ptr here so we can tell when the handle has been freed and report that back to
    // the guest
    if (auto handle_description = hWeak.lock()) {
        std::scoped_lock lock(handle_description->mutex);

        if (internal_session) {
            if (--handle_description->internal_dupes < 0) {
                LOG_WARNING(Service_NVDRV, "Internal duplicate count imbalance detected!");
            }
        } else {
            if (--handle_description->dupes < 0) {
                LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!");
            } else if (handle_description->dupes == 0) {
                // Force unmap the handle
                if (handle_description->pin_virt_address) {
                    std::scoped_lock queueLock(unmap_queue_lock);
                    UnmapHandle(*handle_description);
                }

                handle_description->pins = 0;
            }
        }

        // Try to remove the shared ptr to the handle from the map, if nothing else is using the
        // handle then it will now be freed when `handle_description` goes out of scope
        if (TryRemoveHandle(*handle_description)) {
            LOG_DEBUG(Service_NVDRV, "Removed nvmap handle: {}", handle);
        } else {
            LOG_DEBUG(Service_NVDRV,
                      "Tried to free nvmap handle: {} but didn't as it still has duplicates",
                      handle);
        }

        freeInfo = {
            .address = handle_description->address,
            .size = handle_description->size,
            .was_uncached = handle_description->flags.map_uncached.Value() != 0,
        };
    } else {
        return std::nullopt;
    }

    // Handle hasn't been freed from memory, set address to 0 to mark that the handle wasn't freed
    if (!hWeak.expired()) {
        LOG_DEBUG(Service_NVDRV, "nvmap handle: {} wasn't freed as it is still in use", handle);
        freeInfo.address = 0;
    }

    return freeInfo;
}

} // namespace Service::Nvidia::NvCore
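A sketch of the handle lifecycle implemented above; the size, alignment, and address are illustrative values that would normally come from the guest IPC request:

#include "core/hle/service/nvdrv/core/nvmap.h"

void NvMapLifecycleSketch(Service::Nvidia::NvCore::NvMap& nvmap) {
    using Service::Nvidia::NvCore::NvMap;

    std::shared_ptr<NvMap::Handle> handle;
    if (nvmap.CreateHandle(0x10000, handle) != Service::Nvidia::NvResult::Success) {
        return;
    }

    NvMap::Handle::Flags flags{};
    if (handle->Alloc(flags, 0x1000, 0, 0xDEADB000) == Service::Nvidia::NvResult::Success) {
        const u32 smmu_addr = nvmap.PinHandle(handle->id); // refcounted SMMU map
        nvmap.UnpinHandle(handle->id);                     // queues for lazy unmap
        nvmap.FreeHandle(handle->id, false);               // drops the guest dupe
        (void)smmu_addr;
    }
}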
@@ -0,0 +1,175 @@

// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <atomic>
#include <list>
#include <memory>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <assert.h>

#include "common/bit_field.h"
#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"

namespace Tegra {

namespace Host1x {
class Host1x;
} // namespace Host1x

} // namespace Tegra

namespace Service::Nvidia::NvCore {
/**
 * @brief The nvmap core class holds the global state for nvmap and provides methods to manage
 * handles
 */
class NvMap {
public:
    /**
     * @brief A handle to a contiguous block of memory in an application's address space
     */
    struct Handle {
        std::mutex mutex;

        u64 align{};      //!< The alignment to use when pinning the handle onto the SMMU
        u64 size;         //!< Page-aligned size of the memory the handle refers to
        u64 aligned_size; //!< `align`-aligned size of the memory the handle refers to
        u64 orig_size;    //!< Original unaligned size of the memory this handle refers to

        s32 dupes{1};          //!< How many guest references there are to this handle
        s32 internal_dupes{0}; //!< How many emulator-internal references there are to this handle

        using Id = u32;
        Id id; //!< A globally unique identifier for this handle

        s32 pins{};
        u32 pin_virt_address{};
        std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{};

        union Flags {
            u32 raw;
            BitField<0, 1, u32> map_uncached; //!< If the handle should be mapped as uncached
            BitField<2, 1, u32> keep_uncached_after_free; //!< Only applicable when the handle was
                                                          //!< allocated with a fixed address
            BitField<4, 1, u32> _unk0_;                   //!< Passed to IOVMM for pins
        } flags{};
        static_assert(sizeof(Flags) == sizeof(u32));

        u64 address{}; //!< The memory location in the guest's AS that this handle corresponds to,
                       //!< this can also be in the nvdrv tmem
        bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
                                     //!< call

        u8 kind{};        //!< Used for memory compression
        bool allocated{}; //!< If the handle has been allocated with `Alloc`

        u64 dma_map_addr{}; //! remove me after implementing pinning.

        Handle(u64 size, Id id);

        /**
         * @brief Sets up the handle with the given memory config, can allocate memory from the tmem
         * if a 0 address is passed
         */
        [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress);

        /**
         * @brief Increases the dupe counter of the handle for the given session
         */
        [[nodiscard]] NvResult Duplicate(bool internal_session);

        /**
         * @brief Obtains a pointer to the handle's memory and marks the handle as having been
         * mapped
         */
        u8* GetPointer() {
            if (!address) {
                return nullptr;
            }

            is_shared_mem_mapped = true;
            return reinterpret_cast<u8*>(address);
        }
    };

    /**
     * @brief Encapsulates the result of a FreeHandle operation
     */
    struct FreeInfo {
        u64 address;       //!< Address the handle referred to before deletion
        u64 size;          //!< Page-aligned handle size
        bool was_uncached; //!< If the handle was allocated as uncached
    };

    explicit NvMap(Tegra::Host1x::Host1x& host1x);

    /**
     * @brief Creates an unallocated handle of the given size
     */
    [[nodiscard]] NvResult CreateHandle(u64 size, std::shared_ptr<NvMap::Handle>& result_out);

    std::shared_ptr<Handle> GetHandle(Handle::Id handle);

    VAddr GetHandleAddress(Handle::Id handle);

    /**
     * @brief Maps a handle into the SMMU address space
     * @note This operation is refcounted, the number of calls to this must eventually match the
     * number of calls to `UnpinHandle`
     * @return The SMMU virtual address that the handle has been mapped to
     */
    u32 PinHandle(Handle::Id handle);

    /**
     * @brief When this has been called an equal number of times to `PinHandle` for the supplied
     * handle it will be added to a list of handles to be freed when necessary
     */
    void UnpinHandle(Handle::Id handle);

    /**
     * @brief Tries to duplicate a handle
     */
    void DuplicateHandle(Handle::Id handle, bool internal_session = false);

    /**
     * @brief Tries to free a handle and remove a single dupe
     * @note If a handle has no dupes left and has no other users a FreeInfo struct will be returned
     * describing the prior state of the handle
     */
    std::optional<FreeInfo> FreeHandle(Handle::Id handle, bool internal_session);

private:
    std::list<std::shared_ptr<Handle>> unmap_queue{};
    std::mutex unmap_queue_lock{}; //!< Protects access to `unmap_queue`

    std::unordered_map<Handle::Id, std::shared_ptr<Handle>>
        handles{};           //!< Main owning map of handles
    std::mutex handles_lock; //!< Protects access to `handles`

    static constexpr u32 HandleIdIncrement{
        4}; //!< Each new handle ID is an increment of 4 from the previous
    std::atomic<u32> next_handle_id{HandleIdIncrement};
    Tegra::Host1x::Host1x& host1x;

    void AddHandle(std::shared_ptr<Handle> handle);

    /**
     * @brief Unmaps and frees the SMMU memory region a handle is mapped to
     * @note Both `unmap_queue_lock` and `handle_description.mutex` MUST be locked when calling this
     */
    void UnmapHandle(Handle& handle_description);

    /**
     * @brief Removes a handle from the map taking its dupes into account
     * @note handle_description.mutex MUST be locked when calling this
     * @return If the handle was removed from the map
     */
    bool TryRemoveHandle(const Handle& handle_description);
};
} // namespace Service::Nvidia::NvCore
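A small sketch of how the Flags union above exposes individual bits of the raw value through yuzu's BitField helper; the values are made up:

#include "core/hle/service/nvdrv/core/nvmap.h"

void FlagsSketch() {
    Service::Nvidia::NvCore::NvMap::Handle::Flags flags{};
    flags.raw = 0b101; // set bits 0 and 2
    const bool uncached = flags.map_uncached.Value() != 0;         // bit 0
    const bool keep = flags.keep_uncached_after_free.Value() != 0; // bit 2
    (void)uncached;
    (void)keep;
}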
@@ -0,0 +1,121 @@

// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later

#include "common/assert.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "video_core/host1x/host1x.h"

namespace Service::Nvidia::NvCore {

SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {
    constexpr u32 VBlank0SyncpointId{26};
    constexpr u32 VBlank1SyncpointId{27};

    // Reserve both vblank syncpoints as client managed as they use Continuous Mode
    // Refer to section 14.3.5.3 of the TRM for more information on Continuous Mode
    // https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/drm/dc.c#L660
    ReserveSyncpoint(VBlank0SyncpointId, true);
    ReserveSyncpoint(VBlank1SyncpointId, true);

    for (u32 syncpoint_id : channel_syncpoints) {
        if (syncpoint_id) {
            ReserveSyncpoint(syncpoint_id, false);
        }
    }
}

SyncpointManager::~SyncpointManager() = default;

u32 SyncpointManager::ReserveSyncpoint(u32 id, bool client_managed) {
    if (syncpoints.at(id).reserved) {
        ASSERT_MSG(false, "Requested syncpoint is in use");
        return 0;
    }

    syncpoints.at(id).reserved = true;
    syncpoints.at(id).interface_managed = client_managed;

    return id;
}

u32 SyncpointManager::FindFreeSyncpoint() {
    for (u32 i{1}; i < syncpoints.size(); i++) {
        if (!syncpoints[i].reserved) {
            return i;
        }
    }
    ASSERT_MSG(false, "Failed to find a free syncpoint!");
    return 0;
}

u32 SyncpointManager::AllocateSyncpoint(bool client_managed) {
    std::lock_guard lock(reservation_lock);
    return ReserveSyncpoint(FindFreeSyncpoint(), client_managed);
}

void SyncpointManager::FreeSyncpoint(u32 id) {
    std::lock_guard lock(reservation_lock);
    ASSERT(syncpoints.at(id).reserved);
    syncpoints.at(id).reserved = false;
}

bool SyncpointManager::IsSyncpointAllocated(u32 id) {
    return (id < SyncpointCount) && syncpoints[id].reserved;
}

bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) const {
    const SyncpointInfo& syncpoint{syncpoints.at(id)};

    if (!syncpoint.reserved) {
        ASSERT(false);
        return false;
    }

    // If the interface manages the counters then we don't keep track of the maximum value, as the
    // client interface is responsible for sanity checking the values itself
    if (syncpoint.interface_managed) {
        return static_cast<s32>(syncpoint.counter_min - threshold) >= 0;
    } else {
        return (syncpoint.counter_max - threshold) >= (syncpoint.counter_min - threshold);
    }
}

u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) {
    if (!syncpoints.at(id).reserved) {
        ASSERT(false);
        return 0;
    }

    return syncpoints.at(id).counter_max += amount;
}

u32 SyncpointManager::ReadSyncpointMinValue(u32 id) {
    if (!syncpoints.at(id).reserved) {
        ASSERT(false);
        return 0;
    }

    return syncpoints.at(id).counter_min;
}

u32 SyncpointManager::UpdateMin(u32 id) {
    if (!syncpoints.at(id).reserved) {
        ASSERT(false);
        return 0;
    }

    syncpoints.at(id).counter_min = host1x.GetSyncpointManager().GetHostSyncpointValue(id);
    return syncpoints.at(id).counter_min;
}

NvFence SyncpointManager::GetSyncpointFence(u32 id) {
    if (!syncpoints.at(id).reserved) {
        ASSERT(false);
        return NvFence{};
    }

    return {.id = static_cast<s32>(id), .value = syncpoints.at(id).counter_max};
}

} // namespace Service::Nvidia::NvCore
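A standalone illustration of the wrap-safe comparison used by HasSyncpointExpired above: subtracting in unsigned space and reinterpreting the difference as signed keeps the test correct across 32-bit counter wraparound.

#include <cstdint>

bool HasPassed(uint32_t counter, uint32_t threshold) {
    // (counter - threshold) wraps modulo 2^32; a "small" positive difference
    // means the counter is at or past the threshold
    return static_cast<int32_t>(counter - threshold) >= 0;
}

// e.g. HasPassed(0x00000002, 0xFFFFFFFE) is true: the counter wrapped past the
// threshold even though 2 < 0xFFFFFFFE numerically.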
@ -0,0 +1,134 @@
|
||||
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/service/nvdrv/nvdata.h"
|
||||
|
||||
namespace Tegra::Host1x {
|
||||
class Host1x;
|
||||
} // namespace Tegra::Host1x
|
||||
|
||||
namespace Service::Nvidia::NvCore {
|
||||
|
||||
enum class ChannelType : u32 {
|
||||
MsEnc = 0,
|
||||
VIC = 1,
|
||||
GPU = 2,
|
||||
NvDec = 3,
|
||||
Display = 4,
|
||||
NvJpg = 5,
|
||||
TSec = 6,
|
||||
Max = 7
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief SyncpointManager handles allocating and accessing host1x syncpoints, these are cached
|
||||
* versions of the HW syncpoints which are intermittently synced
|
||||
* @note Refer to Chapter 14 of the Tegra X1 TRM for an exhaustive overview of them
|
||||
* @url https://http.download.nvidia.com/tegra-public-appnotes/host1x.html
|
||||
* @url
|
||||
 * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/jetson-tx1/drivers/video/tegra/host/nvhost_syncpt.c
 */
class SyncpointManager final {
public:
    explicit SyncpointManager(Tegra::Host1x::Host1x& host1x);
    ~SyncpointManager();

    /**
     * @brief Checks if the given syncpoint is both allocated and below the number of HW syncpoints
     */
    bool IsSyncpointAllocated(u32 id);

    /**
     * @brief Finds a free syncpoint and reserves it
     * @return The ID of the reserved syncpoint
     */
    u32 AllocateSyncpoint(bool client_managed);

    /**
     * @url
     * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/syncpt.c#L259
     */
    bool HasSyncpointExpired(u32 id, u32 threshold) const;

    bool IsFenceSignalled(NvFence fence) const {
        return HasSyncpointExpired(fence.id, fence.value);
    }

    /**
     * @brief Atomically increments the maximum value of a syncpoint by the given amount
     * @return The new max value of the syncpoint
     */
    u32 IncrementSyncpointMaxExt(u32 id, u32 amount);

    /**
     * @return The minimum value of the syncpoint
     */
    u32 ReadSyncpointMinValue(u32 id);

    /**
     * @brief Synchronises the minimum value of the syncpoint with the GPU
     * @return The new minimum value of the syncpoint
     */
    u32 UpdateMin(u32 id);

    /**
     * @brief Frees the usage of a syncpoint.
     */
    void FreeSyncpoint(u32 id);

    /**
     * @return A fence that will be signalled once this syncpoint hits its maximum value
     */
    NvFence GetSyncpointFence(u32 id);

    static constexpr std::array<u32, static_cast<u32>(ChannelType::Max)> channel_syncpoints{
        0x0,  // `MsEnc` is unimplemented
        0xC,  // `VIC`
        0x0,  // `GPU` syncpoints are allocated per-channel instead
        0x36, // `NvDec`
        0x0,  // `Display` is unimplemented
        0x37, // `NvJpg`
        0x0,  // `TSec` is unimplemented
    }; //!< Maps each channel ID to a constant syncpoint

private:
    /**
     * @note reservation_lock should be locked when calling this
     */
    u32 ReserveSyncpoint(u32 id, bool client_managed);

    /**
     * @return The ID of the first free syncpoint
     */
    u32 FindFreeSyncpoint();

    struct SyncpointInfo {
        std::atomic<u32> counter_min; //!< The least value the syncpoint can be (The value it was
                                      //!< when it was last synchronized with host1x)
        std::atomic<u32> counter_max; //!< The maximum value the syncpoint can reach according to
                                      //!< the current usage
        bool interface_managed; //!< If the syncpoint is managed by a host1x client interface, a
                                //!< client interface is a HW block that can handle host1x
                                //!< transactions on behalf of a host1x client (Which would
                                //!< otherwise need to be manually synced using PIO which is
                                //!< synchronous and requires direct cooperation of the CPU)
        bool reserved; //!< If the syncpoint is reserved or not, not to be confused with a reserved
                       //!< value
    };

    constexpr static std::size_t SyncpointCount{192};

    std::array<SyncpointInfo, SyncpointCount> syncpoints{};
    std::mutex reservation_lock;

    Tegra::Host1x::Host1x& host1x;
};

} // namespace Service::Nvidia::NvCore
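The interface above pairs each syncpoint with a min/max counter and hands fences back to the client. A minimal caller-side sketch of the intended fence lifecycle (not part of the diff; SubmitJob, syncpoint_manager, and job_increments are illustrative names only):

// Hypothetical usage sketch of the NvCore SyncpointManager API above.
void SubmitJob(Service::Nvidia::NvCore::SyncpointManager& syncpoint_manager, u32 job_increments) {
    const u32 id = syncpoint_manager.AllocateSyncpoint(true); // client-managed syncpoint
    syncpoint_manager.IncrementSyncpointMaxExt(id, job_increments);
    const NvFence fence = syncpoint_manager.GetSyncpointFence(id);
    // Poll until the host1x counter catches up with the fence value.
    while (!syncpoint_manager.IsFenceSignalled(fence)) {
        syncpoint_manager.UpdateMin(id);
    }
    syncpoint_manager.FreeSyncpoint(id);
}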
@ -1,38 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "video_core/gpu.h"

namespace Service::Nvidia {

SyncpointManager::SyncpointManager(Tegra::GPU& gpu_) : gpu{gpu_} {}

SyncpointManager::~SyncpointManager() = default;

u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
    syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id);
    return GetSyncpointMin(syncpoint_id);
}

u32 SyncpointManager::AllocateSyncpoint() {
    for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
        if (!syncpoints[syncpoint_id].is_allocated) {
            syncpoints[syncpoint_id].is_allocated = true;
            return syncpoint_id;
        }
    }
    ASSERT_MSG(false, "No more available syncpoints!");
    return {};
}

u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
    for (u32 index = 0; index < value; ++index) {
        syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed);
    }

    return GetSyncpointMax(syncpoint_id);
}

} // namespace Service::Nvidia
@ -1,84 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <array>
#include <atomic>

#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"

namespace Tegra {
class GPU;
}

namespace Service::Nvidia {

class SyncpointManager final {
public:
    explicit SyncpointManager(Tegra::GPU& gpu_);
    ~SyncpointManager();

    /**
     * Returns true if the specified syncpoint is expired for the given value.
     * @param syncpoint_id Syncpoint ID to check.
     * @param value Value to check against the specified syncpoint.
     * @returns True if the specified syncpoint is expired for the given value, otherwise False.
     */
    bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
        return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
    }

    /**
     * Gets the lower bound for the specified syncpoint.
     * @param syncpoint_id Syncpoint ID to get the lower bound for.
     * @returns The lower bound for the specified syncpoint.
     */
    u32 GetSyncpointMin(u32 syncpoint_id) const {
        return syncpoints.at(syncpoint_id).min.load(std::memory_order_relaxed);
    }

    /**
     * Gets the upper bound for the specified syncpoint.
     * @param syncpoint_id Syncpoint ID to get the upper bound for.
     * @returns The upper bound for the specified syncpoint.
     */
    u32 GetSyncpointMax(u32 syncpoint_id) const {
        return syncpoints.at(syncpoint_id).max.load(std::memory_order_relaxed);
    }

    /**
     * Refreshes the minimum value for the specified syncpoint.
     * @param syncpoint_id Syncpoint ID to be refreshed.
     * @returns The new syncpoint minimum value.
     */
    u32 RefreshSyncpoint(u32 syncpoint_id);

    /**
     * Allocates a new syncpoint.
     * @returns The syncpoint ID for the newly allocated syncpoint.
     */
    u32 AllocateSyncpoint();

    /**
     * Increases the maximum value for the specified syncpoint.
     * @param syncpoint_id Syncpoint ID to be increased.
     * @param value Value to increase the specified syncpoint by.
     * @returns The new syncpoint maximum value.
     */
    u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);

private:
    struct Syncpoint {
        std::atomic<u32> min;
        std::atomic<u32> max;
        std::atomic<bool> is_allocated;
    };

    std::array<Syncpoint, MaxSyncPoints> syncpoints{};

    Tegra::GPU& gpu;
};

} // namespace Service::Nvidia
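The removed IsSyncpointExpired above leans on unsigned wraparound: subtracting value from both counters re-bases the comparison so it stays correct even after the u32 counters overflow. A standalone illustration of that arithmetic (not project code):

#include <cassert>
#include <cstdint>

// Wraparound-safe expiry test, as used by the removed IsSyncpointExpired.
bool IsExpired(std::uint32_t min, std::uint32_t max, std::uint32_t value) {
    return (max - value) >= (min - value);
}

int main() {
    assert(IsExpired(5, 10, 3));   // min already passed the fence value
    assert(!IsExpired(5, 10, 7));  // fence value still pending between min and max
    assert(IsExpired(0x00000002u, 0x00000005u, 0xFFFFFFF0u)); // counters wrapped past the value
    return 0;
}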
@ -1,29 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "video_core/command_classes/host1x.h"
#include "video_core/gpu.h"

Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}

Tegra::Host1x::~Host1x() = default;

void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
    switch (method) {
    case Method::LoadSyncptPayload32:
        syncpoint_value = argument;
        break;
    case Method::WaitSyncpt:
    case Method::WaitSyncpt32:
        Execute(argument);
        break;
    default:
        UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
        break;
    }
}

void Tegra::Host1x::Execute(u32 data) {
    gpu.WaitFence(data, syncpoint_value);
}
@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#include "common/assert.h"
#include "video_core/control/channel_state.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/puller.h"
#include "video_core/memory_manager.h"

namespace Tegra::Control {

ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}

void ChannelState::Init(Core::System& system, GPU& gpu) {
    ASSERT(memory_manager);
    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
    fermi_2d = std::make_unique<Engines::Fermi2D>();
    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
    initialized = true;
}

void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
    dma_pusher->BindRasterizer(rasterizer);
    memory_manager->BindRasterizer(rasterizer);
    maxwell_3d->BindRasterizer(rasterizer);
    fermi_2d->BindRasterizer(rasterizer);
    kepler_memory->BindRasterizer(rasterizer);
    kepler_compute->BindRasterizer(rasterizer);
    maxwell_dma->BindRasterizer(rasterizer);
}

} // namespace Tegra::Control
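Init asserts that a memory manager has already been attached, which fixes the setup order for a channel. A hedged sketch of that order (not part of the diff; system, gpu, rasterizer, and memory_manager are assumed to exist in the caller):

// Hypothetical channel setup sketch for the file above.
auto channel = std::make_shared<Tegra::Control::ChannelState>(/*bind_id=*/0);
channel->memory_manager = memory_manager; // must be attached before Init, per the ASSERT
channel->Init(system, gpu);               // constructs the DMA pusher and all engines
channel->BindRasterizer(&rasterizer);     // wires every engine to the rendering backend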
@ -0,0 +1,68 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <memory>

#include "common/common_types.h"

namespace Core {
class System;
}

namespace VideoCore {
class RasterizerInterface;
}

namespace Tegra {

class GPU;

namespace Engines {
class Puller;
class Fermi2D;
class Maxwell3D;
class MaxwellDMA;
class KeplerCompute;
class KeplerMemory;
} // namespace Engines

class MemoryManager;
class DmaPusher;

namespace Control {

struct ChannelState {
    explicit ChannelState(s32 bind_id);
    ChannelState(const ChannelState& state) = delete;
    ChannelState& operator=(const ChannelState&) = delete;
    ChannelState(ChannelState&& other) noexcept = default;
    ChannelState& operator=(ChannelState&& other) noexcept = default;

    void Init(Core::System& system, GPU& gpu);

    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    s32 bind_id = -1;
    /// 3D engine
    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
    /// 2D engine
    std::unique_ptr<Engines::Fermi2D> fermi_2d;
    /// Compute engine
    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
    /// DMA engine
    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
    /// Inline memory engine
    std::unique_ptr<Engines::KeplerMemory> kepler_memory;

    std::shared_ptr<MemoryManager> memory_manager;

    std::unique_ptr<DmaPusher> dma_pusher;

    bool initialized{};
};

} // namespace Control

} // namespace Tegra
@ -0,0 +1,14 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#include "video_core/control/channel_state_cache.inc"

namespace VideoCommon {

ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
    : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
      gpu_memory{*channel_state.memory_manager} {}

template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;

} // namespace VideoCommon
@ -0,0 +1,101 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <deque>
#include <limits>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <vector>

#include "common/common_types.h"

namespace Tegra {

namespace Engines {
class Maxwell3D;
class KeplerCompute;
} // namespace Engines

class MemoryManager;

namespace Control {
struct ChannelState;
}

} // namespace Tegra

namespace VideoCommon {

class ChannelInfo {
public:
    ChannelInfo() = delete;
    explicit ChannelInfo(Tegra::Control::ChannelState& state);
    ChannelInfo(const ChannelInfo& state) = delete;
    ChannelInfo& operator=(const ChannelInfo&) = delete;
    ChannelInfo(ChannelInfo&& other) = default;
    ChannelInfo& operator=(ChannelInfo&& other) = default;

    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::Engines::KeplerCompute& kepler_compute;
    Tegra::MemoryManager& gpu_memory;
};

template <class P>
class ChannelSetupCaches {
public:
    /// Operations for setting the channel of execution.
    virtual ~ChannelSetupCaches();

    /// Create channel state.
    virtual void CreateChannel(Tegra::Control::ChannelState& channel);

    /// Bind a channel for execution.
    void BindToChannel(s32 id);

    /// Erase channel's state.
    void EraseChannel(s32 id);

    Tegra::MemoryManager* GetFromID(size_t id) const {
        std::unique_lock<std::mutex> lk(config_mutex);
        const auto ref = address_spaces.find(id);
        return ref->second.gpu_memory;
    }

    std::optional<size_t> getStorageID(size_t id) const {
        std::unique_lock<std::mutex> lk(config_mutex);
        const auto ref = address_spaces.find(id);
        if (ref == address_spaces.end()) {
            return std::nullopt;
        }
        return ref->second.storage_id;
    }

protected:
    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};

    P* channel_state;
    size_t current_channel_id{UNSET_CHANNEL};
    size_t current_address_space{};
    Tegra::Engines::Maxwell3D* maxwell3d;
    Tegra::Engines::KeplerCompute* kepler_compute;
    Tegra::MemoryManager* gpu_memory;

    std::deque<P> channel_storage;
    std::deque<size_t> free_channel_ids;
    std::unordered_map<s32, size_t> channel_map;
    std::vector<size_t> active_channel_ids;
    struct AddresSpaceRef {
        size_t ref_count;
        size_t storage_id;
        Tegra::MemoryManager* gpu_memory;
    };
    std::unordered_map<size_t, AddresSpaceRef> address_spaces;
    mutable std::mutex config_mutex;

    virtual void OnGPUASRegister([[maybe_unused]] size_t map_id) {}
};

} // namespace VideoCommon
@ -0,0 +1,86 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#include <algorithm>

#include "common/assert.h"
#include "video_core/control/channel_state.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"

namespace VideoCommon {

template <class P>
ChannelSetupCaches<P>::~ChannelSetupCaches() = default;

template <class P>
void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
    std::unique_lock<std::mutex> lk(config_mutex);
    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
    auto new_id = [this, &channel]() {
        if (!free_channel_ids.empty()) {
            auto id = free_channel_ids.front();
            free_channel_ids.pop_front();
            new (&channel_storage[id]) P(channel);
            return id;
        }
        channel_storage.emplace_back(channel);
        return channel_storage.size() - 1;
    }();
    channel_map.emplace(channel.bind_id, new_id);
    if (current_channel_id != UNSET_CHANNEL) {
        channel_state = &channel_storage[current_channel_id];
    }
    active_channel_ids.push_back(new_id);
    auto as_it = address_spaces.find(channel.memory_manager->GetID());
    if (as_it != address_spaces.end()) {
        as_it->second.ref_count++;
        return;
    }
    AddresSpaceRef new_gpu_mem_ref{
        .ref_count = 1,
        .storage_id = address_spaces.size(),
        .gpu_memory = channel.memory_manager.get(),
    };
    address_spaces.emplace(channel.memory_manager->GetID(), new_gpu_mem_ref);
    OnGPUASRegister(channel.memory_manager->GetID());
}

/// Bind a channel for execution.
template <class P>
void ChannelSetupCaches<P>::BindToChannel(s32 id) {
    std::unique_lock<std::mutex> lk(config_mutex);
    auto it = channel_map.find(id);
    ASSERT(it != channel_map.end() && id >= 0);
    current_channel_id = it->second;
    channel_state = &channel_storage[current_channel_id];
    maxwell3d = &channel_state->maxwell3d;
    kepler_compute = &channel_state->kepler_compute;
    gpu_memory = &channel_state->gpu_memory;
    current_address_space = gpu_memory->GetID();
}

/// Erase channel's state.
template <class P>
void ChannelSetupCaches<P>::EraseChannel(s32 id) {
    std::unique_lock<std::mutex> lk(config_mutex);
    const auto it = channel_map.find(id);
    ASSERT(it != channel_map.end() && id >= 0);
    const auto this_id = it->second;
    free_channel_ids.push_back(this_id);
    channel_map.erase(it);
    if (this_id == current_channel_id) {
        current_channel_id = UNSET_CHANNEL;
        channel_state = nullptr;
        maxwell3d = nullptr;
        kepler_compute = nullptr;
        gpu_memory = nullptr;
    } else if (current_channel_id != UNSET_CHANNEL) {
        channel_state = &channel_storage[current_channel_id];
    }
    active_channel_ids.erase(
        std::find(active_channel_ids.begin(), active_channel_ids.end(), this_id));
}

} // namespace VideoCommon
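CreateChannel and EraseChannel recycle storage slots through a free list instead of erasing deque entries, so the indices held by live channels stay stable. The pattern in isolation (a generic sketch, not project code; it copy-assigns where the real code placement-news a P over the retired slot):

#include <cstddef>
#include <deque>

// Generic slot-recycling sketch mirroring CreateChannel/EraseChannel above.
template <class P>
std::size_t AcquireSlot(std::deque<P>& storage, std::deque<std::size_t>& free_ids,
                        const P& value) {
    if (!free_ids.empty()) {
        const std::size_t id = free_ids.front();
        free_ids.pop_front();
        storage[id] = value; // reuse a retired slot; existing ids stay valid
        return id;
    }
    storage.push_back(value);
    return storage.size() - 1;
}

void ReleaseSlot(std::deque<std::size_t>& free_ids, std::size_t id) {
    free_ids.push_back(id); // the next AcquireSlot call reuses this id
}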
@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#include <memory>

#include "common/assert.h"
#include "video_core/control/channel_state.h"
#include "video_core/control/scheduler.h"
#include "video_core/gpu.h"

namespace Tegra::Control {
Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}

Scheduler::~Scheduler() = default;

void Scheduler::Push(s32 channel, CommandList&& entries) {
    std::unique_lock lk(scheduling_guard);
    auto it = channels.find(channel);
    ASSERT(it != channels.end());
    auto channel_state = it->second;
    gpu.BindChannel(channel_state->bind_id);
    channel_state->dma_pusher->Push(std::move(entries));
    channel_state->dma_pusher->DispatchCalls();
}

void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
    s32 channel = new_channel->bind_id;
    std::unique_lock lk(scheduling_guard);
    channels.emplace(channel, new_channel);
}

} // namespace Tegra::Control
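Push looks the channel up by its bind id before binding and dispatching, so a channel must be declared before any command list targets it. A hypothetical call sequence (SubmitToChannel, scheduler, channel, and commands are assumed names):

// Hypothetical caller of the Scheduler above.
void SubmitToChannel(Tegra::Control::Scheduler& scheduler,
                     std::shared_ptr<Tegra::Control::ChannelState> channel,
                     Tegra::CommandList&& commands) {
    scheduler.DeclareChannel(channel);                     // register bind_id -> state, once
    scheduler.Push(channel->bind_id, std::move(commands)); // bind the channel, push, dispatch
}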
@ -0,0 +1,37 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <memory>
#include <mutex>
#include <unordered_map>

#include "video_core/dma_pusher.h"

namespace Tegra {

class GPU;

namespace Control {

struct ChannelState;

class Scheduler {
public:
    explicit Scheduler(GPU& gpu_);
    ~Scheduler();

    void Push(s32 channel, CommandList&& entries);

    void DeclareChannel(std::shared_ptr<ChannelState> new_channel);

private:
    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
    std::mutex scheduling_guard;
    GPU& gpu;
};

} // namespace Control

} // namespace Tegra
@ -0,0 +1,306 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/control/channel_state.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/puller.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"

namespace Tegra::Engines {

Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
               Control::ChannelState& channel_state_)
    : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
                                                                               channel_state_} {}

Puller::~Puller() = default;

void Puller::ProcessBindMethod(const MethodCall& method_call) {
    // Bind the current subchannel to the desired engine id.
    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
              method_call.argument);
    const auto engine_id = static_cast<EngineID>(method_call.argument);
    bound_engines[method_call.subchannel] = engine_id;
    switch (engine_id) {
    case EngineID::FERMI_TWOD_A:
        dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
        break;
    case EngineID::MAXWELL_B:
        dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
        break;
    case EngineID::KEPLER_COMPUTE_B:
        dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
        break;
    case EngineID::MAXWELL_DMA_COPY_A:
        dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
        break;
    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
        dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
    }
}

void Puller::ProcessFenceActionMethod() {
    switch (regs.fence_action.op) {
    case Puller::FenceOperation::Acquire:
        // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
        // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
        rasterizer->ReleaseFences();
        break;
    case Puller::FenceOperation::Increment:
        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
    }
}

void Puller::ProcessSemaphoreTriggerMethod() {
    const auto semaphoreOperationMask = 0xF;
    const auto op =
        static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
    if (op == GpuSemaphoreOperation::WriteLong) {
        const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
        const u32 payload = regs.semaphore_sequence;
        std::function<void()> operation([this, sequence_address, payload] {
            memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks());
            memory_manager.Write<u64>(sequence_address, payload);
        });
        rasterizer->SignalFence(std::move(operation));
    } else {
        do {
            const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
            regs.acquire_source = true;
            regs.acquire_value = regs.semaphore_sequence;
            if (op == GpuSemaphoreOperation::AcquireEqual) {
                regs.acquire_active = true;
                regs.acquire_mode = false;
                if (word != regs.acquire_value) {
                    rasterizer->ReleaseFences();
                    continue;
                }
            } else if (op == GpuSemaphoreOperation::AcquireGequal) {
                regs.acquire_active = true;
                regs.acquire_mode = true;
                if (word < regs.acquire_value) {
                    rasterizer->ReleaseFences();
                    continue;
                }
            } else if (op == GpuSemaphoreOperation::AcquireMask) {
                if (word && regs.semaphore_sequence == 0) {
                    rasterizer->ReleaseFences();
                    continue;
                }
            } else {
                LOG_ERROR(HW_GPU, "Invalid semaphore operation");
            }
        } while (false);
    }
}

void Puller::ProcessSemaphoreRelease() {
    const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
    const u32 payload = regs.semaphore_release;
    std::function<void()> operation([this, sequence_address, payload] {
        memory_manager.Write<u32>(sequence_address, payload);
    });
    rasterizer->SyncOperation(std::move(operation));
}

void Puller::ProcessSemaphoreAcquire() {
    u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
    const auto value = regs.semaphore_acquire;
    while (word != value) {
        regs.acquire_active = true;
        regs.acquire_value = value;
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        rasterizer->ReleaseFences();
        word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
        // TODO(kemathe73) figure out how to do the acquire_timeout
        regs.acquire_mode = false;
        regs.acquire_source = false;
    }
}

/// Calls a GPU puller method.
void Puller::CallPullerMethod(const MethodCall& method_call) {
    regs.reg_array[method_call.method] = method_call.argument;
    const auto method = static_cast<BufferMethods>(method_call.method);

    switch (method) {
    case BufferMethods::BindObject: {
        ProcessBindMethod(method_call);
        break;
    }
    case BufferMethods::Nop:
    case BufferMethods::SemaphoreAddressHigh:
    case BufferMethods::SemaphoreAddressLow:
    case BufferMethods::SemaphoreSequencePayload:
    case BufferMethods::SyncpointPayload:
        break;
    case BufferMethods::WrcacheFlush:
    case BufferMethods::RefCnt:
        rasterizer->SignalReference();
        break;
    case BufferMethods::SyncpointOperation:
        ProcessFenceActionMethod();
        break;
    case BufferMethods::WaitForIdle:
        rasterizer->WaitForIdle();
        break;
    case BufferMethods::SemaphoreOperation: {
        ProcessSemaphoreTriggerMethod();
        break;
    }
    case BufferMethods::NonStallInterrupt: {
        LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
        break;
    }
    case BufferMethods::MemOpA: {
        LOG_ERROR(HW_GPU, "Memory Operation A");
        break;
    }
    case BufferMethods::MemOpB: {
        // Implement this better.
        rasterizer->InvalidateGPUCache();
        break;
    }
    case BufferMethods::MemOpC:
    case BufferMethods::MemOpD: {
        LOG_ERROR(HW_GPU, "Memory Operation C,D");
        break;
    }
    case BufferMethods::SemaphoreAcquire: {
        ProcessSemaphoreAcquire();
        break;
    }
    case BufferMethods::SemaphoreRelease: {
        ProcessSemaphoreRelease();
        break;
    }
    case BufferMethods::Yield: {
        // TODO(Kmather73): Research and implement this method.
        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
        break;
    }
    default:
        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
        break;
    }
}

/// Calls a GPU engine method.
void Puller::CallEngineMethod(const MethodCall& method_call) {
    const EngineID engine = bound_engines[method_call.subchannel];

    switch (engine) {
    case EngineID::FERMI_TWOD_A:
        channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
                                           method_call.IsLastCall());
        break;
    case EngineID::MAXWELL_B:
        channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
                                             method_call.IsLastCall());
        break;
    case EngineID::KEPLER_COMPUTE_B:
        channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
                                                 method_call.IsLastCall());
        break;
    case EngineID::MAXWELL_DMA_COPY_A:
        channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
                                              method_call.IsLastCall());
        break;
    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
        channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
                                                method_call.IsLastCall());
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented engine");
    }
}

/// Calls a GPU engine multivalue method.
void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
                                   u32 methods_pending) {
    const EngineID engine = bound_engines[subchannel];

    switch (engine) {
    case EngineID::FERMI_TWOD_A:
        channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
        break;
    case EngineID::MAXWELL_B:
        channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
        break;
    case EngineID::KEPLER_COMPUTE_B:
        channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
        break;
    case EngineID::MAXWELL_DMA_COPY_A:
        channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
        break;
    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
        channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented engine");
    }
}

/// Calls a GPU method.
void Puller::CallMethod(const MethodCall& method_call) {
    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
              method_call.subchannel);

    ASSERT(method_call.subchannel < bound_engines.size());

    if (ExecuteMethodOnEngine(method_call.method)) {
        CallEngineMethod(method_call);
    } else {
        CallPullerMethod(method_call);
    }
}

/// Calls a GPU multivalue method.
void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
                             u32 methods_pending) {
    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);

    ASSERT(subchannel < bound_engines.size());

    if (ExecuteMethodOnEngine(method)) {
        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
    } else {
        for (std::size_t i = 0; i < amount; i++) {
            CallPullerMethod(MethodCall{
                method,
                base_start[i],
                subchannel,
                methods_pending - static_cast<u32>(i),
            });
        }
    }
}

void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
    rasterizer = rasterizer_;
}

/// Determines where the method should be executed.
[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
    const auto buffer_method = static_cast<BufferMethods>(method);
    return buffer_method >= BufferMethods::NonPullerMethods;
}

} // namespace Tegra::Engines
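In the WriteLong path above, the two Write<u64> calls emit a 16-byte record: the payload at the semaphore address and the GPU tick counter 8 bytes past it. Sketched as a struct (illustrative name only; u64 is the project-wide alias from common_types):

// Illustrative layout of the record ProcessSemaphoreTriggerMethod writes.
struct SemaphoreRecord {
    u64 payload;   // from regs.semaphore_sequence, zero-extended to 64 bits
    u64 timestamp; // from gpu.GetTicks()
};
static_assert(sizeof(SemaphoreRecord) == 16);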
@ -0,0 +1,177 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <array>
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"

namespace Core {
class System;
}

namespace Tegra {
class MemoryManager;
class DmaPusher;

enum class EngineID {
    FERMI_TWOD_A = 0x902D, // 2D Engine
    MAXWELL_B = 0xB197,    // 3D Engine
    KEPLER_COMPUTE_B = 0xB1C0,
    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
    MAXWELL_DMA_COPY_A = 0xB0B5,
};

namespace Control {
struct ChannelState;
}
} // namespace Tegra

namespace VideoCore {
class RasterizerInterface;
}

namespace Tegra::Engines {

class Puller final {
public:
    struct MethodCall {
        u32 method{};
        u32 argument{};
        u32 subchannel{};
        u32 method_count{};

        explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
            : method(method_), argument(argument_), subchannel(subchannel_),
              method_count(method_count_) {}

        [[nodiscard]] bool IsLastCall() const {
            return method_count <= 1;
        }
    };

    enum class FenceOperation : u32 {
        Acquire = 0,
        Increment = 1,
    };

    union FenceAction {
        u32 raw;
        BitField<0, 1, FenceOperation> op;
        BitField<8, 24, u32> syncpoint_id;
    };

    explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
                    Control::ChannelState& channel_state);
    ~Puller();

    void CallMethod(const MethodCall& method_call);

    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
                         u32 methods_pending);

    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    void CallPullerMethod(const MethodCall& method_call);

    void CallEngineMethod(const MethodCall& method_call);

    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
                               u32 methods_pending);

private:
    Tegra::GPU& gpu;

    MemoryManager& memory_manager;
    DmaPusher& dma_pusher;
    Control::ChannelState& channel_state;
    VideoCore::RasterizerInterface* rasterizer = nullptr;

    static constexpr std::size_t NUM_REGS = 0x800;
    struct Regs {
        static constexpr size_t NUM_REGS = 0x40;

        union {
            struct {
                INSERT_PADDING_WORDS_NOINIT(0x4);
                struct {
                    u32 address_high;
                    u32 address_low;

                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                                     address_low);
                    }
                } semaphore_address;

                u32 semaphore_sequence;
                u32 semaphore_trigger;
                INSERT_PADDING_WORDS_NOINIT(0xC);

                // The pusher and the puller share the reference counter, the pusher only has read
                // access
                u32 reference_count;
                INSERT_PADDING_WORDS_NOINIT(0x5);

                u32 semaphore_acquire;
                u32 semaphore_release;
                u32 fence_value;
                FenceAction fence_action;
                INSERT_PADDING_WORDS_NOINIT(0xE2);

                // Puller state
                u32 acquire_mode;
                u32 acquire_source;
                u32 acquire_active;
                u32 acquire_timeout;
                u32 acquire_value;
            };
            std::array<u32, NUM_REGS> reg_array;
        };
    } regs{};

    void ProcessBindMethod(const MethodCall& method_call);
    void ProcessFenceActionMethod();
    void ProcessSemaphoreAcquire();
    void ProcessSemaphoreRelease();
    void ProcessSemaphoreTriggerMethod();
    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);

    /// Mapping of command subchannels to their bound engine ids
    std::array<EngineID, 8> bound_engines{};

    enum class GpuSemaphoreOperation {
        AcquireEqual = 0x1,
        WriteLong = 0x2,
        AcquireGequal = 0x4,
        AcquireMask = 0x8,
    };

#define ASSERT_REG_POSITION(field_name, position)                                                 \
    static_assert(offsetof(Regs, field_name) == position * 4,                                     \
                  "Field " #field_name " has invalid position")

    ASSERT_REG_POSITION(semaphore_address, 0x4);
    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
    ASSERT_REG_POSITION(reference_count, 0x14);
    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
    ASSERT_REG_POSITION(semaphore_release, 0x1B);
    ASSERT_REG_POSITION(fence_value, 0x1C);
    ASSERT_REG_POSITION(fence_action, 0x1D);

    ASSERT_REG_POSITION(acquire_mode, 0x100);
    ASSERT_REG_POSITION(acquire_source, 0x101);
    ASSERT_REG_POSITION(acquire_active, 0x102);
    ASSERT_REG_POSITION(acquire_timeout, 0x103);
    ASSERT_REG_POSITION(acquire_value, 0x104);

#undef ASSERT_REG_POSITION
};

} // namespace Tegra::Engines
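The FenceAction union above packs the operation into bit 0 and the syncpoint id into bits 8 through 31 of the same word. A worked decode using the declarations above (standalone; the literal values are illustrative):

// Worked example of the FenceAction bit layout declared above.
Tegra::Engines::Puller::FenceAction action{};
action.raw = (123u << 8) | 1u; // syncpoint 123, op = FenceOperation::Increment
// action.op.Value() reads back Increment; action.syncpoint_id.Value() reads back 123.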
@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#include "common/assert.h"
#include "video_core/host1x/control.h"
#include "video_core/host1x/host1x.h"

namespace Tegra::Host1x {

Control::Control(Host1x& host1x_) : host1x(host1x_) {}

Control::~Control() = default;

void Control::ProcessMethod(Method method, u32 argument) {
    switch (method) {
    case Method::LoadSyncptPayload32:
        syncpoint_value = argument;
        break;
    case Method::WaitSyncpt:
    case Method::WaitSyncpt32:
        Execute(argument);
        break;
    default:
        UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method));
        break;
    }
}

void Control::Execute(u32 data) {
    host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
}

} // namespace Tegra::Host1x
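The wait above is split across two methods: LoadSyncptPayload32 latches the threshold into syncpoint_value, then a WaitSyncpt method carries the syncpoint id. A hypothetical method stream (values are illustrative; Method is assumed to be the nested enum consumed by ProcessMethod):

// Hypothetical sequence driving the Control class above.
control.ProcessMethod(Tegra::Host1x::Control::Method::LoadSyncptPayload32, 100); // latch threshold
control.ProcessMethod(Tegra::Host1x::Control::Method::WaitSyncpt32, 5);          // wait on syncpoint 5
// Resulting call: host1x.GetSyncpointManager().WaitHost(5, 100)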