Merge pull request #5237 from ameerj/nvdec-syncpt

nvdec: Incorporate syncpoint manager
merge-requests/60/head
bunnei 2021-01-07 12:42:28 +07:00 committed by GitHub
commit aaf9e39f56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 67 additions and 94 deletions

@ -11,8 +11,9 @@
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
: nvhost_nvdec_common(system, std::move(nvmap_dev)) {} SyncpointManager& syncpoint_manager)
: nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
nvhost_nvdec::~nvhost_nvdec() = default; nvhost_nvdec::~nvhost_nvdec() = default;
NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,

@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices {
class nvhost_nvdec final : public nvhost_nvdec_common { class nvhost_nvdec final : public nvhost_nvdec_common {
public: public:
explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager);
~nvhost_nvdec() override; ~nvhost_nvdec() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;

@ -11,6 +11,7 @@
#include "core/core.h" #include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
#include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
} }
} // Anonymous namespace } // Anonymous namespace
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} SyncpointManager& syncpoint_manager)
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {}
nvhost_nvdec_common::~nvhost_nvdec_common() = default; nvhost_nvdec_common::~nvhost_nvdec_common() = default;
NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
@ -71,10 +73,15 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
offset = SpliceVectors(input, fences, params.fence_count, offset); offset = SpliceVectors(input, fences, params.fence_count, offset);
// TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment
auto& gpu = system.GPU(); auto& gpu = system.GPU();
if (gpu.UseNvdec()) {
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
const SyncptIncr& syncpt_incr = syncpt_increments[i];
fences[i].id = syncpt_incr.id;
fences[i].value =
syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
}
}
for (const auto& cmd_buffer : command_buffers) { for (const auto& cmd_buffer : command_buffers) {
auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
@ -89,7 +96,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
cmdlist.size() * sizeof(u32)); cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(cmdlist); gpu.PushCommandBuffer(cmdlist);
} }
if (gpu.UseNvdec()) {
fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
gpu.PushCommandBuffer(cmdlist);
}
std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
// Some games expect command_buffers to be written back // Some games expect command_buffers to be written back
offset = sizeof(IoctlSubmit); offset = sizeof(IoctlSubmit);
@ -98,6 +111,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
offset = WriteVectors(output, reloc_shifts, offset); offset = WriteVectors(output, reloc_shifts, offset);
offset = WriteVectors(output, syncpt_increments, offset); offset = WriteVectors(output, syncpt_increments, offset);
offset = WriteVectors(output, wait_checks, offset); offset = WriteVectors(output, wait_checks, offset);
offset = WriteVectors(output, fences, offset);
return NvResult::Success; return NvResult::Success;
} }
@ -107,9 +121,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
// We found that implementing this causes deadlocks with async gpu, along with degraded if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) {
// performance. TODO: RE the nvdec async implementation device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint();
params.value = 0; }
params.value = device_syncpoints[params.param];
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint)); std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
return NvResult::Success; return NvResult::Success;

@ -10,12 +10,16 @@
#include "common/swap.h" #include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Service::Nvidia::Devices { namespace Service::Nvidia {
class SyncpointManager;
namespace Devices {
class nvmap; class nvmap;
class nvhost_nvdec_common : public nvdevice { class nvhost_nvdec_common : public nvdevice {
public: public:
explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager);
~nvhost_nvdec_common() override; ~nvhost_nvdec_common() override;
protected: protected:
@ -157,8 +161,10 @@ protected:
s32_le nvmap_fd{}; s32_le nvmap_fd{};
u32_le submit_timeout{}; u32_le submit_timeout{};
std::shared_ptr<nvmap> nvmap_dev; std::shared_ptr<nvmap> nvmap_dev;
SyncpointManager& syncpoint_manager;
std::array<u32, MaxSyncPoints> device_syncpoints{};
// This is expected to be ordered, therefore we must use a map, not unordered_map // This is expected to be ordered, therefore we must use a map, not unordered_map
std::map<GPUVAddr, BufferMap> buffer_mappings; std::map<GPUVAddr, BufferMap> buffer_mappings;
}; };
}; // namespace Service::Nvidia::Devices }; // namespace Devices
} // namespace Service::Nvidia

@ -10,8 +10,9 @@
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
: nvhost_nvdec_common(system, std::move(nvmap_dev)) {} SyncpointManager& syncpoint_manager)
: nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
nvhost_vic::~nvhost_vic() = default; nvhost_vic::~nvhost_vic() = default;

@ -7,11 +7,11 @@
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
namespace Service::Nvidia::Devices { namespace Service::Nvidia::Devices {
class nvmap;
class nvhost_vic final : public nvhost_nvdec_common { class nvhost_vic final : public nvhost_nvdec_common {
public: public:
explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager);
~nvhost_vic(); ~nvhost_vic();
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;

@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
devices["/dev/nvhost-ctrl"] = devices["/dev/nvhost-ctrl"] =
std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); devices["/dev/nvhost-nvdec"] =
std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); devices["/dev/nvhost-vic"] =
std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);
} }
Module::~Module() = default; Module::~Module() = default;

@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_)
: gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
host1x_processor(std::make_unique<Host1x>(gpu)), host1x_processor(std::make_unique<Host1x>(gpu)),
nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
CDmaPusher::~CDmaPusher() = default; CDmaPusher::~CDmaPusher() = default;
@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
const auto syncpoint_id = static_cast<u32>(data & 0xFF); const auto syncpoint_id = static_cast<u32>(data & 0xFF);
const auto cond = static_cast<u32>((data >> 8) & 0xFF); const auto cond = static_cast<u32>((data >> 8) & 0xFF);
if (cond == 0) { if (cond == 0) {
nvdec_sync->Increment(syncpoint_id); sync_manager->Increment(syncpoint_id);
} else { } else {
nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); sync_manager->SignalDone(
nvdec_sync->SignalDone(syncpoint_id); sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
} }
break; break;
} }
@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
const auto syncpoint_id = static_cast<u32>(data & 0xFF); const auto syncpoint_id = static_cast<u32>(data & 0xFF);
const auto cond = static_cast<u32>((data >> 8) & 0xFF); const auto cond = static_cast<u32>((data >> 8) & 0xFF);
if (cond == 0) { if (cond == 0) {
vic_sync->Increment(syncpoint_id); sync_manager->Increment(syncpoint_id);
} else { } else {
vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); sync_manager->SignalDone(
vic_sync->SignalDone(syncpoint_id); sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
} }
break; break;
} }

@ -116,12 +116,10 @@ private:
void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
GPU& gpu; GPU& gpu;
std::shared_ptr<Tegra::Nvdec> nvdec_processor;
std::shared_ptr<Nvdec> nvdec_processor; std::unique_ptr<Tegra::Vic> vic_processor;
std::unique_ptr<Vic> vic_processor; std::unique_ptr<Tegra::Host1x> host1x_processor;
std::unique_ptr<Host1x> host1x_processor; std::unique_ptr<SyncptIncrManager> sync_manager;
std::unique_ptr<SyncptIncrManager> nvdec_sync;
std::unique_ptr<SyncptIncrManager> vic_sync;
ChClassId current_class{}; ChClassId current_class{};
ThiRegisters vic_thi_state{}; ThiRegisters vic_thi_state{};
ThiRegisters nvdec_thi_state{}; ThiRegisters nvdec_thi_state{};

@ -10,22 +10,14 @@ Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
Tegra::Host1x::~Host1x() = default; Tegra::Host1x::~Host1x() = default;
void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32);
std::memcpy(state_offset, &arguments, sizeof(u32));
}
void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) {
StateWrite(static_cast<u32>(method), arguments[0]);
switch (method) { switch (method) {
case Method::WaitSyncpt:
Execute(arguments[0]);
break;
case Method::LoadSyncptPayload32: case Method::LoadSyncptPayload32:
syncpoint_value = arguments[0]; syncpoint_value = argument;
break; break;
case Method::WaitSyncpt:
case Method::WaitSyncpt32: case Method::WaitSyncpt32:
Execute(arguments[0]); Execute(argument);
break; break;
default: default:
UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method)); UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
@ -34,6 +26,5 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& argumen
} }
void Tegra::Host1x::Execute(u32 data) { void Tegra::Host1x::Execute(u32 data) {
// This method waits on a valid syncpoint. gpu.WaitFence(data, syncpoint_value);
// TODO: Implement when proper Async is in place
} }

@ -14,64 +14,23 @@ class Nvdec;
class Host1x { class Host1x {
public: public:
struct Host1xClassRegisters {
u32 incr_syncpt{};
u32 incr_syncpt_ctrl{};
u32 incr_syncpt_error{};
INSERT_PADDING_WORDS(5);
u32 wait_syncpt{};
u32 wait_syncpt_base{};
u32 wait_syncpt_incr{};
u32 load_syncpt_base{};
u32 incr_syncpt_base{};
u32 clear{};
u32 wait{};
u32 wait_with_interrupt{};
u32 delay_use{};
u32 tick_count_high{};
u32 tick_count_low{};
u32 tick_ctrl{};
INSERT_PADDING_WORDS(23);
u32 ind_ctrl{};
u32 ind_off2{};
u32 ind_off{};
std::array<u32, 31> ind_data{};
INSERT_PADDING_WORDS(1);
u32 load_syncpoint_payload32{};
u32 stall_ctrl{};
u32 wait_syncpt32{};
u32 wait_syncpt_base32{};
u32 load_syncpt_base32{};
u32 incr_syncpt_base32{};
u32 stall_count_high{};
u32 stall_count_low{};
u32 xref_ctrl{};
u32 channel_xref_high{};
u32 channel_xref_low{};
};
static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size");
enum class Method : u32 { enum class Method : u32 {
WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, WaitSyncpt = 0x8,
LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, LoadSyncptPayload32 = 0x4e,
WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4, WaitSyncpt32 = 0x50,
}; };
explicit Host1x(GPU& gpu); explicit Host1x(GPU& gpu);
~Host1x(); ~Host1x();
/// Writes the method into the state, Invoke Execute() if encountered /// Writes the method into the state, Invoke Execute() if encountered
void ProcessMethod(Method method, const std::vector<u32>& arguments); void ProcessMethod(Method method, u32 argument);
private: private:
/// For Host1x, execute is waiting on a syncpoint previously written into the state /// For Host1x, execute is waiting on a syncpoint previously written into the state
void Execute(u32 data); void Execute(u32 data);
/// Write argument into the provided offset
void StateWrite(u32 offset, u32 arguments);
u32 syncpoint_value{}; u32 syncpoint_value{};
Host1xClassRegisters state{};
GPU& gpu; GPU& gpu;
}; };