VideoCore: Refactor fencing system.

master
Fernando Sahmkow 2022-02-06 01:16:11 +07:00
parent 4d60410dd9
commit bc8b3d225e
20 changed files with 154 additions and 167 deletions

@ -40,7 +40,8 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
u32 height, u32 stride, android::BufferTransformFlags transform, u32 height, u32 stride, android::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect) { const Common::Rectangle<int>& crop_rect,
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
const VAddr addr = nvmap.GetHandleAddress(buffer_handle); const VAddr addr = nvmap.GetHandleAddress(buffer_handle);
LOG_TRACE(Service, LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@ -50,7 +51,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
stride, format, transform, crop_rect}; stride, format, transform, crop_rect};
system.GetPerfStats().EndSystemFrame(); system.GetPerfStats().EndSystemFrame();
system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0); system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences);
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame(); system.GetPerfStats().BeginSystemFrame();
} }

@ -38,7 +38,8 @@ public:
/// Performs a screen flip, drawing the buffer pointed to by the handle. /// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height,
u32 stride, android::BufferTransformFlags transform, u32 stride, android::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect); const Common::Rectangle<int>& crop_rect,
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences);
Kernel::KEvent* QueryEvent(u32 event_id) override; Kernel::KEvent* QueryEvent(u32 event_id) override;

@ -269,17 +269,6 @@ void NVFlinger::Compose() {
return; // We are likely shutting down return; // We are likely shutting down
} }
auto& syncpoint_manager = system.Host1x().GetSyncpointManager();
const auto& multi_fence = buffer.fence;
guard->unlock();
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];
syncpoint_manager.WaitGuest(fence.id, fence.value);
}
guard->lock();
MicroProfileFlip();
// Now send the buffer to the GPU for drawing. // Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based // TODO(Subv): Support more than just disp0. The display device selection is probably based
// on which display we're drawing (Default, Internal, External, etc) // on which display we're drawing (Default, Internal, External, etc)
@ -293,8 +282,10 @@ void NVFlinger::Compose() {
nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(), nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(),
igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect); static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect,
buffer.fence.fences, buffer.fence.num_fences);
MicroProfileFlip();
guard->lock(); guard->lock();
swap_interval = buffer.swap_interval; swap_interval = buffer.swap_interval;

@ -826,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
const bool is_accuracy_normal = const bool is_accuracy_normal =
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
auto it = committed_ranges.begin();
while (it != committed_ranges.end()) {
auto& current_intervals = *it;
auto next_it = std::next(it);
while (next_it != committed_ranges.end()) {
for (auto& interval : *next_it) {
current_intervals.subtract(interval);
}
next_it++;
}
it++;
}
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0; u64 total_size_bytes = 0;
u64 largest_copy = 0; u64 largest_copy = 0;

@ -24,8 +24,6 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
void DmaPusher::DispatchCalls() { void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls); MICROPROFILE_SCOPE(DispatchCalls);
gpu.SyncGuestHost();
dma_pushbuffer_subindex = 0; dma_pushbuffer_subindex = 0;
dma_state.is_last_call = true; dma_state.is_last_call = true;
@ -36,7 +34,6 @@ void DmaPusher::DispatchCalls() {
} }
} }
gpu.FlushCommands(); gpu.FlushCommands();
gpu.SyncGuestHost();
gpu.OnCommandListEnd(); gpu.OnCommandListEnd();
} }

@ -242,6 +242,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return; return;
case MAXWELL3D_REG_INDEX(fragment_barrier): case MAXWELL3D_REG_INDEX(fragment_barrier):
return rasterizer->FragmentBarrier(); return rasterizer->FragmentBarrier();
case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache):
rasterizer->InvalidateGPUCache();
return rasterizer->WaitForIdle();
case MAXWELL3D_REG_INDEX(tiled_cache_barrier): case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier(); return rasterizer->TiledCacheBarrier();
} }
@ -472,10 +475,25 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) { switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release: case Regs::QueryOperation::Release:
if (regs.query.query_get.fence == 1) { if (regs.query.query_get.fence == 1 || regs.query.query_get.short_query != 0) {
rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); const GPUVAddr sequence_address{regs.query.QueryAddress()};
const u32 payload = regs.query.query_sequence;
std::function<void()> operation([this, sequence_address, payload] {
memory_manager.Write<u32>(sequence_address, payload);
});
rasterizer->SignalFence(std::move(operation));
} else { } else {
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); struct LongQueryResult {
u64_le value;
u64_le timestamp;
};
const GPUVAddr sequence_address{regs.query.QueryAddress()};
const u32 payload = regs.query.query_sequence;
std::function<void()> operation([this, sequence_address, payload] {
LongQueryResult query_result{payload, system.GPU().GetTicks()};
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
});
rasterizer->SignalFence(std::move(operation));
} }
break; break;
case Regs::QueryOperation::Acquire: case Regs::QueryOperation::Acquire:

@ -79,12 +79,15 @@ void Puller::ProcessSemaphoreTriggerMethod() {
u64 timestamp; u64 timestamp;
}; };
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_sequence;
std::function<void()> operation([this, sequence_address, payload] {
Block block{}; Block block{};
block.sequence = regs.semaphore_sequence; block.sequence = payload;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = gpu.GetTicks(); block.timestamp = gpu.GetTicks();
memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); memory_manager.WriteBlock(sequence_address, &block, sizeof(block));
});
rasterizer->SignalFence(std::move(operation));
} else { } else {
do { do {
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
@ -94,6 +97,7 @@ void Puller::ProcessSemaphoreTriggerMethod() {
regs.acquire_active = true; regs.acquire_active = true;
regs.acquire_mode = false; regs.acquire_mode = false;
if (word != regs.acquire_value) { if (word != regs.acquire_value) {
rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1)); std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue; continue;
} }
@ -101,11 +105,13 @@ void Puller::ProcessSemaphoreTriggerMethod() {
regs.acquire_active = true; regs.acquire_active = true;
regs.acquire_mode = true; regs.acquire_mode = true;
if (word < regs.acquire_value) { if (word < regs.acquire_value) {
rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1)); std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue; continue;
} }
} else if (op == GpuSemaphoreOperation::AcquireMask) { } else if (op == GpuSemaphoreOperation::AcquireMask) {
if (word & regs.semaphore_sequence == 0) { if (word && regs.semaphore_sequence == 0) {
rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1)); std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue; continue;
} }
@ -117,16 +123,23 @@ void Puller::ProcessSemaphoreTriggerMethod() {
} }
void Puller::ProcessSemaphoreRelease() { void Puller::ProcessSemaphoreRelease() {
rasterizer->SignalSemaphore(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_release;
std::function<void()> operation([this, sequence_address, payload] {
memory_manager.Write<u32>(sequence_address, payload);
});
rasterizer->SignalFence(std::move(operation));
} }
void Puller::ProcessSemaphoreAcquire() { void Puller::ProcessSemaphoreAcquire() {
const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
const auto value = regs.semaphore_acquire; const auto value = regs.semaphore_acquire;
std::this_thread::sleep_for(std::chrono::milliseconds(5)); while (word != value) {
if (word != value) {
regs.acquire_active = true; regs.acquire_active = true;
regs.acquire_value = value; regs.acquire_value = value;
std::this_thread::sleep_for(std::chrono::milliseconds(1));
rasterizer->ReleaseFences();
word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
// TODO(kemathe73) figure out how to do the acquire_timeout // TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false; regs.acquire_mode = false;
regs.acquire_source = false; regs.acquire_source = false;
@ -147,9 +160,9 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::SemaphoreAddressHigh: case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow: case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequencePayload: case BufferMethods::SemaphoreSequencePayload:
case BufferMethods::WrcacheFlush:
case BufferMethods::SyncpointPayload: case BufferMethods::SyncpointPayload:
break; break;
case BufferMethods::WrcacheFlush:
case BufferMethods::RefCnt: case BufferMethods::RefCnt:
rasterizer->SignalReference(); rasterizer->SignalReference();
break; break;
@ -173,7 +186,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
} }
case BufferMethods::MemOpB: { case BufferMethods::MemOpB: {
// Implement this better. // Implement this better.
rasterizer->SyncGuestHost(); rasterizer->InvalidateGPUCache();
break; break;
} }
case BufferMethods::MemOpC: case BufferMethods::MemOpC:

@ -5,6 +5,8 @@
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
#include <deque>
#include <functional>
#include <memory> #include <memory>
#include <queue> #include <queue>
@ -19,28 +21,7 @@ namespace VideoCommon {
class FenceBase { class FenceBase {
public: public:
explicit FenceBase(u32 payload_, bool is_stubbed_) explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
: address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
: address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
u8* GetAddress() const {
return address;
}
u32 GetPayload() const {
return payload;
}
bool IsSemaphore() const {
return is_semaphore;
}
private:
u8* address;
u32 payload;
bool is_semaphore;
protected: protected:
bool is_stubbed; bool is_stubbed;
@ -60,31 +41,28 @@ public:
buffer_cache.AccumulateFlushes(); buffer_cache.AccumulateFlushes();
} }
void SignalSemaphore(u8* addr, u32 value) { void SyncOperation(std::function<void()>&& func) {
uncommitted_operations.emplace_back(std::move(func));
}
void SignalFence(std::function<void()>&& func) {
TryReleasePendingFences(); TryReleasePendingFences();
const bool should_flush = ShouldFlush(); const bool should_flush = ShouldFlush();
CommitAsyncFlushes(); CommitAsyncFlushes();
TFence new_fence = CreateFence(addr, value, !should_flush); uncommitted_operations.emplace_back(std::move(func));
CommitOperations();
TFence new_fence = CreateFence(!should_flush);
fences.push(new_fence); fences.push(new_fence);
QueueFence(new_fence); QueueFence(new_fence);
if (should_flush) { if (should_flush) {
rasterizer.FlushCommands(); rasterizer.FlushCommands();
} }
rasterizer.SyncGuestHost();
} }
void SignalSyncPoint(u32 value) { void SignalSyncPoint(u32 value) {
syncpoint_manager.IncrementGuest(value); syncpoint_manager.IncrementGuest(value);
TryReleasePendingFences(); std::function<void()> func([this, value] { syncpoint_manager.IncrementHost(value); });
const bool should_flush = ShouldFlush(); SignalFence(std::move(func));
CommitAsyncFlushes();
TFence new_fence = CreateFence(value, !should_flush);
fences.push(new_fence);
QueueFence(new_fence);
if (should_flush) {
rasterizer.FlushCommands();
}
rasterizer.SyncGuestHost();
} }
void WaitPendingFences() { void WaitPendingFences() {
@ -94,12 +72,10 @@ public:
WaitFence(current_fence); WaitFence(current_fence);
} }
PopAsyncFlushes(); PopAsyncFlushes();
if (current_fence->IsSemaphore()) { auto operations = std::move(pending_operations.front());
char* address = reinterpret_cast<char*>(current_fence->GetAddress()); pending_operations.pop_front();
auto payload = current_fence->GetPayload(); for (auto& operation : operations) {
std::memcpy(address, &payload, sizeof(payload)); operation();
} else {
syncpoint_manager.IncrementHost(current_fence->GetPayload());
} }
PopFence(); PopFence();
} }
@ -114,11 +90,9 @@ protected:
virtual ~FenceManager() = default; virtual ~FenceManager() = default;
/// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true /// true
virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; virtual TFence CreateFence(bool is_stubbed) = 0;
/// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
/// Queues a fence into the backend if the fence isn't stubbed. /// Queues a fence into the backend if the fence isn't stubbed.
virtual void QueueFence(TFence& fence) = 0; virtual void QueueFence(TFence& fence) = 0;
/// Notifies that the backend fence has been signaled/reached in host GPU. /// Notifies that the backend fence has been signaled/reached in host GPU.
@ -141,12 +115,10 @@ private:
return; return;
} }
PopAsyncFlushes(); PopAsyncFlushes();
if (current_fence->IsSemaphore()) { auto operations = std::move(pending_operations.front());
char* address = reinterpret_cast<char*>(current_fence->GetAddress()); pending_operations.pop_front();
const auto payload = current_fence->GetPayload(); for (auto& operation : operations) {
std::memcpy(address, &payload, sizeof(payload)); operation();
} else {
syncpoint_manager.IncrementHost(current_fence->GetPayload());
} }
PopFence(); PopFence();
} }
@ -165,16 +137,20 @@ private:
} }
void PopAsyncFlushes() { void PopAsyncFlushes() {
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes(); texture_cache.PopAsyncFlushes();
buffer_cache.PopAsyncFlushes(); buffer_cache.PopAsyncFlushes();
}
query_cache.PopAsyncFlushes(); query_cache.PopAsyncFlushes();
} }
void CommitAsyncFlushes() { void CommitAsyncFlushes() {
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CommitAsyncFlushes(); texture_cache.CommitAsyncFlushes();
buffer_cache.CommitAsyncFlushes(); buffer_cache.CommitAsyncFlushes();
}
query_cache.CommitAsyncFlushes(); query_cache.CommitAsyncFlushes();
} }
@ -183,7 +159,13 @@ private:
fences.pop(); fences.pop();
} }
void CommitOperations() {
pending_operations.emplace_back(std::move(uncommitted_operations));
}
std::queue<TFence> fences; std::queue<TFence> fences;
std::deque<std::function<void()>> uncommitted_operations;
std::deque<std::deque<std::function<void()>>> pending_operations;
DelayedDestructionRing<TFence, 6> delayed_destruction_ring; DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
}; };

@ -93,17 +93,14 @@ struct GPU::Impl {
} }
/// Synchronizes CPU writes with Host GPU memory. /// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost() { void InvalidateGPUCache() {
rasterizer->SyncGuestHost(); rasterizer->InvalidateGPUCache();
} }
/// Signal the ending of command list. /// Signal the ending of command list.
void OnCommandListEnd() { void OnCommandListEnd() {
if (is_async) {
// This command only applies to asynchronous GPU mode
gpu_thread.OnCommandListEnd(); gpu_thread.OnCommandListEnd();
} }
}
/// Request a host GPU memory flush from the CPU. /// Request a host GPU memory flush from the CPU.
template <typename Func> template <typename Func>
@ -296,7 +293,7 @@ struct GPU::Impl {
} }
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
Service::Nvidia::NvFence* fences, size_t num_fences) { std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
size_t current_request_counter{}; size_t current_request_counter{};
{ {
std::unique_lock<std::mutex> lk(request_swap_mutex); std::unique_lock<std::mutex> lk(request_swap_mutex);
@ -412,8 +409,8 @@ void GPU::FlushCommands() {
impl->FlushCommands(); impl->FlushCommands();
} }
void GPU::SyncGuestHost() { void GPU::InvalidateGPUCache() {
impl->SyncGuestHost(); impl->InvalidateGPUCache();
} }
void GPU::OnCommandListEnd() { void GPU::OnCommandListEnd() {
@ -488,7 +485,7 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
} }
void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
Service::Nvidia::NvFence* fences, size_t num_fences) { std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
impl->RequestSwapBuffers(framebuffer, fences, num_fences); impl->RequestSwapBuffers(framebuffer, fences, num_fences);
} }

@ -110,7 +110,7 @@ public:
/// Flush all current written commands into the host GPU for execution. /// Flush all current written commands into the host GPU for execution.
void FlushCommands(); void FlushCommands();
/// Synchronizes CPU writes with Host GPU memory. /// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost(); void InvalidateGPUCache();
/// Signal the ending of command list. /// Signal the ending of command list.
void OnCommandListEnd(); void OnCommandListEnd();
@ -180,7 +180,7 @@ public:
void RendererFrameEndNotify(); void RendererFrameEndNotify();
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
Service::Nvidia::NvFence* fences, size_t num_fences); std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences);
/// Performs any additional setup necessary in order to begin GPU emulation. /// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary /// This can be used to launch any necessary threads and register any necessary

@ -98,7 +98,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
} }
void ThreadManager::TickGPU() { void ThreadManager::TickGPU() {
PushCommand(GPUTickCommand(), true); PushCommand(GPUTickCommand());
} }
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {

@ -62,7 +62,10 @@ public:
virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0; virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0;
/// Signal a GPU based semaphore as a fence /// Signal a GPU based semaphore as a fence
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; virtual void SignalFence(std::function<void()>&& func) = 0;
/// Send an operation to be done after a certain amount of flushes.
virtual void SyncOperation(std::function<void()>&& func) = 0;
/// Signal a GPU based syncpoint as a fence /// Signal a GPU based syncpoint as a fence
virtual void SignalSyncPoint(u32 value) = 0; virtual void SignalSyncPoint(u32 value) = 0;
@ -89,7 +92,7 @@ public:
virtual void OnCPUWrite(VAddr addr, u64 size) = 0; virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host. /// Sync memory between guest and host.
virtual void SyncGuestHost() = 0; virtual void InvalidateGPUCache() = 0;
/// Unmap memory range /// Unmap memory range
virtual void UnmapMemory(VAddr addr, u64 size) = 0; virtual void UnmapMemory(VAddr addr, u64 size) = 0;

@ -10,10 +10,7 @@
namespace OpenGL { namespace OpenGL {
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {}
GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default; GLInnerFence::~GLInnerFence() = default;
@ -48,12 +45,8 @@ FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterize
BufferCache& buffer_cache_, QueryCache& query_cache_) BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed); return std::make_shared<GLInnerFence>(is_stubbed);
}
Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
} }
void FenceManagerOpenGL::QueueFence(Fence& fence) { void FenceManagerOpenGL::QueueFence(Fence& fence) {

@ -16,8 +16,7 @@ namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase { class GLInnerFence : public VideoCommon::FenceBase {
public: public:
explicit GLInnerFence(u32 payload_, bool is_stubbed_); explicit GLInnerFence(bool is_stubbed_);
explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
~GLInnerFence(); ~GLInnerFence();
void Queue(); void Queue();
@ -40,8 +39,7 @@ public:
QueryCache& query_cache); QueryCache& query_cache);
protected: protected:
Fence CreateFence(u32 value, bool is_stubbed) override; Fence CreateFence(bool is_stubbed) override;
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override; void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override; bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override; void WaitFence(Fence& fence) override;

@ -358,7 +358,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
} }
} }
void RasterizerOpenGL::SyncGuestHost() { void RasterizerOpenGL::InvalidateGPUCache() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
shader_cache.SyncGuestHost(); shader_cache.SyncGuestHost();
{ {
@ -386,13 +386,12 @@ void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
} }
} }
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { void RasterizerOpenGL::SignalFence(std::function<void()>&& func) {
if (!gpu.IsAsync()) { fence_manager.SignalFence(std::move(func));
gpu_memory->Write<u32>(addr, value);
return;
} }
auto paddr = gpu_memory->GetPointer(addr);
fence_manager.SignalSemaphore(paddr, value); void RasterizerOpenGL::SyncOperation(std::function<void()>&& func) {
fence_manager.SyncOperation(std::move(func));
} }
void RasterizerOpenGL::SignalSyncPoint(u32 value) { void RasterizerOpenGL::SignalSyncPoint(u32 value) {
@ -400,16 +399,10 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
} }
void RasterizerOpenGL::SignalReference() { void RasterizerOpenGL::SignalReference() {
if (!gpu.IsAsync()) {
return;
}
fence_manager.SignalOrdering(); fence_manager.SignalOrdering();
} }
void RasterizerOpenGL::ReleaseFences() { void RasterizerOpenGL::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
}
fence_manager.WaitPendingFences(); fence_manager.WaitPendingFences();
} }
@ -426,6 +419,7 @@ void RasterizerOpenGL::WaitForIdle() {
} }
void RasterizerOpenGL::FragmentBarrier() { void RasterizerOpenGL::FragmentBarrier() {
glTextureBarrier();
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
} }

@ -80,10 +80,11 @@ public:
bool MustFlushRegion(VAddr addr, u64 size) override; bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override; void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override; void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalFence(std::function<void()>&& func) override;
void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override; void SignalSyncPoint(u32 value) override;
void SignalReference() override; void SignalReference() override;
void ReleaseFences() override; void ReleaseFences() override;

@ -11,11 +11,8 @@
namespace Vulkan { namespace Vulkan {
InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_) InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_)
: FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} : FenceBase{is_stubbed_}, scheduler{scheduler_} {}
InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default; InnerFence::~InnerFence() = default;
@ -48,12 +45,8 @@ FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::G
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {} scheduler{scheduler_} {}
Fence FenceManager::CreateFence(u32 value, bool is_stubbed) { Fence FenceManager::CreateFence(bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, value, is_stubbed); return std::make_shared<InnerFence>(scheduler, is_stubbed);
}
Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
} }
void FenceManager::QueueFence(Fence& fence) { void FenceManager::QueueFence(Fence& fence) {

@ -25,8 +25,7 @@ class Scheduler;
class InnerFence : public VideoCommon::FenceBase { class InnerFence : public VideoCommon::FenceBase {
public: public:
explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_); explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_);
explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
~InnerFence(); ~InnerFence();
void Queue(); void Queue();
@ -50,8 +49,7 @@ public:
QueryCache& query_cache, const Device& device, Scheduler& scheduler); QueryCache& query_cache, const Device& device, Scheduler& scheduler);
protected: protected:
Fence CreateFence(u32 value, bool is_stubbed) override; Fence CreateFence(bool is_stubbed) override;
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override; void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override; bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override; void WaitFence(Fence& fence) override;

@ -428,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
} }
} }
void RasterizerVulkan::SyncGuestHost() { void RasterizerVulkan::InvalidateGPUCache() {
pipeline_cache.SyncGuestHost(); pipeline_cache.SyncGuestHost();
{ {
std::scoped_lock lock{buffer_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex};
@ -455,13 +455,12 @@ void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
} }
} }
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { void RasterizerVulkan::SignalFence(std::function<void()>&& func) {
if (!gpu.IsAsync()) { fence_manager.SignalFence(std::move(func));
gpu_memory->Write<u32>(addr, value);
return;
} }
auto paddr = gpu_memory->GetPointer(addr);
fence_manager.SignalSemaphore(paddr, value); void RasterizerVulkan::SyncOperation(std::function<void()>&& func) {
fence_manager.SyncOperation(std::move(func));
} }
void RasterizerVulkan::SignalSyncPoint(u32 value) { void RasterizerVulkan::SignalSyncPoint(u32 value) {
@ -469,16 +468,10 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
} }
void RasterizerVulkan::SignalReference() { void RasterizerVulkan::SignalReference() {
if (!gpu.IsAsync()) {
return;
}
fence_manager.SignalOrdering(); fence_manager.SignalOrdering();
} }
void RasterizerVulkan::ReleaseFences() { void RasterizerVulkan::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
}
fence_manager.WaitPendingFences(); fence_manager.WaitPendingFences();
} }

@ -76,10 +76,11 @@ public:
bool MustFlushRegion(VAddr addr, u64 size) override; bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override; void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override; void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalFence(std::function<void()>&& func) override;
void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override; void SignalSyncPoint(u32 value) override;
void SignalReference() override; void SignalReference() override;
void ReleaseFences() override; void ReleaseFences() override;