video_core/gpu_thread: Keep the write lock for allocating the fence.

Otherwise the fences might get submitted out of order into the queue, which makes testing them pointless. The overhead should be tiny, as the mutex is just moved from the queue to the writing code.
2021-04-07 13:57:49 +07:00 · 2021-04-07 13:57:49 +07:00 · e6fb49fa4b
parent 5145133a60
commit e6fb49fa4b
2 changed files with 4 additions and 1 deletions
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@ -151,11 +151,13 @@ void ThreadManager::OnCommandListEnd() {
 }

 u64 ThreadManager::PushCommand(CommandData&& command_data) {
+    std::unique_lock lk(state.write_lock);
    const u64 fence{++state.last_fence};
    state.queue.Push(CommandDataContainer(std::move(command_data), fence));

    if (!is_async) {
        // In synchronous GPU mode, block the caller until the command has executed
+        lk.unlock();
        WaitIdle();
    }

--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@ -101,7 +101,8 @@ struct CommandDataContainer {
 struct SynchState final {
    std::atomic_bool is_running{true};

-    using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    std::mutex write_lock;
    CommandQueue queue;
    u64 last_fence{};
    std::atomic<u64> signaled_fence{};