vk_pipeline_cache: Unify pipeline cache keys into a single operation

This allows us to call Common::CityHash and std::memcmp only once for GraphicsPipelineCacheKey. While we are at it, do the same for compute.
2020-04-22 20:52:29 +07:00 · 2020-04-22 20:52:29 +07:00 · 8c37cd1af6
parent f665c92114
commit 8c37cd1af6
5 changed files with 58 additions and 46 deletions
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@ -140,6 +140,12 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
    enable.Assign(1);
 }

+void FixedPipelineState::Fill(const Maxwell& regs) {
+    rasterizer.Fill(regs);
+    depth_stencil.Fill(regs);
+    color_blending.Fill(regs);
+}
+
 std::size_t FixedPipelineState::Hash() const noexcept {
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
    return static_cast<std::size_t>(hash);
@ -149,15 +155,6 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep
    return std::memcmp(this, &rhs, sizeof *this) == 0;
 }

-FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
-    FixedPipelineState fixed_state;
-    fixed_state.rasterizer.Fill(regs);
-    fixed_state.depth_stencil.Fill(regs);
-    fixed_state.color_blending.Fill(regs);
-    fixed_state.padding = {};
-    return fixed_state;
-}
-
 u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
    // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
    // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@ -17,7 +17,7 @@ namespace Vulkan {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-struct alignas(32) FixedPipelineState {
+struct FixedPipelineState {
    static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
    static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;

@ -237,7 +237,8 @@ struct alignas(32) FixedPipelineState {
    Rasterizer rasterizer;
    DepthStencil depth_stencil;
    ColorBlending color_blending;
-    std::array<u8, 20> padding;
+
+    void Fill(const Maxwell& regs);

    std::size_t Hash() const noexcept;

@ -250,9 +251,6 @@ struct alignas(32) FixedPipelineState {
 static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
 static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
 static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
-static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
-
-FixedPipelineState GetFixedPipelineState(const Maxwell& regs);

 } // namespace Vulkan

--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -161,6 +161,24 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,

 } // Anonymous namespace

+std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<std::size_t>(hash);
+}
+
+bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
+
+std::size_t ComputePipelineCacheKey::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<std::size_t>(hash);
+}
+
+bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
+
 CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
                           u32 main_offset)
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@ -7,7 +7,6 @@
 #include <array>
 #include <cstddef>
 #include <memory>
-#include <tuple>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>
@ -51,42 +50,38 @@ using ProgramCode = std::vector<u64>;

 struct GraphicsPipelineCacheKey {
    FixedPipelineState fixed_state;
-    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
    RenderPassParams renderpass_params;
+    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+    u64 padding; // This is necessary for unique object representations

-    std::size_t Hash() const noexcept {
-        std::size_t hash = fixed_state.Hash();
-        for (const auto& shader : shaders) {
-            boost::hash_combine(hash, shader);
-        }
-        boost::hash_combine(hash, renderpass_params.Hash());
-        return hash;
-    }
+    std::size_t Hash() const noexcept;

-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return std::tie(fixed_state, shaders, renderpass_params) ==
-               std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
    }
 };
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);

 struct ComputePipelineCacheKey {
-    GPUVAddr shader{};
-    u32 shared_memory_size{};
-    std::array<u32, 3> workgroup_size{};
+    GPUVAddr shader;
+    u32 shared_memory_size;
+    std::array<u32, 3> workgroup_size;

-    std::size_t Hash() const noexcept {
-        return static_cast<std::size_t>(shader) ^
-               ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
-               static_cast<std::size_t>(workgroup_size[0]) ^
-               (static_cast<std::size_t>(workgroup_size[1]) << 16) ^
-               (static_cast<std::size_t>(workgroup_size[2]) << 24);
-    }
+    std::size_t Hash() const noexcept;

-    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
-        return std::tie(shader, shared_memory_size, workgroup_size) ==
-               std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
+    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
    }
 };
+static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);

 } // namespace Vulkan

--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@ -316,7 +316,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
    query_cache.UpdateCounters();

    const auto& gpu = system.GPU().Maxwell3D();
-    GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
+    GraphicsPipelineCacheKey key;
+    key.fixed_state.Fill(gpu.regs);

    buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));

@ -334,10 +335,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {

    buffer_cache.Unmap();

-    const auto texceptions = UpdateAttachments();
+    const Texceptions texceptions = UpdateAttachments();
    SetupImageTransitions(texceptions, color_attachments, zeta_attachment);

    key.renderpass_params = GetRenderPassParams(texceptions);
+    key.padding = 0;

    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
@ -453,10 +455,12 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
    query_cache.UpdateCounters();

    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    const ComputePipelineCacheKey key{
-        code_addr,
-        launch_desc.shared_alloc,
-        {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
+    ComputePipelineCacheKey key;
+    key.shader = code_addr;
+    key.shared_memory_size = launch_desc.shared_alloc;
+    key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
+                          launch_desc.block_dim_z};
+
    auto& pipeline = pipeline_cache.GetComputePipeline(key);

    // Compute dispatches can't be executed inside a renderpass