Texture Cache: Implement async texture downloads.

master
Fernando Sahmkow 2022-12-28 09:32:31 +07:00
parent ddbf851ef6
commit 03ccd8bf43
5 changed files with 91 additions and 35 deletions

@ -354,6 +354,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true; static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
using Runtime = OpenGL::TextureCacheRuntime; using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image; using Image = OpenGL::Image;
@ -361,6 +362,7 @@ struct TextureCacheParams {
using ImageView = OpenGL::ImageView; using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler; using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer; using Framebuffer = OpenGL::Framebuffer;
using AsyncBuffer = u32;
}; };
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;

@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Upload); return staging_buffer_pool.Request(size, MemoryUsage::Upload);
} }
// Requests a staging buffer of `size` bytes from staging_buffer_pool with MemoryUsage::Download.
// The revised signature additionally forwards the `deferred` flag to the pool request.
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
return staging_buffer_pool.Request(size, MemoryUsage::Download); return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred);
}
// Hands `ref` back to the pool by forwarding it to staging_buffer_pool.FreeDeferred.
void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
staging_buffer_pool.FreeDeferred(ref);
} }
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {

@ -51,7 +51,9 @@ public:
StagingBufferRef UploadStagingBuffer(size_t size); StagingBufferRef UploadStagingBuffer(size_t size);
StagingBufferRef DownloadStagingBuffer(size_t size); StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void TickFrame(); void TickFrame();
@ -347,6 +349,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_EMULATED_COPIES = false;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
using Runtime = Vulkan::TextureCacheRuntime; using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image; using Image = Vulkan::Image;
@ -354,6 +357,7 @@ struct TextureCacheParams {
using ImageView = Vulkan::ImageView; using ImageView = Vulkan::ImageView;
using Sampler = Vulkan::Sampler; using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer; using Framebuffer = Vulkan::Framebuffer;
using AsyncBuffer = Vulkan::StagingBufferRef;
}; };
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;

@ -646,7 +646,28 @@ bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
template <class P> template <class P>
void TextureCache<P>::CommitAsyncFlushes() { void TextureCache<P>::CommitAsyncFlushes() {
// This is intentionally passing the value by copy // This is intentionally passing the value by copy
committed_downloads.push(uncommitted_downloads); if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = uncommitted_downloads;
if (download_ids.empty()) {
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();
async_buffers.emplace_back(std::optional<AsyncBuffer>{});
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
async_buffers.emplace_back(download_map);
}
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear(); uncommitted_downloads.clear();
} }
@ -655,9 +676,30 @@ void TextureCache<P>::PopAsyncFlushes() {
if (committed_downloads.empty()) { if (committed_downloads.empty()) {
return; return;
} }
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = committed_downloads.front(); const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) { if (download_ids.empty()) {
committed_downloads.pop(); committed_downloads.pop_front();
async_buffers.pop_front();
return;
}
auto download_map = *async_buffers.front();
std::span<u8> download_span = download_map.mapped_span;
for (size_t i = download_ids.size(); i > 0; i--) {
const ImageBase& image = slot_images[download_ids[i - 1]];
const auto copies = FullDownloadCopies(image.info);
download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
swizzle_data_buffer);
}
runtime.FreeDeferredStagingBuffer(download_map);
committed_downloads.pop_front();
async_buffers.pop_front();
} else {
const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
return; return;
} }
size_t total_size_bytes = 0; size_t total_size_bytes = 0;
@ -674,7 +716,6 @@ void TextureCache<P>::PopAsyncFlushes() {
} }
// Wait for downloads to finish // Wait for downloads to finish
runtime.Finish(); runtime.Finish();
download_map.offset = original_offset; download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span; std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) { for (const ImageId image_id : download_ids) {
@ -685,7 +726,8 @@ void TextureCache<P>::PopAsyncFlushes() {
download_map.offset += image.unswizzled_size_bytes; download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes); download_span = download_span.subspan(image.unswizzled_size_bytes);
} }
committed_downloads.pop(); committed_downloads.pop_front();
}
} }
template <class P> template <class P>

@ -92,6 +92,8 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device. /// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// True when the API can do asynchronous texture downloads.
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()}; static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
@ -106,6 +108,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
using ImageView = typename P::ImageView; using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler; using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer; using Framebuffer = typename P::Framebuffer;
using AsyncBuffer = typename P::AsyncBuffer;
struct BlitImages { struct BlitImages {
ImageId dst_id; ImageId dst_id;
@ -403,7 +406,8 @@ private:
// TODO: This data structure is not optimal and it should be reworked // TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads; std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads; std::deque<std::vector<ImageId>> committed_downloads;
std::deque<std::optional<AsyncBuffer>> async_buffers;
struct LRUItemParams { struct LRUItemParams {
using ObjectType = ImageId; using ObjectType = ImageId;