video_core: Add BCn decoding support
parent
b8c96cee5f
commit
eac46ad7ce
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,43 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace bcn {
|
||||
/**
|
||||
* @brief Decodes a BC1 encoded image to R8G8B8A8
|
||||
*/
|
||||
void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC2 encoded image to R8G8B8A8
|
||||
*/
|
||||
void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC3 encoded image to R8G8B8A8
|
||||
*/
|
||||
void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC4 encoded image to R8
|
||||
*/
|
||||
void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC5 encoded image to R8G8
|
||||
*/
|
||||
void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC6 encoded image to R16G16B16A16
|
||||
*/
|
||||
void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC7 encoded image to R8G8B8A8
|
||||
*/
|
||||
void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
|
||||
}
|
@ -0,0 +1,129 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <span>
|
||||
#include <bc_decoder.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/texture_cache/decode_bc.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
namespace {
|
||||
constexpr u32 BLOCK_SIZE = 4;
|
||||
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
|
||||
constexpr bool IsSigned(PixelFormat pixel_format) {
|
||||
switch (pixel_format) {
|
||||
case PixelFormat::BC4_SNORM:
|
||||
case PixelFormat::BC4_UNORM:
|
||||
case PixelFormat::BC5_SNORM:
|
||||
case PixelFormat::BC5_UNORM:
|
||||
case PixelFormat::BC6H_SFLOAT:
|
||||
case PixelFormat::BC6H_UFLOAT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
constexpr u32 BlockSize(PixelFormat pixel_format) {
|
||||
switch (pixel_format) {
|
||||
case PixelFormat::BC1_RGBA_SRGB:
|
||||
case PixelFormat::BC1_RGBA_UNORM:
|
||||
case PixelFormat::BC4_SNORM:
|
||||
case PixelFormat::BC4_UNORM:
|
||||
return 8;
|
||||
default:
|
||||
return 16;
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {
|
||||
switch (pixel_format) {
|
||||
case PixelFormat::BC4_SNORM:
|
||||
case PixelFormat::BC4_UNORM:
|
||||
return 1;
|
||||
case PixelFormat::BC5_SNORM:
|
||||
case PixelFormat::BC5_UNORM:
|
||||
return 2;
|
||||
case PixelFormat::BC6H_SFLOAT:
|
||||
case PixelFormat::BC6H_UFLOAT:
|
||||
return 8;
|
||||
default:
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
template <auto decompress, PixelFormat pixel_format>
|
||||
void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent,
|
||||
bool is_signed = false) {
|
||||
const u32 out_bpp = ConvertedBytesPerBlock(pixel_format);
|
||||
const u32 block_width = std::min(extent.width, BLOCK_SIZE);
|
||||
const u32 block_height = std::min(extent.height, BLOCK_SIZE);
|
||||
const u32 pitch = extent.width * out_bpp;
|
||||
size_t input_offset = 0;
|
||||
size_t output_offset = 0;
|
||||
for (u32 slice = 0; slice < extent.depth; ++slice) {
|
||||
for (u32 y = 0; y < extent.height; y += block_height) {
|
||||
size_t row_offset = 0;
|
||||
for (u32 x = 0; x < extent.width;
|
||||
x += block_width, row_offset += block_width * out_bpp) {
|
||||
const u8* src = input.data() + input_offset;
|
||||
u8* const dst = output.data() + output_offset + row_offset;
|
||||
if constexpr (IsSigned(pixel_format)) {
|
||||
decompress(src, dst, x, y, extent.width, extent.height, is_signed);
|
||||
} else {
|
||||
decompress(src, dst, x, y, extent.width, extent.height);
|
||||
}
|
||||
input_offset += BlockSize(pixel_format);
|
||||
}
|
||||
output_offset += block_height * pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
|
||||
VideoCore::Surface::PixelFormat pixel_format) {
|
||||
switch (pixel_format) {
|
||||
case PixelFormat::BC1_RGBA_UNORM:
|
||||
case PixelFormat::BC1_RGBA_SRGB:
|
||||
DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent);
|
||||
break;
|
||||
case PixelFormat::BC2_UNORM:
|
||||
case PixelFormat::BC2_SRGB:
|
||||
DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent);
|
||||
break;
|
||||
case PixelFormat::BC3_UNORM:
|
||||
case PixelFormat::BC3_SRGB:
|
||||
DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent);
|
||||
break;
|
||||
case PixelFormat::BC4_SNORM:
|
||||
case PixelFormat::BC4_UNORM:
|
||||
DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>(
|
||||
input, output, extent, pixel_format == PixelFormat::BC4_SNORM);
|
||||
break;
|
||||
case PixelFormat::BC5_SNORM:
|
||||
case PixelFormat::BC5_UNORM:
|
||||
DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>(
|
||||
input, output, extent, pixel_format == PixelFormat::BC5_SNORM);
|
||||
break;
|
||||
case PixelFormat::BC6H_SFLOAT:
|
||||
case PixelFormat::BC6H_UFLOAT:
|
||||
DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>(
|
||||
input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT);
|
||||
break;
|
||||
case PixelFormat::BC7_SRGB:
|
||||
case PixelFormat::BC7_UNORM:
|
||||
DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent);
|
||||
break;
|
||||
default:
|
||||
LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
@ -1,96 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <span>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/texture_cache/decode_bc4.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
|
||||
[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
|
||||
const u32 code_offset = 16 + 3 * (4 * y + x);
|
||||
const u32 code = (bits >> code_offset) & 7;
|
||||
const u32 red0 = (bits >> 0) & 0xff;
|
||||
const u32 red1 = (bits >> 8) & 0xff;
|
||||
if (red0 > red1) {
|
||||
switch (code) {
|
||||
case 0:
|
||||
return red0;
|
||||
case 1:
|
||||
return red1;
|
||||
case 2:
|
||||
return (6 * red0 + 1 * red1) / 7;
|
||||
case 3:
|
||||
return (5 * red0 + 2 * red1) / 7;
|
||||
case 4:
|
||||
return (4 * red0 + 3 * red1) / 7;
|
||||
case 5:
|
||||
return (3 * red0 + 4 * red1) / 7;
|
||||
case 6:
|
||||
return (2 * red0 + 5 * red1) / 7;
|
||||
case 7:
|
||||
return (1 * red0 + 6 * red1) / 7;
|
||||
}
|
||||
} else {
|
||||
switch (code) {
|
||||
case 0:
|
||||
return red0;
|
||||
case 1:
|
||||
return red1;
|
||||
case 2:
|
||||
return (4 * red0 + 1 * red1) / 5;
|
||||
case 3:
|
||||
return (3 * red0 + 2 * red1) / 5;
|
||||
case 4:
|
||||
return (2 * red0 + 3 * red1) / 5;
|
||||
case 5:
|
||||
return (1 * red0 + 4 * red1) / 5;
|
||||
case 6:
|
||||
return 0;
|
||||
case 7:
|
||||
return 0xff;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
|
||||
UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
|
||||
UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
|
||||
static constexpr u32 BLOCK_SIZE = 4;
|
||||
size_t input_offset = 0;
|
||||
for (u32 slice = 0; slice < extent.depth; ++slice) {
|
||||
for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
|
||||
for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
|
||||
u64 bits;
|
||||
std::memcpy(&bits, &input[input_offset], sizeof(bits));
|
||||
input_offset += sizeof(bits);
|
||||
|
||||
for (u32 y = 0; y < BLOCK_SIZE; ++y) {
|
||||
for (u32 x = 0; x < BLOCK_SIZE; ++x) {
|
||||
const u32 linear_z = slice;
|
||||
const u32 linear_y = block_y * BLOCK_SIZE + y;
|
||||
const u32 linear_x = block_x * BLOCK_SIZE + x;
|
||||
const u32 offset_z = linear_z * extent.width * extent.height;
|
||||
const u32 offset_y = linear_y * extent.width;
|
||||
const u32 offset_x = linear_x;
|
||||
const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
|
||||
const u32 color = DecompressBlock(bits, x, y);
|
||||
output[output_offset + 0] = static_cast<u8>(color);
|
||||
output[output_offset + 1] = 0;
|
||||
output[output_offset + 2] = 0;
|
||||
output[output_offset + 3] = 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
Loading…
Reference in New Issue