@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable {
std : : array < std : : array < u16 , M > , N > values { } ;
std : : array < std : : array < u16 , M > , N > values { } ;
} ;
} ;
constexpr u32 gob_size_x = 64 ;
constexpr u32 gob_size_x_shift = 6 ;
constexpr u32 gob_size_y = 8 ;
constexpr u32 gob_size_y_shift = 3 ;
constexpr u32 gob_size_z = 1 ;
constexpr u32 gob_size_z_shift = 0 ;
constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z ;
constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift ;
constexpr u32 gob_size_x = 1U < < gob_size_x_shift ;
constexpr u32 gob_size_y = 1U < < gob_size_y_shift ;
constexpr u32 gob_size_z = 1U < < gob_size_z_shift ;
constexpr u32 gob_size = 1U < < gob_size_shift ;
constexpr u32 fast_swizzle_align = 16 ;
constexpr u32 fast_swizzle_align = 16 ;
constexpr auto legacy_swizzle_table = SwizzleTable < gob_size_y , gob_size_x , gob_size_z > ( ) ;
constexpr auto legacy_swizzle_table = SwizzleTable < gob_size_y , gob_size_x , gob_size_z > ( ) ;
@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
void CopySwizzledData ( u32 width , u32 height , u32 depth , u32 bytes_per_pixel ,
void CopySwizzledData ( u32 width , u32 height , u32 depth , u32 bytes_per_pixel ,
u32 out_bytes_per_pixel , u8 * const swizzled_data , u8 * const unswizzled_data ,
u32 out_bytes_per_pixel , u8 * const swizzled_data , u8 * const unswizzled_data ,
bool unswizzle , u32 block_height , u32 block_depth , u32 width_spacing ) {
bool unswizzle , u32 block_height , u32 block_depth , u32 width_spacing ) {
const u32 block_height_size { 1U < < block_height } ;
const u32 block_depth_size { 1U < < block_depth } ;
if ( bytes_per_pixel % 3 ! = 0 & & ( width * bytes_per_pixel ) % fast_swizzle_align = = 0 ) {
if ( bytes_per_pixel % 3 ! = 0 & & ( width * bytes_per_pixel ) % fast_swizzle_align = = 0 ) {
SwizzledData < true > ( swizzled_data , unswizzled_data , unswizzle , width , height , depth ,
SwizzledData < true > ( swizzled_data , unswizzled_data , unswizzle , width , height , depth ,
bytes_per_pixel , out_bytes_per_pixel , block_height , block_depth ,
bytes_per_pixel , out_bytes_per_pixel , block_height _size ,
width_spacing) ;
block_depth_size, width_spacing) ;
} else {
} else {
SwizzledData < false > ( swizzled_data , unswizzled_data , unswizzle , width , height , depth ,
SwizzledData < false > ( swizzled_data , unswizzled_data , unswizzle , width , height , depth ,
bytes_per_pixel , out_bytes_per_pixel , block_height , block_depth ,
bytes_per_pixel , out_bytes_per_pixel , block_height _size ,
width_spacing) ;
block_depth_size, width_spacing) ;
}
}
}
}
@ -249,16 +257,18 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
void SwizzleSubrect ( u32 subrect_width , u32 subrect_height , u32 source_pitch , u32 swizzled_width ,
void SwizzleSubrect ( u32 subrect_width , u32 subrect_height , u32 source_pitch , u32 swizzled_width ,
u32 bytes_per_pixel , u8 * swizzled_data , u8 * unswizzled_data , u32 block_height ) {
u32 bytes_per_pixel , u8 * swizzled_data , u8 * unswizzled_data , u32 block_height ) {
const u32 block_height_size { 1U < < block_height } ;
const u32 image_width_in_gobs { ( swizzled_width * bytes_per_pixel + ( gob_size_x - 1 ) ) /
const u32 image_width_in_gobs { ( swizzled_width * bytes_per_pixel + ( gob_size_x - 1 ) ) /
gob_size_x } ;
gob_size_x } ;
for ( u32 line = 0 ; line < subrect_height ; + + line ) {
for ( u32 line = 0 ; line < subrect_height ; + + line ) {
const u32 gob_address_y =
const u32 gob_address_y =
( line / ( gob_size_y * block_height ) ) * gob_size * block_height * image_width_in_gobs +
( line / ( gob_size_y * block_height_size ) ) * gob_size * block_height_size *
( ( line % ( gob_size_y * block_height ) ) / gob_size_y ) * gob_size ;
image_width_in_gobs +
( ( line % ( gob_size_y * block_height_size ) ) / gob_size_y ) * gob_size ;
const auto & table = legacy_swizzle_table [ line % gob_size_y ] ;
const auto & table = legacy_swizzle_table [ line % gob_size_y ] ;
for ( u32 x = 0 ; x < subrect_width ; + + x ) {
for ( u32 x = 0 ; x < subrect_width ; + + x ) {
const u32 gob_address =
const u32 gob_address =
gob_address_y + ( x * bytes_per_pixel / gob_size_x ) * gob_size * block_height ;
gob_address_y + ( x * bytes_per_pixel / gob_size_x ) * gob_size * block_height _size ;
const u32 swizzled_offset = gob_address + table [ ( x * bytes_per_pixel ) % gob_size_x ] ;
const u32 swizzled_offset = gob_address + table [ ( x * bytes_per_pixel ) % gob_size_x ] ;
u8 * source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel ;
u8 * source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel ;
u8 * dest_addr = swizzled_data + swizzled_offset ;
u8 * dest_addr = swizzled_data + swizzled_offset ;
@ -271,14 +281,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
void UnswizzleSubrect ( u32 subrect_width , u32 subrect_height , u32 dest_pitch , u32 swizzled_width ,
void UnswizzleSubrect ( u32 subrect_width , u32 subrect_height , u32 dest_pitch , u32 swizzled_width ,
u32 bytes_per_pixel , u8 * swizzled_data , u8 * unswizzled_data , u32 block_height ,
u32 bytes_per_pixel , u8 * swizzled_data , u8 * unswizzled_data , u32 block_height ,
u32 offset_x , u32 offset_y ) {
u32 offset_x , u32 offset_y ) {
const u32 block_height_size { 1U < < block_height } ;
for ( u32 line = 0 ; line < subrect_height ; + + line ) {
for ( u32 line = 0 ; line < subrect_height ; + + line ) {
const u32 y2 = line + offset_y ;
const u32 y2 = line + offset_y ;
const u32 gob_address_y = ( y2 / ( gob_size_y * block_height ) ) * gob_size * block_height +
const u32 gob_address_y =
( ( y2 % ( gob_size_y * block_height ) ) / gob_size_y ) * gob_size ;
( y2 / ( gob_size_y * block_height_size ) ) * gob_size * block_height_size +
( ( y2 % ( gob_size_y * block_height_size ) ) / gob_size_y ) * gob_size ;
const auto & table = legacy_swizzle_table [ y2 % gob_size_y ] ;
const auto & table = legacy_swizzle_table [ y2 % gob_size_y ] ;
for ( u32 x = 0 ; x < subrect_width ; + + x ) {
for ( u32 x = 0 ; x < subrect_width ; + + x ) {
const u32 x2 = ( x + offset_x ) * bytes_per_pixel ;
const u32 x2 = ( x + offset_x ) * bytes_per_pixel ;
const u32 gob_address = gob_address_y + ( x2 / gob_size_x ) * gob_size * block_height ;
const u32 gob_address =
gob_address_y + ( x2 / gob_size_x ) * gob_size * block_height_size ;
const u32 swizzled_offset = gob_address + table [ x2 % gob_size_x ] ;
const u32 swizzled_offset = gob_address + table [ x2 % gob_size_x ] ;
u8 * dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel ;
u8 * dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel ;
u8 * source_addr = swizzled_data + swizzled_offset ;
u8 * source_addr = swizzled_data + swizzled_offset ;
@ -291,16 +304,18 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
void SwizzleKepler ( const u32 width , const u32 height , const u32 dst_x , const u32 dst_y ,
void SwizzleKepler ( const u32 width , const u32 height , const u32 dst_x , const u32 dst_y ,
const u32 block_height , const std : : size_t copy_size , const u8 * source_data ,
const u32 block_height , const std : : size_t copy_size , const u8 * source_data ,
u8 * swizzle_data ) {
u8 * swizzle_data ) {
const u32 block_height_size { 1U < < block_height } ;
const u32 image_width_in_gobs { ( width + gob_size_x - 1 ) / gob_size_x } ;
const u32 image_width_in_gobs { ( width + gob_size_x - 1 ) / gob_size_x } ;
std : : size_t count = 0 ;
std : : size_t count = 0 ;
for ( std : : size_t y = dst_y ; y < height & & count < copy_size ; + + y ) {
for ( std : : size_t y = dst_y ; y < height & & count < copy_size ; + + y ) {
const std : : size_t gob_address_y =
const std : : size_t gob_address_y =
( y / ( gob_size_y * block_height ) ) * gob_size * block_height * image_width_in_gobs +
( y / ( gob_size_y * block_height_size ) ) * gob_size * block_height_size *
( ( y % ( gob_size_y * block_height ) ) / gob_size_y ) * gob_size ;
image_width_in_gobs +
( ( y % ( gob_size_y * block_height_size ) ) / gob_size_y ) * gob_size ;
const auto & table = legacy_swizzle_table [ y % gob_size_y ] ;
const auto & table = legacy_swizzle_table [ y % gob_size_y ] ;
for ( std : : size_t x = dst_x ; x < width & & count < copy_size ; + + x ) {
for ( std : : size_t x = dst_x ; x < width & & count < copy_size ; + + x ) {
const std : : size_t gob_address =
const std : : size_t gob_address =
gob_address_y + ( x / gob_size_x ) * gob_size * block_height ;
gob_address_y + ( x / gob_size_x ) * gob_size * block_height _size ;
const std : : size_t swizzled_offset = gob_address + table [ x % gob_size_x ] ;
const std : : size_t swizzled_offset = gob_address + table [ x % gob_size_x ] ;
const u8 * source_line = source_data + count ;
const u8 * source_line = source_data + count ;
u8 * dest_addr = swizzle_data + swizzled_offset ;
u8 * dest_addr = swizzle_data + swizzled_offset ;
@ -356,9 +371,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
std : : size_t CalculateSize ( bool tiled , u32 bytes_per_pixel , u32 width , u32 height , u32 depth ,
std : : size_t CalculateSize ( bool tiled , u32 bytes_per_pixel , u32 width , u32 height , u32 depth ,
u32 block_height , u32 block_depth ) {
u32 block_height , u32 block_depth ) {
if ( tiled ) {
if ( tiled ) {
const u32 aligned_width = Common : : Align Up ( width * bytes_per_pixel , gob_size_x ) ;
const u32 aligned_width = Common : : Align Bits ( width * bytes_per_pixel , gob_size_x _shift ) ;
const u32 aligned_height = Common : : Align Up( height , gob_size_y * block_height ) ;
const u32 aligned_height = Common : : Align Bits( height , gob_size_y_shift + block_height ) ;
const u32 aligned_depth = Common : : Align Up( depth , gob_size_z * block_depth ) ;
const u32 aligned_depth = Common : : Align Bits( depth , gob_size_z_shift + block_depth ) ;
return aligned_width * aligned_height * aligned_depth ;
return aligned_width * aligned_height * aligned_depth ;
} else {
} else {
return width * height * depth * bytes_per_pixel ;
return width * height * depth * bytes_per_pixel ;