@@ -127,7 +127,8 @@ RasterizerOpenGL::~RasterizerOpenGL() {
     }
 }
 
-void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
+std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
+                                                             GLintptr buffer_offset) {
     MICROPROFILE_SCOPE(OpenGL_VAO);
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
     const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
@@ -136,43 +137,59 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     state.draw.vertex_buffer = stream_buffer->GetHandle();
     state.Apply();
 
-    // TODO(bunnei): Add support for 1+ vertex arrays
-    const auto& vertex_array{regs.vertex_array[0]};
-    const auto& vertex_array_limit{regs.vertex_array_limit[0]};
-    ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
-    ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
-    for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
-        ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index);
+    // Upload all guest vertex arrays sequentially to our buffer
+    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+        const auto& vertex_array = regs.vertex_array[index];
+        if (!vertex_array.IsEnabled())
+            continue;
+
+        const Tegra::GPUVAddr start = vertex_array.StartAddress();
+        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+
+        ASSERT(end > start);
+        u64 size = end - start + 1;
+
+        // Copy vertex array data
+        const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)};
+        res_cache.FlushRegion(data_addr, size, nullptr);
+        Memory::ReadBlock(data_addr, array_ptr, size);
+
+        // Bind the vertex array to the buffer at the current offset.
+        glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride);
+
+        ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
+
+        array_ptr += size;
+        buffer_offset += size;
     }
 
     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
     // Enables the first 16 vertex attributes always, as we don't know which ones are actually used
-    // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now
+    // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now
     // to avoid OpenGL errors.
     // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
    // assume every shader uses them all.
     for (unsigned index = 0; index < 16; ++index) {
         auto& attrib = regs.vertex_attrib_format[index];
         NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
                     index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
                     attrib.offset.Value(), attrib.IsNormalized());
 
-        glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
-                              attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
-                              reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
+        auto& buffer = regs.vertex_array[attrib.buffer];
+        ASSERT(buffer.IsEnabled());
+
         glEnableVertexAttribArray(index);
+        glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
+                             attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
+        glVertexAttribBinding(index, attrib.buffer);
+
         hw_vao_enabled_attributes[index] = true;
     }
 
-    // Copy vertex array data
-    const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1};
-    const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
-    res_cache.FlushRegion(data_addr, data_size, nullptr);
-    Memory::ReadBlock(data_addr, array_ptr, data_size);
-
-    array_ptr += data_size;
-    buffer_offset += data_size;
+    return {array_ptr, buffer_offset};
 }
 
-void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) {
+void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
     // Helper function for uploading uniform data
     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
         if (has_ARB_direct_state_access) {
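
Note on the hunk above: attribute setup moves from glVertexAttribPointer to the separated attribute format (GL 4.3 / ARB_vertex_attrib_binding), which is what lets each guest vertex array occupy its own binding slot. A minimal sketch of that API split, assuming a current GL 4.3 context and a glad-style loader; `Vertex`, `vao` and `vbo` are illustrative names, not from the patch:

```cpp
#include <cstddef>      // offsetof
#include <glad/glad.h>  // assumption: any GL 4.3+ function loader

struct Vertex {
    float position[3];
    float color[4];
};

void SetupSeparatedFormat(GLuint vao, GLuint vbo) {
    glBindVertexArray(vao);

    // The data source attaches to binding slot 0 in one call; offset and
    // stride live here, not in the attribute description.
    glBindVertexBuffer(0, vbo, 0, sizeof(Vertex));

    // Attribute layouts are declared independently of any buffer...
    glEnableVertexAttribArray(0);
    glVertexAttribFormat(0, 3, GL_FLOAT, GL_FALSE, offsetof(Vertex, position));
    glEnableVertexAttribArray(1);
    glVertexAttribFormat(1, 4, GL_FLOAT, GL_FALSE, offsetof(Vertex, color));

    // ...and are then routed to a binding slot, mirroring attrib.buffer above.
    glVertexAttribBinding(0, 0);
    glVertexAttribBinding(1, 0);
}
```

Because format (glVertexAttribFormat) and data source (glBindVertexBuffer) are decoupled, the patch can rebind each array's buffer_offset without respecifying the 16 attribute formats.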
@@ -190,8 +207,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size
     u32 current_constbuffer_bindpoint = 0;
 
     for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
-        ptr_pos += sizeof(GLShader::MaxwellUniformData);
-
         auto& shader_config = gpu.regs.shader_config[index];
         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -205,13 +220,16 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size
         }
 
         // Upload uniform data as one UBO per stage
-        const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+        const GLintptr ubo_offset = buffer_offset;
         copy_buffer(uniform_buffers[stage].handle, ubo_offset,
                     sizeof(GLShader::MaxwellUniformData));
         GLShader::MaxwellUniformData* ub_ptr =
-            reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]);
+            reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr);
         ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
 
+        buffer_ptr += sizeof(GLShader::MaxwellUniformData);
+        buffer_offset += sizeof(GLShader::MaxwellUniformData);
+
         // Fetch program code from memory
         GLShader::ProgramCode program_code;
         const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
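
The ubo_offset change above replaces the caller-threaded `ptr_pos` with a cursor the function advances itself. A minimal sketch of that cursor pattern; `UniformData` is a stand-in for GLShader::MaxwellUniformData and the function name is hypothetical:

```cpp
#include <cstddef>
#include <cstring>

using u8 = unsigned char;

struct UniformData {
    float modelview_matrix[16];  // stand-in contents
};

// Write one stage's uniform block at the cursor, then advance both views of
// the cursor in lockstep: the CPU write pointer and the GL-side buffer offset.
void WriteStageUniforms(u8*& write_ptr, std::ptrdiff_t& buffer_offset,
                        const UniformData& data) {
    std::memcpy(write_ptr, &data, sizeof(UniformData));
    write_ptr += sizeof(UniformData);
    buffer_offset += sizeof(UniformData);
}
```

Each stage's UBO range is then bound at the offset captured before the write, which is exactly what copy_buffer does with `ubo_offset`.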
@@ -252,6 +270,24 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size
     shader_program_manager->UseTrivialGeometryShader();
 }
 
+size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+
+    size_t size = 0;
+    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+        if (!regs.vertex_array[index].IsEnabled())
+            continue;
+
+        const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
+        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+
+        ASSERT(end > start);
+        size += end - start + 1;
+    }
+
+    return size;
+}
+
 bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
     DrawArrays();
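
On the `end - start + 1` above: the limit register appears to hold the address of the last byte of the array (an inclusive bound), so the byte count needs the +1. A tiny worked check with made-up addresses:

```cpp
// start = 0x1000, inclusive limit = 0x10FF -> the array spans 0x100 bytes.
static_assert(0x10FF - 0x1000 + 1 == 0x100, "inclusive limit: size = end - start + 1");
```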
@@ -329,44 +365,49 @@ void RasterizerOpenGL::DrawArrays() {
     const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
     const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
 
-    // TODO(bunnei): Add support for 1+ vertex arrays
-    vs_input_size = vertex_num * regs.vertex_array[0].stride;
-
     state.draw.vertex_buffer = stream_buffer->GetHandle();
     state.Apply();
 
-    size_t buffer_size = static_cast<size_t>(vs_input_size);
+    size_t buffer_size = CalculateVertexArraysSize();
+
     if (is_indexed) {
-        buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size;
+        buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;
     }
 
     // Uniform space for the 5 shader stages
-    buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
+    buffer_size = Common::AlignUp<size_t>(buffer_size, 4) +
+                  sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
 
-    size_t ptr_pos = 0;
     u8* buffer_ptr;
     GLintptr buffer_offset;
     std::tie(buffer_ptr, buffer_offset) =
         stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4);
 
-    SetupVertexArray(buffer_ptr, buffer_offset);
-    ptr_pos += vs_input_size;
+    u8* offseted_buffer;
+    std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
+
+    offseted_buffer =
+        reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
+    buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
 
     // If indexed mode, copy the index buffer
     GLintptr index_buffer_offset = 0;
     if (is_indexed) {
-        ptr_pos = Common::AlignUp(ptr_pos, 4);
-
         const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
         const VAddr index_data_addr{
             memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
-        Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size);
+        Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size);
 
-        index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
-        ptr_pos += index_buffer_size;
+        index_buffer_offset = buffer_offset;
+        offseted_buffer += index_buffer_size;
+        buffer_offset += index_buffer_size;
     }
 
-    SetupShaders(buffer_ptr, buffer_offset, ptr_pos);
+    offseted_buffer =
+        reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
+    buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
+
+    SetupShaders(offseted_buffer, buffer_offset);
 
     stream_buffer->Unmap();
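
Taken together, DrawArrays now makes a single stream-buffer allocation laid out as [vertex arrays | pad | index data | pad | per-stage uniforms], aligning each region to 4 bytes. A rough sketch of that arithmetic; the sizes below are illustrative, the real ones come from CalculateVertexArraysSize() and the index/uniform register state:

```cpp
#include <cstddef>

// Same round-up Common::AlignUp performs for power-of-two alignments.
constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
    return (value + align - 1) & ~(align - 1);
}

// Illustrative sizes only.
constexpr std::size_t vertex_size = 0x123;       // CalculateVertexArraysSize()
constexpr std::size_t index_size = 0x40;         // index_buffer_size (indexed draws)
constexpr std::size_t uniform_size = 5 * 0x200;  // one block per shader stage

constexpr std::size_t index_offset = AlignUp(vertex_size, 4);                 // 0x124
constexpr std::size_t uniform_offset = AlignUp(index_offset + index_size, 4); // 0x164
constexpr std::size_t total = uniform_offset + uniform_size;                  // 0xB64

static_assert(index_offset % 4 == 0 && uniform_offset % 4 == 0,
              "each region starts 4-byte aligned, as the patch requires");
```

Mapping `total` bytes once and walking a (pointer, offset) cursor through the three regions is what SetupVertexArrays, the index copy, and SetupShaders now do in sequence.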