GPU: Make use of RegisterSet.

master
Tony Wasserka 2014-07-16 11:24:09 +07:00
parent 357d893b26
commit 75775e9ef4
4 changed files with 225 additions and 350 deletions

@ -139,8 +139,8 @@ void RegisterInterruptRelayQueue(Service::Interface* self) {
Kernel::SetEventLocked(g_event, false); Kernel::SetEventLocked(g_event, false);
// Hack - This function will permanently set the state of the GSP event such that GPU command // Hack - This function will permanently set the state of the GSP event such that GPU command
// synchronization barriers always passthrough. Correct solution would be to set this after the // synchronization barriers always passthrough. Correct solution would be to set this after the
// GPU has processed all queued up commands, but due to the emulator being single-threaded they // GPU has processed all queued up commands, but due to the emulator being single-threaded they
// will always be ready. // will always be ready.
Kernel::SetPermanentLock(g_event, true); Kernel::SetPermanentLock(g_event, true);
@ -153,6 +153,12 @@ void RegisterInterruptRelayQueue(Service::Interface* self) {
/// This triggers handling of the GX command written to the command buffer in shared memory. /// This triggers handling of the GX command written to the command buffer in shared memory.
void TriggerCmdReqQueue(Service::Interface* self) { void TriggerCmdReqQueue(Service::Interface* self) {
// Utility function to convert register ID to address
auto WriteGPURegister = [](u32 id, u32 data) {
GPU::Write<u32>(0x1EF00000 + 4 * id, data);
};
GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id); GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id);
u32* cmd_buff = (u32*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20)); u32* cmd_buff = (u32*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20));
@ -164,9 +170,9 @@ void TriggerCmdReqQueue(Service::Interface* self) {
break; break;
case GXCommandId::SET_COMMAND_LIST_LAST: case GXCommandId::SET_COMMAND_LIST_LAST:
GPU::Write<u32>(GPU::Registers::CommandListAddress, cmd_buff[1] >> 3); WriteGPURegister(GPU::Regs::CommandProcessor + 2, cmd_buff[1] >> 3); // command list data address
GPU::Write<u32>(GPU::Registers::CommandListSize, cmd_buff[2] >> 3); WriteGPURegister(GPU::Regs::CommandProcessor, cmd_buff[2] >> 3); // command list size
GPU::Write<u32>(GPU::Registers::ProcessCommandList, 1); // TODO: Not sure if we are supposed to always write this WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1); // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
// TODO: Move this to GPU // TODO: Move this to GPU
// TODO: Not sure what units the size is measured in // TODO: Not sure what units the size is measured in
@ -174,27 +180,28 @@ void TriggerCmdReqQueue(Service::Interface* self) {
break; break;
case GXCommandId::SET_MEMORY_FILL: case GXCommandId::SET_MEMORY_FILL:
GPU::Write<u32>(GPU::Registers::MemoryFillStart1, cmd_buff[1] >> 3); WriteGPURegister(GPU::Regs::MemoryFill, cmd_buff[1] >> 3); // Start 1
GPU::Write<u32>(GPU::Registers::MemoryFillEnd1, cmd_buff[3] >> 3); WriteGPURegister(GPU::Regs::MemoryFill + 1, cmd_buff[3] >> 3); // End 1
GPU::Write<u32>(GPU::Registers::MemoryFillSize1, cmd_buff[3] - cmd_buff[1]); WriteGPURegister(GPU::Regs::MemoryFill + 2, cmd_buff[3] - cmd_buff[1]); // Size 1
GPU::Write<u32>(GPU::Registers::MemoryFillValue1, cmd_buff[2]); WriteGPURegister(GPU::Regs::MemoryFill + 3, cmd_buff[2]); // Value 1
GPU::Write<u32>(GPU::Registers::MemoryFillStart2, cmd_buff[4] >> 3);
GPU::Write<u32>(GPU::Registers::MemoryFillEnd2, cmd_buff[6] >> 3); WriteGPURegister(GPU::Regs::MemoryFill + 4, cmd_buff[4] >> 3); // Start 2
GPU::Write<u32>(GPU::Registers::MemoryFillSize2, cmd_buff[6] - cmd_buff[4]); WriteGPURegister(GPU::Regs::MemoryFill + 5, cmd_buff[6] >> 3); // End 2
GPU::Write<u32>(GPU::Registers::MemoryFillValue2, cmd_buff[5]); WriteGPURegister(GPU::Regs::MemoryFill + 6, cmd_buff[6] - cmd_buff[4]); // Size 2
WriteGPURegister(GPU::Regs::MemoryFill + 7, cmd_buff[5]); // Value 2
break; break;
// TODO: Check if texture copies are implemented correctly.. // TODO: Check if texture copies are implemented correctly..
case GXCommandId::SET_DISPLAY_TRANSFER: case GXCommandId::SET_DISPLAY_TRANSFER:
case GXCommandId::SET_TEXTURE_COPY: case GXCommandId::SET_TEXTURE_COPY:
GPU::Write<u32>(GPU::Registers::DisplayInputBufferAddr, cmd_buff[1] >> 3); WriteGPURegister(GPU::Regs::DisplayTransfer, cmd_buff[1] >> 3); // input buffer address
GPU::Write<u32>(GPU::Registers::DisplayOutputBufferAddr, cmd_buff[2] >> 3); WriteGPURegister(GPU::Regs::DisplayTransfer + 1, cmd_buff[2] >> 3); // output buffer address
GPU::Write<u32>(GPU::Registers::DisplayInputBufferSize, cmd_buff[3]); WriteGPURegister(GPU::Regs::DisplayTransfer + 3, cmd_buff[3]); // input buffer size
GPU::Write<u32>(GPU::Registers::DisplayOutputBufferSize, cmd_buff[4]); WriteGPURegister(GPU::Regs::DisplayTransfer + 2, cmd_buff[4]); // output buffer size
GPU::Write<u32>(GPU::Registers::DisplayTransferFlags, cmd_buff[5]); WriteGPURegister(GPU::Regs::DisplayTransfer + 4, cmd_buff[5]); // transfer flags
// TODO: GPU::Registers::DisplayTriggerTransfer should be ORed with 1 for texture copies? // TODO: Should this only be ORed with 1 for texture copies?
GPU::Write<u32>(GPU::Registers::DisplayTriggerTransfer, 1); WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1); // trigger transfer
break; break;
case GXCommandId::SET_COMMAND_LIST_FIRST: case GXCommandId::SET_COMMAND_LIST_FIRST:

@ -15,38 +15,48 @@
namespace GPU { namespace GPU {
Registers g_regs; RegisterSet<u32, Regs> g_regs;
u64 g_last_ticks = 0; ///< Last CPU ticks u64 g_last_ticks = 0; ///< Last CPU ticks
/** /**
* Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM
* @param * @param
*/ */
void SetFramebufferLocation(const FramebufferLocation mode) { void SetFramebufferLocation(const FramebufferLocation mode) {
switch (mode) { switch (mode) {
case FRAMEBUFFER_LOCATION_FCRAM: case FRAMEBUFFER_LOCATION_FCRAM:
g_regs.framebuffer_top_left_1 = PADDR_TOP_LEFT_FRAME1; {
g_regs.framebuffer_top_left_2 = PADDR_TOP_LEFT_FRAME2; auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
g_regs.framebuffer_top_right_1 = PADDR_TOP_RIGHT_FRAME1; auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
g_regs.framebuffer_top_right_2 = PADDR_TOP_RIGHT_FRAME2;
g_regs.framebuffer_sub_left_1 = PADDR_SUB_FRAME1; framebuffer_top.data.address_left1 = PADDR_TOP_LEFT_FRAME1;
//g_regs.framebuffer_sub_left_2 = unknown; framebuffer_top.data.address_left2 = PADDR_TOP_LEFT_FRAME2;
g_regs.framebuffer_sub_right_1 = PADDR_SUB_FRAME2; framebuffer_top.data.address_right1 = PADDR_TOP_RIGHT_FRAME1;
//g_regs.framebufferr_sub_right_2 = unknown; framebuffer_top.data.address_right2 = PADDR_TOP_RIGHT_FRAME2;
framebuffer_sub.data.address_left1 = PADDR_SUB_FRAME1;
//framebuffer_sub.data.address_left2 = unknown;
framebuffer_sub.data.address_right1 = PADDR_SUB_FRAME2;
//framebuffer_sub.data.address_right2 = unknown;
break; break;
}
case FRAMEBUFFER_LOCATION_VRAM: case FRAMEBUFFER_LOCATION_VRAM:
g_regs.framebuffer_top_left_1 = PADDR_VRAM_TOP_LEFT_FRAME1; {
g_regs.framebuffer_top_left_2 = PADDR_VRAM_TOP_LEFT_FRAME2; auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
g_regs.framebuffer_top_right_1 = PADDR_VRAM_TOP_RIGHT_FRAME1; auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
g_regs.framebuffer_top_right_2 = PADDR_VRAM_TOP_RIGHT_FRAME2;
g_regs.framebuffer_sub_left_1 = PADDR_VRAM_SUB_FRAME1; framebuffer_top.data.address_left1 = PADDR_VRAM_TOP_LEFT_FRAME1;
//g_regs.framebuffer_sub_left_2 = unknown; framebuffer_top.data.address_left2 = PADDR_VRAM_TOP_LEFT_FRAME2;
g_regs.framebuffer_sub_right_1 = PADDR_VRAM_SUB_FRAME2; framebuffer_top.data.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1;
//g_regs.framebufferr_sub_right_2 = unknown; framebuffer_top.data.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2;
framebuffer_sub.data.address_left1 = PADDR_VRAM_SUB_FRAME1;
//framebuffer_sub.data.address_left2 = unknown;
framebuffer_sub.data.address_right1 = PADDR_VRAM_SUB_FRAME2;
//framebuffer_sub.data.address_right2 = unknown;
break; break;
} }
}
} }
/** /**
@ -87,219 +97,73 @@ const u8* GetFramebufferPointer(const u32 address) {
} }
template <typename T> template <typename T>
inline void Read(T &var, const u32 addr) { inline void Read(T &var, const u32 raw_addr) {
switch (addr) { u32 addr = raw_addr - 0x1EF00000;
case Registers::MemoryFillStart1: int index = addr / 4;
case Registers::MemoryFillStart2:
var = g_regs.memory_fill[(addr - Registers::MemoryFillStart1) / 0x10].address_start;
break;
case Registers::MemoryFillEnd1: // Reads other than u32 are untested, so I'd rather have them abort than silently fail
case Registers::MemoryFillEnd2: if (index >= Regs::NumIds || !std::is_same<T,u32>::value)
var = g_regs.memory_fill[(addr - Registers::MemoryFillEnd1) / 0x10].address_end; {
break;
case Registers::MemoryFillSize1:
case Registers::MemoryFillSize2:
var = g_regs.memory_fill[(addr - Registers::MemoryFillSize1) / 0x10].size;
break;
case Registers::MemoryFillValue1:
case Registers::MemoryFillValue2:
var = g_regs.memory_fill[(addr - Registers::MemoryFillValue1) / 0x10].value;
break;
case Registers::FramebufferTopSize:
var = g_regs.top_framebuffer.size;
break;
case Registers::FramebufferTopLeft1:
var = g_regs.framebuffer_top_left_1;
break;
case Registers::FramebufferTopLeft2:
var = g_regs.framebuffer_top_left_2;
break;
case Registers::FramebufferTopFormat:
var = g_regs.top_framebuffer.format;
break;
case Registers::FramebufferTopSwapBuffers:
var = g_regs.top_framebuffer.active_fb;
break;
case Registers::FramebufferTopStride:
var = g_regs.top_framebuffer.stride;
break;
case Registers::FramebufferTopRight1:
var = g_regs.framebuffer_top_right_1;
break;
case Registers::FramebufferTopRight2:
var = g_regs.framebuffer_top_right_2;
break;
case Registers::FramebufferSubSize:
var = g_regs.sub_framebuffer.size;
break;
case Registers::FramebufferSubLeft1:
var = g_regs.framebuffer_sub_left_1;
break;
case Registers::FramebufferSubRight1:
var = g_regs.framebuffer_sub_right_1;
break;
case Registers::FramebufferSubFormat:
var = g_regs.sub_framebuffer.format;
break;
case Registers::FramebufferSubSwapBuffers:
var = g_regs.sub_framebuffer.active_fb;
break;
case Registers::FramebufferSubStride:
var = g_regs.sub_framebuffer.stride;
break;
case Registers::FramebufferSubLeft2:
var = g_regs.framebuffer_sub_left_2;
break;
case Registers::FramebufferSubRight2:
var = g_regs.framebuffer_sub_right_2;
break;
case Registers::DisplayInputBufferAddr:
var = g_regs.display_transfer.input_address;
break;
case Registers::DisplayOutputBufferAddr:
var = g_regs.display_transfer.output_address;
break;
case Registers::DisplayOutputBufferSize:
var = g_regs.display_transfer.output_size;
break;
case Registers::DisplayInputBufferSize:
var = g_regs.display_transfer.input_size;
break;
case Registers::DisplayTransferFlags:
var = g_regs.display_transfer.flags;
break;
// Not sure if this is supposed to be readable
case Registers::DisplayTriggerTransfer:
var = g_regs.display_transfer.trigger;
break;
case Registers::CommandListSize:
var = g_regs.command_list_size;
break;
case Registers::CommandListAddress:
var = g_regs.command_list_address;
break;
case Registers::ProcessCommandList:
var = g_regs.command_processing_enabled;
break;
default:
ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr); ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr);
break; return;
} }
var = g_regs[static_cast<Regs::Id>(addr / 4)];
} }
template <typename T> template <typename T>
inline void Write(u32 addr, const T data) { inline void Write(u32 addr, const T data) {
switch (static_cast<Registers::Id>(addr)) { addr -= 0x1EF00000;
case Registers::MemoryFillStart1: int index = addr / 4;
case Registers::MemoryFillStart2:
g_regs.memory_fill[(addr - Registers::MemoryFillStart1) / 0x10].address_start = data;
break;
case Registers::MemoryFillEnd1: // Writes other than u32 are untested, so I'd rather have them abort than silently fail
case Registers::MemoryFillEnd2: if (index >= Regs::NumIds || !std::is_same<T,u32>::value)
g_regs.memory_fill[(addr - Registers::MemoryFillEnd1) / 0x10].address_end = data;
break;
case Registers::MemoryFillSize1:
case Registers::MemoryFillSize2:
g_regs.memory_fill[(addr - Registers::MemoryFillSize1) / 0x10].size = data;
break;
case Registers::MemoryFillValue1:
case Registers::MemoryFillValue2:
{ {
Registers::MemoryFillConfig& config = g_regs.memory_fill[(addr - Registers::MemoryFillValue1) / 0x10]; ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr);
config.value = data; return;
}
g_regs[static_cast<Regs::Id>(index)] = data;
switch (static_cast<Regs::Id>(index)) {
// Memory fills are triggered once the fill value is written.
// NOTE: This is not verified.
case Regs::MemoryFill + 3:
case Regs::MemoryFill + 7:
{
const auto& config = g_regs.Get<Regs::MemoryFill>(static_cast<Regs::Id>(index - 3));
// TODO: Not sure if this check should be done at GSP level instead // TODO: Not sure if this check should be done at GSP level instead
if (config.address_start) { if (config.data.address_start) {
// TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all
u32* start = (u32*)Memory::GetPointer(config.GetStartAddress()); u32* start = (u32*)Memory::GetPointer(config.data.GetStartAddress());
u32* end = (u32*)Memory::GetPointer(config.GetEndAddress()); u32* end = (u32*)Memory::GetPointer(config.data.GetEndAddress());
for (u32* ptr = start; ptr < end; ++ptr) for (u32* ptr = start; ptr < end; ++ptr)
*ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation *ptr = bswap32(config.data.value); // TODO: This is just a workaround to missing framebuffer format emulation
DEBUG_LOG(GPU, "MemoryFill from %x to %x", config.GetStartAddress(), config.GetEndAddress()); DEBUG_LOG(GPU, "MemoryFill from %x to %x", config.data.GetStartAddress(), config.data.GetEndAddress());
} }
break; break;
} }
// TODO: Framebuffer registers!! case Regs::DisplayTransfer + 6:
case Registers::FramebufferTopSwapBuffers: {
g_regs.top_framebuffer.active_fb = data; const auto& config = g_regs.Get<Regs::DisplayTransfer>();
// TODO: Not sure if this should only be done upon a change! if (config.data.trigger & 1) {
break; u8* source_pointer = Memory::GetPointer(config.data.GetPhysicalInputAddress());
u8* dest_pointer = Memory::GetPointer(config.data.GetPhysicalOutputAddress());
case Registers::FramebufferSubSwapBuffers: for (int y = 0; y < config.data.output_height; ++y) {
g_regs.sub_framebuffer.active_fb = data;
// TODO: Not sure if this should only be done upon a change!
break;
case Registers::DisplayInputBufferAddr:
g_regs.display_transfer.input_address = data;
break;
case Registers::DisplayOutputBufferAddr:
g_regs.display_transfer.output_address = data;
break;
case Registers::DisplayOutputBufferSize:
g_regs.display_transfer.output_size = data;
break;
case Registers::DisplayInputBufferSize:
g_regs.display_transfer.input_size = data;
break;
case Registers::DisplayTransferFlags:
g_regs.display_transfer.flags = data;
break;
case Registers::DisplayTriggerTransfer:
g_regs.display_transfer.trigger = data;
if (g_regs.display_transfer.trigger & 1) {
u8* source_pointer = Memory::GetPointer(g_regs.display_transfer.GetPhysicalInputAddress());
u8* dest_pointer = Memory::GetPointer(g_regs.display_transfer.GetPhysicalOutputAddress());
for (int y = 0; y < g_regs.display_transfer.output_height; ++y) {
// TODO: Why does the register seem to hold twice the framebuffer width? // TODO: Why does the register seem to hold twice the framebuffer width?
for (int x = 0; x < g_regs.display_transfer.output_width / 2; ++x) { for (int x = 0; x < config.data.output_width / 2; ++x) {
int source[4] = { 0, 0, 0, 0}; // rgba; int source[4] = { 0, 0, 0, 0}; // rgba;
switch (g_regs.display_transfer.input_format) { switch (config.data.input_format) {
case Registers::FramebufferFormat::RGBA8: case Regs::FramebufferFormat::RGBA8:
{ {
// TODO: Most likely got the component order messed up. // TODO: Most likely got the component order messed up.
u8* srcptr = source_pointer + x * 4 + y * g_regs.display_transfer.input_width * 4 / 2; u8* srcptr = source_pointer + x * 4 + y * config.data.input_width * 4 / 2;
source[0] = srcptr[0]; // blue source[0] = srcptr[0]; // blue
source[1] = srcptr[1]; // green source[1] = srcptr[1]; // green
source[2] = srcptr[2]; // red source[2] = srcptr[2]; // red
@ -308,15 +172,15 @@ inline void Write(u32 addr, const T data) {
} }
default: default:
ERROR_LOG(GPU, "Unknown source framebuffer format %x", (int)g_regs.display_transfer.input_format.Value()); ERROR_LOG(GPU, "Unknown source framebuffer format %x", config.data.input_format.Value());
break; break;
} }
switch (g_regs.display_transfer.output_format) { switch (config.data.output_format) {
/*case Registers::FramebufferFormat::RGBA8: /*case Regs::FramebufferFormat::RGBA8:
{ {
// TODO: Untested // TODO: Untested
u8* dstptr = (u32*)(dest_pointer + x * 4 + y * g_regs.display_transfer.output_width * 4); u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.data.output_width * 4);
dstptr[0] = source[0]; dstptr[0] = source[0];
dstptr[1] = source[1]; dstptr[1] = source[1];
dstptr[2] = source[2]; dstptr[2] = source[2];
@ -324,9 +188,9 @@ inline void Write(u32 addr, const T data) {
break; break;
}*/ }*/
case Registers::FramebufferFormat::RGB8: case Regs::FramebufferFormat::RGB8:
{ {
u8* dstptr = dest_pointer + x * 3 + y * g_regs.display_transfer.output_width * 3 / 2; u8* dstptr = dest_pointer + x * 3 + y * config.data.output_width * 3 / 2;
dstptr[0] = source[0]; // blue dstptr[0] = source[0]; // blue
dstptr[1] = source[1]; // green dstptr[1] = source[1]; // green
dstptr[2] = source[2]; // red dstptr[2] = source[2]; // red
@ -334,40 +198,34 @@ inline void Write(u32 addr, const T data) {
} }
default: default:
ERROR_LOG(GPU, "Unknown destination framebuffer format %x", static_cast<int>(g_regs.display_transfer.output_format.Value())); ERROR_LOG(GPU, "Unknown destination framebuffer format %x", config.data.output_format.Value());
break; break;
} }
} }
} }
DEBUG_LOG(GPU, "DisplayTriggerTransfer: %x bytes from %x(%xx%x)-> %x(%xx%x), dst format %x", DEBUG_LOG(GPU, "DisplayTriggerTransfer: %x bytes from %x(%xx%x)-> %x(%xx%x), dst format %x",
g_regs.display_transfer.output_height * g_regs.display_transfer.output_width * 4, config.data.output_height * config.data.output_width * 4,
g_regs.display_transfer.GetPhysicalInputAddress(), (int)g_regs.display_transfer.input_width, (int)g_regs.display_transfer.input_height, config.data.GetPhysicalInputAddress(), (int)config.data.input_width, (int)config.data.input_height,
g_regs.display_transfer.GetPhysicalOutputAddress(), (int)g_regs.display_transfer.output_width, (int)g_regs.display_transfer.output_height, config.data.GetPhysicalOutputAddress(), (int)config.data.output_width, (int)config.data.output_height,
(int)g_regs.display_transfer.output_format.Value()); config.data.output_format.Value());
} }
break; break;
}
case Registers::CommandListSize: case Regs::CommandProcessor + 4:
g_regs.command_list_size = data; {
break; const auto& config = g_regs.Get<Regs::CommandProcessor>();
if (config.data.trigger & 1)
case Registers::CommandListAddress:
g_regs.command_list_address = data;
break;
case Registers::ProcessCommandList:
g_regs.command_processing_enabled = data;
if (g_regs.command_processing_enabled & 1)
{ {
// u32* buffer = (u32*)Memory::GetPointer(g_regs.command_list_address << 3); // u32* buffer = (u32*)Memory::GetPointer(config.data.address << 3);
ERROR_LOG(GPU, "Beginning %x bytes of commands from address %x", g_regs.command_list_size, g_regs.command_list_address << 3); ERROR_LOG(GPU, "Beginning %x bytes of commands from address %x", config.data.size, config.data.address << 3);
// TODO: Process command list! // TODO: Process command list!
} }
break; break;
}
default: default:
ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr);
break; break;
} }
} }
@ -402,18 +260,20 @@ void Init() {
// SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); // SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM);
SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM); SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM);
auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
// TODO: Width should be 240 instead? // TODO: Width should be 240 instead?
g_regs.top_framebuffer.width = 480; framebuffer_top.data.width = 480;
g_regs.top_framebuffer.height = 400; framebuffer_top.data.height = 400;
g_regs.top_framebuffer.stride = 480*3; framebuffer_top.data.stride = 480*3;
g_regs.top_framebuffer.color_format = Registers::FramebufferFormat::RGB8; framebuffer_top.data.color_format = Regs::FramebufferFormat::RGB8;
g_regs.top_framebuffer.active_fb = 0; framebuffer_top.data.active_fb = 0;
g_regs.sub_framebuffer.width = 480; framebuffer_sub.data.width = 480;
g_regs.sub_framebuffer.height = 400; framebuffer_sub.data.height = 400;
g_regs.sub_framebuffer.stride = 480*3; framebuffer_sub.data.stride = 480*3;
g_regs.sub_framebuffer.color_format = Registers::FramebufferFormat::RGB8; framebuffer_sub.data.color_format = Regs::FramebufferFormat::RGB8;
g_regs.sub_framebuffer.active_fb = 0; framebuffer_sub.data.active_fb = 0;
NOTICE_LOG(GPU, "initialized OK"); NOTICE_LOG(GPU, "initialized OK");
} }

@ -6,54 +6,31 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/register_set.h"
namespace GPU { namespace GPU {
static const u32 kFrameCycles = 268123480 / 60; ///< 268MHz / 60 frames per second static const u32 kFrameCycles = 268123480 / 60; ///< 268MHz / 60 frames per second
static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of instructions/frame static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of instructions/frame
struct Registers { // MMIO region 0x1EFxxxxx
struct Regs {
enum Id : u32 { enum Id : u32 {
MemoryFillStart1 = 0x1EF00010, MemoryFill = 0x00004, // + 5,6,7; second block at 8-11
MemoryFillEnd1 = 0x1EF00014,
MemoryFillSize1 = 0x1EF00018,
MemoryFillValue1 = 0x1EF0001C,
MemoryFillStart2 = 0x1EF00020,
MemoryFillEnd2 = 0x1EF00024,
MemoryFillSize2 = 0x1EF00028,
MemoryFillValue2 = 0x1EF0002C,
FramebufferTopSize = 0x1EF0045C, FramebufferTop = 0x00117, // + 11a,11b,11c,11d(?),11e...126
FramebufferTopLeft1 = 0x1EF00468, // Main LCD, first framebuffer for 3D left FramebufferBottom = 0x00157, // + 15a,15b,15c,15d(?),15e...166
FramebufferTopLeft2 = 0x1EF0046C, // Main LCD, second framebuffer for 3D left
FramebufferTopFormat = 0x1EF00470,
FramebufferTopSwapBuffers = 0x1EF00478,
FramebufferTopStride = 0x1EF00490, // framebuffer row stride?
FramebufferTopRight1 = 0x1EF00494, // Main LCD, first framebuffer for 3D right
FramebufferTopRight2 = 0x1EF00498, // Main LCD, second framebuffer for 3D right
FramebufferSubSize = 0x1EF0055C, DisplayTransfer = 0x00300, // + 301,302,303,304,305,306
FramebufferSubLeft1 = 0x1EF00568, // Sub LCD, first framebuffer
FramebufferSubLeft2 = 0x1EF0056C, // Sub LCD, second framebuffer
FramebufferSubFormat = 0x1EF00570,
FramebufferSubSwapBuffers = 0x1EF00578,
FramebufferSubStride = 0x1EF00590, // framebuffer row stride?
FramebufferSubRight1 = 0x1EF00594, // Sub LCD, unused first framebuffer
FramebufferSubRight2 = 0x1EF00598, // Sub LCD, unused second framebuffer
DisplayInputBufferAddr = 0x1EF00C00, CommandProcessor = 0x00638, // + 63a,63c
DisplayOutputBufferAddr = 0x1EF00C04,
DisplayOutputBufferSize = 0x1EF00C08,
DisplayInputBufferSize = 0x1EF00C0C,
DisplayTransferFlags = 0x1EF00C10,
// Unknown??
DisplayTriggerTransfer = 0x1EF00C18,
CommandListSize = 0x1EF018E0, NumIds = 0x01000
CommandListAddress = 0x1EF018E8,
ProcessCommandList = 0x1EF018F0,
}; };
template<Id id>
union Struct;
enum class FramebufferFormat : u32 { enum class FramebufferFormat : u32 {
RGBA8 = 0, RGBA8 = 0,
RGB8 = 1, RGB8 = 1,
@ -62,7 +39,11 @@ struct Registers {
RGBA4 = 4, RGBA4 = 4,
}; };
struct MemoryFillConfig { };
template<>
union Regs::Struct<Regs::MemoryFill> {
struct {
u32 address_start; u32 address_start;
u32 address_end; // ? u32 address_end; // ?
u32 size; u32 size;
@ -75,21 +56,15 @@ struct Registers {
inline u32 GetEndAddress() const { inline u32 GetEndAddress() const {
return address_end * 8; return address_end * 8;
} }
}; } data;
};
static_assert(sizeof(Regs::Struct<Regs::MemoryFill>) == 0x10, "Structure size and register block length don't match");
MemoryFillConfig memory_fill[2]; template<>
union Regs::Struct<Regs::FramebufferTop> {
using Format = Regs::FramebufferFormat;
// TODO: Move these into the framebuffer struct struct {
u32 framebuffer_top_left_1;
u32 framebuffer_top_left_2;
u32 framebuffer_top_right_1;
u32 framebuffer_top_right_2;
u32 framebuffer_sub_left_1;
u32 framebuffer_sub_left_2;
u32 framebuffer_sub_right_1;
u32 framebuffer_sub_right_2;
struct FrameBufferConfig {
union { union {
u32 size; u32 size;
@ -97,22 +72,43 @@ struct Registers {
BitField<16, 16, u32> height; BitField<16, 16, u32> height;
}; };
u32 pad0[2];
u32 address_left1;
u32 address_left2;
union { union {
u32 format; u32 format;
BitField< 0, 3, FramebufferFormat> color_format; BitField< 0, 3, Format> color_format;
}; };
u32 pad1;
union { union {
u32 active_fb; u32 active_fb;
BitField<0, 1, u32> second_fb_active; BitField<0, 1, u32> second_fb_active;
}; };
u32 pad2[5];
u32 stride; u32 stride;
};
FrameBufferConfig top_framebuffer; u32 address_right1;
FrameBufferConfig sub_framebuffer; u32 address_right2;
} data;
};
template<>
union Regs::Struct<Regs::FramebufferBottom> {
using Type = decltype(Regs::Struct<Regs::FramebufferTop>::data);
Type data;
};
static_assert(sizeof(Regs::Struct<Regs::FramebufferTop>) == 0x40, "Structure size and register block length don't match");
template<>
union Regs::Struct<Regs::DisplayTransfer> {
using Format = Regs::FramebufferFormat;
struct { struct {
u32 input_address; u32 input_address;
@ -144,21 +140,31 @@ struct Registers {
u32 flags; u32 flags;
BitField< 0, 1, u32> flip_data; BitField< 0, 1, u32> flip_data;
BitField< 8, 3, FramebufferFormat> input_format; BitField< 8, 3, Format> input_format;
BitField<12, 3, FramebufferFormat> output_format; BitField<12, 3, Format> output_format;
BitField<16, 1, u32> output_tiled; BitField<16, 1, u32> output_tiled;
}; };
u32 unknown; u32 unknown;
u32 trigger; u32 trigger;
} display_transfer; } data;
u32 command_list_size;
u32 command_list_address;
u32 command_processing_enabled;
}; };
static_assert(sizeof(Regs::Struct<Regs::DisplayTransfer>) == 0x1C, "Structure size and register block length don't match");
extern Registers g_regs; template<>
union Regs::Struct<Regs::CommandProcessor> {
struct {
u32 size;
u32 pad0;
u32 address;
u32 pad1;
u32 trigger;
} data;
};
static_assert(sizeof(Regs::Struct<Regs::CommandProcessor>) == 0x14, "Structure size and register block length don't match");
extern RegisterSet<u32, Regs> g_regs;
enum { enum {
TOP_ASPECT_X = 0x5, TOP_ASPECT_X = 0x5,
@ -208,7 +214,7 @@ enum FramebufferLocation {
/** /**
* Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM
* @param * @param
*/ */
void SetFramebufferLocation(const FramebufferLocation mode); void SetFramebufferLocation(const FramebufferLocation mode);

@ -12,8 +12,8 @@
/// RendererOpenGL constructor /// RendererOpenGL constructor
RendererOpenGL::RendererOpenGL() { RendererOpenGL::RendererOpenGL() {
memset(m_fbo, 0, sizeof(m_fbo)); memset(m_fbo, 0, sizeof(m_fbo));
memset(m_fbo_rbo, 0, sizeof(m_fbo_rbo)); memset(m_fbo_rbo, 0, sizeof(m_fbo_rbo));
memset(m_fbo_depth_buffers, 0, sizeof(m_fbo_depth_buffers)); memset(m_fbo_depth_buffers, 0, sizeof(m_fbo_depth_buffers));
m_resolution_width = max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); m_resolution_width = max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth);
@ -35,7 +35,7 @@ void RendererOpenGL::SwapBuffers() {
m_render_window->MakeCurrent(); m_render_window->MakeCurrent();
// EFB->XFB copy // EFB->XFB copy
// TODO(bunnei): This is a hack and does not belong here. The copy should be triggered by some // TODO(bunnei): This is a hack and does not belong here. The copy should be triggered by some
// register write. We're also treating both framebuffers as a single one in OpenGL. // register write. We're also treating both framebuffers as a single one in OpenGL.
common::Rect framebuffer_size(0, 0, m_resolution_width, m_resolution_height); common::Rect framebuffer_size(0, 0, m_resolution_width, m_resolution_height);
RenderXFB(framebuffer_size, framebuffer_size); RenderXFB(framebuffer_size, framebuffer_size);
@ -71,24 +71,26 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) {
} }
} }
/** /**
* Renders external framebuffer (XFB) * Renders external framebuffer (XFB)
* @param src_rect Source rectangle in XFB to copy * @param src_rect Source rectangle in XFB to copy
* @param dst_rect Destination rectangle in output framebuffer to copy to * @param dst_rect Destination rectangle in output framebuffer to copy to
*/ */
void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) {
const u32 active_fb_top = (GPU::g_regs.top_framebuffer.active_fb == 1) const auto& framebuffer_top = GPU::g_regs.Get<GPU::Regs::FramebufferTop>();
? GPU::g_regs.framebuffer_top_left_2 const auto& framebuffer_sub = GPU::g_regs.Get<GPU::Regs::FramebufferBottom>();
: GPU::g_regs.framebuffer_top_left_1; const u32 active_fb_top = (framebuffer_top.data.active_fb == 1)
const u32 active_fb_sub = (GPU::g_regs.sub_framebuffer.active_fb == 1) ? framebuffer_top.data.address_left2
? GPU::g_regs.framebuffer_sub_left_2 : framebuffer_top.data.address_left1;
: GPU::g_regs.framebuffer_sub_left_1; const u32 active_fb_sub = (framebuffer_sub.data.active_fb == 1)
? framebuffer_sub.data.address_left2
: framebuffer_sub.data.address_left1;
DEBUG_LOG(GPU, "RenderXFB: %x bytes from %x(%xx%x), fmt %x", DEBUG_LOG(GPU, "RenderXFB: %x bytes from %x(%xx%x), fmt %x",
GPU::g_regs.top_framebuffer.stride * GPU::g_regs.top_framebuffer.height, framebuffer_top.data.stride * framebuffer_top.data.height,
GPU::GetFramebufferAddr(GPU::g_regs.framebuffer_top_left_1), (int)GPU::g_regs.top_framebuffer.width, GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.data.width,
(int)GPU::g_regs.top_framebuffer.height, (int)GPU::g_regs.top_framebuffer.format); (int)framebuffer_top.data.height, (int)framebuffer_top.data.format);
// TODO: This should consider the GPU registers for framebuffer width, height and stride. // TODO: This should consider the GPU registers for framebuffer width, height and stride.
FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped); FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped);
@ -112,7 +114,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect&
glReadBuffer(GL_COLOR_ATTACHMENT0); glReadBuffer(GL_COLOR_ATTACHMENT0);
// Blit // Blit
glBlitFramebuffer(src_rect.x0_, src_rect.y0_, src_rect.x1_, src_rect.y1_, glBlitFramebuffer(src_rect.x0_, src_rect.y0_, src_rect.x1_, src_rect.y1_,
dst_rect.x0_, dst_rect.y1_, dst_rect.x1_, dst_rect.y0_, dst_rect.x0_, dst_rect.y1_, dst_rect.x1_, dst_rect.y0_,
GL_COLOR_BUFFER_BIT, GL_LINEAR); GL_COLOR_BUFFER_BIT, GL_LINEAR);
@ -138,7 +140,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect&
// Blit // Blit
int offset = (VideoCore::kScreenTopWidth - VideoCore::kScreenBottomWidth) / 2; int offset = (VideoCore::kScreenTopWidth - VideoCore::kScreenBottomWidth) / 2;
glBlitFramebuffer(0,0, VideoCore::kScreenBottomWidth, VideoCore::kScreenBottomHeight, glBlitFramebuffer(0,0, VideoCore::kScreenBottomWidth, VideoCore::kScreenBottomHeight,
offset, VideoCore::kScreenBottomHeight, VideoCore::kScreenBottomWidth + offset, 0, offset, VideoCore::kScreenBottomHeight, VideoCore::kScreenBottomWidth + offset, 0,
GL_COLOR_BUFFER_BIT, GL_LINEAR); GL_COLOR_BUFFER_BIT, GL_LINEAR);
@ -147,7 +149,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect&
/// Initialize the FBO /// Initialize the FBO
void RendererOpenGL::InitFramebuffer() { void RendererOpenGL::InitFramebuffer() {
// TODO(bunnei): This should probably be implemented with the top screen and bottom screen as // TODO(bunnei): This should probably be implemented with the top screen and bottom screen as
// separate framebuffers // separate framebuffers
// Init the FBOs // Init the FBOs
@ -160,12 +162,12 @@ void RendererOpenGL::InitFramebuffer() {
for (int i = 0; i < kMaxFramebuffers; i++) { for (int i = 0; i < kMaxFramebuffers; i++) {
// Generate color buffer storage // Generate color buffer storage
glBindRenderbuffer(GL_RENDERBUFFER, m_fbo_rbo[i]); glBindRenderbuffer(GL_RENDERBUFFER, m_fbo_rbo[i]);
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, VideoCore::kScreenTopWidth, glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, VideoCore::kScreenTopWidth,
VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight); VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight);
// Generate depth buffer storage // Generate depth buffer storage
glBindRenderbuffer(GL_RENDERBUFFER, m_fbo_depth_buffers[i]); glBindRenderbuffer(GL_RENDERBUFFER, m_fbo_depth_buffers[i]);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT32, VideoCore::kScreenTopWidth, glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT32, VideoCore::kScreenTopWidth,
VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight); VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight);
// Attach the buffers // Attach the buffers
@ -181,7 +183,7 @@ void RendererOpenGL::InitFramebuffer() {
} else { } else {
ERROR_LOG(RENDER, "couldn't create OpenGL frame buffer"); ERROR_LOG(RENDER, "couldn't create OpenGL frame buffer");
exit(1); exit(1);
} }
} }
glBindFramebuffer(GL_FRAMEBUFFER, 0); // Unbind our frame buffer(s) glBindFramebuffer(GL_FRAMEBUFFER, 0); // Unbind our frame buffer(s)
@ -189,8 +191,8 @@ void RendererOpenGL::InitFramebuffer() {
// ------------------------------- // -------------------------------
// Create XFB textures // Create XFB textures
glGenTextures(1, &m_xfb_texture_top); glGenTextures(1, &m_xfb_texture_top);
glGenTextures(1, &m_xfb_texture_bottom); glGenTextures(1, &m_xfb_texture_bottom);
// Allocate video memory for XFB textures // Allocate video memory for XFB textures
glBindTexture(GL_TEXTURE_2D, m_xfb_texture_top); glBindTexture(GL_TEXTURE_2D, m_xfb_texture_top);
@ -206,13 +208,13 @@ void RendererOpenGL::InitFramebuffer() {
// Create the FBO and attach color/depth textures // Create the FBO and attach color/depth textures
glGenFramebuffers(1, &m_xfb_top); // Generate framebuffer glGenFramebuffers(1, &m_xfb_top); // Generate framebuffer
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_xfb_top); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_xfb_top);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
m_xfb_texture_top, 0); m_xfb_texture_top, 0);
glBindFramebuffer(GL_FRAMEBUFFER, 0); glBindFramebuffer(GL_FRAMEBUFFER, 0);
glGenFramebuffers(1, &m_xfb_bottom); // Generate framebuffer glGenFramebuffers(1, &m_xfb_bottom); // Generate framebuffer
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_xfb_bottom); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_xfb_bottom);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
m_xfb_texture_bottom, 0); m_xfb_texture_bottom, 0);
glBindFramebuffer(GL_FRAMEBUFFER, 0); glBindFramebuffer(GL_FRAMEBUFFER, 0);
} }
@ -228,7 +230,7 @@ void RendererOpenGL::RenderFramebuffer() {
glReadBuffer(GL_COLOR_ATTACHMENT0); glReadBuffer(GL_COLOR_ATTACHMENT0);
// Blit // Blit
glBlitFramebuffer(0, 0, m_resolution_width, m_resolution_height, 0, 0, m_resolution_width, glBlitFramebuffer(0, 0, m_resolution_width, m_resolution_height, 0, 0, m_resolution_width,
m_resolution_height, GL_COLOR_BUFFER_BIT, GL_LINEAR); m_resolution_height, GL_COLOR_BUFFER_BIT, GL_LINEAR);
// Update the FPS count // Update the FPS count
@ -244,7 +246,7 @@ void RendererOpenGL::RenderFramebuffer() {
void RendererOpenGL::UpdateFramerate() { void RendererOpenGL::UpdateFramerate() {
} }
/** /**
* Set the emulator window to use for renderer * Set the emulator window to use for renderer
* @param window EmuWindow handle to emulator window to use for rendering * @param window EmuWindow handle to emulator window to use for rendering
*/ */
@ -278,7 +280,7 @@ void RendererOpenGL::Init() {
GLenum err = glewInit(); GLenum err = glewInit();
if (GLEW_OK != err) { if (GLEW_OK != err) {
ERROR_LOG(RENDER, "Failed to initialize GLEW! Error message: \"%s\". Exiting...", ERROR_LOG(RENDER, "Failed to initialize GLEW! Error message: \"%s\". Exiting...",
glewGetErrorString(err)); glewGetErrorString(err));
exit(-1); exit(-1);
} }