Optimize AttributeBuffer to OutputVertex conversion (#3283)
Optimize AttributeBuffer to OutputVertex conversion. First I unrolled the inner loop, then I pushed semantics validation outside of the hot loop. I also added overflow slots to avoid conditional branches. Super Mario 3D Land's intro runs at almost full speed when compiled with Clang, and there's a noticeable speed increase in MSVC. GCC hasn't been tested, but I'm confident in its ability to optimize this code.
This commit is contained in:
committed by
Yuri Kunde Schlesner
parent
3f7f2b42c0
commit
41929371dc
@@ -221,6 +221,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
MICROPROFILE_SCOPE(GPU_Drawing);
|
||||
immediate_attribute_id = 0;
|
||||
|
||||
Shader::OutputVertex::ValidateSemantics(regs.rasterizer);
|
||||
|
||||
auto* shader_engine = Shader::GetEngine();
|
||||
shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
|
||||
|
||||
@@ -289,6 +291,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
// Later, these can be compiled and cached.
|
||||
const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
|
||||
VertexLoader loader(regs.pipeline);
|
||||
Shader::OutputVertex::ValidateSemantics(regs.rasterizer);
|
||||
|
||||
// Load vertices
|
||||
bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
|
||||
|
Reference in New Issue
Block a user