|
|
@ -387,13 +387,18 @@ void JitShader::Compile_DP4(Instruction instr) {
|
|
|
|
|
|
|
|
|
|
|
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
|
|
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
|
|
|
|
|
|
|
|
|
|
|
movaps(SRC2, SRC1);
|
|
|
|
if (Common::GetCPUCaps().sse3) {
|
|
|
|
shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
|
|
|
|
haddps(SRC1, SRC1);
|
|
|
|
addps(SRC1, SRC2);
|
|
|
|
haddps(SRC1, SRC1);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
movaps(SRC2, SRC1);
|
|
|
|
|
|
|
|
shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
|
|
|
|
|
|
|
|
addps(SRC1, SRC2);
|
|
|
|
|
|
|
|
|
|
|
|
movaps(SRC2, SRC1);
|
|
|
|
movaps(SRC2, SRC1);
|
|
|
|
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
|
|
|
|
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
|
|
|
|
addps(SRC1, SRC2);
|
|
|
|
addps(SRC1, SRC2);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Compile_DestEnable(instr, SRC1);
|
|
|
|
Compile_DestEnable(instr, SRC1);
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -419,13 +424,18 @@ void JitShader::Compile_DPH(Instruction instr) {
|
|
|
|
|
|
|
|
|
|
|
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
|
|
|
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
|
|
|
|
|
|
|
|
|
|
|
|
movaps(SRC2, SRC1);
|
|
|
|
if (Common::GetCPUCaps().sse3) {
|
|
|
|
shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
|
|
|
|
haddps(SRC1, SRC1);
|
|
|
|
addps(SRC1, SRC2);
|
|
|
|
haddps(SRC1, SRC1);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
movaps(SRC2, SRC1);
|
|
|
|
|
|
|
|
shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
|
|
|
|
|
|
|
|
addps(SRC1, SRC2);
|
|
|
|
|
|
|
|
|
|
|
|
movaps(SRC2, SRC1);
|
|
|
|
movaps(SRC2, SRC1);
|
|
|
|
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
|
|
|
|
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
|
|
|
|
addps(SRC1, SRC2);
|
|
|
|
addps(SRC1, SRC2);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Compile_DestEnable(instr, SRC1);
|
|
|
|
Compile_DestEnable(instr, SRC1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|