block_of_code: Allow Fast BMI2 paths on Zen 3 (#593)
BMI2 instructions such as `pdep` and `pext` have been known to be incredibly slow on AMD. On Zen 3 and newer, however, the performance of these instructions is now much better, while earlier AMD architectures should still avoid BMI2. On Zen 2, pdep/pext took 300 cycles; on Zen 3 they take 3 cycles. This is a big enough improvement to allow BMI2 code to be dispatched if available. The Zen 3 architecture is detected by checking the family of the processor.
This commit is contained in:
parent
c28f13af97
commit
e06933f123
1 changed files with 16 additions and 1 deletions
|
@ -13,6 +13,7 @@
|
|||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/perf_map.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
|
@ -364,7 +365,21 @@ bool BlockOfCode::HasBMI2() const {
|
|||
}
|
||||
|
||||
bool BlockOfCode::HasFastBMI2() const {
|
||||
return DoesCpuSupport(Xbyak::util::Cpu::tBMI2) && !DoesCpuSupport(Xbyak::util::Cpu::tAMD);
|
||||
if (DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
|
||||
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
|
||||
// Check for Zen 3 or newer by its family (0x19).
|
||||
// See also: https://en.wikichip.org/wiki/amd/cpuid
|
||||
if (DoesCpuSupport(Xbyak::util::Cpu::tAMD)) {
|
||||
std::array<u32, 4> data{};
|
||||
cpu_info.getCpuid(1, data.data());
|
||||
const u32 family_base = Common::Bits< 8, 11>(data[0]);
|
||||
const u32 family_extended = Common::Bits<20, 27>(data[0]);
|
||||
const u32 family = family_base + family_extended;
|
||||
return family >= 0x19;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BlockOfCode::HasFMA() const {
|
||||
|
|
Loading…
Reference in a new issue