forked from I2P_Developers/i2p.i2p
CPUID: Multiple bug fixes:
- Add support for extended feature registers EBX/ECX - No such thing as EBX for 0x80000001 call; remove getExtendedEBXCPUFlags() method, replaced with getExtendedEBXFeatureFlags() - Check for support of 6 required Core i3/i5/i7 instructions to enable Haswell, since GMP Haswell requires Core i3/i5/i7 support. There are Pentium/Celeron Haswells that do not support these instructions. - Fix hasAVX2(), hasAVX512(), and hasADX() using wrong register - Fix hasAVX512() checking wrong bit - Define hasAVX512() as supporting AVX-512 Foundation, not the "full" instruction set as previously specified in the javadocs. - hasAVX2(), hasAVX512(), and hasADX() need not check hasAVX() first - Add missing hasADX() to CPUInfo interface Also: - More diagnostic output in CPUID.main() - More javadocs
This commit is contained in:
@@ -190,12 +190,6 @@ public class CPUID {
|
||||
return c.ECX;
|
||||
}
|
||||
|
||||
static int getExtendedEBXCPUFlags()
|
||||
{
|
||||
CPUIDResult c = doCPUID(0x80000001);
|
||||
return c.EBX;
|
||||
}
|
||||
|
||||
static int getExtendedECXCPUFlags()
|
||||
{
|
||||
CPUIDResult c = doCPUID(0x80000001);
|
||||
@@ -209,6 +203,31 @@ public class CPUID {
|
||||
return c.EDX;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the EBX register produced by a CPUID call with function 7.
* Callers test individual bits of the returned value
* (for example bit 5 for AVX2, bit 16 for AVX-512 Foundation, bit 19 for ADX).
*
* @return the raw EBX value from doCPUID(7)
* @since 0.9.24
|
||||
*/
|
||||
static int getExtendedEBXFeatureFlags()
|
||||
{
|
||||
// Supposed to set ECX to 0 before calling?
|
||||
// But we don't have support for that in jcpuid.
|
||||
// And it works just fine without that.
|
||||
CPUIDResult c = doCPUID(7);
|
||||
return c.EBX;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the ECX register produced by a CPUID call with function 7.
* There's almost nothing in here.
|
||||
*
* @return the raw ECX value from doCPUID(7)
* @since 0.9.24
|
||||
*/
|
||||
static int getExtendedECXFeatureFlags()
|
||||
{
|
||||
// Supposed to set ECX to 0 before calling?
|
||||
// But we don't have support for that in jcpuid.
|
||||
// And it works just fine without that.
|
||||
CPUIDResult c = doCPUID(7);
|
||||
return c.ECX;
|
||||
}
|
||||
|
||||
/**
|
||||
* The model name string, up to 48 characters, as reported by
|
||||
* the processor itself.
|
||||
@@ -294,19 +313,28 @@ public class CPUID {
|
||||
System.out.println("CPU Family: " + family);
|
||||
System.out.println("CPU Model: " + model);
|
||||
System.out.println("CPU Stepping: " + getCPUStepping());
|
||||
System.out.println("CPU Flags: 0x" + Integer.toHexString(getEDXCPUFlags()));
|
||||
System.out.println("CPU Flags (EDX): 0x" + Integer.toHexString(getEDXCPUFlags()));
|
||||
System.out.println("CPU Flags (ECX): 0x" + Integer.toHexString(getECXCPUFlags()));
|
||||
System.out.println("CPU Ext. Info. (EDX): 0x" + Integer.toHexString(getExtendedEDXCPUFlags()));
|
||||
System.out.println("CPU Ext. Info. (ECX): 0x" + Integer.toHexString(getExtendedECXCPUFlags()));
|
||||
System.out.println("CPU Ext. Feat. (EBX): 0x" + Integer.toHexString(getExtendedEBXFeatureFlags()));
|
||||
System.out.println("CPU Ext. Feat. (ECX): 0x" + Integer.toHexString(getExtendedECXFeatureFlags()));
|
||||
|
||||
CPUInfo c = getInfo();
|
||||
System.out.println("\n **More CPUInfo**");
|
||||
System.out.println("CPU model string: " + c.getCPUModelString());
|
||||
System.out.println("CPU has MMX: " + c.hasMMX());
|
||||
System.out.println("CPU has SSE: " + c.hasSSE());
|
||||
System.out.println("CPU has SSE2: " + c.hasSSE2());
|
||||
System.out.println("CPU has SSE3: " + c.hasSSE3());
|
||||
System.out.println("CPU has MMX: " + c.hasMMX());
|
||||
System.out.println("CPU has SSE: " + c.hasSSE());
|
||||
System.out.println("CPU has SSE2: " + c.hasSSE2());
|
||||
System.out.println("CPU has SSE3: " + c.hasSSE3());
|
||||
System.out.println("CPU has SSE4.1: " + c.hasSSE41());
|
||||
System.out.println("CPU has SSE4.2: " + c.hasSSE42());
|
||||
System.out.println("CPU has SSE4A: " + c.hasSSE4A());
|
||||
System.out.println("CPU has AES-NI: " + c.hasAES());
|
||||
System.out.println("CPU has SSE4A: " + c.hasSSE4A());
|
||||
System.out.println("CPU has AVX: " + c.hasAVX());
|
||||
System.out.println("CPU has AVX2: " + c.hasAVX2());
|
||||
System.out.println("CPU has AVX512: " + c.hasAVX512());
|
||||
System.out.println("CPU has ADX: " + c.hasADX());
|
||||
System.out.println("CPU has TBM: " + c.hasTBM());
|
||||
if(c instanceof IntelCPUInfo){
|
||||
System.out.println("\n **Intel-info**");
|
||||
System.out.println("Is PII-compatible: "+((IntelCPUInfo)c).IsPentium2Compatible());
|
||||
@@ -316,6 +344,10 @@ public class CPUID {
|
||||
System.out.println("Is Pentium M compatible: "+((IntelCPUInfo)c).IsPentiumMCompatible());
|
||||
System.out.println("Is Core2-compatible: "+((IntelCPUInfo)c).IsCore2Compatible());
|
||||
System.out.println("Is Corei-compatible: "+((IntelCPUInfo)c).IsCoreiCompatible());
|
||||
System.out.println("Is Sandy-compatible: "+((IntelCPUInfo)c).IsSandyCompatible());
|
||||
System.out.println("Is Ivy-compatible: "+((IntelCPUInfo)c).IsIvyCompatible());
|
||||
System.out.println("Is Haswell-compatible: "+((IntelCPUInfo)c).IsHaswellCompatible());
|
||||
System.out.println("Is Broadwell-compatible: "+((IntelCPUInfo)c).IsBroadwellCompatible());
|
||||
}
|
||||
if(c instanceof AMDCPUInfo){
|
||||
System.out.println("\n **AMD-info**");
|
||||
|
@@ -63,18 +63,26 @@ abstract class CPUIDCPUInfo implements CPUInfo
|
||||
*/
|
||||
public boolean hasAVX2()
|
||||
{
|
||||
return hasAVX() &&
|
||||
(CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
|
||||
return (CPUID.getExtendedEBXFeatureFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true iff the CPU supports the AVX512 instruction set.
|
||||
* Does the CPU support the AVX-512 Foundation instruction set?
|
||||
*
|
||||
* Quote wikipedia:
|
||||
*
|
||||
* AVX-512 consists of multiple extensions not all meant to be supported
|
||||
* by all processors implementing them. Only the core extension AVX-512F
|
||||
* (AVX-512 Foundation) is required by all implementations.
|
||||
*
|
||||
* ref: https://en.wikipedia.org/wiki/AVX-512
|
||||
*
|
||||
* @return true iff the CPU supports the AVX-512 Foundation instruction set.
|
||||
* @since 0.9.21
|
||||
*/
|
||||
public boolean hasAVX512()
|
||||
{
|
||||
return hasAVX() &&
|
||||
(CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
|
||||
return (CPUID.getExtendedEBXFeatureFlags() & (1 << 16)) != 0; //Extended EBX Bit 16
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -83,8 +91,7 @@ abstract class CPUIDCPUInfo implements CPUInfo
|
||||
*/
|
||||
public boolean hasADX()
|
||||
{
|
||||
return hasAVX() &&
|
||||
(CPUID.getExtendedEBXCPUFlags() & (1 << 19)) != 0; //Extended EBX Bit 19
|
||||
return (CPUID.getExtendedEBXFeatureFlags() & (1 << 19)) != 0; //Extended EBX Bit 19
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -59,6 +59,9 @@ public interface CPUInfo
|
||||
public boolean hasSSE42();
|
||||
|
||||
/**
|
||||
* AMD K10 only. Not supported on Intel.
|
||||
* ref: https://en.wikipedia.org/wiki/SSE4.2#SSE4a
|
||||
*
|
||||
* @return true iff the CPU supports the SSE4A instruction set.
|
||||
*/
|
||||
public boolean hasSSE4A();
|
||||
@@ -76,11 +79,27 @@ public interface CPUInfo
|
||||
public boolean hasAVX2();
|
||||
|
||||
/**
|
||||
* @return true iff the CPU supports the full AVX512 instruction set.
|
||||
* Does the CPU support the AVX-512 Foundation instruction set?
|
||||
*
|
||||
* Quote wikipedia:
|
||||
*
|
||||
* AVX-512 consists of multiple extensions not all meant to be supported
|
||||
* by all processors implementing them. Only the core extension AVX-512F
|
||||
* (AVX-512 Foundation) is required by all implementations.
|
||||
*
|
||||
* ref: https://en.wikipedia.org/wiki/AVX-512
|
||||
*
|
||||
* @return true iff the CPU supports the AVX-512 Foundation instruction set.
|
||||
* @since 0.9.21
|
||||
*/
|
||||
public boolean hasAVX512();
|
||||
|
||||
/**
|
||||
* @return true iff the CPU supports the ADX instruction set.
|
||||
* @since 0.9.21
|
||||
*/
|
||||
public boolean hasADX();
|
||||
|
||||
/**
|
||||
* @return true iff the CPU supports TBM.
|
||||
* @since 0.9.21
|
||||
|
@@ -66,6 +66,10 @@ public interface IntelCPUInfo extends CPUInfo {
|
||||
/**
|
||||
* Supports the SSE 3, 4.1, 4.2 instructions.
|
||||
* In general, this requires 45nm or smaller process.
|
||||
*
|
||||
* This is the Nehalem architecture.
|
||||
* ref: https://en.wikipedia.org/wiki/Nehalem_%28microarchitecture%29
|
||||
*
|
||||
* @return true if the CPU implements at least a Corei level instruction/feature set.
|
||||
*/
|
||||
public boolean IsCoreiCompatible();
|
||||
@@ -82,6 +86,11 @@ public interface IntelCPUInfo extends CPUInfo {
|
||||
* Supports the SSE 3, 4.1, 4.2 instructions.
|
||||
* Supports the AVX 1 instructions.
|
||||
* In general, this requires 22nm or smaller process.
|
||||
*
|
||||
* UNUSED, there is no specific GMP build for Ivy Bridge,
|
||||
* and this is never called from NativeBigInteger.
|
||||
* Ivy Bridge is a successor to Sandy Bridge, so use IsSandyCompatible().
|
||||
*
|
||||
* @return true if the CPU implements at least an IvyBridge level instruction/feature set.
|
||||
*/
|
||||
public boolean IsIvyCompatible();
|
||||
@@ -89,6 +98,19 @@ public interface IntelCPUInfo extends CPUInfo {
|
||||
/**
|
||||
* Supports the SSE 3, 4.1, 4.2 instructions.
|
||||
* Supports the AVX 1, 2 instructions.
|
||||
* Supports the BMI 1, 2 instructions.
|
||||
*
|
||||
* WARNING - GMP 6 uses the BMI2 MULX instruction for the "coreihwl" binaries.
|
||||
* Only Core i3/i5/i7 Haswell processors support BMI2.
|
||||
*
|
||||
* Requires support for all 6 of these Corei features: FMA3 MOVBE ABM AVX2 BMI1 BMI2
|
||||
* Pentium/Celeron Haswell processors do NOT support BMI2 and are NOT compatible.
|
||||
* Those processors will be Sandy-compatible if they have AVX 1 support,
|
||||
* and Corei-compatible if they do not.
|
||||
*
|
||||
* ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
|
||||
* ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29
|
||||
*
|
||||
* In general, this requires 22nm or smaller process.
|
||||
* @return true if the CPU implements at least a Haswell level instruction/feature set.
|
||||
*/
|
||||
@@ -98,6 +120,11 @@ public interface IntelCPUInfo extends CPUInfo {
|
||||
* Supports the SSE 3, 4.1, 4.2 instructions.
|
||||
* Supports the AVX 1, 2 instructions.
|
||||
* In general, this requires 14nm or smaller process.
|
||||
*
|
||||
* NOT FULLY USED as of GMP 6.0.
|
||||
* All GMP coreibwl binaries are duplicates of binaries for older technologies,
|
||||
* so we do not distribute any. However, this is called from NativeBigInteger.
|
||||
*
|
||||
* @return true if the CPU implements at least a Broadwell level instruction/feature set.
|
||||
*/
|
||||
public boolean IsBroadwellCompatible();
|
||||
|
@@ -282,6 +282,8 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
|
||||
modelString = "Atom";
|
||||
break;
|
||||
// Sandy bridge 32 nm
|
||||
// 1, 2, or 4 cores
|
||||
// ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
|
||||
case 0x2a:
|
||||
isSandyCompatible = true;
|
||||
modelString = "Sandy Bridge";
|
||||
@@ -295,6 +297,8 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
|
||||
modelString = "Westmere";
|
||||
break;
|
||||
// Sandy Bridge 32 nm
|
||||
// Sandy Bridge-E up to 8 cores
|
||||
// ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
|
||||
case 0x2d:
|
||||
isSandyCompatible = true;
|
||||
modelString = "Sandy Bridge";
|
||||
@@ -328,18 +332,15 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
|
||||
modelString = "Atom";
|
||||
break;
|
||||
// Ivy Bridge 22 nm
|
||||
// ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
|
||||
case 0x3a:
|
||||
isSandyCompatible = true;
|
||||
isIvyCompatible = true;
|
||||
modelString = "Ivy Bridge";
|
||||
break;
|
||||
// Haswell 22 nm
|
||||
case 0x3c:
|
||||
isSandyCompatible = true;
|
||||
isIvyCompatible = true;
|
||||
isHaswellCompatible = true;
|
||||
modelString = "Haswell";
|
||||
break;
|
||||
|
||||
// case 0x3c: See below
|
||||
|
||||
// Broadwell 14 nm
|
||||
case 0x3d:
|
||||
isSandyCompatible = true;
|
||||
@@ -354,32 +355,72 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
|
||||
isIvyCompatible = true;
|
||||
modelString = "Ivy Bridge";
|
||||
break;
|
||||
// Haswell 22 nm
|
||||
case 0x3f:
|
||||
isSandyCompatible = true;
|
||||
isIvyCompatible = true;
|
||||
isHaswellCompatible = true;
|
||||
modelString = "Haswell";
|
||||
break;
|
||||
|
||||
// case 0x3f: See below
|
||||
|
||||
// following are for extended model == 4
|
||||
// most flags are set above
|
||||
// isCoreiCompatible = true is the default
|
||||
|
||||
// Haswell 22 nm
|
||||
// Pentium and Celeron Haswells do not support new Haswell instructions,
|
||||
// only Corei ones do, but we can't tell that from the model alone.
|
||||
//
|
||||
// We know for sure that GMP coreihwl uses the MULX instruction from BMI2,
|
||||
// unsure about the others, but let's be safe and check all 6 feature bits, as
|
||||
// the Intel app note suggests.
|
||||
//
|
||||
// ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29
|
||||
// ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
|
||||
case 0x3c:
|
||||
case 0x3f:
|
||||
case 0x45:
|
||||
isSandyCompatible = true;
|
||||
isIvyCompatible = true;
|
||||
isHaswellCompatible = true;
|
||||
modelString = "Haswell";
|
||||
break;
|
||||
// Haswell 22 nm
|
||||
case 0x46:
|
||||
isSandyCompatible = true;
|
||||
isIvyCompatible = true;
|
||||
isHaswellCompatible = true;
|
||||
modelString = "Haswell";
|
||||
boolean hasNewInstructions = false;
|
||||
int reg = CPUID.getECXCPUFlags();
|
||||
boolean hasFMA3 = (reg & (1 << 12)) != 0;
|
||||
boolean hasMOVBE = (reg & (1 << 22)) != 0;
|
||||
// AVX is implied by AVX2, so we don't need to check the value here,
|
||||
// but we will need it below to enable Sandy Bridge if the Haswell checks fail.
|
||||
// This is the same as hasAVX().
|
||||
boolean hasAVX = (reg & (1 << 28)) != 0 && (reg & (1 << 27)) != 0;
|
||||
//System.out.println("FMA3 MOVBE: " +
|
||||
// hasFMA3 + ' ' + hasMOVBE);
|
||||
if (hasFMA3 && hasMOVBE) {
|
||||
reg = CPUID.getExtendedECXCPUFlags();
|
||||
boolean hasABM = (reg & (1 << 5)) != 0; // aka LZCNT
|
||||
//System.out.println("FMA3 MOVBE ABM: " +
|
||||
// hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM);
|
||||
if (hasABM) {
|
||||
reg = CPUID.getExtendedEBXFeatureFlags();
|
||||
boolean hasAVX2 = (reg & (1 << 5)) != 0;
|
||||
boolean hasBMI1 = (reg & (1 << 3)) != 0;
|
||||
boolean hasBMI2 = (reg & (1 << 8)) != 0;
|
||||
//System.out.println("FMA3 MOVBE ABM AVX2 BMI1 BMI2: " +
|
||||
// hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM + ' ' +
|
||||
// hasAVX2 + ' ' + hasBMI1 + ' ' + hasBMI2);
|
||||
if (hasAVX2 && hasBMI1 && hasBMI2)
|
||||
hasNewInstructions = true;
|
||||
}
|
||||
}
|
||||
if (hasNewInstructions) {
|
||||
isSandyCompatible = true;
|
||||
isIvyCompatible = true;
|
||||
isHaswellCompatible = true;
|
||||
modelString = "Haswell Core i3/i5/i7 model " + model;
|
||||
} else {
|
||||
// This processor is "corei" compatible, as we define it,
|
||||
// i.e. SSE4.2 but not necessarily AVX.
|
||||
if (hasAVX) {
|
||||
isSandyCompatible = true;
|
||||
isIvyCompatible = true;
|
||||
modelString = "Haswell Celeron/Pentium w/ AVX model " + model;
|
||||
} else {
|
||||
modelString = "Haswell Celeron/Pentium model " + model;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
// Quark 32nm
|
||||
case 0x4a:
|
||||
isCore2Compatible = false;
|
||||
|
Reference in New Issue
Block a user