CPUID: Multiple bug fixes:

- Add support for extended feature registers EBX/ECX
 - No such thing as EBX for 0x80000001 call; remove getExtendedEBXCPUFlags() method,
   replaced with getExtendedEBXFeatureFlags()
 - Check for support of 6 required Core i3/i5/i7 instructions to enable Haswell,
   since GMP Haswell requires Core i3/i5/i7 support.
   There are Pentium/Celeron Haswells that do not support these instructions.
 - Fix hasAVX2(), hasAVX512(), and hasADX() using wrong register
 - Fix hasAVX512() checking wrong bit
 - Define hasAVX512() as supporting AVX-512 Foundation, not the "full"
   instruction set as previously specified in the javadocs.
 - hasAVX2(), hasAVX512(), and hasADX() need not check hasAVX() first
 - Add missing hasADX() to CPUInfo interface
Also:
 - More diagnostic output in CPUID.main()
 - More javadocs
This commit is contained in:
zzz
2015-12-12 22:53:33 +00:00
parent 5df3f404f8
commit fe3642edd4
5 changed files with 171 additions and 45 deletions

View File

@@ -190,12 +190,6 @@ public class CPUID {
return c.ECX;
}
/**
* Returns the raw EBX register from CPUID leaf 0x80000001.
*
* NOTE(review): per the commit notes, leaf 0x80000001 has no feature flags
* in EBX, so bit tests against this value were checking the wrong register.
* Feature bits such as AVX2 and ADX are read via getExtendedEBXFeatureFlags()
* (CPUID leaf 7) instead.
*
* @return the EBX value reported by doCPUID(0x80000001)
*/
static int getExtendedEBXCPUFlags()
{
CPUIDResult c = doCPUID(0x80000001);
return c.EBX;
}
static int getExtendedECXCPUFlags()
{
CPUIDResult c = doCPUID(0x80000001);
@@ -209,6 +203,31 @@ public class CPUID {
return c.EDX;
}
/**
* Returns the raw EBX register from CPUID leaf 7 (extended feature flags).
* This is a different leaf from the extended-info leaf 0x80000001 that
* getExtendedECXCPUFlags() reads.
*
* Bits tested by callers in this code base:
* 3 (BMI1), 5 (AVX2), 8 (BMI2), 16 (AVX-512 Foundation), 19 (ADX).
*
* @return the EBX value reported by doCPUID(7)
* @since 0.9.24
*/
static int getExtendedEBXFeatureFlags()
{
// Supposed to set ECX to 0 before calling?
// But we don't have support for that in jcpuid.
// And it works just fine without that.
// NOTE(review): leaf 7 is presumably sub-leaf indexed through ECX, so the
// result may depend on whatever ECX holds when the native code issues
// CPUID -- confirm against the native jcpuid implementation.
CPUIDResult c = doCPUID(7);
return c.EBX;
}
/**
* Returns the raw ECX register from CPUID leaf 7 (extended feature flags).
* There's almost nothing in here.
* This is a different leaf from the extended-info leaf 0x80000001 that
* getExtendedECXCPUFlags() reads.
*
* @return the ECX value reported by doCPUID(7)
* @since 0.9.24
*/
static int getExtendedECXFeatureFlags()
{
// Supposed to set ECX to 0 before calling?
// But we don't have support for that in jcpuid.
// And it works just fine without that.
// NOTE(review): leaf 7 is presumably sub-leaf indexed through ECX, so the
// result may depend on whatever ECX holds when the native code issues
// CPUID -- confirm against the native jcpuid implementation.
CPUIDResult c = doCPUID(7);
return c.ECX;
}
/**
* The model name string, up to 48 characters, as reported by
* the processor itself.
@@ -294,19 +313,28 @@ public class CPUID {
System.out.println("CPU Family: " + family);
System.out.println("CPU Model: " + model);
System.out.println("CPU Stepping: " + getCPUStepping());
System.out.println("CPU Flags: 0x" + Integer.toHexString(getEDXCPUFlags()));
System.out.println("CPU Flags (EDX): 0x" + Integer.toHexString(getEDXCPUFlags()));
System.out.println("CPU Flags (ECX): 0x" + Integer.toHexString(getECXCPUFlags()));
System.out.println("CPU Ext. Info. (EDX): 0x" + Integer.toHexString(getExtendedEDXCPUFlags()));
System.out.println("CPU Ext. Info. (ECX): 0x" + Integer.toHexString(getExtendedECXCPUFlags()));
System.out.println("CPU Ext. Feat. (EBX): 0x" + Integer.toHexString(getExtendedEBXFeatureFlags()));
System.out.println("CPU Ext. Feat. (ECX): 0x" + Integer.toHexString(getExtendedECXFeatureFlags()));
CPUInfo c = getInfo();
System.out.println("\n **More CPUInfo**");
System.out.println("CPU model string: " + c.getCPUModelString());
System.out.println("CPU has MMX: " + c.hasMMX());
System.out.println("CPU has SSE: " + c.hasSSE());
System.out.println("CPU has SSE2: " + c.hasSSE2());
System.out.println("CPU has SSE3: " + c.hasSSE3());
System.out.println("CPU has MMX: " + c.hasMMX());
System.out.println("CPU has SSE: " + c.hasSSE());
System.out.println("CPU has SSE2: " + c.hasSSE2());
System.out.println("CPU has SSE3: " + c.hasSSE3());
System.out.println("CPU has SSE4.1: " + c.hasSSE41());
System.out.println("CPU has SSE4.2: " + c.hasSSE42());
System.out.println("CPU has SSE4A: " + c.hasSSE4A());
System.out.println("CPU has AES-NI: " + c.hasAES());
System.out.println("CPU has SSE4A: " + c.hasSSE4A());
System.out.println("CPU has AVX: " + c.hasAVX());
System.out.println("CPU has AVX2: " + c.hasAVX2());
System.out.println("CPU has AVX512: " + c.hasAVX512());
System.out.println("CPU has ADX: " + c.hasADX());
System.out.println("CPU has TBM: " + c.hasTBM());
if(c instanceof IntelCPUInfo){
System.out.println("\n **Intel-info**");
System.out.println("Is PII-compatible: "+((IntelCPUInfo)c).IsPentium2Compatible());
@@ -316,6 +344,10 @@ public class CPUID {
System.out.println("Is Pentium M compatible: "+((IntelCPUInfo)c).IsPentiumMCompatible());
System.out.println("Is Core2-compatible: "+((IntelCPUInfo)c).IsCore2Compatible());
System.out.println("Is Corei-compatible: "+((IntelCPUInfo)c).IsCoreiCompatible());
System.out.println("Is Sandy-compatible: "+((IntelCPUInfo)c).IsSandyCompatible());
System.out.println("Is Ivy-compatible: "+((IntelCPUInfo)c).IsIvyCompatible());
System.out.println("Is Haswell-compatible: "+((IntelCPUInfo)c).IsHaswellCompatible());
System.out.println("Is Broadwell-compatible: "+((IntelCPUInfo)c).IsBroadwellCompatible());
}
if(c instanceof AMDCPUInfo){
System.out.println("\n **AMD-info**");

View File

@@ -63,18 +63,26 @@ abstract class CPUIDCPUInfo implements CPUInfo
*/
public boolean hasAVX2()
{
return hasAVX() &&
(CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
return (CPUID.getExtendedEBXFeatureFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
}
/**
* @return true iff the CPU supports the AVX512 instruction set.
* Does the CPU support the AVX-512 Foundation instruction set?
*
* Quote wikipedia:
*
* AVX-512 consists of multiple extensions not all meant to be supported
* by all processors implementing them. Only the core extension AVX-512F
* (AVX-512 Foundation) is required by all implementations.
*
* ref: https://en.wikipedia.org/wiki/AVX-512
*
* @return true iff the CPU supports the AVX-512 Foundation instruction set.
* @since 0.9.21
*/
public boolean hasAVX512()
{
return hasAVX() &&
(CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
return (CPUID.getExtendedEBXFeatureFlags() & (1 << 16)) != 0; //Extended EBX Bit 16
}
/**
@@ -83,8 +91,7 @@ abstract class CPUIDCPUInfo implements CPUInfo
*/
public boolean hasADX()
{
return hasAVX() &&
(CPUID.getExtendedEBXCPUFlags() & (1 << 19)) != 0; //Extended EBX Bit 19
return (CPUID.getExtendedEBXFeatureFlags() & (1 << 19)) != 0; //Extended EBX Bit 19
}
/**

View File

@@ -59,6 +59,9 @@ public interface CPUInfo
public boolean hasSSE42();
/**
* AMD K10 only. Not supported on Intel.
* ref: https://en.wikipedia.org/wiki/SSE4.2#SSE4a
*
* @return true iff the CPU supports the SSE4A instruction set.
*/
public boolean hasSSE4A();
@@ -76,11 +79,27 @@ public interface CPUInfo
public boolean hasAVX2();
/**
* @return true iff the CPU supports the full AVX512 instruction set.
* Does the CPU support the AVX-512 Foundation instruction set?
*
* Quote wikipedia:
*
* AVX-512 consists of multiple extensions not all meant to be supported
* by all processors implementing them. Only the core extension AVX-512F
* (AVX-512 Foundation) is required by all implementations.
*
* ref: https://en.wikipedia.org/wiki/AVX-512
*
* @return true iff the CPU supports the AVX-512 Foundation instruction set.
* @since 0.9.21
*/
public boolean hasAVX512();
/**
* @return true iff the CPU supports the ADX instruction set.
* @since 0.9.21
*/
public boolean hasADX();
/**
* @return true iff the CPU supports TBM.
* @since 0.9.21

View File

@@ -66,6 +66,10 @@ public interface IntelCPUInfo extends CPUInfo {
/**
* Supports the SSE 3, 4.1, 4.2 instructions.
* In general, this requires 45nm or smaller process.
*
* This is the Nehalem architecture.
* ref: https://en.wikipedia.org/wiki/Nehalem_%28microarchitecture%29
*
* @return true if the CPU implements at least a Corei level instruction/feature set.
*/
public boolean IsCoreiCompatible();
@@ -82,6 +86,11 @@ public interface IntelCPUInfo extends CPUInfo {
* Supports the SSE 3, 4.1, 4.2 instructions.
* Supports the AVX 1 instructions.
* In general, this requires 22nm or smaller process.
*
* UNUSED, there is no specific GMP build for Ivy Bridge,
* and this is never called from NativeBigInteger.
* Ivy Bridge is a successor to Sandy Bridge, so use IsSandyCompatible().
*
* @return true if the CPU implements at least an IvyBridge level instruction/feature set.
*/
public boolean IsIvyCompatible();
@@ -89,6 +98,19 @@ public interface IntelCPUInfo extends CPUInfo {
/**
* Supports the SSE 3, 4.1, 4.2 instructions.
* Supports the AVX 1, 2 instructions.
* Supports the BMI 1, 2 instructions.
*
* WARNING - GMP 6 uses the BMI2 MULX instruction for the "coreihwl" binaries.
* Only Core i3/i5/i7 Haswell processors support BMI2.
*
* Requires support for all 6 of these Corei features: FMA3 MOVBE ABM AVX2 BMI1 BMI2
* Pentium/Celeron Haswell processors do NOT support BMI2 and are NOT compatible.
* Those processors will be Sandy-compatible if they have AVX 1 support,
* and Corei-compatible if they do not.
*
* ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
* ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29
*
* In general, this requires 22nm or smaller process.
* @return true if the CPU implements at least a Haswell level instruction/feature set.
*/
@@ -98,6 +120,11 @@ public interface IntelCPUInfo extends CPUInfo {
* Supports the SSE 3, 4.1, 4.2 instructions.
* Supports the AVX 1, 2 instructions.
* In general, this requires 14nm or smaller process.
*
* NOT FULLY USED as of GMP 6.0.
* All GMP coreibwl binaries are duplicates of binaries for older technologies,
* so we do not distribute any. However, this is called from NativeBigInteger.
*
* @return true if the CPU implements at least a Broadwell level instruction/feature set.
*/
public boolean IsBroadwellCompatible();

View File

@@ -282,6 +282,8 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
modelString = "Atom";
break;
// Sandy bridge 32 nm
// 1, 2, or 4 cores
// ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
case 0x2a:
isSandyCompatible = true;
modelString = "Sandy Bridge";
@@ -295,6 +297,8 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
modelString = "Westmere";
break;
// Sandy Bridge 32 nm
// Sandy Bridge-E up to 8 cores
// ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
case 0x2d:
isSandyCompatible = true;
modelString = "Sandy Bridge";
@@ -328,18 +332,15 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
modelString = "Atom";
break;
// Ivy Bridge 22 nm
// ref: https://en.wikipedia.org/wiki/Ivy_Bridge_%28microarchitecture%29
case 0x3a:
isSandyCompatible = true;
isIvyCompatible = true;
modelString = "Ivy Bridge";
break;
// Haswell 22 nm
case 0x3c:
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell";
break;
// case 0x3c: See below
// Broadwell 14 nm
case 0x3d:
isSandyCompatible = true;
@@ -354,32 +355,72 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
isIvyCompatible = true;
modelString = "Ivy Bridge";
break;
// Haswell 22 nm
case 0x3f:
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell";
break;
// case 0x3f: See below
// following are for extended model == 4
// most flags are set above
// isCoreiCompatible = true is the default
// Haswell 22 nm
// Pentium and Celeron Haswells do not support new Haswell instructions,
// only Corei ones do, but we can't tell that from the model alone.
//
// We know for sure that GMP coreihwl uses the MULX instruction from BMI2,
// unsure about the others, but let's be safe and check all 6 feature bits, as
// the Intel app note suggests.
//
// ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29
// ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
case 0x3c:
case 0x3f:
case 0x45:
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell";
break;
// Haswell 22 nm
case 0x46:
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell";
boolean hasNewInstructions = false;
int reg = CPUID.getECXCPUFlags();
boolean hasFMA3 = (reg & (1 << 12)) != 0;
boolean hasMOVBE = (reg & (1 << 22)) != 0;
// AVX is implied by AVX2, so we don't need to check the value here,
// but we will need it below to enable Sandy Bridge if the Haswell checks fail.
// This is the same as hasAVX().
boolean hasAVX = (reg & (1 << 28)) != 0 && (reg & (1 << 27)) != 0;
//System.out.println("FMA3 MOVBE: " +
// hasFMA3 + ' ' + hasMOVBE);
if (hasFMA3 && hasMOVBE) {
reg = CPUID.getExtendedECXCPUFlags();
boolean hasABM = (reg & (1 << 5)) != 0; // aka LZCNT
//System.out.println("FMA3 MOVBE ABM: " +
// hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM);
if (hasABM) {
reg = CPUID.getExtendedEBXFeatureFlags();
boolean hasAVX2 = (reg & (1 << 5)) != 0;
boolean hasBMI1 = (reg & (1 << 3)) != 0;
boolean hasBMI2 = (reg & (1 << 8)) != 0;
//System.out.println("FMA3 MOVBE ABM AVX2 BMI1 BMI2: " +
// hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM + ' ' +
// hasAVX2 + ' ' + hasBMI1 + ' ' + hasBMI2);
if (hasAVX2 && hasBMI1 && hasBMI2)
hasNewInstructions = true;
}
}
if (hasNewInstructions) {
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell Core i3/i5/i7 model " + model;
} else {
// This processor is "corei" compatible, as we define it,
// i.e. SSE4.2 but not necessarily AVX.
if (hasAVX) {
isSandyCompatible = true;
isIvyCompatible = true;
modelString = "Haswell Celeron/Pentium w/ AVX model " + model;
} else {
modelString = "Haswell Celeron/Pentium model " + model;
}
}
break;
// Quark 32nm
case 0x4a:
isCore2Compatible = false;