CPUID: Multiple bug fixes:

- Add support for extended feature registers EBX/ECX
 - No such thing as EBX for 0x80000001 call; remove getExtendedEBXCPUFlags() method,
   replaced with getExtendedEBXFeatureFlags()
 - Check for support of 6 required Core i3/i5/i7 instructions to enable Haswell,
   since GMP Haswell requires Core i3/i5/i7 support.
   There are Pentium/Celeron Haswells that do not support these instructions.
 - Fix hasAVX2(), hasAVX512(), and hasADX() using wrong register
 - Fix hasAVX512() checking wrong bit
 - Define hasAVX512() as supporting AVX-512 Foundation, not the "full"
   instruction set as previously specified in the javadocs.
 - hasAVX2(), hasAVX512(), and hasADX() need not check hasAVX() first
 - Add missing hasADX() to CPUInfo interface
Also:
 - More diagnostic output in CPUID.main()
 - More javadocs
This commit is contained in:
zzz
2015-12-12 22:53:33 +00:00
parent 5df3f404f8
commit fe3642edd4
5 changed files with 171 additions and 45 deletions

View File

@@ -190,12 +190,6 @@ public class CPUID {
return c.ECX; return c.ECX;
} }
static int getExtendedEBXCPUFlags()
{
CPUIDResult c = doCPUID(0x80000001);
return c.EBX;
}
static int getExtendedECXCPUFlags() static int getExtendedECXCPUFlags()
{ {
CPUIDResult c = doCPUID(0x80000001); CPUIDResult c = doCPUID(0x80000001);
@@ -209,6 +203,31 @@ public class CPUID {
return c.EDX; return c.EDX;
} }
/**
 * Fetches the EBX register produced by CPUID function 7,
 * i.e. the extended feature flags held in EBX.
 *
 * NOTE(review): nothing here verifies that the processor actually
 * implements CPUID function 7 - confirm that callers can tolerate that.
 *
 * @return the raw EBX value from the CPUID function 7 result
 * @since 0.9.24
 */
static int getExtendedEBXFeatureFlags()
{
    // ECX may need to be set to 0 before issuing this call, but jcpuid
    // gives us no way to do that. It works just fine without it.
    return doCPUID(7).EBX;
}
/**
 * Fetches the ECX register produced by CPUID function 7,
 * i.e. the extended feature flags held in ECX.
 * There is almost nothing in this register.
 *
 * NOTE(review): nothing here verifies that the processor actually
 * implements CPUID function 7 - confirm that callers can tolerate that.
 *
 * @return the raw ECX value from the CPUID function 7 result
 * @since 0.9.24
 */
static int getExtendedECXFeatureFlags()
{
    // ECX may need to be set to 0 before issuing this call, but jcpuid
    // gives us no way to do that. It works just fine without it.
    return doCPUID(7).ECX;
}
/** /**
* The model name string, up to 48 characters, as reported by * The model name string, up to 48 characters, as reported by
* the processor itself. * the processor itself.
@@ -294,19 +313,28 @@ public class CPUID {
System.out.println("CPU Family: " + family); System.out.println("CPU Family: " + family);
System.out.println("CPU Model: " + model); System.out.println("CPU Model: " + model);
System.out.println("CPU Stepping: " + getCPUStepping()); System.out.println("CPU Stepping: " + getCPUStepping());
System.out.println("CPU Flags: 0x" + Integer.toHexString(getEDXCPUFlags())); System.out.println("CPU Flags (EDX): 0x" + Integer.toHexString(getEDXCPUFlags()));
System.out.println("CPU Flags (ECX): 0x" + Integer.toHexString(getECXCPUFlags()));
System.out.println("CPU Ext. Info. (EDX): 0x" + Integer.toHexString(getExtendedEDXCPUFlags()));
System.out.println("CPU Ext. Info. (ECX): 0x" + Integer.toHexString(getExtendedECXCPUFlags()));
System.out.println("CPU Ext. Feat. (EBX): 0x" + Integer.toHexString(getExtendedEBXFeatureFlags()));
System.out.println("CPU Ext. Feat. (ECX): 0x" + Integer.toHexString(getExtendedECXFeatureFlags()));
CPUInfo c = getInfo(); CPUInfo c = getInfo();
System.out.println("\n **More CPUInfo**"); System.out.println("\n **More CPUInfo**");
System.out.println("CPU model string: " + c.getCPUModelString()); System.out.println("CPU model string: " + c.getCPUModelString());
System.out.println("CPU has MMX: " + c.hasMMX()); System.out.println("CPU has MMX: " + c.hasMMX());
System.out.println("CPU has SSE: " + c.hasSSE()); System.out.println("CPU has SSE: " + c.hasSSE());
System.out.println("CPU has SSE2: " + c.hasSSE2()); System.out.println("CPU has SSE2: " + c.hasSSE2());
System.out.println("CPU has SSE3: " + c.hasSSE3()); System.out.println("CPU has SSE3: " + c.hasSSE3());
System.out.println("CPU has SSE4.1: " + c.hasSSE41()); System.out.println("CPU has SSE4.1: " + c.hasSSE41());
System.out.println("CPU has SSE4.2: " + c.hasSSE42()); System.out.println("CPU has SSE4.2: " + c.hasSSE42());
System.out.println("CPU has SSE4A: " + c.hasSSE4A()); System.out.println("CPU has SSE4A: " + c.hasSSE4A());
System.out.println("CPU has AES-NI: " + c.hasAES()); System.out.println("CPU has AVX: " + c.hasAVX());
System.out.println("CPU has AVX2: " + c.hasAVX2());
System.out.println("CPU has AVX512: " + c.hasAVX512());
System.out.println("CPU has ADX: " + c.hasADX());
System.out.println("CPU has TBM: " + c.hasTBM());
if(c instanceof IntelCPUInfo){ if(c instanceof IntelCPUInfo){
System.out.println("\n **Intel-info**"); System.out.println("\n **Intel-info**");
System.out.println("Is PII-compatible: "+((IntelCPUInfo)c).IsPentium2Compatible()); System.out.println("Is PII-compatible: "+((IntelCPUInfo)c).IsPentium2Compatible());
@@ -316,6 +344,10 @@ public class CPUID {
System.out.println("Is Pentium M compatible: "+((IntelCPUInfo)c).IsPentiumMCompatible()); System.out.println("Is Pentium M compatible: "+((IntelCPUInfo)c).IsPentiumMCompatible());
System.out.println("Is Core2-compatible: "+((IntelCPUInfo)c).IsCore2Compatible()); System.out.println("Is Core2-compatible: "+((IntelCPUInfo)c).IsCore2Compatible());
System.out.println("Is Corei-compatible: "+((IntelCPUInfo)c).IsCoreiCompatible()); System.out.println("Is Corei-compatible: "+((IntelCPUInfo)c).IsCoreiCompatible());
System.out.println("Is Sandy-compatible: "+((IntelCPUInfo)c).IsSandyCompatible());
System.out.println("Is Ivy-compatible: "+((IntelCPUInfo)c).IsIvyCompatible());
System.out.println("Is Haswell-compatible: "+((IntelCPUInfo)c).IsHaswellCompatible());
System.out.println("Is Broadwell-compatible: "+((IntelCPUInfo)c).IsBroadwellCompatible());
} }
if(c instanceof AMDCPUInfo){ if(c instanceof AMDCPUInfo){
System.out.println("\n **AMD-info**"); System.out.println("\n **AMD-info**");

View File

@@ -63,18 +63,26 @@ abstract class CPUIDCPUInfo implements CPUInfo
*/ */
public boolean hasAVX2() public boolean hasAVX2()
{ {
return hasAVX() && return (CPUID.getExtendedEBXFeatureFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
(CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
} }
/** /**
* @return true iff the CPU supports the AVX512 instruction set. * Does the CPU support the AVX-512 Foundation instruction set?
*
* Quote wikipedia:
*
* AVX-512 consists of multiple extensions not all meant to be supported
* by all processors implementing them. Only the core extension AVX-512F
* (AVX-512 Foundation) is required by all implementations.
*
* ref: https://en.wikipedia.org/wiki/AVX-512
*
* @return true iff the CPU supports the AVX-512 Foundation instruction set.
* @since 0.9.21 * @since 0.9.21
*/ */
public boolean hasAVX512() public boolean hasAVX512()
{ {
return hasAVX() && return (CPUID.getExtendedEBXFeatureFlags() & (1 << 16)) != 0; //Extended EBX Bit 16
(CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
} }
/** /**
@@ -83,8 +91,7 @@ abstract class CPUIDCPUInfo implements CPUInfo
*/ */
public boolean hasADX() public boolean hasADX()
{ {
return hasAVX() && return (CPUID.getExtendedEBXFeatureFlags() & (1 << 19)) != 0; //Extended EBX Bit 19
(CPUID.getExtendedEBXCPUFlags() & (1 << 19)) != 0; //Extended EBX Bit 19
} }
/** /**

View File

@@ -59,6 +59,9 @@ public interface CPUInfo
public boolean hasSSE42(); public boolean hasSSE42();
/** /**
* AMD K10 only. Not supported on Intel.
* ref: https://en.wikipedia.org/wiki/SSE4.2#SSE4a
*
* @return true iff the CPU supports the SSE4A instruction set. * @return true iff the CPU supports the SSE4A instruction set.
*/ */
public boolean hasSSE4A(); public boolean hasSSE4A();
@@ -76,11 +79,27 @@ public interface CPUInfo
public boolean hasAVX2(); public boolean hasAVX2();
/** /**
* @return true iff the CPU supports the full AVX512 instruction set. * Does the CPU support the AVX-512 Foundation instruction set?
*
* Quote wikipedia:
*
* AVX-512 consists of multiple extensions not all meant to be supported
* by all processors implementing them. Only the core extension AVX-512F
* (AVX-512 Foundation) is required by all implementations.
*
* ref: https://en.wikipedia.org/wiki/AVX-512
*
* @return true iff the CPU supports the AVX-512 Foundation instruction set.
* @since 0.9.21 * @since 0.9.21
*/ */
public boolean hasAVX512(); public boolean hasAVX512();
/**
 * Does the CPU support the ADX instruction set?
 *
 * @return true iff the CPU supports the ADX instruction set.
 * @since 0.9.21
 */
public boolean hasADX();
/** /**
* @return true iff the CPU supports TBM. * @return true iff the CPU supports TBM.
* @since 0.9.21 * @since 0.9.21

View File

@@ -66,6 +66,10 @@ public interface IntelCPUInfo extends CPUInfo {
/** /**
* Supports the SSE 3, 4.1, 4.2 instructions. * Supports the SSE 3, 4.1, 4.2 instructions.
* In general, this requires 45nm or smaller process. * In general, this requires 45nm or smaller process.
*
* This is the Nehalem architecture.
* ref: https://en.wikipedia.org/wiki/Nehalem_%28microarchitecture%29
*
* @return true if the CPU implements at least a Corei level instruction/feature set. * @return true if the CPU implements at least a Corei level instruction/feature set.
*/ */
public boolean IsCoreiCompatible(); public boolean IsCoreiCompatible();
@@ -82,6 +86,11 @@ public interface IntelCPUInfo extends CPUInfo {
* Supports the SSE 3, 4.1, 4.2 instructions. * Supports the SSE 3, 4.1, 4.2 instructions.
* Supports the AVX 1 instructions. * Supports the AVX 1 instructions.
* In general, this requires 22nm or smaller process. * In general, this requires 22nm or smaller process.
*
* UNUSED, there is no specific GMP build for Ivy Bridge,
* and this is never called from NativeBigInteger.
* Ivy Bridge is a successor to Sandy Bridge, so use IsSandyCompatible().
*
* @return true if the CPU implements at least a IvyBridge level instruction/feature set. * @return true if the CPU implements at least a IvyBridge level instruction/feature set.
*/ */
public boolean IsIvyCompatible(); public boolean IsIvyCompatible();
@@ -89,6 +98,19 @@ public interface IntelCPUInfo extends CPUInfo {
/** /**
* Supports the SSE 3, 4.1, 4.2 instructions. * Supports the SSE 3, 4.1, 4.2 instructions.
* Supports the AVX 1, 2 instructions. * Supports the AVX 1, 2 instructions.
* Supports the BMI 1, 2 instructions.
*
* WARNING - GMP 6 uses the BMI2 MULX instruction for the "coreihwl" binaries.
* Only Core i3/i5/i7 Haswell processors support BMI2.
*
* Requires support for all 6 of these Corei features: FMA3 MOVBE ABM AVX2 BMI1 BMI2
* Pentium/Celeron Haswell processors do NOT support BMI2 and are NOT compatible.
* Those processors will be Sandy-compatible if they have AVX 1 support,
* and Corei-compatible if they do not.
*
* ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
* ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29
*
* In general, this requires 22nm or smaller process. * In general, this requires 22nm or smaller process.
* @return true if the CPU implements at least a Haswell level instruction/feature set. * @return true if the CPU implements at least a Haswell level instruction/feature set.
*/ */
@@ -98,6 +120,11 @@ public interface IntelCPUInfo extends CPUInfo {
* Supports the SSE 3, 4.1, 4.2 instructions. * Supports the SSE 3, 4.1, 4.2 instructions.
* Supports the AVX 1, 2 instructions. * Supports the AVX 1, 2 instructions.
* In general, this requires 14nm or smaller process. * In general, this requires 14nm or smaller process.
*
* NOT FULLY USED as of GMP 6.0.
* All GMP coreibwl binaries are duplicates of binaries for older technologies,
* so we do not distribute any. However, this is called from NativeBigInteger.
*
* @return true if the CPU implements at least a Broadwell level instruction/feature set. * @return true if the CPU implements at least a Broadwell level instruction/feature set.
*/ */
public boolean IsBroadwellCompatible(); public boolean IsBroadwellCompatible();

View File

@@ -282,6 +282,8 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
modelString = "Atom"; modelString = "Atom";
break; break;
// Sandy bridge 32 nm // Sandy bridge 32 nm
// 1, 2, or 4 cores
// ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
case 0x2a: case 0x2a:
isSandyCompatible = true; isSandyCompatible = true;
modelString = "Sandy Bridge"; modelString = "Sandy Bridge";
@@ -295,6 +297,8 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
modelString = "Westmere"; modelString = "Westmere";
break; break;
// Sandy Bridge 32 nm // Sandy Bridge 32 nm
// Sandy Bridge-E up to 8 cores
// ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
case 0x2d: case 0x2d:
isSandyCompatible = true; isSandyCompatible = true;
modelString = "Sandy Bridge"; modelString = "Sandy Bridge";
@@ -328,18 +332,15 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
modelString = "Atom"; modelString = "Atom";
break; break;
// Ivy Bridge 22 nm // Ivy Bridge 22 nm
// ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
case 0x3a: case 0x3a:
isSandyCompatible = true; isSandyCompatible = true;
isIvyCompatible = true; isIvyCompatible = true;
modelString = "Ivy Bridge"; modelString = "Ivy Bridge";
break; break;
// Haswell 22 nm
case 0x3c: // case 0x3c: See below
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell";
break;
// Broadwell 14 nm // Broadwell 14 nm
case 0x3d: case 0x3d:
isSandyCompatible = true; isSandyCompatible = true;
@@ -354,32 +355,72 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo
isIvyCompatible = true; isIvyCompatible = true;
modelString = "Ivy Bridge"; modelString = "Ivy Bridge";
break; break;
// Haswell 22 nm
case 0x3f: // case 0x3f: See below
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell";
break;
// following are for extended model == 4 // following are for extended model == 4
// most flags are set above // most flags are set above
// isCoreiCompatible = true is the default // isCoreiCompatible = true is the default
// Haswell 22 nm // Haswell 22 nm
// Pentium and Celeron Haswells do not support new Haswell instructions,
// only Corei ones do, but we can't tell that from the model alone.
//
// We know for sure that GMP coreihwl uses the MULX instruction from BMI2,
// unsure about the others, but let's be safe and check all 6 feature bits, as
// the Intel app note suggests.
//
// ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29
// ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
case 0x3c:
case 0x3f:
case 0x45: case 0x45:
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell";
break;
// Haswell 22 nm
case 0x46: case 0x46:
isSandyCompatible = true; boolean hasNewInstructions = false;
isIvyCompatible = true; int reg = CPUID.getECXCPUFlags();
isHaswellCompatible = true; boolean hasFMA3 = (reg & (1 << 12)) != 0;
modelString = "Haswell"; boolean hasMOVBE = (reg & (1 << 22)) != 0;
// AVX is implied by AVX2, so we don't need to check the value here,
// but we will need it below to enable Sandy Bridge if the Haswell checks fail.
// This is the same as hasAVX().
boolean hasAVX = (reg & (1 << 28)) != 0 && (reg & (1 << 27)) != 0;
//System.out.println("FMA3 MOVBE: " +
// hasFMA3 + ' ' + hasMOVBE);
if (hasFMA3 && hasMOVBE) {
reg = CPUID.getExtendedECXCPUFlags();
boolean hasABM = (reg & (1 << 5)) != 0; // aka LZCNT
//System.out.println("FMA3 MOVBE ABM: " +
// hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM);
if (hasABM) {
reg = CPUID.getExtendedEBXFeatureFlags();
boolean hasAVX2 = (reg & (1 << 5)) != 0;
boolean hasBMI1 = (reg & (1 << 3)) != 0;
boolean hasBMI2 = (reg & (1 << 8)) != 0;
//System.out.println("FMA3 MOVBE ABM AVX2 BMI1 BMI2: " +
// hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM + ' ' +
// hasAVX2 + ' ' + hasBMI1 + ' ' + hasBMI2);
if (hasAVX2 && hasBMI1 && hasBMI2)
hasNewInstructions = true;
}
}
if (hasNewInstructions) {
isSandyCompatible = true;
isIvyCompatible = true;
isHaswellCompatible = true;
modelString = "Haswell Core i3/i5/i7 model " + model;
} else {
// This processor is "corei" compatible, as we define it,
// i.e. SSE4.2 but not necessarily AVX.
if (hasAVX) {
isSandyCompatible = true;
isIvyCompatible = true;
modelString = "Haswell Celeron/Pentium w/ AVX model " + model;
} else {
modelString = "Haswell Celeron/Pentium model " + model;
}
}
break; break;
// Quark 32nm // Quark 32nm
case 0x4a: case 0x4a:
isCore2Compatible = false; isCore2Compatible = false;