Changeset fe3642e


Ignore:
Timestamp:
Dec 12, 2015 10:53:33 PM (6 years ago)
Author:
zzz <zzz@…>
Branches:
master
Children:
2c8179f
Parents:
5df3f40
Message:

CPUID: Multiple bug fixes:

  • Add support for extended feature registers EBX/ECX
  • No such thing as EBX for 0x80000001 call; remove getExtendedEBXCPUFlags() method, replaced with getExtendedEBXFeatureFlags()
  • Check for support of 6 required Core i3/i5/i7 instructions to enable Haswell, since GMP Haswell requires Core i3/i5/i7 support. There are Pentium/Celeron Haswells that do not support these instructions.
  • Fix hasAVX2(), hasAVX512(), and hasADX() using wrong register
  • Fix hasAVX512() checking wrong bit
  • Define hasAVX512() as supporting AVX-512 Foundation, not the "full" instruction set as previously specified in the javadocs.
  • hasAVX2(), hasAVX512(), and hasADX() need not check hasAVX() first
  • Add missing hasADX() to CPUInfo interface

Also:

  • More diagnostic output in CPUID.main()
  • More javadocs
Location:
core/java/src/freenet/support/CPUInformation
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • core/java/src/freenet/support/CPUInformation/CPUID.java

    r5df3f40 rfe3642e  
    191191    }
    192192
    193     static int getExtendedEBXCPUFlags()
    194     {
    195         CPUIDResult c = doCPUID(0x80000001);
    196         return c.EBX;
    197     }
    198 
    199193    static int getExtendedECXCPUFlags()
    200194    {
     
    208202        CPUIDResult c = doCPUID(0x80000001);
    209203        return c.EDX;
     204    }
     205
     206    /**
     207     *  @since 0.9.24
     208     */
     209    static int getExtendedEBXFeatureFlags()
     210    {
     211        // Supposed to set ECX to 0 before calling?
     212        // But we don't have support for that in jcpuid.
     213        // And it works just fine without that.
     214        CPUIDResult c = doCPUID(7);
     215        return c.EBX;
     216    }
     217
     218    /**
     219     *  There's almost nothing in here.
     220     *  @since 0.9.24
     221     */
     222    static int getExtendedECXFeatureFlags()
     223    {
     224        // Supposed to set ECX to 0 before calling?
     225        // But we don't have support for that in jcpuid.
     226        // And it works just fine without that.
     227        CPUIDResult c = doCPUID(7);
     228        return c.ECX;
    210229    }
    211230
     
    295314        System.out.println("CPU Model: " + model);
    296315        System.out.println("CPU Stepping: " + getCPUStepping());
    297         System.out.println("CPU Flags: 0x" + Integer.toHexString(getEDXCPUFlags()));
     316        System.out.println("CPU Flags (EDX):      0x" + Integer.toHexString(getEDXCPUFlags()));
     317        System.out.println("CPU Flags (ECX):      0x" + Integer.toHexString(getECXCPUFlags()));
     318        System.out.println("CPU Ext. Info. (EDX): 0x" + Integer.toHexString(getExtendedEDXCPUFlags()));
     319        System.out.println("CPU Ext. Info. (ECX): 0x" + Integer.toHexString(getExtendedECXCPUFlags()));
     320        System.out.println("CPU Ext. Feat. (EBX): 0x" + Integer.toHexString(getExtendedEBXFeatureFlags()));
     321        System.out.println("CPU Ext. Feat. (ECX): 0x" + Integer.toHexString(getExtendedECXFeatureFlags()));
    298322
    299323        CPUInfo c = getInfo();
    300324        System.out.println("\n **More CPUInfo**");
    301325        System.out.println("CPU model string: " + c.getCPUModelString());
    302         System.out.println("CPU has MMX: " + c.hasMMX());
    303         System.out.println("CPU has SSE: " + c.hasSSE());
    304         System.out.println("CPU has SSE2: " + c.hasSSE2());
    305         System.out.println("CPU has SSE3: " + c.hasSSE3());
     326        System.out.println("CPU has MMX:    " + c.hasMMX());
     327        System.out.println("CPU has SSE:    " + c.hasSSE());
     328        System.out.println("CPU has SSE2:   " + c.hasSSE2());
     329        System.out.println("CPU has SSE3:   " + c.hasSSE3());
    306330        System.out.println("CPU has SSE4.1: " + c.hasSSE41());
    307331        System.out.println("CPU has SSE4.2: " + c.hasSSE42());
    308         System.out.println("CPU has SSE4A: " + c.hasSSE4A());
    309         System.out.println("CPU has AES-NI: " + c.hasAES());
     332        System.out.println("CPU has SSE4A:  " + c.hasSSE4A());
     333        System.out.println("CPU has AVX:    " + c.hasAVX());
     334        System.out.println("CPU has AVX2:   " + c.hasAVX2());
     335        System.out.println("CPU has AVX512: " + c.hasAVX512());
     336        System.out.println("CPU has ADX:    " + c.hasADX());
     337        System.out.println("CPU has TBM:    " + c.hasTBM());
    310338        if(c instanceof IntelCPUInfo){
    311339            System.out.println("\n **Intel-info**");
     
    317345            System.out.println("Is Core2-compatible: "+((IntelCPUInfo)c).IsCore2Compatible());
    318346            System.out.println("Is Corei-compatible: "+((IntelCPUInfo)c).IsCoreiCompatible());
     347            System.out.println("Is Sandy-compatible: "+((IntelCPUInfo)c).IsSandyCompatible());
     348            System.out.println("Is Ivy-compatible: "+((IntelCPUInfo)c).IsIvyCompatible());
     349            System.out.println("Is Haswell-compatible: "+((IntelCPUInfo)c).IsHaswellCompatible());
     350            System.out.println("Is Broadwell-compatible: "+((IntelCPUInfo)c).IsBroadwellCompatible());
    319351        }
    320352        if(c instanceof AMDCPUInfo){
  • core/java/src/freenet/support/CPUInformation/CPUIDCPUInfo.java

    r5df3f40 rfe3642e  
    6464    public boolean hasAVX2()
    6565    {
    66         return hasAVX() &&
    67                (CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
     66        return (CPUID.getExtendedEBXFeatureFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
    6867    }
    6968   
    7069    /**
    71      * @return true iff the CPU supports the AVX512 instruction set.
     70     * Does the CPU support the AVX-512 Foundation instruction set?
     71     *
     72     * Quote wikipedia:
     73     *
     74     * AVX-512 consists of multiple extensions not all meant to be supported
     75     * by all processors implementing them. Only the core extension AVX-512F
     76     * (AVX-512 Foundation) is required by all implementations.
     77     *
     78     * ref: https://en.wikipedia.org/wiki/AVX-512
     79     *
     80     * @return true iff the CPU supports the AVX-512 Foundation instruction set.
    7281     * @since 0.9.21
    7382     */
    7483    public boolean hasAVX512()
    7584    {
    76         return hasAVX() &&
    77                (CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5
     85        return (CPUID.getExtendedEBXFeatureFlags() & (1 << 16)) != 0; //Extended EBX Bit 16
    7886    }
    7987   
     
    8492    public boolean hasADX()
    8593    {
    86         return hasAVX() &&
    87                (CPUID.getExtendedEBXCPUFlags() & (1 << 19)) != 0; //Extended EBX Bit 19
     94        return (CPUID.getExtendedEBXFeatureFlags() & (1 << 19)) != 0; //Extended EBX Bit 19
    8895    }
    8996   
  • core/java/src/freenet/support/CPUInformation/CPUInfo.java

    r5df3f40 rfe3642e  
    6060
    6161    /**
     62     * AMD K10 only. Not supported on Intel.
     63     * ref: https://en.wikipedia.org/wiki/SSE4.2#SSE4a
     64     *
    6265     * @return true iff the CPU supports the SSE4A instruction set.
    6366     */
     
    7780   
    7881    /**
    79      * @return true iff the CPU supports the full AVX512 instruction set.
     82     * Does the CPU support the AVX-512 Foundation instruction set?
     83     *
     84     * Quote wikipedia:
     85     *
     86     * AVX-512 consists of multiple extensions not all meant to be supported
     87     * by all processors implementing them. Only the core extension AVX-512F
     88     * (AVX-512 Foundation) is required by all implementations.
     89     *
     90     * ref: https://en.wikipedia.org/wiki/AVX-512
     91     *
     92     * @return true iff the CPU supports the AVX-512 Foundation instruction set.
    8093     * @since 0.9.21
    8194     */
    8295    public boolean hasAVX512();
     96   
     97    /**
     98     * @return true iff the CPU supports the ADX instruction set.
     99     * @since 0.9.21
     100     */
     101    public boolean hasADX();
    83102   
    84103    /**
  • core/java/src/freenet/support/CPUInformation/IntelCPUInfo.java

    r5df3f40 rfe3642e  
    6767     * Supports the SSE 3, 4.1, 4.2 instructions.
    6868     * In general, this requires 45nm or smaller process.
     69     *
     70     * This is the Nehalem architecture.
     71     * ref: https://en.wikipedia.org/wiki/Nehalem_%28microarchitecture%29
     72     *
    6973     * @return true if the CPU implements at least a Corei level instruction/feature set.
    7074     */
     
    8387     * Supports the AVX 1 instructions.
    8488     * In general, this requires 22nm or smaller process.
     89     *
     90     * UNUSED, there is no specific GMP build for Ivy Bridge,
     91     * and this is never called from NativeBigInteger.
     92     * Ivy Bridge is a successor to Sandy Bridge, so use IsSandyCompatible().
     93     *
    8594     * @return true if the CPU implements at least a IvyBridge level instruction/feature set.
    8695     */
     
    9099     * Supports the SSE 3, 4.1, 4.2 instructions.
    91100     * Supports the AVX 1, 2 instructions.
     101     * Supports the BMI 1, 2 instructions.
     102     *
     103     * WARNING - GMP 6 uses the BMI2 MULX instruction for the "coreihwl" binaries.
     104     * Only Core i3/i5/i7 Haswell processors support BMI2.
     105     *
     106     * Requires support for all 6 of these Corei features: FMA3 MOVBE ABM AVX2 BMI1 BMI2
     107     * Pentium/Celeron Haswell processors do NOT support BMI2 and are NOT compatible.
     108     * Those processors will be Sandy-compatible if they have AVX 1 support,
     109     * and Corei-compatible if they do not.
     110     *
     111     * ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
     112     * ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29
     113     *
    92114     * In general, this requires 22nm or smaller process.
    93115     * @return true if the CPU implements at least a Haswell level instruction/feature set.
     
    99121     * Supports the AVX 1, 2 instructions.
    100122     * In general, this requires 14nm or smaller process.
     123     *
     124     * NOT FULLY USED as of GMP 6.0.
     125     * All GMP coreibwl binaries are duplicates of binaries for older technologies,
     126     * so we do not distribute any. However, this is called from NativeBigInteger.
     127     *
    101128     * @return true if the CPU implements at least a Broadwell level instruction/feature set.
    102129     */
  • core/java/src/freenet/support/CPUInformation/IntelInfoImpl.java

    r5df3f40 rfe3642e  
    283283                        break;
    284284                    // Sandy bridge 32 nm
     285                    // 1, 2, or 4 cores
     286                    // ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
    285287                    case 0x2a:
    286288                        isSandyCompatible = true;
     
    296298                        break;
    297299                    // Sandy Bridge 32 nm
     300                    // Sandy Bridge-E up to 8 cores
     301                    // ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
    298302                    case 0x2d:
    299303                        isSandyCompatible = true;
     
    329333                        break;
    330334                    // Ivy Bridge 22 nm
     335                    // ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29
    331336                    case 0x3a:
    332337                        isSandyCompatible = true;
     
    334339                        modelString = "Ivy Bridge";
    335340                        break;
    336                     // Haswell 22 nm
    337                     case 0x3c:
    338                         isSandyCompatible = true;
    339                         isIvyCompatible = true;
    340                         isHaswellCompatible = true;
    341                         modelString = "Haswell";
    342                         break;
     341
     342                    // case 0x3c: See below
     343
    343344                    // Broadwell 14 nm
    344345                    case 0x3d:
     
    355356                        modelString = "Ivy Bridge";
    356357                        break;
    357                     // Haswell 22 nm
    358                     case 0x3f:
    359                         isSandyCompatible = true;
    360                         isIvyCompatible = true;
    361                         isHaswellCompatible = true;
    362                         modelString = "Haswell";
    363                         break;
     358
     359                    // case 0x3f: See below
    364360
    365361                // following are for extended model == 4
     
    368364
    369365                    // Haswell 22 nm
     366                    // Pentium and Celeron Haswells do not support new Haswell instructions,
     367                    // only Corei ones do, but we can't tell that from the model alone.
     368                    //
     369                    // We know for sure that GMP coreihwl uses the MULX instruction from BMI2,
     370                    // unsure about the others, but let's be safe and check all 6 feature bits, as
     371                    // the Intel app note suggests.
     372                    //
     373                    // ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29
     374                    // ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
     375                    case 0x3c:
     376                    case 0x3f:
    370377                    case 0x45:
    371                         isSandyCompatible = true;
    372                         isIvyCompatible = true;
    373                         isHaswellCompatible = true;
    374                         modelString = "Haswell";
    375                         break;
    376                     // Haswell 22 nm
    377378                    case 0x46:
    378                         isSandyCompatible = true;
    379                         isIvyCompatible = true;
    380                         isHaswellCompatible = true;
    381                         modelString = "Haswell";
    382                         break;
     379                        boolean hasNewInstructions = false;
     380                        int reg = CPUID.getECXCPUFlags();
     381                        boolean hasFMA3 = (reg & (1 << 12)) != 0;
     382                        boolean hasMOVBE = (reg & (1 << 22)) != 0;
     383                        // AVX is implied by AVX2, so we don't need to check the value here,
     384                        // but we will need it below to enable Sandy Bridge if the Haswell checks fail.
     385                        // This is the same as hasAVX().
     386                        boolean hasAVX = (reg & (1 << 28)) != 0 && (reg & (1 << 27)) != 0;
     387                        //System.out.println("FMA3 MOVBE: " +
     388                        //                   hasFMA3 + ' ' + hasMOVBE);
     389                        if (hasFMA3 && hasMOVBE) {
     390                            reg = CPUID.getExtendedECXCPUFlags();
     391                            boolean hasABM = (reg & (1 << 5)) != 0;  // aka LZCNT
     392                            //System.out.println("FMA3 MOVBE ABM: " +
     393                            //                   hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM);
     394                            if (hasABM) {
     395                                reg = CPUID.getExtendedEBXFeatureFlags();
     396                                boolean hasAVX2 = (reg & (1 << 5)) != 0;
     397                                boolean hasBMI1 = (reg & (1 << 3)) != 0;
     398                                boolean hasBMI2 = (reg & (1 << 8)) != 0;
     399                                //System.out.println("FMA3 MOVBE ABM AVX2 BMI1 BMI2: " +
     400                                //                   hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM + ' ' +
     401                                //                   hasAVX2 + ' ' + hasBMI1 + ' ' + hasBMI2);
     402                                if (hasAVX2 && hasBMI1 && hasBMI2)
     403                                    hasNewInstructions = true;
     404                            }
     405                        }
     406                        if (hasNewInstructions) {
     407                            isSandyCompatible = true;
     408                            isIvyCompatible = true;
     409                            isHaswellCompatible = true;
     410                            modelString = "Haswell Core i3/i5/i7 model " + model;
     411                        } else {
     412                            // This processor is "corei" compatible, as we define it,
     413                            // i.e. SSE4.2 but not necessarily AVX.
     414                            if (hasAVX) {
     415                                isSandyCompatible = true;
     416                                isIvyCompatible = true;
     417                                modelString = "Haswell Celeron/Pentium w/ AVX model " + model;
     418                            } else {
     419                                modelString = "Haswell Celeron/Pentium model " + model;
     420                            }
     421                        }
     422                        break;
     423
    383424                    // Quark 32nm
    384425                    case 0x4a:
Note: See TracChangeset for help on using the changeset viewer.