Actual source code: sseenabled.c

petsc-3.4.5 2014-06-29
  2: #include <petscsys.h> /*I "petscsys.h" I*/

  4: #if defined(PETSC_HAVE_SSE)

  6: #include PETSC_HAVE_SSE
  7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

 11: PetscErrorCode  PetscSSEHardwareTest(PetscBool  *flag)
 12: {
 14:   char           *vendor;
 15:   char           Intel[13]="GenuineIntel";
 16:   char           AMD[13]  ="AuthenticAMD";

 19:   PetscMalloc(13*sizeof(char),&vendor);
 20:   strcpy(vendor,"************");
 21:   CPUID_GET_VENDOR(vendor);
 22:   if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
 23:     /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
 24:     /* to denote availability of SSE Support */
 25:     unsigned long myeax,myebx,myecx,myedx;
 26:     CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
 27:     if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
 28:     else *flag = PETSC_FALSE;
 29:   }
 30:   PetscFree(vendor);
 31:   return(0);
 32: }

 34: #if defined(PETSC_HAVE_FORK)
#include <errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>
 36: /*
 37:    Early versions of the Linux kernel disable SSE hardware because
 38:    they do not know how to preserve the SSE state at a context switch.
 39:    To detect this feature, try an sse instruction in another process.
 40:    If it works, great!  If not, an illegal instruction signal will be thrown,
 41:    so catch it and return an error code.
 42: */
 43: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 45: static void PetscSSEDisabledHandler(int sig)
 46: {
 47:   signal(SIGILL,SIG_IGN);
 48:   exit(-1);
 49: }

 53: PetscErrorCode  PetscSSEOSEnabledTest_Linux(PetscBool  *flag)
 54: {
 55:   int status, pid = 0;

 58:   signal(SIGILL,PetscSSEDisabledHandler);
 59:   pid = fork();
 60:   if (pid==0) {
 61:     SSE_SCOPE_BEGIN;
 62:     XOR_PS(XMM0,XMM0);
 63:     SSE_SCOPE_END;
 64:     exit(0);
 65:   } else wait(&status);
 66:   if (!status) *flag = PETSC_TRUE;
 67:   else *flag = PETSC_FALSE;
 68:   return(0);
 69: }

 71: #else
 72: /*
 73:    Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
 74:    Windows ME/2000 doesn't disable SSE Hardware
 75: */
 76: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 77: #endif

 81: PetscErrorCode  PetscSSEOSEnabledTest_TRUE(PetscBool  *flag)
 82: {
 84:   if (flag) *flag = PETSC_TRUE;
 85:   return(0);
 86: }

 88: #else  /* Not defined PETSC_HAVE_SSE */

 90: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
 91: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

 95: PetscErrorCode  PetscSSEEnabledTest_FALSE(PetscBool  *flag)
 96: {
 98:   if (flag) *flag = PETSC_FALSE;
 99:   return(0);
100: }

102: #endif /* defined PETSC_HAVE_SSE */

106: /*@C
107:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
108:      set can be used.  Some operating systems do not allow the use of these instructions despite
109:      hardware availability.

111:      Collective on MPI_Comm

113:      Input Parameter:
114: .    comm - the MPI Communicator

116:      Output Parameters:
117: .    lflag - Local Flag:  PETSC_TRUE if enabled in this process
118: .    gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm

120:      Notes:
121:      NULL can be specified for lflag or gflag if either of these values are not desired.

123:      Options Database Keys:
124: .    -disable_sse - Disable use of hand tuned Intel SSE implementations

126:      Level: developer
127: @*/
128: static PetscBool petsc_sse_local_is_untested  = PETSC_TRUE;
129: static PetscBool petsc_sse_enabled_local      = PETSC_FALSE;
130: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
131: static PetscBool petsc_sse_enabled_global     = PETSC_FALSE;
132: PetscErrorCode  PetscSSEIsEnabled(MPI_Comm comm,PetscBool  *lflag,PetscBool  *gflag)
133: {
135:   PetscBool      disabled_option;

138:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
139:     disabled_option = PETSC_FALSE;

141:     PetscOptionsBool("-disable_sse",
142:                             "Disable use of hand tuned Intel SSE implementations <true,false>.",
143:                             "PetscSSEIsEnabled",disabled_option,&disabled_option,NULL);
144:     if (disabled_option) {
145:       petsc_sse_local_is_untested  = PETSC_FALSE;
146:       petsc_sse_enabled_local      = PETSC_FALSE;
147:       petsc_sse_global_is_untested = PETSC_FALSE;
148:       petsc_sse_enabled_global     = PETSC_FALSE;
149:     }

151:     if (petsc_sse_local_is_untested) {
152:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
153:       if (petsc_sse_enabled_local) {
154:         PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
155:       }
156:       petsc_sse_local_is_untested = PETSC_FALSE;
157:     }

159:     if (gflag && petsc_sse_global_is_untested) {
160:       MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPIU_BOOL,MPI_LAND,comm);

162:       petsc_sse_global_is_untested = PETSC_FALSE;
163:     }
164:   }

166:   if (lflag) *lflag = petsc_sse_enabled_local;
167:   if (gflag) *gflag = petsc_sse_enabled_global;
168:   return(0);
169: }