Actual source code: sseenabled.c

  1: /* $Id: sseenabled.c,v 1.15 2001/07/20 21:03:24 buschelm Exp $ */
 2:  #include petscsys.h

  4: #ifdef PETSC_HAVE_SSE

  6: #include PETSC_HAVE_SSE
  7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  9: #include <string.h>

 13: int PetscSSEHardwareTest(PetscTruth *flag) {
 14:   int  ierr;
 15:   char *vendor;
 16:   char Intel[13]="GenuineIntel";
 17:   char AMD[13]  ="AuthenticAMD";

 20:   PetscMalloc(13*sizeof(char),&vendor);
 21:   strcpy(vendor,"************");
 22:   CPUID_GET_VENDOR(vendor);
 23:   if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
 24:     /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
 25:     /* to denote availability of SSE Support */
 26:     unsigned long myeax,myebx,myecx,myedx;
 27:     CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
 28:     if (myedx & SSE_FEATURE_FLAG) {
 29:       *flag = PETSC_TRUE;
 30:     } else {
 31:       *flag = PETSC_FALSE;
 32:     }
 33:   }
 34:   PetscFree(vendor);
 35:   return(0);
 36: }

 38: #ifdef PARCH_linux
#include <errno.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/* 
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal instruction signal (SIGILL) will be
   raised, so catch it and report the failure through the exit status.
*/
 47: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 49: static void PetscSSEDisabledHandler(int sig) {
 50:   signal(SIGILL,SIG_IGN);
 51:   exit(-1);
 52: }

 56: int PetscSSEOSEnabledTest_Linux(PetscTruth *flag) {
 57:   int status, pid = 0;
 59:   signal(SIGILL,PetscSSEDisabledHandler);
 60:   pid = fork();
 61:   if (pid==0) {
 62:     SSE_SCOPE_BEGIN;
 63:       XOR_PS(XMM0,XMM0);
 64:     SSE_SCOPE_END;
 65:     exit(0);
 66:   } else {
 67:     wait(&status);
 68:   }
 69:   if (!status) {
 70:     *flag = PETSC_TRUE;
 71:   } else {
 72:     *flag = PETSC_FALSE;
 73:   }
 74:   return(0);
 75: }

 77: #endif
 78: #ifdef PARCH_win32
/* 
   Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
   Windows ME/2000 do not disable SSE hardware.
*/
 83: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 84: #endif 

 88: int PetscSSEOSEnabledTest_TRUE(PetscTruth *flag) {
 90:   if (flag) {
 91:     *flag = PETSC_TRUE;
 92:   }
 93:   return(0);
 94: }

 96: #else  /* Not defined PETSC_HAVE_SSE */

 98: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
 99: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

103: int PetscSSEEnabledTest_FALSE(PetscTruth *flag) {
105:   if (flag) {
106:     *flag = PETSC_FALSE;
107:   }
108:   return(0);
109: }

111: #endif /* defined PETSC_HAVE_SSE */

115: /*@C
116:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction 
117:      set can be used.  Some operating systems do not allow the use of these instructions despite
118:      hardware availability.

120:      Collective on MPI_Comm

122:      Input Parameter:
123: .    comm - the MPI Communicator

125:      Output Parameters:
126: .    lflag - Local Flag:  PETSC_TRUE if enabled in this process
127: .    gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm

129:      Notes:
130:      PETSC_NULL can be specified for lflag or gflag if either of these values are not desired.

132:      Options Database Keys:
133: .    -disable_sse - Disable use of hand tuned Intel SSE implementations

135:      Level: developer
136: @*/
137: static PetscTruth petsc_sse_local_is_untested  = PETSC_TRUE;
138: static PetscTruth petsc_sse_enabled_local      = PETSC_FALSE;
139: static PetscTruth petsc_sse_global_is_untested = PETSC_TRUE;
140: static PetscTruth petsc_sse_enabled_global     = PETSC_FALSE;
141: int PetscSSEIsEnabled(MPI_Comm comm,PetscTruth *lflag,PetscTruth *gflag) {
143:   PetscTruth disabled_option;


147:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
148:     disabled_option = PETSC_FALSE;

150:     PetscOptionsName("-disable_sse",
151:                             "Disable use of hand tuned Intel SSE implementations <true,false>.",
152:                             "PetscSSEIsEnabled",&disabled_option);
153:     if (disabled_option) {
154:       petsc_sse_local_is_untested  = PETSC_FALSE;
155:       petsc_sse_enabled_local      = PETSC_FALSE;
156:       petsc_sse_global_is_untested = PETSC_FALSE;
157:       petsc_sse_enabled_global     = PETSC_FALSE;
158:     }

160:     if (petsc_sse_local_is_untested) {
161:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
162:       if (petsc_sse_enabled_local) {
163:         PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
164:       }
165:       petsc_sse_local_is_untested = PETSC_FALSE;
166:     }

168:     if (gflag && petsc_sse_global_is_untested) {
169:       MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPI_INT,MPI_LAND,comm);
170:       petsc_sse_global_is_untested = PETSC_FALSE;
171:     }
172:   }

174:   if (lflag) {
175:     *lflag = petsc_sse_enabled_local;
176:   }
177:   if (gflag) {
178:     *gflag = petsc_sse_enabled_global;
179:   }
180:   return(0);
181: }