Actual source code: sseenabled.c
1: /* $Id: sseenabled.c,v 1.15 2001/07/20 21:03:24 buschelm Exp $ */
2: #include petscsys.h
4: #ifdef PETSC_HAVE_SSE
6: #include PETSC_HAVE_SSE
7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
9: #include <string.h>
13: int PetscSSEHardwareTest(PetscTruth *flag) {
14: int ierr;
15: char *vendor;
16: char Intel[13]="GenuineIntel";
17: char AMD[13] ="AuthenticAMD";
20: PetscMalloc(13*sizeof(char),&vendor);
21: strcpy(vendor,"************");
22: CPUID_GET_VENDOR(vendor);
23: if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
24: /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
25: /* to denote availability of SSE Support */
26: unsigned long myeax,myebx,myecx,myedx;
27: CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
28: if (myedx & SSE_FEATURE_FLAG) {
29: *flag = PETSC_TRUE;
30: } else {
31: *flag = PETSC_FALSE;
32: }
33: }
34: PetscFree(vendor);
35: return(0);
36: }
38: #ifdef PARCH_linux
#include <errno.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal instruction signal will be raised,
   so catch it and make that process exit with a nonzero status.
*/
#define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

/*
   PetscSSEDisabledHandler - SIGILL handler for the probe process: ignores any
   further SIGILL and terminates the process with exit status -1 (which a
   waiting parent observes as 255).

   _exit() is used instead of exit() because exit() is not async-signal-safe,
   and in a forked child it would also run atexit handlers and flush stdio
   buffers inherited from the parent.
*/
static void PetscSSEDisabledHandler(int sig) {
  (void)sig; /* the handler is only ever installed for SIGILL */
  signal(SIGILL,SIG_IGN);
  _exit(-1);
}
56: int PetscSSEOSEnabledTest_Linux(PetscTruth *flag) {
57: int status, pid = 0;
59: signal(SIGILL,PetscSSEDisabledHandler);
60: pid = fork();
61: if (pid==0) {
62: SSE_SCOPE_BEGIN;
63: XOR_PS(XMM0,XMM0);
64: SSE_SCOPE_END;
65: exit(0);
66: } else {
67: wait(&status);
68: }
69: if (!status) {
70: *flag = PETSC_TRUE;
71: } else {
72: *flag = PETSC_FALSE;
73: }
74: return(0);
75: }
77: #endif
78: #ifdef PARCH_win32
79: /*
80: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
81: Windows ME/2000 doesn't disable SSE Hardware
82: */
83: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
84: #endif
88: int PetscSSEOSEnabledTest_TRUE(PetscTruth *flag) {
90: if (flag) {
91: *flag = PETSC_TRUE;
92: }
93: return(0);
94: }
96: #else /* Not defined PETSC_HAVE_SSE */
98: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
99: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
103: int PetscSSEEnabledTest_FALSE(PetscTruth *flag) {
105: if (flag) {
106: *flag = PETSC_FALSE;
107: }
108: return(0);
109: }
111: #endif /* defined PETSC_HAVE_SSE */
115: /*@C
116: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
117: set can be used. Some operating systems do not allow the use of these instructions despite
118: hardware availability.
120: Collective on MPI_Comm
122: Input Parameter:
123: . comm - the MPI Communicator
125: Output Parameters:
126: . lflag - Local Flag: PETSC_TRUE if enabled in this process
127: . gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm
129: Notes:
130: PETSC_NULL can be specified for lflag or gflag if either of these values are not desired.
132: Options Database Keys:
133: . -disable_sse - Disable use of hand tuned Intel SSE implementations
135: Level: developer
136: @*/
137: static PetscTruth petsc_sse_local_is_untested = PETSC_TRUE;
138: static PetscTruth petsc_sse_enabled_local = PETSC_FALSE;
139: static PetscTruth petsc_sse_global_is_untested = PETSC_TRUE;
140: static PetscTruth petsc_sse_enabled_global = PETSC_FALSE;
141: int PetscSSEIsEnabled(MPI_Comm comm,PetscTruth *lflag,PetscTruth *gflag) {
143: PetscTruth disabled_option;
147: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
148: disabled_option = PETSC_FALSE;
150: PetscOptionsName("-disable_sse",
151: "Disable use of hand tuned Intel SSE implementations <true,false>.",
152: "PetscSSEIsEnabled",&disabled_option);
153: if (disabled_option) {
154: petsc_sse_local_is_untested = PETSC_FALSE;
155: petsc_sse_enabled_local = PETSC_FALSE;
156: petsc_sse_global_is_untested = PETSC_FALSE;
157: petsc_sse_enabled_global = PETSC_FALSE;
158: }
160: if (petsc_sse_local_is_untested) {
161: PetscSSEHardwareTest(&petsc_sse_enabled_local);
162: if (petsc_sse_enabled_local) {
163: PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
164: }
165: petsc_sse_local_is_untested = PETSC_FALSE;
166: }
168: if (gflag && petsc_sse_global_is_untested) {
169: MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPI_INT,MPI_LAND,comm);
170: petsc_sse_global_is_untested = PETSC_FALSE;
171: }
172: }
174: if (lflag) {
175: *lflag = petsc_sse_enabled_local;
176: }
177: if (gflag) {
178: *gflag = petsc_sse_enabled_global;
179: }
180: return(0);
181: }