00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
00039 #define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1
00040
00041 #include <parallel/types.h>
00042 #include <parallel/base.h>
00043
00044 #if defined(__SUNPRO_CC) && defined(__sparc)
00045 #include <sys/atomic.h>
00046 #endif
00047
00048 #if !defined(_WIN32) || defined (__CYGWIN__)
00049 #include <sched.h>
00050 #endif
00051
00052 #if defined(_MSC_VER)
00053 #include <Windows.h>
00054 #include <intrin.h>
00055 #undef max
00056 #undef min
00057 #endif
00058
00059 #ifdef __MINGW32__
00060
00061
00062
00063 extern "C"
00064 __attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
00065 #endif
00066
00067 namespace __gnu_parallel
00068 {
#if defined(__ICC)
// Intel-compiler-only helpers: atomic fetch-and-add implemented with an
// inline "lock xadd" instruction.  The unused template parameter merely
// postpones instantiation until the function is actually used.

/** @brief Atomically add @c inc to @c *x (32-bit variant).
 *  @param x Pointer to the variable to modify.
 *  @param inc Value to add.
 *  @return The value @c *x held before the addition
 *  (xadd leaves the old value in the source register operand). */
template<typename must_be_int = int>
int32 faa32(int32* x, int32 inc)
{
  asm volatile("lock xadd %0,%1"
               : "=r" (inc), "=m" (*x)
               : "0" (inc)
               : "memory");
  return inc;
}
#if defined(__x86_64)
/** @brief Atomically add @c inc to @c *x (64-bit variant, x86-64 only).
 *  @param x Pointer to the variable to modify.
 *  @param inc Value to add.
 *  @return The value @c *x held before the addition. */
template<typename must_be_int = int>
int64 faa64(int64* x, int64 inc)
{
  asm volatile("lock xadd %0,%1"
               : "=r" (inc), "=m" (*x)
               : "0" (inc)
               : "memory");
  return inc;
}
#endif
#endif
00091
00092
00093
00094
00095
00096
00097
00098
00099
/** @brief Add a value to a variable, atomically.
 *
 *  32-bit version; one implementation per compiler/platform, selected
 *  by the preprocessor ladder below.
 *
 *  @param ptr Pointer to a 32-bit signed integer.
 *  @param addend Value to add.
 *  @return The value @c *ptr held before the addition. */
inline int32
fetch_and_add_32(volatile int32* ptr, int32 addend)
{
#if defined(__ICC) //x86 version
  return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ECC) //IA-64 version
  return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER)
  return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr),
                                 addend);
#elif defined(__GNUC__)
  // GCC intrinsic: full barrier, returns the previous value.
  return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
  // SPARC has no fetch-and-add primitive here: emulate it with a
  // compare-and-swap retry loop.
  volatile int32 before, after;
  do
    {
      before = *ptr;
      after = before + addend;
    } while (atomic_cas_32((volatile unsigned int*)ptr, before,
                           after) != before);
  return before;
#else	//fallback, slow
#pragma message("slow fetch_and_add_32")
  // Last resort: serialize through an OpenMP critical section.
  int32 res;
#pragma omp critical
  {
    res = *ptr;
    *(ptr) += addend;
  }
  return res;
#endif
}
00132
00133
00134
00135
00136
00137
00138
/** @brief Add a value to a variable, atomically.
 *
 *  64-bit version; one implementation per compiler/platform, selected
 *  by the preprocessor ladder below.  Some 32-bit targets have no
 *  64-bit atomic primitive and fall through to the slow OpenMP path.
 *
 *  @param ptr Pointer to a 64-bit signed integer.
 *  @param addend Value to add.
 *  @return The value @c *ptr held before the addition. */
inline int64
fetch_and_add_64(volatile int64* ptr, int64 addend)
{
#if defined(__ICC) && defined(__x86_64) //x86 version
  return faa64<int>((int64*)ptr, addend);
#elif defined(__ECC) //IA-64 version
  return _InterlockedExchangeAdd64((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
  // _InterlockedExchangeAdd64 is only available on 64-bit Windows.
  _GLIBCXX_PARALLEL_ASSERT(false);
  return 0;
#else
  return _InterlockedExchangeAdd64(ptr, addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
  return __sync_fetch_and_add(ptr, addend);
#elif defined(__GNUC__) && defined(__i386) && \
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
  // 64-bit __sync builtins exist on i386 only for CPUs with cmpxchg8b.
  return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
  // Emulate fetch-and-add with a compare-and-swap retry loop.
  volatile int64 before, after;
  do
    {
      before = *ptr;
      after = before + addend;
    } while (atomic_cas_64((volatile unsigned long long*)ptr, before,
                           after) != before);
  return before;
#else	//fallback, slow
#if defined(__GNUC__) && defined(__i386)
  // NOTE(review): generic i386 lands here; comment block explaining the
  // limitation appears to have been stripped from this copy.
#endif
#pragma message("slow fetch_and_add_64")
  // Last resort: serialize through an OpenMP critical section.
  int64 res;
#pragma omp critical
  {
    res = *ptr;
    *(ptr) += addend;
  }
  return res;
#endif
}
00182
00183
00184
00185
00186
00187
00188
00189 template<typename T>
00190 inline T
00191 fetch_and_add(volatile T* ptr, T addend)
00192 {
00193 if (sizeof(T) == sizeof(int32))
00194 return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend);
00195 else if (sizeof(T) == sizeof(int64))
00196 return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend);
00197 else
00198 _GLIBCXX_PARALLEL_ASSERT(false);
00199 }
00200
00201
#if defined(__ICC)
// Intel-compiler-only helpers: compare-and-swap via inline "lock cmpxchg".
// The unused template parameter postpones instantiation until first use.

/** @brief Atomically compare @c *ptr with @c old and, if equal,
 *  replace it with @c nw (32-bit variant).
 *  @return The value @c *ptr held before the operation (cmpxchg leaves
 *  the old memory value in %eax, bound to @c before). */
template<typename must_be_int = int>
inline int32
cas32(volatile int32* ptr, int32 old, int32 nw)
{
  int32 before;
  // NOTE(review): the memory operand is cast to volatile long long even
  // though cmpxchgl operates on 4 bytes — looks suspicious; verify.
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a"(before)
                       : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
                       : "memory");
  return before;
}

#if defined(__x86_64)
/** @brief Atomically compare @c *ptr with @c old and, if equal,
 *  replace it with @c nw (64-bit variant, x86-64 only).
 *  @return The value @c *ptr held before the operation. */
template<typename must_be_int = int>
inline int64
cas64(volatile int64 *ptr, int64 old, int64 nw)
{
  int64 before;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a"(before)
                       : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
                       : "memory");
  return before;
}
#endif

#endif
00231
00232
00233
00234
00235
00236
00237
00238
00239
/** @brief Compare-and-swap, atomically.
 *
 *  32-bit version; one implementation per compiler/platform, selected
 *  by the preprocessor ladder below.
 *
 *  @param ptr Pointer to a 32-bit signed integer.
 *  @param comparand Value expected at @c *ptr.
 *  @param replacement Value stored if @c *ptr equals @c comparand.
 *  @return True if the replacement took place. */
inline bool
compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement)
{
#if defined(__ICC) //x86 version
  return _InterlockedCompareExchange((void*)ptr, replacement,
                                     comparand) == comparand;
#elif defined(__ECC) //IA-64 version
  return _InterlockedCompareExchange((void*)ptr, replacement,
                                     comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER)
  return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr),
                                     replacement, comparand) == comparand;
#elif defined(__GNUC__)
  // GCC intrinsic: full barrier, returns whether the swap happened.
  return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
  return atomic_cas_32((volatile unsigned int*)ptr, comparand,
                       replacement) == comparand;
#else
#pragma message("slow compare_and_swap_32")
  // Last resort: serialize through an OpenMP critical section.
  bool res = false;
#pragma omp critical
  {
    if (*ptr == comparand)
      {
        *ptr = replacement;
        res = true;
      }
  }
  return res;
#endif
}
00271
00272
00273
00274
00275
00276
00277
00278
00279
/** @brief Compare-and-swap, atomically.
 *
 *  64-bit version; one implementation per compiler/platform, selected
 *  by the preprocessor ladder below.  Some 32-bit targets have no
 *  64-bit atomic primitive and fall through to the slow OpenMP path.
 *
 *  @param ptr Pointer to a 64-bit signed integer.
 *  @param comparand Value expected at @c *ptr.
 *  @param replacement Value stored if @c *ptr equals @c comparand.
 *  @return True if the replacement took place. */
inline bool
compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement)
{
#if defined(__ICC) && defined(__x86_64) //x86 version
  return cas64<int>(ptr, comparand, replacement) == comparand;
#elif defined(__ECC) //IA-64 version
  return _InterlockedCompareExchange64((void*)ptr, replacement,
                                       comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
  // _InterlockedCompareExchange64 is only available on 64-bit Windows.
  _GLIBCXX_PARALLEL_ASSERT(false);
  return 0;
#else
  return _InterlockedCompareExchange64(ptr, replacement,
                                       comparand) == comparand;
#endif

#elif defined(__GNUC__) && defined(__x86_64)
  return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__GNUC__) && defined(__i386) && \
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
  // 64-bit __sync builtins exist on i386 only for CPUs with cmpxchg8b.
  return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
  return atomic_cas_64((volatile unsigned long long*)ptr,
                       comparand, replacement) == comparand;
#else
#if defined(__GNUC__) && defined(__i386)
  // NOTE(review): generic i386 lands here; comment block explaining the
  // limitation appears to have been stripped from this copy.
#endif
#pragma message("slow compare_and_swap_64")
  // Last resort: serialize through an OpenMP critical section.
  bool res = false;
#pragma omp critical
  {
    if (*ptr == comparand)
      {
        *ptr = replacement;
        res = true;
      }
  }
  return res;
#endif
}
00323
00324
00325
00326
00327
00328
00329
00330
00331 template<typename T>
00332 inline bool
00333 compare_and_swap(volatile T* ptr, T comparand, T replacement)
00334 {
00335 if (sizeof(T) == sizeof(int32))
00336 return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement);
00337 else if (sizeof(T) == sizeof(int64))
00338 return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement);
00339 else
00340 _GLIBCXX_PARALLEL_ASSERT(false);
00341 }
00342
00343
00344
/** @brief Yield control of the processor to another thread, without
 *  waiting for the end of the current time slice.
 *
 *  On POSIX-like systems (everything except native Win32) this maps to
 *  sched_yield(); native Win32 uses Sleep(0) instead. */
inline void
yield()
{
#if !defined (_WIN32) || defined (__CYGWIN__)
  sched_yield();
#else
  Sleep(0);
#endif
}
00354 }
00355
00356 #endif