00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048 #ifndef _GLIBCXX_PARALLEL_BAL_QUICKSORT_H
00049 #define _GLIBCXX_PARALLEL_BAL_QUICKSORT_H 1
00050
00051 #include <parallel/basic_iterator.h>
00052 #include <bits/stl_algo.h>
00053
00054 #include <parallel/settings.h>
00055 #include <parallel/partition.h>
00056 #include <parallel/random_number.h>
00057 #include <parallel/queue.h>
00058 #include <functional>
00059
00060 #if _GLIBCXX_ASSERTIONS
00061 #include <parallel/checkers.h>
00062 #endif
00063
00064 namespace __gnu_parallel
00065 {
00066
00067 template<typename RandomAccessIterator>
00068 struct QSBThreadLocal
00069 {
00070 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00071 typedef typename traits_type::difference_type difference_type;
00072
00073
00074
00075 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
00076
00077
00078 Piece initial;
00079
00080
00081 RestrictedBoundedConcurrentQueue<Piece> leftover_parts;
00082
00083
00084 thread_index_t num_threads;
00085
00086
00087 volatile difference_type* elements_leftover;
00088
00089
00090 Piece global;
00091
00092
00093
00094 QSBThreadLocal(int queue_size) : leftover_parts(queue_size) { }
00095 };
00096
00097
00098
00099
00100
00101
00102
00103
00104 template<typename RandomAccessIterator, typename Comparator>
00105 typename std::iterator_traits<RandomAccessIterator>::difference_type
00106 qsb_divide(RandomAccessIterator begin, RandomAccessIterator end,
00107 Comparator comp, thread_index_t num_threads)
00108 {
00109 _GLIBCXX_PARALLEL_ASSERT(num_threads > 0);
00110
00111 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00112 typedef typename traits_type::value_type value_type;
00113 typedef typename traits_type::difference_type difference_type;
00114
00115 RandomAccessIterator pivot_pos =
00116 median_of_three_iterators(begin, begin + (end - begin) / 2,
00117 end - 1, comp);
00118
00119 #if defined(_GLIBCXX_ASSERTIONS)
00120
00121 difference_type n = end - begin;
00122
00123 _GLIBCXX_PARALLEL_ASSERT(
00124 (!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
00125 || (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos))
00126 || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
00127 || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos))
00128 || (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos))
00129 || (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos)));
00130 #endif
00131
00132
00133 if (pivot_pos != (end - 1))
00134 std::swap(*pivot_pos, *(end - 1));
00135 pivot_pos = end - 1;
00136
00137 __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
00138 pred(comp, *pivot_pos);
00139
00140
00141 difference_type split_pos = parallel_partition(
00142 begin, end - 1, pred, num_threads);
00143
00144
00145 std::swap(*(begin + split_pos), *pivot_pos);
00146 pivot_pos = begin + split_pos;
00147
00148 #if _GLIBCXX_ASSERTIONS
00149 RandomAccessIterator r;
00150 for (r = begin; r != pivot_pos; ++r)
00151 _GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos));
00152 for (; r != end; ++r)
00153 _GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos));
00154 #endif
00155
00156 return split_pos;
00157 }
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167 template<typename RandomAccessIterator, typename Comparator>
00168 void
00169 qsb_conquer(QSBThreadLocal<RandomAccessIterator>** tls,
00170 RandomAccessIterator begin, RandomAccessIterator end,
00171 Comparator comp,
00172 thread_index_t iam, thread_index_t num_threads,
00173 bool parent_wait)
00174 {
00175 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00176 typedef typename traits_type::value_type value_type;
00177 typedef typename traits_type::difference_type difference_type;
00178
00179 difference_type n = end - begin;
00180
00181 if (num_threads <= 1 || n <= 1)
00182 {
00183 tls[iam]->initial.first = begin;
00184 tls[iam]->initial.second = end;
00185
00186 qsb_local_sort_with_helping(tls, comp, iam, parent_wait);
00187
00188 return;
00189 }
00190
00191
00192 difference_type split_pos = qsb_divide(begin, end, comp, num_threads);
00193
00194 #if _GLIBCXX_ASSERTIONS
00195 _GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin));
00196 #endif
00197
00198 thread_index_t num_threads_leftside =
00199 std::max<thread_index_t>(1, std::min<thread_index_t>(
00200 num_threads - 1, split_pos * num_threads / n));
00201
00202 # pragma omp atomic
00203 *tls[iam]->elements_leftover -= (difference_type)1;
00204
00205
00206 # pragma omp parallel num_threads(2)
00207 {
00208 bool wait;
00209 if(omp_get_num_threads() < 2)
00210 wait = false;
00211 else
00212 wait = parent_wait;
00213
00214 # pragma omp sections
00215 {
00216 # pragma omp section
00217 {
00218 qsb_conquer(tls, begin, begin + split_pos, comp,
00219 iam,
00220 num_threads_leftside,
00221 wait);
00222 wait = parent_wait;
00223 }
00224
00225 # pragma omp section
00226 {
00227 qsb_conquer(tls, begin + split_pos + 1, end, comp,
00228 iam + num_threads_leftside,
00229 num_threads - num_threads_leftside,
00230 wait);
00231 wait = parent_wait;
00232 }
00233 }
00234 }
00235 }
00236
00237
00238
00239
00240
00241
00242
00243 template<typename RandomAccessIterator, typename Comparator>
00244 void
00245 qsb_local_sort_with_helping(QSBThreadLocal<RandomAccessIterator>** tls,
00246 Comparator& comp, int iam, bool wait)
00247 {
00248 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00249 typedef typename traits_type::value_type value_type;
00250 typedef typename traits_type::difference_type difference_type;
00251 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
00252
00253 QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam];
00254
00255 difference_type base_case_n = _Settings::get().sort_qsb_base_case_maximal_n;
00256 if (base_case_n < 2)
00257 base_case_n = 2;
00258 thread_index_t num_threads = tl.num_threads;
00259
00260
00261 random_number rng(iam + 1);
00262
00263 Piece current = tl.initial;
00264
00265 difference_type elements_done = 0;
00266 #if _GLIBCXX_ASSERTIONS
00267 difference_type total_elements_done = 0;
00268 #endif
00269
00270 for (;;)
00271 {
00272
00273 RandomAccessIterator begin = current.first, end = current.second;
00274 difference_type n = end - begin;
00275
00276 if (n > base_case_n)
00277 {
00278
00279 RandomAccessIterator pivot_pos = begin + rng(n);
00280
00281
00282 if (pivot_pos != (end - 1))
00283 std::swap(*pivot_pos, *(end - 1));
00284 pivot_pos = end - 1;
00285
00286 __gnu_parallel::binder2nd
00287 <Comparator, value_type, value_type, bool>
00288 pred(comp, *pivot_pos);
00289
00290
00291 RandomAccessIterator split_pos1, split_pos2;
00292 split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
00293
00294
00295 #if _GLIBCXX_ASSERTIONS
00296 _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
00297 #endif
00298
00299 if (split_pos1 != pivot_pos)
00300 std::swap(*split_pos1, *pivot_pos);
00301 pivot_pos = split_pos1;
00302
00303
00304 if ((split_pos1 + 1 - begin) < (n >> 7)
00305 || (end - split_pos1) < (n >> 7))
00306 {
00307
00308
00309 __gnu_parallel::unary_negate<__gnu_parallel::binder1st
00310 <Comparator, value_type, value_type, bool>, value_type>
00311 pred(__gnu_parallel::binder1st
00312 <Comparator, value_type, value_type, bool>(comp,
00313 *pivot_pos));
00314
00315
00316 split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
00317 end, pred);
00318 }
00319 else
00320
00321 split_pos2 = split_pos1 + 1;
00322
00323
00324 elements_done += (split_pos2 - split_pos1);
00325 #if _GLIBCXX_ASSERTIONS
00326 total_elements_done += (split_pos2 - split_pos1);
00327 #endif
00328
00329 if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
00330 {
00331
00332 if ((split_pos2) != end)
00333 tl.leftover_parts.push_front(std::make_pair(split_pos2,
00334 end));
00335
00336
00337 current.second = split_pos1;
00338 continue;
00339 }
00340 else
00341 {
00342
00343 if (begin != split_pos1)
00344 tl.leftover_parts.push_front(std::make_pair(begin,
00345 split_pos1));
00346
00347 current.first = split_pos2;
00348
00349 continue;
00350 }
00351 }
00352 else
00353 {
00354 __gnu_sequential::sort(begin, end, comp);
00355 elements_done += n;
00356 #if _GLIBCXX_ASSERTIONS
00357 total_elements_done += n;
00358 #endif
00359
00360
00361 if (tl.leftover_parts.pop_front(current))
00362 continue;
00363
00364 # pragma omp atomic
00365 *tl.elements_leftover -= elements_done;
00366
00367 elements_done = 0;
00368
00369 #if _GLIBCXX_ASSERTIONS
00370 double search_start = omp_get_wtime();
00371 #endif
00372
00373
00374 bool successfully_stolen = false;
00375 while (wait && *tl.elements_leftover > 0 && !successfully_stolen
00376 #if _GLIBCXX_ASSERTIONS
00377
00378 && (omp_get_wtime() < (search_start + 1.0))
00379 #endif
00380 )
00381 {
00382 thread_index_t victim;
00383 victim = rng(num_threads);
00384
00385
00386 successfully_stolen = (victim != iam)
00387 && tls[victim]->leftover_parts.pop_back(current);
00388 if (!successfully_stolen)
00389 yield();
00390 #if !defined(__ICC) && !defined(__ECC)
00391 # pragma omp flush
00392 #endif
00393 }
00394
00395 #if _GLIBCXX_ASSERTIONS
00396 if (omp_get_wtime() >= (search_start + 1.0))
00397 {
00398 sleep(1);
00399 _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
00400 < (search_start + 1.0));
00401 }
00402 #endif
00403 if (!successfully_stolen)
00404 {
00405 #if _GLIBCXX_ASSERTIONS
00406 _GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0);
00407 #endif
00408 return;
00409 }
00410 }
00411 }
00412 }
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422 template<typename RandomAccessIterator, typename Comparator>
00423 void
00424 parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end,
00425 Comparator comp,
00426 typename std::iterator_traits<RandomAccessIterator>
00427 ::difference_type n,
00428 thread_index_t num_threads)
00429 {
00430 _GLIBCXX_CALL(end - begin)
00431
00432 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00433 typedef typename traits_type::value_type value_type;
00434 typedef typename traits_type::difference_type difference_type;
00435 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
00436
00437 typedef QSBThreadLocal<RandomAccessIterator> tls_type;
00438
00439 if (n <= 1)
00440 return;
00441
00442
00443 if (num_threads > n)
00444 num_threads = static_cast<thread_index_t>(n);
00445
00446
00447 tls_type** tls = new tls_type*[num_threads];
00448 difference_type queue_size = num_threads * (thread_index_t)(log2(n) + 1);
00449 for (thread_index_t t = 0; t < num_threads; ++t)
00450 tls[t] = new QSBThreadLocal<RandomAccessIterator>(queue_size);
00451
00452
00453
00454
00455
00456 volatile difference_type elements_leftover = n;
00457 for (int i = 0; i < num_threads; ++i)
00458 {
00459 tls[i]->elements_leftover = &elements_leftover;
00460 tls[i]->num_threads = num_threads;
00461 tls[i]->global = std::make_pair(begin, end);
00462
00463
00464 tls[i]->initial = std::make_pair(end, end);
00465 }
00466
00467
00468 qsb_conquer(tls, begin, begin + n, comp, 0, num_threads, true);
00469
00470 #if _GLIBCXX_ASSERTIONS
00471
00472 Piece dummy;
00473 for (int i = 1; i < num_threads; ++i)
00474 _GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy));
00475 #endif
00476
00477 for (int i = 0; i < num_threads; ++i)
00478 delete tls[i];
00479 delete[] tls;
00480 }
00481 }
00482
00483 #endif