18 #ifndef TLX_SORT_PARALLEL_MERGESORT_HEADER 19 #define TLX_SORT_PARALLEL_MERGESORT_HEADER 40 namespace parallel_mergesort_detail {
43 template <
typename DiffType>
56 template <
typename RandomAccessIterator>
59 typename std::iterator_traits<RandomAccessIterator>::value_type;
61 typename std::iterator_traits<RandomAccessIterator>::difference_type;
78 : starts(num_threads + 1),
79 temporary(num_threads),
80 offsets(num_threads - 1),
92 template <
typename RandomAccessIterator,
typename DiffType>
94 DiffType& num_samples,
103 static_cast<size_t>(num_samples + 1), es.
begin());
105 for (DiffType i = 0; i < num_samples; i++) {
119 template <
bool Stable,
typename RandomAccessIterator,
typename Comparator>
127 typename std::iterator_traits<RandomAccessIterator>::value_type;
129 typename std::iterator_traits<RandomAccessIterator>::difference_type;
132 DiffType length_local = sd->
starts[iam + 1] - sd->
starts[iam];
134 using SortingPlacesIterator = ValueType *;
137 sd->
temporary[iam] =
static_cast<ValueType*
>(
138 ::operator
new (
sizeof(ValueType) * (length_local + 1)));
148 sd->
temporary[iam] + length_local, comp);
151 sd->
temporary[iam] + length_local, comp);
158 DiffType num_samples;
166 for (
size_t s = 0; s < num_threads; s++)
169 if (num_samples * iam > 0)
170 sd->
pieces[iam][s].begin =
173 sd->
samples[num_samples * iam],
178 sd->
pieces[iam][s].begin = 0;
180 if ((num_samples * (iam + 1)) < (num_samples * num_threads))
184 sd->
samples[num_samples * (iam + 1)],
197 SortingPlacesIterator> > seqs(num_threads);
199 for (
size_t s = 0; s < num_threads; s++)
200 seqs[s] = std::make_pair(
207 if (iam < num_threads - 1)
211 for (
size_t seq = 0; seq < num_threads; seq++)
214 if (iam < (num_threads - 1))
215 sd->
pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
223 for (
size_t seq = 0; seq < num_threads; seq++)
227 sd->
pieces[iam][seq].begin = sd->
pieces[iam - 1][seq].end;
230 sd->
pieces[iam][seq].begin = 0;
235 DiffType offset = 0, length_am = 0;
236 for (
size_t s = 0; s < num_threads; s++)
238 length_am += sd->
pieces[iam][s].end - sd->
pieces[iam][s].begin;
239 offset += sd->
pieces[iam][s].begin;
245 SortingPlacesIterator> > seqs(num_threads);
247 for (
size_t s = 0; s < num_threads; s++)
249 seqs[s] = std::make_pair(
255 seqs.begin(), seqs.end(),
256 sd->
source + offset, length_am, comp);
278 template <
bool Stable,
279 typename RandomAccessIterator,
typename Comparator>
281 RandomAccessIterator
begin,
282 RandomAccessIterator
end,
284 size_t num_threads = std::thread::hardware_concurrency(),
287 using namespace parallel_mergesort_detail;
290 typename std::iterator_traits<RandomAccessIterator>::difference_type;
292 DiffType n = end -
begin;
298 if (num_threads > static_cast<size_t>(n))
299 num_threads =
static_cast<size_t>(n);
301 PMWMSSortingData<RandomAccessIterator> sd(num_threads);
309 for (
size_t s = 0; s < num_threads; s++)
310 sd.pieces[s].resize(num_threads);
312 DiffType* starts = sd.starts.data();
314 DiffType chunk_length = n / num_threads,
split = n % num_threads, start = 0;
315 for (
size_t i = 0; i < num_threads; i++)
318 start += (i < static_cast<size_t>(
split))
319 ? (chunk_length + 1) : chunk_length;
321 starts[num_threads] = start;
328 #pragma omp parallel num_threads(num_threads) 330 size_t iam = omp_get_thread_num();
331 parallel_sort_mwms_pu<Stable>(
332 &sd, iam, num_threads, barrier, comp, mwmsa);
336 for (
size_t iam = 0; iam < num_threads; ++iam) {
337 threads[iam] = std::thread(
339 parallel_sort_mwms_pu<Stable>(
340 &sd, iam, num_threads, barrier, comp, mwmsa);
343 for (
size_t i = 0; i < num_threads; i++) {
346 #endif // defined(_OPENMP) 358 template <
typename RandomAccessIterator,
359 typename Comparator = std::less<
360 typename std::iterator_traits<RandomAccessIterator>::value_type> >
362 RandomAccessIterator begin,
363 RandomAccessIterator end,
364 Comparator comp = Comparator(),
365 size_t num_threads = std::thread::hardware_concurrency(),
369 begin,
end, comp, num_threads, mwmsa);
381 template <
typename RandomAccessIterator,
382 typename Comparator = std::less<
383 typename std::iterator_traits<RandomAccessIterator>::value_type> >
385 RandomAccessIterator begin,
386 RandomAccessIterator end,
387 Comparator comp = Comparator(),
388 size_t num_threads = std::thread::hardware_concurrency(),
392 begin,
end, comp, num_threads, mwmsa);
400 #endif // !TLX_SORT_PARALLEL_MERGESORT_HEADER Data accessed by all threads.
simple_vector< DiffType > starts
Start indices, per thread.
DiffTypeOutputIterator equally_split(DiffType n, size_t p, DiffTypeOutputIterator s)
Split a sequence into parts of almost equal size.
void parallel_mergesort_base(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, size_t num_threads=std::thread::hardware_concurrency(), MultiwayMergeSplittingAlgorithm mwmsa=MWMSA_DEFAULT)
Parallel multiway mergesort main call.
simple_vector< ValueType > samples
Samples.
void stable_parallel_mergesort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp=Comparator(), size_t num_threads=std::thread::hardware_concurrency(), MultiwayMergeSplittingAlgorithm mwmsa=MWMSA_DEFAULT)
Stable parallel multiway mergesort.
Simpler non-growing vector without initialization.
simple_vector< ValueType * > temporary
Storage in which to sort.
PMWMSSortingData(size_t num_threads)
void parallel_sort_mwms_pu(PMWMSSortingData< RandomAccessIterator > *sd, size_t iam, size_t num_threads, ThreadBarrierMutex &barrier, Comparator &comp, MultiwayMergeSplittingAlgorithm mwmsa)
PMWMS code executed by each thread.
void multisequence_partition(const RanSeqs &begin_seqs, const RanSeqs &end_seqs, const RankType &rank, RankIterator begin_offsets, Comparator comp=Comparator())
Splits several sorted sequences at a certain global rank, resulting in a splitting point for each seq...
RandomAccessIterator source
Input begin.
std::vector< std::string > split(char sep, const std::string &str, std::string::size_type limit)
Split the given string at each separator character into distinct substrings.
void wait(Lambda lambda=Lambda())
Waits for n threads to arrive.
iterator begin() noexcept
return mutable iterator to first element
MultiwayMergeSplittingAlgorithm
Different splitting strategies for sorting/merging: by sampling, exact.
static void sort(Iterator begin, Iterator end, Comparator cmp=Comparator())
Call best known sorting network for up to sixteen elements with given comparison method.
void parallel_mergesort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp=Comparator(), size_t num_threads=std::thread::hardware_concurrency(), MultiwayMergeSplittingAlgorithm mwmsa=MWMSA_DEFAULT)
Parallel multiway mergesort.
RandomAccessIterator3 multiway_merge_base(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type size, Comparator comp=Comparator(), MultiwayMergeAlgorithm mwma=MWMA_ALGORITHM_DEFAULT)
Sequential multi-way merging switch.
Implements a thread barrier using mutex locking and condition variables that can be used to synchroni...
simple_vector< DiffType > offsets
Offsets to add to the found positions.
void determine_samples(PMWMSSortingData< RandomAccessIterator > *sd, DiffType &num_samples, size_t iam, size_t num_threads)
Select samples from a sequence.
DiffType begin
Begin of subsequence.
size_t parallel_multiway_merge_oversampling
default oversampling factor for parallel_multiway_merge
DiffType end
End of subsequence.
typename std::iterator_traits< RandomAccessIterator >::difference_type DiffType
simple_vector< simple_vector< PMWMSPiece< DiffType > > > pieces
PMWMSPieces of data to merge [thread][sequence].
typename std::iterator_traits< RandomAccessIterator >::value_type ValueType