18 #ifndef TLX_ALGORITHM_MULTIWAY_MERGE_SPLITTING_HEADER 19 #define TLX_ALGORITHM_MULTIWAY_MERGE_SPLITTING_HEADER 42 namespace multiway_merge_detail {
56 template <
typename DiffType,
typename DiffTypeOutputIterator>
58 DiffType n,
size_t p, DiffTypeOutputIterator s) {
60 DiffType chunk_length = n / p,
split = n % p, start = 0;
61 for (
size_t i = 0; i < p; i++)
64 start += (
static_cast<DiffType
>(i) < split) ? (chunk_length + 1) : chunk_length;
92 typename RandomAccessIteratorIterator,
95 const RandomAccessIteratorIterator& seqs_begin,
96 const RandomAccessIteratorIterator& seqs_end,
97 typename std::iterator_traits<
98 typename std::iterator_traits<
99 RandomAccessIteratorIterator>::value_type::first_type>::
100 difference_type size,
101 typename std::iterator_traits<
102 typename std::iterator_traits<
103 RandomAccessIteratorIterator>::value_type::first_type>::
104 difference_type total_size,
106 std::vector<
typename std::iterator_traits<
107 RandomAccessIteratorIterator>::value_type>* chunks,
108 const size_t num_threads,
109 const size_t merge_oversampling) {
111 using RandomAccessIterator =
112 typename std::iterator_traits<RandomAccessIteratorIterator>
113 ::value_type::first_type;
114 using value_type =
typename std::iterator_traits<RandomAccessIterator>
116 using DiffType =
typename std::iterator_traits<RandomAccessIterator>
119 const DiffType num_seqs = seqs_end - seqs_begin;
120 const DiffType num_samples =
121 num_threads *
static_cast<DiffType
>(merge_oversampling);
126 for (DiffType s = 0; s < num_seqs; ++s)
128 for (DiffType i = 0; i < num_samples; ++i)
130 DiffType sample_index =
static_cast<DiffType
>(
131 double(seqs_begin[s].second - seqs_begin[s].first)
132 * (double(i + 1) / double(num_samples + 1))
133 * (
double(size) / double(total_size)));
134 samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
139 std::stable_sort(samples.begin(), samples.end(), comp);
141 std::sort(samples.begin(), samples.end(), comp);
144 for (
size_t slab = 0; slab < num_threads; ++slab)
147 for (DiffType seq = 0; seq < num_seqs; ++seq)
150 chunks[slab][
static_cast<size_t>(seq)].first =
152 seqs_begin[seq].first, seqs_begin[seq].second,
153 samples[num_samples * num_seqs * slab / num_threads],
157 chunks[slab][
static_cast<size_t>(seq)].first = seqs_begin[seq].first;
159 if ((slab + 1) < num_threads) {
160 chunks[slab][
static_cast<size_t>(seq)].second =
162 seqs_begin[seq].first, seqs_begin[seq].second,
163 samples[num_samples * num_seqs * (slab + 1) / num_threads],
167 chunks[slab][
static_cast<size_t>(seq)].second = seqs_begin[seq].second;
188 typename RandomAccessIteratorIterator,
191 const RandomAccessIteratorIterator& seqs_begin,
192 const RandomAccessIteratorIterator& seqs_end,
193 typename std::iterator_traits<
194 typename std::iterator_traits<
195 RandomAccessIteratorIterator>::value_type::first_type>::
196 difference_type size,
197 typename std::iterator_traits<
198 typename std::iterator_traits<
199 RandomAccessIteratorIterator>::value_type::first_type>::
200 difference_type total_size,
202 std::vector<
typename std::iterator_traits<
203 RandomAccessIteratorIterator>::value_type>* chunks,
204 const size_t num_threads) {
206 using RandomAccessIteratorPair =
207 typename std::iterator_traits<RandomAccessIteratorIterator>
209 using RandomAccessIterator =
typename RandomAccessIteratorPair
211 using DiffType =
typename std::iterator_traits<RandomAccessIterator>
214 const size_t num_seqs =
static_cast<size_t>(seqs_end - seqs_begin);
215 const bool tight = (total_size == size);
219 std::vector<DiffType> ranks(static_cast<size_t>(num_threads + 1));
222 for (
size_t s = 0; s < (num_threads - 1); ++s)
224 offsets[s].
resize(num_seqs);
226 seqs_begin, seqs_end,
227 ranks[static_cast<size_t>(s + 1)], offsets[s].begin(), comp);
231 offsets[num_threads - 1].
resize(num_seqs);
233 seqs_begin, seqs_end,
234 size, offsets[num_threads - 1].begin(), comp);
239 for (
size_t slab = 0; slab < num_threads; ++slab)
242 for (
size_t s = 0; s < num_seqs; ++s)
245 chunks[slab][s].first = seqs_begin[
static_cast<DiffType
>(s)].first;
247 chunks[slab][s].first = offsets[slab - 1][s];
249 if (!tight || slab < (num_threads - 1))
250 chunks[slab][s].second = offsets[slab][s];
252 chunks[slab][s].second = seqs_begin[
static_cast<DiffType
>(s)].second;
261 #endif // !TLX_ALGORITHM_MULTIWAY_MERGE_SPLITTING_HEADER void multiway_merge_exact_splitting(const RandomAccessIteratorIterator &seqs_begin, const RandomAccessIteratorIterator &seqs_end, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type size, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type total_size, Comparator comp, std::vector< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type > *chunks, const size_t num_threads)
Splitting method for parallel multi-way merge routine: use multisequence selection for exact splittin...
void resize(size_type new_size)
resize the array to contain exactly new_size items
DiffTypeOutputIterator equally_split(DiffType n, size_t p, DiffTypeOutputIterator s)
Split a sequence into parts of almost equal size.
Simpler non-growing vector without initialization.
void multisequence_partition(const RanSeqs &begin_seqs, const RanSeqs &end_seqs, const RankType &rank, RankIterator begin_offsets, Comparator comp=Comparator())
Splits several sorted sequences at a certain global rank, resulting in a splitting point for each seq...
std::vector< std::string > split(char sep, const std::string &str, std::string::size_type limit)
Split the given string at each separator character into distinct substrings.
MultiwayMergeSplittingAlgorithm
Different splitting strategies for sorting/merging: by sampling, exact.
static void sort(Iterator begin, Iterator end, Comparator cmp=Comparator())
Call best known sorting network for up to sixteen elements with given comparison method.
void multiway_merge_sampling_splitting(const RandomAccessIteratorIterator &seqs_begin, const RandomAccessIteratorIterator &seqs_end, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type size, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type total_size, Comparator comp, std::vector< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type > *chunks, const size_t num_threads, const size_t merge_oversampling)
Splitting method for parallel multi-way merge routine: use sampling and binary search for in-exact sp...