tlx
multiway_merge_splitting.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/algorithm/multiway_merge_splitting.hpp
3  *
4  * Two splitting variants for balancing parallel multiway merge.
5  *
6  * Copied and modified from STXXL, see http://stxxl.org, which itself extracted
7  * it from MCSTL http://algo2.iti.uni-karlsruhe.de/singler/mcstl/. Both are
8  * distributed under the Boost Software License, Version 1.0.
9  *
10  * Part of tlx - http://panthema.net/tlx
11  *
12  * Copyright (C) 2007 Johannes Singler <singler@ira.uka.de>
13  * Copyright (C) 2014-2018 Timo Bingmann <tb@panthema.net>
14  *
15  * All rights reserved. Published under the Boost Software License, Version 1.0
16  ******************************************************************************/
17 
18 #ifndef TLX_ALGORITHM_MULTIWAY_MERGE_SPLITTING_HEADER
19 #define TLX_ALGORITHM_MULTIWAY_MERGE_SPLITTING_HEADER
20 
21 #include <algorithm>
22 #include <vector>
23 
25 #include <tlx/simple_vector.hpp>
26 
27 namespace tlx {
28 
29 //! \addtogroup tlx_algorithm
30 //! \{
31 
32 /*!
33  * Different splitting strategies for sorting/merging: by sampling, exact
34 */
40 };
41 
42 namespace multiway_merge_detail {
43 
44 /*!
45  * Split a sequence into parts of almost equal size.
46  *
47  * The resulting sequence s of length p+1 contains the splitting positions when
48  * splitting the range [0,n) into parts of almost equal size (plus minus 1).
49  * The first entry is 0, the last one n. There may result empty parts.
50  *
51  * \param n Number of elements
52  * \param p Number of parts
53  * \param s Splitters
54  * \returns End of splitter sequence, i. e. \c s+p+1
55  */
56 template <typename DiffType, typename DiffTypeOutputIterator>
57 DiffTypeOutputIterator equally_split(
58  DiffType n, size_t p, DiffTypeOutputIterator s) {
59 
60  DiffType chunk_length = n / p, split = n % p, start = 0;
61  for (size_t i = 0; i < p; i++)
62  {
63  *s++ = start;
64  start += (static_cast<DiffType>(i) < split) ? (chunk_length + 1) : chunk_length;
65  if (start >= n)
66  start = n - 1;
67  }
68  *s++ = n;
69 
70  return s;
71 }
72 
73 } // namespace multiway_merge_detail
74 
75 /*!
76  * Splitting method for parallel multi-way merge routine: use sampling and
77  * binary search for in-exact splitting.
78  *
79  * \param seqs_begin Begin iterator of iterator pair input sequence.
80  * \param seqs_end End iterator of iterator pair input sequence.
81  * \param size Maximum size to merge.
82  * \param total_size Total size of all sequences combined.
83  * \param comp Comparator.
84  * \param chunks Output subsequences for num_threads.
85  * \param num_threads Split the sequences into for num_threads.
86  * \param merge_oversampling oversampling factor
87  * \tparam Stable Stable merging incurs a performance penalty.
88  * \return End iterator of output sequence.
89  */
90 template <
91  bool Stable,
92  typename RandomAccessIteratorIterator,
93  typename Comparator>
95  const RandomAccessIteratorIterator& seqs_begin,
96  const RandomAccessIteratorIterator& seqs_end,
97  typename std::iterator_traits<
98  typename std::iterator_traits<
99  RandomAccessIteratorIterator>::value_type::first_type>::
100  difference_type size,
101  typename std::iterator_traits<
102  typename std::iterator_traits<
103  RandomAccessIteratorIterator>::value_type::first_type>::
104  difference_type total_size,
105  Comparator comp,
106  std::vector<typename std::iterator_traits<
107  RandomAccessIteratorIterator>::value_type>* chunks,
108  const size_t num_threads,
109  const size_t merge_oversampling) {
110 
111  using RandomAccessIterator =
112  typename std::iterator_traits<RandomAccessIteratorIterator>
113  ::value_type::first_type;
114  using value_type = typename std::iterator_traits<RandomAccessIterator>
115  ::value_type;
116  using DiffType = typename std::iterator_traits<RandomAccessIterator>
117  ::difference_type;
118 
119  const DiffType num_seqs = seqs_end - seqs_begin;
120  const DiffType num_samples =
121  num_threads * static_cast<DiffType>(merge_oversampling);
122 
123  // pick samples
124  simple_vector<value_type> samples(num_seqs * num_samples);
125 
126  for (DiffType s = 0; s < num_seqs; ++s)
127  {
128  for (DiffType i = 0; i < num_samples; ++i)
129  {
130  DiffType sample_index = static_cast<DiffType>(
131  double(seqs_begin[s].second - seqs_begin[s].first)
132  * (double(i + 1) / double(num_samples + 1))
133  * (double(size) / double(total_size)));
134  samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
135  }
136  }
137 
138  if (Stable)
139  std::stable_sort(samples.begin(), samples.end(), comp);
140  else
141  std::sort(samples.begin(), samples.end(), comp);
142 
143  // for each processor
144  for (size_t slab = 0; slab < num_threads; ++slab)
145  {
146  // for each sequence
147  for (DiffType seq = 0; seq < num_seqs; ++seq)
148  {
149  if (slab > 0) {
150  chunks[slab][static_cast<size_t>(seq)].first =
151  std::upper_bound(
152  seqs_begin[seq].first, seqs_begin[seq].second,
153  samples[num_samples * num_seqs * slab / num_threads],
154  comp);
155  }
156  else // absolute beginning
157  chunks[slab][static_cast<size_t>(seq)].first = seqs_begin[seq].first;
158 
159  if ((slab + 1) < num_threads) {
160  chunks[slab][static_cast<size_t>(seq)].second =
161  std::upper_bound(
162  seqs_begin[seq].first, seqs_begin[seq].second,
163  samples[num_samples * num_seqs * (slab + 1) / num_threads],
164  comp);
165  }
166  else // absolute ending
167  chunks[slab][static_cast<size_t>(seq)].second = seqs_begin[seq].second;
168  }
169  }
170 }
171 
172 /*!
173  * Splitting method for parallel multi-way merge routine: use multisequence
174  * selection for exact splitting.
175  *
176  * \param seqs_begin Begin iterator of iterator pair input sequence.
177  * \param seqs_end End iterator of iterator pair input sequence.
178  * \param size Maximum size to merge.
179  * \param total_size Total size of all sequences combined.
180  * \param comp Comparator.
181  * \param chunks Output subsequences for num_threads.
182  * \param num_threads Split the sequences into for num_threads.
183  * \tparam Stable Stable merging incurs a performance penalty.
184  * \return End iterator of output sequence.
185  */
186 template <
187  bool Stable,
188  typename RandomAccessIteratorIterator,
189  typename Comparator>
191  const RandomAccessIteratorIterator& seqs_begin,
192  const RandomAccessIteratorIterator& seqs_end,
193  typename std::iterator_traits<
194  typename std::iterator_traits<
195  RandomAccessIteratorIterator>::value_type::first_type>::
196  difference_type size,
197  typename std::iterator_traits<
198  typename std::iterator_traits<
199  RandomAccessIteratorIterator>::value_type::first_type>::
200  difference_type total_size,
201  Comparator comp,
202  std::vector<typename std::iterator_traits<
203  RandomAccessIteratorIterator>::value_type>* chunks,
204  const size_t num_threads) {
205 
206  using RandomAccessIteratorPair =
207  typename std::iterator_traits<RandomAccessIteratorIterator>
208  ::value_type;
209  using RandomAccessIterator = typename RandomAccessIteratorPair
210  ::first_type;
211  using DiffType = typename std::iterator_traits<RandomAccessIterator>
212  ::difference_type;
213 
214  const size_t num_seqs = static_cast<size_t>(seqs_end - seqs_begin);
215  const bool tight = (total_size == size);
216 
218 
219  std::vector<DiffType> ranks(static_cast<size_t>(num_threads + 1));
220  multiway_merge_detail::equally_split(size, num_threads, ranks.begin());
221 
222  for (size_t s = 0; s < (num_threads - 1); ++s)
223  {
224  offsets[s].resize(num_seqs);
226  seqs_begin, seqs_end,
227  ranks[static_cast<size_t>(s + 1)], offsets[s].begin(), comp);
228 
229  if (!tight) // last one also needed and available
230  {
231  offsets[num_threads - 1].resize(num_seqs);
233  seqs_begin, seqs_end,
234  size, offsets[num_threads - 1].begin(), comp);
235  }
236  }
237 
238  // for each processor
239  for (size_t slab = 0; slab < num_threads; ++slab)
240  {
241  // for each sequence
242  for (size_t s = 0; s < num_seqs; ++s)
243  {
244  if (slab == 0) // absolute beginning
245  chunks[slab][s].first = seqs_begin[static_cast<DiffType>(s)].first;
246  else
247  chunks[slab][s].first = offsets[slab - 1][s];
248 
249  if (!tight || slab < (num_threads - 1))
250  chunks[slab][s].second = offsets[slab][s];
251  else // slab == num_threads - 1
252  chunks[slab][s].second = seqs_begin[static_cast<DiffType>(s)].second;
253  }
254  }
255 }
256 
257 //! \}
258 
259 } // namespace tlx
260 
261 #endif // !TLX_ALGORITHM_MULTIWAY_MERGE_SPLITTING_HEADER
262 
263 /******************************************************************************/
void multiway_merge_exact_splitting(const RandomAccessIteratorIterator &seqs_begin, const RandomAccessIteratorIterator &seqs_end, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type size, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type total_size, Comparator comp, std::vector< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type > *chunks, const size_t num_threads)
Splitting method for parallel multi-way merge routine: use multisequence selection for exact splittin...
void resize(size_type new_size)
resize the array to contain exactly new_size items
DiffTypeOutputIterator equally_split(DiffType n, size_t p, DiffTypeOutputIterator s)
Split a sequence into parts of almost equal size.
Simpler non-growing vector without initialization.
void multisequence_partition(const RanSeqs &begin_seqs, const RanSeqs &end_seqs, const RankType &rank, RankIterator begin_offsets, Comparator comp=Comparator())
Splits several sorted sequences at a certain global rank, resulting in a splitting point for each seq...
std::vector< std::string > split(char sep, const std::string &str, std::string::size_type limit)
Split the given string at each separator character into distinct substrings.
Definition: split.cpp:20
MultiwayMergeSplittingAlgorithm
Different splitting strategies for sorting/merging: by sampling, exact.
static void sort(Iterator begin, Iterator end, Comparator cmp=Comparator())
Call best known sorting network for up to sixteen elements with given comparison method.
Definition: best.hpp:523
void multiway_merge_sampling_splitting(const RandomAccessIteratorIterator &seqs_begin, const RandomAccessIteratorIterator &seqs_end, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type size, typename std::iterator_traits< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type::first_type >::difference_type total_size, Comparator comp, std::vector< typename std::iterator_traits< RandomAccessIteratorIterator >::value_type > *chunks, const size_t num_threads, const size_t merge_oversampling)
Splitting method for parallel multi-way merge routine: use sampling and binary search for in-exact sp...