tlx
sample_sort_tools.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/sort/strings/sample_sort_tools.hpp
3  *
4  * Helpers for (Parallel) Super Scalar String Sample Sort
5  *
6  * See also Timo Bingmann, Andreas Eberle, and Peter Sanders. "Engineering
7  * parallel string sorting." Algorithmica 77.1 (2017): 235-286.
8  *
9  * Part of tlx - http://panthema.net/tlx
10  *
11  * Copyright (C) 2013-2019 Timo Bingmann <tb@panthema.net>
12  *
13  * All rights reserved. Published under the Boost Software License, Version 1.0
14  ******************************************************************************/
15 
16 #ifndef TLX_SORT_STRINGS_SAMPLE_SORT_TOOLS_HEADER
17 #define TLX_SORT_STRINGS_SAMPLE_SORT_TOOLS_HEADER
18 
20 
22 #include <tlx/die/core.hpp>
23 #include <tlx/logger/core.hpp>
24 #include <tlx/math/clz.hpp>
25 #include <tlx/math/ctz.hpp>
26 #include <tlx/string/hexdump.hpp>
27 
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 
32 namespace tlx {
33 namespace sort_strings_detail {
34 
35 /******************************************************************************/
36 
37 //! represent binary digits of large integer datatypes
38 template <typename Type>
39 static inline
40 std::string to_binary(Type v, const size_t width = (8 * sizeof(Type))) {
41  std::string str(width, ' ');
42  for (size_t i = 0; i < width; i++) {
43  str[width - i - 1] = (v & 1) ? '1' : '0';
44  v /= 2;
45  }
46  return str;
47 }
48 
49 //! Class to transform in-order to level-order indexes in a perfect binary tree
50 template <size_t TreeBits>
52  static const bool debug = false;
53 
54  static const size_t treebits = TreeBits;
55  static const size_t num_nodes = (1 << treebits) - 1;
56 
57  static inline unsigned int level_to_preorder(unsigned int id) {
58  assert(id > 0);
59  TLX_LOG << "index: " << id << " = " << to_binary(id);
60 
61  static const int bitmask = num_nodes;
62 
63  int hi = treebits - 32 + clz<uint32_t>(id);
64  TLX_LOG << "high zero: " << hi;
65 
66  unsigned int bkt = ((id << (hi + 1)) & bitmask) | (1 << hi);
67 
68  TLX_LOG << "bkt: " << bkt << " = " << to_binary(bkt);
69  return bkt;
70  }
71 
72  static inline unsigned int pre_to_levelorder(unsigned int id) {
73  assert(id > 0);
74  TLX_LOG << "index: " << id << " = " << to_binary(id);
75 
76  static const int bitmask = num_nodes;
77 
78  int lo = ctz<uint32_t>(id) + 1;
79  TLX_LOG << "low zero: " << lo;
80 
81  unsigned int bkt = ((id >> lo) & bitmask) | (1 << (treebits - lo));
82 
83  TLX_LOG << "bkt: " << bkt << " = " << to_binary(bkt);
84  return bkt;
85  }
86 
87  static inline void self_verify() {
88  for (size_t i = 1; i <= num_nodes; ++i) {
89  TLX_LOG << to_binary(i, treebits) << " -> ";
90 
91  size_t id = level_to_preorder(i);
92  TLX_LOG << to_binary(id, treebits) << " -> ";
93 
94  id = pre_to_levelorder(id);
95  TLX_LOG << to_binary(id, treebits);
96 
97  TLX_LOG << "";
98  tlx_die_unequal(id, i);
99  }
100  }
101 };
102 
114 }
115 
116 /******************************************************************************/
117 
118 //! Recursive TreeBuilder for full-descent and unrolled variants, constructs a
119 //! both a pre-order and level-order array of splitters and the corresponding
120 //! LCPs.
121 template <typename key_type, size_t num_splitters>
123 {
124  static const bool debug_splitter = false;
125 
126 public:
127  // build tree: splitter[num_splitters], splitter_tree[num_splitters + 1],
128  // and splitter_lcp[num_splitters + 1].
129  SSTreeBuilderPreAndLevelOrder(key_type splitter[num_splitters],
130  key_type tree[num_splitters + 1],
131  unsigned char splitter_lcp[num_splitters + 1],
132  const key_type* samples, size_t samplesize)
133  : splitter_(splitter), tree_(tree),
134  lcp_iter_(splitter_lcp), samples_(samples) {
135  key_type sentinel = 0;
136  recurse(samples, samples + samplesize, 1, sentinel);
137 
138  assert(splitter_ == splitter + num_splitters);
139  assert(lcp_iter_ == splitter_lcp + num_splitters);
140  // overwrite sentinel lcp for first < everything bucket
141  splitter_lcp[0] &= 0x80;
142  // sentinel for > everything bucket
143  splitter_lcp[num_splitters] = 0;
144  }
145 
146  ptrdiff_t snum(const key_type* s) const {
147  return static_cast<ptrdiff_t>(s - samples_);
148  }
149 
150  key_type recurse(const key_type* lo, const key_type* hi,
151  unsigned int treeidx, key_type& rec_prevkey) {
152  TLX_LOGC(debug_splitter)
153  << "rec_buildtree(" << snum(lo) << "," << snum(hi)
154  << ", treeidx=" << treeidx << ")";
155 
156  // pick middle element as splitter
157  const key_type* mid = lo + static_cast<ptrdiff_t>(hi - lo) / 2;
158 
159  TLX_LOGC(debug_splitter)
160  << "tree[" << treeidx << "] = samples[" << snum(mid) << "] = "
161  << hexdump_type(*mid);
162 
163  key_type mykey = tree_[treeidx] = *mid;
164 #if 1
165  const key_type* midlo = mid;
166  while (lo < midlo && *(midlo - 1) == mykey) midlo--;
167 
168  const key_type* midhi = mid;
169  while (midhi + 1 < hi && *midhi == mykey) midhi++;
170 
171  if (midhi - midlo > 1)
172  TLX_LOG0 << "key range = [" << snum(midlo) << "," << snum(midhi) << ")";
173 #else
174  const key_type* midlo = mid, * midhi = mid + 1;
175 #endif
176  if (2 * treeidx < num_splitters)
177  {
178  key_type prevkey = recurse(lo, midlo, 2 * treeidx + 0, rec_prevkey);
179 
180  key_type xorSplit = prevkey ^ mykey;
181 
182  TLX_LOGC(debug_splitter)
183  << " lcp: " << hexdump_type(prevkey)
184  << " XOR " << hexdump_type(mykey)
185  << " = " << hexdump_type(xorSplit)
186  << " - " << clz(xorSplit)
187  << " bits = " << clz(xorSplit) / 8
188  << " chars lcp";
189 
190  *splitter_++ = mykey;
191 
192  *lcp_iter_++ =
193  (clz(xorSplit) / 8) |
194  // marker for done splitters
195  ((mykey & 0xFF) ? 0 : 0x80);
196 
197  return recurse(midhi, hi, 2 * treeidx + 1, mykey);
198  }
199  else
200  {
201  key_type xorSplit = rec_prevkey ^ mykey;
202 
203  TLX_LOGC(debug_splitter)
204  << " lcp: " << hexdump_type(rec_prevkey)
205  << " XOR " << hexdump_type(mykey)
206  << " = " << hexdump_type(xorSplit)
207  << " - " << clz(xorSplit)
208  << " bits = " << clz(xorSplit) / 8
209  << " chars lcp";
210 
211  *splitter_++ = mykey;
212 
213  *lcp_iter_++ =
214  (clz(xorSplit) / 8) |
215  // marker for done splitters
216  ((mykey & 0xFF) ? 0 : 0x80);
217 
218  return mykey;
219  }
220  }
221 
222 private:
223  key_type* splitter_;
224  key_type* tree_;
225  unsigned char* lcp_iter_;
226  const key_type* samples_;
227 };
228 
229 //! Recursive TreeBuilder for full-descent and unrolled variants, constructs
230 //! only a level-order binary tree of splitters
231 template <typename key_type, size_t num_splitters>
233 {
234  static const bool debug_splitter = false;
235 
236 public:
237  //! build tree, sizes: splitter_tree[num_splitters + 1] and
238  SSTreeBuilderLevelOrder(key_type tree[num_splitters],
239  unsigned char splitter_lcp[num_splitters + 1],
240  const key_type* samples, size_t samplesize)
241  : tree_(tree),
242  lcp_iter_(splitter_lcp),
243  samples_(samples) {
244  key_type sentinel = 0;
245  recurse(samples, samples + samplesize, 1, sentinel);
246 
247  assert(lcp_iter_ == splitter_lcp + num_splitters);
248  // overwrite sentinel lcp for first < everything bucket
249  splitter_lcp[0] &= 0x80;
250  // sentinel for > everything bucket
251  splitter_lcp[num_splitters] = 0;
252  }
253 
254  ptrdiff_t snum(const key_type* s) const {
255  return static_cast<ptrdiff_t>(s - samples_);
256  }
257 
258  key_type recurse(const key_type* lo, const key_type* hi,
259  unsigned int treeidx, key_type& rec_prevkey) {
260  TLX_LOGC(debug_splitter)
261  << "rec_buildtree(" << snum(lo) << "," << snum(hi)
262  << ", treeidx=" << treeidx << ")";
263 
264  // pick middle element as splitter
265  const key_type* mid = lo + static_cast<ptrdiff_t>(hi - lo) / 2;
266 
267  TLX_LOGC(debug_splitter)
268  << "tree[" << treeidx << "] = samples[" << snum(mid) << "] = "
269  << hexdump_type(*mid);
270 
271  key_type mykey = tree_[treeidx] = *mid;
272 #if 1
273  const key_type* midlo = mid;
274  while (lo < midlo && *(midlo - 1) == mykey) midlo--;
275 
276  const key_type* midhi = mid;
277  while (midhi + 1 < hi && *midhi == mykey) midhi++;
278 
279  if (midhi - midlo > 1)
280  TLX_LOG0 << "key range = [" << snum(midlo) << "," << snum(midhi) << ")";
281 #else
282  const key_type* midlo = mid, * midhi = mid + 1;
283 #endif
284  if (2 * treeidx < num_splitters)
285  {
286  key_type prevkey = recurse(lo, midlo, 2 * treeidx + 0, rec_prevkey);
287 
288  key_type xorSplit = prevkey ^ mykey;
289 
290  TLX_LOGC(debug_splitter)
291  << " lcp: " << hexdump_type(prevkey)
292  << " XOR " << hexdump_type(mykey)
293  << " = " << hexdump_type(xorSplit)
294  << " - " << clz(xorSplit)
295  << " bits = " << clz(xorSplit) / 8
296  << " chars lcp";
297 
298  *lcp_iter_++ =
299  (clz(xorSplit) / 8) |
300  // marker for done splitters
301  ((mykey & 0xFF) ? 0 : 0x80);
302 
303  return recurse(midhi, hi, 2 * treeidx + 1, mykey);
304  }
305  else
306  {
307  key_type xorSplit = rec_prevkey ^ mykey;
308 
309  TLX_LOGC(debug_splitter)
310  << " lcp: " << hexdump_type(rec_prevkey)
311  << " XOR " << hexdump_type(mykey)
312  << " = " << hexdump_type(xorSplit)
313  << " - " << clz(xorSplit)
314  << " bits = " << clz(xorSplit) / 8
315  << " chars lcp";
316 
317  *lcp_iter_++ =
318  (clz(xorSplit) / 8) |
319  // marker for done splitters
320  ((mykey & 0xFF) ? 0 : 0x80);
321 
322  return mykey;
323  }
324  }
325 
326 private:
327  key_type* tree_;
328  unsigned char* lcp_iter_;
329  const key_type* samples_;
330 };
331 
332 /******************************************************************************/
333 
334 //! Sample Sort Classification Tree Unrolled and Interleaved
335 template <typename key_type, size_t TreeBits, size_t Rollout = 4>
337 {
338 public:
339  static const size_t treebits = TreeBits;
340  static const size_t num_splitters = (1 << treebits) - 1;
341 
342  //! build tree and splitter array from sample
343  void build(key_type* samples, size_t samplesize,
344  unsigned char* splitter_lcp) {
346  splitter_, splitter_tree_, splitter_lcp,
347  samples, samplesize);
348  }
349 
350  //! binary search on splitter array for bucket number
351  unsigned int find_bkt(const key_type& key) const {
352  // binary tree traversal without left branch
353 
354  unsigned int i = 1;
355  while (i <= num_splitters) {
356  // in gcc-4.6.3 this produces a SETA, LEA sequence
357  i = 2 * i + (key <= splitter_tree_[i] ? 0 : 1);
358  }
359  i -= num_splitters + 1;
360 
361  size_t b = i * 2; // < bucket
362  if (i < num_splitters && splitter_[i] == key) b += 1; // equal bucket
363 
364  return b;
365  }
366 
367  // in gcc-4.6.3 this produces a SETA, LEA sequence
368 #define TLX_CLASSIFY_TREE_STEP \
369  for (size_t u = 0; u < Rollout; ++u) { \
370  i[u] = 2 * i[u] + (key[u] <= splitter_tree_[i[u]] ? 0 : 1); \
371  } \
372  TLX_ATTRIBUTE_FALLTHROUGH;
373 
374  //! search in splitter tree for bucket number, unrolled for Rollout keys at
375  //! once.
377  const key_type key[Rollout], uint16_t obkt[Rollout]) const {
378  // binary tree traversal without left branch
379 
380  unsigned int i[Rollout];
381  std::fill(i, i + Rollout, 1u);
382 
383  switch (treebits)
384  {
385  default:
386  abort();
387  case 15:
389  case 14:
391  case 13:
393  case 12:
395  case 11:
397 
398  case 10:
400  case 9:
402  case 8:
404  case 7:
406  case 6:
408 
409  case 5:
411  case 4:
413  case 3:
415  case 2:
417  case 1:
419  }
420 
421  for (size_t u = 0; u < Rollout; ++u)
422  i[u] -= num_splitters + 1;
423 
424  for (size_t u = 0; u < Rollout; ++u) {
425  // < bucket
426  obkt[u] = i[u] * 2;
427  }
428 
429  for (size_t u = 0; u < Rollout; ++u) {
430  // equal bucket
431  if (i[u] < num_splitters && splitter_[i[u]] == key[u])
432  obkt[u] += 1;
433  }
434  }
435 
436 #undef TLX_CLASSIFY_TREE_STEP
437 
438  //! classify all strings in area by walking tree and saving bucket id
439  template <typename StringSet>
440  // __attribute__ ((optimize("unroll-all-loops")))
441  void classify(
442  const StringSet& strset,
443  typename StringSet::Iterator begin, typename StringSet::Iterator end,
444  uint16_t* bktout, size_t depth) const {
445  while (begin != end)
446  {
447  if (begin + Rollout <= end)
448  {
449  key_type key[Rollout];
450  for (size_t u = 0; u < Rollout; ++u)
451  key[u] = get_key<key_type>(strset, begin[u], depth);
452 
453  find_bkt_unroll(key, bktout);
454 
455  begin += Rollout, bktout += Rollout;
456  }
457  else
458  {
459  key_type key = get_key<key_type>(strset, *begin++, depth);
460  *bktout++ = this->find_bkt(key);
461  }
462  }
463  }
464 
465  //! return a splitter
466  key_type get_splitter(unsigned int i) const
467  { return splitter_[i]; }
468 
469 private:
470  key_type splitter_[num_splitters];
471  key_type splitter_tree_[num_splitters + 1];
472 };
473 
474 /******************************************************************************/
475 
476 //! Sample Sort Classification Tree Unrolled with Equal Comparisons
477 template <typename key_type, size_t TreeBits>
479 {
480 public:
481  static const size_t treebits = TreeBits;
482  static const size_t num_splitters = (1 << treebits) - 1;
483 
484  //! build tree and splitter array from sample
485  void build(key_type* samples, size_t samplesize, unsigned char* splitter_lcp) {
487  splitter_tree_, splitter_lcp, samples, samplesize);
488  }
489 
490 #define TLX_CLASSIFY_TREE_STEP \
491  if (TLX_UNLIKELY(key == splitter_tree_[i])) { \
492  return \
493  2 * PerfectTreeCalculations<treebits>::level_to_preorder(i) - 1; \
494  } \
495  i = 2 * i + (key < splitter_tree_[i] ? 0 : 1); \
496  TLX_ATTRIBUTE_FALLTHROUGH;
497 
498  //! binary search on splitter array for bucket number
499  unsigned int find_bkt(const key_type& key) const {
500  // binary tree traversal without left branch
501 
502  unsigned int i = 1;
503 
504  switch (treebits)
505  {
506  default:
507  abort();
508  case 15:
510  case 14:
512  case 13:
514  case 12:
516  case 11:
518 
519  case 10:
521  case 9:
523  case 8:
525  case 7:
527  case 6:
529 
530  case 5:
532  case 4:
534  case 3:
536  case 2:
538  case 1:
540  }
541 
542  i -= num_splitters + 1;
543  return 2 * i; // < or > bucket
544  }
545 
546 #undef TLX_CLASSIFY_TREE_STEP
547 
548  //! classify all strings in area by walking tree and saving bucket id
549  template <typename StringSet>
550  void classify(
551  const StringSet& strset,
552  typename StringSet::Iterator begin, typename StringSet::Iterator end,
553  uint16_t* bktout, size_t depth) const {
554  while (begin != end)
555  {
556  key_type key = get_key<key_type>(strset, *begin++, depth);
557  *bktout++ = find_bkt(key);
558  }
559  }
560 
561  //! return a splitter
562  key_type get_splitter(unsigned int i) const {
563  return splitter_tree_[
565  }
566 
567 private:
568  key_type splitter_tree_[num_splitters + 1];
569 };
570 
571 /******************************************************************************/
572 
573 //! Sample Sort Classification Tree Unrolled, Interleaved, and with Perfect Tree
574 //! Index Calculations
575 template <typename key_type, size_t TreeBits, unsigned Rollout = 4>
577 {
578 public:
579  static const size_t treebits = TreeBits;
580  static const size_t num_splitters = (1 << treebits) - 1;
581 
582  //! build tree and splitter array from sample
583  void build(key_type* samples, size_t samplesize,
584  unsigned char* splitter_lcp) {
586  splitter_tree_, splitter_lcp, samples, samplesize);
587  }
588 
589  //! binary search on splitter array for bucket number
590  unsigned int find_bkt(const key_type& key) const {
591  // binary tree traversal without left branch
592 
593  unsigned int i = 1;
594 
595  while (i <= num_splitters) {
596  // in gcc-4.6.3 this produces a SETA, LEA sequence
597  i = 2 * i + (key <= splitter_tree_[i] ? 0 : 1);
598  }
599 
600  i -= num_splitters + 1;
601 
602  // < bucket
603  size_t b = i * 2;
604  if (i < num_splitters && get_splitter(i) == key) {
605  // equal bucket
606  b += 1;
607  }
608 
609  return b;
610  }
611 
612  // in gcc-4.6.3 this produces a SETA, LEA sequence
613 #define TLX_CLASSIFY_TREE_STEP \
614  for (size_t u = 0; u < Rollout; ++u) { \
615  i[u] = 2 * i[u] + (key[u] <= splitter_tree_[i[u]] ? 0 : 1); \
616  } \
617  TLX_ATTRIBUTE_FALLTHROUGH;
618 
619  //! search in splitter tree for bucket number, unrolled for Rollout keys at
620  //! once.
622  const key_type key[Rollout], uint16_t obkt[Rollout]) const {
623  // binary tree traversal without left branch
624 
625  unsigned int i[Rollout];
626  std::fill(i + 0, i + Rollout, 1);
627 
628  switch (treebits)
629  {
630  default:
631  abort();
632 
633  case 15:
635  case 14:
637  case 13:
639  case 12:
641  case 11:
643 
644  case 10:
646  case 9:
648  case 8:
650  case 7:
652  case 6:
654 
655  case 5:
657  case 4:
659  case 3:
661  case 2:
663  case 1:
665  }
666 
667  for (unsigned u = 0; u < Rollout; ++u)
668  i[u] -= num_splitters + 1;
669 
670  for (unsigned u = 0; u < Rollout; ++u) {
671  // < bucket
672  obkt[u] = i[u] * 2;
673  }
674 
675  for (unsigned u = 0; u < Rollout; ++u) {
676  // equal bucket
677  if (i[u] < num_splitters && get_splitter(i[u]) == key[u])
678  obkt[u] += 1;
679  }
680  }
681 
682 #undef TLX_CLASSIFY_TREE_STEP
683 
684  //! classify all strings in area by walking tree and saving bucket id
685  template <typename StringSet>
686  // __attribute__ ((optimize("unroll-all-loops")))
687  void classify(
688  const StringSet& strset,
689  typename StringSet::Iterator begin, typename StringSet::Iterator end,
690  uint16_t* bktout, size_t depth) const {
691  while (begin != end)
692  {
693  if (begin + Rollout <= end)
694  {
695  key_type key[Rollout];
696  for (size_t u = 0; u < Rollout; ++u)
697  key[u] = get_key<key_type>(strset, begin[u], depth);
698 
699  find_bkt_unroll(key, bktout);
700 
701  begin += Rollout, bktout += Rollout;
702  }
703  else
704  {
705  key_type key = get_key<key_type>(strset, *begin++, depth);
706  *bktout++ = this->find_bkt(key);
707  }
708  }
709  }
710 
711  //! return a splitter
712  key_type get_splitter(unsigned int i) const {
713  return splitter_tree_[
715  }
716 
717 private:
718  key_type splitter_tree_[num_splitters + 1];
719 };
720 
721 /******************************************************************************/
722 
723 } // namespace sort_strings_detail
724 } // namespace tlx
725 
726 #endif // !TLX_SORT_STRINGS_SAMPLE_SORT_TOOLS_HEADER
727 
728 /******************************************************************************/
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
Sample Sort Classification Tree Unrolled with Equal Comparisons.
static void perfect_tree_calculations_self_verify()
unsigned clz(Integral x)
key_type get_splitter(unsigned int i) const
return a splitter
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
Sample Sort Classification Tree Unrolled, Interleaved, and with Perfect Tree Index Calculations...
SSTreeBuilderLevelOrder(key_type tree[num_splitters], unsigned char splitter_lcp[num_splitters+1], const key_type *samples, size_t samplesize)
build tree, sizes: splitter_tree[num_splitters + 1] and
SSTreeBuilderPreAndLevelOrder(key_type splitter[num_splitters], key_type tree[num_splitters+1], unsigned char splitter_lcp[num_splitters+1], const key_type *samples, size_t samplesize)
void find_bkt_unroll(const key_type key[Rollout], uint16_t obkt[Rollout]) const
search in splitter tree for bucket number, unrolled for Rollout keys at once.
#define TLX_CLASSIFY_TREE_STEP
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
#define tlx_die_unequal(X, Y)
Check that X == Y or die miserably, but output the values of X and Y for better debugging.
Definition: core.hpp:132
Recursive TreeBuilder for full-descent and unrolled variants, constructs only a level-order binary tr...
key_type get_splitter(unsigned int i) const
return a splitter
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
Sample Sort Classification Tree Unrolled and Interleaved.
void find_bkt_unroll(const key_type key[Rollout], uint16_t obkt[Rollout]) const
search in splitter tree for bucket number, unrolled for Rollout keys at once.
static std::string to_binary(Type v, const size_t width=(8 *sizeof(Type)))
represent binary digits of large integer datatypes
#define TLX_LOG0
Override default output: never or always output log.
Definition: core.hpp:144
static unsigned int pre_to_levelorder(unsigned int id)
#define TLX_LOGC(cond)
Explicitly specify the condition for logging.
Definition: core.hpp:137
static unsigned int level_to_preorder(unsigned int id)
Class to transform in-order to level-order indexes in a perfect binary tree.
key_type recurse(const key_type *lo, const key_type *hi, unsigned int treeidx, key_type &rec_prevkey)
std::string hexdump_type(const Type &t)
Dump a (binary) item as a sequence of uppercase hexadecimal pairs.
Definition: hexdump.hpp:52
Recursive TreeBuilder for full-descent and unrolled variants, constructs a both a pre-order and level...
key_type get_splitter(unsigned int i) const
return a splitter
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
#define TLX_LOG
Default logging method: output if the local debug variable is true.
Definition: core.hpp:141
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
key_type recurse(const key_type *lo, const key_type *hi, unsigned int treeidx, key_type &rec_prevkey)