21 #ifndef TLX_SORT_STRINGS_RADIX_SORT_HEADER 22 #define TLX_SORT_STRINGS_RADIX_SORT_HEADER 38 namespace sort_strings_detail {
47 template <
typename StringShadowPtr>
58 const StringSet& ss = strptr.
active();
61 std::fill(bkt_size, bkt_size + 256, 0);
62 for (Iterator i = ss.begin(); i != ss.end(); ++i)
63 ++bkt_size[ss.get_uint8(ss[i], depth)];
66 Iterator bkt_index[256];
67 bkt_index[0] = strptr.
shadow().begin();
68 for (
size_t i = 1; i < 256; ++i)
69 bkt_index[i] = bkt_index[i - 1] + bkt_size[i - 1];
72 for (Iterator i = ss.begin(); i != ss.end(); ++i)
73 *(bkt_index[ss.get_uint8(ss[i], depth)]++) = std::move(ss[i]);
84 size_t size = ss.
size();
87 for (
size_t i = 1; i <
pos; ++i)
91 size_t bkt = bkt_size[0], i = 1;
92 if (bkt > 0 && bkt < size)
95 while (i < 256 && bkt_size[i] == 0)
110 template <
typename StringShadowPtr>
116 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
117 radixstack.emplace(strptr, depth);
119 while (radixstack.size())
121 while (radixstack.top().idx < 255)
123 RadixStep& rs = radixstack.top();
126 size_t bkt_size = rs.bkt_size[++rs.idx];
131 else if (bkt_size < g_inssort_threshold) {
133 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
134 depth + radixstack.size(),
135 memory -
sizeof(RadixStep) * radixstack.size());
141 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
144 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
145 depth + radixstack.size(),
146 memory -
sizeof(RadixStep) * radixstack.size());
152 radixstack.emplace(rs.strptr.flip(rs.pos - bkt_size, bkt_size),
153 depth + radixstack.size());
164 template <
typename StringPtr>
169 const StringSet& ss = strptr.
active();
177 2 *
sizeof(size_t) +
sizeof(StringSet)
178 + ss.size() *
sizeof(
typename StringSet::String);
179 size_t memory_slack = 3 *
sizeof(RadixStep);
181 if (memory != 0 && memory < memory_use + memory_slack + 1)
184 typename StringSet::Container shadow = ss.allocate(ss.size());
186 strptr.
add_shadow(StringSet(shadow)), depth, memory - memory_use);
187 StringSet::deallocate(shadow);
193 template <
typename StringShadowPtr>
202 uint8_t* charcache) : strptr(in_strptr) {
204 const StringSet& ss = strptr.
active();
205 const size_t n = ss.size();
208 std::fill(bkt_size, bkt_size + 256, 0);
209 uint8_t* cc = charcache;
210 for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
211 *cc = ss.get_uint8(ss[i], depth);
212 for (cc = charcache; cc != charcache + n; ++cc)
213 ++bkt_size[static_cast<uint8_t>(*cc)];
216 Iterator bkt_index[256];
217 bkt_index[0] = strptr.
shadow().begin();
218 for (
size_t i = 1; i < 256; ++i)
219 bkt_index[i] = bkt_index[i - 1] + bkt_size[i - 1];
223 for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
224 *(bkt_index[static_cast<uint8_t>(*cc)]++) = std::move(ss[i]);
234 size_t size = ss.
size();
237 for (
size_t i = 1; i <
pos; ++i)
241 size_t bkt = bkt_size[0], i = 1;
242 if (bkt > 0 && bkt < size)
245 while (i < 256 && bkt_size[i] == 0)
257 template <
typename StringPtr>
260 size_t depth,
size_t memory);
265 template <
typename StringShadowPtr>
268 uint8_t* charcache,
size_t depth,
size_t memory) {
272 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
273 radixstack.emplace(strptr, depth, charcache);
277 while (
TLX_LIKELY(radixstack.top().idx < 255))
279 RadixStep& rs = radixstack.top();
282 size_t bkt_size = rs.bkt_size[++rs.idx];
287 else if (bkt_size < g_inssort_threshold) {
289 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
290 depth + radixstack.size(),
291 memory -
sizeof(RadixStep) * radixstack.size());
297 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
300 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
301 depth + radixstack.size(),
302 memory -
sizeof(RadixStep) * radixstack.size());
309 radixstack.emplace(rs.strptr.flip(rs.pos - bkt_size, bkt_size),
310 depth + radixstack.size(), charcache);
317 template <
typename StringPtr>
322 const StringSet& ss = strptr.
active();
330 2 *
sizeof(size_t) +
sizeof(StringSet)
331 + ss.size() *
sizeof(uint8_t)
332 + ss.size() *
sizeof(
typename StringSet::String);
333 size_t memory_slack = 3 *
sizeof(RadixStep);
335 if (memory != 0 && memory < memory_use + memory_slack + 1)
338 typename StringSet::Container shadow = ss.allocate(ss.size());
339 uint8_t* charcache =
new uint8_t[ss.size()];
342 charcache, depth, memory - memory_use);
345 StringSet::deallocate(shadow);
351 template <
typename StringShadowPtr>
353 enum { RADIX = 0x10000 };
362 uint16_t* charcache) : strptr(in_strptr) {
364 const StringSet& ss = strptr.
active();
365 const size_t n = ss.size();
368 std::fill(bkt_size, bkt_size + RADIX, 0);
369 uint16_t* cc = charcache;
370 for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
371 *cc = ss.get_uint16(ss[i], depth);
372 for (cc = charcache; cc != charcache + n; ++cc)
373 ++bkt_size[static_cast<uint16_t>(*cc)];
377 bkt_index[0] = strptr.
shadow().begin();
378 for (
size_t i = 1; i < RADIX; ++i)
379 bkt_index[i] = bkt_index[i - 1] + bkt_size[i - 1];
384 for (
size_t i = 1; i < bkt_size[0]; ++i)
388 size_t first = get_next_non_empty_bkt_index(0);
389 size_t bkt = bkt_index[first] + bkt_size[first] - bkt_index[0];
391 size_t second = get_next_non_empty_bkt_index(first + 1);
392 while (second < RADIX)
394 size_t partial_equal = (first >> 8) == (second >> 8);
395 strptr.
set_lcp(bkt, depth + partial_equal);
396 bkt += bkt_size[second];
398 second = get_next_non_empty_bkt_index(second + 1);
404 for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
405 *(bkt_index[static_cast<uint16_t>(*cc)]++) = std::move(ss[i]);
418 while (start < RADIX && bkt_size[start] == 0)
428 template <
typename StringShadowPtr>
431 uint16_t* charcache,
size_t depth,
size_t memory) {
433 enum { RADIX = 0x10000 };
437 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
438 radixstack.emplace(strptr, depth, charcache);
442 while (
TLX_LIKELY(radixstack.top().idx < RADIX - 1))
444 RadixStep& rs = radixstack.top();
447 size_t bkt_size = rs.bkt_size[++rs.idx];
454 rs.strptr.flip(rs.pos, bkt_size).copy_back();
455 for (
size_t i = rs.pos + 1; i < rs.pos + bkt_size; ++i)
456 rs.strptr.set_lcp(i, depth + 2 * radixstack.size() - 1);
462 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
463 depth + 2 * radixstack.size(),
464 memory -
sizeof(RadixStep) * radixstack.size());
467 else if (bkt_size < RADIX)
470 rs.strptr.flip(rs.pos, bkt_size),
471 reinterpret_cast<uint8_t*
>(charcache),
472 depth + 2 * radixstack.size(),
473 memory -
sizeof(RadixStep) * radixstack.size());
479 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
482 rs.strptr.flip(rs.pos, bkt_size).copy_back(),
483 depth + 2 * radixstack.size(),
484 memory -
sizeof(RadixStep) * radixstack.size());
492 rs.strptr.flip(rs.pos - bkt_size, bkt_size),
493 depth + 2 * radixstack.size(), charcache);
500 template <
typename StringPtr>
503 enum { RADIX = 0x10000 };
506 const StringSet& ss = strptr.
active();
510 if (ss.size() < RADIX)
517 2 *
sizeof(size_t) +
sizeof(StringSet)
518 + ss.size() *
sizeof(uint16_t)
519 + ss.size() *
sizeof(
typename StringSet::String);
520 size_t memory_slack = 3 *
sizeof(RadixStep);
522 if (memory != 0 && memory < memory_use + memory_slack + 1)
525 typename StringSet::Container shadow = ss.allocate(ss.size());
526 uint16_t* charcache =
new uint16_t[ss.size()];
529 charcache, depth, memory - memory_use);
532 StringSet::deallocate(shadow);
538 template <
typename StringPtr>
542 typedef typename StringSet::String
String;
548 size_t base,
size_t depth, uint8_t* charcache) {
550 const StringSet& ss = strptr.
active();
551 size_t size = ss.size();
554 std::fill(bkt_size, bkt_size + 256, 0);
555 uint8_t* cc = charcache;
556 for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
557 *cc = ss.get_uint8(ss[i], depth);
558 for (cc = charcache; cc != charcache + size; ++cc)
559 ++bkt_size[static_cast<uint8_t>(*cc)];
563 bkt[0] = bkt_size[0];
564 size_t last_bkt_size = bkt_size[0];
565 for (
size_t i = 1; i < 256; ++i) {
566 bkt[i] = bkt[i - 1] + bkt_size[i];
567 if (bkt_size[i] != 0) last_bkt_size = bkt_size[i];
571 for (
size_t i = 0, j; i < size - last_bkt_size; )
573 String perm = std::move(ss[ss.begin() + i]);
574 uint8_t permch = charcache[i];
575 while ((j = --bkt[permch]) > i)
580 ss[ss.begin() + i] = std::move(perm);
581 i += bkt_size[permch];
586 pos = base + bkt_size[0];
591 for (
size_t i = 1; i < bkt_size[0]; ++i)
595 size_t lbkt = bkt_size[0], i = 1;
596 if (lbkt > 0 && lbkt < size)
599 while (i < 256 && bkt_size[i] == 0)
614 template <
typename StringPtr>
617 size_t depth,
size_t memory) {
621 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
622 radixstack.emplace(strptr, 0, depth, charcache);
626 while (
TLX_LIKELY(radixstack.top().idx < 255))
628 RadixStep& rs = radixstack.top();
631 size_t bkt_size = rs.bkt_size[++rs.idx];
637 else if (bkt_size < g_inssort_threshold) {
639 strptr.
sub(rs.pos, bkt_size),
640 depth + radixstack.size(),
641 memory -
sizeof(RadixStep) * radixstack.size());
647 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
650 strptr.
sub(rs.pos, bkt_size),
651 depth + radixstack.size(),
652 memory -
sizeof(RadixStep) * radixstack.size());
660 strptr.
sub(rs.pos - bkt_size, bkt_size),
661 rs.pos -
bkt_size, depth + radixstack.size(), charcache);
671 template <
typename StringPtr>
683 2 *
sizeof(size_t) +
sizeof(StringSet)
684 + strptr.
size() *
sizeof(uint8_t);
685 size_t memory_slack = 3 *
sizeof(RadixStep);
687 if (memory != 0 && memory < memory_use + memory_slack + 1)
690 uint8_t* charcache =
new uint8_t[strptr.
size()];
692 radixsort_CI2(strptr, charcache, depth, memory - memory_use);
700 template <
typename StringPtr>
702 enum { RADIX = 0x10000 };
706 typedef typename StringSet::String
String;
712 uint16_t* charcache) {
714 const StringSet& ss = strptr.
active();
715 const size_t n = ss.size();
717 std::fill(bkt_size, bkt_size + RADIX, 0);
718 uint16_t* cc = charcache;
719 for (Iterator i = ss.begin(); i != ss.end(); ++i, ++cc)
720 *cc = ss.get_uint16(ss[i], depth);
721 for (cc = charcache; cc != charcache + n; ++cc)
722 ++bkt_size[static_cast<uint16_t>(*cc)];
726 bkt_index[0] = bkt_size[0];
727 size_t last_bkt_size = bkt_size[0];
728 for (
size_t i = 1; i < RADIX; ++i) {
729 bkt_index[i] = bkt_index[i - 1] + bkt_size[i];
730 if (bkt_size[i]) last_bkt_size = bkt_size[i];
736 for (
size_t i = 1; i < bkt_size[0]; ++i)
740 size_t first = get_next_non_empty_bkt_index(0);
741 size_t bkt = bkt_index[first];
743 size_t second = get_next_non_empty_bkt_index(first + 1);
744 while (second < RADIX)
746 size_t partial_equal = (first >> 8) == (second >> 8);
747 strptr.
set_lcp(bkt, depth + partial_equal);
748 bkt += bkt_size[second];
750 second = get_next_non_empty_bkt_index(second + 1);
755 for (
size_t i = 0, j; i < n - last_bkt_size; )
757 String perm = std::move(ss[ss.begin() + i]);
758 uint16_t permch = charcache[i];
759 while ((j = --bkt_index[permch]) > i)
764 ss[ss.begin() + i] = std::move(perm);
765 i += bkt_size[permch];
770 pos = base + bkt_size[0];
776 while (start < RADIX && bkt_size[start] == 0)
786 template <
typename StringPtr>
789 size_t depth,
size_t memory) {
790 enum { RADIX = 0x10000 };
794 std::stack<RadixStep, std::vector<RadixStep> > radixstack;
795 radixstack.emplace(strptr, 0, depth, charcache);
799 while (
TLX_LIKELY(radixstack.top().idx < RADIX - 1))
801 RadixStep& rs = radixstack.top();
804 size_t bkt_size = rs.bkt_size[++rs.idx];
812 for (
size_t i = rs.pos + 1; i < rs.pos + bkt_size; ++i)
813 strptr.
set_lcp(i, depth + 2 * radixstack.size() - 1);
820 strptr.
sub(rs.pos, bkt_size),
821 depth + 2 * radixstack.size(),
822 memory -
sizeof(RadixStep) * radixstack.size());
825 else if (bkt_size < RADIX)
828 strptr.
sub(rs.pos, bkt_size),
829 reinterpret_cast<uint8_t*
>(charcache),
830 depth + 2 * radixstack.size(),
831 memory -
sizeof(RadixStep) * radixstack.size());
837 memory <
sizeof(RadixStep) * (radixstack.size() + 1)))
840 strptr.
sub(rs.pos, bkt_size),
841 depth + 2 * radixstack.size(),
842 memory -
sizeof(RadixStep) * radixstack.size());
850 strptr.
sub(rs.pos - bkt_size, bkt_size),
852 depth + 2 * radixstack.size(), charcache);
859 template <
typename StringPtr>
862 enum { RADIX = 0x10000 };
868 if (strptr.
size() < RADIX)
875 2 *
sizeof(size_t) +
sizeof(StringSet)
876 + strptr.
size() *
sizeof(uint16_t);
877 size_t memory_slack = 3 *
sizeof(RadixStep);
879 if (memory != 0 && memory < memory_use + memory_slack + 1)
882 uint16_t* charcache =
new uint16_t[strptr.
size()];
883 radixsort_CI3(strptr, charcache, depth, memory - memory_use);
895 #endif // !TLX_SORT_STRINGS_RADIX_SORT_HEADER StringShadowPtr::StringSet StringSet
StringPtr sub(size_t offset, size_t sub_size) const
Advance (both) pointers by given offset, return sub-array.
RadixStep_CI3(const StringPtr &strptr, size_t base, size_t depth, uint16_t *charcache)
RadixStep_CE0(const StringShadowPtr &in_strptr, size_t depth)
size_t size() const
return valid length
static void multikey_quicksort(const StringPtr &strptr, size_t depth, size_t memory)
Generic multikey quicksort for strings.
const StringSet & active() const
return currently active array
void set_lcp(size_t, const LcpType &) const
set the i-th lcp to v and check its value
StringPtr::StringSet StringSet
size_t get_next_non_empty_bkt_index(size_t start)
size_t get_next_non_empty_bkt_index(size_t start)
StringShadowPtr::StringSet StringSet
StringShadowPtr copy_back() const
return subarray pointer to n strings in original array, might copy from shadow before returning...
Simpler non-growing vector without initialization.
const StringSet & active() const
return currently active array
RadixStep_CI2(const StringPtr &strptr, size_t base, size_t depth, uint8_t *charcache)
static void radixsort_CE2(const StringPtr &strptr, size_t depth, size_t memory)
StringPtr::StringSet StringSet
static void radixsort_CE0(const StringPtr &strptr, size_t depth, size_t memory)
Adapter method to allocate shadow array if needed.
void set_lcp(size_t, const LcpType &) const
set the i-th lcp to v and check its value
static const size_t g_inssort_threshold
size_t size() const
return valid length
static void radixsort_CE0_loop(const StringShadowPtr &strptr, size_t depth, size_t memory)
static enable_if<!StringPtr::with_lcp, void >::type insertion_sort(const StringPtr &strptr, size_t depth, size_t)
Generic insertion sort for abstract string sets.
static const bool with_lcp
if we want to save the LCPs
void swap(CountingPtr< A, D > &a1, CountingPtr< A, D > &a2) noexcept
swap enclosed object with another counting pointer (no reference counts need change) ...
RadixStep_CE2(const StringShadowPtr &in_strptr, size_t depth, uint8_t *charcache)
static void radixsort_CE3(const StringPtr &strptr, size_t depth, size_t memory)
StringSet::Iterator Iterator
WithShadow add_shadow(const StringSet &shadow) const
construct objectified string and shadow pointer class
static const bool with_lcp
if we want to save the LCPs
StringSet::Iterator Iterator
StringSet::Iterator Iterator
StringSet::Iterator Iterator
static void radixsort_CE3_loop(const StringShadowPtr &strptr, uint16_t *charcache, size_t depth, size_t memory)
static void radixsort_CE2_loop(const StringShadowPtr &strptr, uint8_t *charcache, size_t depth, size_t memory)
RadixStep_CE3(const StringShadowPtr &in_strptr, size_t depth, uint16_t *charcache)
Objectified string array pointer and shadow pointer array for out-of-place swapping of pointers...
StringShadowPtr flip(size_t offset, size_t sub_size) const
construct a StringShadowPtr object specifying a sub-array with flipping to other array.
StringSet::Iterator Iterator
StringShadowPtr::StringSet StringSet
static void radixsort_CI2(const StringPtr &strptr, uint8_t *charcache, size_t depth, size_t memory)
const StringSet & shadow() const
return current shadow array
static void radixsort_CI3(const StringPtr &strptr, uint16_t *charcache, size_t depth, size_t memory)
Objectified string array pointer array.