21 #ifndef TLX_SORT_STRINGS_STRING_SET_HEADER 22 #define TLX_SORT_STRINGS_STRING_SET_HEADER 40 namespace sort_strings_detail {
47 template <
typename StringSet,
typename Traits>
52 typename Traits::String&
at(
size_t i)
const {
53 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
54 return *(ss.begin() + i);
61 bool is_equal(
const typename Traits::String& a,
62 const typename Traits::CharIterator& ai,
63 const typename Traits::String& b,
64 const typename Traits::CharIterator& bi)
const {
65 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
66 return !ss.is_end(a, ai) && !ss.is_end(b, bi) && (*ai == *bi);
70 bool is_less(
const typename Traits::String& a,
71 const typename Traits::CharIterator& ai,
72 const typename Traits::String& b,
73 const typename Traits::CharIterator& bi)
const {
74 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
75 return ss.is_end(a, ai) ||
76 (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai < *bi);
80 bool is_leq(
const typename Traits::String& a,
81 const typename Traits::CharIterator& ai,
82 const typename Traits::String& b,
83 const typename Traits::CharIterator& bi)
const {
84 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
85 return ss.is_end(a, ai) ||
86 (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai <= *bi);
95 get_char(
const typename Traits::String& s,
size_t depth)
const {
96 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
97 return *ss.get_chars(s, depth);
103 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
104 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
106 if (ss.is_end(s, i))
return 0;
113 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
114 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
117 if (ss.is_end(s, i))
return v;
118 v = (uint16_t(*i) << 8);
120 if (ss.is_end(s, i))
return v;
121 v |= (uint16_t(*i) << 0);
128 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
129 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
132 if (ss.is_end(s, i))
return v;
133 v = (uint32_t(*i) << 24);
135 if (ss.is_end(s, i))
return v;
136 v |= (uint32_t(*i) << 16);
138 if (ss.is_end(s, i))
return v;
139 v |= (uint32_t(*i) << 8);
141 if (ss.is_end(s, i))
return v;
142 v |= (uint32_t(*i) << 0);
149 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
150 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
153 if (ss.is_end(s, i))
return v;
154 v = (uint64_t(*i) << 56);
156 if (ss.is_end(s, i))
return v;
157 v |= (uint64_t(*i) << 48);
159 if (ss.is_end(s, i))
return v;
160 v |= (uint64_t(*i) << 40);
162 if (ss.is_end(s, i))
return v;
163 v |= (uint64_t(*i) << 32);
165 if (ss.is_end(s, i))
return v;
166 v |= (uint64_t(*i) << 24);
168 if (ss.is_end(s, i))
return v;
169 v |= (uint64_t(*i) << 16);
171 if (ss.is_end(s, i))
return v;
172 v |= (uint64_t(*i) << 8);
174 if (ss.is_end(s, i))
return v;
175 v |= (uint64_t(*i) << 0);
179 uint8_t
get_uint8(
const typename Traits::String& s,
size_t depth)
const {
180 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
181 return get_uint8(s, ss.get_chars(s, depth));
184 uint16_t
get_uint16(
const typename Traits::String& s,
size_t depth)
const {
185 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
189 uint32_t
get_uint32(
const typename Traits::String& s,
size_t depth)
const {
190 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
194 uint64_t
get_uint64(
const typename Traits::String& s,
size_t depth)
const {
195 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
202 StringSet
subi(
size_t begin,
size_t end)
const {
203 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
204 return ss.
sub(ss.begin() + begin, ss.begin() + end);
208 const typename Traits::String& s2)
const {
209 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
211 typename StringSet::CharIterator c1 = ss.get_chars(s1, 0);
212 typename StringSet::CharIterator c2 = ss.get_chars(s2, 0);
214 while (ss.is_equal(s1, c1, s2, c2))
217 return ss.is_leq(s1, c1, s2, c2);
221 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
223 for (
typename Traits::Iterator pi = ss.begin();
224 pi + 1 != ss.end(); ++pi)
227 TLX_LOG1 <<
"check_order() failed at position " << pi - ss.begin();
235 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
237 for (
typename Traits::Iterator pi = ss.begin(); pi != ss.end(); ++pi)
239 TLX_LOG1 <<
"[" << i++ <<
"] = " << *pi
240 <<
" = " << ss.get_string(*pi, 0);
245 template <
typename Type,
typename StringSet>
249 const typename StringSet::String& s,
size_t depth) {
250 return strset.get_uint32(s, depth);
253 template <
typename Type,
typename StringSet>
257 const typename StringSet::String& s,
size_t depth) {
258 return strset.get_uint64(s, depth);
261 template <
typename Type,
typename StringSet>
263 Type
get_key_at(
const StringSet& strset,
size_t idx,
size_t depth) {
264 return get_key<Type>(strset, strset.at(idx), depth);
273 template <
typename CharType>
296 template <
typename CharType>
300 GenericCharStringSetTraits<CharType> >
313 : begin_(begin), end_(end)
318 : begin_(c.first), end_(c.first + c.second)
322 size_t size()
const {
return end_ - begin_; }
324 Iterator
begin()
const {
return begin_; }
326 Iterator
end()
const {
return end_; }
329 String& operator [] (Iterator i)
const 333 CharIterator
get_chars(
const String& s,
size_t depth)
const 334 {
return s + depth; }
337 bool is_end(
const String&,
const CharIterator& i)
const 338 {
return (*i == 0); }
341 std::string
get_string(
const String& s,
size_t depth = 0)
const 342 {
return std::string(reinterpret_cast<const char*>(s) + depth); }
350 {
return std::make_pair(
new String[n], n); }
354 {
delete[] c.first; c.first =
nullptr; }
401 : begin_(begin), end_(end)
406 : begin_(c.first), end_(c.first + c.second)
410 size_t size()
const {
return end_ - begin_; }
422 {
return reinterpret_cast<CharIterator>(s.data()) + depth; }
426 {
return (i >= reinterpret_cast<CharIterator>(s.data()) + s.size()); }
430 {
return s.substr(depth); }
438 {
return std::make_pair(
new String[n], n); }
442 {
delete[] c.first; c.first =
nullptr; }
462 typedef std::unique_ptr<std::string>
String;
480 public StringSetBase<UPtrStdStringSet, UPtrStdStringSetTraits>
485 : begin_(begin), end_(end)
490 : begin_(c.first), end_(c.first + c.second)
494 size_t size()
const {
return end_ - begin_; }
496 Iterator
begin()
const {
return begin_; }
498 Iterator
end()
const {
return end_; }
501 String& operator [] (
const Iterator& i)
const 506 {
return reinterpret_cast<CharIterator>(s->data()) + depth; }
510 {
return (i >= reinterpret_cast<CharIterator>(s->data()) + s->size()); }
514 {
return s->substr(depth); }
522 {
return std::make_pair(
new String[n], n); }
526 {
delete[] c.first; c.first =
nullptr; }
530 for (Iterator pi = begin(); pi != end(); ++pi)
532 TLX_LOG1 <<
"[" << i++ <<
"] = " << pi->get()
533 <<
" = " << get_string(*pi, 0);
562 typedef typename std::vector<String>::iterator
Iterator;
568 typedef std::pair<Text, std::vector<String> >
Container;
584 begin_(begin), end_(end)
590 sa.resize(text.size());
591 for (
size_t i = 0; i < text.size(); ++i)
597 size_t size()
const {
return end_ - begin_; }
609 {
return reinterpret_cast<CharIterator>(text_->data()) + s + depth; }
613 {
return (i >= reinterpret_cast<CharIterator>(text_->data()) + text_->size()); }
617 {
return text_->substr(s + depth); }
625 {
return std::make_pair(*text_, std::vector<String>(n)); }
629 { std::vector<String> v; v.swap(c.second); }
634 begin_(c.second.begin()), end_(c.second.end())
653 #endif // !TLX_SORT_STRINGS_STRING_SET_HEADER std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
Char * String
String reference: pointer to first character.
GenericCharStringSet(const Container &c)
Construct from a string container.
Iterator begin() const
Iterator representing first String position.
StdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
StringSuffixSet(Container &c)
Construct from a string container.
GenericCharStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
uint16_t get_uint16(const typename Traits::String &s, size_t depth) const
static void deallocate(Container &c)
Deallocate a temporary string container.
Class implementing StringSet concept for a std::unique_ptr<std::string objects, which are non-copyabl...
uint8_t get_uint8(const typename Traits::String &s, size_t depth) const
std::pair< Text, std::vector< String > > Container
exported alias for assumed string container
CharType Char
exported alias for character type
Traits::String & at(size_t i) const
index-based array access (readable and writable) to String objects.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
static void deallocate(Container &c)
Deallocate a temporary string container.
Traits class implementing StringSet concept for char* and unsigned char* strings. ...
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
Iterator end() const
Iterator representing beyond last String position.
static void deallocate(Container &c)
Deallocate a temporary string container.
uint8_t Char
exported alias for character type
std::pair< Iterator, size_t > Container
exported alias for assumed string container
Traits::CharIterator CharIterator
uint32_t get_uint32(const typename Traits::String &s, size_t depth) const
const Char * CharIterator
iterator of characters in a string
uint64_t get_uint64(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 8 characters of string s at iterator i packed into a uint64_t (only works correctly for ...
std::pair< Iterator, size_t > Container
exported alias for assumed string container
StringSuffixSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
UPtrStdStringSet(Container &c)
Construct from a string container.
uint64_t get_uint64(const typename Traits::String &s, size_t depth) const
Class implementing StringSet concept for suffix sorting indexes of a std::string text object...
Class implementing StringSet concept for char* and unsigned char* strings.
GenericCharStringSet< const char > CCharStringSet
GenericCharStringSet< char > CharStringSet
UPtrStdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
Class implementing StringSet concept for arrays of std::string objects.
StringSet subi(size_t begin, size_t end) const
Subset this string set using index range.
enable_if< sizeof(Type)==4, uint32_t >::type get_key(const StringSet &strset, const typename StringSet::String &s, size_t depth)
std::pair< Iterator, size_t > Container
exported alias for assumed string container
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
GenericCharStringSet< const unsigned char > CUCharStringSet
String * Iterator
Iterator over string references: using std::vector's iterator.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
bool is_less(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
uint8_t Char
exported alias for character type
Class implementing StringSet concept for a std::string objects.
String * Iterator
Iterator over string references: pointer to std::string.
size_t size() const
Return size of string array.
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
uint16_t get_uint16(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 2 characters of string s at iterator i packed into a uint16_t (only works correctly for ...
bool check_order(const typename Traits::String &s1, const typename Traits::String &s2) const
const Char * CharIterator
iterator of characters in a string
SFINAE enable_if – copy of std::enable_if<> with less extra cruft.
GenericCharStringSetTraits< CharType > Traits
uint8_t Char
exported alias for character type
Text::size_type String
String reference: suffix index of the text.
Traits::Char get_char(const typename Traits::String &s, size_t depth) const
bool is_leq(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
StringSuffixSet(const Text &text, const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
bool is_equal(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check equality of two strings a and b at char iterators ai and bi.
StdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Class implementing StringSet concept for suffix sorting indexes of a std::string text object...
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
GenericCharStringSet< unsigned char > UCharStringSet
Iterator end() const
Iterator representing beyond last String position.
const Char * CharIterator
iterator of characters in a string
Iterator end() const
Iterator representing beyond last String position.
GenericCharStringSet(Iterator begin, Iterator end)
Construct from begin and end string pointers.
StdStringSet(Container &c)
Construct from a string container.
std::unique_ptr< std::string > String
String reference: std::string, which should be reference counted.
uint32_t get_uint32(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 4 characters of string s at iterator i packed into a uint32_t (only works correctly for ...
static void deallocate(Container &c)
Deallocate a temporary string container.
std::string String
String reference: std::string, which should be reference counted.
static StringSuffixSet Initialize(const Text &text, std::vector< String > &sa)
Initializing constructor which fills output vector sa with indices.
Class implementing StringSet concept for a std::vector containing std::string objects.
Traits::Iterator Iterator
Container allocate(size_t n) const
Allocate a new temporary string container with n empty Strings.
Iterator end() const
Iterator representing beyond last String position.
Iterator begin() const
Iterator representing first String position.
const Char * CharIterator
iterator of characters in a string
size_t size() const
Return size of string array.
size_t size() const
Return size of string array.
UPtrStdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
Base class for common string set functions, included via CRTP.
uint8_t get_uint8(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 1 characters of string s at iterator i packed into a uint8_t (only works correctly for 8...
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belong to this set.
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
std::string Text
exported alias for assumed string container
Iterator begin() const
Iterator representing first String position.
std::vector< String >::iterator Iterator
Iterator over string references: using std::vector's iterator over suffix array vector.
Type get_key_at(const StringSet &strset, size_t idx, size_t depth)
Iterator begin() const
Iterator representing first String position.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
String * Iterator
Iterator over string references: pointer over pointers.
size_t size() const
Return size of string array.
Traits::Container Container
const Text * text_
reference to base text