tlx
string_set.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/sort/strings/string_set.hpp
3  *
4  * Implementations of a StringSet concept. This is an internal implementation
5  * header, see tlx/sort/strings.hpp for public front-end functions.
6  *
7  * A StringSet abstracts from arrays of strings, we provide four abstractions:
8  *
9  * - UCharStringSet: (const) unsigned char**
10  * - StdStringSet: std::string*
11  * - UPtrStdStringSet: std::unique_ptr<std::string>*
12  * - StringSuffixSet: suffix sorting indexes of a std::string text
13  *
14  * Part of tlx - http://panthema.net/tlx
15  *
16  * Copyright (C) 2015-2019 Timo Bingmann <tb@panthema.net>
17  *
18  * All rights reserved. Published under the Boost Software License, Version 1.0
19  ******************************************************************************/
20 
21 #ifndef TLX_SORT_STRINGS_STRING_SET_HEADER
22 #define TLX_SORT_STRINGS_STRING_SET_HEADER
23 
24 #include <cassert>
25 #include <cstdint>
26 #include <memory>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 #include <tlx/logger/core.hpp>
32 #include <tlx/math/bswap.hpp>
33 #include <tlx/meta/enable_if.hpp>
34 
35 namespace tlx {
36 
37 //! \addtogroup tlx_sort
38 //! \{
39 
40 namespace sort_strings_detail {
41 
42 /******************************************************************************/
43 
44 /*!
45  * Base class for common string set functions, included via CRTP.
46  */
47 template <typename StringSet, typename Traits>
49 {
50 public:
51  //! index-based array access (readable and writable) to String objects.
52  typename Traits::String& at(size_t i) const {
53  const StringSet& ss = *static_cast<const StringSet*>(this);
54  return *(ss.begin() + i);
55  }
56 
57  //! \name CharIterator Comparisons
58  //! \{
59 
60  //! check equality of two strings a and b at char iterators ai and bi.
61  bool is_equal(const typename Traits::String& a,
62  const typename Traits::CharIterator& ai,
63  const typename Traits::String& b,
64  const typename Traits::CharIterator& bi) const {
65  const StringSet& ss = *static_cast<const StringSet*>(this);
66  return !ss.is_end(a, ai) && !ss.is_end(b, bi) && (*ai == *bi);
67  }
68 
69  //! check if string a is less or equal to string b at iterators ai and bi.
70  bool is_less(const typename Traits::String& a,
71  const typename Traits::CharIterator& ai,
72  const typename Traits::String& b,
73  const typename Traits::CharIterator& bi) const {
74  const StringSet& ss = *static_cast<const StringSet*>(this);
75  return ss.is_end(a, ai) ||
76  (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai < *bi);
77  }
78 
79  //! check if string a is less or equal to string b at iterators ai and bi.
80  bool is_leq(const typename Traits::String& a,
81  const typename Traits::CharIterator& ai,
82  const typename Traits::String& b,
83  const typename Traits::CharIterator& bi) const {
84  const StringSet& ss = *static_cast<const StringSet*>(this);
85  return ss.is_end(a, ai) ||
86  (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai <= *bi);
87  }
88 
89  //! \}
90 
91  //! \name Character Extractors
92  //! \{
93 
94  typename Traits::Char
95  get_char(const typename Traits::String& s, size_t depth) const {
96  const StringSet& ss = *static_cast<const StringSet*>(this);
97  return *ss.get_chars(s, depth);
98  }
99 
100  //! Return up to 1 characters of string s at iterator i packed into a
101  //! uint8_t (only works correctly for 8-bit characters)
102  uint8_t get_uint8(
103  const typename Traits::String& s, typename Traits::CharIterator i) const {
104  const StringSet& ss = *static_cast<const StringSet*>(this);
105 
106  if (ss.is_end(s, i)) return 0;
107  return uint8_t(*i);
108  }
109 
110  //! Return up to 2 characters of string s at iterator i packed into a
111  //! uint16_t (only works correctly for 8-bit characters)
112  uint16_t get_uint16(
113  const typename Traits::String& s, typename Traits::CharIterator i) const {
114  const StringSet& ss = *static_cast<const StringSet*>(this);
115 
116  uint16_t v = 0;
117  if (ss.is_end(s, i)) return v;
118  v = (uint16_t(*i) << 8);
119  ++i;
120  if (ss.is_end(s, i)) return v;
121  v |= (uint16_t(*i) << 0);
122  return v;
123  }
124 
125  //! Return up to 4 characters of string s at iterator i packed into a
126  //! uint32_t (only works correctly for 8-bit characters)
127  uint32_t get_uint32(
128  const typename Traits::String& s, typename Traits::CharIterator i) const {
129  const StringSet& ss = *static_cast<const StringSet*>(this);
130 
131  uint32_t v = 0;
132  if (ss.is_end(s, i)) return v;
133  v = (uint32_t(*i) << 24);
134  ++i;
135  if (ss.is_end(s, i)) return v;
136  v |= (uint32_t(*i) << 16);
137  ++i;
138  if (ss.is_end(s, i)) return v;
139  v |= (uint32_t(*i) << 8);
140  ++i;
141  if (ss.is_end(s, i)) return v;
142  v |= (uint32_t(*i) << 0);
143  return v;
144  }
145 
146  //! Return up to 8 characters of string s at iterator i packed into a
147  //! uint64_t (only works correctly for 8-bit characters)
148  uint64_t get_uint64(
149  const typename Traits::String& s, typename Traits::CharIterator i) const {
150  const StringSet& ss = *static_cast<const StringSet*>(this);
151 
152  uint64_t v = 0;
153  if (ss.is_end(s, i)) return v;
154  v = (uint64_t(*i) << 56);
155  ++i;
156  if (ss.is_end(s, i)) return v;
157  v |= (uint64_t(*i) << 48);
158  ++i;
159  if (ss.is_end(s, i)) return v;
160  v |= (uint64_t(*i) << 40);
161  ++i;
162  if (ss.is_end(s, i)) return v;
163  v |= (uint64_t(*i) << 32);
164  ++i;
165  if (ss.is_end(s, i)) return v;
166  v |= (uint64_t(*i) << 24);
167  ++i;
168  if (ss.is_end(s, i)) return v;
169  v |= (uint64_t(*i) << 16);
170  ++i;
171  if (ss.is_end(s, i)) return v;
172  v |= (uint64_t(*i) << 8);
173  ++i;
174  if (ss.is_end(s, i)) return v;
175  v |= (uint64_t(*i) << 0);
176  return v;
177  }
178 
179  uint8_t get_uint8(const typename Traits::String& s, size_t depth) const {
180  const StringSet& ss = *static_cast<const StringSet*>(this);
181  return get_uint8(s, ss.get_chars(s, depth));
182  }
183 
184  uint16_t get_uint16(const typename Traits::String& s, size_t depth) const {
185  const StringSet& ss = *static_cast<const StringSet*>(this);
186  return get_uint16(s, ss.get_chars(s, depth));
187  }
188 
189  uint32_t get_uint32(const typename Traits::String& s, size_t depth) const {
190  const StringSet& ss = *static_cast<const StringSet*>(this);
191  return get_uint32(s, ss.get_chars(s, depth));
192  }
193 
194  uint64_t get_uint64(const typename Traits::String& s, size_t depth) const {
195  const StringSet& ss = *static_cast<const StringSet*>(this);
196  return get_uint64(s, ss.get_chars(s, depth));
197  }
198 
199  //! \}
200 
201  //! Subset this string set using index range.
202  StringSet subi(size_t begin, size_t end) const {
203  const StringSet& ss = *static_cast<const StringSet*>(this);
204  return ss.sub(ss.begin() + begin, ss.begin() + end);
205  }
206 
207  bool check_order(const typename Traits::String& s1,
208  const typename Traits::String& s2) const {
209  const StringSet& ss = *static_cast<const StringSet*>(this);
210 
211  typename StringSet::CharIterator c1 = ss.get_chars(s1, 0);
212  typename StringSet::CharIterator c2 = ss.get_chars(s2, 0);
213 
214  while (ss.is_equal(s1, c1, s2, c2))
215  ++c1, ++c2;
216 
217  return ss.is_leq(s1, c1, s2, c2);
218  }
219 
220  bool check_order() const {
221  const StringSet& ss = *static_cast<const StringSet*>(this);
222 
223  for (typename Traits::Iterator pi = ss.begin();
224  pi + 1 != ss.end(); ++pi)
225  {
226  if (!check_order(*pi, *(pi + 1))) {
227  TLX_LOG1 << "check_order() failed at position " << pi - ss.begin();
228  return false;
229  }
230  }
231  return true;
232  }
233 
234  void print() const {
235  const StringSet& ss = *static_cast<const StringSet*>(this);
236  size_t i = 0;
237  for (typename Traits::Iterator pi = ss.begin(); pi != ss.end(); ++pi)
238  {
239  TLX_LOG1 << "[" << i++ << "] = " << *pi
240  << " = " << ss.get_string(*pi, 0);
241  }
242  }
243 };
244 
245 template <typename Type, typename StringSet>
246 inline
248 get_key(const StringSet& strset,
249  const typename StringSet::String& s, size_t depth) {
250  return strset.get_uint32(s, depth);
251 }
252 
253 template <typename Type, typename StringSet>
254 inline
256 get_key(const StringSet& strset,
257  const typename StringSet::String& s, size_t depth) {
258  return strset.get_uint64(s, depth);
259 }
260 
261 template <typename Type, typename StringSet>
262 inline
263 Type get_key_at(const StringSet& strset, size_t idx, size_t depth) {
264  return get_key<Type>(strset, strset.at(idx), depth);
265 }
266 
267 /******************************************************************************/
268 
269 /*!
270  * Traits class implementing StringSet concept for char* and unsigned char*
271  * strings.
272  */
273 template <typename CharType>
275 {
276 public:
277  //! exported alias for character type
278  typedef CharType Char;
279 
280  //! String reference: pointer to first character
281  typedef Char* String;
282 
283  //! Iterator over string references: pointer over pointers
284  typedef String* Iterator;
285 
286  //! iterator of characters in a string
287  typedef const Char* CharIterator;
288 
289  //! exported alias for assumed string container
290  typedef std::pair<Iterator, size_t> Container;
291 };
292 
293 /*!
294  * Class implementing StringSet concept for char* and unsigned char* strings.
295  */
296 template <typename CharType>
298  : public GenericCharStringSetTraits<CharType>,
299  public StringSetBase<GenericCharStringSet<CharType>,
300  GenericCharStringSetTraits<CharType> >
301 {
302 public:
304 
305  typedef typename Traits::Char Char;
306  typedef typename Traits::String String;
307  typedef typename Traits::Iterator Iterator;
309  typedef typename Traits::Container Container;
310 
311  //! Construct from begin and end string pointers
312  GenericCharStringSet(Iterator begin, Iterator end)
313  : begin_(begin), end_(end)
314  { }
315 
316  //! Construct from a string container
317  explicit GenericCharStringSet(const Container& c)
318  : begin_(c.first), end_(c.first + c.second)
319  { }
320 
321  //! Return size of string array
322  size_t size() const { return end_ - begin_; }
323  //! Iterator representing first String position
324  Iterator begin() const { return begin_; }
325  //! Iterator representing beyond last String position
326  Iterator end() const { return end_; }
327 
328  //! Iterator-based array access (readable and writable) to String objects.
329  String& operator [] (Iterator i) const
330  { return *i; }
331 
332  //! Return CharIterator for referenced string, which belong to this set.
333  CharIterator get_chars(const String& s, size_t depth) const
334  { return s + depth; }
335 
336  //! Returns true if CharIterator is at end of the given String
337  bool is_end(const String&, const CharIterator& i) const
338  { return (*i == 0); }
339 
340  //! Return complete string (for debugging purposes)
341  std::string get_string(const String& s, size_t depth = 0) const
342  { return std::string(reinterpret_cast<const char*>(s) + depth); }
343 
344  //! Subset this string set using iterator range.
345  GenericCharStringSet sub(Iterator begin, Iterator end) const
346  { return GenericCharStringSet(begin, end); }
347 
348  //! Allocate a new temporary string container with n empty Strings
349  static Container allocate(size_t n)
350  { return std::make_pair(new String[n], n); }
351 
352  //! Deallocate a temporary string container
353  static void deallocate(Container& c)
354  { delete[] c.first; c.first = nullptr; }
355 
356 protected:
357  //! array of string pointers
358  Iterator begin_, end_;
359 };
360 
363 
366 
367 /******************************************************************************/
368 
369 /*!
370  * Class implementing StringSet concept for a std::string objects.
371  */
373 {
374 public:
375  //! exported alias for character type
376  typedef uint8_t Char;
377 
378  //! String reference: std::string, which should be reference counted.
379  typedef std::string String;
380 
381  //! Iterator over string references: pointer to std::string.
382  typedef String* Iterator;
383 
384  //! iterator of characters in a string
385  typedef const Char* CharIterator;
386 
387  //! exported alias for assumed string container
388  typedef std::pair<Iterator, size_t> Container;
389 };
390 
391 /*!
392  * Class implementing StringSet concept for arrays of std::string objects.
393  */
395  : public StdStringSetTraits,
396  public StringSetBase<StdStringSet, StdStringSetTraits>
397 {
398 public:
399  //! Construct from begin and end string pointers
400  StdStringSet(const Iterator& begin, const Iterator& end)
401  : begin_(begin), end_(end)
402  { }
403 
404  //! Construct from a string container
405  explicit StdStringSet(Container& c)
406  : begin_(c.first), end_(c.first + c.second)
407  { }
408 
409  //! Return size of string array
410  size_t size() const { return end_ - begin_; }
411  //! Iterator representing first String position
412  Iterator begin() const { return begin_; }
413  //! Iterator representing beyond last String position
414  Iterator end() const { return end_; }
415 
416  //! Array access (readable and writable) to String objects.
417  String& operator [] (const Iterator& i) const
418  { return *i; }
419 
420  //! Return CharIterator for referenced string, which belongs to this set.
421  CharIterator get_chars(const String& s, size_t depth) const
422  { return reinterpret_cast<CharIterator>(s.data()) + depth; }
423 
424  //! Returns true if CharIterator is at end of the given String
425  bool is_end(const String& s, const CharIterator& i) const
426  { return (i >= reinterpret_cast<CharIterator>(s.data()) + s.size()); }
427 
428  //! Return complete string (for debugging purposes)
429  std::string get_string(const String& s, size_t depth = 0) const
430  { return s.substr(depth); }
431 
432  //! Subset this string set using iterator range.
433  StdStringSet sub(Iterator begin, Iterator end) const
434  { return StdStringSet(begin, end); }
435 
436  //! Allocate a new temporary string container with n empty Strings
437  static Container allocate(size_t n)
438  { return std::make_pair(new String[n], n); }
439 
440  //! Deallocate a temporary string container
441  static void deallocate(Container& c)
442  { delete[] c.first; c.first = nullptr; }
443 
444 protected:
445  //! pointers to std::string objects
446  Iterator begin_, end_;
447 };
448 
449 /******************************************************************************/
450 
451 /*!
452  * Class implementing StringSet concept for a std::unique_ptr<std::string
453  * objects, which are non-copyable.
454  */
456 {
457 public:
458  //! exported alias for character type
459  typedef uint8_t Char;
460 
461  //! String reference: std::string, which should be reference counted.
462  typedef std::unique_ptr<std::string> String;
463 
464  //! Iterator over string references: using std::vector's iterator
465  typedef String* Iterator;
466 
467  //! iterator of characters in a string
468  typedef const Char* CharIterator;
469 
470  //! exported alias for assumed string container
471  typedef std::pair<Iterator, size_t> Container;
472 };
473 
474 /*!
475  * Class implementing StringSet concept for a std::vector containing std::string
476  * objects.
477  */
479  : public UPtrStdStringSetTraits,
480  public StringSetBase<UPtrStdStringSet, UPtrStdStringSetTraits>
481 {
482 public:
483  //! Construct from begin and end string pointers
484  UPtrStdStringSet(const Iterator& begin, const Iterator& end)
485  : begin_(begin), end_(end)
486  { }
487 
488  //! Construct from a string container
490  : begin_(c.first), end_(c.first + c.second)
491  { }
492 
493  //! Return size of string array
494  size_t size() const { return end_ - begin_; }
495  //! Iterator representing first String position
496  Iterator begin() const { return begin_; }
497  //! Iterator representing beyond last String position
498  Iterator end() const { return end_; }
499 
500  //! Array access (readable and writable) to String objects.
501  String& operator [] (const Iterator& i) const
502  { return *i; }
503 
504  //! Return CharIterator for referenced string, which belongs to this set.
505  CharIterator get_chars(const String& s, size_t depth) const
506  { return reinterpret_cast<CharIterator>(s->data()) + depth; }
507 
508  //! Returns true if CharIterator is at end of the given String
509  bool is_end(const String& s, const CharIterator& i) const
510  { return (i >= reinterpret_cast<CharIterator>(s->data()) + s->size()); }
511 
512  //! Return complete string (for debugging purposes)
513  std::string get_string(const String& s, size_t depth = 0) const
514  { return s->substr(depth); }
515 
516  //! Subset this string set using iterator range.
517  UPtrStdStringSet sub(Iterator begin, Iterator end) const
518  { return UPtrStdStringSet(begin, end); }
519 
520  //! Allocate a new temporary string container with n empty Strings
521  static Container allocate(size_t n)
522  { return std::make_pair(new String[n], n); }
523 
524  //! Deallocate a temporary string container
525  static void deallocate(Container& c)
526  { delete[] c.first; c.first = nullptr; }
527 
528  void print() const {
529  size_t i = 0;
530  for (Iterator pi = begin(); pi != end(); ++pi)
531  {
532  TLX_LOG1 << "[" << i++ << "] = " << pi->get()
533  << " = " << get_string(*pi, 0);
534  }
535  }
536 
537 protected:
538  //! vector of std::string objects
539  Iterator begin_, end_;
540 };
541 
542 /******************************************************************************/
543 
544 /*!
545  * Class implementing StringSet concept for suffix sorting indexes of a
546  * std::string text object.
547  */
549 {
550 public:
551  //! exported alias for assumed string container
552  typedef std::string Text;
553 
554  //! exported alias for character type
555  typedef uint8_t Char;
556 
557  //! String reference: suffix index of the text.
558  typedef typename Text::size_type String;
559 
560  //! Iterator over string references: using std::vector's iterator over
561  //! suffix array vector
562  typedef typename std::vector<String>::iterator Iterator;
563 
564  //! iterator of characters in a string
565  typedef const Char* CharIterator;
566 
567  //! exported alias for assumed string container
568  typedef std::pair<Text, std::vector<String> > Container;
569 };
570 
571 /*!
572  * Class implementing StringSet concept for suffix sorting indexes of a
573  * std::string text object.
574  */
576  : public StringSuffixSetTraits,
577  public StringSetBase<StringSuffixSet, StringSuffixSetTraits>
578 {
579 public:
580  //! Construct from begin and end string pointers
581  StringSuffixSet(const Text& text,
582  const Iterator& begin, const Iterator& end)
583  : text_(&text),
584  begin_(begin), end_(end)
585  { }
586 
587  //! Initializing constructor which fills output vector sa with indices.
588  static StringSuffixSet
589  Initialize(const Text& text, std::vector<String>& sa) {
590  sa.resize(text.size());
591  for (size_t i = 0; i < text.size(); ++i)
592  sa[i] = i;
593  return StringSuffixSet(text, sa.begin(), sa.end());
594  }
595 
596  //! Return size of string array
597  size_t size() const { return end_ - begin_; }
598  //! Iterator representing first String position
599  Iterator begin() const { return begin_; }
600  //! Iterator representing beyond last String position
601  Iterator end() const { return end_; }
602 
603  //! Array access (readable and writable) to String objects.
604  String& operator [] (const Iterator& i) const
605  { return *i; }
606 
607  //! Return CharIterator for referenced string, which belongs to this set.
608  CharIterator get_chars(const String& s, size_t depth) const
609  { return reinterpret_cast<CharIterator>(text_->data()) + s + depth; }
610 
611  //! Returns true if CharIterator is at end of the given String
612  bool is_end(const String&, const CharIterator& i) const
613  { return (i >= reinterpret_cast<CharIterator>(text_->data()) + text_->size()); }
614 
615  //! Return complete string (for debugging purposes)
616  std::string get_string(const String& s, size_t depth = 0) const
617  { return text_->substr(s + depth); }
618 
619  //! Subset this string set using iterator range.
621  { return StringSuffixSet(*text_, begin, end); }
622 
623  //! Allocate a new temporary string container with n empty Strings
624  Container allocate(size_t n) const
625  { return std::make_pair(*text_, std::vector<String>(n)); }
626 
627  //! Deallocate a temporary string container
628  static void deallocate(Container& c)
629  { std::vector<String> v; v.swap(c.second); }
630 
631  //! Construct from a string container
633  : text_(&c.first),
634  begin_(c.second.begin()), end_(c.second.end())
635  { }
636 
637 protected:
638  //! reference to base text
639  const Text* text_;
640 
641  //! iterators inside the output suffix array.
642  Iterator begin_, end_;
643 };
644 
645 /******************************************************************************/
646 
647 } // namespace sort_strings_detail
648 
649 //! \}
650 
651 } // namespace tlx
652 
653 #endif // !TLX_SORT_STRINGS_STRING_SET_HEADER
654 
655 /******************************************************************************/
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
Definition: string_set.hpp:616
Char * String
String reference: pointer to first character.
Definition: string_set.hpp:281
GenericCharStringSet(const Container &c)
Construct from a string container.
Definition: string_set.hpp:317
Iterator begin() const
Iterator representing first String position.
Definition: string_set.hpp:599
StdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
Definition: string_set.hpp:400
StringSuffixSet(Container &c)
Construct from a string container.
Definition: string_set.hpp:632
GenericCharStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Definition: string_set.hpp:345
uint16_t get_uint16(const typename Traits::String &s, size_t depth) const
Definition: string_set.hpp:184
static void deallocate(Container &c)
Deallocate a temporary string container.
Definition: string_set.hpp:628
Class implementing StringSet concept for a std::unique_ptr<std::string objects, which are non-copyabl...
Definition: string_set.hpp:455
uint8_t get_uint8(const typename Traits::String &s, size_t depth) const
Definition: string_set.hpp:179
std::pair< Text, std::vector< String > > Container
exported alias for assumed string container
Definition: string_set.hpp:568
CharType Char
exported alias for character type
Definition: string_set.hpp:278
Traits::String & at(size_t i) const
index-based array access (readable and writable) to String objects.
Definition: string_set.hpp:52
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
Definition: string_set.hpp:513
static void deallocate(Container &c)
Deallocate a temporary string container.
Definition: string_set.hpp:441
Traits class implementing StringSet concept for char* and unsigned char* strings. ...
Definition: string_set.hpp:274
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
Definition: string_set.hpp:437
Iterator end() const
Iterator representing beyond last String position.
Definition: string_set.hpp:414
static void deallocate(Container &c)
Deallocate a temporary string container.
Definition: string_set.hpp:525
uint8_t Char
exported alias for character type
Definition: string_set.hpp:459
std::pair< Iterator, size_t > Container
exported alias for assumed string container
Definition: string_set.hpp:471
uint32_t get_uint32(const typename Traits::String &s, size_t depth) const
Definition: string_set.hpp:189
const Char * CharIterator
iterator of characters in a string
Definition: string_set.hpp:385
uint64_t get_uint64(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 8 characters of string s at iterator i packed into a uint64_t (only works correctly for ...
Definition: string_set.hpp:148
std::pair< Iterator, size_t > Container
exported alias for assumed string container
Definition: string_set.hpp:290
StringSuffixSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Definition: string_set.hpp:620
UPtrStdStringSet(Container &c)
Construct from a string container.
Definition: string_set.hpp:489
uint64_t get_uint64(const typename Traits::String &s, size_t depth) const
Definition: string_set.hpp:194
Class implementing StringSet concept for suffix sorting indexes of a std::string text object...
Definition: string_set.hpp:548
Class implementing StringSet concept for char* and unsigned char* strings.
Definition: string_set.hpp:297
GenericCharStringSet< const char > CCharStringSet
Definition: string_set.hpp:364
GenericCharStringSet< char > CharStringSet
Definition: string_set.hpp:361
UPtrStdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
Definition: string_set.hpp:484
Class implementing StringSet concept for arrays of std::string objects.
Definition: string_set.hpp:394
StringSet subi(size_t begin, size_t end) const
Subset this string set using index range.
Definition: string_set.hpp:202
enable_if< sizeof(Type)==4, uint32_t >::type get_key(const StringSet &strset, const typename StringSet::String &s, size_t depth)
Definition: string_set.hpp:248
std::pair< Iterator, size_t > Container
exported alias for assumed string container
Definition: string_set.hpp:388
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
Definition: string_set.hpp:505
GenericCharStringSet< const unsigned char > CUCharStringSet
Definition: string_set.hpp:365
String * Iterator
Iterator over string references: using std::vector&#39;s iterator.
Definition: string_set.hpp:465
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
Definition: string_set.hpp:429
bool is_less(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
Definition: string_set.hpp:70
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
Definition: string_set.hpp:341
uint8_t Char
exported alias for character type
Definition: string_set.hpp:555
Class implementing StringSet concept for a std::string objects.
Definition: string_set.hpp:372
String * Iterator
Iterator over string references: pointer to std::string.
Definition: string_set.hpp:382
size_t size() const
Return size of string array.
Definition: string_set.hpp:494
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
Definition: string_set.hpp:509
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
Definition: string_set.hpp:349
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
Definition: string_set.hpp:608
uint16_t get_uint16(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 2 characters of string s at iterator i packed into a uint16_t (only works correctly for ...
Definition: string_set.hpp:112
bool check_order(const typename Traits::String &s1, const typename Traits::String &s2) const
Definition: string_set.hpp:207
const Char * CharIterator
iterator of characters in a string
Definition: string_set.hpp:468
SFINAE enable_if – copy of std::enable_if<> with less extra cruft.
Definition: enable_if.hpp:21
GenericCharStringSetTraits< CharType > Traits
Definition: string_set.hpp:303
uint8_t Char
exported alias for character type
Definition: string_set.hpp:376
Text::size_type String
String reference: suffix index of the text.
Definition: string_set.hpp:558
Traits::Char get_char(const typename Traits::String &s, size_t depth) const
Definition: string_set.hpp:95
bool is_leq(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
Definition: string_set.hpp:80
StringSuffixSet(const Text &text, const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
Definition: string_set.hpp:581
bool is_equal(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check equality of two strings a and b at char iterators ai and bi.
Definition: string_set.hpp:61
StdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Definition: string_set.hpp:433
Class implementing StringSet concept for suffix sorting indexes of a std::string text object...
Definition: string_set.hpp:575
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
Definition: string_set.hpp:612
GenericCharStringSet< unsigned char > UCharStringSet
Definition: string_set.hpp:362
Iterator end() const
Iterator representing beyond last String position.
Definition: string_set.hpp:498
const Char * CharIterator
iterator of characters in a string
Definition: string_set.hpp:565
Iterator end() const
Iterator representing beyond last String position.
Definition: string_set.hpp:326
GenericCharStringSet(Iterator begin, Iterator end)
Construct from begin and end string pointers.
Definition: string_set.hpp:312
StdStringSet(Container &c)
Construct from a string container.
Definition: string_set.hpp:405
std::unique_ptr< std::string > String
String reference: std::string, which should be reference counted.
Definition: string_set.hpp:462
uint32_t get_uint32(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 4 characters of string s at iterator i packed into a uint32_t (only works correctly for ...
Definition: string_set.hpp:127
static void deallocate(Container &c)
Deallocate a temporary string container.
Definition: string_set.hpp:353
std::string String
String reference: std::string, which should be reference counted.
Definition: string_set.hpp:379
static StringSuffixSet Initialize(const Text &text, std::vector< String > &sa)
Initializing constructor which fills output vector sa with indices.
Definition: string_set.hpp:589
Class implementing StringSet concept for a std::vector containing std::string objects.
Definition: string_set.hpp:478
Container allocate(size_t n) const
Allocate a new temporary string container with n empty Strings.
Definition: string_set.hpp:624
Iterator end() const
Iterator representing beyond last String position.
Definition: string_set.hpp:601
Iterator begin() const
Iterator representing first String position.
Definition: string_set.hpp:496
const Char * CharIterator
iterator of characters in a string
Definition: string_set.hpp:287
size_t size() const
Return size of string array.
Definition: string_set.hpp:410
size_t size() const
Return size of string array.
Definition: string_set.hpp:597
UPtrStdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Definition: string_set.hpp:517
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
Definition: string_set.hpp:337
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
Definition: string_set.hpp:421
Base class for common string set functions, included via CRTP.
Definition: string_set.hpp:48
#define TLX_LOG1
Definition: core.hpp:145
uint8_t get_uint8(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 1 characters of string s at iterator i packed into a uint8_t (only works correctly for 8...
Definition: string_set.hpp:102
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belong to this set.
Definition: string_set.hpp:333
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
Definition: string_set.hpp:425
std::string Text
exported alias for assumed string container
Definition: string_set.hpp:552
Iterator begin() const
Iterator representing first String position.
Definition: string_set.hpp:324
std::vector< String >::iterator Iterator
Iterator over string references: using std::vector&#39;s iterator over suffix array vector.
Definition: string_set.hpp:562
Type get_key_at(const StringSet &strset, size_t idx, size_t depth)
Definition: string_set.hpp:263
Iterator begin() const
Iterator representing first String position.
Definition: string_set.hpp:412
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
Definition: string_set.hpp:521
String * Iterator
Iterator over string references: pointer over pointers.
Definition: string_set.hpp:284
size_t size() const
Return size of string array.
Definition: string_set.hpp:322
const Text * text_
reference to base text
Definition: string_set.hpp:639