tlx
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
split_quoted.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/string/split_quoted.cpp
3  *
4  * Part of tlx - http://panthema.net/tlx
5  *
6  * Copyright (C) 2016-2018 Timo Bingmann <tb@panthema.net>
7  *
8  * All rights reserved. Published under the Boost Software License, Version 1.0
9  ******************************************************************************/
10 
12 
13 #include <stdexcept>
14 
namespace tlx {

/*!
 * Split the given string at each separator character into distinct
 * substrings, with support for quoted entries.
 *
 * Parsing rules, in the order they are applied at the start of each entry:
 *  - A separator outside of an entry is skipped, so runs of separators and
 *    leading/trailing separators never produce empty entries.
 *  - An entry starting with \p quote runs until the matching closing quote,
 *    which must be followed directly by a separator or the end of the string.
 *    Inside such an entry \p escape may precede \p quote or \p escape (taken
 *    literally) or one of 'n', 'r', 't' (newline, carriage return, tab).
 *  - Any other entry runs until the next separator or the end of the string
 *    and is copied verbatim; quote and escape characters have no special
 *    meaning inside it.
 *
 * \param str    string to split
 * \param sep    separator character between entries
 * \param quote  character opening and closing a quoted entry
 * \param escape escape character recognized inside quoted entries
 * \return       the parsed entries in order of appearance
 * \throws std::runtime_error if a quoted entry is not terminated, if a
 *         closing quote is followed by something other than a separator or
 *         the end of the string, if the escape character is the last
 *         character of the string, or if it is followed by an unsupported
 *         character.
 */
std::vector<std::string>
split_quoted(const std::string& str, char sep, char quote, char escape) {

    std::vector<std::string> out;

    std::string::const_iterator it = str.begin();
    const std::string::const_iterator end = str.end();

    // accumulates the characters of the entry currently being parsed
    std::string entry;

    while (it != end)
    {
        if (*it == sep) {
            // skip separator outside of fields
            ++it;
        }
        else if (*it == quote) {
            // parse quoted entry: step over the opening quote
            ++it;

            while (true) {
                if (it == end) {
                    throw std::runtime_error(
                              "unmatched end quote in split_quoted().");
                }
                else if (*it == quote) {
                    ++it;
                    if (it == end) {
                        // last quote and end-of-line
                        out.emplace_back(std::move(entry));
                        return out;
                    }
                    else if (*it == sep) {
                        // quote + sep -> end of this entry
                        out.emplace_back(std::move(entry));
                        // a moved-from string is only guaranteed to be in a
                        // valid but unspecified state: reset it explicitly
                        // before it is reused for the next entry.
                        entry.clear();
                        ++it;
                        break;
                    }
                    else {
                        throw std::runtime_error(
                                  std::string("extra quote enclosed in entry,"
                                              " followed by ") + *it);
                    }
                }
                else if (*it == escape) {
                    ++it;
                    if (it == end) {
                        throw std::runtime_error(
                                  "escape as last character in string");
                    }
                    else if (*it == quote) {
                        // escape + quote -> quote
                        entry += *it;
                        ++it;
                    }
                    else if (*it == escape) {
                        // escape + escape -> escape
                        entry += *it;
                        ++it;
                    }
                    else if (*it == 'n') {
                        // escape + n -> new line
                        entry += '\n';
                        ++it;
                    }
                    else if (*it == 'r') {
                        // escape + r -> carriage return
                        entry += '\r';
                        ++it;
                    }
                    else if (*it == 't') {
                        // escape + t -> tab
                        entry += '\t';
                        ++it;
                    }
                    else {
                        // keep a space between the message and the offending
                        // character so the two do not run together.
                        throw std::runtime_error(
                                  std::string("escape followed by "
                                              "unknown character ") + *it);
                    }
                }
                else {
                    // normal character
                    entry += *it;
                    ++it;
                }
            }
        }
        else {
            // parse unquoted entry
            while (true) {
                if (it == end) {
                    // end-of-line
                    out.emplace_back(std::move(entry));
                    return out;
                }
                else if (*it == sep) {
                    // sep -> end of this entry
                    out.emplace_back(std::move(entry));
                    // see above: do not rely on the moved-from state
                    entry.clear();
                    ++it;
                    break;
                }
                else {
                    // normal character
                    entry += *it;
                    ++it;
                }
            }
        }
    }

    return out;
}

/*!
 * Split the given string at each space into distinct substrings, using the
 * double quote as quote character and the backslash as escape character.
 *
 * \param str string to split
 * \return    the parsed entries in order of appearance
 * \throws std::runtime_error on malformed quoting or escaping, as raised by
 *         the four-argument overload.
 */
std::vector<std::string> split_quoted(const std::string& str) {
    return split_quoted(str, ' ', '"', '\\');
}

} // namespace tlx
127 
128 /******************************************************************************/
std::vector< std::string > split_quoted(const std::string &str, char sep, char quote, char escape)
Split the given string at each separator character into distinct substrings.