libpqxx
The C++ client library for PostgreSQL
array-composite.hxx
Go to the documentation of this file.
1 #if !defined(PQXX_ARRAY_COMPOSITE_HXX)
2 # define PQXX_ARRAY_COMPOSITE_HXX
3 
4 # include <cassert>
5 
6 # include "pqxx/util.hxx"
7 
9 # include "pqxx/strconv.hxx"
10 
11 namespace pqxx::internal
12 {
/// The width in bytes of a single ASCII character.  In other words, one.
/** Used in position arithmetic so that "step over one ASCII character" reads
 * as such, rather than as a bare `1`.
 *
 * Declared `inline` so that every translation unit including this header
 * shares a single definition instead of each getting its own internal copy.
 */
inline constexpr std::size_t one_ascii_char{1u};
15 
16 
17 // Find the end of a double-quoted string.
25 template<encoding_group ENC>
26 PQXX_INLINE_COV inline constexpr std::size_t
27 scan_double_quoted_string(std::string_view input, std::size_t pos, sl loc)
28 {
29  assert(input[pos] == '"');
30  auto const sz{std::size(input)};
31 
32  // Skip over the opening double-quote, and after that, any leading
33  // "un-interesting" characters.
34  pos = find_ascii_char<ENC, '"', '\\'>(input, pos + one_ascii_char, loc);
35  while (pos < sz)
36  {
37  // No need to check for a multibyte character here: if it's multibyte, its
38  // first byte won't match either of these ASCII characters.
39  switch (input[pos])
40  {
41  case '"':
42  // Is this the closing quote we're looking for? Scan ahead to find out.
43  pos += one_ascii_char;
44  if (pos >= sz)
45  {
46  // Clear-cut case. This is the closing quote and it's right at the end
47  // of the input.
48  return pos;
49  }
50  else if (input[pos] == '"')
51  {
52  // What we found is a doubled-up double-quote. That's the other way of
53  // escaping them. Why can't this ever be simple?
54  pos += one_ascii_char;
55  if (pos >= sz)
56  throw argument_error{
57  "Unexpected end of string: double double-quote."};
58  }
59  else
60  {
61  // This was the closing quote (though not at the end of the input).
62  // We are now at the one-past-end position.
63  return pos;
64  }
65  break;
66 
67  case '\\':
68  // Backslash escape. Move on to the next character, so that at the end
69  // of the iteration we'll skip right over it.
70  pos += one_ascii_char;
71  if (pos >= sz)
72  throw argument_error{"Unexpected end of string: backslash.", loc};
73 
74  if ((input[pos] == '\\') or (input[pos] == '"'))
75  {
76  // As you'd expect: the backslash escapes a double-quote, or another
77  // backslash. Move past it, or the find_ascii_char<>() at the end of
78  // the iteration will just stop here again.
79  pos += one_ascii_char;
80  if (pos >= sz)
81  throw argument_error{
82  "Unexpected end of string: escape sequence.", loc};
83  }
84  break;
85  }
86 
87  // We've reached the end of one iteration without reaching the end of the
88  // string.
89  pos = find_ascii_char<ENC, '"', '\\'>(input, pos, loc);
90  }
91 
92  // If we got here, we never found the closing double-quote.
93  throw argument_error{
94  "Missing closing double-quote: " + std::string{input}, loc};
95 }
96 
97 
98 // TODO: Needs version with caller-supplied buffer.
100 
104 template<encoding_group ENC>
105 PQXX_INLINE_COV inline constexpr std::string
106 parse_double_quoted_string(std::string_view input, std::size_t pos, sl loc)
107 {
108  std::string output;
109  auto const end{std::size(input)};
110  assert((end - pos) > 1);
111  assert(input[end - 1] == '"');
112 
113  // Maximum output size is same as the input size, minus the opening and
114  // closing quotes. Or in the extreme opposite case, the real number could be
115  // half that. Usually it'll be a pretty close estimate.
116  output.reserve(std::size_t(end - pos - 2));
117 
118  auto const closing_quote{end - 1};
119 
120  // We're at the starting quote. Skip it.
121  assert(pos < closing_quote);
122  assert(input[pos] == '"');
123  pos += one_ascii_char;
124  assert(pos <= closing_quote);
125 
126  // In theory, the closing quote should mean that there's no need for the
127  // find_ascii_char() call to check for end-of-string inside its loop. Not
128  // sure whether the compiler will be smart enough to see that though.
129  assert(input[closing_quote] == '"');
130 
131  while (pos < closing_quote)
132  {
133  auto const next{find_ascii_char<ENC, '"', '\\'>(input, pos, loc)};
134  output.append(input.substr(pos, next - pos));
135  pos = next;
136  assert(pos <= closing_quote);
137  assert((input[pos] == '"') or (input[pos] == '\\'));
138 
139  if (pos >= closing_quote)
140  return output;
141 
142  // We're at either a backslash or a double-quote... and we're not at the
143  // closing quote. Therefore, we're at an escape character. Skip it.
144  pos += one_ascii_char;
145 
146  // We are now at the escaped character.
147  // If the input has been scanned correctly, the string can't end here.
148  assert(pos < closing_quote);
149 
150  if ((input[pos] == '"') or (input[pos] == '\\'))
151  {
152  // We know this is a single-byte character. Append that (skipping the
153  // escaping character) and move on to the next character.
154  output.push_back(input[pos]);
155  pos += one_ascii_char;
156  }
157  else
158  {
159  // This could be a multibyte character. But no matter: we can let the
160  // next iteration handle it like any run-of-the-mill character.
161  }
162  }
163  assert(pos == closing_quote);
164 
165  return output;
166 }
167 
168 
170 
177 template<encoding_group ENC, char... STOP>
178 PQXX_INLINE_COV inline constexpr std::size_t
179 scan_unquoted_string(std::string_view input, std::size_t pos, sl loc)
180 {
181  return find_ascii_char<ENC, STOP...>(input, pos, loc);
182 }
183 
184 
186 
191 template<encoding_group ENC>
192 PQXX_INLINE_ONLY inline constexpr std::string_view
193 parse_unquoted_string(std::string_view input, std::size_t pos, sl)
194 {
195  return input.substr(pos);
196 }
197 
198 
200 
/// Parse a field of a composite-type value.
/** Reads one field from `input`, starting at `pos`, and converts it into
 * `field`.  Then checks and steps over what follows the field: a comma if
 * more fields are expected, or the closing parenthesis or bracket if this was
 * the last one.
 *
 * @param index Zero-based number of the field being parsed.  Incremented on
 *   success.
 * @param input The complete text of the composite value.
 * @param pos Offset in `input` where the field starts.  On success, updated
 *   to just past the separator or closing character after the field.
 * @param field Receives the converted value.
 * @param last_field Index of the final field in the composite value.
 * @param loc Source location to report in any exception thrown.
 * @throw conversion_error If the field is empty (i.e. null) but `T` does not
 *   support nulls, or if the text after the field is not what it should be.
 */
template<encoding_group ENC, typename T>
PQXX_INLINE_COV inline void parse_composite_field(
  std::size_t &index, std::string_view input, std::size_t &pos, T &field,
  std::size_t last_field, sl loc)
{
  assert(index <= last_field);
  assert(pos < std::size(input));
  conversion_context const c{ENC, loc};

  // Expect a field.
  switch (input[pos])
  {
  case ',':
  case ')':
  case ']':
    // The field is empty, i.e, null.
    if constexpr (has_null<T>())
      field = make_null<T>();
    else
      throw conversion_error{
        std::format(
          "Can't read composite field {}: C++ type {} does not support nulls.",
          to_string(index), name_type<T>()),
        loc};
    break;

  case '"': {
    auto const stop{scan_double_quoted_string<ENC>(input, pos, loc)};
    PQXX_ASSUME(stop > pos);
    auto const text{
      parse_double_quoted_string<ENC>(input.substr(0, stop), pos, loc)};
    field = from_string<T>(text, c);
    pos = stop;
  }
  break;

  default: {
    // Parse an unquoted string field.  It ends when we see a comma (meaning
    // there's a next field after it), or a closing parenthesis or bracket
    // (meaning we're at the last field).
    auto const stop{
      scan_unquoted_string<ENC, ',', ')', ']'>(input, pos, loc)};
    PQXX_ASSUME(stop >= pos);
    field = from_string<T>(input.substr(pos, stop - pos), c);
    pos = stop;
  }
  break;
  }

  // End of field.  Expect a comma or a closing parenthesis.

  // Scanning may have taken us all the way to the end of the input.  Don't
  // read past it.
  if (pos >= std::size(input))
    throw conversion_error{
      std::format("Unexpected end of composite value: '{}'.", input), loc};

  if (index < last_field)
  {
    // There's another field coming after this one.
    if (input[pos] != ',')
      throw conversion_error{
        std::format(
          "Found '{}' in composite value where comma was expected: '{}'.",
          input[pos], input),
        loc};
    pos += one_ascii_char;
  }
  else
  {
    // We're parsing the last field.
    if (input[pos] == ',')
      throw conversion_error{
        std::format(
          "Composite value contained more fields than the expected {}: '{}'.",
          // Format the view itself.  Its data pointer is not guaranteed to
          // point to a zero-terminated string.
          to_string(last_field, c), input),
        loc};
    if (input[pos] != ')' and input[pos] != ']')
      throw conversion_error{
        std::format(
          "Composite value has unexpected characters where closing "
          "parenthesis "
          "was expected: '{}'.",
          std::string{input}),
        loc};

    pos += one_ascii_char;

    if (pos != std::size(input))
      throw conversion_error{
        std::format(
          "Composite value has unexpected text after closing parenthesis: "
          "'{}'.",
          std::string{input}),
        loc};
  }
  ++index;
}
314 
315 
317 template<typename T>
318 using composite_field_parser = void (*)(
319  std::size_t &index, std::string_view input, std::size_t &pos, T &field,
320  std::size_t last_field, sl loc);
321 
322 
/// Look up implementation of parse_composite_field for ENC.
/** Picks the `parse_composite_field()` specialisation that matches the
 * encoding group in `c`.
 *
 * @param c Conversion context.  Supplies the encoding group, and the source
 *   location to report in any exception thrown.
 * @return Pointer to the matching `parse_composite_field()` specialisation.
 * @throw usage_error If the encoding group is `unknown`.
 * @throw internal_error If the encoding group is none of the known values.
 */
template<typename T>
PQXX_INLINE_COV inline constexpr composite_field_parser<T>
specialize_parse_composite_field(conversion_context const &c)
{
  switch (c.enc)
  {
  case encoding_group::unknown:
    throw usage_error{
      "Tried to parse array/composite without knowing its text encoding.",
      c.loc};

  case encoding_group::ascii_safe:
    return parse_composite_field<encoding_group::ascii_safe>;
  case encoding_group::two_tier:
    return parse_composite_field<encoding_group::two_tier>;
  case encoding_group::gb18030:
    return parse_composite_field<encoding_group::gb18030>;
  case encoding_group::sjis:
    return parse_composite_field<encoding_group::sjis>;
  }
  // Only reachable if c.enc holds a value that is not a known enumerator.
  throw internal_error{
    std::format("Unexpected encoding group code: {}.", to_string(c.enc)),
    c.loc};
}
348 
349 
351 template<typename T>
353 {
354  if constexpr (is_unquoted_safe<T>)
355  {
356  // Safe to copy, without quotes or escaping. Drop the terminating zero.
357  return size_buffer(field) - 1;
358  }
359  else
360  {
361  // + Opening quote.
362  // + Field budget.
363  // - Terminating zero.
364  // + Escaping for each byte in the field's string representation.
365  // - Escaping for terminating zero.
366  // + Closing quote.
367  return 1 + 2 * (size_buffer(field) - 1) + 1;
368  }
369 }
370 
371 
372 template<typename T>
374  std::span<char> buf, std::size_t &pos, T const &field, ctx c)
375 {
376  if constexpr (is_unquoted_safe<T>)
377  {
378  // No need for quoting or escaping. Convert it straight into its final
379  // place in the buffer.
380  pos += into_buf(buf.subspan(pos), field, c);
381  }
382  else
383  {
384  // The field may need escaping, which means we need an intermediate buffer.
385  // To avoid allocating that at run time, we use the end of the buffer that
386  // we have.
387  auto const budget{size_buffer(field)};
388  assert(budget < std::size(buf));
389  // C++26: Use buf.at().
390  buf[pos++] = '"';
391 
392  // Now escape buf into its final position.
393  for (char const x : to_buf(buf.last(budget), field, c))
394  {
395  if ((x == '"') or (x == '\\'))
396  // C++26: Use buf.at().
397  buf[pos++] = '\\';
398 
399  // C++26: Use buf.at().
400  buf[pos++] = x;
401  }
402 
403  // C++26: Use buf.at().
404  buf[pos++] = '"';
405  }
406 
407  // C++26: Use buf.at().
408  buf[pos++] = ',';
409 }
410 
411 
413 
416 template<nonbinary_range TYPE>
417 [[nodiscard]] PQXX_INLINE_COV inline std::size_t array_into_buf(
418  std::span<char> buf, TYPE const &value, std::size_t budget, ctx c)
419 {
420  using elt_type = std::remove_cvref_t<value_type<TYPE>>;
421 
422  if (std::cmp_less(std::size(buf), budget))
423  throw conversion_overrun{
424  "Not enough buffer space to convert array to string.", c.loc};
425 
426  std::size_t here{0u};
427  // C++26: Use buf.at().
428  buf[here++] = '{';
429 
430  bool nonempty{false};
431  for (auto const &elt : value)
432  {
433  static constexpr zview s_null{"NULL"};
434  if (is_null(elt))
435  {
436  here = copy_chars<false>(s_null, buf, here, c.loc);
437  }
438  else if constexpr (is_sql_array<elt_type>)
439  {
440  // Render nested array in-place.
441  here += pqxx::into_buf(buf.subspan(here), elt, c);
442  }
443  else if constexpr (is_unquoted_safe<elt_type>)
444  {
445  // No need to quote or escape. Just convert the value straight into
446  // its place in the array.
447  here += pqxx::into_buf(buf.subspan(here), elt, c);
448  }
449  else
450  {
451  // Quote & escape.
452 
453  // C++26: Use buf.at().
454  buf[here++] = '"';
455 
456  auto const sz{std::size(buf)}, elt_budget{pqxx::size_buffer(elt)};
457  // Use the tail end of the destination buffer as an intermediate
458  // buffer.
459  assert(std::cmp_less(elt_budget, sz - here));
460  auto const from{pqxx::to_buf(buf.last(elt_budget), elt, c)};
461  auto const end{std::size(from)};
462  auto const find{get_char_finder<'\\', '"'>(c.enc, c.loc)};
463 
464  // Copy the intermediate buffer into the final buffer, but escape
465  // using backslashes. The tricky part here is to handle encodings right.
466  std::size_t i{0};
467  while (i < end)
468  {
469  auto next{find(from, i, c.loc)};
470  here =
471  copy_chars<false>({std::data(from) + i, next - i}, buf, here, c.loc);
472  if (next < end)
473  {
474  // We hit either a quote or a backslash. Insert an escape
475  // character (which is always a simple single ASCII byte).
476  // C++26: Use buf.at().
477  buf[here++] = '\\';
478  // C++26: Use buf.at().
479  // Copy the escaped character itself. This is another simple single
480  // ASCII byte.
481  // TODO: Can we restructure this to leave that to the next iteration?
482  buf[here++] = from[next++];
483  }
484  i = next;
485  }
486  // Copy any final text.
487  here =
488  copy_chars<false>({std::data(from) + i, end - i}, buf, here, c.loc);
489 
490  // C++26:Use buf.at().
491  buf[here++] = '"';
492  }
493  // C++26:Use buf.at().
494  buf[here++] = array_separator<elt_type>;
495  nonempty = true;
496  }
497 
498  // Erase that last comma, if present.
499  if (nonempty)
500  here--;
501 
502  // C++26:Use buf.at().
503  buf[here++] = '}';
504 
505  return here;
506 }
507 } // namespace pqxx::internal
508 #endif
Reference to a field in a result set.
Definition: field.hxx:309
Marker-type wrapper: zero-terminated std::string_view.
Definition: zview.hxx:55
Invalid argument passed to libpqxx, similar to std::invalid_argument.
Definition: except.hxx:599
Value conversion failed, e.g. when converting "Hello" to int.
Definition: except.hxx:612
Could not convert value to string: not enough buffer space.
Definition: except.hxx:638
Internal error in libpqxx library.
Definition: except.hxx:558
Error in usage of libpqxx library, similar to std::logic_error.
Definition: except.hxx:580
#define PQXX_ASSUME(condition)
Definition: header-pre.hxx:228
#define PQXX_INLINE_COV
Don't generate out-of-line version of inline function for coverage runs.
Definition: header-pre.hxx:98
#define PQXX_INLINE_ONLY
Definition: header-pre.hxx:83
Private namespace for libpqxx's internal use; do not access.
Definition: connection.cxx:333
PQXX_INLINE_ONLY void write_composite_field(std::span< char > buf, std::size_t &pos, T const &field, ctx c)
Definition: array-composite.hxx:373
PQXX_INLINE_COV void parse_composite_field(std::size_t &index, std::string_view input, std::size_t &pos, T &field, std::size_t last_field, sl loc)
Parse a field of a composite-type value.
Definition: array-composite.hxx:224
PQXX_INLINE_COV std::size_t size_composite_field_buffer(T const &field)
Conservatively estimate buffer size needed for a composite field.
Definition: array-composite.hxx:352
PQXX_PURE PQXX_RETURNS_NONNULL constexpr PQXX_INLINE_COV char_finder_func * get_char_finder(encoding_group enc, sl loc)
Look up a character search function for an encoding group.
Definition: encodings.hxx:310
void(*)(std::size_t &index, std::string_view input, std::size_t &pos, T &field, std::size_t last_field, sl loc) composite_field_parser
Pointer to an encoding-specific specialisation of parse_composite_field.
Definition: array-composite.hxx:320
constexpr PQXX_INLINE_COV composite_field_parser< T > specialize_parse_composite_field(conversion_context const &c)
Look up implementation of parse_composite_field for ENC.
Definition: array-composite.hxx:326
constexpr PQXX_INLINE_ONLY std::string_view parse_unquoted_string(std::string_view input, std::size_t pos, sl)
Parse an unquoted array entry or field of a composite-type value.
Definition: array-composite.hxx:193
constexpr std::size_t one_ascii_char
Definition: array-composite.hxx:14
constexpr PQXX_INLINE_COV std::string parse_double_quoted_string(std::string_view input, std::size_t pos, sl loc)
Un-quote and un-escape a double-quoted SQL string.
Definition: array-composite.hxx:106
constexpr PQXX_INLINE_COV std::size_t scan_unquoted_string(std::string_view input, std::size_t pos, sl loc)
Find the end of an unquoted string in an array or composite-type value.
Definition: array-composite.hxx:179
PQXX_INLINE_COV std::size_t array_into_buf(std::span< char > buf, TYPE const &value, std::size_t budget, ctx c)
Write an SQL array representation into buf.
Definition: array-composite.hxx:417
constexpr PQXX_INLINE_COV std::size_t find_ascii_char(std::string_view haystack, std::size_t here, sl loc)
Find any of the ASCII characters in NEEDLE in haystack.
Definition: encodings.hxx:108
constexpr PQXX_INLINE_COV std::size_t scan_double_quoted_string(std::string_view input, std::size_t pos, sl loc)
Definition: array-composite.hxx:27
std::string_view to_buf(std::span< char > buf, TYPE const &value, ctx c={})
Represent value as SQL text, optionally using buf as storage.
Definition: strconv.hxx:430
std::source_location sl
Convenience alias for std::source_location. It's just too long.
Definition: types.hxx:38
PQXX_LIBEXPORT std::string to_string(field_ref const &value, ctx)
Convert a field_ref to a string.
Definition: field.hxx:891
constexpr bool is_unquoted_safe< T >
Definition: conversions.hxx:244
encoding_group
Definition: encoding_group.hxx:40
@ two_tier
Low byte is ASCII, high byte starts a 2-byte character.
@ sjis
Non-ASCII-safe: Japanese JIS and Shift JIS.
@ unknown
Default: indeterminate encoding. All we know is it supports ASCII.
@ ascii_safe
"ASCII-safe" encodings.
@ gb18030
Non-ASCII-safe: GB18030 for Chinese (Traditional & Simplified).
constexpr std::size_t size_buffer(TYPE const &...value) noexcept
Estimate how much buffer space is needed to represent values as a string.
Definition: strconv.hxx:399
constexpr bool is_null(TYPE const &value) noexcept
Is value a null?
Definition: strconv.hxx:764
std::size_t into_buf(std::span< char > buf, TYPE const &value, ctx c={})
Write an SQL representation of value into buf.
Definition: strconv.hxx:454
conversion_context const & ctx
Convenience alias: const reference to a pqxx::conversion_context.
Definition: strconv.hxx:201
format
Format code: is data text or binary?
Definition: types.hxx:121
Contextual parameters for string conversions implementations.
Definition: strconv.hxx:163
sl loc
A std::source_location for the call.
Definition: strconv.hxx:183
encoding_group enc
Encoding group describing the client text encoding.
Definition: strconv.hxx:172