libpqxx
The C++ client library for PostgreSQL
array-composite.hxx
Go to the documentation of this file.
1 #if !defined(PQXX_ARRAY_COMPOSITE_HXX)
2 # define PQXX_ARRAY_COMPOSITE_HXX
3 
4 # include <cassert>
5 
6 # include "pqxx/util.hxx"
7 
9 # include "pqxx/strconv.hxx"
10 
11 namespace pqxx::internal
12 {
// The width in bytes of a single ASCII character.  In other words, one.
//
// Declared `inline` so that all translation units which include this header
// share a single definition of the constant.
inline constexpr std::size_t one_ascii_char{1u};
15 
16 
17 // Find the end of a double-quoted string.
25 template<encoding_group ENC>
26 PQXX_INLINE_COV inline constexpr std::size_t
27 scan_double_quoted_string(std::string_view input, std::size_t pos, sl loc)
28 {
29  assert(input[pos] == '"');
30  auto const sz{std::size(input)};
31 
32  // Skip over the opening double-quote, and after that, any leading
33  // "un-interesting" characters.
34  pos = find_ascii_char<ENC, '"', '\\'>(input, pos + one_ascii_char, loc);
35  while (pos < sz)
36  {
37  // No need to check for a multibyte character here: if it's multibyte, its
38  // first byte won't match either of these ASCII characters.
39  switch (input[pos])
40  {
41  case '"':
42  // Is this the closing quote we're looking for? Scan ahead to find out.
43  pos += one_ascii_char;
44  if (pos >= sz)
45  {
46  // Clear-cut case. This is the closing quote and it's right at the end
47  // of the input.
48  return pos;
49  }
50  else if (input[pos] == '"')
51  {
52  // What we found is a doubled-up double-quote. That's the other way of
53  // escaping them. Why can't this ever be simple?
54  pos += one_ascii_char;
55  if (pos >= sz)
56  throw argument_error{
57  "Unexpected end of string: double double-quote."};
58  }
59  else
60  {
61  // This was the closing quote (though not at the end of the input).
62  // We are now at the one-past-end position.
63  return pos;
64  }
65  break;
66 
67  case '\\':
68  // Backslash escape. Move on to the next character, so that at the end
69  // of the iteration we'll skip right over it.
70  pos += one_ascii_char;
71  if (pos >= sz)
72  throw argument_error{"Unexpected end of string: backslash.", loc};
73 
74  if ((input[pos] == '\\') or (input[pos] == '"'))
75  {
76  // As you'd expect: the backslash escapes a double-quote, or another
77  // backslash. Move past it, or the find_ascii_char<>() at the end of
78  // the iteration will just stop here again.
79  pos += one_ascii_char;
80  if (pos >= sz)
81  throw argument_error{
82  "Unexpected end of string: escape sequence.", loc};
83  }
84  break;
85  }
86 
87  // We've reached the end of one iteration without reaching the end of the
88  // string.
89  pos = find_ascii_char<ENC, '"', '\\'>(input, pos, loc);
90  }
91 
92  // If we got here, we never found the closing double-quote.
93  throw argument_error{
94  "Missing closing double-quote: " + std::string{input}, loc};
95 }
96 
97 
98 // TODO: Needs version with caller-supplied buffer.
100 
104 template<encoding_group ENC>
105 PQXX_INLINE_COV inline constexpr std::string
106 parse_double_quoted_string(std::string_view input, std::size_t pos, sl loc)
107 {
108  std::string output;
109  auto const end{std::size(input)};
110  assert((end - pos) > 1);
111  assert(input[end - 1] == '"');
112 
113  // Maximum output size is same as the input size, minus the opening and
114  // closing quotes. Or in the extreme opposite case, the real number could be
115  // half that. Usually it'll be a pretty close estimate.
116  output.reserve(std::size_t(end - pos - 2));
117 
118  auto const closing_quote{end - 1};
119 
120  // We're at the starting quote. Skip it.
121  assert(pos < closing_quote);
122  assert(input[pos] == '"');
123  pos += one_ascii_char;
124  assert(pos <= closing_quote);
125 
126  // In theory, the closing quote should mean that there's no need for the
127  // find_ascii_char() call to check for end-of-string inside its loop. Not
128  // sure whether the compiler will be smart enough to see that though.
129  assert(input[closing_quote] == '"');
130 
131  while (pos < closing_quote)
132  {
133  auto const next{find_ascii_char<ENC, '"', '\\'>(input, pos, loc)};
134  output.append(input.substr(pos, next - pos));
135  pos = next;
136  assert(pos <= closing_quote);
137  assert((input[pos] == '"') or (input[pos] == '\\'));
138 
139  if (pos >= closing_quote)
140  return output;
141 
142  // We're at either a backslash or a double-quote... and we're not at the
143  // closing quote. Therefore, we're at an escape character. Skip it.
144  pos += one_ascii_char;
145 
146  // We are now at the escaped character.
147  // If the input has been scanned correctly, the string can't end here.
148  assert(pos < closing_quote);
149 
150  if ((input[pos] == '"') or (input[pos] == '\\'))
151  {
152  // We know this is a single-byte character. Append that (skipping the
153  // escaping character) and move on to the next character.
154  output.push_back(input[pos]);
155  pos += one_ascii_char;
156  }
157  else
158  {
159  // This could be a multibyte character. But no matter: we can let the
160  // next iteration handle it like any run-of-the-mill character.
161  }
162  }
163  assert(pos == closing_quote);
164 
165  return output;
166 }
167 
168 
170 
177 template<encoding_group ENC, char... STOP>
178 PQXX_INLINE_COV inline constexpr std::size_t
179 scan_unquoted_string(std::string_view input, std::size_t pos, sl loc)
180 {
181  return find_ascii_char<ENC, STOP...>(input, pos, loc);
182 }
183 
184 
186 
191 template<encoding_group ENC>
192 PQXX_INLINE_ONLY inline constexpr std::string_view
193 parse_unquoted_string(std::string_view input, std::size_t pos, sl)
194 {
195  return input.substr(pos);
196 }
197 
198 
200 
// Parse a field of a composite-type value.
//
// `input` is the text of the composite value, and `pos` is the offset in it
// where the field starts.  On return, `pos` is just past the separator (or the
// closing parenthesis/bracket) that follows the field, `field` holds the
// parsed value, and `index` has been incremented.
//
// `index` is the number of the field being parsed and `last_field` is the
// number of the final field that the caller expects; `index` must not exceed
// it.  `loc` is reported in any errors.
//
// Throws `conversion_error` if the field is null but `T` does not support
// nulls, if the input ends prematurely, or if the field is not followed by
// the separator or terminator that its position calls for.
template<encoding_group ENC, typename T>
PQXX_INLINE_COV inline void parse_composite_field(
  std::size_t &index, std::string_view input, std::size_t &pos, T &field,
  std::size_t last_field, sl loc)
{
  assert(index <= last_field);
  assert(pos < std::size(input));
  conversion_context const c{ENC, loc};

  // Expect a field.
  switch (input[pos])
  {
  case ',':
  case ')':
  case ']':
    // The field is empty, i.e, null.
    if constexpr (has_null<T>())
      field = make_null<T>();
    else
      throw conversion_error{
        std::format(
          "Can't read composite field {}: C++ type {} does not support nulls.",
          to_string(index), name_type<T>()),
        loc};
    break;

  case '"': {
    auto const stop{scan_double_quoted_string<ENC>(input, pos, loc)};
    PQXX_ASSUME(stop > pos);
    auto const text{
      parse_double_quoted_string<ENC>(input.substr(0, stop), pos, loc)};
    field = from_string<T>(text, c);
    pos = stop;
  }
  break;

  default: {
    // Parse an unquoted string field.  It ends when we see a comma (meaning
    // there's a next field after it), or a closing parenthesis or bracket
    // (meaning we're at the last field).
    auto const stop{scan_unquoted_string<ENC, ',', ')', ']'>(input, pos, loc)};
    PQXX_ASSUME(stop >= pos);
    field = from_string<T>(input.substr(pos, stop - pos), c);
    pos = stop;
  }
  break;
  }

  // End of field.  Expect a comma or a closing parenthesis.

  // Scanning the field may have consumed all remaining input (e.g. when the
  // closing parenthesis is missing).  Don't read past the end.
  if (pos >= std::size(input))
    throw conversion_error{
      std::format("Unexpected end of composite value: '{}'.", input), loc};

  if (index < last_field)
  {
    // There's another field coming after this one.
    if (input[pos] != ',')
      throw conversion_error{
        std::format(
          "Found '{}' in composite value where comma was expected: '{}'.",
          input[pos], input),
        loc};
    pos += one_ascii_char;
  }
  else
  {
    // We're parsing the last field.
    if (input[pos] == ',')
      throw conversion_error{
        std::format(
          "Composite value contained more fields than the expected {}: '{}'.",
          to_string(last_field, c), input),
        loc};
    if (input[pos] != ')' and input[pos] != ']')
      throw conversion_error{
        std::format(
          "Composite value has unexpected characters where closing "
          "parenthesis "
          "was expected: '{}'.",
          input),
        loc};

    pos += one_ascii_char;

    if (pos != std::size(input))
      throw conversion_error{
        std::format(
          "Composite value has unexpected text after closing parenthesis: "
          "'{}'.",
          input),
        loc};
  }
  ++index;
}
314 
315 
317 template<typename T>
318 using composite_field_parser = void (*)(
319  std::size_t &index, std::string_view input, std::size_t &pos, T &field,
320  std::size_t last_field, sl loc);
321 
322 
// Look up implementation of parse_composite_field for ENC.
//
// Returns a pointer to the `parse_composite_field()` instantiation for field
// type `T` and the encoding group in `c.enc`.
//
// Throws `usage_error` if the encoding group is `unknown`, or `internal_error`
// if `c.enc` holds a value that is not one of the known encoding groups.
template<typename T>
PQXX_INLINE_COV inline constexpr composite_field_parser<T>
specialize_parse_composite_field(conversion_context const &c)
{
  switch (c.enc)
  {
  case encoding_group::unknown:
    throw usage_error{
      "Tried to parse array/composite without knowing its text encoding.",
      c.loc};

  case encoding_group::ascii_safe:
    return parse_composite_field<encoding_group::ascii_safe>;
  case encoding_group::two_tier:
    return parse_composite_field<encoding_group::two_tier>;
  case encoding_group::gb18030:
    return parse_composite_field<encoding_group::gb18030>;
  case encoding_group::sjis:
    return parse_composite_field<encoding_group::sjis>;
  }
  // Only reachable if c.enc is not a valid enumerator.
  throw internal_error{
    std::format(
      "Unexpected encoding group code: {}.",
      static_cast<std::underlying_type_t<encoding_group>>(c.enc)),
    c.loc};
}
350 
351 
353 template<typename T>
355 {
356  if constexpr (is_unquoted_safe<T>)
357  {
358  // Safe to copy, without quotes or escaping. Drop the terminating zero.
359  return size_buffer(field) - 1;
360  }
361  else
362  {
363  // + Opening quote.
364  // + Field budget.
365  // - Terminating zero.
366  // + Escaping for each byte in the field's string representation.
367  // - Escaping for terminating zero.
368  // + Closing quote.
369  return 1 + 2 * (size_buffer(field) - 1) + 1;
370  }
371 }
372 
373 
374 template<typename T>
376  std::span<char> buf, std::size_t &pos, T const &field, ctx c)
377 {
378  if constexpr (is_unquoted_safe<T>)
379  {
380  // No need for quoting or escaping. Convert it straight into its final
381  // place in the buffer.
382  pos += into_buf(buf.subspan(pos), field, c);
383  }
384  else
385  {
386  // The field may need escaping, which means we need an intermediate buffer.
387  // To avoid allocating that at run time, we use the end of the buffer that
388  // we have.
389  auto const budget{size_buffer(field)};
390  assert(budget < std::size(buf));
391  // C++26: Use buf.at().
392  buf[pos++] = '"';
393 
394  // Now escape buf into its final position.
395  for (char const x : to_buf(buf.last(budget), field, c))
396  {
397  if ((x == '"') or (x == '\\'))
398  // C++26: Use buf.at().
399  buf[pos++] = '\\';
400 
401  // C++26: Use buf.at().
402  buf[pos++] = x;
403  }
404 
405  // C++26: Use buf.at().
406  buf[pos++] = '"';
407  }
408 
409  // C++26: Use buf.at().
410  buf[pos++] = ',';
411 }
412 
413 
415 
// Write an SQL array representation of `value` into `buf`.
//
// `budget` is the amount of buffer space that the caller estimated for the
// conversion; throws `conversion_overrun` if `buf` is smaller than that.
// Returns the number of bytes written, from the opening '{' up to and
// including the closing '}'.
//
// Null elements are written as NULL.  Nested arrays and "unquoted-safe"
// elements are converted straight into place.  Any other element is written
// between double-quotes, with a backslash in front of each double-quote or
// backslash character in it.
template<nonbinary_range TYPE>
[[nodiscard]] PQXX_INLINE_COV inline std::size_t array_into_buf(
  std::span<char> buf, TYPE const &value, std::size_t budget, ctx c)
{
  using elt_type = std::remove_cvref_t<value_type<TYPE>>;

  if (std::cmp_less(std::size(buf), budget))
    throw conversion_overrun{
      "Not enough buffer space to convert array to string.", c.loc};

  // Offset in buf where the next output byte goes.
  std::size_t out{0u};
  // C++26: Use buf.at().
  buf[out++] = '{';

  bool wrote_any{false};
  for (auto const &elt : value)
  {
    static constexpr zview s_null{"NULL"};
    if (is_null(elt))
    {
      out = copy_chars<false>(s_null, buf, out, c.loc);
    }
    else if constexpr (is_sql_array<elt_type>)
    {
      // A nested array renders itself, in place.
      out += pqxx::into_buf(buf.subspan(out), elt, c);
    }
    else if constexpr (is_unquoted_safe<elt_type>)
    {
      // Needs neither quotes nor escapes, so convert it right where it goes.
      out += pqxx::into_buf(buf.subspan(out), elt, c);
    }
    else
    {
      // This element gets quoted and escaped.

      // C++26: Use buf.at().
      buf[out++] = '"';

      // Convert the element into the tail end of the destination buffer
      // first.  That serves as our intermediate buffer.
      auto const elt_budget{pqxx::size_buffer(elt)};
      assert(std::cmp_less(elt_budget, std::size(buf) - out));
      auto const raw{pqxx::to_buf(buf.last(elt_budget), elt, c)};
      auto const raw_size{std::size(raw)};
      auto const find_special{get_char_finder<'\\', '"'>(c.enc, c.loc)};

      // Move the text from the intermediate buffer to its final place, one
      // run of ordinary text at a time, putting a backslash before each
      // special character.  The finder is what handles the encoding.
      std::size_t in{0};
      while (in < raw_size)
      {
        auto const stop{find_special(raw, in, c.loc)};
        auto const run{stop - in};
        if (std::cmp_greater(out + run, std::size(buf)))
          throw conversion_overrun{
            std::format(
              "Text copy exceeded buffer space: tried to copy {} bytes "
              "into a buffer of {} bytes at offset {} ('{}').",
              run, std::size(buf), out, raw.substr(in)),
            c.loc};
        std::memmove(std::data(buf) + out, std::data(raw) + in, run);
        out += run;
        in = stop;
        if (stop < raw_size)
        {
          // Stopped at a quote or a backslash.  Write the escape character,
          // which is always a simple single ASCII byte...
          // C++26: Use buf.at().
          buf[out++] = '\\';
          // ...and then the escaped character itself, also a single ASCII
          // byte.
          // C++26: Use buf.at().
          // TODO: Can we restructure this to leave that to the next iteration?
          buf[out++] = raw[in++];
        }
      }
      // Copy any final text.
      out = copy_chars<false>(
        {std::data(raw) + in, raw_size - in}, buf, out, c.loc);

      // C++26:Use buf.at().
      buf[out++] = '"';
    }
    // C++26:Use buf.at().
    buf[out++] = array_separator<elt_type>;
    wrote_any = true;
  }

  // The last element, if there was one, left a separator that we don't want.
  if (wrote_any)
    --out;

  // C++26:Use buf.at().
  buf[out++] = '}';

  return out;
}
516 } // namespace pqxx::internal
517 #endif
Reference to a field in a result set.
Definition: field.hxx:309
Marker-type wrapper: zero-terminated std::string_view.
Definition: zview.hxx:55
Invalid argument passed to libpqxx, similar to std::invalid_argument.
Definition: except.hxx:599
Value conversion failed, e.g. when converting "Hello" to int.
Definition: except.hxx:612
Could not convert value to string: not enough buffer space.
Definition: except.hxx:638
Internal error in libpqxx library.
Definition: except.hxx:558
Error in usage of libpqxx library, similar to std::logic_error.
Definition: except.hxx:580
#define PQXX_ASSUME(condition)
Definition: header-pre.hxx:247
#define PQXX_INLINE_COV
Don't generate out-of-line version of inline function for coverage runs.
Definition: header-pre.hxx:106
#define PQXX_INLINE_ONLY
Definition: header-pre.hxx:91
Private namespace for libpqxx's internal use; do not access.
Definition: connection.cxx:333
PQXX_INLINE_ONLY void write_composite_field(std::span< char > buf, std::size_t &pos, T const &field, ctx c)
Definition: array-composite.hxx:375
PQXX_INLINE_COV void parse_composite_field(std::size_t &index, std::string_view input, std::size_t &pos, T &field, std::size_t last_field, sl loc)
Parse a field of a composite-type value.
Definition: array-composite.hxx:224
PQXX_INLINE_COV std::size_t size_composite_field_buffer(T const &field)
Conservatively estimate buffer size needed for a composite field.
Definition: array-composite.hxx:354
PQXX_PURE PQXX_RETURNS_NONNULL constexpr PQXX_INLINE_COV char_finder_func * get_char_finder(encoding_group enc, sl loc)
Look up a character search function for an encoding group.
Definition: encodings.hxx:304
void(*)(std::size_t &index, std::string_view input, std::size_t &pos, T &field, std::size_t last_field, sl loc) composite_field_parser
Pointer to an encoding-specific specialisation of parse_composite_field.
Definition: array-composite.hxx:320
PQXX_INLINE_COV constexpr PQXX_HOT std::size_t find_ascii_char(std::string_view haystack, std::size_t here, sl loc)
Find any of the ASCII characters in NEEDLE in haystack.
Definition: encodings.hxx:102
constexpr PQXX_INLINE_COV composite_field_parser< T > specialize_parse_composite_field(conversion_context const &c)
Look up implementation of parse_composite_field for ENC.
Definition: array-composite.hxx:326
constexpr PQXX_INLINE_ONLY std::string_view parse_unquoted_string(std::string_view input, std::size_t pos, sl)
Parse an unquoted array entry or field of a composite-type value.
Definition: array-composite.hxx:193
constexpr std::size_t one_ascii_char
Definition: array-composite.hxx:14
constexpr PQXX_INLINE_COV std::string parse_double_quoted_string(std::string_view input, std::size_t pos, sl loc)
Un-quote and un-escape a double-quoted SQL string.
Definition: array-composite.hxx:106
constexpr PQXX_INLINE_COV std::size_t scan_unquoted_string(std::string_view input, std::size_t pos, sl loc)
Find the end of an unquoted string in an array or composite-type value.
Definition: array-composite.hxx:179
PQXX_INLINE_COV std::size_t array_into_buf(std::span< char > buf, TYPE const &value, std::size_t budget, ctx c)
Write an SQL array representation into buf.
Definition: array-composite.hxx:419
constexpr PQXX_INLINE_COV std::size_t scan_double_quoted_string(std::string_view input, std::size_t pos, sl loc)
Definition: array-composite.hxx:27
std::string_view to_buf(std::span< char > buf, TYPE const &value, ctx c={})
Represent value as SQL text, optionally using buf as storage.
Definition: strconv.hxx:430
std::source_location sl
Convenience alias for std::source_location. It's just too long.
Definition: types.hxx:38
PQXX_LIBEXPORT std::string to_string(field_ref const &value, ctx)
Convert a field_ref to a string.
Definition: field.hxx:891
constexpr bool is_unquoted_safe< T >
Definition: conversions.hxx:248
encoding_group
Definition: encoding_group.hxx:40
@ two_tier
Low byte is ASCII, high byte starts a 2-byte character.
@ sjis
Non-ASCII-safe: Japanese JIS and Shift JIS.
@ unknown
Default: indeterminate encoding. All we know is it supports ASCII.
@ ascii_safe
"ASCII-safe" encodings.
@ gb18030
Non-ASCII-safe: GB18030 for Chinese (Traditional & Simplified).
constexpr std::size_t size_buffer(TYPE const &...value) noexcept
Estimate how much buffer space is needed to represent values as a string.
Definition: strconv.hxx:399
constexpr bool is_null(TYPE const &value) noexcept
Is value a null?
Definition: strconv.hxx:764
std::size_t into_buf(std::span< char > buf, TYPE const &value, ctx c={})
Write an SQL representation of value into buf.
Definition: strconv.hxx:454
conversion_context const & ctx
Convenience alias: const reference to a pqxx::conversion_context.
Definition: strconv.hxx:201
format
Format code: is data text or binary?
Definition: types.hxx:121
Contextual parameters for string conversions implementations.
Definition: strconv.hxx:163
sl loc
A std::source_location for the call.
Definition: strconv.hxx:183
encoding_group enc
Encoding group describing the client text encoding.
Definition: strconv.hxx:172