libpqxx
The C++ client library for PostgreSQL
array.hxx
1 /* Handling of SQL arrays.
2  *
3  * DO NOT INCLUDE THIS FILE DIRECTLY; include pqxx/field instead.
4  *
5  * Copyright (c) 2000-2024, Jeroen T. Vermeulen.
6  *
7  * See COPYING for copyright license. If you did not receive a file called
8  * COPYING with this source code, please notify the distributor of this
9  * mistake, or contact the author.
10  */
11 #ifndef PQXX_H_ARRAY
12 #define PQXX_H_ARRAY
13 
14 #if !defined(PQXX_HEADER_PRE)
15 # error "Include libpqxx headers as <pqxx/header>, not <pqxx/header.hxx>."
16 #endif
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <stdexcept>
21 #include <string>
22 #include <type_traits>
23 #include <utility>
24 #include <vector>
25 
26 #include "pqxx/connection.hxx"
27 #include "pqxx/internal/array-composite.hxx"
28 #include "pqxx/internal/encoding_group.hxx"
29 #include "pqxx/internal/encodings.hxx"
30 
31 
32 namespace pqxx
33 {
34 // TODO: Specialise for string_view/zview, allocate all strings in one buffer.
35 
37 
52 template<
53  typename ELEMENT, std::size_t DIMENSIONS = 1u,
54  char SEPARATOR = array_separator<ELEMENT>>
55 class array final
56 {
57 public:
59 
68  array(std::string_view data, connection const &conn) :
69  array{data, pqxx::internal::enc_group(conn.encoding_id())}
70  {}
71 
73 
75  constexpr std::size_t dimensions() noexcept { return DIMENSIONS; }
76 
78 
82  std::array<std::size_t, DIMENSIONS> const &sizes() noexcept
83  {
84  return m_extents;
85  }
86 
87  template<typename... INDEX> ELEMENT const &at(INDEX... index) const
88  {
89  static_assert(sizeof...(index) == DIMENSIONS);
90  check_bounds(index...);
91  return m_elts.at(locate(index...));
92  }
93 
95 
103  template<typename... INDEX> ELEMENT const &operator[](INDEX... index) const
104  {
105  static_assert(sizeof...(index) == DIMENSIONS);
106  return m_elts[locate(index...)];
107  }
108 
110 
115  constexpr auto cbegin() const noexcept { return m_elts.cbegin(); }
117  constexpr auto cend() const noexcept { return m_elts.cend(); }
119  constexpr auto crbegin() const noexcept { return m_elts.crbegin(); }
121  constexpr auto crend() const noexcept { return m_elts.crend(); }
122 
124 
127  constexpr std::size_t size() const noexcept { return m_elts.size(); }
128 
130 
145  constexpr auto ssize() const noexcept
146  {
147  return static_cast<std::ptrdiff_t>(size());
148  }
149 
151 
153  constexpr auto front() const noexcept { return m_elts.front(); }
154 
156 
158  constexpr auto back() const noexcept { return m_elts.back(); }
159 
160 private:
162 
170  void check_dims(std::string_view data)
171  {
172  auto sz{std::size(data)};
173  if (sz < DIMENSIONS * 2)
174  throw conversion_error{pqxx::internal::concat(
175  "Trying to parse a ", DIMENSIONS, "-dimensional array out of '", data,
176  "'.")};
177 
178  // Making some assumptions here:
179  // * The array holds no extraneous whitespace.
180  // * None of the sub-arrays can be null.
181  // * Only ASCII characters start off with a byte in the 0-127 range.
182  //
183  // Given those, the input must start with a sequence of DIMENSIONS bytes
184  // with the ASCII value for '{'; and likewise it must end with a sequence
185  // of DIMENSIONS bytes with the ASCII value for '}'.
186 
187  if (data[0] != '{')
188  throw conversion_error{"Malformed array: does not start with '{'."};
189  for (std::size_t i{0}; i < DIMENSIONS; ++i)
190  if (data[i] != '{')
191  throw conversion_error{pqxx::internal::concat(
192  "Expecting ", DIMENSIONS, "-dimensional array, but found ", i, ".")};
193  if (data[DIMENSIONS] == '{')
194  throw conversion_error{pqxx::internal::concat(
195  "Tried to parse ", DIMENSIONS,
196  "-dimensional array from array data that has more dimensions.")};
197  for (std::size_t i{0}; i < DIMENSIONS; ++i)
198  if (data[sz - 1 - i] != '}')
199  throw conversion_error{
200  "Malformed array: does not end in the right number of '}'."};
201  }
202 
203  explicit array(std::string_view data, pqxx::internal::encoding_group enc)
204  {
205  using group = pqxx::internal::encoding_group;
206  switch (enc)
207  {
208  case group::MONOBYTE: parse<group::MONOBYTE>(data); break;
209  case group::BIG5: parse<group::BIG5>(data); break;
210  case group::EUC_CN: parse<group::EUC_CN>(data); break;
211  case group::EUC_JP: parse<group::EUC_JP>(data); break;
212  case group::EUC_KR: parse<group::EUC_KR>(data); break;
213  case group::EUC_TW: parse<group::EUC_TW>(data); break;
214  case group::GB18030: parse<group::GB18030>(data); break;
215  case group::GBK: parse<group::GBK>(data); break;
216  case group::JOHAB: parse<group::JOHAB>(data); break;
217  case group::MULE_INTERNAL: parse<group::MULE_INTERNAL>(data); break;
218  case group::SJIS: parse<group::SJIS>(data); break;
219  case group::UHC: parse<group::UHC>(data); break;
220  case group::UTF8: parse<group::UTF8>(data); break;
221  default: PQXX_UNREACHABLE; break;
222  }
223  }
224 
226 
229  std::size_t parse_field_end(std::string_view data, std::size_t here) const
230  {
231  auto const sz{std::size(data)};
232  if (here < sz)
233  switch (data[here])
234  {
235  case SEPARATOR:
236  ++here;
237  if (here >= sz)
238  throw conversion_error{"Array looks truncated."};
239  switch (data[here])
240  {
241  case SEPARATOR:
242  throw conversion_error{"Array contains double separator."};
243  case '}': throw conversion_error{"Array contains trailing separator."};
244  default: break;
245  }
246  break;
247  case '}': break;
248  default:
249  throw conversion_error{pqxx::internal::concat(
250  "Unexpected character in array: ",
251  static_cast<unsigned>(static_cast<unsigned char>(data[here])),
252  " where separator or closing brace expected.")};
253  }
254  return here;
255  }
256 
258 
263  constexpr std::size_t estimate_elements(std::string_view data) const noexcept
264  {
265  // Dirty trick: just count the number of bytes that look as if they may be
266  // separators. At the very worst we may overestimate by a factor of two or
267  // so, in exceedingly rare cases, on some encodings.
268  auto const separators{
269  std::count(std::begin(data), std::end(data), SEPARATOR)};
270  // The number of dimensions makes no difference here. It's still one
271  // separator between consecutive elements, just possibly with some extra
272  // braces as well.
273  return static_cast<std::size_t>(separators + 1);
274  }
275 
276  template<pqxx::internal::encoding_group ENC>
277  void parse(std::string_view data)
278  {
279  static_assert(DIMENSIONS > 0u, "Can't create a zero-dimensional array.");
280  auto const sz{std::size(data)};
281  check_dims(data);
282 
283  m_elts.reserve(estimate_elements(data));
284 
285  // We discover the array's extents along each of the dimensions, starting
286  // with the final dimension and working our way towards the first. At any
287  // given point during parsing, we know the extents starting at this
288  // dimension.
289  std::size_t know_extents_from{DIMENSIONS};
290 
291  // Currently parsing this dimension. We start off at -1, relying on C++'s
292  // well-defined rollover for unsigned numbers.
293  // The actual outermost dimension of the array is 0, and the innermost is
294  // at the end. But, the array as a whole is enclosed in braces just like
295  // each row. So we act like there's an anomalous "outer" dimension holding
296  // the entire array.
297  constexpr std::size_t outer{std::size_t{0u} - std::size_t{1u}};
298 
299  // We start parsing at the fictional outer dimension. The input begins
300  // with opening braces, one for each dimension, so we'll start off by
301  // bumping all the way to the innermost dimension.
302  std::size_t dim{outer};
303 
304  // Extent counters, one per "real" dimension.
305  // Note initialiser syntax; this zero-initialises all elements.
306  std::array<std::size_t, DIMENSIONS> extents{};
307 
308  // Current parsing position.
309  std::size_t here{0};
310  PQXX_ASSUME(here <= sz);
311  while (here < sz)
312  {
313  if (data[here] == '{')
314  {
315  if (dim == outer)
316  {
317  // This must be the initial opening brace.
318  if (know_extents_from != DIMENSIONS)
319  throw conversion_error{
320  "Array text representation closed and reopened its outside "
321  "brace pair."};
322  assert(here == 0);
323  PQXX_ASSUME(here == 0);
324  }
325  else
326  {
327  if (dim >= (DIMENSIONS - 1))
328  throw conversion_error{
329  "Array seems to have inconsistent number of dimensions."};
330  ++extents[dim];
331  }
332  // (Rolls over to zero if we're coming from the outer dimension.)
333  ++dim;
334  extents[dim] = 0u;
335  ++here;
336  }
337  else if (data[here] == '}')
338  {
339  if (dim == outer)
340  throw conversion_error{"Array has spurious '}'."};
341  if (dim < know_extents_from)
342  {
343  // We just finished parsing our first row in this dimension.
344  // Now we know the array dimension's extent.
345  m_extents[dim] = extents[dim];
346  know_extents_from = dim;
347  }
348  else
349  {
350  if (extents[dim] != m_extents[dim])
351  throw conversion_error{"Rows in array have inconsistent sizes."};
352  }
353  // Bump back down to the next-lower dimension. Which may be the outer
354  // dimension, through underflow.
355  --dim;
356  ++here;
357  here = parse_field_end(data, here);
358  }
359  else
360  {
361  // Found an array element. The actual elements always live in the
362  // "inner" dimension.
363  if (dim != DIMENSIONS - 1)
364  throw conversion_error{
365  "Malformed array: found element where sub-array was expected."};
366  assert(dim != outer);
367  ++extents[dim];
368  std::size_t end;
369  switch (data[here])
370  {
371  case '\0': throw conversion_error{"Unexpected zero byte in array."};
372  case ',': throw conversion_error{"Array contains empty field."};
373  case '"': {
374  // Double-quoted string. We parse it into a buffer before parsing
375  // the resulting string as an element. This seems wasteful: the
376  // string might not contain any special characters. So it's
377  // tempting to check, and try to use a string_view and avoid a
378  // useless copy step. But. Even besides the branch prediction
379  // risk, the very fact that the back-end chose to quote the string
380  // indicates that there is some kind of special character in there.
381  // So in practice, this optimisation would only apply if the only
382  // special characters in the string were commas.
383  end = pqxx::internal::scan_double_quoted_string<ENC>(
384  std::data(data), std::size(data), here);
385  // TODO: scan_double_quoted_string() with reusable buffer.
386  std::string const buf{
387  pqxx::internal::parse_double_quoted_string<ENC>(
388  std::data(data), end, here)};
389  m_elts.emplace_back(from_string<ELEMENT>(buf));
390  }
391  break;
392  default: {
393  // Unquoted string. An unquoted string is always literal, no
394  // escaping or encoding, so we don't need to parse it into a
395  // buffer. We can just read it as a string_view.
396  end = pqxx::internal::scan_unquoted_string<ENC, SEPARATOR, '}'>(
397  std::data(data), std::size(data), here);
398  std::string_view const field{
399  std::string_view{std::data(data) + here, end - here}};
400  if (field == "NULL")
401  {
402  if constexpr (nullness<ELEMENT>::has_null)
403  m_elts.emplace_back(nullness<ELEMENT>::null());
404  else
405  throw unexpected_null{pqxx::internal::concat(
406  "Array contains a null ", type_name<ELEMENT>,
407  ". Consider making it an array of std::optional<",
408  type_name<ELEMENT>, "> instead.")};
409  }
410  else
411  m_elts.emplace_back(from_string<ELEMENT>(field));
412  }
413  }
414  here = end;
415  PQXX_ASSUME(here <= sz);
416  here = parse_field_end(data, here);
417  }
418  }
419 
420  if (dim != outer)
421  throw conversion_error{"Malformed array; may be truncated."};
422  assert(know_extents_from == 0);
423  PQXX_ASSUME(know_extents_from == 0);
424 
425  init_factors();
426  }
427 
429  void init_factors() noexcept
430  {
431  std::size_t factor{1};
432  for (std::size_t dim{DIMENSIONS - 1}; dim > 0; --dim)
433  {
434  factor *= m_extents[dim];
435  m_factors[dim - 1] = factor;
436  }
437  }
438 
440  template<typename... INDEX> std::size_t locate(INDEX... index) const noexcept
441  {
442  static_assert(
443  sizeof...(index) == DIMENSIONS,
444  "Indexing array with wrong number of dimensions.");
445  return add_index(index...);
446  }
447 
448  template<typename OUTER, typename... INDEX>
449  constexpr std::size_t add_index(OUTER outer, INDEX... indexes) const noexcept
450  {
451  std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
452  if constexpr (sizeof...(indexes) == 0)
453  {
454  return first;
455  }
456  else
457  {
458  static_assert(sizeof...(indexes) < DIMENSIONS);
459  // (Offset by 1 here because the outer dimension is not in there.)
460  constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
461  static_assert(dimension < DIMENSIONS);
462  return first * m_factors[dimension] + add_index(indexes...);
463  }
464  }
465 
467 
469  template<typename OUTER, typename... INDEX>
470  constexpr void check_bounds(OUTER outer, INDEX... indexes) const
471  {
472  std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
473  static_assert(sizeof...(indexes) < DIMENSIONS);
474  // (Offset by 1 here because the outer dimension is not in there.)
475  constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
476  static_assert(dimension < DIMENSIONS);
477  if (first >= m_extents[dimension])
478  throw range_error{pqxx::internal::concat(
479  "Array index for dimension ", dimension, " is out of bounds: ", first,
480  " >= ", m_extents[dimension])};
481 
482  // Now check the rest of the indexes, if any.
483  if constexpr (sizeof...(indexes) > 0)
484  check_bounds(indexes...);
485  }
486 
488  std::vector<ELEMENT> m_elts;
489 
491  std::array<std::size_t, DIMENSIONS> m_extents;
492 
494 
501  std::array<std::size_t, DIMENSIONS - 1> m_factors;
502 };
503 
504 
506 
528 class PQXX_LIBEXPORT array_parser
529 {
530 public:
532  enum class juncture
533  {
535  row_start,
537  row_end,
539  null_value,
541  string_value,
543  done,
544  };
545 
547 
551  explicit array_parser(
552  std::string_view input,
553  internal::encoding_group = internal::encoding_group::MONOBYTE);
554 
556 
562  std::pair<juncture, std::string> get_next() { return (this->*m_impl)(); }
563 
564 private:
565  std::string_view m_input;
566 
568  std::size_t m_pos = 0u;
569 
571 
576  using implementation = std::pair<juncture, std::string> (array_parser::*)();
577 
579  static implementation
580  specialize_for_encoding(pqxx::internal::encoding_group enc);
581 
583  implementation m_impl;
584 
586  template<pqxx::internal::encoding_group>
587  std::pair<juncture, std::string> parse_array_step();
588 
589  template<pqxx::internal::encoding_group>
590  std::string::size_type scan_double_quoted_string() const;
591  template<pqxx::internal::encoding_group>
592  std::string parse_double_quoted_string(std::string::size_type end) const;
593  template<pqxx::internal::encoding_group>
594  std::string::size_type scan_unquoted_string() const;
595  template<pqxx::internal::encoding_group>
596  std::string parse_unquoted_string(std::string::size_type end) const;
597 
598  template<pqxx::internal::encoding_group>
599  std::string::size_type scan_glyph(std::string::size_type pos) const;
600  template<pqxx::internal::encoding_group>
601  std::string::size_type
602  scan_glyph(std::string::size_type pos, std::string::size_type end) const;
603 };
604 } // namespace pqxx
605 #endif
Low-level array parser.
Definition: array.hxx:529
juncture
What's the latest thing found in the array?
Definition: array.hxx:533
std::pair< juncture, std::string > get_next()
Parse the next step in the array.
Definition: array.hxx:562
An SQL array received from the database.
Definition: array.hxx:56
constexpr auto back() const noexcept
Refer to the last element, if any.
Definition: array.hxx:158
constexpr auto cend() const noexcept
Return end point of iteration.
Definition: array.hxx:117
std::array< std::size_t, DIMENSIONS > const & sizes() noexcept
Return the sizes of this array in each of its dimensions.
Definition: array.hxx:82
constexpr auto crbegin() const noexcept
Begin reverse iteration.
Definition: array.hxx:119
constexpr std::size_t size() const noexcept
Number of elements in the array.
Definition: array.hxx:127
ELEMENT const & operator[](INDEX... index) const
Access element (without bounds check).
Definition: array.hxx:103
constexpr auto ssize() const noexcept
Number of elements in the array (as a signed number).
Definition: array.hxx:145
constexpr std::size_t dimensions() noexcept
How many dimensions does this array have?
Definition: array.hxx:75
constexpr auto cbegin() const noexcept
Begin iteration of individual elements.
Definition: array.hxx:115
array(std::string_view data, connection const &conn)
Parse an SQL array, read as text from a pqxx::result or stream.
Definition: array.hxx:68
constexpr auto crend() const noexcept
Return end point of reverse iteration.
Definition: array.hxx:121
constexpr auto front() const noexcept
Refer to the first element, if any.
Definition: array.hxx:153
Connection to a database.
Definition: connection.hxx:230
std::string concat(TYPE... item)
Efficiently combine a bunch of items into one big string.
Definition: concat.hxx:31
The home of all libpqxx classes, functions, templates, etc.
Definition: array.cxx:27
static TYPE null()
Return a null value.
static bool has_null
Does this type have a null value?
Definition: strconv.hxx:93