libpqxx  7.9.0
array.hxx
1 /* Handling of SQL arrays.
2  *
3  * DO NOT INCLUDE THIS FILE DIRECTLY; include pqxx/field instead.
4  *
5  * Copyright (c) 2000-2024, Jeroen T. Vermeulen.
6  *
7  * See COPYING for copyright license. If you did not receive a file called
8  * COPYING with this source code, please notify the distributor of this
9  * mistake, or contact the author.
10  */
11 #ifndef PQXX_H_ARRAY
12 #define PQXX_H_ARRAY
13 
14 #if !defined(PQXX_HEADER_PRE)
15 # error "Include libpqxx headers as <pqxx/header>, not <pqxx/header.hxx>."
16 #endif
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <stdexcept>
21 #include <string>
22 #include <type_traits>
23 #include <utility>
24 #include <vector>
25 
26 #include "pqxx/connection.hxx"
27 #include "pqxx/internal/array-composite.hxx"
28 #include "pqxx/internal/encoding_group.hxx"
29 #include "pqxx/internal/encodings.hxx"
30 
31 
32 namespace pqxx
33 {
34 // TODO: Specialise for string_view/zview, allocate all strings in one buffer.
35 
37 
52 template<
53  typename ELEMENT, std::size_t DIMENSIONS = 1u,
54  char SEPARATOR = array_separator<ELEMENT>>
55 class array final
56 {
57 public:
59 
68  array(std::string_view data, connection const &conn) :
69  array{data, pqxx::internal::enc_group(conn.encoding_id())}
70  {}
71 
73 
75  constexpr std::size_t dimensions() noexcept { return DIMENSIONS; }
76 
78 
82  std::array<std::size_t, DIMENSIONS> const &sizes() noexcept
83  {
84  return m_extents;
85  }
86 
87  template<typename... INDEX> ELEMENT const &at(INDEX... index) const
88  {
89  static_assert(sizeof...(index) == DIMENSIONS);
90  check_bounds(index...);
91  return m_elts.at(locate(index...));
92  }
93 
95 
103  template<typename... INDEX> ELEMENT const &operator[](INDEX... index) const
104  {
105  static_assert(sizeof...(index) == DIMENSIONS);
106  return m_elts[locate(index...)];
107  }
108 
110 
115  constexpr auto cbegin() const noexcept { return m_elts.cbegin(); }
117  constexpr auto cend() const noexcept { return m_elts.cend(); }
119  constexpr auto crbegin() const noexcept { return m_elts.crbegin(); }
121  constexpr auto crend() const noexcept { return m_elts.crend(); }
122 
124 
127  constexpr std::size_t size() const noexcept { return m_elts.size(); }
128 
130 
145  constexpr auto ssize() const noexcept
146  {
147  return static_cast<std::ptrdiff_t>(size());
148  }
149 
151 
153  constexpr auto front() const noexcept { return m_elts.front(); }
154 
156 
158  constexpr auto back() const noexcept { return m_elts.back(); }
159 
160 private:
162 
170  void check_dims(std::string_view data)
171  {
172  auto sz{std::size(data)};
173  if (sz < DIMENSIONS * 2)
174  throw conversion_error{pqxx::internal::concat(
175  "Trying to parse a ", DIMENSIONS, "-dimensional array out of '", data,
176  "'.")};
177 
178  // Making some assumptions here:
179  // * The array holds no extraneous whitespace.
180  // * None of the sub-arrays can be null.
181  // * Only ASCII characters start off with a byte in the 0-127 range.
182  //
183  // Given those, the input must start with a sequence of DIMENSIONS bytes
184  // with the ASCII value for '{'; and likewise it must end with a sequence
185  // of DIMENSIONS bytes with the ASCII value for '}'.
186 
187  if (data[0] != '{')
188  throw conversion_error{"Malformed array: does not start with '{'."};
189  for (std::size_t i{0}; i < DIMENSIONS; ++i)
190  if (data[i] != '{')
191  throw conversion_error{pqxx::internal::concat(
192  "Expecting ", DIMENSIONS, "-dimensional array, but found ", i, ".")};
193  if (data[DIMENSIONS] == '{')
194  throw conversion_error{pqxx::internal::concat(
195  "Tried to parse ", DIMENSIONS,
196  "-dimensional array from array data that has more dimensions.")};
197  for (std::size_t i{0}; i < DIMENSIONS; ++i)
198  if (data[sz - 1 - i] != '}')
199  throw conversion_error{
200  "Malformed array: does not end in the right number of '}'."};
201  }
202 
203  explicit array(std::string_view data, pqxx::internal::encoding_group enc)
204  {
205  using group = pqxx::internal::encoding_group;
206  switch (enc)
207  {
208  case group::MONOBYTE: parse<group::MONOBYTE>(data); break;
209  case group::BIG5: parse<group::BIG5>(data); break;
210  case group::EUC_CN: parse<group::EUC_CN>(data); break;
211  case group::EUC_JP: parse<group::EUC_JP>(data); break;
212  case group::EUC_KR: parse<group::EUC_KR>(data); break;
213  case group::EUC_TW: parse<group::EUC_TW>(data); break;
214  case group::GB18030: parse<group::GB18030>(data); break;
215  case group::GBK: parse<group::GBK>(data); break;
216  case group::JOHAB: parse<group::JOHAB>(data); break;
217  case group::MULE_INTERNAL: parse<group::MULE_INTERNAL>(data); break;
218  case group::SJIS: parse<group::SJIS>(data); break;
219  case group::UHC: parse<group::UHC>(data); break;
220  case group::UTF8: parse<group::UTF8>(data); break;
221  }
222  }
223 
225 
228  std::size_t parse_field_end(std::string_view data, std::size_t here) const
229  {
230  auto const sz{std::size(data)};
231  if (here < sz)
232  switch (data[here])
233  {
234  case SEPARATOR:
235  ++here;
236  if (here >= sz)
237  throw conversion_error{"Array looks truncated."};
238  switch (data[here])
239  {
240  case SEPARATOR:
241  throw conversion_error{"Array contains double separator."};
242  case '}': throw conversion_error{"Array contains trailing separator."};
243  default: break;
244  }
245  break;
246  case '}': break;
247  default:
248  throw conversion_error{pqxx::internal::concat(
249  "Unexpected character in array: ",
250  static_cast<unsigned>(static_cast<unsigned char>(data[here])),
251  " where separator or closing brace expected.")};
252  }
253  return here;
254  }
255 
257 
262  constexpr std::size_t estimate_elements(std::string_view data) const noexcept
263  {
264  // Dirty trick: just count the number of bytes that look as if they may be
265  // separators. At the very worst we may overestimate by a factor of two or
266  // so, in exceedingly rare cases, on some encodings.
267  auto const separators{
268  std::count(std::begin(data), std::end(data), SEPARATOR)};
269  // The number of dimensions makes no difference here. It's still one
270  // separator between consecutive elements, just possibly with some extra
271  // braces as well.
272  return static_cast<std::size_t>(separators + 1);
273  }
274 
275  template<pqxx::internal::encoding_group ENC>
276  void parse(std::string_view data)
277  {
278  static_assert(DIMENSIONS > 0u, "Can't create a zero-dimensional array.");
279  auto const sz{std::size(data)};
280  check_dims(data);
281 
282  m_elts.reserve(estimate_elements(data));
283 
284  // We discover the array's extents along each of the dimensions, starting
285  // with the final dimension and working our way towards the first. At any
286  // given point during parsing, we know the extents starting at this
287  // dimension.
288  std::size_t know_extents_from{DIMENSIONS};
289 
290  // Currently parsing this dimension. We start off at -1, relying on C++'s
291  // well-defined rollover for unsigned numbers.
292  // The actual outermost dimension of the array is 0, and the innermost is
293  // at the end. But, the array as a whole is enclosed in braces just like
294  // each row. So we act like there's an anomalous "outer" dimension holding
295  // the entire array.
296  constexpr std::size_t outer{std::size_t{0u} - std::size_t{1u}};
297 
298  // We start parsing at the fictional outer dimension. The input begins
299  // with opening braces, one for each dimension, so we'll start off by
300  // bumping all the way to the innermost dimension.
301  std::size_t dim{outer};
302 
303  // Extent counters, one per "real" dimension.
304  // Note initialiser syntax; this should zero-initialise all elements.
305  std::array<std::size_t, DIMENSIONS> extents{};
306 #if !defined(NDEBUG)
307  for (auto const e : extents) assert(e == 0u);
308 #endif
309 
310  // Current parsing position.
311  std::size_t here{0};
312  while (here < sz)
313  {
314  if (data[here] == '{')
315  {
316  if (dim == outer)
317  {
318  // This must be the initial opening brace.
319  if (know_extents_from != DIMENSIONS)
320  throw conversion_error{
321  "Array text representation closed and reopened its outside "
322  "brace pair."};
323  assert(here == 0);
324  }
325  else
326  {
327  if (dim >= (DIMENSIONS - 1))
328  throw conversion_error{
329  "Array seems to have inconsistent number of dimensions."};
330  ++extents[dim];
331  }
332  // (Rolls over to zero if we're coming from the outer dimension.)
333  ++dim;
334  extents[dim] = 0u;
335  ++here;
336  }
337  else if (data[here] == '}')
338  {
339  if (dim == outer)
340  throw conversion_error{"Array has spurious '}'."};
341  if (dim < know_extents_from)
342  {
343  // We just finished parsing our first row in this dimension.
344  // Now we know the array dimension's extent.
345  m_extents[dim] = extents[dim];
346  know_extents_from = dim;
347  }
348  else
349  {
350  if (extents[dim] != m_extents[dim])
351  throw conversion_error{"Rows in array have inconsistent sizes."};
352  }
353  // Bump back down to the next-lower dimension. Which may be the outer
354  // dimension, through underflow.
355  --dim;
356  ++here;
357  here = parse_field_end(data, here);
358  }
359  else
360  {
361  // Found an array element. The actual elements always live in the
362  // "inner" dimension.
363  if (dim != DIMENSIONS - 1)
364  throw conversion_error{
365  "Malformed array: found element where sub-array was expected."};
366  assert(dim != outer);
367  ++extents[dim];
368  std::size_t end;
369  switch (data[here])
370  {
371  case '\0': throw conversion_error{"Unexpected zero byte in array."};
372  case ',': throw conversion_error{"Array contains empty field."};
373  case '"': {
374  // Double-quoted string. We parse it into a buffer before parsing
375  // the resulting string as an element. This seems wasteful: the
376  // string might not contain any special characters. So it's
377  // tempting to check, and try to use a string_view and avoid a
378  // useless copy step. But. Even besides the branch prediction
379  // risk, the very fact that the back-end chose to quote the string
380  // indicates that there is some kind of special character in there.
381  // So in practice, this optimisation would only apply if the only
382  // special characters in the string were commas.
383  end = pqxx::internal::scan_double_quoted_string<ENC>(
384  std::data(data), std::size(data), here);
385  // TODO: scan_double_quoted_string() with reusable buffer.
386  std::string const buf{
387  pqxx::internal::parse_double_quoted_string<ENC>(
388  std::data(data), end, here)};
389  m_elts.emplace_back(from_string<ELEMENT>(buf));
390  }
391  break;
392  default: {
393  // Unquoted string. An unquoted string is always literal, no
394  // escaping or encoding, so we don't need to parse it into a
395  // buffer. We can just read it as a string_view.
396  end = pqxx::internal::scan_unquoted_string<ENC, SEPARATOR, '}'>(
397  std::data(data), std::size(data), here);
398  std::string_view const field{
399  std::string_view{std::data(data) + here, end - here}};
400  if (field == "NULL")
401  {
402  if constexpr (nullness<ELEMENT>::has_null)
403  m_elts.emplace_back(nullness<ELEMENT>::null());
404  else
405  throw unexpected_null{pqxx::internal::concat(
406  "Array contains a null ", type_name<ELEMENT>,
407  ". Consider making it an array of std::optional<",
408  type_name<ELEMENT>, "> instead.")};
409  }
410  else
411  m_elts.emplace_back(from_string<ELEMENT>(field));
412  }
413  }
414  here = end;
415  here = parse_field_end(data, here);
416  }
417  }
418 
419  if (dim != outer)
420  throw conversion_error{"Malformed array; may be truncated."};
421  assert(know_extents_from == 0);
422 
423  init_factors();
424  }
425 
427  void init_factors() noexcept
428  {
429  std::size_t factor{1};
430  for (std::size_t dim{DIMENSIONS - 1}; dim > 0; --dim)
431  {
432  factor *= m_extents[dim];
433  m_factors[dim - 1] = factor;
434  }
435  }
436 
438  template<typename... INDEX> std::size_t locate(INDEX... index) const noexcept
439  {
440  static_assert(
441  sizeof...(index) == DIMENSIONS,
442  "Indexing array with wrong number of dimensions.");
443  return add_index(index...);
444  }
445 
446  template<typename OUTER, typename... INDEX>
447  constexpr std::size_t add_index(OUTER outer, INDEX... indexes) const noexcept
448  {
449  std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
450  if constexpr (sizeof...(indexes) == 0)
451  {
452  return first;
453  }
454  else
455  {
456  static_assert(sizeof...(indexes) < DIMENSIONS);
457  // (Offset by 1 here because the outer dimension is not in there.)
458  constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
459  static_assert(dimension < DIMENSIONS);
460  return first * m_factors[dimension] + add_index(indexes...);
461  }
462  }
463 
465 
467  template<typename OUTER, typename... INDEX>
468  constexpr void check_bounds(OUTER outer, INDEX... indexes) const
469  {
470  std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
471  static_assert(sizeof...(indexes) < DIMENSIONS);
472  // (Offset by 1 here because the outer dimension is not in there.)
473  constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
474  static_assert(dimension < DIMENSIONS);
475  if (first >= m_extents[dimension])
476  throw range_error{pqxx::internal::concat(
477  "Array index for dimension ", dimension, " is out of bounds: ", first,
478  " >= ", m_extents[dimension])};
479 
480  // Now check the rest of the indexes, if any.
481  if constexpr (sizeof...(indexes) > 0)
482  check_bounds(indexes...);
483  }
484 
486  std::vector<ELEMENT> m_elts;
487 
489  std::array<std::size_t, DIMENSIONS> m_extents;
490 
492 
499  std::array<std::size_t, DIMENSIONS - 1> m_factors;
500 };
501 
502 
504 
526 class PQXX_LIBEXPORT array_parser
527 {
528 public:
530  enum class juncture
531  {
533  row_start,
535  row_end,
537  null_value,
539  string_value,
541  done,
542  };
543 
545 
549  explicit array_parser(
550  std::string_view input,
551  internal::encoding_group = internal::encoding_group::MONOBYTE);
552 
554 
560  std::pair<juncture, std::string> get_next() { return (this->*m_impl)(); }
561 
562 private:
563  std::string_view m_input;
564 
566  std::size_t m_pos = 0u;
567 
569 
574  using implementation = std::pair<juncture, std::string> (array_parser::*)();
575 
577  static implementation
578  specialize_for_encoding(pqxx::internal::encoding_group enc);
579 
581  implementation m_impl;
582 
584  template<pqxx::internal::encoding_group>
585  std::pair<juncture, std::string> parse_array_step();
586 
587  template<pqxx::internal::encoding_group>
588  std::string::size_type scan_double_quoted_string() const;
589  template<pqxx::internal::encoding_group>
590  std::string parse_double_quoted_string(std::string::size_type end) const;
591  template<pqxx::internal::encoding_group>
592  std::string::size_type scan_unquoted_string() const;
593  template<pqxx::internal::encoding_group>
594  std::string parse_unquoted_string(std::string::size_type end) const;
595 
596  template<pqxx::internal::encoding_group>
597  std::string::size_type scan_glyph(std::string::size_type pos) const;
598  template<pqxx::internal::encoding_group>
599  std::string::size_type
600  scan_glyph(std::string::size_type pos, std::string::size_type end) const;
601 };
602 } // namespace pqxx
603 #endif
The home of all libpqxx classes, functions, templates, etc.
Definition: array.hxx:33
An SQL array received from the database.
Definition: array.hxx:56
constexpr auto back() const noexcept
Refer to the last element, if any.
Definition: array.hxx:158
constexpr auto cend() const noexcept
Return end point of iteration.
Definition: array.hxx:117
std::array< std::size_t, DIMENSIONS > const & sizes() noexcept
Return the sizes of this array in each of its dimensions.
Definition: array.hxx:82
constexpr auto crbegin() const noexcept
Begin reverse iteration.
Definition: array.hxx:119
constexpr std::size_t size() const noexcept
Number of elements in the array.
Definition: array.hxx:127
ELEMENT const & operator[](INDEX... index) const
Access element (without bounds check).
Definition: array.hxx:103
constexpr auto ssize() const noexcept
Number of elements in the array (as a signed number).
Definition: array.hxx:145
constexpr std::size_t dimensions() noexcept
How many dimensions does this array have?
Definition: array.hxx:75
constexpr auto cbegin() const noexcept
Begin iteration of individual elements.
Definition: array.hxx:115
ELEMENT const & at(INDEX... index) const
Definition: array.hxx:87
array(std::string_view data, connection const &conn)
Parse an SQL array, read as text from a pqxx::result or stream.
Definition: array.hxx:68
constexpr auto crend() const noexcept
Return end point of reverse iteration.
Definition: array.hxx:121
constexpr auto front() const noexcept
Refer to the first element, if any.
Definition: array.hxx:153
Low-level array parser.
Definition: array.hxx:527
juncture
What's the latest thing found in the array?
Definition: array.hxx:531
std::pair< juncture, std::string > get_next()
Parse the next step in the array.
Definition: array.hxx:560
Connection to a database.
Definition: connection.hxx:233
static TYPE null()
Return a null value.
static bool has_null
Does this type have a null value?
Definition: strconv.hxx:93