libpqxx
The C++ client library for PostgreSQL
Loading...
Searching...
No Matches
array.hxx
Go to the documentation of this file.
1/* Handling of SQL arrays.
2 *
3 * DO NOT INCLUDE THIS FILE DIRECTLY; include pqxx/field instead.
4 *
5 * Copyright (c) 2000-2025, Jeroen T. Vermeulen.
6 *
7 * See COPYING for copyright license. If you did not receive a file called
8 * COPYING with this source code, please notify the distributor of this
9 * mistake, or contact the author.
10 */
11#ifndef PQXX_H_ARRAY
12#define PQXX_H_ARRAY
13
14#if !defined(PQXX_HEADER_PRE)
15# error "Include libpqxx headers as <pqxx/header>, not <pqxx/header.hxx>."
16#endif
17
#include <algorithm>
#include <cassert>
#include <optional>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "pqxx/connection.hxx"
#include "pqxx/internal/array-composite.hxx"
31
32
33namespace pqxx
34{
35// TODO: Specialise for string_view/zview, allocate all strings in one buffer.
36
38
53template<
54 typename ELEMENT, std::size_t DIMENSIONS = 1u,
55 char SEPARATOR = array_separator<ELEMENT>>
56class array final
57{
58public:
60
69 array(std::string_view data, connection const &cx) :
70 array{data, pqxx::internal::enc_group(cx.encoding_id())}
71 {}
72
74
76 constexpr std::size_t dimensions() noexcept { return DIMENSIONS; }
77
79
83 std::array<std::size_t, DIMENSIONS> const &sizes() noexcept
84 {
85 return m_extents;
86 }
87
88 template<typename... INDEX> ELEMENT const &at(INDEX... index) const
89 {
90 static_assert(sizeof...(index) == DIMENSIONS);
91 check_bounds(index...);
92 return m_elts.at(locate(index...));
93 }
94
96
104 template<typename... INDEX> ELEMENT const &operator[](INDEX... index) const
105 {
106 static_assert(sizeof...(index) == DIMENSIONS);
107 return m_elts[locate(index...)];
108 }
109
111
116 constexpr auto cbegin() const noexcept { return m_elts.cbegin(); }
118 constexpr auto cend() const noexcept { return m_elts.cend(); }
120 constexpr auto crbegin() const noexcept { return m_elts.crbegin(); }
122 constexpr auto crend() const noexcept { return m_elts.crend(); }
123
125
128 constexpr std::size_t size() const noexcept { return m_elts.size(); }
129
131
146 constexpr auto ssize() const noexcept
147 {
148 return static_cast<std::ptrdiff_t>(size());
149 }
150
152
154 constexpr auto front() const noexcept { return m_elts.front(); }
155
157
159 constexpr auto back() const noexcept { return m_elts.back(); }
160
161private:
163
171 void check_dims(std::string_view data)
172 {
173 auto sz{std::size(data)};
174 if (sz < DIMENSIONS * 2)
175 throw conversion_error{pqxx::internal::concat(
176 "Trying to parse a ", DIMENSIONS, "-dimensional array out of '", data,
177 "'.")};
178
179 // Making some assumptions here:
180 // * The array holds no extraneous whitespace.
181 // * None of the sub-arrays can be null.
182 // * Only ASCII characters start off with a byte in the 0-127 range.
183 //
184 // Given those, the input must start with a sequence of DIMENSIONS bytes
185 // with the ASCII value for '{'; and likewise it must end with a sequence
186 // of DIMENSIONS bytes with the ASCII value for '}'.
187
188 if (data[0] != '{')
189 throw conversion_error{"Malformed array: does not start with '{'."};
190 for (std::size_t i{0}; i < DIMENSIONS; ++i)
191 if (data[i] != '{')
192 throw conversion_error{pqxx::internal::concat(
193 "Expecting ", DIMENSIONS, "-dimensional array, but found ", i, ".")};
194 if (data[DIMENSIONS] == '{')
195 throw conversion_error{pqxx::internal::concat(
196 "Tried to parse ", DIMENSIONS,
197 "-dimensional array from array data that has more dimensions.")};
198 for (std::size_t i{0}; i < DIMENSIONS; ++i)
199 if (data[sz - 1 - i] != '}')
200 throw conversion_error{
201 "Malformed array: does not end in the right number of '}'."};
202 }
203
204 // Allow fields to construct arrays passing the encoding group.
205 // Couldn't make this work through a call gate, thanks to the templating.
206 friend class ::pqxx::field;
207
/// Parse `data` as an SQL array, given the encoding group of the text.
/** Selects the parse() instantiation that matches `enc` and runs it on
 * `data`.  Each enumerator handled here maps to its own instantiation; any
 * other value is marked as unreachable.
 */
208 array(std::string_view data, pqxx::internal::encoding_group enc)
209 {
210 using group = pqxx::internal::encoding_group;
211 switch (enc)
212 {
213 case group::MONOBYTE: parse<group::MONOBYTE>(data); break;
214 case group::BIG5: parse<group::BIG5>(data); break;
215 case group::EUC_CN: parse<group::EUC_CN>(data); break;
216 case group::EUC_JP: parse<group::EUC_JP>(data); break;
217 case group::EUC_KR: parse<group::EUC_KR>(data); break;
218 case group::EUC_TW: parse<group::EUC_TW>(data); break;
219 case group::GB18030: parse<group::GB18030>(data); break;
220 case group::GBK: parse<group::GBK>(data); break;
221 case group::JOHAB: parse<group::JOHAB>(data); break;
222 case group::MULE_INTERNAL: parse<group::MULE_INTERNAL>(data); break;
223 case group::SJIS: parse<group::SJIS>(data); break;
224 case group::UHC: parse<group::UHC>(data); break;
225 case group::UTF8: parse<group::UTF8>(data); break;
226 default: PQXX_UNREACHABLE; break;
227 }
228 }
229
231
234 std::size_t parse_field_end(std::string_view data, std::size_t here) const
235 {
236 auto const sz{std::size(data)};
237 if (here < sz)
238 switch (data[here])
239 {
240 case SEPARATOR:
241 ++here;
242 if (here >= sz)
243 throw conversion_error{"Array looks truncated."};
244 switch (data[here])
245 {
246 case SEPARATOR:
247 throw conversion_error{"Array contains double separator."};
248 case '}': throw conversion_error{"Array contains trailing separator."};
249 default: break;
250 }
251 break;
252 case '}': break;
253 default:
254 throw conversion_error{pqxx::internal::concat(
255 "Unexpected character in array: ",
256 static_cast<unsigned>(static_cast<unsigned char>(data[here])),
257 " where separator or closing brace expected.")};
258 }
259 return here;
260 }
261
263
268 constexpr std::size_t estimate_elements(std::string_view data) const noexcept
269 {
270 // Dirty trick: just count the number of bytes that look as if they may be
271 // separators. At the very worst we may overestimate by a factor of two or
272 // so, in exceedingly rare cases, on some encodings.
273 auto const separators{
274 std::count(std::begin(data), std::end(data), SEPARATOR)};
275 // The number of dimensions makes no difference here. It's still one
276 // separator between consecutive elements, just possibly with some extra
277 // braces as well.
278 return static_cast<std::size_t>(separators + 1);
279 }
280
281 template<pqxx::internal::encoding_group ENC>
282 void parse(std::string_view data)
283 {
284 static_assert(DIMENSIONS > 0u, "Can't create a zero-dimensional array.");
285 auto const sz{std::size(data)};
286 check_dims(data);
287
288 m_elts.reserve(estimate_elements(data));
289
290 // We discover the array's extents along each of the dimensions, starting
291 // with the final dimension and working our way towards the first. At any
292 // given point during parsing, we know the extents starting at this
293 // dimension.
294 std::size_t know_extents_from{DIMENSIONS};
295
296 // Currently parsing this dimension. We start off at -1, relying on C++'s
297 // well-defined rollover for unsigned numbers.
298 // The actual outermost dimension of the array is 0, and the innermost is
299 // at the end. But, the array as a whole is enclosed in braces just like
300 // each row. So we act like there's an anomalous "outer" dimension holding
301 // the entire array.
302 constexpr std::size_t outer{std::size_t{0u} - std::size_t{1u}};
303
304 // We start parsing at the fictional outer dimension. The input begins
305 // with opening braces, one for each dimension, so we'll start off by
306 // bumping all the way to the innermost dimension.
307 std::size_t dim{outer};
308
309 // Extent counters, one per "real" dimension.
310 // Note initialiser syntax; this zero-initialises all elements.
311 std::array<std::size_t, DIMENSIONS> extents{};
312
313 // Current parsing position.
314 std::size_t here{0};
315 PQXX_ASSUME(here <= sz);
316 while (here < sz)
317 {
318 if (data[here] == '{')
319 {
320 if (dim == outer)
321 {
322 // This must be the initial opening brace.
323 if (know_extents_from != DIMENSIONS)
324 throw conversion_error{
325 "Array text representation closed and reopened its outside "
326 "brace pair."};
327 assert(here == 0);
328 PQXX_ASSUME(here == 0);
329 }
330 else
331 {
332 if (dim >= (DIMENSIONS - 1))
333 throw conversion_error{
334 "Array seems to have inconsistent number of dimensions."};
335 ++extents[dim];
336 }
337 // (Rolls over to zero if we're coming from the outer dimension.)
338 ++dim;
339 extents[dim] = 0u;
340 ++here;
341 }
342 else if (data[here] == '}')
343 {
344 if (dim == outer)
345 throw conversion_error{"Array has spurious '}'."};
346 if (dim < know_extents_from)
347 {
348 // We just finished parsing our first row in this dimension.
349 // Now we know the array dimension's extent.
350 m_extents[dim] = extents[dim];
351 know_extents_from = dim;
352 }
353 else
354 {
355 if (extents[dim] != m_extents[dim])
356 throw conversion_error{"Rows in array have inconsistent sizes."};
357 }
358 // Bump back down to the next-lower dimension. Which may be the outer
359 // dimension, through underflow.
360 --dim;
361 ++here;
362 here = parse_field_end(data, here);
363 }
364 else
365 {
366 // Found an array element. The actual elements always live in the
367 // "inner" dimension.
368 if (dim != DIMENSIONS - 1)
369 throw conversion_error{
370 "Malformed array: found element where sub-array was expected."};
371 assert(dim != outer);
372 ++extents[dim];
373 std::size_t end;
374 switch (data[here])
375 {
376 case '\0': throw conversion_error{"Unexpected zero byte in array."};
377 case ',': throw conversion_error{"Array contains empty field."};
378 case '"': {
379 // Double-quoted string. We parse it into a buffer before parsing
380 // the resulting string as an element. This seems wasteful: the
381 // string might not contain any special characters. So it's
382 // tempting to check, and try to use a string_view and avoid a
383 // useless copy step. But. Even besides the branch prediction
384 // risk, the very fact that the back-end chose to quote the string
385 // indicates that there is some kind of special character in there.
386 // So in practice, this optimisation would only apply if the only
387 // special characters in the string were commas.
389 std::data(data), std::size(data), here);
390 // TODO: scan_double_quoted_string() with reusable buffer.
391 std::string const buf{
393 std::data(data), end, here)};
394 m_elts.emplace_back(from_string<ELEMENT>(buf));
395 }
396 break;
397 default: {
398 // Unquoted string. An unquoted string is always literal, no
399 // escaping or encoding, so we don't need to parse it into a
400 // buffer. We can just read it as a string_view.
402 std::data(data), std::size(data), here);
403 std::string_view const field{
404 std::string_view{std::data(data) + here, end - here}};
405 if (field == "NULL")
406 {
407 if constexpr (nullness<ELEMENT>::has_null)
408 m_elts.emplace_back(nullness<ELEMENT>::null());
409 else
410 throw unexpected_null{pqxx::internal::concat(
411 "Array contains a null ", type_name<ELEMENT>,
412 ". Consider making it an array of std::optional<",
413 type_name<ELEMENT>, "> instead.")};
414 }
415 else
416 m_elts.emplace_back(from_string<ELEMENT>(field));
417 }
418 }
419 here = end;
420 PQXX_ASSUME(here <= sz);
421 here = parse_field_end(data, here);
422 }
423 }
424
425 if (dim != outer)
426 throw conversion_error{"Malformed array; may be truncated."};
427 assert(know_extents_from == 0);
428 PQXX_ASSUME(know_extents_from == 0);
429
430 init_factors();
431 }
432
434 void init_factors() noexcept
435 {
436 std::size_t factor{1};
437 for (std::size_t dim{DIMENSIONS - 1}; dim > 0; --dim)
438 {
439 factor *= m_extents[dim];
440 m_factors[dim - 1] = factor;
441 }
442 }
443
445 template<typename... INDEX> std::size_t locate(INDEX... index) const noexcept
446 {
447 static_assert(
448 sizeof...(index) == DIMENSIONS,
449 "Indexing array with wrong number of dimensions.");
450 return add_index(index...);
451 }
452
453 template<typename OUTER, typename... INDEX>
454 constexpr std::size_t add_index(OUTER outer, INDEX... indexes) const noexcept
455 {
456 std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
457 if constexpr (sizeof...(indexes) == 0)
458 {
459 return first;
460 }
461 else
462 {
463 static_assert(sizeof...(indexes) < DIMENSIONS);
464 // (Offset by 1 here because the outer dimension is not in there.)
465 constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
466 static_assert(dimension < DIMENSIONS);
467 return first * m_factors[dimension] + add_index(indexes...);
468 }
469 }
470
472
474 template<typename OUTER, typename... INDEX>
475 constexpr void check_bounds(OUTER outer, INDEX... indexes) const
476 {
477 std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
478 static_assert(sizeof...(indexes) < DIMENSIONS);
479 // (Offset by 1 here because the outer dimension is not in there.)
480 constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
481 static_assert(dimension < DIMENSIONS);
482 if (first >= m_extents[dimension])
483 throw range_error{pqxx::internal::concat(
484 "Array index for dimension ", dimension, " is out of bounds: ", first,
485 " >= ", m_extents[dimension])};
486
487 // Now check the rest of the indexes, if any.
488 if constexpr (sizeof...(indexes) > 0)
489 check_bounds(indexes...);
490 }
491
/// The array's elements, in the order in which parse() appended them.
493 std::vector<ELEMENT> m_elts;
494
/// Size of the array along each dimension, as discovered by parse().
496 std::array<std::size_t, DIMENSIONS> m_extents;
497
499
/// Indexing multipliers for every dimension except the last.
/** Filled in by init_factors(): entry `d` is the product of the extents of
 * all dimensions after `d`.  add_index() multiplies an index by its entry
 * here to compute an offset into `m_elts`.
 */
506 std::array<std::size_t, DIMENSIONS - 1> m_factors;
507};
508
509
511
/// Step-by-step parser for the text representation of an SQL array.
/** Construct it on the array text, then call get_next() repeatedly.  Each
 * call reports one "juncture" together with a string.
 */
531class PQXX_LIBEXPORT array_parser
532{
533public:
/// What kind of item get_next() came across.
/** NOTE(review): the meanings below are inferred from the enumerator names;
 * confirm against the implementation.
 */
535 enum class juncture
536 {
// Presumably: start of a (sub-)array.
538 row_start,
// Presumably: end of a (sub-)array.
540 row_end,
// Presumably: a null element.
542 null_value,
// Presumably: an element value, delivered as a string.
544 string_value,
// Presumably: end of the input.
546 done,
547 };
548
550
/// Set up a parser for `input`.
/** The encoding group defaults to `MONOBYTE` when not given.
 */
554 explicit array_parser(
555 std::string_view input,
556 internal::encoding_group = internal::encoding_group::MONOBYTE);
557
559
/// Parse the next step in the array.
/** Forwards to the member function that `m_impl` points to, and returns its
 * result: a juncture paired with a string.
 */
565 std::pair<juncture, std::string> get_next() { return (this->*m_impl)(); }
566
567private:
// The text being parsed.  This is a view, not a copy.
568 std::string_view m_input;
569
// Offset into m_input; starts at zero.  Presumably the current parsing
// position.
571 std::size_t m_pos = 0u;
572
574
// Type of a pointer to the member function that get_next() dispatches to.
579 using implementation = std::pair<juncture, std::string> (array_parser::*)();
580
// Pick an implementation for the given encoding group.  (Defined outside
// this header.)
582 static implementation
583 specialize_for_encoding(pqxx::internal::encoding_group enc);
584
// The implementation that get_next() calls.
586 implementation m_impl;
587
// Per-encoding-group parsing step; has the same signature as
// `implementation`.
589 template<pqxx::internal::encoding_group>
590 std::pair<juncture, std::string> parse_array_step();
591
// Per-encoding-group helpers for scanning and extracting quoted and
// unquoted strings.  Declared here only; defined elsewhere.
592 template<pqxx::internal::encoding_group>
593 std::string::size_type scan_double_quoted_string() const;
594 template<pqxx::internal::encoding_group>
595 std::string parse_double_quoted_string(std::string::size_type end) const;
596 template<pqxx::internal::encoding_group>
597 std::string::size_type scan_unquoted_string() const;
598 template<pqxx::internal::encoding_group>
599 std::string_view parse_unquoted_string(std::string::size_type end) const;
600
// Per-encoding-group glyph scanning, with and without an explicit end
// position.
601 template<pqxx::internal::encoding_group>
602 std::string::size_type scan_glyph(std::string::size_type pos) const;
603 template<pqxx::internal::encoding_group>
604 std::string::size_type
605 scan_glyph(std::string::size_type pos, std::string::size_type end) const;
606};
607} // namespace pqxx
608#endif
constexpr auto back() const noexcept
Refer to the last element, if any.
Definition array.hxx:159
constexpr auto cend() const noexcept
Return end point of iteration.
Definition array.hxx:118
constexpr auto crbegin() const noexcept
Begin reverse iteration.
Definition array.hxx:120
ELEMENT const & operator[](INDEX... index) const
Access element (without bounds check).
Definition array.hxx:104
friend class ::pqxx::field
Definition array.hxx:206
constexpr std::size_t size() const noexcept
Number of elements in the array.
Definition array.hxx:128
constexpr auto ssize() const noexcept
Number of elements in the array (as a signed number).
Definition array.hxx:146
constexpr std::size_t dimensions() noexcept
How many dimensions does this array have?
Definition array.hxx:76
constexpr auto cbegin() const noexcept
Begin iteration of individual elements.
Definition array.hxx:116
ELEMENT const & at(INDEX... index) const
Definition array.hxx:88
constexpr auto crend() const noexcept
Return end point of reverse iteration.
Definition array.hxx:122
std::array< std::size_t, DIMENSIONS > const & sizes() noexcept
Return the sizes of this array in each of its dimensions.
Definition array.hxx:83
array(std::string_view data, connection const &cx)
Parse an SQL array, read as text from a pqxx::result or stream.
Definition array.hxx:69
constexpr auto front() const noexcept
Refer to the first element, if any.
Definition array.hxx:154
#define PQXX_LIBEXPORT
Definition header-pre.hxx:157
#define PQXX_ASSUME(condition)
Definition header-pre.hxx:189
Internal items for libpqxx' own use. Do not use these yourself.
Definition encodings.cxx:33
std::string concat(TYPE... item)
Efficiently combine a bunch of items into one big string.
Definition concat.hxx:31
encoding_group
Definition encoding_group.hxx:19
std::string_view parse_unquoted_string(char const input[], std::size_t end, std::size_t pos)
Parse an unquoted array entry, or an unquoted field of a composite-type value.
Definition array-composite.hxx:149
std::size_t scan_double_quoted_string(char const input[], std::size_t size, std::size_t pos)
Definition array-composite.hxx:20
std::size_t scan_unquoted_string(char const input[], std::size_t size, std::size_t pos)
Find the end of an unquoted string in an array or composite-type value.
Definition array-composite.hxx:131
std::string parse_double_quoted_string(char const input[], std::size_t end, std::size_t pos)
Un-quote and un-escape a double-quoted SQL string.
Definition array-composite.hxx:84
The home of all libpqxx classes, functions, templates, etc.
Definition array.cxx:27
std::string const type_name
A human-readable name for a type, used in error messages and such.
Definition strconv.hxx:81
constexpr char array_separator
Element separator between SQL array elements of this type.
Definition strconv.hxx:560
T from_string(field const &value)
Convert a field's value to type T.
Definition field.hxx:548
TO check_cast(FROM value, std::string_view description)
Cast a numeric value to another type, or throw if it underflows/overflows.
Definition util.hxx:153
static TYPE null()
Return a null value.
static bool has_null
Does this type have a null value?
Definition strconv.hxx:94
#define PQXX_UNREACHABLE
Definition util.hxx:50