9#ifndef PQXX_H_ENCODINGS
10#define PQXX_H_ENCODINGS
51template<
char... NEEDLE>
54 std::size_t here = 0u)
56 auto const sz{std::size(haystack)};
57 auto const data{std::data(haystack)};
60 auto next{scanner(data, sz, here)};
64 if ((... or (data[here] == NEEDLE)))
86template<
typename CALLABLE>
89 std::size_t buffer_len, std::size_t start = 0)
92 for (std::size_t here = start, next; here < buffer_len; here = next)
94 next = scan(buffer, buffer_len, here);
96 callback(buffer + here, buffer + next);
105get_byte(
char const buffer[], std::size_t offset)
noexcept
107 return static_cast<unsigned char>(buffer[offset]);
111[[noreturn]]
PQXX_COLD void throw_for_encoding_error(
112 char const *encoding_name,
char const buffer[], std::size_t start,
116 s <<
"Invalid byte sequence for encoding " << encoding_name <<
" at byte "
117 << start <<
": " << std::hex << std::setw(2) << std::setfill(
'0');
118 for (std::size_t i{0}; i < count; ++i)
120 s <<
"0x" <<
static_cast<unsigned int>(get_byte(buffer, start + i));
124 throw pqxx::argument_error{s.str()};
130between_inc(
unsigned char value,
unsigned bottom,
unsigned top)
132 return value >= bottom and value <= top;
147 call(
char const buffer[], std::size_t buffer_len, std::size_t start);
162find_ascii_char(std::string_view haystack, std::size_t here)
166 static_assert((... and ((NEEDLE & 0x80) == 0)));
168 auto const sz{std::size(haystack)};
169 auto const data{std::data(haystack)};
193 if ((... or (data[here] == NEEDLE)))
215 static_assert((... and ((NEEDLE >> 7) == 0)));
217 auto const sz{std::size(haystack)};
218 auto const data{std::data(haystack)};
222 while ((... and (data[here] != NEEDLE)))
235 call(
char const [], std::size_t buffer_len, std::size_t start)
238 if (start >= buffer_len)
250 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
252 if (start >= buffer_len)
255 auto const byte1{get_byte(buffer, start)};
259 if (not between_inc(byte1, 0x81, 0xfe) or (start + 2 > buffer_len))
261 throw_for_encoding_error(
"BIG5", buffer, start, 1);
263 auto const byte2{get_byte(buffer, start + 1)};
265 not between_inc(byte2, 0x40, 0x7e) and
266 not between_inc(byte2, 0xa1, 0xfe))
268 throw_for_encoding_error(
"BIG5", buffer, start, 2);
290 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
292 if (start >= buffer_len)
293 return std::string::npos;
295 auto const byte1{get_byte(buffer, start)};
299 if (not between_inc(byte1, 0xa1, 0xf7) or start + 2 > buffer_len)
301 throw_for_encoding_error(
"EUC_CN", buffer, start, 1);
303 auto const byte2{get_byte(buffer, start + 1)};
304 if (not between_inc(byte2, 0xa1, 0xfe))
306 throw_for_encoding_error(
"EUC_CN", buffer, start, 2);
321 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
323 if (start >= buffer_len)
324 return std::string::npos;
326 auto const byte1{get_byte(buffer, start)};
330 if (start + 2 > buffer_len)
332 throw_for_encoding_error(
"EUC_JP", buffer, start, 1);
334 auto const byte2{get_byte(buffer, start + 1)};
337 if (not between_inc(byte2, 0xa1, 0xfe))
339 throw_for_encoding_error(
"EUC_JP", buffer, start, 2);
344 if (between_inc(byte1, 0xa1, 0xfe))
346 if (not between_inc(byte2, 0xa1, 0xfe))
348 throw_for_encoding_error(
"EUC_JP", buffer, start, 2);
353 if (byte1 == 0x8f and start + 3 <= buffer_len)
355 auto const byte3{get_byte(buffer, start + 2)};
357 not between_inc(byte2, 0xa1, 0xfe) or
358 not between_inc(byte3, 0xa1, 0xfe))
360 throw_for_encoding_error(
"EUC_JP", buffer, start, 3);
365 throw_for_encoding_error(
"EUC_JP", buffer, start, 1);
374 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
376 if (start >= buffer_len)
379 auto const byte1{get_byte(buffer, start)};
383 if (not between_inc(byte1, 0xa1, 0xfe) or start + 2 > buffer_len)
385 throw_for_encoding_error(
"EUC_KR", buffer, start, 1);
387 auto const byte2{get_byte(buffer, start + 1)};
388 if (not between_inc(byte2, 0xa1, 0xfe))
390 throw_for_encoding_error(
"EUC_KR", buffer, start, 1);
401 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
403 if (start >= buffer_len)
405 return std::string::npos;
407 auto const byte1{get_byte(buffer, start)};
411 if (start + 2 > buffer_len)
413 throw_for_encoding_error(
"EUC_KR", buffer, start, 1);
415 auto const byte2{get_byte(buffer, start + 1)};
416 if (between_inc(byte1, 0xa1, 0xfe))
418 if (not between_inc(byte2, 0xa1, 0xfe))
420 throw_for_encoding_error(
"EUC_KR", buffer, start, 2);
425 if (byte1 != 0x8e or start + 4 > buffer_len)
427 throw_for_encoding_error(
"EUC_KR", buffer, start, 1);
430 between_inc(byte2, 0xa1, 0xb0) and
431 between_inc(get_byte(buffer, start + 2), 0xa1, 0xfe) and
432 between_inc(get_byte(buffer, start + 3), 0xa1, 0xfe))
436 throw_for_encoding_error(
"EUC_KR", buffer, start, 4);
445 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
447 if (start >= buffer_len)
450 auto const byte1{get_byte(buffer, start)};
454 throw_for_encoding_error(
"GB18030", buffer, start, buffer_len - start);
456 if (start + 2 > buffer_len)
458 throw_for_encoding_error(
"GB18030", buffer, start, buffer_len - start);
460 auto const byte2{get_byte(buffer, start + 1)};
461 if (between_inc(byte2, 0x40, 0xfe))
465 throw_for_encoding_error(
"GB18030", buffer, start, 2);
470 if (start + 4 > buffer_len)
472 throw_for_encoding_error(
"GB18030", buffer, start, buffer_len - start);
475 between_inc(byte2, 0x30, 0x39) and
476 between_inc(get_byte(buffer, start + 2), 0x81, 0xfe) and
477 between_inc(get_byte(buffer, start + 3), 0x30, 0x39))
481 throw_for_encoding_error(
"GB18030", buffer, start, 4);
490 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
492 if (start >= buffer_len)
495 auto const byte1{get_byte(buffer, start)};
499 if (start + 2 > buffer_len)
501 throw_for_encoding_error(
"GBK", buffer, start, 1);
503 auto const byte2{get_byte(buffer, start + 1)};
505 (between_inc(byte1, 0xa1, 0xa9) and between_inc(byte2, 0xa1, 0xfe)) or
506 (between_inc(byte1, 0xb0, 0xf7) and between_inc(byte2, 0xa1, 0xfe)) or
507 (between_inc(byte1, 0x81, 0xa0) and between_inc(byte2, 0x40, 0xfe) and
509 (between_inc(byte1, 0xaa, 0xfe) and between_inc(byte2, 0x40, 0xa0) and
511 (between_inc(byte1, 0xa8, 0xa9) and between_inc(byte2, 0x40, 0xa0) and
513 (between_inc(byte1, 0xaa, 0xaf) and between_inc(byte2, 0xa1, 0xfe)) or
514 (between_inc(byte1, 0xf8, 0xfe) and between_inc(byte2, 0xa1, 0xfe)) or
515 (between_inc(byte1, 0xa1, 0xa7) and between_inc(byte2, 0x40, 0xa0) and
520 throw_for_encoding_error(
"GBK", buffer, start, 2);
537 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
539 if (start >= buffer_len)
542 auto const byte1{get_byte(buffer, start)};
546 if (start + 2 > buffer_len)
548 throw_for_encoding_error(
"JOHAB", buffer, start, 1);
550 auto const byte2{get_byte(buffer, start)};
552 (between_inc(byte1, 0x84, 0xd3) and
553 (between_inc(byte2, 0x41, 0x7e) or between_inc(byte2, 0x81, 0xfe))) or
554 ((between_inc(byte1, 0xd8, 0xde) or between_inc(byte1, 0xe0, 0xf9)) and
555 (between_inc(byte2, 0x31, 0x7e) or between_inc(byte2, 0x91, 0xfe))))
559 throw_for_encoding_error(
"JOHAB", buffer, start, 2);
574 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
576 if (start >= buffer_len)
579 auto const byte1{get_byte(buffer, start)};
583 if (start + 2 > buffer_len)
585 throw_for_encoding_error(
"MULE_INTERNAL", buffer, start, 1);
587 auto const byte2{get_byte(buffer, start + 1)};
588 if (between_inc(byte1, 0x81, 0x8d) and byte2 >= 0xa0)
591 if (start + 3 > buffer_len)
593 throw_for_encoding_error(
"MULE_INTERNAL", buffer, start, 2);
596 ((byte1 == 0x9a and between_inc(byte2, 0xa0, 0xdf)) or
597 (byte1 == 0x9b and between_inc(byte2, 0xe0, 0xef)) or
598 (between_inc(byte1, 0x90, 0x99) and byte2 >= 0xa0)) and
602 if (start + 4 > buffer_len)
604 throw_for_encoding_error(
"MULE_INTERNAL", buffer, start, 3);
607 ((byte1 == 0x9c and between_inc(byte2, 0xf0, 0xf4)) or
608 (byte1 == 0x9d and between_inc(byte2, 0xf5, 0xfe))) and
609 get_byte(buffer, start + 2) >= 0xa0 and
610 get_byte(buffer, start + 4) >= 0xa0)
614 throw_for_encoding_error(
"MULE_INTERNAL", buffer, start, 4);
631 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
633 if (start >= buffer_len)
634 return std::string::npos;
636 auto const byte1{get_byte(buffer, start)};
637 if (byte1 < 0x80 or between_inc(byte1, 0xa1, 0xdf))
641 not between_inc(byte1, 0x81, 0x9f) and
642 not between_inc(byte1, 0xe0, 0xfc))
644 throw_for_encoding_error(
"SJIS", buffer, start, 1);
646 if (start + 2 > buffer_len)
648 throw_for_encoding_error(
"SJIS", buffer, start, buffer_len - start);
650 auto const byte2{get_byte(buffer, start + 1)};
653 throw_for_encoding_error(
"SJIS", buffer, start, 2);
655 if (between_inc(byte2, 0x40, 0x9e) or between_inc(byte2, 0x9f, 0xfc))
659 throw_for_encoding_error(
"SJIS", buffer, start, 2);
668 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
670 if (start >= buffer_len)
673 auto const byte1{get_byte(buffer, start)};
677 if (start + 2 > buffer_len)
679 throw_for_encoding_error(
"UHC", buffer, start, buffer_len - start);
681 auto const byte2{get_byte(buffer, start + 1)};
682 if (between_inc(byte1, 0x80, 0xc6))
685 between_inc(byte2, 0x41, 0x5a) or between_inc(byte2, 0x61, 0x7a) or
686 between_inc(byte2, 0x80, 0xfe))
690 throw_for_encoding_error(
"UHC", buffer, start, 2);
693 if (between_inc(byte1, 0xa1, 0xfe))
695 if (not between_inc(byte2, 0xa1, 0xfe))
697 throw_for_encoding_error(
"UHC", buffer, start, 2);
702 throw_for_encoding_error(
"UHC", buffer, start, 1);
711 call(
char const buffer[], std::size_t buffer_len, std::size_t start)
713 if (start >= buffer_len)
716 auto const byte1{get_byte(buffer, start)};
720 if (start + 2 > buffer_len)
722 throw_for_encoding_error(
"UTF8", buffer, start, buffer_len - start);
724 auto const byte2{get_byte(buffer, start + 1)};
725 if (between_inc(byte1, 0xc0, 0xdf))
727 if (not between_inc(byte2, 0x80, 0xbf))
729 throw_for_encoding_error(
"UTF8", buffer, start, 2);
734 if (start + 3 > buffer_len)
736 throw_for_encoding_error(
"UTF8", buffer, start, buffer_len - start);
738 auto const byte3{get_byte(buffer, start + 2)};
739 if (between_inc(byte1, 0xe0, 0xef))
741 if (between_inc(byte2, 0x80, 0xbf) and between_inc(byte3, 0x80, 0xbf))
745 throw_for_encoding_error(
"UTF8", buffer, start, 3);
748 if (start + 4 > buffer_len)
750 throw_for_encoding_error(
"UTF8", buffer, start, buffer_len - start);
752 if (between_inc(byte1, 0xf0, 0xf7))
755 between_inc(byte2, 0x80, 0xbf) and between_inc(byte3, 0x80, 0xbf) and
756 between_inc(get_byte(buffer, start + 3), 0x80, 0xbf))
760 throw_for_encoding_error(
"UTF8", buffer, start, 4);
764 throw_for_encoding_error(
"UTF8", buffer, start, 1);
813template<
char... NEEDLE>
821 return pqxx::internal::find_ascii_char<
838 "Unexpected encoding group: ", as_if,
" (mapped from ", enc,
").")};
847template<
char... NEEDLE>
873 "Unexpected encoding group: ", as_if,
" (mapped from ", enc,
").")};
Internal error in libpqxx library.
Definition except.hxx:242
Internal items for libpqxx's own use. Do not use these yourself.
Definition encodings.cxx:33
std::string concat(TYPE... item)
Efficiently combine a bunch of items into one big string.
Definition concat.hxx:31
PQXX_PURE constexpr char_finder_func * get_char_finder(encoding_group enc)
Look up a character search function for an encoding group.
Definition encodings.hxx:815
PQXX_PURE std::size_t find_s_ascii_char(std::string_view haystack, std::size_t here)
Find first of NEEDLE ASCII chars in haystack.
Definition encodings.hxx:211
PQXX_PURE char const * name_encoding(int encoding_id)
Return PostgreSQL's name for encoding enum value.
PQXX_PURE constexpr char_finder_func * get_s_char_finder(encoding_group enc)
Look up a "sentry" character search function for an encoding group.
Definition encodings.hxx:849
encoding_group
Definition encoding_group.hxx:19
@ EUC_KR
Definition encoding_group.hxx:29
@ EUC_JP
Definition encoding_group.hxx:28
@ GB18030
Definition encoding_group.hxx:31
@ EUC_CN
Definition encoding_group.hxx:27
@ BIG5
Definition encoding_group.hxx:26
@ EUC_TW
Definition encoding_group.hxx:30
@ JOHAB
Definition encoding_group.hxx:33
@ MULE_INTERNAL
Definition encoding_group.hxx:34
@ GBK
Definition encoding_group.hxx:32
@ SJIS
Definition encoding_group.hxx:35
@ MONOBYTE
Definition encoding_group.hxx:21
@ UHC
Definition encoding_group.hxx:36
@ UTF8
Definition encoding_group.hxx:37
pqxx::internal::encoding_group enc_group(std::string_view encoding_name)
Convert libpq encoding name to its libpqxx encoding group.
Definition encodings.cxx:35
void for_glyphs(encoding_group enc, CALLABLE callback, char const buffer[], std::size_t buffer_len, std::size_t start=0)
Iterate over the glyphs in a buffer.
Definition encodings.hxx:87
PQXX_LIBEXPORT glyph_scanner_func * get_glyph_scanner(encoding_group)
Look up the glyph scanner function for a given encoding group.
std::size_t(std::string_view haystack, std::size_t start) char_finder_func
Function type: "find first occurrence of any of a specific set of ASCII characters.".
Definition encoding_group.hxx:70
std::size_t find_char(glyph_scanner_func *scanner, std::string_view haystack, std::size_t here=0u)
Find any of the ASCII characters NEEDLE in haystack.
Definition encodings.hxx:52
constexpr encoding_group map_ascii_search_group(encoding_group enc) noexcept
Just for searching an ASCII character, what encoding can we use here?
Definition encodings.hxx:785
std::size_t(char const buffer[], std::size_t buffer_len, std::size_t start) glyph_scanner_func
Function type: "find the end of the current glyph.".
Definition encoding_group.hxx:52
The home of all libpqxx classes, functions, templates, etc.
Definition array.cxx:27
#define PQXX_DECLARE_ENUM_CONVERSION(ENUM)
Macro: Define a string conversion for an enum type.
Definition strconv.hxx:416
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:250
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:290
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:321
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:374
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:401
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:445
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:490
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:537
static PQXX_PURE constexpr std::size_t call(char const [], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:235
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:574
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:631
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:668
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Definition encodings.hxx:711
Wrapper struct template for "find next glyph" functions.
Definition encodings.hxx:143
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Find the next glyph in buffer after position start.