25 return c ==
'\r' || c ==
'\n' || c ==
'\t';
29 return 0x101010101010101ull * v;
// Walks `input[0..length)` in 8-byte groups, copying each group into a
// 64-bit word with memcpy and copying it back afterwards, and finally
// returns whether the accumulated `non_ascii` bits are all zero.
// NOTE(review): this listing omits several source lines (the declarations of
// `broadcast_80`, `i` and `word`, and the left-hand side of the two mask
// expressions below); the comments describe only what is visible here.
32constexpr bool to_lower_ascii(
char* input,
size_t length)
noexcept {
// Per-byte constants replicated across a 64-bit word by `broadcast`.
// For a byte b below 0x80, b + (128 - 'A') has bit 7 set exactly when
// b >= 'A', and b + (128 - 'Z' - 1) has bit 7 set exactly when b > 'Z'.
34 uint64_t broadcast_Ap =
broadcast(128 -
'A');
35 uint64_t broadcast_Zp =
broadcast(128 -
'Z' - 1);
// Collects the bits selected by `broadcast_80` from every word processed.
36 uint64_t non_ascii = 0;
// Main loop: full 8-byte groups only (i + 7 must still be in range).
39 for (; i + 7 < length; i += 8) {
41 memcpy(&word, input + i,
sizeof(word));
42 non_ascii |= (word & broadcast_80);
// XOR of the two sums keeps bit 7 only for bytes in 'A'..'Z'; shifting the
// masked result right by 2 moves that bit to 0x20 (presumably
// `broadcast_80` is 0x80 in every byte -- TODO confirm). How the value is
// applied to `word` is on a line not shown in this listing.
44 (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
45 memcpy(input + i, &word,
sizeof(word));
// Tail: the same steps, but only the `length - i` leftover bytes are copied
// into and out of `word`.
49 memcpy(&word, input + i, length - i);
50 non_ascii |= (word & broadcast_80);
52 (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
53 memcpy(input + i, &word, length - i);
// True when none of the collected `broadcast_80` bits were set.
55 return non_ascii == 0;
59 std::string_view user_input)
noexcept {
61 if (user_input.size() < 16) {
78 static uint8_t rnt_array[16] = {1, 0, 0, 0, 0, 0, 0, 0,
79 0, 9, 10, 0, 0, 13, 0, 0};
80 const uint8x16_t rnt = vld1q_u8(rnt_array);
82 uint8x16_t running{0};
83 for (; i + 15 < user_input.size(); i += 16) {
84 uint8x16_t word = vld1q_u8((
const uint8_t*)user_input.data() + i);
86 running = vorrq_u8(running, vceqq_u8(vqtbl1q_u8(rnt, word), word));
88 if (i < user_input.size()) {
90 vld1q_u8((
const uint8_t*)user_input.data() + user_input.length() - 16);
91 running = vorrq_u8(running, vceqq_u8(vqtbl1q_u8(rnt, word), word));
93 return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0;
97 std::string_view user_input)
noexcept {
99 if (user_input.size() < 16) {
104 const __m128i mask1 = _mm_set1_epi8(
'\r');
105 const __m128i mask2 = _mm_set1_epi8(
'\n');
106 const __m128i mask3 = _mm_set1_epi8(
'\t');
109 for (; i + 15 < user_input.size(); i += 16) {
110 __m128i word = _mm_loadu_si128((
const __m128i*)(user_input.data() + i));
111 running = _mm_or_si128(
112 _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
113 _mm_cmpeq_epi8(word, mask2))),
114 _mm_cmpeq_epi8(word, mask3));
116 if (i < user_input.size()) {
117 __m128i word = _mm_loadu_si128(
118 (
const __m128i*)(user_input.data() + user_input.length() - 16));
119 running = _mm_or_si128(
120 _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
121 _mm_cmpeq_epi8(word, mask2))),
122 _mm_cmpeq_epi8(word, mask3));
124 return _mm_movemask_epi8(running) != 0;
128 std::string_view user_input)
noexcept {
130 if (user_input.size() < 16) {
135 const __m128i mask1 = __lsx_vrepli_b(
'\r');
136 const __m128i mask2 = __lsx_vrepli_b(
'\n');
137 const __m128i mask3 = __lsx_vrepli_b(
'\t');
140 for (; i + 15 < user_input.size(); i += 16) {
141 __m128i word = __lsx_vld((
const __m128i*)(user_input.data() + i), 0);
142 running = __lsx_vor_v(
143 __lsx_vor_v(running, __lsx_vor_v(__lsx_vseq_b(word, mask1),
144 __lsx_vseq_b(word, mask2))),
145 __lsx_vseq_b(word, mask3));
147 if (i < user_input.size()) {
148 __m128i word = __lsx_vld(
149 (
const __m128i*)(user_input.data() + user_input.length() - 16), 0);
150 running = __lsx_vor_v(
151 __lsx_vor_v(running, __lsx_vor_v(__lsx_vseq_b(word, mask1),
152 __lsx_vseq_b(word, mask2))),
153 __lsx_vseq_b(word, mask3));
155 if (__lsx_bz_v(running))
return false;
160 std::string_view user_input)
noexcept {
161 auto has_zero_byte = [](uint64_t v) {
162 return ((v - 0x0101010101010101) & ~(v) & 0x8080808080808080);
169 for (; i + 7 < user_input.size(); i += 8) {
171 memcpy(&word, user_input.data() + i,
sizeof(word));
172 uint64_t xor1 = word ^ mask1;
173 uint64_t xor2 = word ^ mask2;
174 uint64_t xor3 = word ^ mask3;
175 running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3);
177 if (i < user_input.size()) {
179 memcpy(&word, user_input.data() + i, user_input.size() - i);
180 uint64_t xor1 = word ^ mask1;
181 uint64_t xor2 = word ^ mask2;
182 uint64_t xor3 = word ^ mask3;
183 running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3);
195 std::array<uint8_t, 256>
result{};
196 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
197 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|'}) {
204 const char c)
noexcept {
210 std::array<uint8_t, 256>
result{};
211 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
212 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|',
'%'}) {
215 for (uint8_t c = 0; c <= 32; c++) {
218 for (
size_t c = 127; c < 255; c++) {
227 const char c)
noexcept {
232 const char* input,
size_t length)
noexcept {
234 uint8_t accumulator{};
235 for (; i + 4 <= length; i += 4) {
241 for (; i < length; i++) {
247constexpr static std::array<uint8_t, 256>
249 std::array<uint8_t, 256>
result{};
250 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
251 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|',
'%'}) {
254 for (uint8_t c =
'A'; c <=
'Z'; c++) {
257 for (uint8_t c = 0; c <= 32; c++) {
260 for (
size_t c = 127; c < 255; c++) {
267contains_forbidden_domain_code_point_or_upper(
const char* input,
268 size_t length)
noexcept {
270 uint8_t accumulator{};
271 for (; i + 4 <= length; i += 4) {
281 for (; i < length; i++) {
290 std::array<bool, 256>
result{};
291 for (
size_t c = 0; c < 256; c++) {
292 result[c] = (c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'z') ||
293 (c >=
'A' && c <=
'Z') || c ==
'+' || c ==
'-' || c ==
'.';
306 return (c >=
'0' && c <=
'9') || (c >=
'A' && c <=
'F') ||
307 (c >=
'a' && c <=
'f');
313 return (c >=
'0' && c <=
'9');
322 return (
unsigned char)c <=
' ';
326 const char c)
noexcept {
327 return c ==
'\t' || c ==
'\n' || c ==
'\r';
331 "..",
"%2e.",
".%2e",
"%2e%2e"};
334 std::string_view input)
noexcept {
339 uint64_t half_length = uint64_t(input.size()) / 2;
340 if (half_length - 1 > 2) {
345 if ((input[0] !=
'.') && (input[0] !=
'%')) {
349 int hash_value = (input.size() + (
unsigned)(input[0])) & 3;
351 if (target.size() != input.size()) {
356 auto prefix_equal_unsafe = [](std::string_view a, std::string_view b) {
358 memcpy(&A, a.data(),
sizeof(A));
359 memcpy(&B, b.data(),
sizeof(B));
362 if (!prefix_equal_unsafe(input, target)) {
365 for (
size_t i = 2; i < input.size(); i++) {
367 if ((uint8_t((c | 0x20) - 0x61) <= 25 ? (c | 0x20) : c) != target[i]) {
383 std::string_view input)
noexcept {
384 return input ==
"." || input ==
"%2e" || input ==
"%2E";
388 return (c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'f');
392 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11,
393 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
394 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15};
395unsigned constexpr convert_hex_to_binary(
const char c)
noexcept {
// Builds a decoded copy of `input`, starting the decoding work at offset
// `first_percent`.
// @param input          text to decode
// @param first_percent  offset at which scanning starts, or
//                       std::string_view::npos to return `input` unchanged
// @return the resulting std::string
// NOTE(review): this listing omits some source lines (the declaration of
// `dest`, the bodies of both branches inside the loop, and the final
// return); the comments describe only what is visible here.
399std::string percent_decode(
const std::string_view input,
size_t first_percent) {
// Nothing to decode: hand back a plain copy.
402 if (first_percent == std::string_view::npos) {
403 return std::string(input);
// Reserve room for the whole input, then copy the prefix that precedes
// `first_percent` verbatim.
406 dest.reserve(input.length());
407 dest.append(input.substr(0, first_percent));
408 const char* pointer = input.data() + first_percent;
409 const char* end = input.data() + input.size();
412 while (pointer < end) {
413 const char ch = pointer[0];
// Number of bytes that follow the current one.
414 size_t remaining = end - pointer - 1;
// This branch is taken when the current byte is not '%', when fewer than
// two bytes follow it, or when either of the next two bytes fails
// is_ascii_hex_digit.
415 if (ch !=
'%' || remaining < 2 ||
417 (!is_ascii_hex_digit(pointer[1]) ||
418 !is_ascii_hex_digit(pointer[2])))) {
// Otherwise the two bytes after '%' are converted and combined into one
// byte value: first digit * 16 + second digit.
422 unsigned a = convert_hex_to_binary(pointer[1]);
423 unsigned b = convert_hex_to_binary(pointer[2]);
424 char c =
static_cast<char>(a * 16 + b);
// Returns a std::string built from `input`, using `character_set` to decide
// which characters need attention.
// NOTE(review): the predicate body and the loop body are not shown in this
// listing, so the exact encoding rule cannot be confirmed from here.
432std::string percent_encode(
const std::string_view input,
433 const uint8_t character_set[]) {
// Locate the first character for which the predicate (which captures
// `character_set`) returns true.
434 auto pointer = std::ranges::find_if(input, [character_set](
const char c) {
// No character matched: return a plain copy of the input.
438 if (pointer == input.end()) {
439 return std::string(input);
// Reserve at least the input length, then copy the untouched prefix that
// precedes the first match.
443 result.reserve(input.length());
445 result.append(input.substr(0, std::distance(input.begin(), pointer)));
// Process the remainder of the input one character at a time.
447 for (; pointer != input.end(); pointer++) {
// Variant of percent_encode that returns bool and is parameterised on the
// compile-time flag `append`; the log message below labels the two modes
// "appending" and "overwriting".
// NOTE(review): this listing omits the last parameter (the code later uses a
// string named `out`), the predicate body, the return statements and the
// loop body; the meaning of the bool result cannot be confirmed from here.
458template <
bool append>
459bool percent_encode(
const std::string_view input,
const uint8_t character_set[],
461 ada_log(
"percent_encode ", input,
" to output string while ",
462 append ?
"appending" :
"overwriting");
// Locate the first character for which the predicate (which captures
// `character_set`) returns true.
463 auto pointer = std::ranges::find_if(input, [character_set](
const char c) {
466 ada_log(
"percent_encode done checking, moved to ",
467 std::distance(input.begin(), pointer));
// No character matched anywhere in the input.
470 if (pointer == input.end()) {
471 ada_log(
"percent_encode encoding not needed.");
// Compile-time branch taken only for the non-appending instantiation.
474 if constexpr (!append) {
477 ada_log(
"percent_encode appending ", std::distance(input.begin(), pointer),
// Copy the prefix that precedes the first match into `out` unchanged.
480 out.append(input.data(), std::distance(input.begin(), pointer));
481 ada_log(
"percent_encode processing ", std::distance(pointer, input.end()),
// Process the remainder of the input one character at a time.
483 for (; pointer != input.end(); pointer++) {
// Produces an ASCII form of `plain` and stores it in `out`.
// @param out            receives the resulting string (moved in at the end)
// @param plain          the text to convert
// @param first_percent  offset passed on to unicode::percent_decode, or
//                       std::string_view::npos to skip percent-decoding
// NOTE(review): the line that computes `idna_ascii` and the return
// statements are not shown in this listing; the meaning of the bool result
// should be confirmed against the full source.
493bool to_ascii(std::optional<std::string>& out,
const std::string_view plain,
494 size_t first_percent) {
// `input` views `plain` directly unless a decode is needed, in which case
// it views the locally owned decoded buffer instead.
495 std::string percent_decoded_buffer;
496 std::string_view input = plain;
497 if (first_percent != std::string_view::npos) {
498 percent_decoded_buffer = unicode::percent_decode(plain, first_percent);
499 input = percent_decoded_buffer;
// Special-case an empty result or one for which
// contains_forbidden_domain_code_point reports a hit.
503 if (idna_ascii.empty() || contains_forbidden_domain_code_point(
504 idna_ascii.data(), idna_ascii.size())) {
// Hand the converted string to the caller without copying.
507 out = std::move(idna_ascii);
// Overload of percent_encode that takes a starting offset `index`: the first
// `index` bytes of `input` are copied to the output as-is and the per-
// character loop begins at that offset.
// NOTE(review): the declaration of `out`, the loop body and the return are
// not shown in this listing; presumably the caller guarantees
// index <= input.size() -- TODO confirm.
511std::string percent_encode(
const std::string_view input,
512 const uint8_t character_set[],
size_t index) {
// Copy the leading `index` bytes verbatim.
515 out.append(input.data(), index);
// Resume processing at position `index`.
516 auto pointer = input.begin() + index;
517 for (; pointer != input.end(); pointer++) {
Definitions of the character sets used by unicode functions.
Declaration of the character sets used by unicode functions.
Common definitions for cross-platform compiler support.
#define ADA_PUSH_DISABLE_ALL_WARNINGS
#define ADA_POP_DISABLE_WARNINGS
#define ada_really_inline
ada_really_inline constexpr bool bit_at(const uint8_t a[], const uint8_t i)
std::string to_ascii(std::string_view ut8_string)
Includes the declarations for unicode operations.
static constexpr std::array< uint8_t, 256 > is_forbidden_domain_code_point_table
static constexpr std::array< uint8_t, 256 > is_forbidden_domain_code_point_table_or_upper
static constexpr char hex_to_binary_table[]
constexpr uint64_t broadcast(uint8_t v) noexcept
constexpr std::string_view table_is_double_dot_path_segment[]
constexpr bool is_tabs_or_newline(char c) noexcept
static constexpr std::array< uint8_t, 256 > is_forbidden_host_code_point_table
static constexpr std::array< bool, 256 > is_alnum_plus_table
tl::expected< result_type, ada::errors > result
Definitions for all unicode specific functions.