Ada 3.3.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern_helpers-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_HELPERS_INL_H
6#define ADA_URL_PATTERN_HELPERS_INL_H
7
8#include <optional>
9#include <string_view>
10
11#include "ada/common_defs.h"
12#include "ada/expected.h"
14#include "ada/implementation.h"
15
16#if ADA_INCLUDE_URL_PATTERN
17namespace ada::url_pattern_helpers {
18#if defined(ADA_TESTING) || defined(ADA_LOGGING)
19inline std::string to_string(token_type type) {
20 switch (type) {
21 case token_type::INVALID_CHAR:
22 return "INVALID_CHAR";
23 case token_type::OPEN:
24 return "OPEN";
25 case token_type::CLOSE:
26 return "CLOSE";
27 case token_type::REGEXP:
28 return "REGEXP";
29 case token_type::NAME:
30 return "NAME";
31 case token_type::CHAR:
32 return "CHAR";
33 case token_type::ESCAPED_CHAR:
34 return "ESCAPED_CHAR";
35 case token_type::OTHER_MODIFIER:
36 return "OTHER_MODIFIER";
37 case token_type::ASTERISK:
38 return "ASTERISK";
39 case token_type::END:
40 return "END";
41 default:
43 }
44}
45#endif // defined(ADA_TESTING) || defined(ADA_LOGGING)
46
47template <url_pattern_regex::regex_concept regex_provider>
48constexpr void constructor_string_parser<regex_provider>::rewind() {
49 // Set parser's token index to parser's component start.
50 token_index = component_start;
51 // Set parser's token increment to 0.
52 token_increment = 0;
53}
54
55template <url_pattern_regex::regex_concept regex_provider>
56constexpr bool constructor_string_parser<regex_provider>::is_hash_prefix() {
57 // Return the result of running is a non-special pattern char given parser,
58 // parser's token index and "#".
59 return is_non_special_pattern_char(token_index, '#');
60}
61
62template <url_pattern_regex::regex_concept regex_provider>
63constexpr bool constructor_string_parser<regex_provider>::is_search_prefix() {
64 // If result of running is a non-special pattern char given parser, parser's
65 // token index and "?" is true, then return true.
66 if (is_non_special_pattern_char(token_index, '?')) {
67 return true;
68 }
69
70 // If parser's token list[parser's token index]'s value is not "?", then
71 // return false.
72 if (token_list[token_index].value != "?") {
73 return false;
74 }
75
76 // If previous index is less than 0, then return true.
77 if (token_index == 0) return true;
78 // Let previous index be parser's token index - 1.
79 auto previous_index = token_index - 1;
80 // Let previous token be the result of running get a safe token given parser
81 // and previous index.
82 auto previous_token = get_safe_token(previous_index);
83 ADA_ASSERT_TRUE(previous_token);
84 // If any of the following are true, then return false:
85 // - previous token's type is "name".
86 // - previous token's type is "regexp".
87 // - previous token's type is "close".
88 // - previous token's type is "asterisk".
89 return !(previous_token->type == token_type::NAME ||
90 previous_token->type == token_type::REGEXP ||
91 previous_token->type == token_type::CLOSE ||
92 previous_token->type == token_type::ASTERISK);
93}
94
95template <url_pattern_regex::regex_concept regex_provider>
96constexpr bool
97constructor_string_parser<regex_provider>::is_non_special_pattern_char(
98 size_t index, uint32_t value) const {
99 // Let token be the result of running get a safe token given parser and index.
100 auto token = get_safe_token(index);
101 ADA_ASSERT_TRUE(token);
102
103 // If token's value is not value, then return false.
104 // TODO: Remove this once we make sure get_safe_token returns a non-empty
105 // string.
106 if (!token->value.empty() &&
107 static_cast<uint32_t>(token->value[0]) != value) {
108 return false;
109 }
110
111 // If any of the following are true:
112 // - token's type is "char";
113 // - token's type is "escaped-char"; or
114 // - token's type is "invalid-char",
115 // - then return true.
116 return token->type == token_type::CHAR ||
117 token->type == token_type::ESCAPED_CHAR ||
118 token->type == token_type::INVALID_CHAR;
119}
120
121template <url_pattern_regex::regex_concept regex_provider>
122constexpr const token*
123constructor_string_parser<regex_provider>::get_safe_token(size_t index) const {
124 // If index is less than parser's token list's size, then return parser's
125 // token list[index].
126 if (index < token_list.size()) [[likely]] {
127 return &token_list[index];
128 }
129
130 // Assert: parser's token list's size is greater than or equal to 1.
131 ADA_ASSERT_TRUE(!token_list.empty());
132
133 // Let token be parser's token list[last index].
134 // Assert: token's type is "end".
135 ADA_ASSERT_TRUE(token_list.back().type == token_type::END);
136
137 // Return token.
138 return &token_list.back();
139}
140
141template <url_pattern_regex::regex_concept regex_provider>
142constexpr bool constructor_string_parser<regex_provider>::is_group_open()
143 const {
144 // If parser's token list[parser's token index]'s type is "open", then return
145 // true.
146 return token_list[token_index].type == token_type::OPEN;
147}
148
149template <url_pattern_regex::regex_concept regex_provider>
150constexpr bool constructor_string_parser<regex_provider>::is_group_close()
151 const {
152 // If parser's token list[parser's token index]'s type is "close", then return
153 // true.
154 return token_list[token_index].type == token_type::CLOSE;
155}
156
157template <url_pattern_regex::regex_concept regex_provider>
158constexpr bool
159constructor_string_parser<regex_provider>::next_is_authority_slashes() const {
160 // If the result of running is a non-special pattern char given parser,
161 // parser's token index + 1, and "/" is false, then return false.
162 if (!is_non_special_pattern_char(token_index + 1, '/')) {
163 return false;
164 }
165 // If the result of running is a non-special pattern char given parser,
166 // parser's token index + 2, and "/" is false, then return false.
167 if (!is_non_special_pattern_char(token_index + 2, '/')) {
168 return false;
169 }
170 return true;
171}
172
173template <url_pattern_regex::regex_concept regex_provider>
174constexpr bool constructor_string_parser<regex_provider>::is_protocol_suffix()
175 const {
176 // Return the result of running is a non-special pattern char given parser,
177 // parser's token index, and ":".
178 return is_non_special_pattern_char(token_index, ':');
179}
180
181template <url_pattern_regex::regex_concept regex_provider>
182void constructor_string_parser<regex_provider>::change_state(State new_state,
183 size_t skip) {
184 // If parser's state is not "init", not "authority", and not "done", then set
185 // parser's result[parser's state] to the result of running make a component
186 // string given parser.
187 if (state != State::INIT && state != State::AUTHORITY &&
188 state != State::DONE) {
189 auto value = make_component_string();
190 // TODO: Simplify this.
191 switch (state) {
192 case State::PROTOCOL: {
193 result.protocol = value;
194 break;
195 }
196 case State::USERNAME: {
197 result.username = value;
198 break;
199 }
200 case State::PASSWORD: {
201 result.password = value;
202 break;
203 }
204 case State::HOSTNAME: {
205 result.hostname = value;
206 break;
207 }
208 case State::PORT: {
209 result.port = value;
210 break;
211 }
212 case State::PATHNAME: {
213 result.pathname = value;
214 break;
215 }
216 case State::SEARCH: {
217 result.search = value;
218 break;
219 }
220 case State::HASH: {
221 result.hash = value;
222 break;
223 }
224 default:
226 }
227 }
228
229 // If parser's state is not "init" and new state is not "done", then:
230 if (state != State::INIT && new_state != State::DONE) {
231 // If parser's state is "protocol", "authority", "username", or "password";
232 // new state is "port", "pathname", "search", or "hash"; and parser's
233 // result["hostname"] does not exist, then set parser's result["hostname"]
234 // to the empty string.
235 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
236 state == State::USERNAME || state == State::PASSWORD) &&
237 (new_state == State::PORT || new_state == State::PATHNAME ||
238 new_state == State::SEARCH || new_state == State::HASH) &&
239 !result.hostname)
240 result.hostname = "";
241 }
242
243 // If parser's state is "protocol", "authority", "username", "password",
244 // "hostname", or "port"; new state is "search" or "hash"; and parser's
245 // result["pathname"] does not exist, then:
246 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
247 state == State::USERNAME || state == State::PASSWORD ||
248 state == State::HOSTNAME || state == State::PORT) &&
249 (new_state == State::SEARCH || new_state == State::HASH) &&
250 !result.pathname) {
251 if (protocol_matches_a_special_scheme_flag) {
252 result.pathname = "/";
253 } else {
254 // Otherwise, set parser's result["pathname"] to the empty string.
255 result.pathname = "";
256 }
257 }
258
259 // If parser's state is "protocol", "authority", "username", "password",
260 // "hostname", "port", or "pathname"; new state is "hash"; and parser's
261 // result["search"] does not exist, then set parser's result["search"] to
262 // the empty string.
263 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
264 state == State::USERNAME || state == State::PASSWORD ||
265 state == State::HOSTNAME || state == State::PORT ||
266 state == State::PATHNAME) &&
267 new_state == State::HASH && !result.search) {
268 result.search = "";
269 }
270
271 // Set parser's state to new state.
272 state = new_state;
273 // Increment parser's token index by skip.
274 token_index += skip;
275 // Set parser's component start to parser's token index.
276 component_start = token_index;
277 // Set parser's token increment to 0.
278 token_increment = 0;
279}
280
281template <url_pattern_regex::regex_concept regex_provider>
282std::string constructor_string_parser<regex_provider>::make_component_string() {
283 // Assert: parser's token index is less than parser's token list's size.
284 ADA_ASSERT_TRUE(token_index < token_list.size());
285
286 // Let token be parser's token list[parser's token index].
287 // Let end index be token's index.
288 const auto end_index = token_list[token_index].index;
289 // Let component start token be the result of running get a safe token given
290 // parser and parser's component start.
291 const auto component_start_token = get_safe_token(component_start);
292 ADA_ASSERT_TRUE(component_start_token);
293 // Let component start input index be component start token's index.
294 const auto component_start_input_index = component_start_token->index;
295 // Return the code point substring from component start input index to end
296 // index within parser's input.
297 return input.substr(component_start_input_index,
298 end_index - component_start_input_index);
299}
300
301template <url_pattern_regex::regex_concept regex_provider>
302constexpr bool
303constructor_string_parser<regex_provider>::is_an_identity_terminator() const {
304 // Return the result of running is a non-special pattern char given parser,
305 // parser's token index, and "@".
306 return is_non_special_pattern_char(token_index, '@');
307}
308
309template <url_pattern_regex::regex_concept regex_provider>
310constexpr bool constructor_string_parser<regex_provider>::is_pathname_start()
311 const {
312 // Return the result of running is a non-special pattern char given parser,
313 // parser's token index, and "/".
314 return is_non_special_pattern_char(token_index, '/');
315}
316
317template <url_pattern_regex::regex_concept regex_provider>
318constexpr bool constructor_string_parser<regex_provider>::is_password_prefix()
319 const {
320 // Return the result of running is a non-special pattern char given parser,
321 // parser's token index, and ":".
322 return is_non_special_pattern_char(token_index, ':');
323}
324
325template <url_pattern_regex::regex_concept regex_provider>
326constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_open()
327 const {
328 // Return the result of running is a non-special pattern char given parser,
329 // parser's token index, and "[".
330 return is_non_special_pattern_char(token_index, '[');
331}
332
333template <url_pattern_regex::regex_concept regex_provider>
334constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_close()
335 const {
336 // Return the result of running is a non-special pattern char given parser,
337 // parser's token index, and "]".
338 return is_non_special_pattern_char(token_index, ']');
339}
340
341template <url_pattern_regex::regex_concept regex_provider>
342constexpr bool constructor_string_parser<regex_provider>::is_port_prefix()
343 const {
344 // Return the result of running is a non-special pattern char given parser,
345 // parser's token index, and ":".
346 return is_non_special_pattern_char(token_index, ':');
347}
348
349constexpr void Tokenizer::get_next_code_point() {
350 ada_log("Tokenizer::get_next_code_point called with index=", next_index);
351 ADA_ASSERT_TRUE(next_index < input.size());
352 // this assumes that we have a valid, non-truncated UTF-8 stream.
353 code_point = 0;
354 size_t number_bytes = 0;
355 unsigned char first_byte = input[next_index];
356
357 if ((first_byte & 0x80) == 0) {
358 // 1-byte character (ASCII)
359 next_index++;
360 code_point = first_byte;
361 ada_log("Tokenizer::get_next_code_point returning ASCII code point=",
362 uint32_t(code_point));
363 ada_log("Tokenizer::get_next_code_point next_index =", next_index,
364 " input.size()=", input.size());
365 return;
366 }
367 ada_log("Tokenizer::get_next_code_point read first byte=",
368 uint32_t(first_byte));
369 if ((first_byte & 0xE0) == 0xC0) {
370 code_point = first_byte & 0x1F;
371 number_bytes = 2;
372 ada_log("Tokenizer::get_next_code_point two bytes");
373 } else if ((first_byte & 0xF0) == 0xE0) {
374 code_point = first_byte & 0x0F;
375 number_bytes = 3;
376 ada_log("Tokenizer::get_next_code_point three bytes");
377 } else if ((first_byte & 0xF8) == 0xF0) {
378 code_point = first_byte & 0x07;
379 number_bytes = 4;
380 ada_log("Tokenizer::get_next_code_point four bytes");
381 }
382 ADA_ASSERT_TRUE(number_bytes + next_index <= input.size());
383
384 for (size_t i = 1 + next_index; i < number_bytes + next_index; ++i) {
385 unsigned char byte = input[i];
386 ada_log("Tokenizer::get_next_code_point read byte=", uint32_t(byte));
387 code_point = (code_point << 6) | (byte & 0x3F);
388 }
389 ada_log("Tokenizer::get_next_code_point returning non-ASCII code point=",
390 uint32_t(code_point));
391 ada_log("Tokenizer::get_next_code_point next_index =", next_index,
392 " input.size()=", input.size());
393 next_index += number_bytes;
394}
395
396constexpr void Tokenizer::seek_and_get_next_code_point(size_t new_index) {
397 ada_log("Tokenizer::seek_and_get_next_code_point called with new_index=",
398 new_index);
399 // Set tokenizer's next index to index.
400 next_index = new_index;
401 // Run get the next code point given tokenizer.
402 get_next_code_point();
403}
404
405inline void Tokenizer::add_token(token_type type, size_t next_position,
406 size_t value_position, size_t value_length) {
407 ada_log("Tokenizer::add_token called with type=", to_string(type),
408 " next_position=", next_position, " value_position=", value_position);
409 ADA_ASSERT_TRUE(next_position >= value_position);
410
411 // Let token be a new token.
412 // Set token's type to type.
413 // Set token's index to tokenizer's index.
414 // Set token's value to the code point substring from value position with
415 // length value length within tokenizer's input.
416 // Append token to the back of tokenizer's token list.
417 token_list.emplace_back(type, index,
418 input.substr(value_position, value_length));
419 // Set tokenizer's index to next position.
420 index = next_position;
421}
422
423inline void Tokenizer::add_token_with_default_length(token_type type,
424 size_t next_position,
425 size_t value_position) {
426 // Let computed length be next position - value position.
427 auto computed_length = next_position - value_position;
428 // Run add a token given tokenizer, type, next position, value position, and
429 // computed length.
430 add_token(type, next_position, value_position, computed_length);
431}
432
433inline void Tokenizer::add_token_with_defaults(token_type type) {
434 ada_log("Tokenizer::add_token_with_defaults called with type=",
435 to_string(type));
436 // Run add a token with default length given tokenizer, type, tokenizer's next
437 // index, and tokenizer's index.
438 add_token_with_default_length(type, next_index, index);
439}
440
441inline ada_warn_unused std::optional<errors>
442Tokenizer::process_tokenizing_error(size_t next_position,
443 size_t value_position) {
444 // If tokenizer's policy is "strict", then throw a TypeError.
445 if (policy == token_policy::strict) {
446 ada_log("process_tokenizing_error failed with next_position=",
447 next_position, " value_position=", value_position);
448 return errors::type_error;
449 }
450 // Assert: tokenizer's policy is "lenient".
451 ADA_ASSERT_TRUE(policy == token_policy::lenient);
452 // Run add a token with default length given tokenizer, "invalid-char", next
453 // position, and value position.
454 add_token_with_default_length(token_type::INVALID_CHAR, next_position,
455 value_position);
456 return std::nullopt;
457}
458
459template <url_pattern_encoding_callback F>
460token* url_pattern_parser<F>::try_consume_modifier_token() {
461 // Let token be the result of running try to consume a token given parser and
462 // "other-modifier".
463 auto token = try_consume_token(token_type::OTHER_MODIFIER);
464 // If token is not null, then return token.
465 if (token) return token;
466 // Set token to the result of running try to consume a token given parser and
467 // "asterisk".
468 // Return token.
469 return try_consume_token(token_type::ASTERISK);
470}
471
472template <url_pattern_encoding_callback F>
473token* url_pattern_parser<F>::try_consume_regexp_or_wildcard_token(
474 const token* name_token) {
475 // Let token be the result of running try to consume a token given parser and
476 // "regexp".
477 auto token = try_consume_token(token_type::REGEXP);
478 // If name token is null and token is null, then set token to the result of
479 // running try to consume a token given parser and "asterisk".
480 if (!name_token && !token) {
481 token = try_consume_token(token_type::ASTERISK);
482 }
483 // Return token.
484 return token;
485}
486
487template <url_pattern_encoding_callback F>
488token* url_pattern_parser<F>::try_consume_token(token_type type) {
489 ada_log("url_pattern_parser::try_consume_token called with type=",
490 to_string(type));
491 // Assert: parser's index is less than parser's token list size.
492 ADA_ASSERT_TRUE(index < tokens.size());
493 // Let next token be parser's token list[parser's index].
494 auto& next_token = tokens[index];
495 // If next token's type is not type return null.
496 if (next_token.type != type) return nullptr;
497 // Increase parser's index by 1.
498 index++;
499 // Return next token.
500 return &next_token;
501}
502
503template <url_pattern_encoding_callback F>
504std::string url_pattern_parser<F>::consume_text() {
505 // Let result be the empty string.
506 std::string result{};
507 // While true:
508 while (true) {
509 // Let token be the result of running try to consume a token given parser
510 // and "char".
511 auto token = try_consume_token(token_type::CHAR);
512 // If token is null, then set token to the result of running try to consume
513 // a token given parser and "escaped-char".
514 if (!token) token = try_consume_token(token_type::ESCAPED_CHAR);
515 // If token is null, then break.
516 if (!token) break;
517 // Append token's value to the end of result.
518 result.append(token->value);
519 }
520 // Return result.
521 return result;
522}
523
524template <url_pattern_encoding_callback F>
525bool url_pattern_parser<F>::consume_required_token(token_type type) {
526 ada_log("url_pattern_parser::consume_required_token called with type=",
527 to_string(type));
528 // Let result be the result of running try to consume a token given parser and
529 // type.
530 return try_consume_token(type) != nullptr;
531}
532
533template <url_pattern_encoding_callback F>
534std::optional<errors>
535url_pattern_parser<F>::maybe_add_part_from_the_pending_fixed_value() {
536 // If parser's pending fixed value is the empty string, then return.
537 if (pending_fixed_value.empty()) {
538 ada_log("pending_fixed_value is empty");
539 return std::nullopt;
540 }
541 // Let encoded value be the result of running parser's encoding callback given
542 // parser's pending fixed value.
543 auto encoded_value = encoding_callback(pending_fixed_value);
544 if (!encoded_value) {
545 ada_log("failed to encode pending_fixed_value: ", pending_fixed_value);
546 return encoded_value.error();
547 }
548 // Set parser's pending fixed value to the empty string.
549 pending_fixed_value.clear();
550 // Let part be a new part whose type is "fixed-text", value is encoded value,
551 // and modifier is "none".
552 // Append part to parser's part list.
553 parts.emplace_back(url_pattern_part_type::FIXED_TEXT,
554 std::move(*encoded_value),
555 url_pattern_part_modifier::none);
556 return std::nullopt;
557}
558
559template <url_pattern_encoding_callback F>
560std::optional<errors> url_pattern_parser<F>::add_part(
561 std::string_view prefix, token* name_token, token* regexp_or_wildcard_token,
562 std::string_view suffix, token* modifier_token) {
563 // Let modifier be "none".
564 auto modifier = url_pattern_part_modifier::none;
565 // If modifier token is not null:
566 if (modifier_token) {
567 // If modifier token's value is "?" then set modifier to "optional".
568 if (modifier_token->value == "?") {
569 modifier = url_pattern_part_modifier::optional;
570 } else if (modifier_token->value == "*") {
571 // Otherwise if modifier token's value is "*" then set modifier to
572 // "zero-or-more".
573 modifier = url_pattern_part_modifier::zero_or_more;
574 } else if (modifier_token->value == "+") {
575 // Otherwise if modifier token's value is "+" then set modifier to
576 // "one-or-more".
577 modifier = url_pattern_part_modifier::one_or_more;
578 }
579 }
580 // If name token is null and regexp or wildcard token is null and modifier
581 // is "none":
582 if (!name_token && !regexp_or_wildcard_token &&
583 modifier == url_pattern_part_modifier::none) {
584 // Append prefix to the end of parser's pending fixed value.
585 pending_fixed_value.append(prefix);
586 return std::nullopt;
587 }
588 // Run maybe add a part from the pending fixed value given parser.
589 if (auto error = maybe_add_part_from_the_pending_fixed_value()) {
590 return *error;
591 }
592 // If name token is null and regexp or wildcard token is null:
593 if (!name_token && !regexp_or_wildcard_token) {
594 // Assert: suffix is the empty string.
595 ADA_ASSERT_TRUE(suffix.empty());
596 // If prefix is the empty string, then return.
597 if (prefix.empty()) return std::nullopt;
598 // Let encoded value be the result of running parser's encoding callback
599 // given prefix.
600 auto encoded_value = encoding_callback(prefix);
601 if (!encoded_value) {
602 return encoded_value.error();
603 }
604 // Let part be a new part whose type is "fixed-text", value is encoded
605 // value, and modifier is modifier.
606 // Append part to parser's part list.
607 parts.emplace_back(url_pattern_part_type::FIXED_TEXT,
608 std::move(*encoded_value), modifier);
609 return std::nullopt;
610 }
611 // Let regexp value be the empty string.
612 std::string regexp_value{};
613 // If regexp or wildcard token is null, then set regexp value to parser's
614 // segment wildcard regexp.
615 if (!regexp_or_wildcard_token) {
616 regexp_value = segment_wildcard_regexp;
617 } else if (regexp_or_wildcard_token->type == token_type::ASTERISK) {
618 // Otherwise if regexp or wildcard token's type is "asterisk", then set
619 // regexp value to the full wildcard regexp value.
620 regexp_value = ".*";
621 } else {
622 // Otherwise set regexp value to regexp or wildcard token's value.
623 regexp_value = regexp_or_wildcard_token->value;
624 }
625 // Let type be "regexp".
626 auto type = url_pattern_part_type::REGEXP;
627 // If regexp value is parser's segment wildcard regexp:
628 if (regexp_value == segment_wildcard_regexp) {
629 // Set type to "segment-wildcard".
630 type = url_pattern_part_type::SEGMENT_WILDCARD;
631 // Set regexp value to the empty string.
632 regexp_value.clear();
633 } else if (regexp_value == ".*") {
634 // Otherwise if regexp value is the full wildcard regexp value:
635 // Set type to "full-wildcard".
636 type = url_pattern_part_type::FULL_WILDCARD;
637 // Set regexp value to the empty string.
638 regexp_value.clear();
639 }
640 // Let name be the empty string.
641 std::string name{};
642 // If name token is not null, then set name to name token's value.
643 if (name_token) {
644 name = name_token->value;
645 } else if (regexp_or_wildcard_token != nullptr) {
646 // Otherwise if regexp or wildcard token is not null:
647 // Set name to parser's next numeric name, serialized.
648 name = std::to_string(next_numeric_name);
649 // Increment parser's next numeric name by 1.
650 next_numeric_name++;
651 }
652 // If the result of running is a duplicate name given parser and name is
653 // true, then throw a TypeError.
654 if (std::ranges::any_of(
655 parts, [&name](const auto& part) { return part.name == name; })) {
656 return errors::type_error;
657 }
658 // Let encoded prefix be the result of running parser's encoding callback
659 // given prefix.
660 auto encoded_prefix = encoding_callback(prefix);
661 if (!encoded_prefix) return encoded_prefix.error();
662 // Let encoded suffix be the result of running parser's encoding callback
663 // given suffix.
664 auto encoded_suffix = encoding_callback(suffix);
665 if (!encoded_suffix) return encoded_suffix.error();
666 // Let part be a new part whose type is type, value is regexp value,
667 // modifier is modifier, name is name, prefix is encoded prefix, and suffix
668 // is encoded suffix.
669 // Append part to parser's part list.
670 parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
671 std::move(*encoded_prefix), std::move(*encoded_suffix));
672 return std::nullopt;
673}
674
675template <url_pattern_encoding_callback F>
676tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
677 std::string_view input, url_pattern_compile_component_options& options,
678 F& encoding_callback) {
679 ada_log("parse_pattern_string input=", input);
680 // Let parser be a new pattern parser whose encoding callback is encoding
681 // callback and segment wildcard regexp is the result of running generate a
682 // segment wildcard regexp given options.
683 auto parser = url_pattern_parser<F>(
684 encoding_callback, generate_segment_wildcard_regexp(options));
685 // Set parser's token list to the result of running tokenize given input and
686 // "strict".
687 auto tokenize_result = tokenize(input, token_policy::strict);
688 if (!tokenize_result) {
689 ada_log("parse_pattern_string tokenize failed");
690 return tl::unexpected(tokenize_result.error());
691 }
692 parser.tokens = std::move(*tokenize_result);
693
694 // While parser's index is less than parser's token list's size:
695 while (parser.can_continue()) {
696 // Let char token be the result of running try to consume a token given
697 // parser and "char".
698 auto char_token = parser.try_consume_token(token_type::CHAR);
699 // Let name token be the result of running try to consume a token given
700 // parser and "name".
701 auto name_token = parser.try_consume_token(token_type::NAME);
702 // Let regexp or wildcard token be the result of running try to consume a
703 // regexp or wildcard token given parser and name token.
704 auto regexp_or_wildcard_token =
705 parser.try_consume_regexp_or_wildcard_token(name_token);
706 // If name token is not null or regexp or wildcard token is not null:
707 if (name_token || regexp_or_wildcard_token) {
708 // Let prefix be the empty string.
709 std::string prefix{};
710 // If char token is not null then set prefix to char token's value.
711 if (char_token) prefix = char_token->value;
712 // If prefix is not the empty string and not options's prefix code point:
713 if (!prefix.empty() && prefix != options.get_prefix()) {
714 // Append prefix to the end of parser's pending fixed value.
715 parser.pending_fixed_value.append(prefix);
716 // Set prefix to the empty string.
717 prefix.clear();
718 }
719 // Run maybe add a part from the pending fixed value given parser.
720 if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) {
721 ada_log("maybe_add_part_from_the_pending_fixed_value failed");
722 return tl::unexpected(*error);
723 }
724 // Let modifier token be the result of running try to consume a modifier
725 // token given parser.
726 auto modifier_token = parser.try_consume_modifier_token();
727 // Run add a part given parser, prefix, name token, regexp or wildcard
728 // token, the empty string, and modifier token.
729 if (auto error =
730 parser.add_part(prefix, name_token, regexp_or_wildcard_token, "",
731 modifier_token)) {
732 ada_log("parser.add_part failed");
733 return tl::unexpected(*error);
734 }
735 // Continue.
736 continue;
737 }
738
739 // Let fixed token be char token.
740 auto fixed_token = char_token;
741 // If fixed token is null, then set fixed token to the result of running try
742 // to consume a token given parser and "escaped-char".
743 if (!fixed_token)
744 fixed_token = parser.try_consume_token(token_type::ESCAPED_CHAR);
745 // If fixed token is not null:
746 if (fixed_token) {
747 // Append fixed token's value to parser's pending fixed value.
748 parser.pending_fixed_value.append(fixed_token->value);
749 // Continue.
750 continue;
751 }
752 // Let open token be the result of running try to consume a token given
753 // parser and "open".
754 auto open_token = parser.try_consume_token(token_type::OPEN);
755 // If open token is not null:
756 if (open_token) {
757 // Set prefix be the result of running consume text given parser.
758 auto prefix_ = parser.consume_text();
759 // Set name token to the result of running try to consume a token given
760 // parser and "name".
761 name_token = parser.try_consume_token(token_type::NAME);
762 // Set regexp or wildcard token to the result of running try to consume a
763 // regexp or wildcard token given parser and name token.
764 regexp_or_wildcard_token =
765 parser.try_consume_regexp_or_wildcard_token(name_token);
766 // Let suffix be the result of running consume text given parser.
767 auto suffix_ = parser.consume_text();
768 // Run consume a required token given parser and "close".
769 if (!parser.consume_required_token(token_type::CLOSE)) {
770 ada_log("parser.consume_required_token failed");
771 return tl::unexpected(errors::type_error);
772 }
773 // Set modifier token to the result of running try to consume a modifier
774 // token given parser.
775 auto modifier_token = parser.try_consume_modifier_token();
776 // Run add a part given parser, prefix, name token, regexp or wildcard
777 // token, suffix, and modifier token.
778 if (auto error =
779 parser.add_part(prefix_, name_token, regexp_or_wildcard_token,
780 suffix_, modifier_token)) {
781 return tl::unexpected(*error);
782 }
783 // Continue.
784 continue;
785 }
786 // Run maybe add a part from the pending fixed value given parser.
787 if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) {
788 ada_log("maybe_add_part_from_the_pending_fixed_value failed on line 992");
789 return tl::unexpected(*error);
790 }
791 // Run consume a required token given parser and "end".
792 if (!parser.consume_required_token(token_type::END)) {
793 return tl::unexpected(errors::type_error);
794 }
795 }
796 ada_log("parser.parts size is: ", parser.parts.size());
797 // Return parser's part list.
798 return parser.parts;
799}
800
801template <url_pattern_regex::regex_concept regex_provider>
802bool protocol_component_matches_special_scheme(
803 url_pattern_component<regex_provider>& component) {
804 // let's avoid unnecessary copy here.
805 auto& regex = component.regexp;
806 return regex_provider::regex_match("http", regex) ||
807 regex_provider::regex_match("https", regex) ||
808 regex_provider::regex_match("ws", regex) ||
809 regex_provider::regex_match("wss", regex) ||
810 regex_provider::regex_match("ftp", regex);
811}
812
813template <url_pattern_regex::regex_concept regex_provider>
814inline std::optional<errors> constructor_string_parser<
815 regex_provider>::compute_protocol_matches_special_scheme_flag() {
816 ada_log(
817 "constructor_string_parser::compute_protocol_matches_special_scheme_"
818 "flag");
819 // Let protocol string be the result of running make a component string given
820 // parser.
821 auto protocol_string = make_component_string();
822 // Let protocol component be the result of compiling a component given
823 // protocol string, canonicalize a protocol, and default options.
824 auto protocol_component = url_pattern_component<regex_provider>::compile(
825 protocol_string, canonicalize_protocol,
826 url_pattern_compile_component_options::DEFAULT);
827 if (!protocol_component) {
828 ada_log("url_pattern_component::compile failed for protocol_string ",
829 protocol_string);
830 return protocol_component.error();
831 }
832 // If the result of running protocol component matches a special scheme given
833 // protocol component is true, then set parser's protocol matches a special
834 // scheme flag to true.
835 if (protocol_component_matches_special_scheme(*protocol_component)) {
836 protocol_matches_a_special_scheme_flag = true;
837 }
838 return std::nullopt;
839}
840
841template <url_pattern_regex::regex_concept regex_provider>
842tl::expected<url_pattern_init, errors>
843constructor_string_parser<regex_provider>::parse(std::string_view input) {
844 ada_log("constructor_string_parser::parse input=", input);
845 // Let parser be a new constructor string parser whose input is input and
846 // token list is the result of running tokenize given input and "lenient".
847 auto token_list = tokenize(input, token_policy::lenient);
848 if (!token_list) {
849 return tl::unexpected(token_list.error());
850 }
851 auto parser = constructor_string_parser(input, std::move(*token_list));
852
853 // While parser's token index is less than parser's token list size:
854 while (parser.token_index < parser.token_list.size()) {
855 // Set parser's token increment to 1.
856 parser.token_increment = 1;
857
858 // If parser's token list[parser's token index]'s type is "end" then:
859 if (parser.token_list[parser.token_index].type == token_type::END) {
860 // If parser's state is "init":
861 if (parser.state == State::INIT) {
862 // Run rewind given parser.
863 parser.rewind();
864 // If the result of running is a hash prefix given parser is true, then
865 // run change state given parser, "hash" and 1.
866 if (parser.is_hash_prefix()) {
867 parser.change_state(State::HASH, 1);
868 } else if (parser.is_search_prefix()) {
869 // Otherwise if the result of running is a search prefix given parser
870 // is true: Run change state given parser, "search" and 1.
871 parser.change_state(State::SEARCH, 1);
872 } else {
873 // Run change state given parser, "pathname" and 0.
874 parser.change_state(State::PATHNAME, 0);
875 }
876 // Increment parser's token index by parser's token increment.
877 parser.token_index += parser.token_increment;
878 // Continue.
879 continue;
880 }
881
882 if (parser.state == State::AUTHORITY) {
883 // If parser's state is "authority":
884 // Run rewind and set state given parser, and "hostname".
885 parser.rewind();
886 parser.change_state(State::HOSTNAME, 0);
887 // Increment parser's token index by parser's token increment.
888 parser.token_index += parser.token_increment;
889 // Continue.
890 continue;
891 }
892
893 // Run change state given parser, "done" and 0.
894 parser.change_state(State::DONE, 0);
895 // Break.
896 break;
897 }
898
899 // If the result of running is a group open given parser is true:
900 if (parser.is_group_open()) {
901 // Increment parser's group depth by 1.
902 parser.group_depth += 1;
903 // Increment parser's token index by parser's token increment.
904 parser.token_index += parser.token_increment;
905 }
906
907 // If parser's group depth is greater than 0:
908 if (parser.group_depth > 0) {
909 // If the result of running is a group close given parser is true, then
910 // decrement parser's group depth by 1.
911 if (parser.is_group_close()) {
912 parser.group_depth -= 1;
913 } else {
914 // Increment parser's token index by parser's token increment.
915 parser.token_index += parser.token_increment;
916 continue;
917 }
918 }
919
920 // Switch on parser's state and run the associated steps:
921 switch (parser.state) {
922 case State::INIT: {
923 // If the result of running is a protocol suffix given parser is true:
924 if (parser.is_protocol_suffix()) {
925 // Run rewind and set state given parser and "protocol".
926 parser.rewind();
927 parser.change_state(State::PROTOCOL, 0);
928 }
929 break;
930 }
931 case State::PROTOCOL: {
932 // If the result of running is a protocol suffix given parser is true:
933 if (parser.is_protocol_suffix()) {
934 // Run compute protocol matches a special scheme flag given parser.
935 if (const auto error =
936 parser.compute_protocol_matches_special_scheme_flag()) {
937 ada_log("compute_protocol_matches_special_scheme_flag failed");
938 return tl::unexpected(*error);
939 }
940 // Let next state be "pathname".
941 auto next_state = State::PATHNAME;
942 // Let skip be 1.
943 auto skip = 1;
944 // If the result of running next is authority slashes given parser is
945 // true:
946 if (parser.next_is_authority_slashes()) {
947 // Set next state to "authority".
948 next_state = State::AUTHORITY;
949 // Set skip to 3.
950 skip = 3;
951 } else if (parser.protocol_matches_a_special_scheme_flag) {
952 // Otherwise if parser's protocol matches a special scheme flag is
953 // true, then set next state to "authority".
954 next_state = State::AUTHORITY;
955 }
956
957 // Run change state given parser, next state, and skip.
958 parser.change_state(next_state, skip);
959 }
960 break;
961 }
962 case State::AUTHORITY: {
963 // If the result of running is an identity terminator given parser is
964 // true, then run rewind and set state given parser and "username".
965 if (parser.is_an_identity_terminator()) {
966 parser.rewind();
967 parser.change_state(State::USERNAME, 0);
968 } else if (parser.is_pathname_start() || parser.is_search_prefix() ||
969 parser.is_hash_prefix()) {
970 // Otherwise if any of the following are true:
971 // - the result of running is a pathname start given parser;
972 // - the result of running is a search prefix given parser; or
973 // - the result of running is a hash prefix given parser,
974 // then run rewind and set state given parser and "hostname".
975 parser.rewind();
976 parser.change_state(State::HOSTNAME, 0);
977 }
978 break;
979 }
980 case State::USERNAME: {
981 // If the result of running is a password prefix given parser is true,
982 // then run change state given parser, "password", and 1.
983 if (parser.is_password_prefix()) {
984 parser.change_state(State::PASSWORD, 1);
985 } else if (parser.is_an_identity_terminator()) {
986 // Otherwise if the result of running is an identity terminator given
987 // parser is true, then run change state given parser, "hostname",
988 // and 1.
989 parser.change_state(State::HOSTNAME, 1);
990 }
991 break;
992 }
993 case State::PASSWORD: {
994 // If the result of running is an identity terminator given parser is
995 // true, then run change state given parser, "hostname", and 1.
996 if (parser.is_an_identity_terminator()) {
997 parser.change_state(State::HOSTNAME, 1);
998 }
999 break;
1000 }
1001 case State::HOSTNAME: {
1002 // If the result of running is an IPv6 open given parser is true, then
1003 // increment parser's hostname IPv6 bracket depth by 1.
1004 if (parser.is_an_ipv6_open()) {
1005 parser.hostname_ipv6_bracket_depth += 1;
1006 } else if (parser.is_an_ipv6_close()) {
1007 // Otherwise if the result of running is an IPv6 close given parser is
1008 // true, then decrement parser's hostname IPv6 bracket depth by 1.
1009 parser.hostname_ipv6_bracket_depth -= 1;
1010 } else if (parser.is_port_prefix() &&
1011 parser.hostname_ipv6_bracket_depth == 0) {
1012 // Otherwise if the result of running is a port prefix given parser is
1013 // true and parser's hostname IPv6 bracket depth is zero, then run
1014 // change state given parser, "port", and 1.
1015 parser.change_state(State::PORT, 1);
1016 } else if (parser.is_pathname_start()) {
1017 // Otherwise if the result of running is a pathname start given parser
1018 // is true, then run change state given parser, "pathname", and 0.
1019 parser.change_state(State::PATHNAME, 0);
1020 } else if (parser.is_search_prefix()) {
1021 // Otherwise if the result of running is a search prefix given parser
1022 // is true, then run change state given parser, "search", and 1.
1023 parser.change_state(State::SEARCH, 1);
1024 } else if (parser.is_hash_prefix()) {
1025 // Otherwise if the result of running is a hash prefix given parser is
1026 // true, then run change state given parser, "hash", and 1.
1027 parser.change_state(State::HASH, 1);
1028 }
1029
1030 break;
1031 }
1032 case State::PORT: {
1033 // If the result of running is a pathname start given parser is true,
1034 // then run change state given parser, "pathname", and 0.
1035 if (parser.is_pathname_start()) {
1036 parser.change_state(State::PATHNAME, 0);
1037 } else if (parser.is_search_prefix()) {
1038 // Otherwise if the result of running is a search prefix given parser
1039 // is true, then run change state given parser, "search", and 1.
1040 parser.change_state(State::SEARCH, 1);
1041 } else if (parser.is_hash_prefix()) {
1042 // Otherwise if the result of running is a hash prefix given parser is
1043 // true, then run change state given parser, "hash", and 1.
1044 parser.change_state(State::HASH, 1);
1045 }
1046 break;
1047 }
1048 case State::PATHNAME: {
1049 // If the result of running is a search prefix given parser is true,
1050 // then run change state given parser, "search", and 1.
1051 if (parser.is_search_prefix()) {
1052 parser.change_state(State::SEARCH, 1);
1053 } else if (parser.is_hash_prefix()) {
1054 // Otherwise if the result of running is a hash prefix given parser is
1055 // true, then run change state given parser, "hash", and 1.
1056 parser.change_state(State::HASH, 1);
1057 }
1058 break;
1059 }
1060 case State::SEARCH: {
1061 // If the result of running is a hash prefix given parser is true, then
1062 // run change state given parser, "hash", and 1.
1063 if (parser.is_hash_prefix()) {
1064 parser.change_state(State::HASH, 1);
1065 }
1066 break;
1067 }
1068 case State::HASH: {
1069 // Do nothing
1070 break;
1071 }
1072 default: {
1073 // Assert: This step is never reached.
1074 unreachable();
1075 }
1076 }
1077
1078 // Increment parser's token index by parser's token increment.
1079 parser.token_index += parser.token_increment;
1080 }
1081
1082 // If parser's result contains "hostname" and not "port", then set parser's
1083 // result["port"] to the empty string.
1084 if (parser.result.hostname && !parser.result.port) {
1085 parser.result.port = "";
1086 }
1087
1088 // Return parser's result.
1089 return parser.result;
1090}
1091
1092} // namespace ada::url_pattern_helpers
1093#endif // ADA_INCLUDE_URL_PATTERN
1094#endif
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
#define ada_warn_unused
Definition common_defs.h:85
Definitions for user-facing functions for parsing a URL and its components.
errors
Definition errors.h:10
state
Definition state.h:17
ada_warn_unused std::string_view to_string(encoding_type type)
void unreachable()
tl::expected< result_type, ada::errors > result
ada::url_pattern_regex::std_regex_provider regex_provider
Definition url_pattern.cc:9
Declaration for the URLPattern helpers.