Ada 3.3.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_INL_H
6#define ADA_URL_PATTERN_INL_H
7
8#include "ada/common_defs.h"
10#include "ada/url_pattern.h"
11
12#include <algorithm>
13#include <string_view>
14#include <utility>
15
16#if ADA_INCLUDE_URL_PATTERN
17namespace ada {
18
19inline bool url_pattern_init::operator==(const url_pattern_init& other) const {
20 return protocol == other.protocol && username == other.username &&
21 password == other.password && hostname == other.hostname &&
22 port == other.port && search == other.search && hash == other.hash &&
23 pathname == other.pathname;
24}
25
26inline bool url_pattern_component_result::operator==(
27 const url_pattern_component_result& other) const {
28 return input == other.input && groups == other.groups;
29}
30
31template <url_pattern_regex::regex_concept regex_provider>
32url_pattern_component_result
33url_pattern_component<regex_provider>::create_component_match_result(
34 std::string&& input,
35 std::vector<std::optional<std::string>>&& exec_result) {
36 // Let result be a new URLPatternComponentResult.
37 // Set result["input"] to input.
38 // Let groups be a record<USVString, (USVString or undefined)>.
39 auto result =
40 url_pattern_component_result{.input = std::move(input), .groups = {}};
41
42 // Optimization: Let's reserve the size.
43 result.groups.reserve(exec_result.size());
44
45 // We explicitly start iterating from 0 even though the spec
46 // says we should start from 1. This case is handled by the
47 // std_regex_provider.
48 for (size_t index = 0; index < exec_result.size(); index++) {
49 result.groups.insert({
50 group_name_list[index],
51 std::move(exec_result[index]),
52 });
53 }
54 return result;
55}
56
57template <url_pattern_regex::regex_concept regex_provider>
58std::string_view url_pattern<regex_provider>::get_protocol() const
60 // Return this's associated URL pattern's protocol component's pattern string.
61 return protocol_component.pattern;
62}
63template <url_pattern_regex::regex_concept regex_provider>
64std::string_view url_pattern<regex_provider>::get_username() const
66 // Return this's associated URL pattern's username component's pattern string.
67 return username_component.pattern;
68}
69template <url_pattern_regex::regex_concept regex_provider>
70std::string_view url_pattern<regex_provider>::get_password() const
72 // Return this's associated URL pattern's password component's pattern string.
73 return password_component.pattern;
74}
75template <url_pattern_regex::regex_concept regex_provider>
76std::string_view url_pattern<regex_provider>::get_hostname() const
78 // Return this's associated URL pattern's hostname component's pattern string.
79 return hostname_component.pattern;
80}
81template <url_pattern_regex::regex_concept regex_provider>
82std::string_view url_pattern<regex_provider>::get_port() const
84 // Return this's associated URL pattern's port component's pattern string.
85 return port_component.pattern;
86}
87template <url_pattern_regex::regex_concept regex_provider>
88std::string_view url_pattern<regex_provider>::get_pathname() const
90 // Return this's associated URL pattern's pathname component's pattern string.
91 return pathname_component.pattern;
92}
93template <url_pattern_regex::regex_concept regex_provider>
94std::string_view url_pattern<regex_provider>::get_search() const
96 // Return this's associated URL pattern's search component's pattern string.
97 return search_component.pattern;
98}
99template <url_pattern_regex::regex_concept regex_provider>
100std::string_view url_pattern<regex_provider>::get_hash() const
102 // Return this's associated URL pattern's hash component's pattern string.
103 return hash_component.pattern;
104}
105template <url_pattern_regex::regex_concept regex_provider>
106bool url_pattern<regex_provider>::ignore_case() const {
107 return ignore_case_;
108}
109template <url_pattern_regex::regex_concept regex_provider>
110bool url_pattern<regex_provider>::has_regexp_groups() const {
111 // If this's associated URL pattern's has regexp groups, then return true.
112 return protocol_component.has_regexp_groups ||
113 username_component.has_regexp_groups ||
114 password_component.has_regexp_groups ||
115 hostname_component.has_regexp_groups ||
116 port_component.has_regexp_groups ||
117 pathname_component.has_regexp_groups ||
118 search_component.has_regexp_groups || hash_component.has_regexp_groups;
119}
120
121inline bool url_pattern_part::is_regexp() const noexcept {
122 return type == url_pattern_part_type::REGEXP;
123}
124
125inline std::string_view url_pattern_compile_component_options::get_delimiter()
126 const {
127 if (delimiter) {
128 return {&delimiter.value(), 1};
129 }
130 return {};
131}
132
133inline std::string_view url_pattern_compile_component_options::get_prefix()
134 const {
135 if (prefix) {
136 return {&prefix.value(), 1};
137 }
138 return {};
139}
140
141template <url_pattern_regex::regex_concept regex_provider>
142template <url_pattern_encoding_callback F>
143tl::expected<url_pattern_component<regex_provider>, errors>
144url_pattern_component<regex_provider>::compile(
145 std::string_view input, F& encoding_callback,
146 url_pattern_compile_component_options& options) {
147 ada_log("url_pattern_component::compile input: ", input);
148 // Let part list be the result of running parse a pattern string given input,
149 // options, and encoding callback.
150 auto part_list = url_pattern_helpers::parse_pattern_string(input, options,
151 encoding_callback);
152
153 if (!part_list) {
154 ada_log("parse_pattern_string failed");
155 return tl::unexpected(part_list.error());
156 }
157
158 // Let (regular expression string, name list) be the result of running
159 // generate a regular expression and name list given part list and options.
160 auto [regular_expression_string, name_list] =
161 url_pattern_helpers::generate_regular_expression_and_name_list(*part_list,
162 options);
163
164 ada_log("regular expression string: ", regular_expression_string);
165
166 // Let pattern string be the result of running generate a pattern
167 // string given part list and options.
168 auto pattern_string =
169 url_pattern_helpers::generate_pattern_string(*part_list, options);
170
171 // Let regular expression be RegExpCreate(regular expression string,
172 // flags). If this throws an exception, catch it, and throw a
173 // TypeError.
174 std::optional<typename regex_provider::regex_type> regular_expression =
175 regex_provider::create_instance(regular_expression_string,
176 options.ignore_case);
177
178 if (!regular_expression) {
179 return tl::unexpected(errors::type_error);
180 }
181
182 // For each part of part list:
183 // - If part's type is "regexp", then set has regexp groups to true.
184 const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
185 const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
186
187 ada_log("has regexp groups: ", has_regexp_groups);
188
189 // Return a new component whose pattern string is pattern string, regular
190 // expression is regular expression, group name list is name list, and has
191 // regexp groups is has regexp groups.
192 return url_pattern_component<regex_provider>(
193 std::move(pattern_string), std::move(*regular_expression),
194 std::move(name_list), has_regexp_groups);
195}
196
197template <url_pattern_regex::regex_concept regex_provider>
198result<std::optional<url_pattern_result>> url_pattern<regex_provider>::exec(
199 const url_pattern_input& input, const std::string_view* base_url) {
200 // Return the result of match given this's associated URL pattern, input, and
201 // baseURL if given.
202 return match(input, base_url);
203}
204
205template <url_pattern_regex::regex_concept regex_provider>
206result<bool> url_pattern<regex_provider>::test(
207 const url_pattern_input& input, const std::string_view* base_url) {
208 // TODO: Optimization opportunity. Rather than returning `url_pattern_result`
209 // Implement a fast path just like `can_parse()` in ada_url.
210 // Let result be the result of match given this's associated URL pattern,
211 // input, and baseURL if given.
212 // If result is null, return false.
213 if (auto result = match(input, base_url); result.has_value()) {
214 return result->has_value();
215 }
216 return tl::unexpected(errors::type_error);
217}
218
219template <url_pattern_regex::regex_concept regex_provider>
220result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
221 const url_pattern_input& input, const std::string_view* base_url_string) {
222 std::string protocol{};
223 std::string username{};
224 std::string password{};
225 std::string hostname{};
226 std::string port{};
227 std::string pathname{};
228 std::string search{};
229 std::string hash{};
230
231 // Let inputs be an empty list.
232 // Append input to inputs.
233 std::vector inputs{input};
234
235 // If input is a URLPatternInit then:
236 if (std::holds_alternative<url_pattern_init>(input)) {
237 ada_log(
238 "url_pattern::match called with url_pattern_init and base_url_string=",
239 base_url_string);
240 // If baseURLString was given, throw a TypeError.
241 if (base_url_string) {
242 ada_log("failed to match because base_url_string was given");
243 return tl::unexpected(errors::type_error);
244 }
245
246 // Let applyResult be the result of process a URLPatternInit given input,
247 // "url", protocol, username, password, hostname, port, pathname, search,
248 // and hash.
249 auto apply_result = url_pattern_init::process(
250 std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
251 protocol, username, password, hostname, port, pathname, search, hash);
252
253 // If this throws an exception, catch it, and return null.
254 if (!apply_result.has_value()) {
255 ada_log("match returned std::nullopt because process threw");
256 return std::nullopt;
257 }
258
259 // Set protocol to applyResult["protocol"].
260 ADA_ASSERT_TRUE(apply_result->protocol.has_value());
261 protocol = std::move(apply_result->protocol.value());
262
263 // Set username to applyResult["username"].
264 ADA_ASSERT_TRUE(apply_result->username.has_value());
265 username = std::move(apply_result->username.value());
266
267 // Set password to applyResult["password"].
268 ADA_ASSERT_TRUE(apply_result->password.has_value());
269 password = std::move(apply_result->password.value());
270
271 // Set hostname to applyResult["hostname"].
272 ADA_ASSERT_TRUE(apply_result->hostname.has_value());
273 hostname = std::move(apply_result->hostname.value());
274
275 // Set port to applyResult["port"].
276 ADA_ASSERT_TRUE(apply_result->port.has_value());
277 port = std::move(apply_result->port.value());
278
279 // Set pathname to applyResult["pathname"].
280 ADA_ASSERT_TRUE(apply_result->pathname.has_value());
281 pathname = std::move(apply_result->pathname.value());
282
283 // Set search to applyResult["search"].
284 ADA_ASSERT_TRUE(apply_result->search.has_value());
285 if (apply_result->search->starts_with("?")) {
286 search = apply_result->search->substr(1);
287 } else {
288 search = std::move(apply_result->search.value());
289 }
290
291 // Set hash to applyResult["hash"].
292 ADA_ASSERT_TRUE(apply_result->hash.has_value());
293 ADA_ASSERT_TRUE(!apply_result->hash->starts_with("#"));
294 hash = std::move(apply_result->hash.value());
295 } else {
296 ADA_ASSERT_TRUE(std::holds_alternative<std::string_view>(input));
297
298 // Let baseURL be null.
299 result<url_aggregator> base_url;
300
301 // If baseURLString was given, then:
302 if (base_url_string) {
303 // Let baseURL be the result of parsing baseURLString.
304 base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
305
306 // If baseURL is failure, return null.
307 if (!base_url) {
308 ada_log("match returned std::nullopt because failed to parse base_url=",
309 *base_url_string);
310 return std::nullopt;
311 }
312
313 // Append baseURLString to inputs.
314 inputs.emplace_back(*base_url_string);
315 }
316
317 url_aggregator* base_url_value =
318 base_url.has_value() ? &*base_url : nullptr;
319
320 // Set url to the result of parsing input given baseURL.
321 auto url = ada::parse<url_aggregator>(std::get<std::string_view>(input),
322 base_url_value);
323
324 // If url is failure, return null.
325 if (!url) {
326 ada_log("match returned std::nullopt because url failed");
327 return std::nullopt;
328 }
329
330 // Set protocol to url's scheme.
331 // IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
332 // is removed. Similar work was done on workerd:
333 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
334 protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
335 // Set username to url's username.
336 username = url->get_username();
337 // Set password to url's password.
338 password = url->get_password();
339 // Set hostname to url's host, serialized, or the empty string if the value
340 // is null.
341 hostname = url->get_hostname();
342 // Set port to url's port, serialized, or the empty string if the value is
343 // null.
344 port = url->get_port();
345 // Set pathname to the result of URL path serializing url.
346 pathname = url->get_pathname();
347 // Set search to url's query or the empty string if the value is null.
348 // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?'
349 // is removed. Similar work was done on workerd:
350 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
351 if (url->has_search()) {
352 auto view = url->get_search();
353 search = view.starts_with("?") ? url->get_search().substr(1) : view;
354 }
355 // Set hash to url's fragment or the empty string if the value is null.
356 // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
357 // removed. Similar work was done on workerd:
358 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
359 if (url->has_hash()) {
360 auto view = url->get_hash();
361 hash = view.starts_with("#") ? url->get_hash().substr(1) : view;
362 }
363 }
364
365 // Let protocolExecResult be RegExpBuiltinExec(urlPattern's protocol
366 // component's regular expression, protocol).
367 auto protocol_exec_result =
368 regex_provider::regex_search(protocol, protocol_component.regexp);
369
370 if (!protocol_exec_result) {
371 return std::nullopt;
372 }
373
374 // Let usernameExecResult be RegExpBuiltinExec(urlPattern's username
375 // component's regular expression, username).
376 auto username_exec_result =
377 regex_provider::regex_search(username, username_component.regexp);
378
379 if (!username_exec_result) {
380 return std::nullopt;
381 }
382
383 // Let passwordExecResult be RegExpBuiltinExec(urlPattern's password
384 // component's regular expression, password).
385 auto password_exec_result =
386 regex_provider::regex_search(password, password_component.regexp);
387
388 if (!password_exec_result) {
389 return std::nullopt;
390 }
391
392 // Let hostnameExecResult be RegExpBuiltinExec(urlPattern's hostname
393 // component's regular expression, hostname).
394 auto hostname_exec_result =
395 regex_provider::regex_search(hostname, hostname_component.regexp);
396
397 if (!hostname_exec_result) {
398 return std::nullopt;
399 }
400
401 // Let portExecResult be RegExpBuiltinExec(urlPattern's port component's
402 // regular expression, port).
403 auto port_exec_result =
404 regex_provider::regex_search(port, port_component.regexp);
405
406 if (!port_exec_result) {
407 return std::nullopt;
408 }
409
410 // Let pathnameExecResult be RegExpBuiltinExec(urlPattern's pathname
411 // component's regular expression, pathname).
412 auto pathname_exec_result =
413 regex_provider::regex_search(pathname, pathname_component.regexp);
414
415 if (!pathname_exec_result) {
416 return std::nullopt;
417 }
418
419 // Let searchExecResult be RegExpBuiltinExec(urlPattern's search component's
420 // regular expression, search).
421 auto search_exec_result =
422 regex_provider::regex_search(search, search_component.regexp);
423
424 if (!search_exec_result) {
425 return std::nullopt;
426 }
427
428 // Let hashExecResult be RegExpBuiltinExec(urlPattern's hash component's
429 // regular expression, hash).
430 auto hash_exec_result =
431 regex_provider::regex_search(hash, hash_component.regexp);
432
433 if (!hash_exec_result) {
434 return std::nullopt;
435 }
436
437 // Let result be a new URLPatternResult.
438 auto result = url_pattern_result{};
439 // Set result["inputs"] to inputs.
440 result.inputs = std::move(inputs);
441 // Set result["protocol"] to the result of creating a component match result
442 // given urlPattern's protocol component, protocol, and protocolExecResult.
443 result.protocol = protocol_component.create_component_match_result(
444 std::move(protocol), std::move(*protocol_exec_result));
445
446 // Set result["username"] to the result of creating a component match result
447 // given urlPattern's username component, username, and usernameExecResult.
448 result.username = username_component.create_component_match_result(
449 std::move(username), std::move(*username_exec_result));
450
451 // Set result["password"] to the result of creating a component match result
452 // given urlPattern's password component, password, and passwordExecResult.
453 result.password = password_component.create_component_match_result(
454 std::move(password), std::move(*password_exec_result));
455
456 // Set result["hostname"] to the result of creating a component match result
457 // given urlPattern's hostname component, hostname, and hostnameExecResult.
458 result.hostname = hostname_component.create_component_match_result(
459 std::move(hostname), std::move(*hostname_exec_result));
460
461 // Set result["port"] to the result of creating a component match result given
462 // urlPattern's port component, port, and portExecResult.
463 result.port = port_component.create_component_match_result(
464 std::move(port), std::move(*port_exec_result));
465
466 // Set result["pathname"] to the result of creating a component match result
467 // given urlPattern's pathname component, pathname, and pathnameExecResult.
468 result.pathname = pathname_component.create_component_match_result(
469 std::move(pathname), std::move(*pathname_exec_result));
470
471 // Set result["search"] to the result of creating a component match result
472 // given urlPattern's search component, search, and searchExecResult.
473 result.search = search_component.create_component_match_result(
474 std::move(search), std::move(*search_exec_result));
475
476 // Set result["hash"] to the result of creating a component match result given
477 // urlPattern's hash component, hash, and hashExecResult.
478 result.hash = hash_component.create_component_match_result(
479 std::move(hash), std::move(*hash_exec_result));
480
481 return result;
482}
483
484} // namespace ada
485#endif // ADA_INCLUDE_URL_PATTERN
486#endif
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
#define ada_lifetime_bound
Definition ada_idna.h:13
errors
Definition errors.h:10
@ type_error
Definition errors.h:10
template ada::result< url_aggregator > parse< url_aggregator >(std::string_view input, const url_aggregator *base_url)
tl::expected< result_type, ada::errors > result
Lightweight URL struct.
Generic URL struct reliant on std::string instantiation.
Definition url.h:45
std::string get_search() const noexcept
Definition url.cpp:643
constexpr std::string_view get_pathname() const noexcept
Definition url-inl.h:46
std::string get_hash() const noexcept
Definition url.cpp:662
std::string get_hostname() const noexcept
Definition url.cpp:639
const std::string & get_password() const noexcept
Definition url.cpp:654
std::string get_port() const noexcept
Definition url.cpp:658
const std::string & get_username() const noexcept
Definition url.cpp:650
constexpr bool has_search() const noexcept override
Definition url-inl.h:164
std::string get_protocol() const noexcept
Definition url.cpp:617
constexpr bool has_hash() const noexcept override
Definition url-inl.h:160
Declaration for the URLPattern implementation.
Declaration for the URLPattern helpers.