diff --git a/Libraries/LibHTTP/HTTP.h b/Libraries/LibHTTP/HTTP.h index c3e757562ee..86599c53aba 100644 --- a/Libraries/LibHTTP/HTTP.h +++ b/Libraries/LibHTTP/HTTP.h @@ -39,6 +39,36 @@ constexpr bool is_http_tab_or_space(u32 code_point) return code_point == 0x09u || code_point == 0x20u; } +constexpr bool is_http_token_code_point(u32 code_point) +{ + if ((code_point >= '0' && code_point <= '9') + || (code_point >= 'A' && code_point <= 'Z') + || (code_point >= 'a' && code_point <= 'z')) { + return true; + } + + switch (code_point) { + case '!': + case '#': + case '$': + case '%': + case '&': + case '\'': + case '*': + case '+': + case '-': + case '.': + case '^': + case '_': + case '`': + case '|': + case '~': + return true; + default: + return false; + } +} + enum class HttpQuotedStringExtractValue { No, Yes, diff --git a/Libraries/LibHTTP/Header.cpp b/Libraries/LibHTTP/Header.cpp index 91d2eb03dd6..7507d189d39 100644 --- a/Libraries/LibHTTP/Header.cpp +++ b/Libraries/LibHTTP/Header.cpp @@ -6,6 +6,8 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include +#include #include #include #include @@ -13,7 +15,6 @@ #include #include #include -#include #include #include @@ -82,8 +83,7 @@ Optional> Header::extract_header_values() const bool is_header_name(StringView header_name) { // A header name is a byte sequence that matches the field-name token production. - Regex regex { R"~~~(^[A-Za-z0-9!#$%&'*+\-.^_`|~]+$)~~~" }; - return regex.has_match(header_name); + return !header_name.is_empty() && all_of(header_name, is_http_token_code_point); } // https://fetch.spec.whatwg.org/#header-value diff --git a/Libraries/LibHTTP/Method.cpp b/Libraries/LibHTTP/Method.cpp index 36de5365446..0182edc137b 100644 --- a/Libraries/LibHTTP/Method.cpp +++ b/Libraries/LibHTTP/Method.cpp @@ -5,8 +5,9 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include +#include #include -#include namespace HTTP { @@ -14,8 +15,7 @@ namespace HTTP { bool is_method(StringView method) { // A method is a byte sequence that matches the method token production. - Regex regex { R"~~~(^[A-Za-z0-9!#$%&'*+\-.^_`|~]+$)~~~" }; - return regex.has_match(method); + return !method.is_empty() && all_of(method, is_http_token_code_point); } // https://fetch.spec.whatwg.org/#cors-safelisted-method diff --git a/Libraries/LibJS/Bytecode/RegexTable.cpp b/Libraries/LibJS/Bytecode/RegexTable.cpp index 768a92a89a9..d548bd1d2d3 100644 --- a/Libraries/LibJS/Bytecode/RegexTable.cpp +++ b/Libraries/LibJS/Bytecode/RegexTable.cpp @@ -8,23 +8,4 @@ namespace JS::Bytecode { -RegexTableIndex RegexTable::insert(ParsedRegex parsed_regex) -{ - Regex regex(parsed_regex.regex, parsed_regex.pattern.to_byte_string(), parsed_regex.flags); - m_regexes.append(move(regex)); - return m_regexes.size() - 1; -} - -Regex const& RegexTable::get(RegexTableIndex index) const -{ - return m_regexes[index.value()]; -} - -void RegexTable::dump() const -{ - outln("Regex Table:"); - for (size_t i = 0; i < m_regexes.size(); i++) - outln("{}: {}", i, m_regexes[i].pattern_value); -} - } diff --git a/Libraries/LibJS/Bytecode/RegexTable.h b/Libraries/LibJS/Bytecode/RegexTable.h index 4356c0d07fe..61bfb503dbc 100644 --- a/Libraries/LibJS/Bytecode/RegexTable.h +++ b/Libraries/LibJS/Bytecode/RegexTable.h @@ -7,21 +7,11 @@ #pragma once #include -#include -#include -#include -#include namespace JS::Bytecode { AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(u32, RegexTableIndex, Comparison); -struct ParsedRegex { - regex::Parser::Result regex; - String pattern; - regex::RegexOptions flags; -}; - class RegexTable { AK_MAKE_NONMOVABLE(RegexTable); AK_MAKE_NONCOPYABLE(RegexTable); @@ -29,13 +19,7 @@ class RegexTable { public: RegexTable() = default; - RegexTableIndex insert(ParsedRegex); - Regex const& get(RegexTableIndex) const; - void dump() const; - bool is_empty() const { return m_regexes.is_empty(); } - -private: - Vector> m_regexes; + bool is_empty() const { return true; } }; } diff --git a/Libraries/LibRegex/ECMAScriptRegex.cpp b/Libraries/LibRegex/ECMAScriptRegex.cpp new file mode 100644 index 00000000000..221509cd97b --- /dev/null +++ b/Libraries/LibRegex/ECMAScriptRegex.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2026-present, the Ladybird developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +namespace regex { + +struct ECMAScriptRegex::Impl { + CompiledRustRegex rust_regex; + Vector named_groups; +}; + +ErrorOr ECMAScriptRegex::compile(StringView utf8_pattern, ECMAScriptCompileFlags flags) +{ + RustRegexFlags rust_flags {}; + rust_flags.global = flags.global; + rust_flags.ignore_case = flags.ignore_case; + rust_flags.multiline = flags.multiline; + rust_flags.dot_all = flags.dot_all; + rust_flags.unicode = flags.unicode; + rust_flags.unicode_sets = flags.unicode_sets; + rust_flags.sticky = flags.sticky; + rust_flags.has_indices = flags.has_indices; + + auto compiled = CompiledRustRegex::compile(utf8_pattern, rust_flags); + if (compiled.is_error()) + return compiled.release_error(); + + auto rust_regex = compiled.release_value(); + + Vector named_groups; + named_groups.ensure_capacity(rust_regex.named_groups().size()); + for (auto const& rg : rust_regex.named_groups()) + named_groups.unchecked_append({ .name = rg.name, .index = rg.index }); + + auto impl = adopt_own(*new Impl { + .rust_regex = move(rust_regex), + .named_groups = move(named_groups), + }); + return ECMAScriptRegex(move(impl)); +} + +ECMAScriptRegex::~ECMAScriptRegex() = default; + +ECMAScriptRegex::ECMAScriptRegex(ECMAScriptRegex&& other) = default; +ECMAScriptRegex& ECMAScriptRegex::operator=(ECMAScriptRegex&& other) = default; + +ECMAScriptRegex::ECMAScriptRegex(OwnPtr impl) + : m_impl(move(impl)) +{ +} + +MatchResult ECMAScriptRegex::exec(Utf16View input, size_t start_pos) const +{ + auto result = m_impl->rust_regex.exec_internal(input, start_pos); + if (result == 1) + return MatchResult::Match; + if (result == -1) + return MatchResult::LimitExceeded; + return MatchResult::NoMatch; +} + +int ECMAScriptRegex::capture_slot(unsigned int slot) const +{ + return m_impl->rust_regex.capture_slot(slot); +} + +MatchResult ECMAScriptRegex::test(Utf16View input, size_t start_pos) const +{ + auto result = m_impl->rust_regex.test(input, start_pos); + if (result == 1) + return MatchResult::Match; + if (result == -1) + return MatchResult::LimitExceeded; + return MatchResult::NoMatch; +} + +unsigned int ECMAScriptRegex::capture_count() const +{ + return m_impl->rust_regex.capture_count(); +} + +unsigned int ECMAScriptRegex::total_groups() const +{ + return m_impl->rust_regex.total_groups(); +} + +Vector const& ECMAScriptRegex::named_groups() const +{ + return m_impl->named_groups; +} + +int ECMAScriptRegex::find_all(Utf16View input, size_t start_pos) const +{ + return m_impl->rust_regex.find_all(input, start_pos); +} + +ECMAScriptRegex::MatchPair ECMAScriptRegex::find_all_match(int index) const +{ + auto pair = m_impl->rust_regex.find_all_match(index); + return { pair.start, pair.end }; +} + +} diff --git a/Libraries/LibRegex/ECMAScriptRegex.h b/Libraries/LibRegex/ECMAScriptRegex.h new file mode 100644 index 00000000000..d18a1c5c6bc --- /dev/null +++ b/Libraries/LibRegex/ECMAScriptRegex.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2026-present, the Ladybird developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace regex { + +enum class MatchResult : i8 { + Match, + NoMatch, + LimitExceeded, +}; + +struct ECMAScriptCompileFlags { + bool global {}; + bool ignore_case {}; + bool multiline {}; + bool dot_all {}; + bool unicode {}; + bool unicode_sets {}; + bool sticky {}; + bool has_indices {}; +}; + +struct ECMAScriptNamedCaptureGroup { + String name; + unsigned int index; +}; + +class REGEX_API ECMAScriptRegex { + AK_MAKE_NONCOPYABLE(ECMAScriptRegex); + +public: + static ErrorOr compile(StringView utf8_pattern, ECMAScriptCompileFlags); + + ~ECMAScriptRegex(); + ECMAScriptRegex(ECMAScriptRegex&&); + ECMAScriptRegex& operator=(ECMAScriptRegex&&); + + /// Execute and fill internal capture buffer. + /// After a successful call, read results via capture_slot(). + [[nodiscard]] MatchResult exec(Utf16View input, size_t start_pos) const; + + /// Read a capture slot from the internal buffer (after exec). + /// Even slots are start positions, odd slots are end positions. + /// Returns -1 for unmatched captures. + int capture_slot(unsigned int slot) const; + + /// Test for a match without filling capture buffer. + [[nodiscard]] MatchResult test(Utf16View input, size_t start_pos) const; + + /// Number of numbered capture groups (excluding group 0). + unsigned int capture_count() const; + + /// Total number of capture groups including group 0. + unsigned int total_groups() const; + + /// Named capture groups with their indices. + Vector const& named_groups() const; + + /// Find all non-overlapping matches. Returns number of matches found. + /// Access results via find_all_match(i) after calling. + int find_all(Utf16View input, size_t start_pos) const; + + struct MatchPair { + int start; + int end; + }; + + /// Get the i-th match from find_all results. + MatchPair find_all_match(int index) const; + +private: + struct Impl; + ECMAScriptRegex(OwnPtr); + OwnPtr m_impl; +}; + +} // namespace regex diff --git a/Libraries/LibURL/Pattern/Component.cpp b/Libraries/LibURL/Pattern/Component.cpp index dcad8e4baa5..668de358edb 100644 --- a/Libraries/LibURL/Pattern/Component.cpp +++ b/Libraries/LibURL/Pattern/Component.cpp @@ -4,7 +4,8 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include +#include +#include #include #include #include @@ -19,10 +20,10 @@ bool protocol_component_matches_a_special_scheme(Component const& protocol_compo // 2. For each scheme of special scheme list: for (StringView scheme : special_schemes()) { // 1. Let test result be RegExpBuiltinExec(protocol component’s regular expression, scheme). - auto test_result = protocol_component.regular_expression->match(scheme); + auto test_result = protocol_component.matches(scheme); // 2. If test result is not null, then return true. - if (test_result.success) + if (test_result) return true; } @@ -225,27 +226,23 @@ PatternErrorOr Component::compile(Utf8View const& input, PatternParse // 3. Let flags be an empty string. // NOTE: These flags match the flags for the empty string of the LibJS RegExp implementation. - auto flags = regex::RegexOptions { - (regex::ECMAScriptFlags)regex::AllFlags::SingleMatch - | (regex::ECMAScriptFlags)regex::AllFlags::Global - | regex::ECMAScriptFlags::BrowserExtended - }; + regex::ECMAScriptCompileFlags flags {}; // 4. If options’s ignore case is true then set flags to "vi". if (options.ignore_case) { - flags |= regex::ECMAScriptFlags::UnicodeSets; - flags |= regex::ECMAScriptFlags::Insensitive; + flags.unicode_sets = true; + flags.ignore_case = true; } // 5. Otherwise set flags to "v" else { - flags |= regex::ECMAScriptFlags::UnicodeSets; + flags.unicode_sets = true; } // 6. Let regular expression be RegExpCreate(regular expression string, flags). If this throws an exception, catch // it, and throw a TypeError. - auto regex = make>(regular_expression_string.to_byte_string(), flags); - if (regex->parser_result.error != regex::Error::NoError) - return ErrorInfo { MUST(String::formatted("RegExp compile error: {}", regex->error_string())) }; + auto regex = regex::ECMAScriptRegex::compile(regular_expression_string.bytes_as_string_view(), flags); + if (regex.is_error()) + return ErrorInfo { MUST(String::formatted("RegExp compile error: {}", regex.release_error())) }; // 7. Let pattern string be the result of running generate a pattern string given part list and options. auto pattern_string = generate_a_pattern_string(part_list, options); @@ -266,14 +263,45 @@ PatternErrorOr Component::compile(Utf8View const& input, PatternParse // group name list is name list, and has regexp groups is has regexp groups. return Component { .pattern_string = move(pattern_string), - .regular_expression = move(regex), + .regular_expression = adopt_own(*new regex::ECMAScriptRegex(regex.release_value())), .group_name_list = move(name_list), .has_regexp_groups = has_regexp_groups, }; } +Component::ExecutionResult Component::execute(String const& input) const +{ + auto utf16_input = Utf16String::from_utf8(input); + auto match_result = regular_expression->exec(utf16_input.utf16_view(), 0); + if (match_result != regex::MatchResult::Match) + return {}; + + ExecutionResult result; + result.success = true; + result.captures.ensure_capacity(group_name_list.size()); + for (size_t index = 1; index <= group_name_list.size(); ++index) { + auto start = regular_expression->capture_slot(index * 2); + auto end = regular_expression->capture_slot(index * 2 + 1); + if (start < 0 || end < 0) { + result.captures.append({}); + continue; + } + + auto capture = utf16_input.substring_view(static_cast(start), static_cast(end - start)); + result.captures.append(MUST(capture.to_utf8())); + } + + return result; +} + +bool Component::matches(StringView input) const +{ + auto utf16_input = Utf16String::from_utf8(input); + return regular_expression->test(utf16_input.utf16_view(), 0) == regex::MatchResult::Match; +} + // https://urlpattern.spec.whatwg.org/#create-a-component-match-result -Component::Result Component::create_match_result(String const& input, regex::RegexResult const& exec_result) const +Component::Result Component::create_match_result(String const& input, ExecutionResult const& exec_result) const { // 1. Let result be a new URLPatternComponentResult. Component::Result result; @@ -286,18 +314,18 @@ Component::Result Component::create_match_result(String const& input, regex::Reg // 4. Let index be 1. // 5. While index is less than or equal to component’s group name list’s size: + VERIFY(exec_result.captures.size() == group_name_list.size()); for (size_t index = 1; index <= group_name_list.size(); ++index) { - auto const& capture = exec_result.capture_group_matches[0][index - 1]; - // 1. Let name be component’s group name list[index − 1]. auto name = group_name_list[index - 1]; // 2. Let value be Get(execResult, ToString(index)). // 3. Set groups[name] to value. - if (capture.view.is_null()) + auto const& capture = exec_result.captures[index - 1]; + if (!capture.has_value()) groups.set(name, Empty {}); else - groups.set(name, MUST(capture.view.to_string())); + groups.set(name, *capture); // 4. Increment index by 1. } diff --git a/Libraries/LibURL/Pattern/Component.h b/Libraries/LibURL/Pattern/Component.h index ae4edf688cd..be1f9dffc35 100644 --- a/Libraries/LibURL/Pattern/Component.h +++ b/Libraries/LibURL/Pattern/Component.h @@ -6,9 +6,11 @@ #pragma once +#include +#include #include #include -#include +#include #include namespace URL::Pattern { @@ -23,7 +25,14 @@ struct Component { OrderedHashMap> groups; }; - Result create_match_result(String const& input, regex::RegexResult const& exec_result) const; + struct ExecutionResult { + bool success { false }; + Vector> captures; + }; + + Result create_match_result(String const& input, ExecutionResult const& exec_result) const; + ExecutionResult execute(String const& input) const; + bool matches(StringView input) const; // https://urlpattern.spec.whatwg.org/#component-pattern-string // pattern string, a well formed pattern string @@ -31,7 +40,7 @@ struct Component { // https://urlpattern.spec.whatwg.org/#component-regular-expression // regular expression, a RegExp - OwnPtr> regular_expression; + OwnPtr regular_expression; // https://urlpattern.spec.whatwg.org/#component-group-name-list // group name list, a list of strings diff --git a/Libraries/LibURL/Pattern/Pattern.cpp b/Libraries/LibURL/Pattern/Pattern.cpp index bfcefdda33f..f1dde1a3e49 100644 --- a/Libraries/LibURL/Pattern/Pattern.cpp +++ b/Libraries/LibURL/Pattern/Pattern.cpp @@ -307,42 +307,42 @@ PatternErrorOr> Pattern::match(Variant const } // 14. Let protocolExecResult be RegExpBuiltinExec(urlPattern’s protocol component's regular expression, protocol). - auto protocol_exec_result = m_protocol_component.regular_expression->match(protocol); + auto protocol_exec_result = m_protocol_component.execute(protocol); if (!protocol_exec_result.success) return OptionalNone {}; // 15. Let usernameExecResult be RegExpBuiltinExec(urlPattern’s username component's regular expression, username). - auto username_exec_result = m_username_component.regular_expression->match(username); + auto username_exec_result = m_username_component.execute(username); if (!username_exec_result.success) return OptionalNone {}; // 16. Let passwordExecResult be RegExpBuiltinExec(urlPattern’s password component's regular expression, password). - auto password_exec_result = m_password_component.regular_expression->match(password); + auto password_exec_result = m_password_component.execute(password); if (!password_exec_result.success) return OptionalNone {}; // 17. Let hostnameExecResult be RegExpBuiltinExec(urlPattern’s hostname component's regular expression, hostname). - auto hostname_exec_result = m_hostname_component.regular_expression->match(hostname); + auto hostname_exec_result = m_hostname_component.execute(hostname); if (!hostname_exec_result.success) return OptionalNone {}; // 18. Let portExecResult be RegExpBuiltinExec(urlPattern’s port component's regular expression, port). - auto port_exec_result = m_port_component.regular_expression->match(port); + auto port_exec_result = m_port_component.execute(port); if (!port_exec_result.success) return OptionalNone {}; // 19. Let pathnameExecResult be RegExpBuiltinExec(urlPattern’s pathname component's regular expression, pathname). - auto pathname_exec_result = m_pathname_component.regular_expression->match(pathname); + auto pathname_exec_result = m_pathname_component.execute(pathname); if (!pathname_exec_result.success) return OptionalNone {}; // 20. Let searchExecResult be RegExpBuiltinExec(urlPattern’s search component's regular expression, search). - auto search_exec_result = m_search_component.regular_expression->match(search); + auto search_exec_result = m_search_component.execute(search); if (!search_exec_result.success) return OptionalNone {}; // 21. Let hashExecResult be RegExpBuiltinExec(urlPattern’s hash component's regular expression, hash). - auto hash_exec_result = m_hash_component.regular_expression->match(hash); + auto hash_exec_result = m_hash_component.execute(hash); if (!hash_exec_result.success) return OptionalNone {}; diff --git a/Libraries/LibWeb/HTML/HTMLInputElement.cpp b/Libraries/LibWeb/HTML/HTMLInputElement.cpp index a5f235bd9d8..102c072090c 100644 --- a/Libraries/LibWeb/HTML/HTMLInputElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLInputElement.cpp @@ -275,7 +275,7 @@ Optional> HTMLInputElement::suggestions_sourc } // https://html.spec.whatwg.org/multipage/input.html#compiled-pattern-regular-expression -Optional> HTMLInputElement::compiled_pattern_regular_expression() const +Optional HTMLInputElement::compiled_pattern_regular_expression() const { // 1. If the element does not have a pattern attribute specified, then return nothing. The element has no compiled pattern regular expression. auto maybe_pattern = get_attribute(HTML::AttributeNames::pattern); @@ -283,20 +283,25 @@ Optional> HTMLInputElement::compiled_pattern_regular_expression() return {}; // 2. Let pattern be the value of the pattern attribute of the element. - auto pattern = maybe_pattern.release_value().to_byte_string(); + auto pattern = maybe_pattern.release_value(); // 3. Let regexpCompletion be RegExpCreate(pattern, "v"). - Regex regexp_completion(pattern, JS::RegExpObject::default_flags | ECMAScriptFlags::UnicodeSets); + regex::ECMAScriptCompileFlags compile_flags {}; + compile_flags.unicode_sets = true; + auto regexp_completion = regex::ECMAScriptRegex::compile(pattern.bytes_as_string_view(), compile_flags); // 4. If regexpCompletion is an abrupt completion, then return nothing. The element has no compiled pattern regular expression. - if (regexp_completion.parser_result.error != regex::Error::NoError) + if (regexp_completion.is_error()) return {}; // 5. Let anchoredPattern be the string "^(?:", followed by pattern, followed by ")$". - auto anchored_pattern = ByteString::formatted("^(?:{})$", pattern); + auto anchored_pattern = MUST(String::formatted("^(?:{})$", pattern)); // 6. Return ! RegExpCreate(anchoredPattern, "v"). - return Regex(anchored_pattern, JS::RegExpObject::default_flags | ECMAScriptFlags::UnicodeSets); + auto anchored = regex::ECMAScriptRegex::compile(anchored_pattern.bytes_as_string_view(), compile_flags); + if (anchored.is_error()) + return {}; + return anchored.release_value(); } // https://html.spec.whatwg.org/multipage/input.html#dom-input-files @@ -3557,7 +3562,13 @@ bool HTMLInputElement::suffering_from_being_missing() const } // https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address -static Regex const valid_email_address_regex = Regex("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"); +static regex::ECMAScriptRegex& valid_email_address_regex() +{ + static auto regex = MUST(regex::ECMAScriptRegex::compile( + "^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"sv, + regex::ECMAScriptCompileFlags {})); + return regex; +} // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#suffering-from-a-type-mismatch bool HTMLInputElement::suffering_from_a_type_mismatch() const @@ -3577,7 +3588,7 @@ bool HTMLInputElement::suffering_from_a_type_mismatch() const // When the multiple attribute is not specified on the element: While the value of the element is neither the // empty string nor a single valid email address, the element is suffering from a type mismatch. if (!has_attribute(HTML::AttributeNames::multiple)) - return !input.is_empty() && !valid_email_address_regex.match(input.utf16_view()).success; + return !input.is_empty() && valid_email_address_regex().exec(input.utf16_view(), 0) != regex::MatchResult::Match; // When the multiple attribute is specified on the element: While the value of the element is not a valid email // address list, the element is suffering from a type mismatch. @@ -3588,7 +3599,7 @@ bool HTMLInputElement::suffering_from_a_type_mismatch() const bool valid = true; input.for_each_split_view(',', SplitBehavior::Nothing, [&](auto const& address) { - if (valid_email_address_regex.match(address).success) + if (valid_email_address_regex().exec(address, 0) == regex::MatchResult::Match) return IterationDecision::Continue; valid = false; @@ -3633,7 +3644,7 @@ bool HTMLInputElement::suffering_from_a_pattern_mismatch() const bool valid = true; value.for_each_split_view(',', SplitBehavior::Nothing, [&](auto const& value) { - if (regexp_object->match(value).success) + if (regexp_object->exec(value, 0) == regex::MatchResult::Match) return IterationDecision::Continue; valid = false; @@ -3643,7 +3654,7 @@ bool HTMLInputElement::suffering_from_a_pattern_mismatch() const return !valid; } - return !regexp_object->match(value.utf16_view()).success; + return regexp_object->exec(value.utf16_view(), 0) != regex::MatchResult::Match; } // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#suffering-from-an-underflow diff --git a/Libraries/LibWeb/HTML/HTMLInputElement.h b/Libraries/LibWeb/HTML/HTMLInputElement.h index d5048935f3c..7dcfe8aabd1 100644 --- a/Libraries/LibWeb/HTML/HTMLInputElement.h +++ b/Libraries/LibWeb/HTML/HTMLInputElement.h @@ -9,7 +9,7 @@ #pragma once -#include +#include #include #include #include @@ -377,7 +377,7 @@ private: GC::Ptr m_resource_request; SelectedCoordinate m_selected_coordinate; - Optional> compiled_pattern_regular_expression() const; + Optional compiled_pattern_regular_expression() const; Optional> suggestions_source_element() const; diff --git a/Tests/LibHTTP/TestHTTPUtils.cpp b/Tests/LibHTTP/TestHTTPUtils.cpp index c0a38b2e69a..b467d2e96d4 100644 --- a/Tests/LibHTTP/TestHTTPUtils.cpp +++ b/Tests/LibHTTP/TestHTTPUtils.cpp @@ -11,6 +11,7 @@ #include #include #include +#include TEST_CASE(collect_an_http_quoted_string) { @@ -127,6 +128,22 @@ TEST_CASE(extract_cache_control_directive) EXPECT(!HTTP::contains_cache_control_directive("=4"sv, "max-age"sv)); } +TEST_CASE(token_validation) +{ + EXPECT(HTTP::is_method("GET"sv)); + EXPECT(HTTP::is_method("PATCH"sv)); + EXPECT(HTTP::is_method("M-SEARCH"sv)); + EXPECT(!HTTP::is_method(""sv)); + EXPECT(!HTTP::is_method("GET "sv)); + EXPECT(!HTTP::is_method("GE:T"sv)); + + EXPECT(HTTP::is_header_name("Content-Type"sv)); + EXPECT(HTTP::is_header_name("X-Custom_Header"sv)); + EXPECT(!HTTP::is_header_name(""sv)); + EXPECT(!HTTP::is_header_name("Content Type"sv)); + EXPECT(!HTTP::is_header_name("Content:Type"sv)); +} + TEST_CASE(extract_header_values) { struct TestHeader { diff --git a/Tests/LibURL/CMakeLists.txt b/Tests/LibURL/CMakeLists.txt index a56a497063c..f3960ee5c88 100644 --- a/Tests/LibURL/CMakeLists.txt +++ b/Tests/LibURL/CMakeLists.txt @@ -1,9 +1,10 @@ set(URL_TEST_SOURCES TestURL.cpp + TestURLPattern.cpp TestURLPatternConstructorStringParser.cpp TestPublicSuffix.cpp ) foreach(source IN LISTS URL_TEST_SOURCES) - ladybird_test("${source}" LibURL LIBS LibURL) + ladybird_test("${source}" LibURL LIBS LibURL LibRegex) endforeach() diff --git a/Tests/LibURL/TestURLPattern.cpp b/Tests/LibURL/TestURLPattern.cpp new file mode 100644 index 00000000000..de598fed169 --- /dev/null +++ b/Tests/LibURL/TestURLPattern.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2026-present, the Ladybird developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +#include + +TEST_CASE(url_pattern_matches_named_groups) +{ + auto pattern = MUST(URL::Pattern::Pattern::create("https://example.com/:category/:id"_string)); + auto result = MUST(pattern.match("https://example.com/books/42"_string, {})); + VERIFY(result.has_value()); + + EXPECT_EQ(result->protocol.input, "https"_string); + EXPECT_EQ(result->pathname.input, "/books/42"_string); + EXPECT_EQ(result->pathname.groups.get("category"sv).value(), (Variant { "books"_string })); + EXPECT_EQ(result->pathname.groups.get("id"sv).value(), (Variant { "42"_string })); +} + +TEST_CASE(url_pattern_ignore_case_matching) +{ + auto pattern = MUST(URL::Pattern::Pattern::create("https://example.com/:value"_string, {}, URL::Pattern::IgnoreCase::Yes)); + auto result = MUST(pattern.match("https://example.com/CaseSensitive"_string, {})); + VERIFY(result.has_value()); + + EXPECT_EQ(result->pathname.groups.get("value"sv).value(), (Variant { "CaseSensitive"_string })); +} diff --git a/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt b/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt index 14edde7b0c0..b1cec167323 100644 Binary files a/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt and b/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt differ diff --git a/Tests/LibWeb/Text/expected/wpt-import/encoding/encodeInto.any.txt b/Tests/LibWeb/Text/expected/wpt-import/encoding/encodeInto.any.txt index c12d1a75994..3d6e4bedbd3 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/encoding/encodeInto.any.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/encoding/encodeInto.any.txt @@ -52,18 +52,18 @@ Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 0 Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler random Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 4, filler random Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler random -Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0 -Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0 -Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0 -Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0 -Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128 -Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128 -Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128 -Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128 -Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random -Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random -Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random -Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random +Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0 +Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0 +Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0 +Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0 +Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128 +Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128 +Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128 +Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128 +Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random +Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random +Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random +Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random Pass encodeInto() into ArrayBuffer with AU+df06 and destination length 4, offset 0, filler 0 Pass encodeInto() into SharedArrayBuffer with AU+df06 and destination length 4, offset 0, filler 0 Pass encodeInto() into ArrayBuffer with AU+df06 and destination length 4, offset 4, filler 0 diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/IdnaTestV2.window.txt b/Tests/LibWeb/Text/expected/wpt-import/url/IdnaTestV2.window.txt index ccf5a31611d..530c912925a 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/url/IdnaTestV2.window.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/url/IdnaTestV2.window.txt @@ -350,8 +350,8 @@ Pass ToASCII("⑷.four") U1 (ignored) Pass ToASCII("(4).four") U1 (ignored) Pass ToASCII("⑷.FOUR") U1 (ignored) Pass ToASCII("⑷.Four") U1 (ignored) -Pass ToASCII("aaU+d900z") V7; A3 -Pass ToASCII("AAU+d900Z") V7; A3 +Pass ToASCII("aU+d900z") V7; A3 +Pass ToASCII("AU+d900Z") V7; A3 Pass ToASCII("xn--") P4; A4_1 (ignored); A4_2 (ignored) Pass ToASCII("xn---") P4 Pass ToASCII("xn--ASCII-") P4 diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/a-element-origin.txt b/Tests/LibWeb/Text/expected/wpt-import/url/a-element-origin.txt index 09050ea24c1..fef31a799be 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/url/a-element-origin.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/url/a-element-origin.txt @@ -405,4 +405,4 @@ Pass Parsing origin: against Pass Parsing origin: against Pass Parsing origin: against Pass Parsing origin: against -Pass Parsing origin: against \ No newline at end of file +Pass Parsing origin: against \ No newline at end of file diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/url-constructor.any.txt b/Tests/LibWeb/Text/expected/wpt-import/url/url-constructor.any.txt index 34a597cebdc..8747e03297a 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/url/url-constructor.any.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/url/url-constructor.any.txt @@ -885,4 +885,4 @@ Pass Parsing: without base Pass Parsing: without base Pass Parsing: without base Pass Parsing: without base -Pass Parsing: without base \ No newline at end of file +Pass Parsing: without base \ No newline at end of file diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/url-origin.any.txt b/Tests/LibWeb/Text/expected/wpt-import/url/url-origin.any.txt index ea1926755b7..4c386b97268 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/url/url-origin.any.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/url/url-origin.any.txt @@ -406,4 +406,4 @@ Pass Origin parsing: without base Pass Origin parsing: without base Pass Origin parsing: without base Pass Origin parsing: without base -Pass Origin parsing: without base \ No newline at end of file +Pass Origin parsing: without base \ No newline at end of file