LibRegex: Add ECMAScriptRegex and migrate callers

Add `ECMAScriptRegex`, LibRegex's C++ facade for ECMAScript regexes.

The facade owns compilation, execution, captures, named groups, and
error translation for the Rust backend, which lets callers stop
depending on the legacy parser and matcher types directly. Use it in the
remaining non-LibJS callers: URLPattern, HTML input pattern handling,
and the places in LibHTTP that only needed token validation.

Where a full regex engine was unnecessary, replace those call sites with
direct character checks. Also update focused LibURL, LibHTTP, and WPT
coverage for the migrated callers and corrected surrogate handling.
This commit is contained in:
Andreas Kling
2026-03-25 10:52:20 +01:00
committed by Ali Mohammad Pur
parent 66fb0a8394
commit 34d954e2d7
Notes: github-actions[bot] 2026-03-27 16:35:21 +00:00
21 changed files with 394 additions and 104 deletions

View File

@@ -39,6 +39,36 @@ constexpr bool is_http_tab_or_space(u32 code_point)
return code_point == 0x09u || code_point == 0x20u;
}
// Returns true when `code_point` is a character this file accepts inside an HTTP token:
// an ASCII digit, an ASCII letter, or one of  ! # $ % & ' * + - . ^ _ ` | ~
// Every other code point (including anything outside ASCII) yields false.
constexpr bool is_http_token_code_point(u32 code_point)
{
    // Punctuation accepted in addition to letters and digits. Spelled as a brace list
    // (not a string literal) so that no trailing NUL ends up in the table.
    constexpr char token_punctuation[] = { '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~' };

    bool const is_digit = code_point >= '0' && code_point <= '9';
    bool const is_upper = code_point >= 'A' && code_point <= 'Z';
    bool const is_lower = code_point >= 'a' && code_point <= 'z';
    if (is_digit || is_upper || is_lower)
        return true;

    for (char punctuation : token_punctuation) {
        if (code_point == static_cast<u32>(punctuation))
            return true;
    }
    return false;
}
enum class HttpQuotedStringExtractValue {
No,
Yes,

View File

@@ -6,6 +6,8 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/AllOf.h>
#include <AK/AnyOf.h>
#include <AK/GenericLexer.h>
#include <AK/QuickSort.h>
#include <LibHTTP/HTTP.h>
@@ -13,7 +15,6 @@
#include <LibHTTP/Method.h>
#include <LibIPC/Decoder.h>
#include <LibIPC/Encoder.h>
#include <LibRegex/Regex.h>
#include <LibTextCodec/Decoder.h>
#include <LibTextCodec/Encoder.h>
@@ -82,8 +83,7 @@ Optional<Vector<ByteString>> Header::extract_header_values() const
bool is_header_name(StringView header_name)
{
// A header name is a byte sequence that matches the field-name token production.
Regex<ECMA262Parser> regex { R"~~~(^[A-Za-z0-9!#$%&'*+\-.^_`|~]+$)~~~" };
return regex.has_match(header_name);
return !header_name.is_empty() && all_of(header_name, is_http_token_code_point);
}
// https://fetch.spec.whatwg.org/#header-value

View File

@@ -5,8 +5,9 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/AllOf.h>
#include <LibHTTP/HTTP.h>
#include <LibHTTP/Method.h>
#include <LibRegex/Regex.h>
namespace HTTP {
@@ -14,8 +15,7 @@ namespace HTTP {
bool is_method(StringView method)
{
// A method is a byte sequence that matches the method token production.
Regex<ECMA262Parser> regex { R"~~~(^[A-Za-z0-9!#$%&'*+\-.^_`|~]+$)~~~" };
return regex.has_match(method);
return !method.is_empty() && all_of(method, is_http_token_code_point);
}
// https://fetch.spec.whatwg.org/#cors-safelisted-method

View File

@@ -8,23 +8,4 @@
namespace JS::Bytecode {
RegexTableIndex RegexTable::insert(ParsedRegex parsed_regex)
{
Regex<ECMA262> regex(parsed_regex.regex, parsed_regex.pattern.to_byte_string(), parsed_regex.flags);
m_regexes.append(move(regex));
return m_regexes.size() - 1;
}
Regex<ECMA262> const& RegexTable::get(RegexTableIndex index) const
{
return m_regexes[index.value()];
}
void RegexTable::dump() const
{
outln("Regex Table:");
for (size_t i = 0; i < m_regexes.size(); i++)
outln("{}: {}", i, m_regexes[i].pattern_value);
}
}

View File

@@ -7,21 +7,11 @@
#pragma once
#include <AK/DistinctNumeric.h>
#include <AK/String.h>
#include <AK/Vector.h>
#include <LibRegex/Regex.h>
#include <LibRegex/RegexParser.h>
namespace JS::Bytecode {
AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(u32, RegexTableIndex, Comparison);
struct ParsedRegex {
regex::Parser::Result regex;
String pattern;
regex::RegexOptions<ECMAScriptFlags> flags;
};
class RegexTable {
AK_MAKE_NONMOVABLE(RegexTable);
AK_MAKE_NONCOPYABLE(RegexTable);
@@ -29,13 +19,7 @@ class RegexTable {
public:
RegexTable() = default;
RegexTableIndex insert(ParsedRegex);
Regex<ECMA262> const& get(RegexTableIndex) const;
void dump() const;
bool is_empty() const { return m_regexes.is_empty(); }
private:
Vector<Regex<ECMA262>> m_regexes;
bool is_empty() const { return true; }
};
}

View File

@@ -0,0 +1,109 @@
/*
* Copyright (c) 2026-present, the Ladybird developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/OwnPtr.h>
#include <LibRegex/ECMAScriptRegex.h>
#include <LibRegex/RustRegex.h>
namespace regex {
// Private state of an ECMAScriptRegex, built once by ECMAScriptRegex::compile().
struct ECMAScriptRegex::Impl {
// The compiled backend regex returned by CompiledRustRegex::compile().
CompiledRustRegex rust_regex;
// Name/index pairs copied out of rust_regex.named_groups() when the regex was compiled.
Vector<ECMAScriptNamedCaptureGroup> named_groups;
};
// Compiles `utf8_pattern` with the Rust backend and wraps the result in an ECMAScriptRegex.
// If the backend reports an error, that error is returned unchanged.
ErrorOr<ECMAScriptRegex, String> ECMAScriptRegex::compile(StringView utf8_pattern, ECMAScriptCompileFlags flags)
{
    // Mirror each facade flag onto the same-named backend flag.
    RustRegexFlags backend_flags {};
    backend_flags.global = flags.global;
    backend_flags.ignore_case = flags.ignore_case;
    backend_flags.multiline = flags.multiline;
    backend_flags.dot_all = flags.dot_all;
    backend_flags.unicode = flags.unicode;
    backend_flags.unicode_sets = flags.unicode_sets;
    backend_flags.sticky = flags.sticky;
    backend_flags.has_indices = flags.has_indices;

    auto compile_result = CompiledRustRegex::compile(utf8_pattern, backend_flags);
    if (compile_result.is_error())
        return compile_result.release_error();
    auto backend_regex = compile_result.release_value();

    // Copy the backend's named groups into facade-level structs before the backend
    // regex is moved into the Impl below.
    Vector<ECMAScriptNamedCaptureGroup> groups;
    groups.ensure_capacity(backend_regex.named_groups().size());
    for (auto const& backend_group : backend_regex.named_groups())
        groups.unchecked_append({ .name = backend_group.name, .index = backend_group.index });

    auto state = adopt_own(*new Impl {
        .rust_regex = move(backend_regex),
        .named_groups = move(groups),
    });
    return ECMAScriptRegex(move(state));
}
// The destructor and move operations are defaulted in this file rather than in the header.
// NOTE(review): presumably because Impl is only a complete type here — confirm.
ECMAScriptRegex::~ECMAScriptRegex() = default;
ECMAScriptRegex::ECMAScriptRegex(ECMAScriptRegex&& other) = default;
ECMAScriptRegex& ECMAScriptRegex::operator=(ECMAScriptRegex&& other) = default;
// Takes ownership of an already-built Impl; compile() is the caller in this file.
ECMAScriptRegex::ECMAScriptRegex(OwnPtr<Impl> impl)
: m_impl(move(impl))
{
}
// Runs the backend's exec_internal() on `input` starting at `start_pos` and translates
// its status code: 1 -> Match, -1 -> LimitExceeded, any other value -> NoMatch.
MatchResult ECMAScriptRegex::exec(Utf16View input, size_t start_pos) const
{
    auto const status = m_impl->rust_regex.exec_internal(input, start_pos);
    return status == 1 ? MatchResult::Match
        : status == -1 ? MatchResult::LimitExceeded
                       : MatchResult::NoMatch;
}
// Forwards to the backend's capture_slot() and returns its value for `slot` unchanged.
int ECMAScriptRegex::capture_slot(unsigned int slot) const
{
return m_impl->rust_regex.capture_slot(slot);
}
// Runs the backend's test() on `input` starting at `start_pos` and translates its
// status code: 1 -> Match, -1 -> LimitExceeded, any other value -> NoMatch.
MatchResult ECMAScriptRegex::test(Utf16View input, size_t start_pos) const
{
    auto const status = m_impl->rust_regex.test(input, start_pos);
    return status == 1 ? MatchResult::Match
        : status == -1 ? MatchResult::LimitExceeded
                       : MatchResult::NoMatch;
}
// Returns the backend's capture_count() unchanged.
unsigned int ECMAScriptRegex::capture_count() const
{
return m_impl->rust_regex.capture_count();
}
// Returns the backend's total_groups() unchanged.
unsigned int ECMAScriptRegex::total_groups() const
{
return m_impl->rust_regex.total_groups();
}
// Returns the named-group list stored in the Impl (not a fresh query of the backend).
// The reference points into this object's Impl.
Vector<ECMAScriptNamedCaptureGroup> const& ECMAScriptRegex::named_groups() const
{
return m_impl->named_groups;
}
// Forwards to the backend's find_all() for `input` from `start_pos` and returns its
// result unchanged.
int ECMAScriptRegex::find_all(Utf16View input, size_t start_pos) const
{
return m_impl->rust_regex.find_all(input, start_pos);
}
// Fetches the backend's match pair for `index` and copies its start/end values into
// the facade's own MatchPair type.
ECMAScriptRegex::MatchPair ECMAScriptRegex::find_all_match(int index) const
{
auto pair = m_impl->rust_regex.find_all_match(index);
return { pair.start, pair.end };
}
}

View File

@@ -0,0 +1,90 @@
/*
* Copyright (c) 2026-present, the Ladybird developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Error.h>
#include <AK/Noncopyable.h>
#include <AK/OwnPtr.h>
#include <AK/String.h>
#include <AK/Utf16View.h>
#include <AK/Vector.h>
#include <LibRegex/Export.h>
namespace regex {
/// Outcome reported by ECMAScriptRegex::exec() and ECMAScriptRegex::test().
enum class MatchResult : i8 {
/// The pattern matched.
Match,
/// The pattern did not match.
NoMatch,
/// NOTE(review): presumably the backend gave up after hitting an internal limit — confirm.
LimitExceeded,
};
/// Options passed to ECMAScriptRegex::compile(). Every flag defaults to false.
/// NOTE(review): the names presumably mirror the ECMAScript RegExp flags
/// (g, i, m, s, u, v, y, d) — confirm against the backend.
struct ECMAScriptCompileFlags {
bool global {};
bool ignore_case {};
bool multiline {};
bool dot_all {};
bool unicode {};
bool unicode_sets {};
bool sticky {};
bool has_indices {};
};
/// One entry of the list returned by ECMAScriptRegex::named_groups().
struct ECMAScriptNamedCaptureGroup {
/// The group's name.
String name;
/// The index associated with the group.
/// NOTE(review): presumably the capture group number — confirm.
unsigned int index;
};
/// Move-only handle to a compiled ECMAScript regular expression.
/// Instances are created with compile(); all state lives behind an opaque Impl.
/// NOTE(review): exec() and find_all() are const yet their results are read back from
/// the object afterwards, so sharing one instance across threads looks unsafe — confirm.
class REGEX_API ECMAScriptRegex {
AK_MAKE_NONCOPYABLE(ECMAScriptRegex);
public:
/// Compile `utf8_pattern` with the given flags.
/// Returns the compiled regex, or an error String if compilation fails.
static ErrorOr<ECMAScriptRegex, String> compile(StringView utf8_pattern, ECMAScriptCompileFlags);
/// Destructor and move operations are declared here without inline definitions.
~ECMAScriptRegex();
ECMAScriptRegex(ECMAScriptRegex&&);
ECMAScriptRegex& operator=(ECMAScriptRegex&&);
/// Execute and fill internal capture buffer.
/// After a successful call, read results via capture_slot().
[[nodiscard]] MatchResult exec(Utf16View input, size_t start_pos) const;
/// Read a capture slot from the internal buffer (after exec).
/// Even slots are start positions, odd slots are end positions.
/// Returns -1 for unmatched captures.
int capture_slot(unsigned int slot) const;
/// Test for a match without filling capture buffer.
[[nodiscard]] MatchResult test(Utf16View input, size_t start_pos) const;
/// Number of numbered capture groups (excluding group 0).
unsigned int capture_count() const;
/// Total number of capture groups including group 0.
unsigned int total_groups() const;
/// Named capture groups with their indices.
Vector<ECMAScriptNamedCaptureGroup> const& named_groups() const;
/// Find all non-overlapping matches. Returns number of matches found.
/// Access results via find_all_match(i) after calling.
int find_all(Utf16View input, size_t start_pos) const;
/// Start/end pair describing one match found by find_all().
struct MatchPair {
int start;
int end;
};
/// Get the i-th match from find_all results.
MatchPair find_all_match(int index) const;
private:
/// Only declared here; holds the implementation state.
struct Impl;
/// Private: instances are obtained through compile().
ECMAScriptRegex(OwnPtr<Impl>);
OwnPtr<Impl> m_impl;
};
} // namespace regex

View File

@@ -4,7 +4,8 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibRegex/Regex.h>
#include <AK/Utf16String.h>
#include <LibRegex/ECMAScriptRegex.h>
#include <LibURL/Pattern/Component.h>
#include <LibURL/Pattern/PatternParser.h>
#include <LibURL/Pattern/String.h>
@@ -19,10 +20,10 @@ bool protocol_component_matches_a_special_scheme(Component const& protocol_compo
// 2. For each scheme of special scheme list:
for (StringView scheme : special_schemes()) {
// 1. Let test result be RegExpBuiltinExec(protocol component's regular expression, scheme).
auto test_result = protocol_component.regular_expression->match(scheme);
auto test_result = protocol_component.matches(scheme);
// 2. If test result is not null, then return true.
if (test_result.success)
if (test_result)
return true;
}
@@ -225,27 +226,23 @@ PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParse
// 3. Let flags be an empty string.
// NOTE: These flags match the flags for the empty string of the LibJS RegExp implementation.
auto flags = regex::RegexOptions<ECMAScriptFlags> {
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
| (regex::ECMAScriptFlags)regex::AllFlags::Global
| regex::ECMAScriptFlags::BrowserExtended
};
regex::ECMAScriptCompileFlags flags {};
// 4. If options's ignore case is true then set flags to "vi".
if (options.ignore_case) {
flags |= regex::ECMAScriptFlags::UnicodeSets;
flags |= regex::ECMAScriptFlags::Insensitive;
flags.unicode_sets = true;
flags.ignore_case = true;
}
// 5. Otherwise set flags to "v"
else {
flags |= regex::ECMAScriptFlags::UnicodeSets;
flags.unicode_sets = true;
}
// 6. Let regular expression be RegExpCreate(regular expression string, flags). If this throws an exception, catch
// it, and throw a TypeError.
auto regex = make<Regex<ECMA262>>(regular_expression_string.to_byte_string(), flags);
if (regex->parser_result.error != regex::Error::NoError)
return ErrorInfo { MUST(String::formatted("RegExp compile error: {}", regex->error_string())) };
auto regex = regex::ECMAScriptRegex::compile(regular_expression_string.bytes_as_string_view(), flags);
if (regex.is_error())
return ErrorInfo { MUST(String::formatted("RegExp compile error: {}", regex.release_error())) };
// 7. Let pattern string be the result of running generate a pattern string given part list and options.
auto pattern_string = generate_a_pattern_string(part_list, options);
@@ -266,14 +263,45 @@ PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParse
// group name list is name list, and has regexp groups is has regexp groups.
return Component {
.pattern_string = move(pattern_string),
.regular_expression = move(regex),
.regular_expression = adopt_own(*new regex::ECMAScriptRegex(regex.release_value())),
.group_name_list = move(name_list),
.has_regexp_groups = has_regexp_groups,
};
}
// Runs this component's regular expression against `input` (converted to UTF-16),
// starting at offset 0. If exec() does not report a match, a default ExecutionResult
// (success == false, no captures) is returned. Otherwise the result has success == true
// and one entry per name in group_name_list: the captured text converted back to UTF-8,
// or an empty Optional when either capture slot is negative.
Component::ExecutionResult Component::execute(String const& input) const
{
    auto utf16_input = Utf16String::from_utf8(input);
    if (regular_expression->exec(utf16_input.utf16_view(), 0) != regex::MatchResult::Match)
        return {};

    auto const group_count = group_name_list.size();

    ExecutionResult result;
    result.success = true;
    result.captures.ensure_capacity(group_count);

    // Group N is read from slots 2N (start) and 2N + 1 (end); the loop starts at group 1.
    for (size_t group = 1; group <= group_count; ++group) {
        auto const start = regular_expression->capture_slot(group * 2);
        auto const end = regular_expression->capture_slot(group * 2 + 1);

        if (start >= 0 && end >= 0) {
            auto matched_text = utf16_input.substring_view(static_cast<size_t>(start), static_cast<size_t>(end - start));
            result.captures.append(MUST(matched_text.to_utf8()));
        } else {
            result.captures.append({});
        }
    }

    return result;
}
// Returns true when this component's regular expression reports a match for `input`
// (converted to UTF-16) starting at offset 0. Uses test(), so no captures are read.
bool Component::matches(StringView input) const
{
auto utf16_input = Utf16String::from_utf8(input);
return regular_expression->test(utf16_input.utf16_view(), 0) == regex::MatchResult::Match;
}
// https://urlpattern.spec.whatwg.org/#create-a-component-match-result
Component::Result Component::create_match_result(String const& input, regex::RegexResult const& exec_result) const
Component::Result Component::create_match_result(String const& input, ExecutionResult const& exec_result) const
{
// 1. Let result be a new URLPatternComponentResult.
Component::Result result;
@@ -286,18 +314,18 @@ Component::Result Component::create_match_result(String const& input, regex::Reg
// 4. Let index be 1.
// 5. While index is less than or equal to component's group name list's size:
VERIFY(exec_result.captures.size() == group_name_list.size());
for (size_t index = 1; index <= group_name_list.size(); ++index) {
auto const& capture = exec_result.capture_group_matches[0][index - 1];
// 1. Let name be component's group name list[index - 1].
auto name = group_name_list[index - 1];
// 2. Let value be Get(execResult, ToString(index)).
// 3. Set groups[name] to value.
if (capture.view.is_null())
auto const& capture = exec_result.captures[index - 1];
if (!capture.has_value())
groups.set(name, Empty {});
else
groups.set(name, MUST(capture.view.to_string()));
groups.set(name, *capture);
// 4. Increment index by 1.
}

View File

@@ -6,9 +6,11 @@
#pragma once
#include <AK/HashMap.h>
#include <AK/Optional.h>
#include <AK/OwnPtr.h>
#include <AK/String.h>
#include <LibRegex/Regex.h>
#include <LibRegex/ECMAScriptRegex.h>
#include <LibURL/Pattern/PatternParser.h>
namespace URL::Pattern {
@@ -23,7 +25,14 @@ struct Component {
OrderedHashMap<String, Variant<String, Empty>> groups;
};
Result create_match_result(String const& input, regex::RegexResult const& exec_result) const;
// Result of running the component's regular expression via execute().
struct ExecutionResult {
// True only when the regular expression matched.
bool success { false };
// One entry per group name; an empty Optional marks a group that did not capture.
Vector<Optional<String>> captures;
};
Result create_match_result(String const& input, ExecutionResult const& exec_result) const;
ExecutionResult execute(String const& input) const;
bool matches(StringView input) const;
// https://urlpattern.spec.whatwg.org/#component-pattern-string
// pattern string, a well formed pattern string
@@ -31,7 +40,7 @@ struct Component {
// https://urlpattern.spec.whatwg.org/#component-regular-expression
// regular expression, a RegExp
OwnPtr<Regex<ECMA262>> regular_expression;
OwnPtr<regex::ECMAScriptRegex> regular_expression;
// https://urlpattern.spec.whatwg.org/#component-group-name-list
// group name list, a list of strings

View File

@@ -307,42 +307,42 @@ PatternErrorOr<Optional<Result>> Pattern::match(Variant<String, Init, URL> const
}
// 14. Let protocolExecResult be RegExpBuiltinExec(urlPattern's protocol component's regular expression, protocol).
auto protocol_exec_result = m_protocol_component.regular_expression->match(protocol);
auto protocol_exec_result = m_protocol_component.execute(protocol);
if (!protocol_exec_result.success)
return OptionalNone {};
// 15. Let usernameExecResult be RegExpBuiltinExec(urlPattern's username component's regular expression, username).
auto username_exec_result = m_username_component.regular_expression->match(username);
auto username_exec_result = m_username_component.execute(username);
if (!username_exec_result.success)
return OptionalNone {};
// 16. Let passwordExecResult be RegExpBuiltinExec(urlPattern's password component's regular expression, password).
auto password_exec_result = m_password_component.regular_expression->match(password);
auto password_exec_result = m_password_component.execute(password);
if (!password_exec_result.success)
return OptionalNone {};
// 17. Let hostnameExecResult be RegExpBuiltinExec(urlPattern's hostname component's regular expression, hostname).
auto hostname_exec_result = m_hostname_component.regular_expression->match(hostname);
auto hostname_exec_result = m_hostname_component.execute(hostname);
if (!hostname_exec_result.success)
return OptionalNone {};
// 18. Let portExecResult be RegExpBuiltinExec(urlPattern's port component's regular expression, port).
auto port_exec_result = m_port_component.regular_expression->match(port);
auto port_exec_result = m_port_component.execute(port);
if (!port_exec_result.success)
return OptionalNone {};
// 19. Let pathnameExecResult be RegExpBuiltinExec(urlPattern's pathname component's regular expression, pathname).
auto pathname_exec_result = m_pathname_component.regular_expression->match(pathname);
auto pathname_exec_result = m_pathname_component.execute(pathname);
if (!pathname_exec_result.success)
return OptionalNone {};
// 20. Let searchExecResult be RegExpBuiltinExec(urlPattern's search component's regular expression, search).
auto search_exec_result = m_search_component.regular_expression->match(search);
auto search_exec_result = m_search_component.execute(search);
if (!search_exec_result.success)
return OptionalNone {};
// 21. Let hashExecResult be RegExpBuiltinExec(urlPattern's hash component's regular expression, hash).
auto hash_exec_result = m_hash_component.regular_expression->match(hash);
auto hash_exec_result = m_hash_component.execute(hash);
if (!hash_exec_result.success)
return OptionalNone {};

View File

@@ -275,7 +275,7 @@ Optional<GC::Ref<HTMLDataListElement const>> HTMLInputElement::suggestions_sourc
}
// https://html.spec.whatwg.org/multipage/input.html#compiled-pattern-regular-expression
Optional<Regex<ECMA262>> HTMLInputElement::compiled_pattern_regular_expression() const
Optional<regex::ECMAScriptRegex> HTMLInputElement::compiled_pattern_regular_expression() const
{
// 1. If the element does not have a pattern attribute specified, then return nothing. The element has no compiled pattern regular expression.
auto maybe_pattern = get_attribute(HTML::AttributeNames::pattern);
@@ -283,20 +283,25 @@ Optional<Regex<ECMA262>> HTMLInputElement::compiled_pattern_regular_expression()
return {};
// 2. Let pattern be the value of the pattern attribute of the element.
auto pattern = maybe_pattern.release_value().to_byte_string();
auto pattern = maybe_pattern.release_value();
// 3. Let regexpCompletion be RegExpCreate(pattern, "v").
Regex<ECMA262> regexp_completion(pattern, JS::RegExpObject::default_flags | ECMAScriptFlags::UnicodeSets);
regex::ECMAScriptCompileFlags compile_flags {};
compile_flags.unicode_sets = true;
auto regexp_completion = regex::ECMAScriptRegex::compile(pattern.bytes_as_string_view(), compile_flags);
// 4. If regexpCompletion is an abrupt completion, then return nothing. The element has no compiled pattern regular expression.
if (regexp_completion.parser_result.error != regex::Error::NoError)
if (regexp_completion.is_error())
return {};
// 5. Let anchoredPattern be the string "^(?:", followed by pattern, followed by ")$".
auto anchored_pattern = ByteString::formatted("^(?:{})$", pattern);
auto anchored_pattern = MUST(String::formatted("^(?:{})$", pattern));
// 6. Return ! RegExpCreate(anchoredPattern, "v").
return Regex<ECMA262>(anchored_pattern, JS::RegExpObject::default_flags | ECMAScriptFlags::UnicodeSets);
auto anchored = regex::ECMAScriptRegex::compile(anchored_pattern.bytes_as_string_view(), compile_flags);
if (anchored.is_error())
return {};
return anchored.release_value();
}
// https://html.spec.whatwg.org/multipage/input.html#dom-input-files
@@ -3557,7 +3562,13 @@ bool HTMLInputElement::suffering_from_being_missing() const
}
// https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
static Regex<ECMA262> const valid_email_address_regex = Regex<ECMA262>("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$");
// Returns the shared email-address regex. It is a function-local static, so the pattern
// is compiled on the first call and the same object is returned afterwards.
// Compiled with default flags (all false); MUST() is applied to the compile result.
static regex::ECMAScriptRegex& valid_email_address_regex()
{
static auto regex = MUST(regex::ECMAScriptRegex::compile(
"^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"sv,
regex::ECMAScriptCompileFlags {}));
return regex;
}
// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#suffering-from-a-type-mismatch
bool HTMLInputElement::suffering_from_a_type_mismatch() const
@@ -3577,7 +3588,7 @@ bool HTMLInputElement::suffering_from_a_type_mismatch() const
// When the multiple attribute is not specified on the element: While the value of the element is neither the
// empty string nor a single valid email address, the element is suffering from a type mismatch.
if (!has_attribute(HTML::AttributeNames::multiple))
return !input.is_empty() && !valid_email_address_regex.match(input.utf16_view()).success;
return !input.is_empty() && valid_email_address_regex().exec(input.utf16_view(), 0) != regex::MatchResult::Match;
// When the multiple attribute is specified on the element: While the value of the element is not a valid email
// address list, the element is suffering from a type mismatch.
@@ -3588,7 +3599,7 @@ bool HTMLInputElement::suffering_from_a_type_mismatch() const
bool valid = true;
input.for_each_split_view(',', SplitBehavior::Nothing, [&](auto const& address) {
if (valid_email_address_regex.match(address).success)
if (valid_email_address_regex().exec(address, 0) == regex::MatchResult::Match)
return IterationDecision::Continue;
valid = false;
@@ -3633,7 +3644,7 @@ bool HTMLInputElement::suffering_from_a_pattern_mismatch() const
bool valid = true;
value.for_each_split_view(',', SplitBehavior::Nothing, [&](auto const& value) {
if (regexp_object->match(value).success)
if (regexp_object->exec(value, 0) == regex::MatchResult::Match)
return IterationDecision::Continue;
valid = false;
@@ -3643,7 +3654,7 @@ bool HTMLInputElement::suffering_from_a_pattern_mismatch() const
return !valid;
}
return !regexp_object->match(value.utf16_view()).success;
return regexp_object->exec(value.utf16_view(), 0) != regex::MatchResult::Match;
}
// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#suffering-from-an-underflow

View File

@@ -9,7 +9,7 @@
#pragma once
#include <LibRegex/Regex.h>
#include <LibRegex/ECMAScriptRegex.h>
#include <LibWeb/DOM/DocumentLoadEventDelayer.h>
#include <LibWeb/DOM/Text.h>
#include <LibWeb/Export.h>
@@ -377,7 +377,7 @@ private:
GC::Ptr<SharedResourceRequest> m_resource_request;
SelectedCoordinate m_selected_coordinate;
Optional<Regex<ECMA262>> compiled_pattern_regular_expression() const;
Optional<regex::ECMAScriptRegex> compiled_pattern_regular_expression() const;
Optional<GC::Ref<HTMLDataListElement const>> suggestions_source_element() const;

View File

@@ -11,6 +11,7 @@
#include <LibHTTP/Cache/Utilities.h>
#include <LibHTTP/HTTP.h>
#include <LibHTTP/Header.h>
#include <LibHTTP/Method.h>
TEST_CASE(collect_an_http_quoted_string)
{
@@ -127,6 +128,22 @@ TEST_CASE(extract_cache_control_directive)
EXPECT(!HTTP::contains_cache_control_directive("=4"sv, "max-age"sv));
}
// Exercises HTTP::is_method() and HTTP::is_header_name(): well-formed tokens are accepted,
// while the empty string and strings containing a space or ':' are rejected.
TEST_CASE(token_validation)
{
// Methods: plain tokens, including one containing '-'.
EXPECT(HTTP::is_method("GET"sv));
EXPECT(HTTP::is_method("PATCH"sv));
EXPECT(HTTP::is_method("M-SEARCH"sv));
// Methods: empty input, trailing space, embedded ':'.
EXPECT(!HTTP::is_method(""sv));
EXPECT(!HTTP::is_method("GET "sv));
EXPECT(!HTTP::is_method("GE:T"sv));
// Header names: tokens containing '-' and '_'.
EXPECT(HTTP::is_header_name("Content-Type"sv));
EXPECT(HTTP::is_header_name("X-Custom_Header"sv));
// Header names: empty input, embedded space, embedded ':'.
EXPECT(!HTTP::is_header_name(""sv));
EXPECT(!HTTP::is_header_name("Content Type"sv));
EXPECT(!HTTP::is_header_name("Content:Type"sv));
}
TEST_CASE(extract_header_values)
{
struct TestHeader {

View File

@@ -1,9 +1,10 @@
set(URL_TEST_SOURCES
TestURL.cpp
TestURLPattern.cpp
TestURLPatternConstructorStringParser.cpp
TestPublicSuffix.cpp
)
foreach(source IN LISTS URL_TEST_SOURCES)
ladybird_test("${source}" LibURL LIBS LibURL)
ladybird_test("${source}" LibURL LIBS LibURL LibRegex)
endforeach()

View File

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2026-present, the Ladybird developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <LibURL/Pattern/Pattern.h>
// A pattern with two named pathname groups must match a concrete URL and expose both
// captured values under their group names.
TEST_CASE(url_pattern_matches_named_groups)
{
auto pattern = MUST(URL::Pattern::Pattern::create("https://example.com/:category/:id"_string));
auto result = MUST(pattern.match("https://example.com/books/42"_string, {}));
// The checks below dereference `result`, so a missing match must stop the test here.
VERIFY(result.has_value());
EXPECT_EQ(result->protocol.input, "https"_string);
EXPECT_EQ(result->pathname.input, "/books/42"_string);
EXPECT_EQ(result->pathname.groups.get("category"sv).value(), (Variant<String, Empty> { "books"_string }));
EXPECT_EQ(result->pathname.groups.get("id"sv).value(), (Variant<String, Empty> { "42"_string }));
}
}
TEST_CASE(url_pattern_ignore_case_matching)
{
auto pattern = MUST(URL::Pattern::Pattern::create("https://example.com/:value"_string, {}, URL::Pattern::IgnoreCase::Yes));
auto result = MUST(pattern.match("https://example.com/CaseSensitive"_string, {}));
VERIFY(result.has_value());
EXPECT_EQ(result->pathname.groups.get("value"sv).value(), (Variant<String, Empty> { "CaseSensitive"_string }));
}

View File

@@ -52,18 +52,18 @@ Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 0
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler random
Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 4, filler random
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler random
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random
Pass encodeInto() into ArrayBuffer with AU+df06 and destination length 4, offset 0, filler 0
Pass encodeInto() into SharedArrayBuffer with AU+df06 and destination length 4, offset 0, filler 0
Pass encodeInto() into ArrayBuffer with AU+df06 and destination length 4, offset 4, filler 0

View File

@@ -350,8 +350,8 @@ Pass ToASCII("⑷.four") U1 (ignored)
Pass ToASCII("(4).four") U1 (ignored)
Pass ToASCII("⑷.FOUR") U1 (ignored)
Pass ToASCII("⑷.Four") U1 (ignored)
Pass ToASCII("aaU+d900z") V7; A3
Pass ToASCII("AAU+d900Z") V7; A3
Pass ToASCII("aU+d900z") V7; A3
Pass ToASCII("AU+d900Z") V7; A3
Pass ToASCII("xn--") P4; A4_1 (ignored); A4_2 (ignored)
Pass ToASCII("xn---") P4
Pass ToASCII("xn--ASCII-") P4

View File

@@ -405,4 +405,4 @@ Pass Parsing origin: <non-special:\\opaque\path> against <about:blank>
Pass Parsing origin: <chrome-native://x:0> against <about:blank>
Pass Parsing origin: <chrome-resource://x:0> against <about:blank>
Pass Parsing origin: <chrome-search://x:0> against <about:blank>
Pass Parsing origin: <fuchsia-dir://x:0> against <about:blank>
Pass Parsing origin: <fuchsia-dir://x:0> against <about:blank>

View File

@@ -885,4 +885,4 @@ Pass Parsing: <non-special:\/opaque> without base
Pass Parsing: <///example.org/../path/../../> against <http://example.org/>
Pass Parsing: <///example.org/../path/../../path> against <http://example.org/>
Pass Parsing: </\/\//example.org/../path> against <http://example.org/>
Pass Parsing: <///abcdef/../> against <file:///>
Pass Parsing: <///abcdef/../> against <file:///>

View File

@@ -406,4 +406,4 @@ Pass Origin parsing: <non-special:\\opaque\path> without base
Pass Origin parsing: <chrome-native://x:0> without base
Pass Origin parsing: <chrome-resource://x:0> without base
Pass Origin parsing: <chrome-search://x:0> without base
Pass Origin parsing: <fuchsia-dir://x:0> without base
Pass Origin parsing: <fuchsia-dir://x:0> without base