mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-04-25 17:25:08 +02:00
LibRegex: Add ECMAScriptRegex and migrate callers
Add `ECMAScriptRegex`, LibRegex's C++ facade for ECMAScript regexes. The facade owns compilation, execution, captures, named groups, and error translation for the Rust backend, which lets callers stop depending on the legacy parser and matcher types directly. Use it in the remaining non-LibJS callers: URLPattern, HTML input pattern handling, and the places in LibHTTP that only needed token validation. Where a full regex engine was unnecessary, replace those call sites with direct character checks. Also update focused LibURL, LibHTTP, and WPT coverage for the migrated callers and corrected surrogate handling.
This commit is contained in:
committed by
Ali Mohammad Pur
parent
66fb0a8394
commit
34d954e2d7
Notes:
github-actions[bot]
2026-03-27 16:35:21 +00:00
Author: https://github.com/awesomekling Commit: https://github.com/LadybirdBrowser/ladybird/commit/34d954e2d70 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/8612 Reviewed-by: https://github.com/jdahlin Reviewed-by: https://github.com/trflynn89
@@ -39,6 +39,36 @@ constexpr bool is_http_tab_or_space(u32 code_point)
|
||||
return code_point == 0x09u || code_point == 0x20u;
|
||||
}
|
||||
|
||||
// Returns true when code_point is a character allowed in an HTTP token:
// an ASCII letter, an ASCII digit, or one of the punctuation marks !#$%&'*+-.^_`|~
constexpr bool is_http_token_code_point(u32 code_point)
{
    bool const is_ascii_digit = code_point >= '0' && code_point <= '9';
    bool const is_ascii_upper = code_point >= 'A' && code_point <= 'Z';
    bool const is_ascii_lower = code_point >= 'a' && code_point <= 'z';
    if (is_ascii_digit || is_ascii_upper || is_ascii_lower)
        return true;

    // Spelled as an initializer list rather than a string literal so that no
    // terminating NUL becomes part of the accepted set.
    constexpr char token_punctuation[] = { '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~' };
    for (char allowed : token_punctuation) {
        if (code_point == static_cast<u32>(allowed))
            return true;
    }

    return false;
}
|
||||
|
||||
enum class HttpQuotedStringExtractValue {
|
||||
No,
|
||||
Yes,
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/AllOf.h>
|
||||
#include <AK/AnyOf.h>
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/QuickSort.h>
|
||||
#include <LibHTTP/HTTP.h>
|
||||
@@ -13,7 +15,6 @@
|
||||
#include <LibHTTP/Method.h>
|
||||
#include <LibIPC/Decoder.h>
|
||||
#include <LibIPC/Encoder.h>
|
||||
#include <LibRegex/Regex.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibTextCodec/Encoder.h>
|
||||
|
||||
@@ -82,8 +83,7 @@ Optional<Vector<ByteString>> Header::extract_header_values() const
|
||||
// Returns true if header_name is a non-empty byte sequence in which every byte is an HTTP
// token character, as decided by is_http_token_code_point().
bool is_header_name(StringView header_name)
{
    // A header name is a byte sequence that matches the field-name token production.
    // The production needs at least one character, hence the explicit emptiness check.
    // This replaces a per-call regex compilation whose early return left this check unreachable.
    return !header_name.is_empty() && all_of(header_name, is_http_token_code_point);
}
|
||||
|
||||
// https://fetch.spec.whatwg.org/#header-value
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/AllOf.h>
|
||||
#include <LibHTTP/HTTP.h>
|
||||
#include <LibHTTP/Method.h>
|
||||
#include <LibRegex/Regex.h>
|
||||
|
||||
namespace HTTP {
|
||||
|
||||
@@ -14,8 +15,7 @@ namespace HTTP {
|
||||
// Returns true if method is a non-empty byte sequence in which every byte is an HTTP
// token character, as decided by is_http_token_code_point().
bool is_method(StringView method)
{
    // A method is a byte sequence that matches the method token production.
    // The production needs at least one character, hence the explicit emptiness check.
    // This replaces a per-call regex compilation whose early return left this check unreachable.
    return !method.is_empty() && all_of(method, is_http_token_code_point);
}
|
||||
|
||||
// https://fetch.spec.whatwg.org/#cors-safelisted-method
|
||||
|
||||
@@ -8,23 +8,4 @@
|
||||
|
||||
namespace JS::Bytecode {
|
||||
|
||||
RegexTableIndex RegexTable::insert(ParsedRegex parsed_regex)
|
||||
{
|
||||
Regex<ECMA262> regex(parsed_regex.regex, parsed_regex.pattern.to_byte_string(), parsed_regex.flags);
|
||||
m_regexes.append(move(regex));
|
||||
return m_regexes.size() - 1;
|
||||
}
|
||||
|
||||
Regex<ECMA262> const& RegexTable::get(RegexTableIndex index) const
|
||||
{
|
||||
return m_regexes[index.value()];
|
||||
}
|
||||
|
||||
void RegexTable::dump() const
|
||||
{
|
||||
outln("Regex Table:");
|
||||
for (size_t i = 0; i < m_regexes.size(); i++)
|
||||
outln("{}: {}", i, m_regexes[i].pattern_value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -7,21 +7,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <AK/DistinctNumeric.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibRegex/Regex.h>
|
||||
#include <LibRegex/RegexParser.h>
|
||||
|
||||
namespace JS::Bytecode {
|
||||
|
||||
AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(u32, RegexTableIndex, Comparison);
|
||||
|
||||
struct ParsedRegex {
|
||||
regex::Parser::Result regex;
|
||||
String pattern;
|
||||
regex::RegexOptions<ECMAScriptFlags> flags;
|
||||
};
|
||||
|
||||
class RegexTable {
|
||||
AK_MAKE_NONMOVABLE(RegexTable);
|
||||
AK_MAKE_NONCOPYABLE(RegexTable);
|
||||
@@ -29,13 +19,7 @@ class RegexTable {
|
||||
public:
|
||||
RegexTable() = default;
|
||||
|
||||
RegexTableIndex insert(ParsedRegex);
|
||||
Regex<ECMA262> const& get(RegexTableIndex) const;
|
||||
void dump() const;
|
||||
bool is_empty() const { return m_regexes.is_empty(); }
|
||||
|
||||
private:
|
||||
Vector<Regex<ECMA262>> m_regexes;
|
||||
bool is_empty() const { return true; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
109
Libraries/LibRegex/ECMAScriptRegex.cpp
Normal file
109
Libraries/LibRegex/ECMAScriptRegex.cpp
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/OwnPtr.h>
|
||||
#include <LibRegex/ECMAScriptRegex.h>
|
||||
#include <LibRegex/RustRegex.h>
|
||||
|
||||
namespace regex {
|
||||
|
||||
// Private state of ECMAScriptRegex, kept out of the public header.
struct ECMAScriptRegex::Impl {
|
||||
// The compiled backend regex that every match operation is forwarded to.
CompiledRustRegex rust_regex;
|
||||
// Named groups copied out of rust_regex at compile time; returned by named_groups().
Vector<ECMAScriptNamedCaptureGroup> named_groups;
|
||||
};
|
||||
|
||||
// Compiles utf8_pattern with the Rust backend and wraps the result in an ECMAScriptRegex.
// If the backend reports an error, that error is handed back to the caller.
ErrorOr<ECMAScriptRegex, String> ECMAScriptRegex::compile(StringView utf8_pattern, ECMAScriptCompileFlags flags)
{
    // Mirror the public flag set into the backend's flag structure, field by field.
    RustRegexFlags backend_flags {};
    backend_flags.global = flags.global;
    backend_flags.ignore_case = flags.ignore_case;
    backend_flags.multiline = flags.multiline;
    backend_flags.dot_all = flags.dot_all;
    backend_flags.unicode = flags.unicode;
    backend_flags.unicode_sets = flags.unicode_sets;
    backend_flags.sticky = flags.sticky;
    backend_flags.has_indices = flags.has_indices;

    auto compile_result = CompiledRustRegex::compile(utf8_pattern, backend_flags);
    if (compile_result.is_error())
        return compile_result.release_error();

    auto backend_regex = compile_result.release_value();

    // Copy the named groups into the facade's own type before backend_regex is moved into Impl.
    auto const& backend_groups = backend_regex.named_groups();
    Vector<ECMAScriptNamedCaptureGroup> groups;
    groups.ensure_capacity(backend_groups.size());
    for (auto const& backend_group : backend_groups)
        groups.unchecked_append({ .name = backend_group.name, .index = backend_group.index });

    return ECMAScriptRegex(adopt_own(*new Impl {
        .rust_regex = move(backend_regex),
        .named_groups = move(groups),
    }));
}
|
||||
|
||||
// The destructor and move operations are defaulted here, in the translation unit that
// defines Impl, rather than in the header where Impl is only forward-declared.
ECMAScriptRegex::~ECMAScriptRegex() = default;
|
||||
|
||||
ECMAScriptRegex::ECMAScriptRegex(ECMAScriptRegex&& other) = default;
|
||||
ECMAScriptRegex& ECMAScriptRegex::operator=(ECMAScriptRegex&& other) = default;
|
||||
|
||||
// Private constructor used by compile(): takes ownership of an already-built Impl.
ECMAScriptRegex::ECMAScriptRegex(OwnPtr<Impl> impl)
|
||||
: m_impl(move(impl))
|
||||
{
|
||||
}
|
||||
|
||||
MatchResult ECMAScriptRegex::exec(Utf16View input, size_t start_pos) const
|
||||
{
|
||||
auto result = m_impl->rust_regex.exec_internal(input, start_pos);
|
||||
if (result == 1)
|
||||
return MatchResult::Match;
|
||||
if (result == -1)
|
||||
return MatchResult::LimitExceeded;
|
||||
return MatchResult::NoMatch;
|
||||
}
|
||||
|
||||
int ECMAScriptRegex::capture_slot(unsigned int slot) const
|
||||
{
|
||||
return m_impl->rust_regex.capture_slot(slot);
|
||||
}
|
||||
|
||||
MatchResult ECMAScriptRegex::test(Utf16View input, size_t start_pos) const
|
||||
{
|
||||
auto result = m_impl->rust_regex.test(input, start_pos);
|
||||
if (result == 1)
|
||||
return MatchResult::Match;
|
||||
if (result == -1)
|
||||
return MatchResult::LimitExceeded;
|
||||
return MatchResult::NoMatch;
|
||||
}
|
||||
|
||||
unsigned int ECMAScriptRegex::capture_count() const
|
||||
{
|
||||
return m_impl->rust_regex.capture_count();
|
||||
}
|
||||
|
||||
unsigned int ECMAScriptRegex::total_groups() const
|
||||
{
|
||||
return m_impl->rust_regex.total_groups();
|
||||
}
|
||||
|
||||
// Returns the named-group list that compile() copied into Impl.
Vector<ECMAScriptNamedCaptureGroup> const& ECMAScriptRegex::named_groups() const
{
    auto const& groups = m_impl->named_groups;
    return groups;
}
|
||||
|
||||
// Forwards to the backend's find_all() and returns its result unchanged.
int ECMAScriptRegex::find_all(Utf16View input, size_t start_pos) const
{
    auto& backend = m_impl->rust_regex;
    return backend.find_all(input, start_pos);
}
|
||||
|
||||
// Fetches the index-th match from the backend and repackages its start/end as a MatchPair.
ECMAScriptRegex::MatchPair ECMAScriptRegex::find_all_match(int index) const
{
    auto backend_match = m_impl->rust_regex.find_all_match(index);
    return MatchPair { backend_match.start, backend_match.end };
}
|
||||
|
||||
}
|
||||
90
Libraries/LibRegex/ECMAScriptRegex.h
Normal file
90
Libraries/LibRegex/ECMAScriptRegex.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Error.h>
|
||||
#include <AK/Noncopyable.h>
|
||||
#include <AK/OwnPtr.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Utf16View.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibRegex/Export.h>
|
||||
|
||||
namespace regex {
|
||||
|
||||
// Outcome of ECMAScriptRegex::exec() and ECMAScriptRegex::test().
enum class MatchResult : i8 {
|
||||
// The pattern matched.
Match,
|
||||
||||
// The pattern did not match.
NoMatch,
|
||||
||||
// The backend stopped without an answer; distinct from NoMatch so callers can tell them apart.
LimitExceeded,
|
||||
||||
};
|
||||
|
||||
// Flags passed to ECMAScriptRegex::compile(). Every field defaults to false.
// The names follow the ECMAScript RegExp flag properties (flag letter in parentheses).
struct ECMAScriptCompileFlags {
|
||||
// global (g)
bool global {};
|
||||
||||
// ignoreCase (i)
bool ignore_case {};
|
||||
||||
// multiline (m)
bool multiline {};
|
||||
||||
// dotAll (s)
bool dot_all {};
|
||||
||||
// unicode (u)
bool unicode {};
|
||||
||||
// unicodeSets (v)
bool unicode_sets {};
|
||||
||||
// sticky (y)
bool sticky {};
|
||||
||||
// hasIndices (d)
bool has_indices {};
|
||||
||||
};
|
||||
|
||||
struct ECMAScriptNamedCaptureGroup {
|
||||
String name;
|
||||
unsigned int index;
|
||||
};
|
||||
|
||||
class REGEX_API ECMAScriptRegex {
|
||||
AK_MAKE_NONCOPYABLE(ECMAScriptRegex);
|
||||
|
||||
public:
|
||||
static ErrorOr<ECMAScriptRegex, String> compile(StringView utf8_pattern, ECMAScriptCompileFlags);
|
||||
|
||||
~ECMAScriptRegex();
|
||||
ECMAScriptRegex(ECMAScriptRegex&&);
|
||||
ECMAScriptRegex& operator=(ECMAScriptRegex&&);
|
||||
|
||||
/// Execute and fill internal capture buffer.
|
||||
/// After a successful call, read results via capture_slot().
|
||||
[[nodiscard]] MatchResult exec(Utf16View input, size_t start_pos) const;
|
||||
|
||||
/// Read a capture slot from the internal buffer (after exec).
|
||||
/// Even slots are start positions, odd slots are end positions.
|
||||
/// Returns -1 for unmatched captures.
|
||||
int capture_slot(unsigned int slot) const;
|
||||
|
||||
/// Test for a match without filling capture buffer.
|
||||
[[nodiscard]] MatchResult test(Utf16View input, size_t start_pos) const;
|
||||
|
||||
/// Number of numbered capture groups (excluding group 0).
|
||||
unsigned int capture_count() const;
|
||||
|
||||
/// Total number of capture groups including group 0.
|
||||
unsigned int total_groups() const;
|
||||
|
||||
/// Named capture groups with their indices.
|
||||
Vector<ECMAScriptNamedCaptureGroup> const& named_groups() const;
|
||||
|
||||
/// Find all non-overlapping matches. Returns number of matches found.
|
||||
/// Access results via find_all_match(i) after calling.
|
||||
int find_all(Utf16View input, size_t start_pos) const;
|
||||
|
||||
struct MatchPair {
|
||||
int start;
|
||||
int end;
|
||||
};
|
||||
|
||||
/// Get the i-th match from find_all results.
|
||||
MatchPair find_all_match(int index) const;
|
||||
|
||||
private:
|
||||
struct Impl;
|
||||
ECMAScriptRegex(OwnPtr<Impl>);
|
||||
OwnPtr<Impl> m_impl;
|
||||
};
|
||||
|
||||
} // namespace regex
|
||||
@@ -4,7 +4,8 @@
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibRegex/Regex.h>
|
||||
#include <AK/Utf16String.h>
|
||||
#include <LibRegex/ECMAScriptRegex.h>
|
||||
#include <LibURL/Pattern/Component.h>
|
||||
#include <LibURL/Pattern/PatternParser.h>
|
||||
#include <LibURL/Pattern/String.h>
|
||||
@@ -19,10 +20,10 @@ bool protocol_component_matches_a_special_scheme(Component const& protocol_compo
|
||||
// 2. For each scheme of special scheme list:
|
||||
for (StringView scheme : special_schemes()) {
|
||||
// 1. Let test result be RegExpBuiltinExec(protocol component’s regular expression, scheme).
|
||||
auto test_result = protocol_component.regular_expression->match(scheme);
|
||||
auto test_result = protocol_component.matches(scheme);
|
||||
|
||||
// 2. If test result is not null, then return true.
|
||||
if (test_result.success)
|
||||
if (test_result)
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -225,27 +226,23 @@ PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParse
|
||||
|
||||
// 3. Let flags be an empty string.
|
||||
// NOTE: These flags match the flags for the empty string of the LibJS RegExp implementation.
|
||||
auto flags = regex::RegexOptions<ECMAScriptFlags> {
|
||||
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
||||
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
||||
| regex::ECMAScriptFlags::BrowserExtended
|
||||
};
|
||||
regex::ECMAScriptCompileFlags flags {};
|
||||
|
||||
// 4. If options’s ignore case is true then set flags to "vi".
|
||||
if (options.ignore_case) {
|
||||
flags |= regex::ECMAScriptFlags::UnicodeSets;
|
||||
flags |= regex::ECMAScriptFlags::Insensitive;
|
||||
flags.unicode_sets = true;
|
||||
flags.ignore_case = true;
|
||||
}
|
||||
// 5. Otherwise set flags to "v"
|
||||
else {
|
||||
flags |= regex::ECMAScriptFlags::UnicodeSets;
|
||||
flags.unicode_sets = true;
|
||||
}
|
||||
|
||||
// 6. Let regular expression be RegExpCreate(regular expression string, flags). If this throws an exception, catch
|
||||
// it, and throw a TypeError.
|
||||
auto regex = make<Regex<ECMA262>>(regular_expression_string.to_byte_string(), flags);
|
||||
if (regex->parser_result.error != regex::Error::NoError)
|
||||
return ErrorInfo { MUST(String::formatted("RegExp compile error: {}", regex->error_string())) };
|
||||
auto regex = regex::ECMAScriptRegex::compile(regular_expression_string.bytes_as_string_view(), flags);
|
||||
if (regex.is_error())
|
||||
return ErrorInfo { MUST(String::formatted("RegExp compile error: {}", regex.release_error())) };
|
||||
|
||||
// 7. Let pattern string be the result of running generate a pattern string given part list and options.
|
||||
auto pattern_string = generate_a_pattern_string(part_list, options);
|
||||
@@ -266,14 +263,45 @@ PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParse
|
||||
// group name list is name list, and has regexp groups is has regexp groups.
|
||||
return Component {
|
||||
.pattern_string = move(pattern_string),
|
||||
.regular_expression = move(regex),
|
||||
.regular_expression = adopt_own(*new regex::ECMAScriptRegex(regex.release_value())),
|
||||
.group_name_list = move(name_list),
|
||||
.has_regexp_groups = has_regexp_groups,
|
||||
};
|
||||
}
|
||||
|
||||
// Runs this component's regular expression against input. On a match, the result holds one
// capture per entry of the group name list (empty for groups that did not participate).
// On anything other than a match, a default-constructed (unsuccessful) result is returned.
Component::ExecutionResult Component::execute(String const& input) const
{
    // The regex facade works on UTF-16, so convert once and slice captures from the same buffer.
    auto utf16_input = Utf16String::from_utf8(input);
    if (regular_expression->exec(utf16_input.utf16_view(), 0) != regex::MatchResult::Match)
        return {};

    auto const group_count = group_name_list.size();

    ExecutionResult result;
    result.success = true;
    result.captures.ensure_capacity(group_count);

    // Group N occupies slots 2N (start) and 2N + 1 (end). Group 0 is skipped; named groups start at 1.
    for (size_t group = 1; group <= group_count; ++group) {
        auto start_slot = regular_expression->capture_slot(group * 2);
        auto end_slot = regular_expression->capture_slot(group * 2 + 1);

        bool const participated = start_slot >= 0 && end_slot >= 0;
        if (!participated) {
            result.captures.append({});
            continue;
        }

        auto const offset = static_cast<size_t>(start_slot);
        auto const length = static_cast<size_t>(end_slot - start_slot);
        auto capture = utf16_input.substring_view(offset, length);
        result.captures.append(MUST(capture.to_utf8()));
    }

    return result;
}
|
||||
|
||||
// Returns true if this component's regular expression matches input; captures are not collected.
bool Component::matches(StringView input) const
{
    auto utf16_input = Utf16String::from_utf8(input);
    auto const outcome = regular_expression->test(utf16_input.utf16_view(), 0);
    return outcome == regex::MatchResult::Match;
}
|
||||
|
||||
// https://urlpattern.spec.whatwg.org/#create-a-component-match-result
|
||||
Component::Result Component::create_match_result(String const& input, regex::RegexResult const& exec_result) const
|
||||
Component::Result Component::create_match_result(String const& input, ExecutionResult const& exec_result) const
|
||||
{
|
||||
// 1. Let result be a new URLPatternComponentResult.
|
||||
Component::Result result;
|
||||
@@ -286,18 +314,18 @@ Component::Result Component::create_match_result(String const& input, regex::Reg
|
||||
|
||||
// 4. Let index be 1.
|
||||
// 5. While index is less than or equal to component’s group name list’s size:
|
||||
VERIFY(exec_result.captures.size() == group_name_list.size());
|
||||
for (size_t index = 1; index <= group_name_list.size(); ++index) {
|
||||
auto const& capture = exec_result.capture_group_matches[0][index - 1];
|
||||
|
||||
// 1. Let name be component’s group name list[index − 1].
|
||||
auto name = group_name_list[index - 1];
|
||||
|
||||
// 2. Let value be Get(execResult, ToString(index)).
|
||||
// 3. Set groups[name] to value.
|
||||
if (capture.view.is_null())
|
||||
auto const& capture = exec_result.captures[index - 1];
|
||||
if (!capture.has_value())
|
||||
groups.set(name, Empty {});
|
||||
else
|
||||
groups.set(name, MUST(capture.view.to_string()));
|
||||
groups.set(name, *capture);
|
||||
|
||||
// 4. Increment index by 1.
|
||||
}
|
||||
|
||||
@@ -6,9 +6,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/HashMap.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/OwnPtr.h>
|
||||
#include <AK/String.h>
|
||||
#include <LibRegex/Regex.h>
|
||||
#include <LibRegex/ECMAScriptRegex.h>
|
||||
#include <LibURL/Pattern/PatternParser.h>
|
||||
|
||||
namespace URL::Pattern {
|
||||
@@ -23,7 +25,14 @@ struct Component {
|
||||
OrderedHashMap<String, Variant<String, Empty>> groups;
|
||||
};
|
||||
|
||||
Result create_match_result(String const& input, regex::RegexResult const& exec_result) const;
|
||||
// Result of running a component's regular expression against an input string.
struct ExecutionResult {
|
||||
// True only when the regular expression matched.
bool success { false };
|
||||
// One entry per capture group; an entry without a value stands for a group with no capture.
Vector<Optional<String>> captures;
|
||||
};
|
||||
|
||||
Result create_match_result(String const& input, ExecutionResult const& exec_result) const;
|
||||
ExecutionResult execute(String const& input) const;
|
||||
bool matches(StringView input) const;
|
||||
|
||||
// https://urlpattern.spec.whatwg.org/#component-pattern-string
|
||||
// pattern string, a well formed pattern string
|
||||
@@ -31,7 +40,7 @@ struct Component {
|
||||
|
||||
// https://urlpattern.spec.whatwg.org/#component-regular-expression
|
||||
// regular expression, a RegExp
|
||||
OwnPtr<Regex<ECMA262>> regular_expression;
|
||||
OwnPtr<regex::ECMAScriptRegex> regular_expression;
|
||||
|
||||
// https://urlpattern.spec.whatwg.org/#component-group-name-list
|
||||
// group name list, a list of strings
|
||||
|
||||
@@ -307,42 +307,42 @@ PatternErrorOr<Optional<Result>> Pattern::match(Variant<String, Init, URL> const
|
||||
}
|
||||
|
||||
// 14. Let protocolExecResult be RegExpBuiltinExec(urlPattern’s protocol component's regular expression, protocol).
|
||||
auto protocol_exec_result = m_protocol_component.regular_expression->match(protocol);
|
||||
auto protocol_exec_result = m_protocol_component.execute(protocol);
|
||||
if (!protocol_exec_result.success)
|
||||
return OptionalNone {};
|
||||
|
||||
// 15. Let usernameExecResult be RegExpBuiltinExec(urlPattern’s username component's regular expression, username).
|
||||
auto username_exec_result = m_username_component.regular_expression->match(username);
|
||||
auto username_exec_result = m_username_component.execute(username);
|
||||
if (!username_exec_result.success)
|
||||
return OptionalNone {};
|
||||
|
||||
// 16. Let passwordExecResult be RegExpBuiltinExec(urlPattern’s password component's regular expression, password).
|
||||
auto password_exec_result = m_password_component.regular_expression->match(password);
|
||||
auto password_exec_result = m_password_component.execute(password);
|
||||
if (!password_exec_result.success)
|
||||
return OptionalNone {};
|
||||
|
||||
// 17. Let hostnameExecResult be RegExpBuiltinExec(urlPattern’s hostname component's regular expression, hostname).
|
||||
auto hostname_exec_result = m_hostname_component.regular_expression->match(hostname);
|
||||
auto hostname_exec_result = m_hostname_component.execute(hostname);
|
||||
if (!hostname_exec_result.success)
|
||||
return OptionalNone {};
|
||||
|
||||
// 18. Let portExecResult be RegExpBuiltinExec(urlPattern’s port component's regular expression, port).
|
||||
auto port_exec_result = m_port_component.regular_expression->match(port);
|
||||
auto port_exec_result = m_port_component.execute(port);
|
||||
if (!port_exec_result.success)
|
||||
return OptionalNone {};
|
||||
|
||||
// 19. Let pathnameExecResult be RegExpBuiltinExec(urlPattern’s pathname component's regular expression, pathname).
|
||||
auto pathname_exec_result = m_pathname_component.regular_expression->match(pathname);
|
||||
auto pathname_exec_result = m_pathname_component.execute(pathname);
|
||||
if (!pathname_exec_result.success)
|
||||
return OptionalNone {};
|
||||
|
||||
// 20. Let searchExecResult be RegExpBuiltinExec(urlPattern’s search component's regular expression, search).
|
||||
auto search_exec_result = m_search_component.regular_expression->match(search);
|
||||
auto search_exec_result = m_search_component.execute(search);
|
||||
if (!search_exec_result.success)
|
||||
return OptionalNone {};
|
||||
|
||||
// 21. Let hashExecResult be RegExpBuiltinExec(urlPattern’s hash component's regular expression, hash).
|
||||
auto hash_exec_result = m_hash_component.regular_expression->match(hash);
|
||||
auto hash_exec_result = m_hash_component.execute(hash);
|
||||
if (!hash_exec_result.success)
|
||||
return OptionalNone {};
|
||||
|
||||
|
||||
@@ -275,7 +275,7 @@ Optional<GC::Ref<HTMLDataListElement const>> HTMLInputElement::suggestions_sourc
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/input.html#compiled-pattern-regular-expression
|
||||
Optional<Regex<ECMA262>> HTMLInputElement::compiled_pattern_regular_expression() const
|
||||
Optional<regex::ECMAScriptRegex> HTMLInputElement::compiled_pattern_regular_expression() const
|
||||
{
|
||||
// 1. If the element does not have a pattern attribute specified, then return nothing. The element has no compiled pattern regular expression.
|
||||
auto maybe_pattern = get_attribute(HTML::AttributeNames::pattern);
|
||||
@@ -283,20 +283,25 @@ Optional<Regex<ECMA262>> HTMLInputElement::compiled_pattern_regular_expression()
|
||||
return {};
|
||||
|
||||
// 2. Let pattern be the value of the pattern attribute of the element.
|
||||
auto pattern = maybe_pattern.release_value().to_byte_string();
|
||||
auto pattern = maybe_pattern.release_value();
|
||||
|
||||
// 3. Let regexpCompletion be RegExpCreate(pattern, "v").
|
||||
Regex<ECMA262> regexp_completion(pattern, JS::RegExpObject::default_flags | ECMAScriptFlags::UnicodeSets);
|
||||
regex::ECMAScriptCompileFlags compile_flags {};
|
||||
compile_flags.unicode_sets = true;
|
||||
auto regexp_completion = regex::ECMAScriptRegex::compile(pattern.bytes_as_string_view(), compile_flags);
|
||||
|
||||
// 4. If regexpCompletion is an abrupt completion, then return nothing. The element has no compiled pattern regular expression.
|
||||
if (regexp_completion.parser_result.error != regex::Error::NoError)
|
||||
if (regexp_completion.is_error())
|
||||
return {};
|
||||
|
||||
// 5. Let anchoredPattern be the string "^(?:", followed by pattern, followed by ")$".
|
||||
auto anchored_pattern = ByteString::formatted("^(?:{})$", pattern);
|
||||
auto anchored_pattern = MUST(String::formatted("^(?:{})$", pattern));
|
||||
|
||||
// 6. Return ! RegExpCreate(anchoredPattern, "v").
|
||||
return Regex<ECMA262>(anchored_pattern, JS::RegExpObject::default_flags | ECMAScriptFlags::UnicodeSets);
|
||||
auto anchored = regex::ECMAScriptRegex::compile(anchored_pattern.bytes_as_string_view(), compile_flags);
|
||||
if (anchored.is_error())
|
||||
return {};
|
||||
return anchored.release_value();
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/input.html#dom-input-files
|
||||
@@ -3557,7 +3562,13 @@ bool HTMLInputElement::suffering_from_being_missing() const
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
|
||||
static Regex<ECMA262> const valid_email_address_regex = Regex<ECMA262>("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$");
|
||||
// Returns the compiled regular expression used to validate a single e-mail address.
// The regex is a function-local static, so it is compiled on the first call and reused afterwards.
static regex::ECMAScriptRegex& valid_email_address_regex()
{
    static constexpr auto email_pattern = "^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"sv;
    static auto compiled_regex = MUST(regex::ECMAScriptRegex::compile(email_pattern, regex::ECMAScriptCompileFlags {}));
    return compiled_regex;
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#suffering-from-a-type-mismatch
|
||||
bool HTMLInputElement::suffering_from_a_type_mismatch() const
|
||||
@@ -3577,7 +3588,7 @@ bool HTMLInputElement::suffering_from_a_type_mismatch() const
|
||||
// When the multiple attribute is not specified on the element: While the value of the element is neither the
|
||||
// empty string nor a single valid email address, the element is suffering from a type mismatch.
|
||||
if (!has_attribute(HTML::AttributeNames::multiple))
|
||||
return !input.is_empty() && !valid_email_address_regex.match(input.utf16_view()).success;
|
||||
return !input.is_empty() && valid_email_address_regex().exec(input.utf16_view(), 0) != regex::MatchResult::Match;
|
||||
|
||||
// When the multiple attribute is specified on the element: While the value of the element is not a valid email
|
||||
// address list, the element is suffering from a type mismatch.
|
||||
@@ -3588,7 +3599,7 @@ bool HTMLInputElement::suffering_from_a_type_mismatch() const
|
||||
bool valid = true;
|
||||
|
||||
input.for_each_split_view(',', SplitBehavior::Nothing, [&](auto const& address) {
|
||||
if (valid_email_address_regex.match(address).success)
|
||||
if (valid_email_address_regex().exec(address, 0) == regex::MatchResult::Match)
|
||||
return IterationDecision::Continue;
|
||||
|
||||
valid = false;
|
||||
@@ -3633,7 +3644,7 @@ bool HTMLInputElement::suffering_from_a_pattern_mismatch() const
|
||||
bool valid = true;
|
||||
|
||||
value.for_each_split_view(',', SplitBehavior::Nothing, [&](auto const& value) {
|
||||
if (regexp_object->match(value).success)
|
||||
if (regexp_object->exec(value, 0) == regex::MatchResult::Match)
|
||||
return IterationDecision::Continue;
|
||||
|
||||
valid = false;
|
||||
@@ -3643,7 +3654,7 @@ bool HTMLInputElement::suffering_from_a_pattern_mismatch() const
|
||||
return !valid;
|
||||
}
|
||||
|
||||
return !regexp_object->match(value.utf16_view()).success;
|
||||
return regexp_object->exec(value.utf16_view(), 0) != regex::MatchResult::Match;
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#suffering-from-an-underflow
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <LibRegex/Regex.h>
|
||||
#include <LibRegex/ECMAScriptRegex.h>
|
||||
#include <LibWeb/DOM/DocumentLoadEventDelayer.h>
|
||||
#include <LibWeb/DOM/Text.h>
|
||||
#include <LibWeb/Export.h>
|
||||
@@ -377,7 +377,7 @@ private:
|
||||
GC::Ptr<SharedResourceRequest> m_resource_request;
|
||||
SelectedCoordinate m_selected_coordinate;
|
||||
|
||||
Optional<Regex<ECMA262>> compiled_pattern_regular_expression() const;
|
||||
Optional<regex::ECMAScriptRegex> compiled_pattern_regular_expression() const;
|
||||
|
||||
Optional<GC::Ref<HTMLDataListElement const>> suggestions_source_element() const;
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <LibHTTP/Cache/Utilities.h>
|
||||
#include <LibHTTP/HTTP.h>
|
||||
#include <LibHTTP/Header.h>
|
||||
#include <LibHTTP/Method.h>
|
||||
|
||||
TEST_CASE(collect_an_http_quoted_string)
|
||||
{
|
||||
@@ -127,6 +128,22 @@ TEST_CASE(extract_cache_control_directive)
|
||||
EXPECT(!HTTP::contains_cache_control_directive("=4"sv, "max-age"sv));
|
||||
}
|
||||
|
||||
// Exercises HTTP::is_method() and HTTP::is_header_name(), which both accept exactly the
// non-empty strings made of HTTP token characters.
TEST_CASE(token_validation)
{
    EXPECT(HTTP::is_method("GET"sv));
    EXPECT(HTTP::is_method("PATCH"sv));
    EXPECT(HTTP::is_method("M-SEARCH"sv));
    EXPECT(!HTTP::is_method(""sv));
    EXPECT(!HTTP::is_method("GET "sv));
    EXPECT(!HTTP::is_method("GE:T"sv));

    EXPECT(HTTP::is_header_name("Content-Type"sv));
    EXPECT(HTTP::is_header_name("X-Custom_Header"sv));
    EXPECT(!HTTP::is_header_name(""sv));
    EXPECT(!HTTP::is_header_name("Content Type"sv));
    EXPECT(!HTTP::is_header_name("Content:Type"sv));

    // Every punctuation character of the token set is accepted.
    EXPECT(HTTP::is_method("!#$%&'*+-.^_`|~"sv));
    EXPECT(HTTP::is_header_name("!#$%&'*+-.^_`|~"sv));

    // Control characters, delimiters and non-ASCII bytes are rejected.
    EXPECT(!HTTP::is_method("GET\t"sv));
    EXPECT(!HTTP::is_method("G\x7f"sv));
    EXPECT(!HTTP::is_header_name("X-\"Quoted\""sv));
    EXPECT(!HTTP::is_header_name("X-Caf\xc3\xa9"sv));
}
|
||||
|
||||
TEST_CASE(extract_header_values)
|
||||
{
|
||||
struct TestHeader {
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
set(URL_TEST_SOURCES
|
||||
TestURL.cpp
|
||||
TestURLPattern.cpp
|
||||
TestURLPatternConstructorStringParser.cpp
|
||||
TestPublicSuffix.cpp
|
||||
)
|
||||
|
||||
foreach(source IN LISTS URL_TEST_SOURCES)
|
||||
ladybird_test("${source}" LibURL LIBS LibURL)
|
||||
ladybird_test("${source}" LibURL LIBS LibURL LibRegex)
|
||||
endforeach()
|
||||
|
||||
30
Tests/LibURL/TestURLPattern.cpp
Normal file
30
Tests/LibURL/TestURLPattern.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibTest/TestCase.h>
|
||||
|
||||
#include <LibURL/Pattern/Pattern.h>
|
||||
|
||||
// A pattern with two named pathname groups matches a URL and reports both group values.
TEST_CASE(url_pattern_matches_named_groups)
{
    auto pattern = MUST(URL::Pattern::Pattern::create("https://example.com/:category/:id"_string));
    auto match = MUST(pattern.match("https://example.com/books/42"_string, {}));
    VERIFY(match.has_value());

    auto& protocol = match->protocol;
    auto& pathname = match->pathname;

    EXPECT_EQ(protocol.input, "https"_string);
    EXPECT_EQ(pathname.input, "/books/42"_string);
    EXPECT_EQ(pathname.groups.get("category"sv).value(), (Variant<String, Empty> { "books"_string }));
    EXPECT_EQ(pathname.groups.get("id"sv).value(), (Variant<String, Empty> { "42"_string }));
}
|
||||
|
||||
// With IgnoreCase::Yes the pattern still matches, and the captured group keeps the input's casing.
TEST_CASE(url_pattern_ignore_case_matching)
{
    auto pattern = MUST(URL::Pattern::Pattern::create("https://example.com/:value"_string, {}, URL::Pattern::IgnoreCase::Yes));
    auto match = MUST(pattern.match("https://example.com/CaseSensitive"_string, {}));
    VERIFY(match.has_value());

    auto& pathname_groups = match->pathname.groups;
    EXPECT_EQ(pathname_groups.get("value"sv).value(), (Variant<String, Empty> { "CaseSensitive"_string }));
}
|
||||
Binary file not shown.
@@ -52,18 +52,18 @@ Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 0
|
||||
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler random
|
||||
Pass encodeInto() into ArrayBuffer with 𝌆A and destination length 3, offset 4, filler random
|
||||
Pass encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler random
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 0
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 0
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler 128
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler 128
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 0, filler random
|
||||
Pass encodeInto() into ArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random
|
||||
Pass encodeInto() into SharedArrayBuffer with U+d834AU+df06A¥Hi and destination length 10, offset 4, filler random
|
||||
Pass encodeInto() into ArrayBuffer with AU+df06 and destination length 4, offset 0, filler 0
|
||||
Pass encodeInto() into SharedArrayBuffer with AU+df06 and destination length 4, offset 0, filler 0
|
||||
Pass encodeInto() into ArrayBuffer with AU+df06 and destination length 4, offset 4, filler 0
|
||||
|
||||
@@ -350,8 +350,8 @@ Pass ToASCII("⑷.four") U1 (ignored)
|
||||
Pass ToASCII("(4).four") U1 (ignored)
|
||||
Pass ToASCII("⑷.FOUR") U1 (ignored)
|
||||
Pass ToASCII("⑷.Four") U1 (ignored)
|
||||
Pass ToASCII("aaU+d900z") V7; A3
|
||||
Pass ToASCII("AAU+d900Z") V7; A3
|
||||
Pass ToASCII("aU+d900z") V7; A3
|
||||
Pass ToASCII("AU+d900Z") V7; A3
|
||||
Pass ToASCII("xn--") P4; A4_1 (ignored); A4_2 (ignored)
|
||||
Pass ToASCII("xn---") P4
|
||||
Pass ToASCII("xn--ASCII-") P4
|
||||
|
||||
@@ -405,4 +405,4 @@ Pass Parsing origin: <non-special:\\opaque\path> against <about:blank>
|
||||
Pass Parsing origin: <chrome-native://x:0> against <about:blank>
|
||||
Pass Parsing origin: <chrome-resource://x:0> against <about:blank>
|
||||
Pass Parsing origin: <chrome-search://x:0> against <about:blank>
|
||||
Pass Parsing origin: <fuchsia-dir://x:0> against <about:blank>
|
||||
Pass Parsing origin: <fuchsia-dir://x:0> against <about:blank>
|
||||
@@ -885,4 +885,4 @@ Pass Parsing: <non-special:\/opaque> without base
|
||||
Pass Parsing: <///example.org/../path/../../> against <http://example.org/>
|
||||
Pass Parsing: <///example.org/../path/../../path> against <http://example.org/>
|
||||
Pass Parsing: </\/\//example.org/../path> against <http://example.org/>
|
||||
Pass Parsing: <///abcdef/../> against <file:///>
|
||||
Pass Parsing: <///abcdef/../> against <file:///>
|
||||
@@ -406,4 +406,4 @@ Pass Origin parsing: <non-special:\\opaque\path> without base
|
||||
Pass Origin parsing: <chrome-native://x:0> without base
|
||||
Pass Origin parsing: <chrome-resource://x:0> without base
|
||||
Pass Origin parsing: <chrome-search://x:0> without base
|
||||
Pass Origin parsing: <fuchsia-dir://x:0> without base
|
||||
Pass Origin parsing: <fuchsia-dir://x:0> without base
|
||||
Reference in New Issue
Block a user