mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-04-26 01:35:08 +02:00
LibRegex: Add support for string literals in character classes
This commit is contained in:
committed by
Ali Mohammad Pur
parent
a49c39de32
commit
eed4dd3745
Notes:
github-actions[bot]
2025-11-26 10:35:42 +00:00
Author: https://github.com/aplefull Commit: https://github.com/LadybirdBrowser/ladybird/commit/eed4dd3745b Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6867 Reviewed-by: https://github.com/alimpfard ✅
@@ -34,6 +34,15 @@ static constexpr StringView identity_escape_characters(bool unicode, bool browse
|
||||
return "^$\\.*+?()[]{}|"sv;
|
||||
}
|
||||
|
||||
static bool has_multiple_code_points(String const& str)
|
||||
{
|
||||
Utf8View utf8_view { str.bytes_as_string_view() };
|
||||
auto it = utf8_view.begin();
|
||||
if (it == utf8_view.end())
|
||||
return false;
|
||||
return ++it != utf8_view.end();
|
||||
}
|
||||
|
||||
ALWAYS_INLINE bool Parser::set_error(Error error)
|
||||
{
|
||||
if (m_parser_state.error == Error::NoError) {
|
||||
@@ -1855,7 +1864,16 @@ bool ECMA262Parser::parse_character_class(ByteCode& stack, size_t& match_length_
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::EndAndOr, 0 });
|
||||
}
|
||||
|
||||
match_length_minimum += 1;
|
||||
bool has_empty_string_set = any_of(compares, [this](auto const& compare) {
|
||||
if (compare.type != CharacterCompareType::StringSet)
|
||||
return false;
|
||||
|
||||
auto const& trie = m_parser_state.bytecode.string_set_table().get_u8_trie(compare.value);
|
||||
return trie.has_metadata() && trie.metadata_value();
|
||||
});
|
||||
|
||||
if (!has_empty_string_set)
|
||||
match_length_minimum += 1;
|
||||
stack.insert_bytecode_compare_values(move(compares));
|
||||
return true;
|
||||
}
|
||||
@@ -2299,6 +2317,15 @@ Optional<u32> ECMA262Parser::parse_class_set_character()
|
||||
return {};
|
||||
}
|
||||
|
||||
if (match(TokenType::EscapeSequence)) {
|
||||
auto escape_value = m_parser_state.current_token.value();
|
||||
consume();
|
||||
|
||||
if (escape_value[0] == '\\' && escape_value.length() == 2) {
|
||||
return escape_value[1];
|
||||
}
|
||||
}
|
||||
|
||||
auto start_position = tell();
|
||||
ArmedScopeGuard restore { [&] { back(tell() - start_position + 1); } };
|
||||
|
||||
@@ -2359,6 +2386,62 @@ bool ECMA262Parser::parse_class_set_operand(Vector<regex::CompareTypeAndValuePai
|
||||
{
|
||||
auto start_position = tell();
|
||||
|
||||
// ClassStringDisjunction :: "\q{" ClassStringDisjunctionContents "}"
|
||||
// ClassStringDisjunctionContents :: ClassString | ClassString "|" ClassStringDisjunctionContents
|
||||
// ClassString :: [empty] | NonEmptyClassString
|
||||
// NonEmptyClassString :: ClassCharacter NonEmptyClassString[opt]
|
||||
if (try_skip("\\q"sv)) {
|
||||
if (!match(TokenType::LeftCurly)) {
|
||||
back(2);
|
||||
return false;
|
||||
}
|
||||
consume();
|
||||
|
||||
Vector<String> strings;
|
||||
StringBuilder current_string;
|
||||
|
||||
while (!match(TokenType::RightCurly)) {
|
||||
if (done()) {
|
||||
set_error(Error::MismatchingBrace);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (match(TokenType::Pipe)) {
|
||||
consume();
|
||||
strings.append(MUST(current_string.to_string()));
|
||||
current_string.clear();
|
||||
continue;
|
||||
}
|
||||
|
||||
auto character = parse_class_set_character();
|
||||
if (!character.has_value()) {
|
||||
if (has_error())
|
||||
return false;
|
||||
set_error(Error::InvalidCharacterClass);
|
||||
return false;
|
||||
}
|
||||
|
||||
current_string.append_code_point(character.value());
|
||||
}
|
||||
|
||||
strings.append(MUST(current_string.to_string()));
|
||||
consume(TokenType::RightCurly, Error::MismatchingBrace);
|
||||
|
||||
bool is_negated = any_of(compares, [](auto const& compare) {
|
||||
return compare.type == CharacterCompareType::Inverse;
|
||||
});
|
||||
|
||||
if (is_negated && any_of(strings, has_multiple_code_points)) {
|
||||
set_error(Error::NegatedCharacterClassStrings);
|
||||
return false;
|
||||
}
|
||||
|
||||
auto string_set_index = m_parser_state.bytecode.string_set_table().set(strings);
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::StringSet, string_set_index });
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ClassSetOperand :: ClassSetCharacter | ClassStringDisjunction | NestedClass
|
||||
if (auto character = parse_class_set_character(); character.has_value()) {
|
||||
compares.append({ CharacterCompareType::Char, character.value() });
|
||||
@@ -2414,15 +2497,6 @@ bool ECMA262Parser::parse_class_set_operand(Vector<regex::CompareTypeAndValuePai
|
||||
if (has_error())
|
||||
return false;
|
||||
|
||||
// ClassStringDisjunction :: "\q{" ClassStringDisjunctionContents "}"
|
||||
// ClassStringDisjunctionContents :: ClassString | ClassString "|" ClassStringDisjunctionContents
|
||||
// ClassString :: [empty] | NonEmptyClassString
|
||||
// NonEmptyClassString :: ClassCharacter NonEmptyClassString[opt]
|
||||
if (try_skip("\\q{"sv)) {
|
||||
// FIXME: Implement this :P
|
||||
return set_error(Error::InvalidCharacterClass);
|
||||
}
|
||||
|
||||
back(tell() - start_position + 1);
|
||||
return false;
|
||||
}
|
||||
@@ -2491,14 +2565,17 @@ bool ECMA262Parser::parse_nested_class(Vector<regex::CompareTypeAndValuePair>& c
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto const& compare : compares) {
|
||||
if (compare.type == CharacterCompareType::Inverse) {
|
||||
set_error(Error::NegatedCharacterClassStrings);
|
||||
return;
|
||||
}
|
||||
auto strings = Unicode::get_property_strings(property);
|
||||
|
||||
bool is_negated = any_of(compares, [](auto const& compare) {
|
||||
return compare.type == CharacterCompareType::Inverse;
|
||||
});
|
||||
|
||||
if (is_negated && any_of(strings, has_multiple_code_points)) {
|
||||
set_error(Error::NegatedCharacterClassStrings);
|
||||
return;
|
||||
}
|
||||
|
||||
auto strings = Unicode::get_property_strings(property);
|
||||
if (!strings.is_empty()) {
|
||||
auto string_set_index = m_parser_state.bytecode.string_set_table().set(move(strings));
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::StringSet, string_set_index });
|
||||
|
||||
Reference in New Issue
Block a user