ladybird/Tests/LibRegex/TestRegex.cpp

/*
 * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <LibTest/TestCase.h> // import first, to prevent warning of VERIFY* redefinition

#include <AK/Debug.h>
#include <AK/StringBuilder.h>
#include <AK/Tuple.h>
#include <LibRegex/Regex.h>
#include <LibRegex/RegexDebug.h>
#include <LibRegex/RegexMatcher.h>
#include <stdio.h>

// Identity helper: returns the ECMAScriptOptions it was given, unchanged.
// The option tests below call it with a combined flag expression, so the value
// arrives here typed as ECMAScriptOptions and is handed straight back.
static ECMAScriptOptions match_test_api_options(ECMAScriptOptions const options)
{
    return options;
}

// Identity helper: returns the PosixOptions it was given, unchanged.
// The option tests below call it with a combined flag expression, so the value
// arrives here typed as PosixOptions and is handed straight back.
static PosixOptions match_test_api_options(PosixOptions const options)
{
    return options;
}

// Bitwise-ORs any number of ECMAScriptFlags values into a single ECMAScriptFlags value.
// Only arguments whose deduced type is exactly ECMAScriptFlags are accepted.
template<typename... Flags>
static constexpr ECMAScriptFlags combine_flags(Flags&&... flags)
requires((IsSame<Flags, ECMAScriptFlags> && ...))
{
    using Underlying = regex::FlagsUnderlyingType;
    // Fold over the raw bit patterns, then convert back to the enum type.
    auto const merged_bits = (static_cast<Underlying>(flags) | ...);
    return static_cast<ECMAScriptFlags>(merged_bits);
}

// Exercises |=, &=, ~ and plain assignment on ECMAScriptOptions and checks the
// resulting flag membership after every step.
TEST_CASE(regex_options_ecmascript)
{
    ECMAScriptOptions options;
    // Shorthand so that each expectation reads as "flag is (not) set".
    auto is_set = [&options](ECMAScriptFlags flag) { return options.has_flag_set(flag); };

    // Turning on a single flag leaves the others untouched.
    options |= ECMAScriptFlags::Global;
    EXPECT(is_set(ECMAScriptFlags::Global));
    EXPECT(!is_set(ECMAScriptFlags::Insensitive));

    // Assigning a combination of three flags sets exactly those three.
    options = match_test_api_options(ECMAScriptFlags::Global | ECMAScriptFlags::Insensitive | ECMAScriptFlags::Sticky);
    EXPECT(is_set(ECMAScriptFlags::Global));
    EXPECT(is_set(ECMAScriptFlags::Insensitive));
    EXPECT(is_set(ECMAScriptFlags::Sticky));
    EXPECT(!is_set(ECMAScriptFlags::Unicode));
    EXPECT(!is_set(ECMAScriptFlags::Multiline));
    EXPECT(!is_set(ECMAScriptFlags::SingleLine));

    // Masking with one flag keeps only that flag.
    options &= ECMAScriptFlags::Insensitive;
    EXPECT(!is_set(ECMAScriptFlags::Global));
    EXPECT(is_set(ECMAScriptFlags::Insensitive));
    EXPECT(!is_set(ECMAScriptFlags::Multiline));

    // Masking with a flag that is no longer set leaves nothing behind.
    options &= ECMAScriptFlags::Sticky;
    EXPECT(!is_set(ECMAScriptFlags::Global));
    EXPECT(!is_set(ECMAScriptFlags::Insensitive));
    EXPECT(!is_set(ECMAScriptFlags::Multiline));
    EXPECT(!is_set(ECMAScriptFlags::Sticky));

    // The complement of one flag has that flag cleared and the checked others set.
    options = ~ECMAScriptFlags::Insensitive;
    EXPECT(is_set(ECMAScriptFlags::Global));
    EXPECT(!is_set(ECMAScriptFlags::Insensitive));
    EXPECT(is_set(ECMAScriptFlags::Multiline));
    EXPECT(is_set(ECMAScriptFlags::Sticky));
}

// Exercises |=, &=, ~ and plain assignment on PosixOptions and checks the
// resulting flag membership after every step.
TEST_CASE(regex_options_posix)
{
    PosixOptions options;
    // Shorthand so that each expectation reads as "flag is (not) set".
    auto is_set = [&options](PosixFlags flag) { return options.has_flag_set(flag); };

    // Turning on a single flag leaves the others untouched.
    options |= PosixFlags::Global;
    EXPECT(is_set(PosixFlags::Global));
    EXPECT(!is_set(PosixFlags::Insensitive));

    // Assigning a combination of three flags sets exactly those three.
    options = match_test_api_options(PosixFlags::Global | PosixFlags::Insensitive | PosixFlags::MatchNotBeginOfLine);
    EXPECT(is_set(PosixFlags::Global));
    EXPECT(is_set(PosixFlags::Insensitive));
    EXPECT(is_set(PosixFlags::MatchNotBeginOfLine));
    EXPECT(!is_set(PosixFlags::Unicode));
    EXPECT(!is_set(PosixFlags::Multiline));

    // Masking with one flag keeps only that flag.
    options &= PosixFlags::Insensitive;
    EXPECT(!is_set(PosixFlags::Global));
    EXPECT(is_set(PosixFlags::Insensitive));
    EXPECT(!is_set(PosixFlags::Multiline));

    // Masking with a flag that is no longer set leaves nothing behind.
    options &= PosixFlags::MatchNotBeginOfLine;
    EXPECT(!is_set(PosixFlags::Global));
    EXPECT(!is_set(PosixFlags::Insensitive));
    EXPECT(!is_set(PosixFlags::Multiline));

    // The complement of one flag has that flag cleared and the checked others set.
    options = ~PosixFlags::Insensitive;
    EXPECT(is_set(PosixFlags::Global));
    EXPECT(!is_set(PosixFlags::Insensitive));
    EXPECT(is_set(PosixFlags::Multiline));
}

// Feeds a pattern full of special characters to the lexer and checks the type
// of every token it produces, in order.
TEST_CASE(regex_lexer)
{
    Lexer lexer("/[.*+?^${}()|[\\]\\\\]/g"sv);

    // One entry per successive call to next().
    regex::TokenType const expected_token_types[] = {
        regex::TokenType::Slash,
        regex::TokenType::LeftBracket,
        regex::TokenType::Period,
        regex::TokenType::Asterisk,
        regex::TokenType::Plus,
        regex::TokenType::Questionmark,
        regex::TokenType::Circumflex,
        regex::TokenType::Dollar,
        regex::TokenType::LeftCurly,
        regex::TokenType::RightCurly,
        regex::TokenType::LeftParen,
        regex::TokenType::RightParen,
        regex::TokenType::Pipe,
        regex::TokenType::LeftBracket,
        regex::TokenType::EscapeSequence,
        regex::TokenType::EscapeSequence,
        regex::TokenType::RightBracket,
        regex::TokenType::Slash,
        regex::TokenType::Char,
    };

    for (auto const expected_type : expected_token_types)
        EXPECT(lexer.next().type() == expected_type);
}

// An empty pair of parentheses must be reported as an empty sub-expression.
TEST_CASE(parser_error_parens)
{
    ByteString source = "test()test";
    Lexer lexer(source);
    PosixExtendedParser parser(lexer);

    parser.parse();

    EXPECT(parser.has_error());
    EXPECT(parser.error() == regex::Error::EmptySubExpression);
}

// Each repetition marker must be rejected with InvalidRepetitionMarker when it
// appears where there is nothing for it to repeat.
TEST_CASE(parser_error_special_characters_used_at_wrong_place)
{
    ByteString pattern;
    Vector<char, 5> markers = { '*', '+', '?', '{' };
    StringBuilder builder;

    Lexer lexer;
    PosixExtended parser(lexer);

    // Builds prefix + marker + suffix, parses it with the shared lexer/parser
    // pair and expects the repetition-marker error.
    auto expect_marker_rejected = [&](StringView prefix, char marker, StringView suffix) {
        builder.clear();
        builder.append(prefix);
        builder.append(marker);
        builder.append(suffix);
        pattern = builder.to_byte_string();
        lexer.set_source(pattern);
        parser.parse();
        EXPECT(parser.has_error());
        EXPECT(parser.error() == regex::Error::InvalidRepetitionMarker);
    };

    for (char const marker : markers) {
        // First in ere
        expect_marker_rejected(""sv, marker, ""sv);
        // After vertical line
        expect_marker_rejected("a|"sv, marker, ""sv);
        // After circumflex
        expect_marker_rejected("^"sv, marker, ""sv);
        // After dollar
        expect_marker_rejected("$"sv, marker, ""sv);
        // After left parens
        expect_marker_rejected("("sv, marker, ")"sv);
    }
}

// A vertical line with nothing on one side must be reported as an empty sub-expression.
TEST_CASE(parser_error_vertical_line_used_at_wrong_place)
{
    Lexer lexer;
    PosixExtended parser(lexer);

    StringView const misplaced_alternations[] = {
        "|asdf"sv,   // First in ere
        "asdf|"sv,   // Last in ere
        "(|asdf)"sv, // After left parens
        "(asdf)|"sv, // After right parens
    };

    for (auto const source : misplaced_alternations) {
        lexer.set_source(source);
        parser.parse();
        EXPECT(parser.has_error());
        EXPECT(parser.error() == regex::Error::EmptySubExpression);
    }
}

// "^.*$" matches a single-line subject exactly once, via the out-parameter overload.
TEST_CASE(catch_all_first)
{
    Regex<PosixExtended> whole_line("^.*$");
    auto const subject = "Hello World"sv;
    RegexResult outcome;

    // First call: only the filled-in result is inspected.
    whole_line.match(subject, outcome);
    EXPECT(outcome.count == 1);

    // Second call: the boolean return value is inspected.
    EXPECT(whole_line.match(subject, outcome));
}

// "^.*$" with the Global flag, checked through both the member functions and
// the free-standing has_match()/match() helpers.
TEST_CASE(catch_all)
{
    Regex<PosixExtended> whole_line("^.*$", PosixFlags::Global);
    auto const subject = "Hello World"sv;

    // Member-function API.
    EXPECT(whole_line.has_match(subject));
    EXPECT(whole_line.match(subject).success);
    EXPECT(whole_line.match(subject).count == 1);

    // Free-function API.
    EXPECT(has_match(subject, whole_line));
    auto outcome = match(subject, whole_line);
    EXPECT(outcome.success);
    EXPECT(outcome.count == 1);
    EXPECT(outcome.matches.size() == 1);
    EXPECT(outcome.matches.first().view == "Hello World");
}

// "^.*$" compiled with the Extra flag still matches a plain subject.
TEST_CASE(catch_all_again)
{
    Regex<PosixExtended> whole_line("^.*$", PosixFlags::Extra);
    auto const found = has_match("Hello World"sv, whole_line);
    EXPECT_EQ(found, true);
}

// With Multiline set at construction, "^.*$" yields one match per line.
TEST_CASE(catch_all_newline)
{
    Regex<PosixExtended> line_pattern("^.*$", PosixFlags::Multiline);
    RegexResult outcome;
    String subject = "Hello World\nTest\n1234\n"_string;

    // The result is assigned from inside a closure and inspected after it returns.
    [&]() {
        outcome = match(subject, line_pattern);
        EXPECT_EQ(outcome.success, true);
    }();

    EXPECT_EQ(outcome.count, 3u);
    EXPECT_EQ(outcome.matches.at(0).view, "Hello World");
    EXPECT_EQ(outcome.matches.at(1).view, "Test");
    EXPECT_EQ(outcome.matches.at(2).view, "1234");
}

// Same as catch_all_newline, but the first match is compared against a
// StringView obtained from a ByteString rather than a string literal.
TEST_CASE(catch_all_newline_view)
{
    Regex<PosixExtended> line_pattern("^.*$", PosixFlags::Multiline);
    RegexResult outcome;

    String subject = "Hello World\nTest\n1234\n"_string;
    outcome = match(subject, line_pattern);
    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.count, 3u);

    ByteString expected_first_line = "Hello World";
    EXPECT_EQ(outcome.matches.at(0).view, expected_first_line.view());
    EXPECT_EQ(outcome.matches.at(1).view, "Test");
    EXPECT_EQ(outcome.matches.at(2).view, "1234");
}

// Multiline passed per call splits the subject into lines; without it the same
// pattern matches the whole subject once.
TEST_CASE(catch_all_newline_2)
{
    Regex<PosixExtended> line_pattern("^.*$");
    auto const subject = "Hello World\nTest\n1234\n"sv;
    RegexResult outcome;

    // Multiline supplied at match time.
    outcome = match(subject, line_pattern, PosixFlags::Multiline);
    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.count, 3u);
    EXPECT_EQ(outcome.matches.at(0).view, "Hello World");
    EXPECT_EQ(outcome.matches.at(1).view, "Test");
    EXPECT_EQ(outcome.matches.at(2).view, "1234");

    // No extra flags: a single match spanning every line.
    outcome = match(subject, line_pattern);
    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.count, 1u);
    EXPECT_EQ(outcome.matches.at(0).view, "Hello World\nTest\n1234\n");
}

// A global [[:alpha:]] search finds every letter of the subject individually.
TEST_CASE(match_all_character_class)
{
    Regex<PosixExtended> letter("[[:alpha:]]");
    String subject = "[Window]\nOpacity=255\nAudibleBeep=0\n"_string;
    RegexResult outcome = match(subject, letter, PosixFlags::Global);

    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.count, 24u);

    // Only the first three matches are spot-checked.
    char const* const leading_letters[] = { "W", "i", "n" };
    for (size_t index = 0; index < 3; ++index)
        EXPECT_EQ(outcome.matches.at(index).view, leading_letters[index]);
}

// A character class followed by an end-of-line assertion matches the whole word once.
TEST_CASE(match_character_class_with_assertion)
{
    Regex<PosixExtended> trailing_letters("[[:alpha:]]+$");
    String subject = "abcdef"_string;
    RegexResult outcome = match(subject, trailing_letters);

    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.count, 1u);
}

// Walk-through of the basic API: one match over a two-line subject by default,
// two matches once Multiline is requested.
TEST_CASE(example_for_git_commit)
{
    Regex<PosixExtended> whole_line("^.*$");
    auto const two_lines = "Well, hello friends!\nHello World!"sv;

    // Without Multiline the single match covers the full 33-character subject.
    auto outcome = whole_line.match(two_lines);
    EXPECT(outcome.success);
    EXPECT(outcome.count == 1);
    EXPECT(outcome.matches.at(0).view.starts_with("Well"sv));
    EXPECT(outcome.matches.at(0).view.length() == 33);

    EXPECT(whole_line.has_match("Well,...."sv));

    // With Multiline each line is matched on its own.
    outcome = whole_line.match(two_lines, PosixFlags::Multiline);
    EXPECT(outcome.success);
    EXPECT(outcome.count == 2);
    EXPECT(outcome.matches.at(0).view == "Well, hello friends!");
    EXPECT(outcome.matches.at(1).view == "Hello World!");
}

// A bounded-repetition e-mail pattern accepts two well-formed addresses.
TEST_CASE(email_address)
{
    Regex<PosixExtended> email_pattern("^[A-Z0-9a-z._%+-]{1,64}@([A-Za-z0-9-]{1,63}\\.){1,125}[A-Za-z]{2,63}$");

    StringView const valid_addresses[] = {
        "hello.world@domain.tld"sv,
        "this.is.a.very_long_email_address@world.wide.web"sv,
    };

    for (auto const address : valid_addresses)
        EXPECT(email_pattern.has_match(address));
}

// Searches an INI-style text for section headers and key=value entries and
// checks the matched text, capture groups and reported line/column positions.
TEST_CASE(ini_file_entries)
{
    Regex<PosixExtended> entry_pattern("[[:alpha:]]*=([[:digit:]]*)|\\[(.*)\\]");
    RegexResult outcome;

    // Optional bytecode dump, compiled in only when REGEX_DEBUG is enabled.
    if constexpr (REGEX_DEBUG) {
        RegexDebug<regex::FlatByteCode> debug_printer(stderr);
        debug_printer.print_raw_bytecode(entry_pattern);
        debug_printer.print_header();
        debug_printer.print_bytecode(entry_pattern);
    }

    ByteString ini_text = "[Window]\nOpacity=255\nAudibleBeep=0\n";
    EXPECT_EQ(entry_pattern.search(ini_text.view(), outcome, PosixFlags::Multiline), true);
    EXPECT_EQ(outcome.count, 3u);

    if constexpr (REGEX_DEBUG) {
        for (auto& found : outcome.matches)
            fprintf(stderr, "%s\n", found.view.to_byte_string().characters());
    }

    auto const& matches = outcome.matches;
    auto const& groups = outcome.capture_group_matches;

    // Section header.
    EXPECT_EQ(matches.at(0).view, "[Window]");
    EXPECT_EQ(groups.at(0).at(1).view, "Window");

    // First key=value entry.
    EXPECT_EQ(matches.at(1).view, "Opacity=255");
    EXPECT_EQ(matches.at(1).line, 1u);
    EXPECT_EQ(matches.at(1).column, 0u);
    EXPECT_EQ(groups.at(1).at(0).view, "255");
    EXPECT_EQ(groups.at(1).at(0).line, 1u);
    EXPECT_EQ(groups.at(1).at(0).column, 8u);

    // Second key=value entry.
    EXPECT_EQ(matches.at(2).view, "AudibleBeep=0");
    EXPECT_EQ(groups.at(2).at(0).view, "0");
    EXPECT_EQ(groups.at(2).at(0).line, 2u);
    EXPECT_EQ(groups.at(2).at(0).column, 12u);
}

// The same subject is rejected by match() but accepted by search().
TEST_CASE(ini_file_entries2)
{
    Regex<PosixExtended> entry_pattern("[[:alpha:]]*=([[:digit:]]*)");
    RegexResult outcome;

    ByteString ini_line = "ViewMode=Icon";

    // match(): expected to fail and report no matches.
    EXPECT_EQ(entry_pattern.match(ini_line.view(), outcome), false);
    EXPECT_EQ(outcome.count, 0u);

    // search(): expected to succeed with exactly one match.
    EXPECT_EQ(entry_pattern.search(ini_line.view(), outcome), true);
    EXPECT_EQ(outcome.count, 1u);
}

// A named capture group in a POSIX extended pattern reports both its text and its name.
TEST_CASE(named_capture_group)
{
    Regex<PosixExtended> entry_pattern("[[:alpha:]]*=(?<Test>[[:digit:]]*)");
    RegexResult outcome;

    // Optional bytecode dump, compiled in only when REGEX_DEBUG is enabled.
    if constexpr (REGEX_DEBUG) {
        RegexDebug<regex::FlatByteCode> debug_printer(stderr);
        debug_printer.print_raw_bytecode(entry_pattern);
        debug_printer.print_header();
        debug_printer.print_bytecode(entry_pattern);
    }

    // Looks up, via the compiled bytecode, the name stored for the first
    // capture group of the match at the given index.
    auto first_group_name = [&](size_t match_index) {
        return entry_pattern.parser_result.bytecode.visit([&](auto& bytecode) {
            return bytecode.get_string(outcome.capture_group_matches.at(match_index).at(0).capture_group_name);
        });
    };

    String ini_text = "[Window]\nOpacity=255\nAudibleBeep=0\n"_string;
    EXPECT_EQ(entry_pattern.search(ini_text, outcome, PosixFlags::Multiline), true);
    EXPECT_EQ(outcome.count, 2u);

    EXPECT_EQ(outcome.matches.at(0).view, "Opacity=255");
    EXPECT_EQ(outcome.capture_group_matches.at(0).at(0).view, "255");
    EXPECT_EQ(first_group_name(0), "Test");

    EXPECT_EQ(outcome.matches.at(1).view, "AudibleBeep=0");
    EXPECT_EQ(outcome.capture_group_matches.at(1).at(0).view, "0");
    EXPECT_EQ(first_group_name(1), "Test");
}

// An ECMA262 named capture group whose name contains '$' reports both its text and its name.
TEST_CASE(ecma262_named_capture_group_with_dollar_sign)
{
    Regex<ECMA262> entry_pattern("[a-zA-Z]*=(?<$Test$>[0-9]*)");
    RegexResult outcome;

    // Optional bytecode dump, compiled in only when REGEX_DEBUG is enabled.
    if constexpr (REGEX_DEBUG) {
        RegexDebug<regex::FlatByteCode> debug_printer(stderr);
        debug_printer.print_raw_bytecode(entry_pattern);
        debug_printer.print_header();
        debug_printer.print_bytecode(entry_pattern);
    }

    // Looks up, via the compiled bytecode, the name stored for the first
    // capture group of the match at the given index.
    auto first_group_name = [&](size_t match_index) {
        return entry_pattern.parser_result.bytecode.visit([&](auto& bytecode) {
            return bytecode.get_string(outcome.capture_group_matches.at(match_index).at(0).capture_group_name);
        });
    };

    String ini_text = "[Window]\nOpacity=255\nAudibleBeep=0\n"_string;
    EXPECT_EQ(entry_pattern.search(ini_text, outcome, ECMAScriptFlags::Multiline), true);
    EXPECT_EQ(outcome.count, 2u);

    EXPECT_EQ(outcome.matches.at(0).view, "Opacity=255");
    EXPECT_EQ(outcome.capture_group_matches.at(0).at(0).view, "255");
    EXPECT_EQ(first_group_name(0), "$Test$");

    EXPECT_EQ(outcome.matches.at(1).view, "AudibleBeep=0");
    EXPECT_EQ(outcome.capture_group_matches.at(1).at(0).view, "0");
    EXPECT_EQ(first_group_name(1), "$Test$");
}

// "a*" can match the empty string, so a search reports a match at many positions.
TEST_CASE(a_star)
{
    Regex<PosixExtended> star_pattern("a*");
    RegexResult outcome;

    // Optional bytecode dump, compiled in only when REGEX_DEBUG is enabled.
    if constexpr (REGEX_DEBUG) {
        RegexDebug<regex::FlatByteCode> debug_printer(stderr);
        debug_printer.print_raw_bytecode(star_pattern);
        debug_printer.print_header();
        debug_printer.print_bytecode(star_pattern);
    }

    ByteString ini_text = "[Window]\nOpacity=255\nAudibleBeep=0\n";
    EXPECT_EQ(star_pattern.search(ini_text.view(), outcome, PosixFlags::Multiline), true);
    EXPECT_EQ(outcome.count, 32u);

    // Only index into the matches when the count is as expected; the count
    // expectation above has already recorded any mismatch.
    if (outcome.count != 32u)
        return;

    auto const& matches = outcome.matches;
    EXPECT_EQ(matches.at(0).view.length(), 0u);
    EXPECT_EQ(matches.at(10).view.length(), 1u);
    EXPECT_EQ(matches.at(10).view, "a");
    EXPECT_EQ(matches.at(31).view.length(), 0u);
}

// "hello.$" must be found only when "hello" plus one more character ends the subject.
TEST_CASE(simple_period_end_benchmark)
{
    Regex<PosixExtended> pattern("hello.$");
    RegexResult outcome;

    struct SearchCase {
        StringView subject;
        bool should_be_found;
    };
    SearchCase const cases[] = {
        { "Hello1"sv, false },
        { "hello1hello1"sv, true },
        { "hello2hell"sv, false },
        { "hello?"sv, true },
    };

    // The same result object is reused for every search.
    for (auto const& search_case : cases)
        EXPECT_EQ(pattern.search(search_case.subject, outcome), search_case.should_be_found);
}

// Nested groups are reported outermost first:
// group 0 -> "hello", group 1 -> "ello", group 2 (named "llo") -> "llo".
TEST_CASE(posix_extended_nested_capture_group)
{
    Regex<PosixExtended> nested_pattern("(h(e(?<llo>llo)))");
    auto outcome = nested_pattern.match("hello"sv);

    EXPECT(outcome.success);
    EXPECT_EQ(outcome.capture_group_matches.size(), 1u);

    auto const& groups = outcome.capture_group_matches[0];
    EXPECT_EQ(groups.size(), 3u);
    EXPECT_EQ(groups[0].view, "hello"sv);
    EXPECT_EQ(groups[1].view, "ello"sv);
    EXPECT_EQ(groups[2].view, "llo"sv);
}

// Pattern made of "a|" repeated 100000 times, used as a parser stress input by
// ECMA262_parse. It is built once at start-up and only ever read, so it is kept
// immutable and private to this translation unit.
static auto const parse_test_case_long_disjunction_chain = ByteString::repeated("a|"sv, 100000);

// Table-driven parser test: every entry is compiled as an ECMA262 regex with
// the entry's flags, and the error recorded in the parser result is compared
// against the entry's expected error.
TEST_CASE(ECMA262_parse)
{
    struct _test {
        // Pattern source handed to the Regex constructor.
        StringView pattern;
        // Error the parser is expected to report; entries that omit it expect a clean parse.
        regex::Error expected_error { regex::Error::NoError };
        // Flags the pattern is compiled with; entries that omit them use no flags.
        regex::ECMAScriptFlags flags {};
    };

    _test const tests[] {
        { "^hello.$"sv },
        { "^(hello.)$"sv },
        { "^h{0,1}ello.$"sv },
        { "^hello\\W$"sv },
        { "^hell\\w.$"sv },
        { "^hell\\x6f1$"sv }, // ^hello1$
        { "^hel(?:l\\w).$"sv },
        { "^hel(?<LO>l\\w).$"sv },
        { "^[-a-zA-Z\\w\\s]+$"sv },
        { "\\bhello\\B"sv },
        { "^[\\w+/_-]+[=]{0,2}$"sv },                        // #4189
        { "^(?:[^<]*(<[\\w\\W]+>)[^>]*$|#([\\w\\-]*)$)"sv }, // #4189
        { "\\/"sv },                                         // #4189
        { ",/=-:"sv },                                       // #4243
        { "\\x"sv },                                         // Even invalid escapes are allowed if ~unicode.
        { "\\x1"sv },                                        // Even invalid escapes are allowed if ~unicode.
        { "\\x1"sv, regex::Error::InvalidPattern, regex::ECMAScriptFlags::Unicode },
        { "\\x11"sv },
        { "\\x11"sv, regex::Error::NoError, regex::ECMAScriptFlags::Unicode },
        { "\\"sv, regex::Error::InvalidTrailingEscape },
        { "(?"sv, regex::Error::InvalidCaptureGroup },
        { "\\u1234"sv, regex::Error::NoError, regex::ECMAScriptFlags::Unicode },
        { "[\\u1234]"sv, regex::Error::NoError, regex::ECMAScriptFlags::Unicode },
        { "\\u1"sv, regex::Error::InvalidPattern, regex::ECMAScriptFlags::Unicode },
        { "[\\u1]"sv, regex::Error::InvalidPattern, regex::ECMAScriptFlags::Unicode },
        { ",(?"sv, regex::Error::InvalidCaptureGroup }, // #4583
        { "{1}"sv, regex::Error::InvalidPattern },
        { "{1,2}"sv, regex::Error::InvalidPattern },
        { "\\uxxxx"sv, regex::Error::NoError },
        { "\\uxxxx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\ud83d"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "\\ud83d\\uxxxx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\u{0}"sv },
        { "\\u{0}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "\\u{10ffff}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "\\u{10ffff"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\u{10ffffx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\u{110000}"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\p"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\p{"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\p{}"sv, regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
        { "\\p{AsCiI}"sv, regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
        { "\\p{hello friends}"sv, regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
        { "\\p{Prepended_Concatenation_Mark}"sv, regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
        { "\\p{ASCII}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "\\\\p{1}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "\\\\p{AsCiI}"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\\\p{ASCII}"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\c"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "\\c"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "[\\c]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "[\\c]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\c`"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "\\c`"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "[\\c`]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "[\\c`]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\A"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "\\A"sv, regex::Error::InvalidCharacterClass, ECMAScriptFlags::Unicode },
        { "[\\A]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "[\\A]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\0"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "\\0"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
        { "\\00"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "\\00"sv, regex::Error::InvalidCharacterClass, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
        { "[\\0]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "[\\0]"sv, regex::Error::NoError, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
        { "[\\00]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "[\\00]"sv, regex::Error::InvalidPattern, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
        { "\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "[\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/]"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "]"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "]"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\]"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "}"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
        { "}"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
        { "\\}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
        { "a{9007199254740991}"sv }, // 2^53 - 1
        { "a{9007199254740991,}"sv },
        { "a{9007199254740991,9007199254740991}"sv },
        { "a{9007199254740992}"sv },
        { "a{9007199254740992,}"sv },
        { "a{9007199254740991,9007199254740992}"sv },
        { "a{9007199254740992,9007199254740991}"sv },
        { "a{9007199254740992,9007199254740992}"sv },
        { "a{1,99999999999999999999999999999999999999999999999999}"sv },
        { "a{99999999999999999999999999999999999999999999999999,1}"sv, regex::Error::InvalidBraceContent },
        { "a{99999999999999999999999999999999999999999999999999}"sv },
        { "a{2147483647}"sv }, // 2^31 - 1
        { "a{2147483648}"sv }, // 2^31
        { "a{2147483648,2147483647}"sv },
        { "a{2147483647,2147483646}"sv, regex::Error::InvalidBraceContent },
        { "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture },
        { "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture },
        { "(?<a>(?<a>a))"sv, regex::Error::DuplicateNamedCapture },
        { "(?:(?<x>a)|(?<y>a)(?<x>b))(?:(?<z>c)|(?<z>d))"sv }, // Duplicate named capturing groups in separate alternatives should parse correctly
        { "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
        { "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
        { "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup },
        { "(?<$$_$$>a)"sv },
        { "(?<ÿ>a)"sv },
        { "(?<𝓑𝓻𝓸𝔀𝓷>a)"sv },
        { "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // #12373, quantifiable assertions.
        { parse_test_case_long_disjunction_chain.view() },                                                                                 // A whole lot of disjunctions, should not overflow the stack.
        { "(\"|')(?:(?!\\2)[^\\\\\\r\\n]|\\\\.)*\\2"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended },                         // LegacyOctalEscapeSequence should not consume too many chars (and should not crash)
        // #18324, Capture group counter skipped past EOF.
        { "\\1[\\"sv, regex::Error::InvalidNumber },
        { "(?ii:a)"sv, regex::Error::RepeatedModifierFlag },
        { "(?i-i:a)"sv, regex::Error::RepeatedModifierFlag },
        { "(?-ii:a)"sv, regex::Error::RepeatedModifierFlag },
        { "(?-:a)"sv, regex::Error::InvalidModifierGroup },
        { "(?-ig:a)"sv, regex::Error::InvalidModifierGroup },
        { "(?-x:a)"sv, regex::Error::InvalidModifierGroup },
        { "(?i)"sv, regex::Error::InvalidCaptureGroup },
        { "(?-i)"sv, regex::Error::InvalidCaptureGroup },
    };

    // Compile each entry and compare the recorded parse error with the expectation.
    for (auto& test : tests) {
        Regex<ECMA262> re(test.pattern, test.flags);
        EXPECT_EQ(re.parser_result.error, test.expected_error);
        // Optional bytecode dump, compiled in only when REGEX_DEBUG is enabled.
        if constexpr (REGEX_DEBUG) {
            dbgln("\n");
            RegexDebug<regex::FlatByteCode> regex_dbg(stderr);
            regex_dbg.print_raw_bytecode(re);
            regex_dbg.print_header();
            regex_dbg.print_bytecode(re);
            dbgln("\n");
        }
    }
}

TEST_CASE(ECMA262_match)
{
    constexpr auto global_multiline = ECMAScriptFlags::Global | ECMAScriptFlags::Multiline;

    struct _test {
        StringView pattern;
        StringView subject;
        bool matches { true };
        ECMAScriptFlags options {};
    };
    constexpr _test tests[] {
        { "^hello.$"sv, "hello1"sv },
        { "^(hello.)$"sv, "hello1"sv },
        { "^h{0,1}ello.$"sv, "ello1"sv },
        { "^hello\\W$"sv, "hello!"sv },
        { "^hell\\w.$"sv, "hellx!"sv },
        { "^hell\\x6f1$"sv, "hello1"sv },
        { "^hel(?<LO>l.)1$"sv, "hello1"sv },
        { "^hel(?<LO>l.)1*\\k<LO>.$"sv, "hello1lo1"sv },
        { "^[-a-z1-3\\s]+$"sv, "hell2 o1"sv },
        { "^[\\0-\\x1f]$"sv, "\n"sv },
        { .pattern = "\\bhello\\B"sv, .subject = "hello1"sv, .options = ECMAScriptFlags::Global },
        { "\\b.*\\b"sv, "hello1"sv },
        { "[^\\D\\S]{2}"sv, "1 "sv, false },
        { "bar(?=f.)foo"sv, "barfoo"sv },
        { "bar(?=foo)bar"sv, "barbar"sv, false },
        { "bar(?!foo)bar"sv, "barbar"sv, true },
        { "bar(?!bar)bar"sv, "barbar"sv, false },
        { "bar.*(?<=foo)"sv, "barbar"sv, false },
        { "bar.*(?<!foo)"sv, "barbar"sv, true },
        { "((...)X)+"sv, "fooXbarXbazX"sv, true },
        { "(?:)"sv, ""sv, true },
        { "\\^"sv, "^"sv },
        { "\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/"sv, "^$\\.*+?()[]{}|/"sv, true, ECMAScriptFlags::Unicode },
        { "[\\^\\$\\\\\\.\\*\\+\\?\\(\\)\\[\\]\\{\\}\\|\\/]{15}"sv, "^$\\.*+?()[]{}|/"sv, true, ECMAScriptFlags::Unicode },
        { "(a{2}){3}"sv, "aaaaaa"sv },
        { "(a{2}){3}"sv, "aaaabaa"sv, false },
        { "(a{2}){4}"sv, "aaaaaaaa"sv },
        { "(a{2}){4}"sv, "aaaaaabaa"sv, false },
        { "(a{3}){2}"sv, "aaaaaa"sv },
        { "(a{3}){2}"sv, "aaaabaa"sv, false },
        { "(a{4}){2}"sv, "aaaaaaaa"sv },
        { "(a{4}){2}"sv, "aaaaaabaa"sv, false },
        { "\\u{4}"sv, "uuuu"sv },
        { "(?<=.{3})f"sv, "abcdef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
        { "(?<=.{3})f"sv, "abc😀ef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
        // ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
        { "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
        { "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
        { "\\05"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
        { "\\455"sv, "\0455"sv, true, ECMAScriptFlags::BrowserExtended },
        { "\\314"sv, "\314"sv, true, ECMAScriptFlags::BrowserExtended },
        { "\\c"sv, "\\c"sv, true, ECMAScriptFlags::BrowserExtended },
        { "\\cf"sv, "\06"sv, true, ECMAScriptFlags::BrowserExtended },
        { "\\c1"sv, "\\c1"sv, true, ECMAScriptFlags::BrowserExtended },
        { "[\\c1]"sv, "\x11"sv, true, ECMAScriptFlags::BrowserExtended },
        { "[\\w-\\d]"sv, "-"sv, true, ECMAScriptFlags::BrowserExtended },
        // #5517, appears to be matching JS expressions that involve regular expressions...
        {
            "^(?:^^\\.?|[!+-]|!=|!==|#|%|%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|,|-=|->|\\/|\\/=|:|::|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|[?@[^]|\\^=|\\^\\^|\\^\\^=|{|\\||\\|=|\\|\\||\\|\\|=|~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*(\\/(?=[^*/])(?:[^/[\\\\]|\\\\[\\S\\s]|\\[(?:[^\\\\\\]]|\\\\[\\S\\s])*(?:]|$))+\\/)"sv,
            "return /xx/"sv,
            true,
            ECMAScriptFlags::BrowserExtended,
        },
        // #5518
        { "a{2,}"sv, "aaaa"sv },
        { "\\0"sv, "\0"sv, true, ECMAScriptFlags::BrowserExtended },
        { "\\0"sv, "\0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
        { "\\01"sv, "\1"sv, true, ECMAScriptFlags::BrowserExtended },
        { "[\\0]"sv, "\0"sv, true, ECMAScriptFlags::BrowserExtended },
        { "[\\0]"sv, "\0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::BrowserExtended) },
        { "[\\01]"sv, "\1"sv, true, ECMAScriptFlags::BrowserExtended },
        // #9686, Should allow null bytes in pattern
        { "(\0|a)"sv, "a"sv, true },
        // #6042, Groups inside lookarounds may be referenced outside, but their contents appear empty if the pattern in the lookaround fails.
        { "(.*?)a(?!(a+)b\\2c)\\2(.*)"sv, "baaabaac"sv, true },
        // #11940, Global (not the 'g' flag) regexps should attempt to match the zero-length end of the string too.
        { "a|$"sv, "x"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
        // #12126, ECMA262 regexp should match literal newlines without the 's' flag.
        { "foo\nbar"sv, "foo\nbar"sv, true },
        // #12126, ECMA262 regexp should match newline with [^].
        { "foo[^]bar"sv, "foo\nbar"sv, true },
        // Insensitive lookup table: characters in a range do not necessarily lie in the same range after being converted to lowercase.
        { "^[_A-Z]+$"sv, "_aA"sv, true, ECMAScriptFlags::Insensitive },
        { "^[a-sy-z]$"sv, "b"sv, true, ECMAScriptFlags::Insensitive },
        { "^[a-sy-z]$"sv, "y"sv, true, ECMAScriptFlags::Insensitive },
        { "^[a-sy-z]$"sv, "u"sv, false, ECMAScriptFlags::Insensitive },
        // Dot should not match any of CR/LF/LS/PS in ECMA262 mode without DotAll.
        { "."sv, "\n\r\u2028\u2029"sv, false },
        // $ should accept all LineTerminators in ECMA262 mode with Multiline.
        { "a$"sv, "a\r\n"sv, true, global_multiline.value() },
        { "^a"sv, "\ra"sv, true, global_multiline.value() },
        { "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
        // ladybird#968, ?+ should not loop forever.
        { "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv, "?category=54&shippable=1&baby_age=p,0,1,3"sv, false },
        // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities.
        { "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true },
        // Optimizer bug, ignoring references that weren't bound in the current or past block, ladybird#2281
        { "(a)(?=a*\\1)"sv, "aaaa"sv, true, global_multiline.value() },
        // Optimizer bug, wrong Repeat basic block splits.
        { "[ a](b{2})"sv, "abb"sv, true },
        // See above.
        { "^ {0,3}(([\\`\\~])\\2{2,})\\s*([\\*_]*)\\s*([^\\*_\\s]*).*$"sv, ""sv, false },
        // See above, also ladybird#2931.
        {
            "^(\\d{4}|[+-]\\d{6})(?:-?(\\d{2})(?:-?(\\d{2}))?)?(?:[ T]?(\\d{2}):?(\\d{2})(?::?(\\d{2})(?:[,.](\\d{1,}))?)?(?:(Z)|([+-])(\\d{2})(?::?(\\d{2}))?)?)?$"sv,
            ""sv,
            false,
        },
        // Optimizer bug, ignoring an enabled trailing 'invert' when comparing blocks, ladybird#3421.
        { "[^]*[^]"sv, "i"sv, true },
        { "xx|...|...."sv, "cd"sv, false },
        // Tests nested lookahead with alternation - verifies proper save/restore stack cleanup
        { "a(?=.(?=c)|b)b"sv, "ab"sv, true },
        { "(?=)(?=\\d)"sv, "smart"sv, false },
        // Backrefs are cleared after lookaheads, the indices should be checked before lookup.
        { "(?!(b))\\1"sv, "a"sv, false },
        // String table merge bug: inverse map should be merged regardless of available direct mappings.
        { "((?<x>a)|(?<x>b))"sv, "aa"sv, false },
        // Insensitive charclasses should accept upper/lowercase in pattern (lookup table should still be ordered if insensitive lookup is used), ladybird#5399.
        { "[aBc]"sv, "b"sv, true, ECMAScriptFlags::Insensitive },
        // Optimizer bug: nested 'or' compare ops caused a crash, ladybird#6647.
        { "([[[]]])*0"sv, ""sv, false, ECMAScriptFlags::UnicodeSets },
        { "(([[[]]]{2,})\\s)*"sv, ""sv, true, (ECMAScriptFlags::UnicodeSets | ECMAScriptFlags::Global).value() },
        // Optimizer bug: duplicated rseekto ops output for the same fork.
        { "(.*a)?(x)"sv, "x"sv, true },
        // Optimizer bug: invalid forkif jump target calculation in tree-layout alternatives
        { "ab|a(?:^|x)"sv, "ab"sv, true },
        // Optimizer bug: process rseekto candidates in the correct order.
        { "(.*)/client-(.*)\\.js$"sv, "/client-abc.js"sv, true },
        // Optimizer bug: overlapping character classes and ranges not detected.
        { "^a*\\w"sv, "aa"sv, true },
        { "^a*[a-z]"sv, "aa"sv, true },
        { "^\\w*\\d"sv, "1"sv, true },
        { "^\\w*[\\u212A]"sv, "K"sv, true, combine_flags(ECMAScriptFlags::Insensitive, ECMAScriptFlags::Unicode) },
        // Optimizer bug: case-insensitive matching was not considered during atomic rewrite.
        { "^a*A\\d"sv, "aaaa5"sv, true, ECMAScriptFlags::Insensitive },
        // Quantified lookahead assertions should not affect match_length_minimum.
        { "[a-e](?!Z){2}"sv, "aZZZZ bZZZ cZZ dZ e"sv, true, combine_flags(ECMAScriptFlags::Global, ECMAScriptFlags::BrowserExtended) },
        { "[a-e](?!Z){2,}"sv, "aZZZZ bZZZ cZZ dZ e"sv, true, combine_flags(ECMAScriptFlags::Global, ECMAScriptFlags::BrowserExtended) },
        { "[a-e](?!Z){2,3}"sv, "aZZZZ bZZZ cZZ dZ e"sv, true, combine_flags(ECMAScriptFlags::Global, ECMAScriptFlags::BrowserExtended) },
    };

    for (auto& test : tests) {
        Regex<ECMA262> re(test.pattern, test.options);
        if constexpr (REGEX_DEBUG) {
            dbgln("\n");
            RegexDebug<regex::FlatByteCode> regex_dbg(stderr);
            regex_dbg.print_raw_bytecode(re);
            regex_dbg.print_header();
            regex_dbg.print_bytecode(re);
            dbgln("\n");
        }
        EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
        EXPECT_EQ(re.match(test.subject).success, test.matches);
    }
}

TEST_CASE(lookbehind)
{
    // Part one: every row compiles an ECMA262 pattern containing a lookbehind and checks
    // whether matching the subject succeeds.
    struct _test {
        StringView pattern;
        StringView subject;
        bool matches { true };
        ECMAScriptFlags options {};
    };
    constexpr _test tests[] {
        { "(?<=(ab|abc))d"sv, "abcd"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
        { "(?<=a.*)b"sv, "a b"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
        { "(?<=[a|b|c]*)[^a|b|c]{3}"sv, "abcdef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
        { "(?<=\\b)\\b"sv, "ab"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
    };

    for (auto& test : tests) {
        Regex<ECMA262> re(test.pattern, test.options);
        if constexpr (REGEX_DEBUG) {
            dbgln("\n");
            RegexDebug<regex::FlatByteCode> regex_dbg(stderr);
            regex_dbg.print_raw_bytecode(re);
            regex_dbg.print_header();
            regex_dbg.print_bytecode(re);
            dbgln("\n");
        }
        EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
        EXPECT_EQ(re.match(test.subject).success, test.matches);
    }

    // Part two: every row checks the text held by one capture group (selected by capture_index)
    // of the first match.
    struct _captureTest {
        StringView pattern;
        StringView subject;
        size_t capture_index;
        StringView expected_match;
        ECMAScriptFlags options {};
    };

    constexpr _captureTest capture_tests[] {
        { "(?<=(a|cc))b"sv, "ccb"sv, 0, "cc"sv, ECMAScriptFlags::Global },
        { "((?<=\\b)[d-f]{3})"sv, "abc def"sv, 0, "def"sv, (ECMAScriptFlags)regex::AllFlags::Global },
        { "(?<=(b+))c"sv, "abbbbbbc"sv, 0, "bbbbbb"sv, ECMAScriptFlags::Global },
        { "(?<=((?:b\\d{2})+))c"sv, "ab12b23b34c"sv, 0, "b12b23b34"sv, ECMAScriptFlags::Global },
    };

    for (auto& test : capture_tests) {
        Regex<ECMA262> re(test.pattern, test.options);
        EXPECT_EQ(re.parser_result.error, regex::Error::NoError);

        auto result = re.match(test.subject);
        EXPECT(result.success);
        // Without a match there is no capture list to index; record the failure above and move on
        // to the next row instead of dereferencing an empty result.
        if (!result.success)
            continue;

        EXPECT_EQ(result.capture_group_matches.first()[test.capture_index].view.to_byte_string(), test.expected_match);
    }
}

TEST_CASE(ECMA262_unicode_parser_error)
{
    // Each row pairs a pattern with the parser error expected when it is compiled with the
    // Unicode flag.
    struct ParserCase {
        StringView pattern;
        regex::Error error;
    };

    constexpr ParserCase parser_cases[] {
        { "([^\\:]+?)"sv, regex::Error::InvalidPattern },
    };

    for (auto const& parser_case : parser_cases) {
        Regex<ECMA262> compiled(parser_case.pattern, static_cast<ECMAScriptFlags>(regex::AllFlags::Unicode));
        EXPECT_EQ(compiled.parser_result.error, parser_case.error);
    }
}

TEST_CASE(ECMA262_unicode_match)
{
    // Code points that are appended, in order, to form the subject of the "^\s+$" rows below.
    constexpr auto space_and_line_terminator_code_points = Array { 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000, 0xFEFF };

    StringBuilder whitespace_builder;
    for (u32 code_point : space_and_line_terminator_code_points)
        whitespace_builder.append_code_point(code_point);
    auto space_and_line_terminators = whitespace_builder.to_byte_string();

    struct UnicodeCase {
        StringView pattern;
        StringView subject;
        bool matches { true };
        ECMAScriptFlags options {};
    };
    // Not constexpr: two rows refer to the subject string built at runtime above.
    UnicodeCase unicode_cases[] {
        { "\xf0\x9d\x8c\x86"sv, "abcdef"sv, false, ECMAScriptFlags::Unicode },
        { "[\xf0\x9d\x8c\x86]"sv, "abcdef"sv, false, ECMAScriptFlags::Unicode },
        { "\\ud83d"sv, "😀"sv, true },
        { "\\ud83d"sv, "😀"sv, false, ECMAScriptFlags::Unicode },
        { "\\ude00"sv, "😀"sv, true },
        { "\\ude00"sv, "😀"sv, false, ECMAScriptFlags::Unicode },
        { "\\ud83d\\ude00"sv, "😀"sv, true },
        { "\\ud83d\\ude00"sv, "😀"sv, true, ECMAScriptFlags::Unicode },
        { "\\u{1f600}"sv, "😀"sv, true, ECMAScriptFlags::Unicode },
        { "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true },
        { "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true, ECMAScriptFlags::Unicode },
        { "(?<=.{3})f"sv, "abcdef"sv, true, ECMAScriptFlags::Unicode },
        { "(?<=.{3})f"sv, "abc😀ef"sv, true, ECMAScriptFlags::Unicode },
        { "(?<𝓑𝓻𝓸𝔀𝓷>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
        { "(?<\\u{1d4d1}\\u{1d4fb}\\u{1d4f8}\\u{1d500}\\u{1d4f7}>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
        { "(?<\\ud835\\udcd1\\ud835\\udcfb\\ud835\\udcf8\\ud835\\udd00\\ud835\\udcf7>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
        { "^\\s+$"sv, space_and_line_terminators },
        { "^\\s+$"sv, space_and_line_terminators, true, ECMAScriptFlags::Unicode },
        { "[\\u0390]"sv, "\u1fd3"sv, false, ECMAScriptFlags::Unicode },
        { "[\\u1fd3]"sv, "\u0390"sv, false, ECMAScriptFlags::Unicode },
        { "[\\u0390]"sv, "\u1fd3"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
        { "[\\u1fd3]"sv, "\u0390"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
        { "[\\u03b0]"sv, "\u1fe3"sv, false, ECMAScriptFlags::Unicode },
        { "[\\u1fe3]"sv, "\u03b0"sv, false, ECMAScriptFlags::Unicode },
        { "[\\u03b0]"sv, "\u1fe3"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
        { "[\\u1fe3]"sv, "\u03b0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
        { "[\\ufb05]"sv, "\ufb06"sv, false, ECMAScriptFlags::Unicode },
        { "[\\ufb06]"sv, "\ufb05"sv, false, ECMAScriptFlags::Unicode },
        { "[\\ufb05]"sv, "\ufb06"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
        { "[\\ufb06]"sv, "\ufb05"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },

        // https://github.com/LadybirdBrowser/ladybird/issues/5549
        { "[\\ud800-\\udbff][\\udc00-\\udfff]"sv, "😀"sv, true },
        { "[\\ud800-\\udbff][\\udc00-\\udfff]"sv, "😀"sv, false, ECMAScriptFlags::Unicode },
        { "[\\ud800-\\udbff][\\udc00-\\udfff]"sv, "a"sv, false },
        { "[\\ud800-\\udbff][\\udc00-\\udfff]"sv, "a"sv, false, ECMAScriptFlags::Unicode },
        {
            "\\ud83c[\\udffb-\\udfff](?=\\ud83c[\\udffb-\\udfff])|(?:[^\\ud800-\\udfff][\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]?|[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff]|[\\ud800-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?(?:\\u200d(?:[^\\ud800-\\udfff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?)*"sv,
            "😀"sv,
            true,
        },
        { "(?<before>\\w*)\\s*(?<emoji>\\p{Emoji}+)\\s*(?<after>\\w*)"sv, "Hey 🎉 there! I love 🍕 pizza"sv, true, ECMAScriptFlags::Unicode },
        // Optimizer bug: case-insensitive matching was not considered during atomic rewrite.
        { "^\\u{017f}*s"sv, "\u017fs"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
        { "^\\u{212A}*k"sv, "\u212Ak"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
        { "^\\u{03C3}*\\u{03A3}"sv, "\u03C3\u03A3"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) },
    };

    for (auto& unicode_case : unicode_cases) {
        // regex::AllFlags::Global is always applied, combined with the row's own options.
        Regex<ECMA262> re(unicode_case.pattern, static_cast<ECMAScriptFlags>(regex::AllFlags::Global) | unicode_case.options);

        // Subjects are written as UTF-8 literals and matched through a UTF-16 view.
        auto utf16_subject = Utf16String::from_utf8(unicode_case.subject);
        Utf16View utf16_view { utf16_subject };

        if constexpr (REGEX_DEBUG) {
            dbgln("\n");
            RegexDebug<regex::FlatByteCode> bytecode_dumper(stderr);
            bytecode_dumper.print_raw_bytecode(re);
            bytecode_dumper.print_header();
            bytecode_dumper.print_bytecode(re);
            dbgln("\n");
        }

        EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
        EXPECT_EQ(re.match(utf16_view).success, unicode_case.matches);
    }
}

TEST_CASE(ECMA262_unicode_sets_parser_error)
{
    // Each row pairs a pattern with the parser error (or NoError) expected when it is compiled
    // with the UnicodeSets flag.
    struct ParserCase {
        StringView pattern;
        regex::Error error;
    };

    constexpr ParserCase parser_cases[] {
        { "[[]"sv, regex::Error::InvalidPattern },
        { "[[x[]]]"sv, regex::Error::NoError }, // #23691, should not crash on empty charclass within AndOr.
        { "[[^\\u0430-\\u044f][\\p{RGI_Emoji}]]"sv, regex::Error::NoError },
        { "[^[[\\p{RGI_Emoji}]--[A-Z]]]"sv, regex::Error::NegatedCharacterClassStrings },
        { "[^[^\\p{RGI_Emoji}]]"sv, regex::Error::NegatedCharacterClassStrings },
        { "[\\[]"sv, regex::Error::NoError },
        { "[\\[\\]]"sv, regex::Error::NoError },
        { "[\\S[\\[]]"sv, regex::Error::NoError },
        { "[\\S&&[\\[]]"sv, regex::Error::NoError },
        { "[\\S--[\\[]]"sv, regex::Error::NoError },
    };

    for (auto const& parser_case : parser_cases) {
        Regex<ECMA262> compiled(parser_case.pattern, static_cast<ECMAScriptFlags>(regex::AllFlags::UnicodeSets));
        EXPECT_EQ(compiled.parser_result.error, parser_case.error);
    }
}

TEST_CASE(ECMA262_unicode_sets_match)
{
    // Each row is compiled with the UnicodeSets flag (plus the row's own options) and must parse
    // without error; `matches` states whether matching the subject is expected to succeed.
    struct SetCase {
        StringView pattern;
        StringView subject;
        bool matches { true };
        ECMAScriptFlags options {};
    };

    constexpr SetCase set_cases[] {
        { "[\\w--x]"sv, "x"sv, false },
        { "[\\w&&x]"sv, "y"sv, false },
        { "[\\w--x]"sv, "y"sv, true },
        { "[\\w&&x]"sv, "x"sv, true },
        { "[[0-9\\w]--x--6]"sv, "6"sv, false },
        { "[[0-9\\w]--x--6]"sv, "x"sv, false },
        { "[[0-9\\w]--x--6]"sv, "y"sv, true },
        { "[[0-9\\w]--x--6]"sv, "9"sv, true },
        { "[\\w&&\\d]"sv, "a"sv, false },
        { "[\\w&&\\d]"sv, "4"sv, true },
        { "([^\\:]+?)"sv, "a"sv, true },
        { "[[a][]]"sv, "a"sv, true }, // ladybird#6647
    };

    for (auto const& set_case : set_cases) {
        Regex<ECMA262> re(set_case.pattern, static_cast<ECMAScriptFlags>(regex::AllFlags::UnicodeSets) | set_case.options);
        if constexpr (REGEX_DEBUG) {
            dbgln("\n");
            RegexDebug<regex::FlatByteCode> bytecode_dumper(stderr);
            bytecode_dumper.print_raw_bytecode(re);
            bytecode_dumper.print_header();
            bytecode_dumper.print_bytecode(re);
            dbgln("\n");
        }

        EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
        EXPECT_EQ(re.match(set_case.subject).success, set_case.matches);
    }
}

TEST_CASE(ECMA262_property_match)
{
    // Rows exercise \p{...} / \P{...} escapes. Every pattern is compiled with
    // regex::AllFlags::Global and BrowserExtended in addition to the row's own options.
    struct PropertyCase {
        StringView pattern;
        StringView subject;
        bool matches { true };
        ECMAScriptFlags options {};
    };

    constexpr PropertyCase property_cases[] {
        { "\\p{ASCII}"sv, "a"sv, false },
        { "\\p{ASCII}"sv, "p{ASCII}"sv, true },
        { "\\p{ASCII}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{ASCII}"sv, "😀"sv, false, ECMAScriptFlags::Unicode },
        { "\\P{ASCII}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
        { "\\P{ASCII}"sv, "😀"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{ASCII_Hex_Digit}"sv, "1"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{ASCII_Hex_Digit}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{ASCII_Hex_Digit}"sv, "x"sv, false, ECMAScriptFlags::Unicode },
        { "\\P{ASCII_Hex_Digit}"sv, "1"sv, false, ECMAScriptFlags::Unicode },
        { "\\P{ASCII_Hex_Digit}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
        { "\\P{ASCII_Hex_Digit}"sv, "x"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{Any}"sv, "\xcd\xb8"sv, true, ECMAScriptFlags::Unicode },       // U+0378, which is an unassigned code point.
        { "\\P{Any}"sv, "\xcd\xb8"sv, false, ECMAScriptFlags::Unicode },      // U+0378, which is an unassigned code point.
        { "\\p{Assigned}"sv, "\xcd\xb8"sv, false, ECMAScriptFlags::Unicode }, // U+0378, which is an unassigned code point.
        { "\\P{Assigned}"sv, "\xcd\xb8"sv, true, ECMAScriptFlags::Unicode },  // U+0378, which is an unassigned code point.
        { "\\p{Lu}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
        { "\\p{Lu}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{Lu}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
        { "\\p{Cased_Letter}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{Cased_Letter}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{Cased_Letter}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
        { "\\P{Cased_Letter}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
        { "\\P{Cased_Letter}"sv, "A"sv, false, ECMAScriptFlags::Unicode },
        { "\\P{Cased_Letter}"sv, "9"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{General_Category=Cased_Letter}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{General_Category=Cased_Letter}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{General_Category=Cased_Letter}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
        { "\\p{gc=Cased_Letter}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{gc=Cased_Letter}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{gc=Cased_Letter}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
        { "\\p{Script=Latin}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{Script=Latin}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{Script=Latin}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
        { "\\p{sc=Latin}"sv, "a"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{sc=Latin}"sv, "A"sv, true, ECMAScriptFlags::Unicode },
        { "\\p{sc=Latin}"sv, "9"sv, false, ECMAScriptFlags::Unicode },
        { "\\p{Script_Extensions=Deva}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
        { "\\p{Script_Extensions=Beng}"sv, "\xe1\xb3\x95"sv, true, ECMAScriptFlags::Unicode }, // U+01CD5
        { "\\p{Script_Extensions=Deva}"sv, "\xe1\xb3\x95"sv, true, ECMAScriptFlags::Unicode }, // U+01CD5
        { "\\p{scx=Deva}"sv, "a"sv, false, ECMAScriptFlags::Unicode },
        { "\\p{scx=Beng}"sv, "\xe1\xb3\x95"sv, true, ECMAScriptFlags::Unicode }, // U+01CD5
        { "\\p{scx=Deva}"sv, "\xe1\xb3\x95"sv, true, ECMAScriptFlags::Unicode }, // U+01CD5
    };

    for (auto const& property_case : property_cases) {
        Regex<ECMA262> re(property_case.pattern, static_cast<ECMAScriptFlags>(regex::AllFlags::Global) | regex::ECMAScriptFlags::BrowserExtended | property_case.options);

        // Subjects are written as UTF-8 literals and matched through a UTF-16 view.
        auto utf16_subject = Utf16String::from_utf8(property_case.subject);
        Utf16View utf16_view { utf16_subject };

        if constexpr (REGEX_DEBUG) {
            dbgln("\n");
            RegexDebug<regex::FlatByteCode> bytecode_dumper(stderr);
            bytecode_dumper.print_raw_bytecode(re);
            bytecode_dumper.print_header();
            bytecode_dumper.print_bytecode(re);
            dbgln("\n");
        }

        EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
        EXPECT_EQ(re.match(utf16_view).success, property_case.matches);
    }
}

TEST_CASE(replace)
{
    // Each row: pattern, replacement template, subject, and the string Regex::replace() is
    // expected to return for that subject.
    struct ReplaceCase {
        StringView pattern;
        StringView replacement;
        StringView subject;
        StringView expected;
        ECMAScriptFlags options {};
    };

    constexpr ReplaceCase replace_cases[] {
        { "foo(.+)"sv, "aaa"sv, "test"sv, "test"sv },
        { "foo(.+)"sv, "test\\1"sv, "foobar"sv, "testbar"sv },
        { "foo(.+)"sv, "\\2\\1"sv, "foobar"sv, "\\2bar"sv },
        { "foo(.+)"sv, "\\\\\\1"sv, "foobar"sv, "\\bar"sv },
        { "foo(.)"sv, "a\\1"sv, "fooxfooy"sv, "axay"sv, ECMAScriptFlags::Multiline },
    };

    for (auto const& replace_case : replace_cases) {
        Regex<ECMA262> re(replace_case.pattern, replace_case.options);
        if constexpr (REGEX_DEBUG) {
            dbgln("\n");
            RegexDebug<regex::FlatByteCode> bytecode_dumper(stderr);
            bytecode_dumper.print_raw_bytecode(re);
            bytecode_dumper.print_header();
            bytecode_dumper.print_bytecode(re);
            dbgln("\n");
        }
        EXPECT_EQ(re.parser_result.error, regex::Error::NoError);
        EXPECT_EQ(re.replace(replace_case.subject, replace_case.replacement), replace_case.expected);
    }
}

TEST_CASE(case_insensitive_match)
{
    // A lowercase POSIX extended pattern compiled with the Insensitive flag is matched against an
    // uppercase subject; the first match is expected at column 4.
    Regex<PosixExtended> insensitive_cd("cd", PosixFlags::Insensitive | PosixFlags::Global);
    auto outcome = insensitive_cd.match("AEKFCD"sv);

    EXPECT_EQ(outcome.success, true);
    // Only look at the match list when there is a match to look at.
    if (!outcome.success)
        return;

    EXPECT_EQ(outcome.matches.at(0).column, 4ul);
}

TEST_CASE(extremely_long_fork_chain)
{
    // Matches a starred group against a subject of 1000 'a' characters.
    auto thousand_a_s = MUST(String::repeated('a', 1000));

    Regex<ECMA262> paired_a_loop("(?:aa)*");
    auto outcome = paired_a_loop.match(thousand_a_s);
    EXPECT_EQ(outcome.success, true);
}

TEST_CASE(nullable_quantifiers)
{
    // A starred group whose body can match the empty string must still consume the whole of "ab".
    Regex<ECMA262> re("(a?b?\x3f)*"); // Pattern (a?b??)* isn't written plain to avoid "??)", which is a trigraph.
    EXPECT_EQ(re.parser_result.error, regex::Error::NoError);

    auto result = re.match("ab"sv);
    EXPECT(result.success);
    // Guard the index so that a failed match is reported as a failed expectation rather than as
    // an out-of-range access on an empty match list.
    if (result.success) {
        EXPECT_EQ(result.matches.at(0).view, "ab"sv);
    }
}

TEST_CASE(theoretically_infinite_loop)
{
    // Every pattern below is expected to match the empty subject.
    Array looping_patterns {
        "(a*)*"sv,  // Infinitely matching empty substrings, the outer loop should short-circuit.
        "(a*?)*"sv, // Infinitely matching empty substrings, the outer loop should short-circuit.
        "(a*)*?"sv, // Should match exactly nothing.
        "(?:)*?"sv, // Should not generate an infinite fork loop.
        "(a?)+$"sv, // Infinitely matching empty strings, but with '+' instead of '*'.
    };

    for (auto& looping_pattern : looping_patterns) {
        Regex<ECMA262> compiled(looping_pattern);
        auto outcome = compiled.match(""sv);
        EXPECT_EQ(outcome.success, true);
    }
}

static auto g_lots_of_a_s = String::repeated('a', 10'000'000).release_value();

BENCHMARK_CASE(fork_performance)
{
    // The first 100 characters of the shared subject, used by the two patterns that are expected
    // to fail.
    auto const hundred_a_s = g_lots_of_a_s.bytes_as_string_view().substring_view(0, 100);

    {
        // Starred group over the full ten-million-character subject.
        Regex<ECMA262> paired_a_loop("(?:aa)*");
        auto outcome = paired_a_loop.match(g_lots_of_a_s);
        EXPECT_EQ(outcome.success, true);
    }
    {
        // Nested quantifiers followed by a 'b' that the subject does not contain.
        Regex<ECMA262> nested_plus("(a+)+b");
        auto outcome = nested_plus.match(hundred_a_s);
        EXPECT_EQ(outcome.success, false);
    }
    {
        // Anchored alternation over a subject that ends in a 'b' the pattern cannot consume.
        Regex<ECMA262> anchored_alternation("^(a|a?)+$");
        auto subject_with_b = MUST(String::formatted("{}b", hundred_a_s));
        auto outcome = anchored_alternation.match(subject_with_b);
        EXPECT_EQ(outcome.success, false);
    }
}

BENCHMARK_CASE(anchor_performance)
{
    // Repeatedly runs a start-anchored pattern that cannot match against the ten-million-character
    // subject of 'a's.
    Regex<ECMA262> anchored_b("^b");
    for (int iteration = 0; iteration < 100'000; ++iteration) {
        auto outcome = anchored_b.match(g_lots_of_a_s);
        EXPECT_EQ(outcome.success, false);
    }
}

TEST_CASE(optimizer_atomic_groups)
{
    // Tuple layout: pattern, subject, whether the match is expected to succeed.
    Array atomic_group_cases {
        // Fork -> ForkReplace
        Tuple { "a*b"sv, "aaaaa"sv, false },
        Tuple { "a+b"sv, "aaaaa"sv, false },
        Tuple { "\\\\(\\d+)"sv, "\\\\"sv, false }, // Rewrite bug turning a+ to a*, see #10952.
        Tuple { "[a-z.]+\\."sv, "..."sv, true },   // Rewrite bug, incorrect interpretation of Compare.
        Tuple { "[.-]+\\."sv, ".-."sv, true },
        // Alternative fuse
        Tuple { "(abcfoo|abcbar|abcbaz).*x"sv, "abcbarx"sv, true },
        Tuple { "(a|a)"sv, "a"sv, true },
        Tuple { "(a|)"sv, ""sv, true },                   // Ensure that empty alternatives are not outright removed
        Tuple { "a{2,3}|a{5,8}"sv, "abc"sv, false },      // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247.
        Tuple { "^(a{2,3}|a{5,8})$"sv, "aaaa"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247.
        // Optimizer should not chop off *half* of an instruction when fusing instructions.
        Tuple { "cubic-bezier\\(\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*\\)"sv, "cubic-bezier(.05, 0, 0, 1)"sv, true },
        // ForkReplace shouldn't be applied where it would change the semantics
        Tuple { "(1+)\\1"sv, "11"sv, true },
        Tuple { "(1+)1"sv, "11"sv, true },
        Tuple { "(1+)0"sv, "10"sv, true },
        // Rewrite should not skip over first required iteration of <x>+.
        Tuple { "a+"sv, ""sv, false },
        // 'y' and [^x] have an overlap ('y'), the loop should not be rewritten here.
        Tuple { "[^x]+y"sv, "ay"sv, true },
        // .+ should not be rewritten here, as it's followed by something that would be matched by `.`.
        Tuple { ".+(a|b|c)"sv, "xxa"sv, true },
        // (b+)(b+) produces an intermediate block with no matching ops, the optimiser should ignore that block when looking for following matches and correctly detect the overlap between (b+) and (b+).
        // note that the second loop may be rewritten to a ForkReplace, but the first loop should not be rewritten.
        Tuple { "(b+)(b+)"sv, "bbb"sv, true },
        // Don't treat [\S] as [\s]; see ladybird#2296.
        Tuple { "([^\\s]+?)\\(([\\s\\S]*)\\)"sv, "a(b)"sv, true },
        // Follow direct jumps in the optimizer instead of assuming they're a noop.
        Tuple { "(|[^]*)\\)"sv, "p)"sv, true },
    };

    for (auto& atomic_group_case : atomic_group_cases) {
        Regex<ECMA262> compiled(atomic_group_case.get<0>());
        auto outcome = compiled.match(atomic_group_case.get<1>());
        EXPECT_EQ(outcome.success, atomic_group_case.get<2>());
    }
}

TEST_CASE(optimizer_char_class_lut)
{
    // A character class with many alternatives, matched one million times against a subject that
    // contains none of them.
    Regex<ECMA262> whitespace_class(R"([\f\n\r\t\v\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+$)");

    if constexpr (REGEX_DEBUG) {
        dbgln("\n");
        RegexDebug<regex::FlatByteCode> bytecode_dumper(stderr);
        bytecode_dumper.print_raw_bytecode(whitespace_class);
        bytecode_dumper.print_header();
        bytecode_dumper.print_bytecode(whitespace_class);
        dbgln("\n");
    }

    // This will go through _all_ alternatives in the character class, and then fail.
    for (size_t iteration = 0; iteration < 1'000'000; ++iteration) {
        EXPECT_EQ(whitespace_class.match("1635488940000"sv).success, false);
    }
}

TEST_CASE(optimizer_alternation)
{
    // Each row gives a pattern, a subject and the expected length of the first match; a length of
    // zero means the match is expected to fail.
    Array tests {
        // Pattern, Subject, Expected length [0 == fail]
        Tuple { "a|"sv, "a"sv, 1u },
        Tuple { "a|a|a|a|a|a|a|a|a|b"sv, "a"sv, 1u },
        Tuple { "ab|ac|ad|bc"sv, "bc"sv, 2u },
        // Should not crash on backwards jumps introduced by '.*'.
        Tuple { "\\bDroid\\b.*Build|XT912|XT928|XT926|XT915|XT919|XT925|XT1021|\\bMoto E\\b|XT1068|XT1092|XT1052"sv, "XT1068"sv, 6u },
        // Backwards jumps to IP 0 are normal jumps too.
        Tuple { "^(\\d+|x)"sv, "42"sv, 2u },
        // `Repeat' does not add its insn size to the jump target.
        Tuple { "[0-9]{2}|[0-9]"sv, "92"sv, 2u },
        // Don't ForkJump to the next instruction, rerunning it would produce the same result. see ladybird#2398.
        Tuple { "(xxxxxxxxxxxxxxxxxxxxxxx|xxxxxxxxxxxxxxxxxxxxxxx)?b"sv, "xxxxxxxxxxxxxxxxxxxxxxx"sv, 0u },
        // Don't take the jump in JumpNonEmpty with nonexistent checkpoints (also don't crash).
        Tuple { "(?!\\d*|[g-ta-r]+|[h-l]|\\S|\\S|\\S){,9}|\\S{7,8}|\\d|(?<wnvdfimiwd>)|[c-mj-tb-o]*|\\s"sv, "rjvogg7pm|li4nmct mjb2|pk7s8e0"sv, 0u },
        // Use the right offset when patching jumps through a fork-tree
        Tuple { "(?!a)|(?!a)b"sv, "b"sv, 0u },
        // Optimizer should maintain the correct ordering between the alternatives
        Tuple { "\\\\junk|(\\\\[a-zA-Z@]+)|\\\\[^X]"sv, "\\sqrt"sv, 5u },
    };

    for (auto& test : tests) {
        Regex<ECMA262> re(test.get<0>());
        auto result = re.match(test.get<1>());

        if (test.get<2>() == 0) {
            EXPECT(!result.success);
            continue;
        }

        EXPECT(result.success);
        // Only read the first match when one exists, so that a failing row is reported as a failed
        // expectation and the remaining rows still run.
        if (result.success) {
            EXPECT_EQ(result.matches.first().view.length(), test.get<2>());
        }
    }
}

TEST_CASE(optimizer_rseekto)
{
    // The whole subject must match, with "foo" and "bar" landing in the two capture groups.
    Regex<ECMA262> double_slash_split("^(.*)\\/(?:\\/(.*))$"); // should backtrack from the second '/'.

    auto outcome = double_slash_split.match("foo//bar"sv);
    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.matches.at(0).view, "foo//bar"sv);

    auto& first_match_groups = outcome.capture_group_matches.at(0);
    EXPECT_EQ(first_match_groups.at(0).view, "foo"sv);
    EXPECT_EQ(first_match_groups.at(1).view, "bar"sv);
}

TEST_CASE(start_anchor)
{
    // Ensure that a circumflex at the start only matches the start of the line.
    Regex<PosixBasic> anchored_abc("^abc");

    // The match flags are supplied per call rather than at construction.
    EXPECT_EQ(anchored_abc.match("123abcdef"sv, PosixFlags::Global).success, false);
    EXPECT_EQ(anchored_abc.match("abc123"sv, PosixFlags::Global).success, true);
    EXPECT_EQ(anchored_abc.match("123^abcdef"sv, PosixFlags::Global).success, false);
    EXPECT_EQ(anchored_abc.match("^abc123"sv, PosixFlags::Global).success, false);

    // Multiple lines
    EXPECT_EQ(anchored_abc.match("123\nabc"sv, PosixFlags::Multiline).success, true);
}

TEST_CASE(posix_basic_dollar_is_end_anchor)
{
    // Ensure that a dollar sign at the end only matches the end of the line.
    Regex<PosixBasic> abc_at_end("abc$");

    EXPECT_EQ(abc_at_end.match("123abcdef"sv, PosixFlags::Global).success, false);
    EXPECT_EQ(abc_at_end.match("123abc"sv, PosixFlags::Global).success, true);
    EXPECT_EQ(abc_at_end.match("123abc$def"sv, PosixFlags::Global).success, false);
    EXPECT_EQ(abc_at_end.match("123abc$"sv, PosixFlags::Global).success, false);
}

TEST_CASE(posix_basic_dollar_is_literal)
{
    // Ensure that a dollar sign in the middle is treated as a literal.
    Regex<PosixBasic> dollar_in_middle("abc$d");
    EXPECT_EQ(dollar_in_middle.match("123abcdef"sv, PosixFlags::Global).success, false);
    EXPECT_EQ(dollar_in_middle.match("123abc"sv, PosixFlags::Global).success, false);
    EXPECT_EQ(dollar_in_middle.match("123abc$def"sv, PosixFlags::Global).success, true);
    EXPECT_EQ(dollar_in_middle.match("123abc$"sv, PosixFlags::Global).success, false);

    // Ensure that a dollar sign is always treated as a literal if escaped, even if at the end of the pattern.
    Regex<PosixBasic> escaped_dollar("abc\\$");
    EXPECT_EQ(escaped_dollar.match("123abcdef"sv, PosixFlags::Global).success, false);
    EXPECT_EQ(escaped_dollar.match("123abc"sv, PosixFlags::Global).success, false);
    EXPECT_EQ(escaped_dollar.match("123abc$def"sv, PosixFlags::Global).success, true);
    EXPECT_EQ(escaped_dollar.match("123abc$"sv, PosixFlags::Global).success, true);
}

TEST_CASE(negative_lookahead)
{
    // Negative lookahead with more than 2 forks difference between lookahead init and finish.
    // The options start from ECMAScriptFlags::Global and then have Internal_Stateful cleared.
    ECMAScriptOptions options_without_stateful { ECMAScriptFlags::Global };
    options_without_stateful.reset_flag(static_cast<ECMAScriptFlags>(regex::AllFlags::Internal_Stateful));

    Regex<ECMA262> colon_guard(":(?!\\^\\)|1)", options_without_stateful);
    EXPECT_EQ(colon_guard.match(":^)"sv).success, false);
    EXPECT_EQ(colon_guard.match(":1"sv).success, false);
    EXPECT_EQ(colon_guard.match(":foobar"sv).success, true);

    // Correctly count forks with nested groups and optimised loops
    Regex<ECMA262> blank_line_guard("^((?:[^\\n]|\\n(?! *\\n))+)(?:\\n *)+\\n");
    EXPECT_EQ(blank_line_guard.match("foo\n\n"sv).success, true);
    EXPECT_EQ(blank_line_guard.match("foo\n"sv).success, false);
}

TEST_CASE(single_match_flag)
{
    // Ensure that only a single match is produced and nothing past that.
    Regex<ECMA262> wide_range("[\\u0008-\\uffff]"sv, ECMAScriptFlags::Global | static_cast<ECMAScriptFlags>(regex::AllFlags::SingleMatch));

    auto outcome = wide_range.match("ABC"sv);
    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.matches.size(), 1u);
    EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "A"sv);
}

TEST_CASE(empty_string_wildcard_match)
{
    // Ensure that the wildcard ".*" matches the empty string exactly once
    Regex<ECMA262> match_anything(".*"sv, ECMAScriptFlags::Global);

    auto outcome = match_anything.match(""sv);
    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.matches.size(), 1u);
    EXPECT_EQ(outcome.matches.first().view.to_byte_string(), ""sv);
}

TEST_CASE(inversion_state_in_char_class)
{
    // Three of the four patterns below share these options.
    auto const global_single_match = ECMAScriptFlags::Global | static_cast<ECMAScriptFlags>(regex::AllFlags::SingleMatch);

    {
        // #13755, /[\S\s]/.exec("hello") should be [ "h" ], not null.
        Regex<ECMA262> any_character("[\\S\\s]", global_single_match);

        auto outcome = any_character.match("hello"sv);
        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "h"sv);
    }
    {
        // An inverted class followed by a lookahead that captures the character after the match.
        Regex<ECMA262> identifier_with_lookahead("^(?:([^\\s!\"#%-,\\./;->@\\[-\\^`\\{-~]+(?=([=~}\\s/.)|]))))"sv, ECMAScriptFlags::Global);

        auto outcome = identifier_with_lookahead.match("slideNumbers}}"sv);
        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "slideNumbers"sv);
        EXPECT_EQ(outcome.capture_group_matches.first()[0].view.to_byte_string(), "slideNumbers"sv);
        EXPECT_EQ(outcome.capture_group_matches.first()[1].view.to_byte_string(), "}"sv);
    }
    {
        // #21786, /[^\S\n]/.exec("\n") should be null, not [ "\n" ].
        // This was a general confusion between the inversion state and the negation state (temp inverse).
        Regex<ECMA262> whitespace_except_newline("[^\\S\\n]", global_single_match);

        auto outcome = whitespace_except_newline.match("\n"sv);
        EXPECT_EQ(outcome.success, false);
    }
    {
        // /[^\S]/ should match whitespace characters
        Regex<ECMA262> not_non_whitespace("[^\\S]", global_single_match);

        auto outcome = not_non_whitespace.match("\t"sv);
        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "\t"sv);
    }
}

TEST_CASE(mismatching_brackets)
{
    // Each pattern opens a character class without closing it; the parser
    // is expected to report MismatchingBracket for all of them.
    auto const unterminated_patterns = Array {
        "["sv,
        "[ -"sv,
    };

    for (auto const& pattern_source : unterminated_patterns) {
        Regex<ECMA262> compiled(pattern_source);
        EXPECT_EQ(compiled.parser_result.error, regex::Error::MismatchingBracket);
    }
}

TEST_CASE(optimizer_repeat_offset)
{
    // Regression test: a miscalculated repeat offset while reconstructing the table of
    // alternatives used to crash during construction of this pattern. Merely building
    // the regex is the whole test; there is nothing to match against.
    Regex<ECMA262> compiled("\\/?\\??#?([\\/?#]|[\\uD800-\\uDBFF]|%[c-f][0-9a-f](%[89ab][0-9a-f]){0,2}(%[89ab]?)?|%[0-9a-f]?)$"sv);
}

TEST_CASE(quantified_alternation_capture_groups)
{
    // For (a|a?)+ the group must hold the last meaningful iteration ("a"),
    // never the empty string produced by the optional alternative.
    {
        Regex<ECMA262> repeated_alternation("^(a|a?)+$");
        auto outcome = repeated_alternation.match("a"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "a"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups[0].view.to_byte_string(), "a"sv);
    }
    {
        // Same pattern, two iterations: the whole match is "aa", the group keeps a single "a".
        Regex<ECMA262> repeated_alternation("^(a|a?)+$");
        auto outcome = repeated_alternation.match("aa"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "aa"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups[0].view.to_byte_string(), "a"sv);
    }
}

TEST_CASE(zero_width_backreference)
{
    {
        // A backreference to a group that captured the empty string must still match (zero-width).
        Regex<ECMA262> empty_capture("(a*)b\\1+", ECMAScriptFlags::Global);
        auto outcome = empty_capture.match("baaac"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "b"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups[0].view.to_byte_string(), ""sv);
    }
    {
        // The optional group does not participate; its view is expected to be null.
        Regex<ECMA262> skipped_group("(x)?\\1y"sv);
        auto outcome = skipped_group.match("y"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.first().view, "y"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT(groups[0].view.is_null());
    }
    {
        // Group 1 lives inside a negative lookahead: it is expected to be null afterwards,
        // while group 2 (wrapping the backreference) is expected to be the empty string.
        Regex<ECMA262> lookahead_group("(?!(y)y)(\\1)z"sv, ECMAScriptFlags::Global);
        auto outcome = lookahead_group.match("xyyz"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.first().view, "z"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT(groups[0].view.is_null());
        EXPECT_EQ(groups[1].view.to_byte_string(), ""sv);
    }
}

TEST_CASE(account_for_opcode_size_calculating_incoming_jump_edges)
{
    // Both alternatives begin with a ForkStay that is jumped to from different locations,
    // so the optimizer must leave that initial ForkStay alone.
    Regex<ECMA262> alternatives(".*a|.*b", ECMAScriptFlags::Global);
    auto outcome = alternatives.match("aa"sv);

    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.matches.size(), 1u);
    EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "aa"sv);
}

TEST_CASE(backreference_to_undefined_capture_groups)
{
    {
        // Duplicate named groups across alternatives: \k<x> refers to the group that participated.
        Regex<ECMA262> duplicate_names("(?:(?<x>a)|(?<x>b))\\k<x>"sv);
        auto outcome = duplicate_names.match("bb"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "bb"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT_EQ(groups[1].view.to_byte_string(), "b"sv);
    }

    {
        // The same duplicate named groups, now inside a {2} quantifier.
        Regex<ECMA262> quantified_duplicates("(?:(?:(?<x>a)|(?<x>b))\\k<x>){2}"sv);
        auto outcome = quantified_duplicates.match("aabb"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "aabb"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT_EQ(groups[1].view.to_byte_string(), "b"sv);
    }

    {
        // Mirror of the first case: here the first alternative is the one that participates.
        Regex<ECMA262> duplicate_names("(?:(?<x>a)|(?<x>b))\\k<x>"sv);
        auto outcome = duplicate_names.match("aa"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "aa"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT_EQ(groups[0].view.to_byte_string(), "a"sv);
        EXPECT(groups[1].view.is_null());
    }

    {
        // Numbered backreference (\2) to a group that only exists inside a negative lookahead.
        Regex<ECMA262> numbered_reference("(.*?)a(?!(a+)b\\2c)\\2(.*)"sv);
        auto outcome = numbered_reference.match("baaabaac"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "baaabaac"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 3u);
        EXPECT_EQ(groups[0].view.to_byte_string(), "ba"sv);
        EXPECT(groups[1].view.is_null());
        EXPECT_EQ(groups[2].view.to_byte_string(), "abaac"sv);
    }

    {
        Regex<ECMA262> three_way("^(?:(?<a>x)|(?<a>y)|z)\\k<a>$"sv);

        // Only the third alternative matches, so the backreference targets an undefined group.
        auto outcome = three_way.match("z"sv);
        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "z"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT(groups[1].view.is_null());
    }

    {
        // The three-way pattern again, this time with a {2} quantifier on the alternation.
        Regex<ECMA262> quantified_three_way("^(?:(?<a>x)|(?<a>y)|z){2}\\k<a>$"sv);

        auto xz_outcome = quantified_three_way.match("xz"sv);
        EXPECT_EQ(xz_outcome.success, true);
        EXPECT_EQ(xz_outcome.matches.size(), 1u);
        EXPECT_EQ(xz_outcome.matches.first().view.to_byte_string(), "xz"sv);

        auto& xz_groups = xz_outcome.capture_group_matches.first();
        EXPECT_EQ(xz_groups.size(), 2u);
        EXPECT(xz_groups[0].view.is_null());
        EXPECT(xz_groups[1].view.is_null());

        auto yz_outcome = quantified_three_way.match("yz"sv);
        EXPECT_EQ(yz_outcome.success, true);
        EXPECT_EQ(yz_outcome.matches.size(), 1u);
        EXPECT_EQ(yz_outcome.matches.first().view.to_byte_string(), "yz"sv);

        auto& yz_groups = yz_outcome.capture_group_matches.first();
        EXPECT_EQ(yz_groups.size(), 2u);
        EXPECT(yz_groups[0].view.is_null());
        EXPECT(yz_groups[1].view.is_null());
    }
}

TEST_CASE(optional_groups_with_empty_matches)
{
    // The greedy first group takes the whole subject; the optional second group is expected to be null.
    Regex<ECMA262> greedy_then_optional("^(.*)(.*)?$"sv);
    auto greedy_outcome = greedy_then_optional.match("a"sv);
    EXPECT_EQ(greedy_outcome.success, true);
    auto& greedy_groups = greedy_outcome.capture_group_matches.first();
    EXPECT_EQ(greedy_groups[0].view.to_byte_string(), "a"sv);
    EXPECT(greedy_groups[1].view.is_null());

    // An optional empty group on an empty subject: the group is expected to be null.
    Regex<ECMA262> optional_empty_group("()?"sv);
    auto empty_outcome = optional_empty_group.match(""sv);
    EXPECT_EQ(empty_outcome.success, true);
    auto& empty_groups = empty_outcome.capture_group_matches.first();
    EXPECT(empty_groups[0].view.is_null());

    // Nested optional groups inside a repeated group: the expectations below describe
    // the final iteration ("ac"), in which (b+)? did not participate.
    Regex<ECMA262> nested_optionals("(z)((a+)?(b+)?(c))*"sv);
    auto nested_outcome = nested_optionals.match("zaacbbbcac"sv);
    EXPECT_EQ(nested_outcome.success, true);
    auto& nested_groups = nested_outcome.capture_group_matches.first();
    EXPECT_EQ(nested_groups[0].view.to_byte_string(), "z"sv);
    EXPECT_EQ(nested_groups[1].view.to_byte_string(), "ac"sv);
    EXPECT_EQ(nested_groups[2].view.to_byte_string(), "a"sv);
    EXPECT(nested_groups[3].view.is_null());
    EXPECT_EQ(nested_groups[4].view.to_byte_string(), "c"sv);

    // An optional group wrapping only a lookahead: the capture inside it is expected to be null.
    Regex<ECMA262> optional_lookahead("(?:(?=(abc)))?a"sv);
    auto lookahead_outcome = optional_lookahead.match("abc"sv, ECMAScriptFlags::Global);
    EXPECT_EQ(lookahead_outcome.success, true);
    EXPECT_EQ(lookahead_outcome.matches.first().view.to_byte_string(), "a"sv);
    auto& lookahead_groups = lookahead_outcome.capture_group_matches.first();
    EXPECT(lookahead_groups[0].view.is_null());

    // Same idea, with the optionality spelled as {0,1} and the pattern anchored at the start.
    Regex<ECMA262> bounded_lookahead("^(?:(?=(abc))){0,1}a"sv);
    auto bounded_outcome = bounded_lookahead.match("abc"sv, ECMAScriptFlags::Global);
    EXPECT_EQ(bounded_outcome.success, true);
    EXPECT_EQ(bounded_outcome.matches.first().view.to_byte_string(), "a"sv);
    auto& bounded_groups = bounded_outcome.capture_group_matches.first();
    EXPECT(bounded_groups[0].view.is_null());
}

TEST_CASE(ecma262_modifiers)
{
    // One table row: a pattern, the subject it is run against, whether a match is
    // expected, and the flags the regex is constructed with.
    struct Test {
        StringView pattern;
        StringView subject;
        bool matches { true };
        ECMAScriptFlags flags {};
    };

    constexpr Test modifier_cases[] {
        // Modifiers switched on for a sub-pattern only.
        { "a(?i:b)c"sv, "aBc"sv, true, {} },
        { "a(?i:b)c"sv, "aBC"sv, false, {} },
        { "a(?s:.)c"sv, "a\nc"sv, true, {} },
        { "(?ims:a.b)"sv, "A\nB"sv, true, {} },
        // A modifier switched back off inside a group that enabled it.
        { "(?i:a(?-i:b)c)"sv, "AbC"sv, true, {} },
        { "(?i:a(?-i:b)c)"sv, "ABC"sv, false, {} },
        // Inline modifiers interacting with flags passed to the constructor.
        { "a(?-i:b)c"sv, "AbC"sv, true, ECMAScriptFlags::Insensitive },
        { "a(?-i:b)c"sv, "ABC"sv, false, ECMAScriptFlags::Insensitive },
        { "x.(?m:^a)"sv, "x\na"sv, true, ECMAScriptFlags::SingleLine },
    };

    for (auto const& modifier_case : modifier_cases) {
        Regex<ECMA262> compiled(modifier_case.pattern, modifier_case.flags);
        auto outcome = compiled.match(modifier_case.subject);
        EXPECT_EQ(outcome.success, modifier_case.matches);
    }
}

// Expects every string in __VA_ARGS__ to appear, in the given order, in the bytecode dump of `re`
// (see bytecode_matches_checks()). On failure, the expected patterns and the whole dump are printed
// and the test is marked as failed.
//
// The macro deliberately does NOT end in a semicolon: the caller supplies it, which keeps the
// do/while(0) wrapper usable as a single statement (e.g. in an un-braced if/else).
#define EXPECT_PATTERNS_IN_DUMP(re, ...)                                                              \
    do {                                                                                              \
        auto dump = bytecode_dump(re);                                                                \
        if (!bytecode_matches_checks(dump, Array { __VA_ARGS__ })) {                                  \
            warnln("Failed pattern expectation {} in dump lines:\n{}", Vector { __VA_ARGS__ }, dump); \
            EXPECT(false && #__VA_ARGS__);                                                            \
        }                                                                                             \
    } while (0)

// Expects that no line of the bytecode dump of `re` contains `pattern`
// (see bytecode_contains_pattern()). On failure, the offending pattern and the whole dump are
// printed and the test is marked as failed.
//
// The macro deliberately does NOT end in a semicolon: the caller supplies it, which keeps the
// do/while(0) wrapper usable as a single statement (e.g. in an un-braced if/else).
#define EXPECT_NO_PATTERN_IN_DUMP(re, pattern)                                   \
    do {                                                                         \
        auto dump = bytecode_dump(re);                                           \
        if (bytecode_contains_pattern(dump, pattern)) {                          \
            warnln("Unexpected pattern '{}' found in dump:\n{}", pattern, dump); \
            EXPECT(false && #pattern);                                           \
        }                                                                        \
    } while (0)

// Renders the flat bytecode of `re` as one "<opcode name> <arguments>" string per instruction.
// Walking stops once the end of the bytecode is reached or right after an Exit opcode has been
// emitted, whichever comes first.
static Vector<ByteString> bytecode_dump(Regex<ECMA262> const& re)
{
    auto& flat_bytecode = re.parser_result.bytecode.get<regex::FlatByteCode>();
    auto const* words = flat_bytecode.flat_data().data();
    auto word_count = flat_bytecode.size();

    Vector<ByteString> rendered_lines;
    // The match state is used purely as an instruction cursor here.
    auto cursor = regex::MatchState::only_for_enumeration();
    while (cursor.instruction_position < word_count) {
        auto opcode = static_cast<regex::OpCodeId>(words[cursor.instruction_position]);
        // Measure the instruction before rendering it, mirroring the order it is advanced by below.
        auto instruction_length = regex::opcode_size(opcode, words, cursor.instruction_position);
        rendered_lines.append(ByteString::formatted("{} {}", regex::opcode_id_name(opcode), regex::opcode_arguments_string(opcode, words, cursor.instruction_position, cursor, flat_bytecode)));
        if (opcode == regex::OpCodeId::Exit)
            break;
        cursor.instruction_position += instruction_length;
    }
    return rendered_lines;
}

// Returns true when every entry of `checks` occurs as a substring of some line in `lines`,
// in order: each check is searched for starting on the line after the one that satisfied
// the previous check, so a single line can satisfy at most one check.
template<auto N>
static bool bytecode_matches_checks(Span<ByteString const> lines, Array<StringView, N> checks)
{
    size_t cursor = 0;
    for (auto needle : checks) {
        // Skip lines that do not contain the current check.
        while (cursor < lines.size() && !lines[cursor].contains(needle))
            ++cursor;

        // Ran out of lines before finding this check.
        if (cursor == lines.size())
            return false;

        // Consume the matching line so the next check has to match further down.
        ++cursor;
    }
    return true;
}

// Returns true if at least one of `lines` contains `pattern` as a substring.
static bool bytecode_contains_pattern(Span<ByteString const> lines, StringView pattern)
{
    for (size_t index = 0; index < lines.size(); ++index) {
        if (lines[index].contains(pattern))
            return true;
    }
    return false;
}

TEST_CASE(optimizer_dot_star_to_rseekto)
{
    Regex<ECMA262> compiled(".*foo");

    // The dump is expected to contain an RSeekTo for 'f' (= 102), with a ForkStay on a later line.
    EXPECT_PATTERNS_IN_DUMP(compiled, "RSeekTo before '102'"sv, "ForkStay"sv);

    // The rewritten bytecode must still match correctly.
    EXPECT_EQ(compiled.match("xyzfoo"sv).success, true);
    EXPECT_EQ(compiled.match("foo"sv).success, true);
    EXPECT_EQ(compiled.match("xyzbar"sv).success, false);
}

TEST_CASE(optimizer_simple_compare_string)
{
    Regex<ECMA262> compiled(".?foo");

    // The literal "foo" is expected to show up as a single string compare in the dump.
    EXPECT_PATTERNS_IN_DUMP(compiled, "CompareSimple String \"foo\""sv);

    EXPECT_EQ(compiled.match("foo"sv).success, true);
    EXPECT_EQ(compiled.match("xyzbar"sv).success, false);
}

TEST_CASE(optimizer_dot_star_with_fail_if_empty)
{
    // RSeekTo detection should ignore a FailIfEmpty sitting inside a .* loop.
    // NOTE(review): the pattern is the same ".*foo" used by optimizer_dot_star_to_rseekto;
    // confirm that it actually produces a FailIfEmpty inside the loop.
    Regex<ECMA262> compiled(".*foo");

    // 'f' = 102
    EXPECT_PATTERNS_IN_DUMP(compiled, "RSeekTo before '102'"sv);

    EXPECT_EQ(compiled.match("foo"sv).success, true);
    EXPECT_EQ(compiled.match("xyzfoo"sv).success, true);
    EXPECT_EQ(compiled.match("bar"sv).success, false);
}

TEST_CASE(optimizer_dot_plus_no_rseekto)
{
    // The loop generated for .+ has the shape `JumpNonEmpty ForkJump` and does not begin
    // with a ForkStay, which makes it ineligible for the RSeekTo rewrite.
    Regex<ECMA262> compiled(".+foo");
    EXPECT_NO_PATTERN_IN_DUMP(compiled, "RSeekTo"sv);

    EXPECT_EQ(compiled.match("xfoo"sv).success, true);
    EXPECT_EQ(compiled.match("xyzfoo"sv).success, true);
    // .+ needs at least one character in front of "foo".
    EXPECT_EQ(compiled.match("foo"sv).success, false);
    EXPECT_EQ(compiled.match("bar"sv).success, false);
}

TEST_CASE(optimizer_dot_star_in_capture_group)
{
    // Wrapping .* in a capture group must not prevent the RSeekTo rewrite.
    Regex<ECMA262> compiled("(.*)x");

    // 'x' = 120
    EXPECT_PATTERNS_IN_DUMP(compiled, "RSeekTo before '120'"sv);

    EXPECT_EQ(compiled.match("abcx"sv).success, true);
    EXPECT_EQ(compiled.match("x"sv).success, true);
    EXPECT_EQ(compiled.match("abc"sv).success, false);
}

TEST_CASE(optimizer_no_rseekto_for_char_class)
{
    // A character class after .* gives no single character to seek to, so no RSeekTo is expected.
    {
        Regex<ECMA262> digit_suffix(".*\\d");
        EXPECT_NO_PATTERN_IN_DUMP(digit_suffix, "RSeekTo"sv);

        EXPECT_EQ(digit_suffix.match("abc5"sv).success, true);
        EXPECT_EQ(digit_suffix.match("abc"sv).success, false);
    }

    // The same holds when .* is followed by a bracketed set of characters.
    {
        Regex<ECMA262> set_suffix(".*[abc]");
        EXPECT_NO_PATTERN_IN_DUMP(set_suffix, "RSeekTo"sv);

        EXPECT_EQ(set_suffix.match("xyzc"sv).success, true);
        EXPECT_EQ(set_suffix.match("xyz"sv).success, false);
    }
}

TEST_CASE(optimizer_atomic_rewrite_bytecode)
{
    // a+b: since 'a' can never match 'b', the a+ loop is expected to be rewritten as atomic,
    // which shows up as a ForkReplace in the dump.
    {
        Regex<ECMA262> disjoint_chars("a+b");
        EXPECT_PATTERNS_IN_DUMP(disjoint_chars, "ForkReplace"sv);

        EXPECT_EQ(disjoint_chars.match("ab"sv).success, true);
        EXPECT_EQ(disjoint_chars.match("aab"sv).success, true);
        EXPECT_EQ(disjoint_chars.match("b"sv).success, false);
        EXPECT_EQ(disjoint_chars.match("aaa"sv).success, false);
    }

    // [a-z]+[0-9]: the two character classes are disjoint, so the same rewrite is expected.
    {
        Regex<ECMA262> disjoint_classes("[a-z]+[0-9]");
        EXPECT_PATTERNS_IN_DUMP(disjoint_classes, "ForkReplace"sv);

        EXPECT_EQ(disjoint_classes.match("abc5"sv).success, true);
        EXPECT_EQ(disjoint_classes.match("5"sv).success, false);
        EXPECT_EQ(disjoint_classes.match("abc"sv).success, false);
    }
}

TEST_CASE(optimizer_no_atomic_rewrite_with_overlap)
{
    // a+a: the loop body and what follows it both match 'a', so the atomic rewrite
    // (ForkReplace) must not be applied.
    {
        Regex<ECMA262> overlapping("a+a");
        EXPECT_NO_PATTERN_IN_DUMP(overlapping, "ForkReplace"sv);

        EXPECT_EQ(overlapping.match("aa"sv).success, true);
        EXPECT_EQ(overlapping.match("aaa"sv).success, true);
        EXPECT_EQ(overlapping.match("a"sv).success, false);
    }

    // (a+)\1: a backreference following the loop must also block the atomic rewrite.
    {
        Regex<ECMA262> with_backreference("(a+)\\1");
        EXPECT_NO_PATTERN_IN_DUMP(with_backreference, "ForkReplace"sv);

        EXPECT_EQ(with_backreference.match("aa"sv).success, true);
        EXPECT_EQ(with_backreference.match("aaaa"sv).success, true);
        EXPECT_EQ(with_backreference.match("a"sv).success, false);
    }
}

TEST_CASE(optimizer_adjacent_char_to_string_compare)
{
    // A run of adjacent single-character compares is expected to be merged into one string compare.
    {
        Regex<ECMA262> five_chars(".?hello");
        EXPECT_PATTERNS_IN_DUMP(five_chars, "CompareSimple String \"hello\""sv);

        EXPECT_EQ(five_chars.match("hello"sv).success, true);
        EXPECT_EQ(five_chars.match("xhello"sv).success, true);
        EXPECT_EQ(five_chars.match("world"sv).success, false);
    }

    // The merge is expected even for a run of just two characters.
    {
        Regex<ECMA262> two_chars(".?ab");
        EXPECT_PATTERNS_IN_DUMP(two_chars, "CompareSimple String \"ab\""sv);

        EXPECT_EQ(two_chars.match("ab"sv).success, true);
        EXPECT_EQ(two_chars.match("xab"sv).success, true);
        EXPECT_EQ(two_chars.match("ba"sv).success, false);
    }
}

TEST_CASE(optimizer_simple_compare_char)
{
    // A lone character compare is expected to appear as 'CompareSimple Char' in the dump.
    Regex<ECMA262> compiled(".*a");
    EXPECT_PATTERNS_IN_DUMP(compiled, "CompareSimple Char 'a'"sv);

    EXPECT_EQ(compiled.match("a"sv).success, true);
    EXPECT_EQ(compiled.match("ba"sv).success, true);
    EXPECT_EQ(compiled.match("b"sv).success, false);
}

TEST_CASE(optimizer_simple_compare_char_class)
{
    // A lone character-class compare is expected to appear as 'CompareSimple CharClass' in the dump.
    Regex<ECMA262> compiled(".*\\d");
    EXPECT_PATTERNS_IN_DUMP(compiled, "CompareSimple CharClass"sv);

    EXPECT_EQ(compiled.match("abc5"sv).success, true);
    EXPECT_EQ(compiled.match("abc"sv).success, false);
}