Files
ladybird/Libraries/LibJS/Tests/builtins/RegExp/RegExp.js

377 lines
16 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Each malformed constructor call below must be rejected with a SyntaxError;
// the accompanying message is checked through toThrowWithMessage.
describe("errors", () => {
    const rejectedCalls = [
        {
            title: "invalid pattern",
            attempt: () => {
                RegExp("[");
            },
            message: "RegExp compile error: Error during parsing of regular expression:",
        },
        {
            title: "invalid flag",
            attempt: () => {
                RegExp("", "x");
            },
            message: "Invalid RegExp flag 'x'",
        },
        {
            title: "repeated flag",
            attempt: () => {
                RegExp("", "gg");
            },
            message: "Repeated RegExp flag 'g'",
        },
    ];

    // Register one test per entry, in declaration order.
    for (const { title, attempt, message } of rejectedCalls) {
        test(title, () => {
            expect(attempt).toThrowWithMessage(SyntaxError, message);
        });
    }
});
// Calling RegExp as a plain function with missing or undefined arguments:
// the stringified result is compared against the expected literal form.
test("basic functionality", () => {
    const cases = [
        { args: [], expected: "/(?:)/" },
        { args: [undefined], expected: "/(?:)/" },
        { args: ["foo"], expected: "/foo/" },
        { args: ["foo", undefined], expected: "/foo/" },
        { args: ["foo", "g"], expected: "/foo/g" },
        { args: [undefined, "g"], expected: "/(?:)/g" },
    ];

    // Spreading keeps the exact argument count of each call.
    for (const { args, expected } of cases) {
        expect(RegExp(...args).toString()).toBe(expected);
    }
});
// Passing a RegExp, or an object that exposes Symbol.match, as the pattern
// argument; an explicit flags argument is expected to win over the object's own.
test("regexp object as pattern parameter", () => {
    const stringify = (...args) => RegExp(...args).toString();

    // Genuine RegExp instances.
    expect(stringify(/foo/)).toBe("/foo/");
    expect(stringify(/foo/g)).toBe("/foo/g");
    expect(stringify(/foo/g, "")).toBe("/foo/");
    expect(stringify(/foo/g, "y")).toBe("/foo/y");

    // Look-alike with a `source` but no `flags` property.
    const lookalikeWithoutFlags = {
        source: "foo",
        [Symbol.match]() {},
    };
    expect(stringify(lookalikeWithoutFlags)).toBe("/foo/");
    expect(stringify(lookalikeWithoutFlags, "y")).toBe("/foo/y");

    // Look-alike that also supplies its own `flags`.
    const lookalikeWithFlags = {
        source: "foo",
        flags: "g",
        [Symbol.match]() {},
    };
    expect(stringify(lookalikeWithFlags)).toBe("/foo/g");
    expect(stringify(lookalikeWithFlags, "")).toBe("/foo/");
    expect(stringify(lookalikeWithFlags, "y")).toBe("/foo/y");
});
// The same literal is evaluated on two consecutive loop passes and must
// behave the same way each time.
test("regexp literals are re-useable", () => {
    for (let round = 0; round < 2; round += 1) {
        const literal = /test/;
        const matchesPrefix = literal.test("te");
        expect(matchesPrefix).toBeFalse();
        const matchesWhole = literal.test("test");
        expect(matchesWhole).toBeTrue();
    }
});
// The character class is the range ⪾ (U+2ABE) through ⫀ (U+2AC0), with each
// endpoint written behind a backslash in the pattern source.
test("Incorrectly escaped code units not converted to invalid patterns", () => {
const re = /[\⪾-\⫀]/;
expect(re.test("⫀")).toBeTrue();
// "\\u2abe" is the six-character text `\u2abe`, not the character ⪾ itself,
// so none of its characters fall inside the class.
expect(re.test("\\u2abe")).toBeFalse(); // ⪾ is \u2abe
});
// A global pattern that can match the empty string at every position must
// still terminate: one empty match per position, then lastIndex back at 0.
test("regexp that always matches stops matching if it's past the end of the string instead of infinitely looping", () => {
    const alwaysMatching = new RegExp("[\u200E]*", "gu");
    const matches = "whf".match(alwaysMatching);

    expect(matches).toEqual(["", "", "", ""]);
    expect(alwaysMatching.lastIndex).toBe(0);
});
// The pattern text contains a `\u{...}` escape; with the "v" flag it is
// expected to match the code point U+10FFFF following "a".
test("v flag should enable unicode mode", () => {
    const unicodeSetsRegex = new RegExp("a\\u{10FFFF}", "v");
    const matched = unicodeSetsRegex.test("a\u{10FFFF}");

    expect(matched).toBe(true);
});
// Smoke test: the Uint8Array is passed directly as the pattern argument.
// There is no expectation; the call only has to complete.
test("parsing a large bytestring shouldn't crash", () => {
    const zeroedBytes = new Uint8Array(0x40000);
    RegExp(zeroedBytes);
});
// Table of (pattern, subject, expected String.prototype.match result) rows
// covering non-ASCII literals with and without the "u" flag.
test("Unicode non-ASCII matching", () => {
    const cases = [
        { pattern: /é/u, match: "é", expected: ["é"] },
        { pattern: /é/, match: "é", expected: ["é"] },
        { pattern: /\u{61}/u, match: "a", expected: ["a"] },
        { pattern: /\u{61}/, match: "a", expected: null },
        { pattern: /😄/u, match: "😄", expected: ["😄"] },
        { pattern: /😄/u, match: "\ud83d", expected: null },
        { pattern: /😄/, match: "\ud83d", expected: null },
    ];

    // `match` is the subject string; `pattern` is applied to it.
    for (const { pattern, match: subject, expected } of cases) {
        expect(subject.match(pattern)).toEqual(expected);
    }
});
// https://github.com/tc39/test262/tree/main/test/built-ins/RegExp/unicodeSets/generated
test("Unicode properties of strings", () => {
const regexes = [
/\p{Basic_Emoji}/v,
/\p{Emoji_Keycap_Sequence}/v,
/\p{RGI_Emoji_Modifier_Sequence}/v,
/\p{RGI_Emoji_Flag_Sequence}/v,
/\p{RGI_Emoji_Tag_Sequence}/v,
/\p{RGI_Emoji_ZWJ_Sequence}/v,
/\p{RGI_Emoji}/v,
];
for (const re of regexes) {
expect(() => {
re.test("test");
}).not.toThrow();
}
function testExtendedCharacterClass({ regExp, matchStrings, nonMatchStrings }) {
matchStrings.forEach(str => expect(regExp.test(str)).toBeTrue());
nonMatchStrings.forEach(str => expect(regExp.test(str)).toBeFalse());
}
testExtendedCharacterClass({
regExp: /^[\p{ASCII_Hex_Digit}--\p{Emoji_Keycap_Sequence}]+$/v,
matchStrings: ["0", "1", "2", "3", "4", "5", "8", "A", "B", "D", "E", "F", "a", "b", "c", "d", "e", "f"],
nonMatchStrings: [
"6\uFE0F\u20E3",
"7\uFE0F\u20E3",
"9\uFE0F\u20E3",
"\u2603",
"\u{1D306}",
"\u{1F1E7}\u{1F1EA}",
],
});
testExtendedCharacterClass({
regExp: /^[\d\p{Emoji_Keycap_Sequence}]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0", "0\uFE0F\u20E3", "9", "9\uFE0F\u20E3"],
nonMatchStrings: ["C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[[0-9]\p{Emoji_Keycap_Sequence}]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0", "0\uFE0F\u20E3", "9", "9\uFE0F\u20E3"],
nonMatchStrings: ["C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[_--[0-9]]+$/v,
matchStrings: ["_"],
nonMatchStrings: ["6\uFE0F\u20E3", "7", "9\uFE0F\u20E3", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{ASCII_Hex_Digit}--[0-9]]+$/v,
matchStrings: ["a", "b"],
nonMatchStrings: ["0", "9", "9\uFE0F\u20E3", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{ASCII_Hex_Digit}\p{Emoji_Keycap_Sequence}]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0", "0\uFE0F\u20E3", "A", "B", "a", "b"],
nonMatchStrings: ["\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[_\p{Emoji_Keycap_Sequence}]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0\uFE0F\u20E3", "_"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}--\d]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0\uFE0F\u20E3"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}--[0-9]]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0\uFE0F\u20E3"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}--\p{ASCII_Hex_Digit}]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0\uFE0F\u20E3"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}--_]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0\uFE0F\u20E3"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}&&\p{Emoji_Keycap_Sequence}]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0\uFE0F\u20E3"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}\d]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0", "0\uFE0F\u20E3", "9", "9\uFE0F\u20E3"],
nonMatchStrings: ["C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}[0-9]]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0", "0\uFE0F\u20E3", "9", "9\uFE0F\u20E3"],
nonMatchStrings: ["C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}\p{ASCII_Hex_Digit}]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0", "0\uFE0F\u20E3", "9", "9\uFE0F\u20E3", "A", "a"],
nonMatchStrings: ["\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}_]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0\uFE0F\u20E3", "_"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}\p{Emoji_Keycap_Sequence}]+$/v,
matchStrings: ["#\uFE0F\u20E3", "*\uFE0F\u20E3", "0\uFE0F\u20E3"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\d--\q{0|2|4|9\uFE0F\u20E3}]+$/v,
expression: "[\d--\q{0|2|4|9\uFE0F\u20E3}]",
matchStrings: ["1", "9"],
nonMatchStrings: ["0", "9\uFE0F\u20E3", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\d&&\q{0|2|4|9\uFE0F\u20E3}]+$/v,
expression: "[\d&&\q{0|2|4|9\uFE0F\u20E3}]",
matchStrings: ["0", "2", "4"],
nonMatchStrings: ["1", "9\uFE0F\u20E3", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\d\q{0|2|4|9\uFE0F\u20E3}]+$/v,
expression: "[\d\q{0|2|4|9\uFE0F\u20E3}]",
matchStrings: ["0", "9\uFE0F\u20E3"],
nonMatchStrings: ["6\uFE0F\u20E3", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}--\q{0|2|4|9\uFE0F\u20E3}]+$/v,
expression: "[\p{Emoji_Keycap_Sequence}--\q{0|2|4|9\uFE0F\u20E3}]",
matchStrings: ["#\uFE0F\u20E3", "8\uFE0F\u20E3"],
nonMatchStrings: ["7", "9\uFE0F\u20E3", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\p{Emoji_Keycap_Sequence}\q{0|2|4|9\uFE0F\u20E3}]+$/v,
expression: "[\p{Emoji_Keycap_Sequence}\q{0|2|4|9\uFE0F\u20E3}]",
matchStrings: ["#\uFE0F\u20E3", "0", "9\uFE0F\u20E3"],
nonMatchStrings: ["7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\q{0|2|4|9\uFE0F\u20E3}--\q{0|2|4|9\uFE0F\u20E3}]+$/v,
expression: "[\q{0|2|4|9\uFE0F\u20E3}--\q{0|2|4|9\uFE0F\u20E3}]",
matchStrings: [],
nonMatchStrings: ["0", "9\uFE0F\u20E3", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\q{0|2|4|9\uFE0F\u20E3}&&\q{0|2|4|9\uFE0F\u20E3}]+$/v,
expression: "[\q{0|2|4|9\uFE0F\u20E3}&&\q{0|2|4|9\uFE0F\u20E3}]",
matchStrings: ["0", "2", "4", "9\uFE0F\u20E3"],
nonMatchStrings: ["6\uFE0F\u20E3", "7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\q{0|2|4|9\uFE0F\u20E3}\q{0|2|4|9\uFE0F\u20E3}]+$/v,
expression: "[\q{0|2|4|9\uFE0F\u20E3}\q{0|2|4|9\uFE0F\u20E3}]",
matchStrings: ["0", "2", "4", "9\uFE0F\u20E3"],
nonMatchStrings: ["6\uFE0F\u20E3", "7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
testExtendedCharacterClass({
regExp: /^[\q{0|2|4|9\uFE0F\u20E3}&&\p{Emoji_Keycap_Sequence}]+$/v,
expression: "[\q{0|2|4|9\uFE0F\u20E3}&&\p{Emoji_Keycap_Sequence}]",
matchStrings: ["9\uFE0F\u20E3"],
nonMatchStrings: ["0", "2", "4", "6\uFE0F\u20E3", "7", "C", "\u2603", "\u{1D306}", "\u{1F1E7}\u{1F1EA}"],
});
});
test("Unicode matching with u and v flags", () => {
const text = "𠮷a𠮷b𠮷";
const complexText = "a\u{20BB7}b\u{10FFFF}c";
const cases = [
{ pattern: /𠮷/, match: text, expected: ["𠮷"] },
{ pattern: /𠮷/u, match: text, expected: ["𠮷"] },
{ pattern: /𠮷/v, match: text, expected: ["𠮷"] },
{ pattern: /\p{Script=Han}/u, match: text, expected: ["𠮷"] },
{ pattern: /\p{Script=Han}/v, match: text, expected: ["𠮷"] },
{ pattern: /./u, match: text, expected: ["𠮷"] },
{ pattern: /./v, match: text, expected: ["𠮷"] },
{ pattern: /\p{ASCII}/u, match: text, expected: ["a"] },
{ pattern: /\p{ASCII}/v, match: text, expected: ["a"] },
{ pattern: /x/u, match: text, expected: null },
{ pattern: /x/v, match: text, expected: null },
{ pattern: /\p{Script=Han}(.)/gu, match: text, expected: ["𠮷a", "𠮷b"] },
{ pattern: /\p{Script=Han}(.)/gv, match: text, expected: ["𠮷a", "𠮷b"] },
{ pattern: /\P{ASCII}/u, match: complexText, expected: ["\u{20BB7}"] },
{ pattern: /\P{ASCII}/v, match: complexText, expected: ["\u{20BB7}"] },
{ pattern: /\P{ASCII}/gu, match: complexText, expected: ["\u{20BB7}", "\u{10FFFF}"] },
{ pattern: /\P{ASCII}/gv, match: complexText, expected: ["\u{20BB7}", "\u{10FFFF}"] },
{ pattern: /./gu, match: text, expected: ["𠮷", "a", "𠮷", "b", "𠮷"] },
{ pattern: /./gv, match: text, expected: ["𠮷", "a", "𠮷", "b", "𠮷"] },
{ pattern: /(?:)/gu, match: text, expected: ["", "", "", "", "", ""] },
{ pattern: /(?:)/gv, match: text, expected: ["", "", "", "", "", ""] },
];
for (const test of cases) {
const result = test.match.match(test.pattern);
expect(result).toEqual(test.expected);
}
});
test("RegExp string literal", () => {
[
{ pattern: /[\q{abc}]/v, match: "abc", expected: ["abc"] },
{ pattern: /[\q{abc}]/v, match: "a", expected: null },
{ pattern: /[\q{a|b}]/v, match: "b", expected: ["b"] },
{ pattern: /[\q{a\\b}]/v, match: "a\\b", expected: ["a\\b"] },
{ pattern: /[\q{}]/v, match: "", expected: [""] },
{ pattern: /[\q{😀|😁|😂}]/v, match: "😁", expected: ["😁"] },
{ pattern: /[\q{1|1\uFE0F\u20E3}]/v, match: "1", expected: ["1"] },
{ pattern: /[\q{1}]/v, match: "1", expected: ["1"] },
{ pattern: /[\d&&\q{2}]/v, match: "123", expected: ["2"] },
{ pattern: /[^\q{a|b}]/v, match: "abc", expected: ["c"] },
{ pattern: /[\q{\n}]/v, match: "\n", expected: ["\n"] },
{ pattern: /[\q{\b}]/v, match: "\b", expected: ["\b"] },
{ pattern: /[\q{\0}]/v, match: "\0", expected: ["\0"] },
{ pattern: /[\q{\|}]/v, match: "|", expected: ["|"] },
{ pattern: /[\q{\x41}]/v, match: "A", expected: ["A"] },
{
pattern: /[\q{\uD83D\uDC68\u200d\uD83D\uDC69\u200d\uD83D\uDC66\u200d\uD83D\uDC66}]/v,
match: "👨‍👩‍👦‍👦",
expected: ["👨‍👩‍👦‍👦"],
},
{ pattern: /[\q{\u{1F600}}]/v, match: "😀", expected: ["😀"] },
{ pattern: /[\q{\cZ}]/v, match: "\x1A", expected: ["\x1A"] },
{ pattern: /[\q{ }]/v, match: " ", expected: [" "] },
{ pattern: /[[\d+]--[\q{1}]]/gv, match: "12", expected: ["2"] },
{ pattern: /[[\d]&&[\q{1}]]/gv, match: "21", expected: ["1"] },
{ pattern: /[\d\q{a}]/gv, match: "a1", expected: ["a", "1"] },
].forEach(test => {
const result = test.match.match(test.pattern);
expect(result).toEqual(test.expected);
});
[
"[\\q{(a)}]",
"[\\q{[a]}]",
"[\\q{{a}}]",
"[^\\q{bad}]",
"[\\q{a-b}]",
"[^\\q{a|bc}]",
"[^\\q{\\b+}]",
"[\\q{\\d}]",
"[\\q{\\w}]",
"[\\q{\\q}]",
"[^\\q{\\(\\)}]",
].forEach(pattern => {
expect(() => new RegExp(pattern, "v")).toThrow(SyntaxError);
});
});