Files
ladybird/Tests/LibWeb/test-css-tokenizer.py
Sam Atkins 59a860249b Tests: Add a CSS Tokenizer test suite
Based on test-js-ast and test-js-bytecode, test-css-tokenizer finds CSS
files in CSSTokenizer/input, tokenizes them, and then dumps the output
to compare against files in the expected/ directory. Our Tokenizer is
in a good state currently, but this will let us track regressions, and
also allow us to rewrite this in Rust and ensure that the output
matches.

To support test-css-tokenizer.py, we also have css-tokenizer.cpp which
exposes the Tokenizer, which is usually only used internally.

We'll want to expand this test suite over time, but this initial set of
tests covers each type of Token.
2026-04-15 10:23:09 +01:00

135 lines
3.5 KiB
Python
Executable File

#!/usr/bin/env python3
import difflib
import os
import subprocess
import sys
from argparse import ArgumentParser
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from pathlib import Path
LADYBIRD_SOURCE_DIR: Path
CSS_TOKENIZER_TEST_DIR: Path
BUILD_DIR: Path
def setup() -> None:
global LADYBIRD_SOURCE_DIR, CSS_TOKENIZER_TEST_DIR, BUILD_DIR
ladybird_source_dir = os.getenv("LADYBIRD_SOURCE_DIR")
if ladybird_source_dir is None:
print("LADYBIRD_SOURCE_DIR must be set!")
sys.exit(1)
LADYBIRD_SOURCE_DIR = Path(ladybird_source_dir)
CSS_TOKENIZER_TEST_DIR = LADYBIRD_SOURCE_DIR / "Tests/LibWeb/CSSTokenizer/"
# The script is copied to bin/test-css-tokenizer, so the build dir is one level up
BUILD_DIR = Path(__file__).parent.parent.resolve()
# ANSI colour escape for each unified-diff marker character.
DIFF_PREFIX_ESCAPES = {
    "@": "\x1b[36m",
    "+": "\x1b[32m",
    "-": "\x1b[31m",
}


def diff(a: str, a_file: Path, b: str, b_file: Path) -> None:
    """Print a colourised unified diff of text `a` against text `b`.

    `a_file` and `b_file` are only used to label the diff headers.
    """
    diff_lines = difflib.unified_diff(a.splitlines(), b.splitlines(), fromfile=str(a_file), tofile=str(b_file))
    for raw_line in diff_lines:
        stripped = raw_line.rstrip()
        # Colour by the diff marker in column 0; blank lines count as context.
        marker = stripped[0] if stripped else " "
        colour = DIFF_PREFIX_ESCAPES.get(marker, "")
        print(f"{colour}{stripped}\x1b[0m")
def encoding_for(file: Path) -> str:
    """Return the character encoding to tokenize `file` with.

    A test can override the UTF-8 default by placing a sidecar
    `<name>.encoding` file next to the input file.
    """
    sidecar = CSS_TOKENIZER_TEST_DIR / "input" / Path(f"{file.name}.encoding")
    if sidecar.exists():
        return sidecar.read_text(encoding="utf8").strip()
    return "utf-8"
def test(file: Path, rebaseline: bool) -> bool:
    """Tokenize one CSS input file and compare the dump with its expectation.

    `file` is a path relative to the input directory. Returns True when the
    test FAILED (tokenizer output differs from the expected file), False when
    it passed. With `rebaseline`, the expectation file is rewritten from the
    current output and the test always passes. Exits the whole process if the
    tokenizer itself returns a non-zero status.
    """
    args = [
        str(BUILD_DIR / "bin/css-tokenizer"),
        "--encoding",
        encoding_for(file),
        str(CSS_TOKENIZER_TEST_DIR / "input" / file),
    ]
    process = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    stdout = process.stdout.decode().strip()
    if process.returncode != 0:
        # The tokenizer crashed or rejected its arguments; its combined
        # stdout/stderr is the best diagnostic we have, so dump it and abort.
        print(stdout)
        sys.exit(1)

    expected_file = CSS_TOKENIZER_TEST_DIR / "expected" / file.with_suffix(".txt")
    output_file = CSS_TOKENIZER_TEST_DIR / "output" / file.with_suffix(".txt")

    # The output directory is generated, so it may not exist on a fresh
    # checkout; create it before writing rather than crashing.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    output_file.write_text(stdout + "\n", encoding="utf8")

    if rebaseline:
        expected_file.parent.mkdir(parents=True, exist_ok=True)
        expected_file.write_text(stdout + "\n", encoding="utf8")
        return False

    expected = expected_file.read_text(encoding="utf8").strip()
    if stdout != expected:
        print(f"\nCSS tokens do not match for {file}!\n")
        diff(a=expected, a_file=expected_file, b=stdout, b_file=output_file)
        return True
    return False
def main() -> int:
    """Run the whole CSS tokenizer test suite and return the exit status."""
    setup()

    arg_parser = ArgumentParser()
    arg_parser.add_argument("-j", "--jobs", type=int)
    arg_parser.add_argument("--rebaseline", action="store_true")
    options = arg_parser.parse_args()

    input_dir = CSS_TOKENIZER_TEST_DIR / "input"
    css_files = [entry for entry in sorted(input_dir.iterdir()) if entry.is_file() and entry.suffix == ".css"]

    # Fan the per-file tests out over a thread pool; each future's result is
    # True when that test failed.
    failed = 0
    with ThreadPoolExecutor(max_workers=options.jobs) as pool:
        futures = [pool.submit(test, entry.relative_to(input_dir), options.rebaseline) for entry in css_files]
        for future in as_completed(futures):
            if future.result():
                failed += 1

    total = len(css_files)
    if options.rebaseline:
        print(f"Rebaselined {total} tests.")
        return 0
    if failed:
        print(f"\nTests: {total - failed} passed, {failed} failed, {total} total")
        return 1
    print(f"All tests passed! ({total} total)")
    return 0
# Script entry point: propagate main()'s status code to the shell.
if __name__ == "__main__":
    sys.exit(main())