ladybird/Libraries/LibJS/Rust/src/parser.rs

/*
 * Copyright (c) 2026-present, the Ladybird developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

//! JavaScript parser: recursive descent with precedence climbing.
//!
//! This is the core parser module. It contains the `Parser` struct (parser
//! state + helpers) and delegates actual parsing to submodules:
//!
//! - `expressions` — `parse_expression()`, primary/secondary expressions
//! - `statements` — `parse_statement()`, control flow
//! - `declarations` — functions, classes, variables, import/export
//!
//! ## How parsing works
//!
//! The parser is a single-pass, recursive-descent parser. Expression parsing
//! uses precedence climbing (Pratt-style): `parse_expression(min_precedence)`
//! parses a primary expression, then loops consuming binary/postfix operators
//! whose precedence is >= `min_precedence`.
//!
//! The parser reads tokens one at a time from the Lexer. The "current token"
//! is always available via `self.current_token`. Calling `consume()` returns
//! the current token and advances to the next one.
//!
//! ## Backtracking
//!
//! Some constructs require speculative parsing (e.g., arrow functions:
//! `(a, b) =>` looks like a parenthesized expression until `=>` is seen).
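//! The parser supports this via `save_state()` / `load_state()`, which
//! save and restore the speculative parser state: lexer position, current
//! token, error-list length, deferred-regex count, scope collector state, and
//! the boolean flags held in `ParserFlags`. Flags stored directly on `Parser`
//! (such as `initiated_by_eval`) are not part of the snapshot.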

use std::collections::{HashMap, HashSet};

use std::rc::Rc;

use crate::ast::{
    BindingPattern, CompiledRegex, Expression, ExpressionKind, FunctionParameter, FunctionTable,
    Identifier, PrivateIdentifier, ProgramData, ScopeData, SourceRange, Statement, StatementKind,
    Utf16String,
};
use crate::lexer::{Lexer, ch};
use crate::scope_collector::{ScopeCollector, ScopeCollectorState};
use crate::token::{Token, TokenType};

mod declarations;
mod expressions;
mod statements;

pub use crate::ast::DeclarationKind;
pub use crate::ast::FunctionKind;
pub use crate::ast::FunctionParsingInsights;
pub use crate::ast::Position;
pub use crate::ast::ProgramType;

// Named precedence levels for parse_expression().
// These correspond to the operator precedence table in ECMA-262.
//
// Each value is meant to be passed as `min_precedence`: per the module docs,
// parse_expression() keeps consuming operators whose precedence is >= the
// value it was given, so a smaller constant admits more operators.
// NOTE(review): the individual numbers are presumed to match the precedence
// values assigned to operators in the `expressions` submodule — confirm there.
pub(crate) const PRECEDENCE_COMMA: i32 = 0;
pub(crate) const PRECEDENCE_ASSIGNMENT: i32 = 2;
pub(crate) const PRECEDENCE_UNARY: i32 = 17;
pub(crate) const PRECEDENCE_MEMBER: i32 = 19;

/// Result of parsing a function's formal parameter list.
pub struct ParsedParameters {
    /// The parameters in source order.
    pub parameters: Vec<FunctionParameter>,
    /// NOTE(review): presumably the value of the function's `length`
    /// property — confirm against the parameter-list parser.
    pub function_length: i32,
    /// Per-name binding information; this is the slice consumed by
    /// `register_function_parameters_with_scope` and the duplicate-parameter
    /// checks.
    pub parameter_info: Vec<ParamInfo>,
    /// NOTE(review): presumably true for a "simple parameter list" in the
    /// ECMA-262 sense (no defaults, rest, or patterns) — confirm.
    pub is_simple: bool,
}

/// Information about a single parameter name binding.
pub struct ParamInfo {
    /// The bound name. Entries with an empty name are skipped by the
    /// duplicate-parameter checks.
    pub name: Utf16String,
    /// Whether this name belongs to a rest parameter.
    pub is_rest: bool,
    /// Whether this name was bound inside a destructuring pattern rather than
    /// being a plain identifier parameter.
    /// `register_function_parameters_with_scope` relies on this flag to find
    /// the run of names that belong to one pattern parameter.
    pub is_from_pattern: bool,
    /// The identifier node for this name, if any; forwarded to the scope
    /// collector for pattern-bound names.
    pub identifier: Option<Rc<Identifier>>,
}

/// Result of parsing a property key (object literal or class element).
///
/// NOTE(review): the field descriptions below are inferred from the field
/// names; the code that fills them in lives outside this part of the file.
pub(crate) struct PropertyKey {
    /// The key as an expression node.
    pub expression: Expression,
    /// The key's name when one is available (presumably absent for computed
    /// keys — confirm).
    pub name: Option<Utf16String>,
    /// Presumably set when the key is `__proto__` — confirm.
    pub is_proto: bool,
    /// Presumably set for `[expr]` computed keys — confirm.
    pub is_computed: bool,
    /// Presumably set when the key was written as a bare identifier — confirm.
    pub is_identifier: bool,
}

/// Method kind for parse_method_definition.
///
/// Distinguishes ordinary methods from accessors and class constructors.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum MethodKind {
    /// An ordinary method.
    Normal,
    /// A `get` accessor.
    Getter,
    /// A `set` accessor.
    Setter,
    /// A class constructor.
    Constructor,
}

/// Associativity for operator precedence.
///
/// Decides how operators of equal precedence group during precedence climbing.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Associativity {
    /// Groups left-to-right.
    Left,
    /// Groups right-to-left.
    Right,
}

/// Tracks which tokens are forbidden in the current expression context.
///
/// This is threaded through `parse_expression()` to prevent ambiguity:
/// - `forbid_in`: in for-loop init position (`for (x in ...)` is for-in, not comparison)
/// - `forbid_logical/forbid_coalesce`: `&&`/`||` and `??` cannot be mixed without parens
/// - `forbid_paren_open`: prevents consuming `(` as call in `new Foo()` callee position
/// - `forbid_question_mark_period`: prevents `?.` in `new Foo?.bar`
/// - `forbid_equals`: prevents `=` from being consumed as assignment in certain contexts
///
/// The default value forbids nothing. The type is `Copy`, so contexts are
/// derived by value (`merge`, `forbid`) rather than mutated in place.
#[derive(Clone, Copy, Default)]
pub struct ForbiddenTokens {
    /// Forbids `TokenType::In`.
    pub forbid_in: bool,
    /// Forbids both `TokenType::DoubleAmpersand` and `TokenType::DoublePipe`.
    pub forbid_logical: bool,
    /// Forbids `TokenType::DoubleQuestionMark`.
    pub forbid_coalesce: bool,
    /// Forbids `TokenType::ParenOpen`.
    pub forbid_paren_open: bool,
    /// Forbids `TokenType::QuestionMarkPeriod`.
    pub forbid_question_mark_period: bool,
    /// Forbids `TokenType::Equals`.
    pub forbid_equals: bool,
}

impl ForbiddenTokens {
    /// Returns a set that forbids nothing.
    pub fn none() -> Self {
        Self::default()
    }

    /// Returns a set that forbids only the `in` operator.
    pub fn with_in() -> Self {
        Self {
            forbid_in: true,
            ..Self::none()
        }
    }

    /// Reports whether `token` may be consumed in this context.
    ///
    /// Token types that have no corresponding flag are always allowed.
    pub fn allows(&self, token: TokenType) -> bool {
        let forbidden = match token {
            TokenType::In => self.forbid_in,
            TokenType::DoubleAmpersand | TokenType::DoublePipe => self.forbid_logical,
            TokenType::DoubleQuestionMark => self.forbid_coalesce,
            TokenType::ParenOpen => self.forbid_paren_open,
            TokenType::QuestionMarkPeriod => self.forbid_question_mark_period,
            TokenType::Equals => self.forbid_equals,
            _ => false,
        };
        !forbidden
    }

    /// Returns the union of `self` and `other`: a token is forbidden in the
    /// result if either input forbids it.
    pub fn merge(&self, other: ForbiddenTokens) -> ForbiddenTokens {
        let ForbiddenTokens {
            forbid_in,
            forbid_logical,
            forbid_coalesce,
            forbid_paren_open,
            forbid_question_mark_period,
            forbid_equals,
        } = *self;
        ForbiddenTokens {
            forbid_in: forbid_in || other.forbid_in,
            forbid_logical: forbid_logical || other.forbid_logical,
            forbid_coalesce: forbid_coalesce || other.forbid_coalesce,
            forbid_paren_open: forbid_paren_open || other.forbid_paren_open,
            forbid_question_mark_period: forbid_question_mark_period
                || other.forbid_question_mark_period,
            forbid_equals: forbid_equals || other.forbid_equals,
        }
    }

    /// Returns a copy of `self` that additionally forbids every token in
    /// `tokens`.
    ///
    /// Token types without a corresponding flag are ignored. Either logical
    /// operator (`&&` or `||`) sets the shared `forbid_logical` flag.
    pub fn forbid(&self, tokens: &[TokenType]) -> ForbiddenTokens {
        tokens.iter().fold(*self, |mut accumulated, &token| {
            match token {
                TokenType::In => accumulated.forbid_in = true,
                TokenType::DoubleAmpersand | TokenType::DoublePipe => {
                    accumulated.forbid_logical = true
                }
                TokenType::DoubleQuestionMark => accumulated.forbid_coalesce = true,
                TokenType::ParenOpen => accumulated.forbid_paren_open = true,
                TokenType::QuestionMarkPeriod => accumulated.forbid_question_mark_period = true,
                TokenType::Equals => accumulated.forbid_equals = true,
                _ => {}
            }
            accumulated
        })
    }
}

/// A syntax error recorded by the parser (or by deferred regex compilation).
pub struct ParseError {
    /// Human-readable description of the error.
    pub message: String,
    /// Line the error is attributed to.
    pub line: u32,
    /// Column the error is attributed to.
    pub column: u32,
}

/// Boolean flags that are saved/restored during speculative parsing.
///
/// The struct is `Copy`: `save_state()` stores a copy in the snapshot and
/// `load_state()` writes that copy back.
#[derive(Clone, Copy, Default)]
pub(crate) struct ParserFlags {
    /// Enables the strict-mode-only checks (reserved-word identifiers, legacy
    /// octal literals, `let`/`yield` no longer usable as identifiers).
    pub strict_mode: bool,
    // NOTE(review): the next four flags are not read in this part of the
    // file; their meaning is presumed from their names — confirm in the
    // submodules.
    pub allow_super_property_lookup: bool,
    pub allow_super_constructor_call: bool,
    pub in_function_context: bool,
    pub in_formal_parameter_context: bool,
    /// While set, `yield` (plain or escaped) is not accepted as an identifier.
    pub in_generator_function_context: bool,
    /// While set, `await` (plain or escaped) is not accepted as an identifier.
    pub await_expression_is_valid: bool,
    // NOTE(review): the next three flags are not read in this part of the
    // file; presumably they gate `break`/`continue` validity and track legacy
    // octal escapes in string literals — confirm.
    pub in_break_context: bool,
    pub in_continue_context: bool,
    pub string_legacy_octal_escape_sequence_in_scope: bool,
    /// While set, consuming an `arguments` identifier is reported as a syntax
    /// error.
    pub in_class_field_initializer: bool,
    /// While set, `await` (plain or escaped) is not accepted as an identifier.
    pub in_class_static_init_block: bool,
    /// Set when an `arguments` or `eval` identifier is consumed.
    pub function_might_need_arguments_object: bool,
    /// Updated by every `consume()`: true when the token just consumed was `.`.
    /// Used to skip the `arguments`/`eval` check for member names.
    pub previous_token_was_period: bool,
    /// Set during property key parsing to suppress eval/arguments check.
    /// C++ uses separate `consume()` and `consume_and_allow_division()` methods;
    /// we emulate this by skipping the check in property key contexts.
    pub in_property_key_context: bool,
}

/// A regex literal whose compilation is deferred until after parsing.
///
/// `Parser::compile_deferred_regexes` compiles `pattern`/`flags` and stores
/// the result into `compiled_regex`; on failure it reports a `ParseError` at
/// (`line`, `column`).
pub struct DeferredRegex {
    /// Shared slot that receives the compiled handle once compilation succeeds.
    pub compiled_regex: Rc<CompiledRegex>,
    /// The pattern text as UTF-16 code units.
    pub pattern: Vec<u16>,
    /// The flag characters as UTF-16 code units.
    pub flags: Vec<u16>,
    /// Line used when reporting a compilation error.
    pub line: u32,
    /// Column used when reporting a compilation error.
    pub column: u32,
}

/// Snapshot of parser state for speculative parsing (backtracking).
///
/// Created by `save_state()` and consumed by `load_state()`. The lexer keeps
/// its own saved position separately; this struct only holds the parser-side
/// pieces.
struct SavedState {
    /// The current token at the time of the snapshot.
    token: Token,
    /// Length of the error list at the time of the snapshot; errors recorded
    /// afterwards are dropped on restore.
    errors_len: usize,
    /// Copy of the speculative boolean flags.
    flags: ParserFlags,
    /// Opaque snapshot of the scope collector.
    scope_collector_state: ScopeCollectorState,
    /// Length of the deferred-regex list at the time of the snapshot; entries
    /// added afterwards are dropped on restore.
    deferred_regexes_len: usize,
}

/// The main JavaScript parser.
///
/// Produces an AST. Parsing methods live in the `expressions`,
/// `statements`, and `declarations` submodules (all `impl Parser`).
pub struct Parser<'a> {
    /// Token source. Always positioned just past `current_token`.
    lexer: Lexer<'a>,
    /// `consume()` returns this and advances to the next token.
    current_token: Token,
    /// Syntax errors recorded so far. Truncated back to the snapshot length
    /// when a speculative parse is rolled back.
    errors: Vec<ParseError>,
    /// Stack of snapshots pushed by `save_state()`.
    saved_states: Vec<SavedState>,
    /// Whether the source is being parsed as a script or a module; affects
    /// HTML-comment handling and the treatment of `await`.
    program_type: ProgramType,
    /// UTF-16 source text.
    source: &'a [u16],

    // --- Parser state flags (saved/restored during speculative parsing) ---
    pub(crate) flags: ParserFlags,

    // --- Flags NOT saved/restored during speculative parsing ---
    pub(crate) initiated_by_eval: bool,
    pub(crate) in_eval_function_context: bool,

    /// Labels currently in scope. Value is `Some((line, col))` if a `continue`
    /// statement referenced this label, `None` otherwise.
    labels_in_scope: HashMap<Utf16String, Option<(u32, u32)>>,

    /// Set by try_parse_labelled_statement to propagate iteration-ness
    /// through nested labels (e.g., `a: b: for(...)`).
    last_inner_label_is_iteration: bool,

    // NOTE(review): the three `last_*` fields are only initialized in this
    // part of the file; presumably they carry the most recently parsed
    // function/class name and kind between parsing routines — confirm in the
    // `declarations` submodule.
    last_function_name: Utf16String,
    last_function_kind: FunctionKind,
    last_class_name: Utf16String,

    /// Bound names collected during parse_binding_pattern.
    /// Caller drains this after calling parse_binding_pattern.
    /// Each entry is (name, identifier) — allows scope analysis to annotate
    /// binding pattern identifiers with local variable info.
    pub(crate) pattern_bound_names: Vec<(Utf16String, Rc<Identifier>)>,

    /// Set during synthesize_binding_pattern to allow MemberExpressions as binding targets.
    allow_member_expressions: bool,

    /// Position of the opening bracket/brace in binding patterns.
    /// Used so all identifiers inside a binding pattern share the pattern's start position,
    /// matching C++ parser behavior.
    binding_pattern_start: Option<Position>,

    /// True while parsing a class body that has an `extends` clause.
    pub(crate) class_has_super_class: bool,
    /// Depth counter for class bodies — used to reject `#name` outside classes.
    pub(crate) class_scope_depth: u32,
    // NOTE(review): only initialized here; presumably records whether a
    // default export supplied a name — confirm in the export parser.
    pub(crate) has_default_export_name: bool,

    /// Stack of sets tracking private names referenced inside class bodies.
    /// Each class body pushes a new set. At the end of the class, any names
    /// not found in the class's declared private names are bubbled up to the
    /// outer class, or reported as errors if there is no outer class.
    referenced_private_names_stack: Vec<HashSet<Utf16String>>,

    /// Communication channel from `parse_variable_declaration` back to
    /// `parse_for_statement` when parsing `for (let/const/var ... ; ...)`.
    /// These are set when `is_for_loop` is true and read by the for-loop
    /// parser to validate for-in/of restrictions.
    pub(crate) for_loop_declaration_count: usize,
    pub(crate) for_loop_declaration_has_init: bool,
    pub(crate) for_loop_declaration_is_var: bool,

    /// Collects scope information during parsing. Its state is snapshotted
    /// and restored together with the parser's during speculative parsing.
    pub scope_collector: ScopeCollector,

    /// Track exported names for duplicate detection in modules.
    exported_names: HashSet<Utf16String>,

    /// Side table owning all FunctionData produced during parsing.
    pub function_table: FunctionTable,

    /// Memoization: offsets where arrow function parsing has already failed.
    /// Prevents exponential re-processing of nested expressions like
    /// `(a=(b=(c=0)))` where each failed arrow attempt would otherwise
    /// re-attempt inner positions during grouping expression re-parse.
    arrow_function_failed_positions: HashSet<usize>,

    /// Regex literals whose compilation is deferred until after parsing.
    deferred_regexes: Vec<DeferredRegex>,
}

impl<'a> Parser<'a> {
    /// Creates a parser over `source` with line numbering starting at 1.
    ///
    /// Equivalent to `new_with_line_offset(source, program_type, 1)`.
    pub fn new(source: &'a [u16], program_type: ProgramType) -> Self {
        let first_line_number = 1;
        Self::new_with_line_offset(source, program_type, first_line_number)
    }

    /// Creates a parser over `source` whose first line is numbered
    /// `initial_line_number`.
    ///
    /// The first token is read from the lexer right away, so `current_token`
    /// is valid as soon as the parser exists. For module programs, HTML-style
    /// comments are disabled in the lexer before that first token is read.
    /// All other state starts out empty / false.
    pub fn new_with_line_offset(
        source: &'a [u16],
        program_type: ProgramType,
        initial_line_number: u32,
    ) -> Self {
        // NOTE(review): the trailing `0` is presumably the initial
        // column/offset expected by `Lexer::new` — confirm against the lexer.
        let mut lexer = Lexer::new(source, initial_line_number, 0);
        if program_type == ProgramType::Module {
            lexer.disallow_html_comments();
        }
        // Prime the parser before `lexer` is moved into the struct.
        let current_token = lexer.next();

        Self {
            // Token stream and input.
            lexer,
            current_token,
            source,
            program_type,

            // Error reporting and backtracking.
            errors: Vec::new(),
            saved_states: Vec::new(),
            arrow_function_failed_positions: HashSet::new(),
            deferred_regexes: Vec::new(),

            // Context flags.
            flags: ParserFlags::default(),
            initiated_by_eval: false,
            in_eval_function_context: false,

            // Labels.
            labels_in_scope: HashMap::new(),
            last_inner_label_is_iteration: false,

            // Most recent function/class bookkeeping.
            last_function_name: Utf16String::default(),
            last_function_kind: FunctionKind::Normal,
            last_class_name: Utf16String::default(),

            // Binding patterns.
            pattern_bound_names: Vec::new(),
            allow_member_expressions: false,
            binding_pattern_start: None,

            // Classes and exports.
            class_has_super_class: false,
            class_scope_depth: 0,
            has_default_export_name: false,
            referenced_private_names_stack: Vec::new(),
            exported_names: HashSet::new(),

            // For-loop declaration channel.
            for_loop_declaration_count: 0,
            for_loop_declaration_has_init: false,
            for_loop_declaration_is_var: false,

            // Scope analysis and function side table.
            scope_collector: ScopeCollector::new(),
            function_table: FunctionTable::new(),
        }
    }

    // === AST construction helpers ===

    /// Builds the source range that begins at `start` and ends at the
    /// position of the current (not yet consumed) token.
    pub(crate) fn range_from(&self, start: Position) -> SourceRange {
        let end = self.position();
        SourceRange { start, end }
    }

    /// Wraps `expression` in an `Expression` node spanning from `start` to
    /// the current token position.
    pub(crate) fn expression(&self, start: Position, expression: ExpressionKind) -> Expression {
        let range = self.range_from(start);
        Expression::new(range, expression)
    }

    /// Wraps `statement` in a `Statement` node spanning from `start` to the
    /// current token position.
    pub(crate) fn statement(&self, start: Position, statement: StatementKind) -> Statement {
        let range = self.range_from(start);
        Statement::new(range, statement)
    }

    /// Creates a shared `Identifier` node named `name`, spanning from `start`
    /// to the current token position.
    pub(crate) fn make_identifier(
        &self,
        start: Position,
        name: impl Into<Utf16String>,
    ) -> Rc<Identifier> {
        let range = self.range_from(start);
        let identifier = Identifier::new(range, name.into());
        Rc::new(identifier)
    }

    /// Translates a parsed parameter list into `ParameterEntry` records and
    /// hands them to the scope collector.
    ///
    /// `parameter_info` is walked with a single cursor in step with
    /// `parameters`:
    /// - an identifier parameter consumes one info entry (falling back to the
    ///   identifier's own name and the parameter's rest flag if the info list
    ///   is exhausted);
    /// - a pattern parameter first emits a nameless placeholder entry, then
    ///   consumes the consecutive run of info entries marked
    ///   `is_from_pattern`.
    ///
    /// The collector is also told whether any parameter has a default value
    /// or any pattern contains an expression.
    pub(crate) fn register_function_parameters_with_scope(
        &mut self,
        parameters: &[FunctionParameter],
        parameter_info: &[ParamInfo],
    ) {
        use crate::ast::FunctionParameterBinding;
        use crate::scope_collector::ParameterEntry;

        let mut entries: Vec<ParameterEntry> = Vec::new();
        let mut has_parameter_expressions = false;
        // Next unread position in `parameter_info`.
        let mut cursor = 0;

        for parameter in parameters {
            has_parameter_expressions |= parameter.default_value.is_some();

            match &parameter.binding {
                FunctionParameterBinding::Identifier(id) => {
                    let (name, is_rest, is_from_pattern) = match parameter_info.get(cursor) {
                        Some(info) => {
                            cursor += 1;
                            (info.name.clone(), info.is_rest, info.is_from_pattern)
                        }
                        None => (id.name.clone(), parameter.is_rest, false),
                    };
                    entries.push(ParameterEntry {
                        name,
                        identifier: Some(id.clone()),
                        is_rest,
                        is_from_pattern,
                        is_first_from_pattern: false,
                    });
                }
                FunctionParameterBinding::BindingPattern(pattern) => {
                    has_parameter_expressions |= pattern.contains_expression();

                    // Placeholder for the pattern parameter itself, so that
                    // subsequent parameters get correct positional indices.
                    entries.push(ParameterEntry {
                        name: Utf16String::default(),
                        identifier: None,
                        is_rest: false,
                        is_from_pattern: true,
                        is_first_from_pattern: true,
                    });

                    // Names bound by this pattern: the consecutive run of
                    // pattern-originated info entries starting at the cursor.
                    while let Some(info) = parameter_info
                        .get(cursor)
                        .filter(|info| info.is_from_pattern)
                    {
                        entries.push(ParameterEntry {
                            name: info.name.clone(),
                            identifier: info.identifier.clone(),
                            is_rest: info.is_rest,
                            is_from_pattern: true,
                            is_first_from_pattern: false,
                        });
                        cursor += 1;
                    }
                }
            }
        }

        self.scope_collector
            .set_function_parameters(&entries, has_parameter_expressions);
    }

    // === Token access ===

    /// Returns the token the parser is currently looking at (not yet consumed).
    pub(crate) fn current_token(&self) -> &Token {
        &self.current_token
    }

    /// Returns the type of the current (not yet consumed) token.
    pub(crate) fn current_token_type(&self) -> TokenType {
        self.current_token.token_type
    }

    /// Returns true if the current token has type `tt`. Does not consume it.
    pub(crate) fn match_token(&self, tt: TokenType) -> bool {
        tt == self.current_token_type()
    }

    /// Returns true once the current token is the end-of-file token.
    pub(crate) fn done(&self) -> bool {
        self.current_token.token_type == TokenType::Eof
    }

    // === Token consumption ===

    /// Advances to the next token and returns the one that was current.
    ///
    /// Side effects, in order:
    /// 1. unless a property key is being parsed, the consumed token is checked
    ///    for `arguments` / `eval` (which still sees the *previous* value of
    ///    `previous_token_was_period`);
    /// 2. `previous_token_was_period` is updated for the consumed token.
    pub(crate) fn consume(&mut self) -> Token {
        let upcoming = self.lexer.next();
        let consumed = std::mem::replace(&mut self.current_token, upcoming);

        // C++ performs the `arguments`/`eval` check in
        // `consume_and_allow_division()`, which `consume_identifier()` uses.
        // Here the check lives in `consume()` itself and is skipped while
        // parsing property keys, which matches the C++ behavior.
        let in_property_key = self.flags.in_property_key_context;
        if !in_property_key {
            self.check_arguments_or_eval(&consumed);
        }

        self.flags.previous_token_was_period = consumed.token_type == TokenType::Period;
        consumed
    }

    /// Consumes the current token and, in strict mode, reports a syntax error
    /// if it is an `Identifier` token spelling a strict-mode reserved word.
    ///
    /// The token is returned in every case.
    pub(crate) fn consume_and_check_identifier(&mut self) -> Token {
        let token = self.consume();

        let needs_check = self.flags.strict_mode && token.token_type == TokenType::Identifier;
        if !needs_check {
            return token;
        }

        let value = self.token_value(&token);
        if is_strict_reserved_word(value) {
            let message = format!(
                "Identifier must not be a reserved word in strict mode ('{}')",
                String::from_utf16_lossy(value)
            );
            self.syntax_error(&message);
        }
        token
    }

    /// Consumes the current token, reporting a syntax error first if it is
    /// not of the `expected` type.
    ///
    /// The token is consumed and returned even when it does not match.
    pub(crate) fn consume_token(&mut self, expected: TokenType) -> Token {
        if !self.match_token(expected) {
            self.expected(expected.name());
        }
        self.consume()
    }

    /// Consumes the current token if it has type `tt`.
    ///
    /// Returns whether a token was consumed; on a mismatch nothing changes.
    pub(crate) fn eat(&mut self, tt: TokenType) -> bool {
        let matched = self.match_token(tt);
        if matched {
            self.consume();
        }
        matched
    }

    fn check_arguments_or_eval(&mut self, token: &Token) {
        if token.token_type == TokenType::Identifier && !self.flags.previous_token_was_period {
            let value: &[u16] = if let Some(ref v) = token.identifier_value {
                v
            } else {
                let start = token.value_start as usize;
                let end = start + token.value_len as usize;
                if end <= self.source.len() {
                    &self.source[start..end]
                } else {
                    &[]
                }
            };
            if value == utf16!("arguments") {
                if self.flags.in_class_field_initializer {
                    self.syntax_error("'arguments' is not allowed in class field initializer");
                }
                self.flags.function_might_need_arguments_object = true;
            } else if value == utf16!("eval") {
                self.flags.function_might_need_arguments_object = true;
            }
        }
    }

    /// Consumes the current token, which is expected to be usable as an
    /// identifier.
    ///
    /// When it is, the strict-mode reserved-word check is applied. When it is
    /// not, an "expected identifier" error is reported and the token is
    /// consumed anyway so parsing can continue.
    pub(crate) fn consume_identifier(&mut self) -> Token {
        if !self.match_identifier() {
            self.expected("identifier");
            return self.consume();
        }
        self.consume_and_check_identifier()
    }

    /// Consumes a numeric literal token and applies its early-error checks.
    ///
    /// <https://tc39.es/ecma262/#sec-numeric-literals-early-errors>:
    /// the source character immediately following a NumericLiteral must not
    /// be an IdentifierStart or DecimalDigit. Here that is detected as an
    /// identifier-name token following the literal with no trivia in between.
    ///
    /// <https://tc39.es/ecma262/#sec-additional-syntax-numeric-literals>:
    /// in strict mode, literals that start with `0` followed by another
    /// decimal digit are rejected.
    pub(crate) fn consume_and_validate_numeric_literal(&mut self) -> Token {
        let token = self.consume();

        if self.flags.strict_mode {
            let has_leading_zero_digit = matches!(
                self.token_value(&token),
                [first, second, ..]
                    if *first == ch(b'0') && (ch(b'0')..=ch(b'9')).contains(second)
            );
            if has_leading_zero_digit {
                self.syntax_error("Unprefixed octal number not allowed in strict mode");
            }
        }

        let glued_to_identifier =
            self.match_identifier_name() && self.current_token.trivia_len == 0;
        if glued_to_identifier {
            self.syntax_error("Numeric literal must not be immediately followed by identifier");
        }

        token
    }

    /// Consumes a statement-terminating `;`, or accepts its absence where
    /// automatic semicolon insertion applies.
    ///
    /// <https://tc39.es/ecma262/#sec-automatic-semicolon-insertion>
    /// A semicolon is automatically inserted when:
    ///   1. The offending token is separated from the previous token by at
    ///      least one LineTerminator.
    ///   2. The offending token is `}`.
    ///   3. The previous token is `)` and the inserted semicolon would then be
    ///      parsed as the terminating semicolon of a do-while statement.
    ///   4. The end of the input stream of tokens is reached.
    ///
    /// This function implements cases 1, 2 and 4; it does not itself check
    /// for case 3. If none applies, an "expected Semicolon" error is reported
    /// and no token is consumed.
    pub(crate) fn consume_or_insert_semicolon(&mut self) {
        if self.eat(TokenType::Semicolon) {
            return;
        }

        let insertion_applies = self.current_token.trivia_has_line_terminator
            || self.match_token(TokenType::CurlyClose)
            || self.done();
        if !insertion_applies {
            self.expected("Semicolon");
        }
    }

    // === Lookahead ===

    /// Peeks at the token that follows the current one without consuming
    /// anything.
    ///
    /// The lexer position is saved, one token is read, and the position is
    /// restored, so the parser's current token is left untouched.
    pub(crate) fn next_token(&mut self) -> Token {
        self.lexer.save_state();
        let peeked = self.lexer.next();
        self.lexer.load_state();
        peeked
    }

    // === Position ===

    /// Returns the line, column and offset of the current token.
    pub(crate) fn position(&self) -> Position {
        let token = &self.current_token;
        Position {
            line: token.line_number,
            column: token.line_column,
            offset: token.offset,
        }
    }

    /// Returns the current token's offset minus the length of its leading
    /// trivia, i.e. the offset at which that trivia begins.
    pub(crate) fn source_text_end_offset(&self) -> u32 {
        let token = &self.current_token;
        token.offset - token.trivia_len
    }

    // === Error reporting ===

    pub(crate) fn syntax_error(&mut self, message: &str) {
        self.errors.push(ParseError {
            message: message.to_string(),
            line: self.current_token.line_number,
            column: self.current_token.line_column,
        });
    }

    pub(crate) fn syntax_error_at(&mut self, message: &str, line: u32, column: u32) {
        self.errors.push(ParseError {
            message: message.to_string(),
            line,
            column,
        });
    }

    /// Records a syntax error at the line and column of `pos`.
    /// The position's offset is not used.
    pub(crate) fn syntax_error_at_position(&mut self, message: &str, pos: Position) {
        self.syntax_error_at(message, pos.line, pos.column);
    }

    /// Records that private name `name` was referenced.
    ///
    /// Inside a class body the name is added to the innermost class's
    /// reference set and `true` is returned (the reference is valid for now
    /// and is checked at the end of the class). Outside every class body
    /// nothing is recorded and `false` is returned (always invalid).
    pub(crate) fn register_referenced_private_name(&mut self, name: &[u16]) -> bool {
        match self.referenced_private_names_stack.last_mut() {
            Some(innermost) => {
                innermost.insert(Utf16String::from(name));
                true
            }
            None => false,
        }
    }

    /// Parse and validate a private identifier token.
    ///
    /// Registers the private name reference and emits an error (located at
    /// the private identifier token itself) if outside a class body. The
    /// current token must be a PrivateIdentifier; it is consumed, and the
    /// returned node spans from `range_start` to the token that follows it.
    pub(crate) fn parse_private_identifier(&mut self, range_start: Position) -> PrivateIdentifier {
        // Copy the name once, before consuming: the copy both releases the
        // borrow of `self` and is reused as the node's name below (the
        // consumed token is the same token, so its value is identical).
        let name = self.token_value(&self.current_token).to_vec();
        if !self.register_referenced_private_name(&name) {
            let message = format!(
                "Reference to undeclared private field or method '{}'",
                String::from_utf16_lossy(&name)
            );
            self.syntax_error(&message);
        }

        // Advance first so that `range_from` ends at the following token.
        self.consume();
        PrivateIdentifier {
            range: self.range_from(range_start),
            name: name.into(),
        }
    }

    pub(crate) fn expected(&mut self, what: &str) {
        let msg = if let Some(ref message) = self.current_token.message {
            message.clone()
        } else {
            format!(
                "Unexpected token {}. Expected {}",
                self.current_token.token_type.name(),
                what
            )
        };
        self.syntax_error(&msg);
    }

    /// Take the deferred regex literals collected during parsing.
    /// The caller is responsible for compiling them (on the main thread).
    ///
    /// The parser's own list is left empty afterwards.
    pub(crate) fn take_deferred_regexes(&mut self) -> Vec<DeferredRegex> {
        std::mem::take(&mut self.deferred_regexes)
    }

    /// Batch-compile deferred regex literals.
    ///
    /// Each successfully compiled handle is stored into the literal's
    /// `compiled_regex` slot. Every failure becomes a `ParseError` located at
    /// the literal's recorded line and column; the errors are returned in
    /// input order (an empty vector means everything compiled).
    pub(crate) fn compile_deferred_regexes(deferred: Vec<DeferredRegex>) -> Vec<ParseError> {
        deferred
            .into_iter()
            .filter_map(|regex| {
                match crate::bytecode::ffi::compile_regex(&regex.pattern, &regex.flags) {
                    Ok(handle) => {
                        regex.compiled_regex.set(handle);
                        None
                    }
                    Err(message) => Some(ParseError {
                        message,
                        line: regex.line,
                        column: regex.column,
                    }),
                }
            })
            .collect()
    }

    /// Validates the flag characters of a regex literal.
    ///
    /// Accepted flags are `d g i m s u v y`, each at most once. The first
    /// offending flag (unknown or repeated) is reported as a syntax error at
    /// the current token and validation stops there.
    pub(crate) fn validate_regex_flags(&mut self, flags: &[u16]) {
        let known_flags: [u16; 8] = [
            ch(b'd'),
            ch(b'g'),
            ch(b'i'),
            ch(b'm'),
            ch(b's'),
            ch(b'u'),
            ch(b'v'),
            ch(b'y'),
        ];
        // Fallback character for code units that are not valid scalar values.
        let printable = |unit: u16| char::from_u32(unit as u32).unwrap_or('?');

        // `already_seen[i]` tracks whether `known_flags[i]` has occurred.
        let mut already_seen = [false; 8];
        for &flag in flags {
            let slot = match known_flags.iter().position(|&known| known == flag) {
                Some(slot) => slot,
                None => {
                    self.syntax_error(&format!("Invalid RegExp flag '{}'", printable(flag)));
                    return;
                }
            };
            if already_seen[slot] {
                self.syntax_error(&format!("Repeated RegExp flag '{}'", printable(flag)));
                return;
            }
            already_seen[slot] = true;
        }
    }

    /// Returns true if at least one syntax error is currently recorded.
    pub fn has_errors(&self) -> bool {
        !self.errors.is_empty()
    }

    /// Returns the syntax errors recorded so far, in the order they were
    /// reported.
    pub fn errors(&self) -> &[ParseError] {
        &self.errors
    }

    /// Moves the recorded syntax errors out of the parser, leaving its error
    /// list empty.
    pub fn take_errors(&mut self) -> Vec<ParseError> {
        std::mem::take(&mut self.errors)
    }

    /// Formats every recorded error as `"<line>:<column>: <message>"`.
    pub fn error_messages(&self) -> Vec<String> {
        let mut formatted = Vec::with_capacity(self.errors.len());
        for error in &self.errors {
            formatted.push(format!(
                "{}:{}: {}",
                error.line, error.column, error.message
            ));
        }
        formatted
    }

    // === State save/restore for backtracking ===

    /// Pushes a snapshot for speculative parsing.
    ///
    /// The lexer saves its own position; the parser-side snapshot records the
    /// current token, the current lengths of the error and deferred-regex
    /// lists, the speculative flags and the scope collector state. Must be
    /// paired with either `load_state()` or `discard_saved_state()`.
    pub(crate) fn save_state(&mut self) {
        self.lexer.save_state();

        let snapshot = SavedState {
            token: self.current_token.clone(),
            errors_len: self.errors.len(),
            flags: self.flags,
            scope_collector_state: self.scope_collector.save_state(),
            deferred_regexes_len: self.deferred_regexes.len(),
        };
        self.saved_states.push(snapshot);
    }

    pub(crate) fn load_state(&mut self) {
        let state = self.saved_states.pop().expect("No saved state to restore");
        self.current_token = state.token;
        self.errors.truncate(state.errors_len);
        self.deferred_regexes.truncate(state.deferred_regexes_len);
        self.flags = state.flags;
        self.scope_collector.load_state(state.scope_collector_state);
        self.lexer.load_state();
    }

    /// Drops the most recent snapshot without restoring it, committing to
    /// everything parsed since the matching `save_state()`.
    ///
    /// Unlike `load_state()`, an empty parser-side snapshot stack is silently
    /// ignored here; the lexer is still asked to discard its saved state.
    pub(crate) fn discard_saved_state(&mut self) {
        self.saved_states.pop();
        self.lexer.discard_saved_state();
    }

    // === Token matching helpers ===

    /// Returns true if the current token can be used as an identifier in the
    /// current context (see `token_is_identifier`).
    pub(crate) fn match_identifier(&self) -> bool {
        self.token_is_identifier(&self.current_token)
    }

    pub(crate) fn token_is_identifier(&self, token: &Token) -> bool {
        use TokenType::*;

        let tt = token.token_type;

        match tt {
            Identifier => true,
            EscapedKeyword => !self.match_invalid_escaped_keyword(),
            Let => !self.flags.strict_mode,
            Yield => !self.flags.strict_mode && !self.flags.in_generator_function_context,
            Await => {
                !self.flags.await_expression_is_valid
                    && self.program_type != ProgramType::Module
                    && !self.flags.in_class_static_init_block
            }
            Async => true,
            _ => false,
        }
    }

    /// Returns true if the current token is an identifier name: either its
    /// token type reports itself as one, or it is usable as an identifier in
    /// the current context.
    pub(crate) fn match_identifier_name(&self) -> bool {
        if self.current_token.token_type.is_identifier_name() {
            return true;
        }
        self.match_identifier()
    }

    /// Returns true if the current token is an escaped keyword that may NOT
    /// be used as an identifier in the current context.
    ///
    /// Returns false when the current token is not an escaped keyword at all.
    /// Otherwise:
    /// - `await` is invalid in modules, where await expressions are valid,
    ///   and in class static initialization blocks;
    /// - `async` is never invalid;
    /// - `yield` is invalid inside generator functions;
    /// - in strict mode every other escaped keyword is invalid;
    /// - in non-strict mode only `let` and `static` (context-sensitive
    ///   keywords) are valid; all others (`break`, `for`, ...) are invalid.
    pub(crate) fn match_invalid_escaped_keyword(&self) -> bool {
        if !self.match_token(TokenType::EscapedKeyword) {
            return false;
        }

        let keyword = self.token_value(&self.current_token);
        let flags = &self.flags;

        if keyword == utf16!("await") {
            return self.program_type == ProgramType::Module
                || flags.await_expression_is_valid
                || flags.in_class_static_init_block;
        }
        if keyword == utf16!("async") {
            return false;
        }
        if keyword == utf16!("yield") {
            return flags.in_generator_function_context;
        }

        let is_contextual_keyword = keyword == utf16!("let") || keyword == utf16!("static");
        flags.strict_mode || !is_contextual_keyword
    }

    /// Applies the strict-mode restrictions on binding/assignment target
    /// names.
    ///
    /// Does nothing unless the parser is in strict mode or `force_strict` is
    /// set. Otherwise `arguments` / `eval` and strict-mode reserved words are
    /// each reported as a syntax error at the current token.
    pub(crate) fn check_identifier_name_for_assignment_validity(
        &mut self,
        name: &[u16],
        force_strict: bool,
    ) {
        let strict = self.flags.strict_mode || force_strict;
        if !strict {
            return;
        }

        if name == utf16!("arguments") || name == utf16!("eval") {
            self.syntax_error(
                "Binding pattern target may not be called 'arguments' or 'eval' in strict mode",
            );
            return;
        }

        if is_strict_reserved_word(name) {
            let message = format!(
                "Identifier must not be a reserved word in strict mode ('{}')",
                String::from_utf16_lossy(name)
            );
            self.syntax_error(&message);
        }
    }

    /// Check for duplicate parameter names in arrow functions.
    ///
    /// Arrow functions always reject duplicates, regardless of strict mode.
    /// Entries with an empty name are ignored. Each repeated occurrence of a
    /// name produces its own syntax error.
    pub(crate) fn check_arrow_duplicate_parameters(&mut self, parameter_info: &[ParamInfo]) {
        let mut seen: HashSet<&[u16]> = HashSet::new();
        let named = parameter_info.iter().filter(|info| !info.name.is_empty());
        for info in named {
            let name: &[u16] = &info.name;
            if seen.insert(name) {
                continue;
            }
            let message = format!(
                "Duplicate parameter '{}' not allowed in arrow function",
                String::from_utf16_lossy(name)
            );
            self.syntax_error(&message);
        }
    }

    /// Post-body check for function parameters when 'use strict' was found in
    /// the body or the function is a generator/async.
    ///
    /// For every non-empty parameter name, in order: the strict-mode name
    /// restrictions are applied (honoring `force_strict`), then a repeated
    /// name is reported as a duplicate. `_kind` is currently unused.
    pub(crate) fn check_parameters_post_body(
        &mut self,
        parameter_info: &[ParamInfo],
        force_strict: bool,
        _kind: FunctionKind,
    ) {
        let mut seen: HashSet<&[u16]> = HashSet::new();
        let named = parameter_info.iter().filter(|info| !info.name.is_empty());
        for info in named {
            let name: &[u16] = &info.name;
            self.check_identifier_name_for_assignment_validity(name, force_strict);
            if seen.insert(name) {
                continue;
            }
            let message = format!(
                "Duplicate parameter '{}' not allowed in strict mode",
                String::from_utf16_lossy(name)
            );
            self.syntax_error(&message);
        }
    }

    pub(crate) fn token_value<'b>(&'b self, token: &'b Token) -> &'b [u16] {
        if let Some(ref value) = token.identifier_value {
            return value;
        }
        let start = token.value_start as usize;
        let end = start + token.value_len as usize;
        assert!(
            end <= self.source.len(),
            "token_value: bounds [{start}..{end}) exceed source length {}",
            self.source.len()
        );
        &self.source[start..end]
    }

    /// Return the slice of the source text covered by `token`, as described
    /// by its `value_start` and `value_len`.
    ///
    /// Unlike `token_value`, this never consults `identifier_value`, and the
    /// returned slice borrows from the source (`'a`) rather than from `self`.
    ///
    /// # Panics
    ///
    /// Panics if the token's range extends past the end of the source.
    pub(crate) fn token_original_value(&self, token: &Token) -> &'a [u16] {
        // Widen each field to usize before adding, exactly as `token_value`
        // does, so the sum is never computed in the fields' own (possibly
        // narrower) integer type where it could overflow or wrap.
        let start = token.value_start as usize;
        let end = start + token.value_len as usize;
        assert!(
            end <= self.source.len(),
            "token_original_value: bounds [{start}..{end}) exceed source length {}",
            self.source.len()
        );
        &self.source[start..end]
    }

    /// Re-lex and re-parse the source beginning at `start` as a binding
    /// pattern in which member expressions are allowed (for destructuring
    /// assignment patterns).
    ///
    /// The parser's lexer, current token and `allow_member_expressions` flag
    /// are swapped out for the duration of the re-parse and put back
    /// afterwards. The current implementation always returns `Some`.
    pub(crate) fn synthesize_binding_pattern(&mut self, start: Position) -> Option<BindingPattern> {
        // Discard syntax errors recorded inside the span being reinterpreted,
        // i.e. from `start` up to (but not including) the current token. This
        // matches C++'s behavior, where errors such as a duplicate __proto__
        // in an object literal are cleared once the object is reinterpreted
        // as an assignment target.
        let (end_line, end_column) = (
            self.current_token.line_number,
            self.current_token.line_column,
        );
        self.errors.retain(|error| {
            let at_or_after_start = error.line > start.line
                || (error.line == start.line && error.column >= start.column);
            let at_or_after_end =
                error.line > end_line || (error.line == end_line && error.column >= end_column);
            // Keep only the errors that lie outside [start, end).
            !at_or_after_start || at_or_after_end
        });

        // Point the parser at a fresh lexer positioned at `start`, keeping
        // the outer state so it can be restored below.
        let rewound_lexer =
            Lexer::new_at_offset(self.source, start.offset as usize, start.line, start.column);
        let outer_lexer = std::mem::replace(&mut self.lexer, rewound_lexer);
        let outer_token = std::mem::replace(&mut self.current_token, Token::new(TokenType::Eof));
        let outer_allow = self.allow_member_expressions;

        // Prime the first token of the span, then parse it as a pattern.
        self.current_token = self.lexer.next();
        self.allow_member_expressions = true;
        let pattern = self.parse_binding_pattern();

        // Restore the outer parsing position and flag.
        self.lexer = outer_lexer;
        self.current_token = outer_token;
        self.allow_member_expressions = outer_allow;

        Some(pattern)
    }

    /// Return `true` if `expression` can be the target of a simple
    /// assignment: an identifier, a member expression, or a call expression
    /// when `allow_call_expression` is set.
    pub(crate) fn is_simple_assignment_target(
        expression: &Expression,
        allow_call_expression: bool,
    ) -> bool {
        match &expression.inner {
            ExpressionKind::Identifier(_) | ExpressionKind::Member { .. } => true,
            ExpressionKind::Call(_) => allow_call_expression,
            _ => false,
        }
    }

    /// Return `true` if `expression` is an `ExpressionKind::Object`.
    fn is_object_expression(expression: &Expression) -> bool {
        let kind = &expression.inner;
        matches!(kind, ExpressionKind::Object(_))
    }

    /// Return `true` if `expression` is an `ExpressionKind::Array`.
    fn is_array_expression(expression: &Expression) -> bool {
        let kind = &expression.inner;
        matches!(kind, ExpressionKind::Array(_))
    }

    /// Return `true` if `expression` is an `ExpressionKind::Identifier`.
    fn is_identifier(expression: &Expression) -> bool {
        let kind = &expression.inner;
        matches!(kind, ExpressionKind::Identifier(_))
    }

    /// Return `true` if `expression` is an `ExpressionKind::Member`.
    fn is_member_expression(expression: &Expression) -> bool {
        let kind = &expression.inner;
        matches!(kind, ExpressionKind::Member { .. })
    }

    /// Return `true` if `expression` is an `ExpressionKind::Call`.
    fn is_call_expression(expression: &Expression) -> bool {
        let kind = &expression.inner;
        matches!(kind, ExpressionKind::Call(_))
    }

    /// Return `true` if `expression` is an `ExpressionKind::Update`.
    fn is_update_expression(expression: &Expression) -> bool {
        let kind = &expression.inner;
        matches!(kind, ExpressionKind::Update { .. })
    }

    // === Main entry point ===

    /// Parse the whole input as a script or a module, depending on
    /// `self.program_type`, and return the resulting `Program` statement.
    ///
    /// `starts_in_strict_mode` is only forwarded to script parsing; modules
    /// are always recorded as strict. Scripts always report
    /// `has_top_level_await: false`.
    pub fn parse_program(&mut self, starts_in_strict_mode: bool) -> Statement {
        let start = self.position();

        let (children, program_type, is_strict_mode, has_top_level_await) =
            if self.program_type == ProgramType::Script {
                let (children, is_strict) = self.parse_script(starts_in_strict_mode);
                (children, ProgramType::Script, is_strict, false)
            } else {
                let (children, has_top_level_await) = self.parse_module();
                (children, ProgramType::Module, true, has_top_level_await)
            };

        // The program scope was opened by parse_script / parse_module via
        // open_program_scope. Close it now that the children are attached.
        let scope = ScopeData::shared_with_children(children);
        self.scope_collector.set_scope_node(scope.clone());
        self.scope_collector.close_scope();

        let program = ProgramData {
            scope,
            program_type,
            is_strict_mode,
            has_top_level_await,
        };
        self.statement(start, StatementKind::Program(program))
    }

    /// Parse a script body: the directive prologue followed by a statement
    /// list.
    ///
    /// Returns the parsed statements and whether the script ended up strict.
    /// It is strict if the parser was already strict, if
    /// `starts_in_strict_mode` is set, or if a "use strict" directive was
    /// found. The parser's own strict-mode flag is restored to its previous
    /// value before returning.
    fn parse_script(&mut self, starts_in_strict_mode: bool) -> (Vec<Statement>, bool) {
        // The program scope opened here is closed later in parse_program,
        // after the ScopeData has been created.
        self.scope_collector.open_program_scope(ProgramType::Script);

        let outer_strict_mode = self.flags.strict_mode;
        self.flags.strict_mode |= starts_in_strict_mode;

        let (has_use_strict, mut children) = self.parse_directive();
        self.flags.strict_mode |= has_use_strict;

        children.extend(self.parse_statement_list(true));

        // The statement list stopped before the end of input: report the
        // offending token and skip over it.
        if !self.done() {
            let wanted = if self.flags.in_function_context {
                "CurlyClose"
            } else {
                "statement or declaration"
            };
            self.expected(wanted);
            self.consume();
        }

        let is_strict = std::mem::replace(&mut self.flags.strict_mode, outer_strict_mode);
        (children, is_strict)
    }

    // https://tc39.es/ecma262/#sec-modules
    // Module code is always strict mode code.
    //
    // Parses statements, declarations and import/export statements until the
    // end of input, then checks that every locally exported name is declared.
    // Returns the module's statements together with whether the scope
    // collector saw an await expression.
    fn parse_module(&mut self) -> (Vec<Statement>, bool) {
        // The program scope opened here is closed later in parse_program,
        // after the ScopeData has been created.
        self.scope_collector.open_program_scope(ProgramType::Module);

        // Force strict mode and allow `await` while parsing the module,
        // remembering the previous flag values so they can be restored.
        let outer_strict_mode = std::mem::replace(&mut self.flags.strict_mode, true);
        let outer_await_valid =
            std::mem::replace(&mut self.flags.await_expression_is_valid, true);

        let mut body = Vec::new();

        while !self.done() {
            // parse_statement_list returns when it reaches an import/export
            // or a token that starts neither a statement nor a declaration.
            body.extend(self.parse_statement_list(true));

            if self.done() {
                break;
            }

            if !self.match_export_or_import() {
                self.expected("statement or declaration");
                self.consume();
                continue;
            }

            let statement = if self.match_token(TokenType::Export) {
                self.parse_export_statement()
            } else {
                self.parse_import_statement()
            };
            body.push(statement);
        }

        // Check that all exported bindings are declared in the module.
        self.check_undeclared_exports(&body);

        self.flags.strict_mode = outer_strict_mode;
        self.flags.await_expression_is_valid = outer_await_valid;
        let has_top_level_await = self.scope_collector.contains_await_expression();
        (body, has_top_level_await)
    }

    fn check_undeclared_exports(&mut self, children: &[Statement]) {
        use crate::ast::*;

        // Collect all declared names at module level.
        let mut declared_names: HashSet<Utf16String> = HashSet::new();
        for child in children {
            match &child.inner {
                StatementKind::VariableDeclaration { declarations, .. } => {
                    for decl in declarations {
                        collect_binding_names(&decl.target, &mut declared_names);
                    }
                }
                StatementKind::FunctionDeclaration {
                    name: Some(name), ..
                } => {
                    declared_names.insert(name.name.clone());
                }
                StatementKind::ClassDeclaration(data) => {
                    if let Some(ref name) = data.name {
                        declared_names.insert(name.name.clone());
                    }
                }
                StatementKind::Import(data) => {
                    for entry in &data.entries {
                        declared_names.insert(entry.local_name.clone());
                    }
                }
                StatementKind::Export(data) => {
                    if let Some(ref statement) = data.statement {
                        match &statement.inner {
                            StatementKind::VariableDeclaration { declarations, .. } => {
                                for decl in declarations {
                                    collect_binding_names(&decl.target, &mut declared_names);
                                }
                            }
                            StatementKind::FunctionDeclaration {
                                name: Some(name), ..
                            } => {
                                declared_names.insert(name.name.clone());
                            }
                            StatementKind::ClassDeclaration(class_data) => {
                                if let Some(ref name) = class_data.name {
                                    declared_names.insert(name.name.clone());
                                }
                            }
                            _ => {}
                        }
                    }
                }
                _ => {}
            }
        }

        // Check each export's local bindings.
        for child in children {
            if let StatementKind::Export(data) = &child.inner {
                if data.statement.is_some() {
                    continue;
                }
                for entry in &data.entries {
                    if data.module_request.is_some() {
                        continue;
                    }
                    if entry.kind == ExportEntryKind::EmptyNamedExport {
                        continue;
                    }
                    if let Some(ref local_name) = entry.local_or_import_name
                        && !declared_names.contains(local_name.as_slice())
                    {
                        self.syntax_error_at_position(
                            &format!(
                                "'{}' in export is not declared",
                                String::from_utf16_lossy(local_name.as_slice())
                            ),
                            child.range.start,
                        );
                    }
                }
            }
        }
    }

    // https://tc39.es/ecma262/#sec-directive-prologues-and-the-use-strict-directive
    // A Directive Prologue is a sequence of ExpressionStatements at the beginning
    // of a FunctionBody, ScriptBody, or ModuleBody that each consist entirely of
    // a StringLiteral followed by semicolon. A "use strict" directive causes
    // subsequent code to be interpreted in strict mode.
    //
    // Parses statements for as long as the current token is a string literal
    // and returns `(found_use_strict, statements)`. Parsing of the prologue
    // stops right after the first "use strict" directive. The legacy-octal
    // escape flag is always cleared before returning.
    //
    // NOTE(review): nothing here verifies that the parsed statement consists
    // *only* of the string literal. As written, a statement that merely starts
    // with the literal (e.g. `"use strict" + x;`) appears to be treated as a
    // directive and would enable strict mode -- confirm, and if so stop the
    // loop when the statement's expression is not a bare string literal.
    pub(crate) fn parse_directive(&mut self) -> (bool, Vec<Statement>) {
        let mut found_use_strict = false;
        let mut statements = Vec::new();
        while !self.done() && self.match_token(TokenType::StringLiteral) {
            // Capture the raw source text (quotes included) before parsing
            // advances past the token; the directive is recognised by its
            // exact spelling.
            let raw_value = self.token_original_value(&self.current_token);
            let statement = self.parse_statement(false);
            statements.push(statement);

            if is_use_strict(raw_value) {
                found_use_strict = true;
                // A legacy octal escape recorded so far in this prologue is
                // an error once "use strict" is seen.
                if self.flags.string_legacy_octal_escape_sequence_in_scope {
                    self.syntax_error(
                        "Octal escape sequence in string literal not allowed in strict mode",
                    );
                }
                break;
            }
        }
        self.flags.string_legacy_octal_escape_sequence_in_scope = false;
        (found_use_strict, statements)
    }

    /// Parse consecutive declarations and statements and return them in
    /// source order.
    ///
    /// Parsing stops at the end of input, at an import/export (see
    /// `match_export_or_import`), or at a token that starts neither a
    /// declaration nor a statement. `allow_labelled_functions` is forwarded
    /// to `parse_statement`.
    pub(crate) fn parse_statement_list(
        &mut self,
        allow_labelled_functions: bool,
    ) -> Vec<Statement> {
        let mut parsed = Vec::new();
        loop {
            if self.done() || self.match_export_or_import() {
                break;
            }
            // Declarations take priority over statements.
            let item = if self.match_declaration() {
                self.parse_declaration()
            } else if self.match_statement() {
                self.parse_statement(allow_labelled_functions)
            } else {
                break;
            };
            parsed.push(item);
        }
        parsed
    }

    /// Return `true` if the current token can begin a statement: one of the
    /// statement-introducing tokens listed below, or anything that
    /// `match_expression` accepts.
    pub(crate) fn match_statement(&mut self) -> bool {
        match self.current_token_type() {
            TokenType::CurlyOpen
            | TokenType::Return
            | TokenType::Var
            | TokenType::For
            | TokenType::If
            | TokenType::Throw
            | TokenType::Try
            | TokenType::Break
            | TokenType::Continue
            | TokenType::Switch
            | TokenType::Do
            | TokenType::While
            | TokenType::With
            | TokenType::Debugger
            | TokenType::Semicolon
            | TokenType::Slash
            | TokenType::SlashEquals => true,
            // Everything else is a statement only if it starts an expression.
            _ => self.match_expression(),
        }
    }

    /// Return `true` if the current token begins a declaration.
    ///
    /// `function`, `class` and `const` always do. `let`, `async` and the
    /// identifier `using` are decided with one token of lookahead, as noted
    /// on each arm.
    pub(crate) fn match_declaration(&mut self) -> bool {
        match self.current_token_type() {
            TokenType::Function | TokenType::Class | TokenType::Const => true,
            // In strict mode `let` always starts a declaration. Otherwise it
            // depends on the token that follows.
            TokenType::Let => self.flags.strict_mode || self.try_match_let_declaration(),
            // `async` counts only when `function` follows with no line
            // terminator in between.
            TokenType::Async => {
                let lookahead = self.next_token();
                !lookahead.trivia_has_line_terminator
                    && lookahead.token_type == TokenType::Function
            }
            // `using` counts only when an identifier name follows with no
            // line terminator in between.
            TokenType::Identifier => {
                let is_using = self.token_value(&self.current_token) == utf16!("using");
                if !is_using {
                    return false;
                }
                let lookahead = self.next_token();
                !lookahead.trivia_has_line_terminator && lookahead.token_type.is_identifier_name()
            }
            _ => false,
        }
    }

    /// Decide, by looking at the next token, whether the current `let`
    /// begins a declaration.
    ///
    /// It does when the next token is `{` or `[`, or an identifier name
    /// other than `in`.
    fn try_match_let_declaration(&mut self) -> bool {
        let lookahead = self.next_token();
        let opens_pattern = matches!(
            lookahead.token_type,
            TokenType::CurlyOpen | TokenType::BracketOpen
        );
        let names_binding = lookahead.token_type.is_identifier_name()
            && self.token_value(&lookahead) != utf16!("in");
        names_binding || opens_pattern
    }

    /// Return `true` if the current token is `do`, `for` or `while`.
    fn match_iteration_start(&self) -> bool {
        let token_type = self.current_token_type();
        matches!(
            token_type,
            TokenType::Do | TokenType::For | TokenType::While
        )
    }

    /// Return `true` if the current token begins an export or import.
    ///
    /// `export` always does. `import` does unless the next token is `(` or
    /// `.`; those two forms are left to other parsing paths.
    pub(crate) fn match_export_or_import(&mut self) -> bool {
        if self.match_token(TokenType::Export) {
            return true;
        }
        if !self.match_token(TokenType::Import) {
            return false;
        }
        let lookahead = self.next_token();
        !matches!(
            lookahead.token_type,
            TokenType::ParenOpen | TokenType::Period
        )
    }

    // === Operator precedence ===

    /// Return the precedence level of the operator token `tt`.
    ///
    /// Levels run from 20 down to 1; any token not listed here yields 0.
    /// The expression parser consumes operators whose precedence is at least
    /// its `min_precedence`, so a larger number binds more tightly.
    pub(crate) fn operator_precedence(tt: TokenType) -> i32 {
        match tt {
            // 20: `.`, `[`, `(` and `?.`
            TokenType::Period
            | TokenType::BracketOpen
            | TokenType::ParenOpen
            | TokenType::QuestionMarkPeriod => 20,
            // 19: `new`
            TokenType::New => 19,
            // 18: `++` and `--`
            TokenType::PlusPlus | TokenType::MinusMinus => 18,
            // 17: unary operators
            TokenType::ExclamationMark
            | TokenType::Tilde
            | TokenType::Typeof
            | TokenType::Void
            | TokenType::Delete
            | TokenType::Await => 17,
            // 16: `**`
            TokenType::DoubleAsterisk => 16,
            // 15: `*`, `/`, `%`
            TokenType::Asterisk | TokenType::Slash | TokenType::Percent => 15,
            // 14: `+`, `-`
            TokenType::Plus | TokenType::Minus => 14,
            // 13: shifts
            TokenType::ShiftLeft | TokenType::ShiftRight | TokenType::UnsignedShiftRight => 13,
            // 12: relational operators, `in`, `instanceof`
            TokenType::LessThan
            | TokenType::LessThanEquals
            | TokenType::GreaterThan
            | TokenType::GreaterThanEquals
            | TokenType::In
            | TokenType::Instanceof => 12,
            // 11: equality operators
            TokenType::EqualsEquals
            | TokenType::ExclamationMarkEquals
            | TokenType::EqualsEqualsEquals
            | TokenType::ExclamationMarkEqualsEquals => 11,
            // 10..8: `&`, `^`, `|`
            TokenType::Ampersand => 10,
            TokenType::Caret => 9,
            TokenType::Pipe => 8,
            // 7..5: `??`, `&&`, `||`
            TokenType::DoubleQuestionMark => 7,
            TokenType::DoubleAmpersand => 6,
            TokenType::DoublePipe => 5,
            // 4: `?` (conditional)
            TokenType::QuestionMark => 4,
            // 3: assignment operators
            TokenType::Equals
            | TokenType::PlusEquals
            | TokenType::MinusEquals
            | TokenType::DoubleAsteriskEquals
            | TokenType::AsteriskEquals
            | TokenType::SlashEquals
            | TokenType::PercentEquals
            | TokenType::ShiftLeftEquals
            | TokenType::ShiftRightEquals
            | TokenType::UnsignedShiftRightEquals
            | TokenType::AmpersandEquals
            | TokenType::CaretEquals
            | TokenType::PipeEquals
            | TokenType::DoubleAmpersandEquals
            | TokenType::DoublePipeEquals
            | TokenType::DoubleQuestionMarkEquals => 3,
            // 2: `yield`
            TokenType::Yield => 2,
            // 1: `,`
            TokenType::Comma => 1,
            // Not an operator known to this table.
            _ => 0,
        }
    }

    /// Return the associativity of the operator token `tt`.
    ///
    /// The tokens listed below are left-associative. Every other token,
    /// including tokens that are not operators at all, is reported as
    /// right-associative.
    pub(crate) fn operator_associativity(tt: TokenType) -> Associativity {
        let is_left_associative = matches!(
            tt,
            TokenType::Period
                | TokenType::BracketOpen
                | TokenType::ParenOpen
                | TokenType::QuestionMarkPeriod
                | TokenType::Asterisk
                | TokenType::Slash
                | TokenType::Percent
                | TokenType::Plus
                | TokenType::Minus
                | TokenType::ShiftLeft
                | TokenType::ShiftRight
                | TokenType::UnsignedShiftRight
                | TokenType::LessThan
                | TokenType::LessThanEquals
                | TokenType::GreaterThan
                | TokenType::GreaterThanEquals
                | TokenType::In
                | TokenType::Instanceof
                | TokenType::EqualsEquals
                | TokenType::ExclamationMarkEquals
                | TokenType::EqualsEqualsEquals
                | TokenType::ExclamationMarkEqualsEquals
                | TokenType::Typeof
                | TokenType::Void
                | TokenType::Delete
                | TokenType::Await
                | TokenType::Ampersand
                | TokenType::Caret
                | TokenType::Pipe
                | TokenType::DoubleQuestionMark
                | TokenType::DoubleAmpersand
                | TokenType::DoublePipe
                | TokenType::Comma
        );
        if is_left_associative {
            Associativity::Left
        } else {
            Associativity::Right
        }
    }
}

// === Helpers ===

/// Return `true` if `raw` -- the raw source text of a string literal, quotes
/// included -- is exactly `'use strict'` or `"use strict"`.
fn is_use_strict(raw: &[u16]) -> bool {
    let single_quoted = raw == utf16!("'use strict'");
    let double_quoted = raw == utf16!("\"use strict\"");
    single_quoted || double_quoted
}

/// Add to `names` every binding name introduced by a variable declarator
/// target: the identifier itself, or all names bound by its binding pattern.
fn collect_binding_names(
    target: &crate::ast::VariableDeclaratorTarget,
    names: &mut HashSet<Utf16String>,
) {
    let identifier = match target {
        crate::ast::VariableDeclaratorTarget::BindingPattern(pattern) => {
            return collect_binding_pattern_names(pattern, names);
        }
        crate::ast::VariableDeclaratorTarget::Identifier(identifier) => identifier,
    };
    names.insert(identifier.name.clone());
}

/// Add to `names` every binding name introduced by a binding pattern
/// (object or array destructuring), descending into nested patterns.
///
/// For each entry the alias decides the bound name when present; member
/// expression aliases bind nothing. Without an alias, an identifier `name`
/// is itself the binding.
fn collect_binding_pattern_names(
    pattern: &crate::ast::BindingPattern,
    names: &mut HashSet<Utf16String>,
) {
    for entry in &pattern.entries {
        match (&entry.alias, &entry.name) {
            (Some(crate::ast::BindingEntryAlias::Identifier(identifier)), _) => {
                names.insert(identifier.name.clone());
            }
            (Some(crate::ast::BindingEntryAlias::BindingPattern(nested)), _) => {
                collect_binding_pattern_names(nested, names);
            }
            (Some(crate::ast::BindingEntryAlias::MemberExpression(_)), _) => {}
            (None, Some(crate::ast::BindingEntryName::Identifier(identifier))) => {
                names.insert(identifier.name.clone());
            }
            (None, _) => {}
        }
    }
}

// https://tc39.es/ecma262/#sec-keywords-and-reserved-words
// In strict mode code, the following tokens are also reserved:
// `implements` `interface` `let` `package` `private` `protected` `public` `static` `yield`
//
// Returns true when `name` is exactly one of those nine words.
pub(crate) fn is_strict_reserved_word(name: &[u16]) -> bool {
    let is_access_modifier =
        name == utf16!("private") || name == utf16!("protected") || name == utf16!("public");
    let is_type_keyword =
        name == utf16!("implements") || name == utf16!("interface") || name == utf16!("package");
    let is_contextual_keyword =
        name == utf16!("let") || name == utf16!("static") || name == utf16!("yield");
    is_access_modifier || is_type_keyword || is_contextual_keyword
}