Files
ladybird/Libraries/LibJS/Rust/src/parser.rs
Andreas Kling c31c52b0a9 LibJS: Unify parser and scope collector error types
Replace three identical error structs (ParserError, ScopeError,
ParsedError) with a single shared ParseError type. Since all three
had the same fields (message, line, column), having separate types
only added verbose field-by-field copying at each boundary.

Now errors flow directly from parser/scope collector into
ParsedProgram without conversion.
2026-03-06 13:06:05 +01:00

1464 lines
53 KiB
Rust

/*
* Copyright (c) 2026-present, the Ladybird developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
//! JavaScript parser: recursive descent with precedence climbing.
//!
//! This is the core parser module. It contains the `Parser` struct (parser
//! state + helpers) and delegates actual parsing to submodules:
//!
//! - `expressions` — `parse_expression()`, primary/secondary expressions
//! - `statements` — `parse_statement()`, control flow
//! - `declarations` — functions, classes, variables, import/export
//!
//! ## How parsing works
//!
//! The parser is a single-pass, recursive-descent parser. Expression parsing
//! uses precedence climbing (Pratt-style): `parse_expression(min_precedence)`
//! parses a primary expression, then loops consuming binary/postfix operators
//! whose precedence is >= `min_precedence`.
//!
//! The parser reads tokens one at a time from the Lexer. The "current token"
//! is always available via `self.current_token`. Calling `consume()` returns
//! the current token and advances to the next one.
//!
//! ## Backtracking
//!
//! Some constructs require speculative parsing (e.g., arrow functions:
//! `(a, b) =>` looks like a parenthesized expression until `=>` is seen).
//! The parser supports this via `save_state()` / `load_state()`, which
//! save and restore the full parser state including lexer position, current
//! token, error list, and all boolean flags.
use std::collections::{HashMap, HashSet};
use std::rc::Rc;
use crate::ast::{
BindingPattern, CompiledRegex, Expression, ExpressionKind, FunctionParameter, FunctionTable,
Identifier, PrivateIdentifier, ProgramData, ScopeData, SourceRange, Statement, StatementKind,
Utf16String,
};
use crate::lexer::{Lexer, ch};
use crate::scope_collector::{ScopeCollector, ScopeCollectorState};
use crate::token::{Token, TokenType};
mod declarations;
mod expressions;
mod statements;
pub use crate::ast::DeclarationKind;
pub use crate::ast::FunctionKind;
pub use crate::ast::FunctionParsingInsights;
pub use crate::ast::Position;
pub use crate::ast::ProgramType;
// Named precedence levels for parse_expression().
// These correspond to the operator precedence table in ECMA-262.
// parse_expression(min_precedence) only consumes operators whose precedence
// is >= min_precedence, so a larger value here means a tighter binding.
/// Level named for the comma operator (the smallest of the named levels).
pub(crate) const PRECEDENCE_COMMA: i32 = 0;
/// Level named for assignment operators.
pub(crate) const PRECEDENCE_ASSIGNMENT: i32 = 2;
/// Level named for unary operators.
pub(crate) const PRECEDENCE_UNARY: i32 = 17;
/// Level named for member access (the largest of the named levels).
pub(crate) const PRECEDENCE_MEMBER: i32 = 19;
/// Result of parsing a function's formal parameter list.
pub struct ParsedParameters {
/// The parsed formal parameters.
pub parameters: Vec<FunctionParameter>,
/// NOTE(review): presumably the value of the function's `length` property;
/// confirm against the parameter-list parser.
pub function_length: i32,
/// Per-name binding information. `register_function_parameters_with_scope`
/// walks this list positionally alongside `parameters`.
pub parameter_info: Vec<ParamInfo>,
/// NOTE(review): presumably whether the list is a "simple parameter list"
/// in the ECMA-262 sense; confirm against the code that sets it.
pub is_simple: bool,
}
/// Information about a single parameter name binding.
pub struct ParamInfo {
/// The bound name. Entries with an empty name are skipped by the
/// duplicate-parameter checks.
pub name: Utf16String,
/// Whether this name belongs to a rest parameter.
pub is_rest: bool,
/// Whether this name was bound by a binding pattern. Consecutive entries
/// with this flag set are attributed to the same pattern parameter when
/// parameters are registered with the scope collector.
pub is_from_pattern: bool,
/// Identifier node for the binding, if any; forwarded to the scope
/// collector for pattern-bound names.
pub identifier: Option<Rc<Identifier>>,
}
/// Result of parsing a property key (object literal or class element).
pub(crate) struct PropertyKey {
/// The expression node for the key.
pub expression: Expression,
/// The key's name, when it has one.
/// NOTE(review): presumably `None` for computed keys; confirm at the producer.
pub name: Option<Utf16String>,
/// NOTE(review): presumably set when the key is `__proto__`; confirm at the producer.
pub is_proto: bool,
/// NOTE(review): presumably set for `[expr]` computed keys; confirm at the producer.
pub is_computed: bool,
/// NOTE(review): presumably set when the key was written as a plain
/// identifier; confirm at the producer.
pub is_identifier: bool,
}
/// Method kind for parse_method_definition.
///
/// A small `Copy` tag with one variant per kind of method definition.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum MethodKind {
/// An ordinary method.
Normal,
/// A getter method.
Getter,
/// A setter method.
Setter,
/// A constructor method.
Constructor,
}
/// Associativity for operator precedence.
///
/// A small `Copy` tag describing how operators of equal precedence group.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Associativity {
/// Left-associative operator.
Left,
/// Right-associative operator.
Right,
}
/// Tracks which tokens are forbidden in the current expression context.
///
/// This is threaded through `parse_expression()` to prevent ambiguity:
/// - `forbid_in`: in for-loop init position (`for (x in ...)` is for-in, not comparison)
/// - `forbid_logical/forbid_coalesce`: `&&`/`||` and `??` cannot be mixed without parens
/// - `forbid_paren_open`: prevents consuming `(` as call in `new Foo()` callee position
/// - `forbid_question_mark_period`: prevents `?.` in `new Foo?.bar`
/// - `forbid_equals`: prevents `=` from being consumed as assignment in certain contexts
///
/// The default value (all flags `false`) forbids nothing.
#[derive(Clone, Copy, Default)]
pub struct ForbiddenTokens {
/// Forbids `TokenType::In`.
pub forbid_in: bool,
/// Forbids both `TokenType::DoubleAmpersand` and `TokenType::DoublePipe`.
pub forbid_logical: bool,
/// Forbids `TokenType::DoubleQuestionMark`.
pub forbid_coalesce: bool,
/// Forbids `TokenType::ParenOpen`.
pub forbid_paren_open: bool,
/// Forbids `TokenType::QuestionMarkPeriod`.
pub forbid_question_mark_period: bool,
/// Forbids `TokenType::Equals`.
pub forbid_equals: bool,
}
impl ForbiddenTokens {
    /// Returns a set in which no token is forbidden.
    pub fn none() -> Self {
        Self::default()
    }

    /// Returns a set in which only `in` is forbidden.
    pub fn with_in() -> Self {
        Self {
            forbid_in: true,
            ..Self::default()
        }
    }

    /// Reports whether `token` may be consumed under this set.
    ///
    /// Token types that have no corresponding flag are always allowed.
    pub fn allows(&self, token: TokenType) -> bool {
        let is_forbidden = match token {
            TokenType::In => self.forbid_in,
            TokenType::DoubleAmpersand | TokenType::DoublePipe => self.forbid_logical,
            TokenType::DoubleQuestionMark => self.forbid_coalesce,
            TokenType::ParenOpen => self.forbid_paren_open,
            TokenType::QuestionMarkPeriod => self.forbid_question_mark_period,
            TokenType::Equals => self.forbid_equals,
            _ => false,
        };
        !is_forbidden
    }

    /// Returns the union of `self` and `other`: a token is forbidden in the
    /// result if either input forbids it.
    pub fn merge(&self, other: ForbiddenTokens) -> ForbiddenTokens {
        let ForbiddenTokens {
            forbid_in,
            forbid_logical,
            forbid_coalesce,
            forbid_paren_open,
            forbid_question_mark_period,
            forbid_equals,
        } = *self;
        ForbiddenTokens {
            forbid_in: forbid_in || other.forbid_in,
            forbid_logical: forbid_logical || other.forbid_logical,
            forbid_coalesce: forbid_coalesce || other.forbid_coalesce,
            forbid_paren_open: forbid_paren_open || other.forbid_paren_open,
            forbid_question_mark_period: forbid_question_mark_period
                || other.forbid_question_mark_period,
            forbid_equals: forbid_equals || other.forbid_equals,
        }
    }

    /// Returns a copy of `self` that additionally forbids every token in
    /// `tokens`. Token types without a corresponding flag are ignored.
    pub fn forbid(&self, tokens: &[TokenType]) -> ForbiddenTokens {
        tokens.iter().fold(*self, |mut accumulated, &token| {
            match token {
                TokenType::In => accumulated.forbid_in = true,
                TokenType::DoubleAmpersand | TokenType::DoublePipe => {
                    accumulated.forbid_logical = true
                }
                TokenType::DoubleQuestionMark => accumulated.forbid_coalesce = true,
                TokenType::ParenOpen => accumulated.forbid_paren_open = true,
                TokenType::QuestionMarkPeriod => accumulated.forbid_question_mark_period = true,
                TokenType::Equals => accumulated.forbid_equals = true,
                _ => {}
            }
            accumulated
        })
    }
}
/// A syntax error together with the source location it was reported at.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so errors can be logged,
/// copied between result containers, and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Line number the error was reported at.
    pub line: u32,
    /// Column the error was reported at.
    pub column: u32,
}
/// Boolean flags that are saved/restored during speculative parsing.
///
/// The whole struct is `Copy`; `save_state()` stores a copy and
/// `load_state()` writes it back.
#[derive(Clone, Copy, Default)]
pub(crate) struct ParserFlags {
/// Whether the code currently being parsed is strict mode code.
pub strict_mode: bool,
pub allow_super_property_lookup: bool,
pub allow_super_constructor_call: bool,
pub in_function_context: bool,
pub in_formal_parameter_context: bool,
/// When set, `yield` is not accepted as an identifier.
pub in_generator_function_context: bool,
/// When set, `await` is not accepted as an identifier.
pub await_expression_is_valid: bool,
pub in_break_context: bool,
pub in_continue_context: bool,
pub string_legacy_octal_escape_sequence_in_scope: bool,
/// When set, consuming an `arguments` identifier reports a syntax error.
pub in_class_field_initializer: bool,
/// When set, `await` is not accepted as an identifier.
pub in_class_static_init_block: bool,
/// Set when an `arguments` or `eval` identifier is consumed.
pub function_might_need_arguments_object: bool,
/// Updated by every `consume()`: true when the consumed token was `.`.
/// Suppresses the `arguments`/`eval` check for the identifier that follows.
pub previous_token_was_period: bool,
/// Set during property key parsing to suppress eval/arguments check.
/// C++ uses separate `consume()` and `consume_and_allow_division()` methods;
/// we emulate this by skipping the check in property key contexts.
pub in_property_key_context: bool,
}
/// A regex literal whose compilation is deferred until after parsing.
pub struct DeferredRegex {
/// Shared slot that receives the compiled handle (via `set`) once
/// `compile_deferred_regexes` succeeds for this literal.
pub compiled_regex: Rc<CompiledRegex>,
/// Pattern source as UTF-16 code units, passed to the regex compiler.
pub pattern: Vec<u16>,
/// Flags as UTF-16 code units, passed to the regex compiler.
pub flags: Vec<u16>,
/// Line used for the `ParseError` if compilation fails.
pub line: u32,
/// Column used for the `ParseError` if compilation fails.
pub column: u32,
}
/// Snapshot of parser state for speculative parsing (backtracking).
///
/// Created by `Parser::save_state()` and consumed by `Parser::load_state()`.
/// The lexer keeps its own saved state; it is not stored here.
struct SavedState {
/// Clone of the current token at the time of the snapshot.
token: Token,
/// Length of the error list at the snapshot; errors are truncated back to
/// this length on restore.
errors_len: usize,
/// Copy of the parser flags at the snapshot.
flags: ParserFlags,
/// Scope collector state captured at the snapshot.
scope_collector_state: ScopeCollectorState,
/// Length of the deferred regex list at the snapshot; the list is
/// truncated back to this length on restore.
deferred_regexes_len: usize,
}
/// The main JavaScript parser.
///
/// Produces an AST. Parsing methods live in the `expressions`,
/// `statements`, and `declarations` submodules (all `impl Parser`).
pub struct Parser<'a> {
/// Token source; `consume()` and `next_token()` pull tokens from it.
lexer: Lexer<'a>,
/// `consume()` returns this and advances to the next token.
current_token: Token,
/// Syntax errors recorded so far. Truncated when a saved state is restored.
errors: Vec<ParseError>,
/// Stack of snapshots pushed by `save_state()` and popped by
/// `load_state()` / `discard_saved_state()`.
saved_states: Vec<SavedState>,
/// Whether a script or a module is being parsed; selects the code path in
/// `parse_program()`.
program_type: ProgramType,
/// UTF-16 source text.
source: &'a [u16],
// --- Parser state flags (saved/restored during speculative parsing) ---
pub(crate) flags: ParserFlags,
// --- Flags NOT saved/restored during speculative parsing ---
pub(crate) initiated_by_eval: bool,
pub(crate) in_eval_function_context: bool,
/// Labels currently in scope. Value is Some(line, col) if a `continue`
/// statement referenced this label, None otherwise.
labels_in_scope: HashMap<Utf16String, Option<(u32, u32)>>,
/// Set by try_parse_labelled_statement to propagate iteration-ness
/// through nested labels (e.g., `a: b: for(...)`).
last_inner_label_is_iteration: bool,
last_function_name: Utf16String,
last_function_kind: FunctionKind,
last_class_name: Utf16String,
/// Bound names collected during parse_binding_pattern.
/// Caller drains this after calling parse_binding_pattern.
/// Each entry is (name, identifier) — allows scope analysis to annotate
/// binding pattern identifiers with local variable info.
pub(crate) pattern_bound_names: Vec<(Utf16String, Rc<Identifier>)>,
/// Set during synthesize_binding_pattern to allow MemberExpressions as binding targets.
allow_member_expressions: bool,
/// Position of the opening bracket/brace in binding patterns.
/// Used so all identifiers inside a binding pattern share the pattern's start position,
/// matching C++ parser behavior.
binding_pattern_start: Option<Position>,
/// True while parsing a class body that has an `extends` clause.
pub(crate) class_has_super_class: bool,
/// Depth counter for class bodies — used to reject `#name` outside classes.
pub(crate) class_scope_depth: u32,
pub(crate) has_default_export_name: bool,
/// Stack of sets tracking private names referenced inside class bodies.
/// Each class body pushes a new set. At the end of the class, any names
/// not found in the class's declared private names are bubbled up to the
/// outer class, or reported as errors if there is no outer class.
referenced_private_names_stack: Vec<HashSet<Utf16String>>,
/// Communication channel from `parse_variable_declaration` back to
/// `parse_for_statement` when parsing `for (let/const/var ... ; ...)`.
/// These are set when `is_for_loop` is true and read by the for-loop
/// parser to validate for-in/of restrictions.
pub(crate) for_loop_declaration_count: usize,
pub(crate) for_loop_declaration_has_init: bool,
pub(crate) for_loop_declaration_is_var: bool,
/// Scope analysis state. Its state is captured and restored together with
/// the parser's own state during speculative parsing.
pub scope_collector: ScopeCollector,
/// Track exported names for duplicate detection in modules.
exported_names: HashSet<Utf16String>,
/// Side table owning all FunctionData produced during parsing.
pub function_table: FunctionTable,
/// Memoization: offsets where arrow function parsing has already failed.
/// Prevents exponential re-processing of nested expressions like
/// `(a=(b=(c=0)))` where each failed arrow attempt would otherwise
/// re-attempt inner positions during grouping expression re-parse.
arrow_function_failed_positions: HashSet<usize>,
/// Regex literals whose compilation is deferred until after parsing.
deferred_regexes: Vec<DeferredRegex>,
}
impl<'a> Parser<'a> {
/// Creates a parser over `source` whose first line is numbered 1.
pub fn new(source: &'a [u16], program_type: ProgramType) -> Self {
    const FIRST_LINE_NUMBER: u32 = 1;
    Self::new_with_line_offset(source, program_type, FIRST_LINE_NUMBER)
}
/// Creates a parser over `source` whose first line is numbered
/// `initial_line_number`.
///
/// HTML-style comments are disabled in the lexer for modules. The first
/// token is read eagerly so `current_token` is valid right away.
pub fn new_with_line_offset(
    source: &'a [u16],
    program_type: ProgramType,
    initial_line_number: u32,
) -> Self {
    let mut lexer = Lexer::new(source, initial_line_number, 0);
    let parsing_module = program_type == ProgramType::Module;
    if parsing_module {
        lexer.disallow_html_comments();
    }
    let current_token = lexer.next();

    Self {
        // Token stream and diagnostics.
        lexer,
        current_token,
        errors: Vec::new(),
        saved_states: Vec::new(),
        program_type,
        source,
        // Context flags all start cleared.
        flags: ParserFlags::default(),
        initiated_by_eval: false,
        in_eval_function_context: false,
        // Label and naming bookkeeping.
        labels_in_scope: HashMap::new(),
        last_inner_label_is_iteration: false,
        last_function_name: Utf16String::default(),
        last_function_kind: FunctionKind::Normal,
        last_class_name: Utf16String::default(),
        // Binding pattern state.
        pattern_bound_names: Vec::new(),
        allow_member_expressions: false,
        binding_pattern_start: None,
        // Class state.
        class_has_super_class: false,
        class_scope_depth: 0,
        has_default_export_name: false,
        referenced_private_names_stack: Vec::new(),
        // For-loop declaration channel.
        for_loop_declaration_count: 0,
        for_loop_declaration_has_init: false,
        for_loop_declaration_is_var: false,
        // Scope analysis, side tables and caches.
        scope_collector: ScopeCollector::new(),
        exported_names: HashSet::new(),
        function_table: FunctionTable::new(),
        arrow_function_failed_positions: HashSet::new(),
        deferred_regexes: Vec::new(),
    }
}
// === AST construction helpers ===
/// Builds a source range from `start` up to the current token's position.
pub(crate) fn range_from(&self, start: Position) -> SourceRange {
    let end = self.position();
    SourceRange { start, end }
}

/// Wraps `expression` in an `Expression` node spanning from `start` to the
/// current token's position.
pub(crate) fn expression(&self, start: Position, expression: ExpressionKind) -> Expression {
    let range = self.range_from(start);
    Expression::new(range, expression)
}

/// Wraps `statement` in a `Statement` node spanning from `start` to the
/// current token's position.
pub(crate) fn statement(&self, start: Position, statement: StatementKind) -> Statement {
    let range = self.range_from(start);
    Statement::new(range, statement)
}

/// Creates a shared `Identifier` node named `name`, spanning from `start`
/// to the current token's position.
pub(crate) fn make_identifier(
    &self,
    start: Position,
    name: impl Into<Utf16String>,
) -> Rc<Identifier> {
    let range = self.range_from(start);
    let identifier = Identifier::new(range, name.into());
    Rc::new(identifier)
}
/// Translates a parsed parameter list into scope collector entries and
/// hands them to the scope collector.
///
/// `parameter_info` is consumed front to back alongside `parameters`:
/// - An identifier parameter takes the next info entry if one is left;
///   otherwise its entry is built from the identifier itself.
/// - A binding-pattern parameter first contributes a nameless placeholder
///   entry, then takes every immediately following info entry that is
///   marked `is_from_pattern`.
///
/// The scope collector is also told whether any parameter has a default
/// value or any pattern contains an expression.
pub(crate) fn register_function_parameters_with_scope(
    &mut self,
    parameters: &[FunctionParameter],
    parameter_info: &[ParamInfo],
) {
    use crate::ast::FunctionParameterBinding;
    use crate::scope_collector::ParameterEntry;

    let mut entries: Vec<ParameterEntry> = Vec::new();
    let mut has_parameter_expressions = false;
    // Cursor over the info entries that have not been matched yet.
    let mut remaining_info = parameter_info.iter().peekable();

    for parameter in parameters {
        has_parameter_expressions |= parameter.default_value.is_some();

        match &parameter.binding {
            FunctionParameterBinding::Identifier(id) => {
                let entry = match remaining_info.next() {
                    Some(info) => ParameterEntry {
                        name: info.name.clone(),
                        identifier: Some(id.clone()),
                        is_rest: info.is_rest,
                        is_from_pattern: info.is_from_pattern,
                        is_first_from_pattern: false,
                    },
                    None => ParameterEntry {
                        name: id.name.clone(),
                        identifier: Some(id.clone()),
                        is_rest: parameter.is_rest,
                        is_from_pattern: false,
                        is_first_from_pattern: false,
                    },
                };
                entries.push(entry);
            }
            FunctionParameterBinding::BindingPattern(pattern) => {
                has_parameter_expressions |= pattern.contains_expression();

                // Placeholder for the pattern parameter itself, so that
                // subsequent parameters get correct positional indices.
                entries.push(ParameterEntry {
                    name: Utf16String::default(),
                    identifier: None,
                    is_rest: false,
                    is_from_pattern: true,
                    is_first_from_pattern: true,
                });

                // Names bound by this pattern follow the placeholder.
                while let Some(info) = remaining_info.next_if(|info| info.is_from_pattern) {
                    entries.push(ParameterEntry {
                        name: info.name.clone(),
                        identifier: info.identifier.clone(),
                        is_rest: info.is_rest,
                        is_from_pattern: true,
                        is_first_from_pattern: false,
                    });
                }
            }
        }
    }

    self.scope_collector
        .set_function_parameters(&entries, has_parameter_expressions);
}
// === Token access ===
/// Borrows the token the parser is currently positioned on.
pub(crate) fn current_token(&self) -> &Token {
    &self.current_token
}

/// Returns the type of the current token.
pub(crate) fn current_token_type(&self) -> TokenType {
    self.current_token().token_type
}

/// Reports whether the current token has type `tt`.
pub(crate) fn match_token(&self, tt: TokenType) -> bool {
    let current = self.current_token.token_type;
    current == tt
}

/// Reports whether the current token is the end-of-file token.
pub(crate) fn done(&self) -> bool {
    self.current_token.token_type == TokenType::Eof
}
// === Token consumption ===
/// Returns the current token and advances to the next one from the lexer.
///
/// Unless a property key is being parsed, the consumed token is also
/// checked for `arguments`/`eval`. Afterwards `previous_token_was_period`
/// is updated to reflect the consumed token.
pub(crate) fn consume(&mut self) -> Token {
    let upcoming = self.lexer.next();
    let consumed = std::mem::replace(&mut self.current_token, upcoming);

    // C++ checks for `arguments`/`eval` in `consume_and_allow_division()` which
    // is used by `consume_identifier()`. We put the check here (in `consume()`)
    // but skip it when parsing property keys, matching C++'s behavior.
    let in_property_key = self.flags.in_property_key_context;
    if !in_property_key {
        self.check_arguments_or_eval(&consumed);
    }

    // Must happen after the check above, which reads the previous value.
    self.flags.previous_token_was_period = matches!(consumed.token_type, TokenType::Period);
    consumed
}
/// Consumes the current token and, in strict mode, reports a syntax error
/// if it is an identifier spelled like a strict-mode reserved word.
///
/// The token is returned either way.
pub(crate) fn consume_and_check_identifier(&mut self) -> Token {
    let token = self.consume();

    let needs_check = self.flags.strict_mode && token.token_type == TokenType::Identifier;
    if !needs_check {
        return token;
    }

    let value = self.token_value(&token);
    if is_strict_reserved_word(value) {
        let message = format!(
            "Identifier must not be a reserved word in strict mode ('{}')",
            String::from_utf16_lossy(value)
        );
        self.syntax_error(&message);
    }
    token
}
/// Consumes the current token, first reporting a syntax error if it is not
/// of type `expected`. The token is consumed even on mismatch.
pub(crate) fn consume_token(&mut self, expected: TokenType) -> Token {
    let matches_expected = self.current_token.token_type == expected;
    if !matches_expected {
        self.expected(expected.name());
    }
    self.consume()
}
/// Consumes the current token if it has type `tt`.
///
/// Returns whether a token was consumed.
pub(crate) fn eat(&mut self, tt: TokenType) -> bool {
    let matched = self.match_token(tt);
    if matched {
        self.consume();
    }
    matched
}
/// Inspects a just-consumed token for the identifiers `arguments` and `eval`.
///
/// Only `Identifier` tokens that do not directly follow a `.` are
/// considered. Either name marks the function as possibly needing an
/// arguments object; `arguments` inside a class field initializer is also
/// a syntax error.
fn check_arguments_or_eval(&mut self, token: &Token) {
    let is_candidate =
        token.token_type == TokenType::Identifier && !self.flags.previous_token_was_period;
    if !is_candidate {
        return;
    }

    let value: &[u16] = if let Some(ref v) = token.identifier_value {
        v
    } else {
        // An out-of-bounds token range is treated as an empty name.
        let start = token.value_start as usize;
        let end = start + token.value_len as usize;
        self.source.get(start..end).unwrap_or(&[])
    };

    let is_arguments = value == utf16!("arguments");
    let is_eval = !is_arguments && value == utf16!("eval");

    if is_arguments && self.flags.in_class_field_initializer {
        self.syntax_error("'arguments' is not allowed in class field initializer");
    }
    if is_arguments || is_eval {
        self.flags.function_might_need_arguments_object = true;
    }
}
/// Consumes an identifier token.
///
/// If the current token is not usable as an identifier, a syntax error is
/// reported and the token is consumed anyway without the reserved-word check.
pub(crate) fn consume_identifier(&mut self) -> Token {
    if !self.match_identifier() {
        self.expected("identifier");
        return self.consume();
    }
    self.consume_and_check_identifier()
}
// https://tc39.es/ecma262/#sec-numeric-literals-early-errors
// It is a Syntax Error if IsStringWellFormedUnicode of the source text matched
// by NumericLiteral is not true.
// The source character immediately following a NumericLiteral must not be an
// IdentifierStart or DecimalDigit.
/// Consumes the current (numeric literal) token and reports early errors:
/// a `0`-prefixed literal followed by another digit in strict mode, and an
/// identifier name that follows the literal with no trivia in between.
pub(crate) fn consume_and_validate_numeric_literal(&mut self) -> Token {
    let token = self.consume();

    if self.flags.strict_mode {
        // https://tc39.es/ecma262/#sec-additional-syntax-numeric-literals
        // In strict mode, legacy octal literals (0-prefixed) are not permitted.
        let looks_like_legacy_octal = matches!(
            self.token_value(&token),
            [first, second, ..]
                if *first == ch(b'0') && (ch(b'0')..=ch(b'9')).contains(second)
        );
        if looks_like_legacy_octal {
            self.syntax_error("Unprefixed octal number not allowed in strict mode");
        }
    }

    let identifier_follows = self.match_identifier_name();
    if identifier_follows && self.current_token.trivia_len == 0 {
        self.syntax_error("Numeric literal must not be immediately followed by identifier");
    }
    token
}
// https://tc39.es/ecma262/#sec-automatic-semicolon-insertion
// A semicolon is automatically inserted when:
// 1. The offending token is separated from the previous token by at least
// one LineTerminator.
// 2. The offending token is `}`.
// 3. The previous token is `)` and the inserted semicolon would then be
// parsed as the terminating semicolon of a do-while statement.
// 4. The end of the input stream of tokens is reached.
/// Consumes an explicit `;` if present. Otherwise accepts an implicit one
/// when the current token is preceded by a line terminator, is `}`, or is
/// end-of-file; in any other case a syntax error is reported.
pub(crate) fn consume_or_insert_semicolon(&mut self) {
    if self.eat(TokenType::Semicolon) {
        return;
    }

    let insertion_allowed = self.current_token.trivia_has_line_terminator
        || self.match_token(TokenType::CurlyClose)
        || self.done();
    if !insertion_allowed {
        self.expected("Semicolon");
    }
}
// === Lookahead ===
/// Peeks at the token after the current one without consuming anything.
///
/// The lexer state is saved before reading and restored afterwards.
pub(crate) fn next_token(&mut self) -> Token {
    self.lexer.save_state();
    let peeked = self.lexer.next();
    self.lexer.load_state();
    peeked
}
// === Position ===
/// Returns the line, column and offset of the current token.
pub(crate) fn position(&self) -> Position {
    let token = &self.current_token;
    Position {
        line: token.line_number,
        column: token.line_column,
        offset: token.offset,
    }
}

/// Returns the current token's offset minus the length of its leading trivia.
pub(crate) fn source_text_end_offset(&self) -> u32 {
    let token = &self.current_token;
    token.offset - token.trivia_len
}
// === Error reporting ===
pub(crate) fn syntax_error(&mut self, message: &str) {
self.errors.push(ParseError {
message: message.to_string(),
line: self.current_token.line_number,
column: self.current_token.line_column,
});
}
pub(crate) fn syntax_error_at(&mut self, message: &str, line: u32, column: u32) {
self.errors.push(ParseError {
message: message.to_string(),
line,
column,
});
}
pub(crate) fn syntax_error_at_position(&mut self, message: &str, pos: Position) {
self.syntax_error_at(message, pos.line, pos.column);
}
/// Records that the private name `name` was referenced.
///
/// Inside a class body the name is added to the innermost class's set and
/// `true` is returned (validity is decided when the class ends). Outside
/// every class body nothing is recorded and `false` is returned.
pub(crate) fn register_referenced_private_name(&mut self, name: &[u16]) -> bool {
    match self.referenced_private_names_stack.last_mut() {
        Some(innermost) => {
            innermost.insert(Utf16String::from(name));
            true
        }
        None => false,
    }
}
/// Consumes a private identifier token and builds its AST node.
///
/// The name is registered as a private-name reference; if the parser is not
/// inside any class body, a syntax error is reported at the identifier.
/// The current token must be a PrivateIdentifier.
pub(crate) fn parse_private_identifier(&mut self, range_start: Position) -> PrivateIdentifier {
    // Copy the name out before consuming so it can be used after the parser
    // is mutated; the consumed token carries the same text.
    let name = self.token_value(&self.current_token).to_vec();

    let inside_class = self.register_referenced_private_name(&name);
    if !inside_class {
        let message = format!(
            "Reference to undeclared private field or method '{}'",
            String::from_utf16_lossy(&name)
        );
        self.syntax_error(&message);
    }

    self.consume();
    PrivateIdentifier {
        range: self.range_from(range_start),
        name: name.into(),
    }
}
/// Reports that `what` was expected at the current token.
///
/// If the current token carries its own message, that message is reported
/// instead of the generic "Unexpected token" text.
pub(crate) fn expected(&mut self, what: &str) {
    let text = match self.current_token.message {
        Some(ref token_message) => token_message.clone(),
        None => format!(
            "Unexpected token {}. Expected {}",
            self.current_token.token_type.name(),
            what
        ),
    };
    self.syntax_error(&text);
}
/// Moves the deferred regex literals collected so far out of the parser,
/// leaving an empty list behind.
/// The caller is responsible for compiling them (on the main thread).
pub(crate) fn take_deferred_regexes(&mut self) -> Vec<DeferredRegex> {
    let mut taken = Vec::new();
    std::mem::swap(&mut taken, &mut self.deferred_regexes);
    taken
}
/// Compiles every deferred regex literal in order.
///
/// A successful compilation stores the resulting handle in the literal's
/// `compiled_regex`. Each failure becomes a `ParseError` at the literal's
/// line and column. Returns all failures; an empty vector means success.
pub(crate) fn compile_deferred_regexes(deferred: Vec<DeferredRegex>) -> Vec<ParseError> {
    deferred
        .into_iter()
        .filter_map(|regex| {
            match crate::bytecode::ffi::compile_regex(&regex.pattern, &regex.flags) {
                Ok(handle) => {
                    regex.compiled_regex.set(handle);
                    None
                }
                Err(message) => Some(ParseError {
                    message,
                    line: regex.line,
                    column: regex.column,
                }),
            }
        })
        .collect()
}
/// Validates the flags of a regex literal.
///
/// Accepts the flags `d`, `g`, `i`, `m`, `s`, `u`, `v` and `y`, each at most
/// once. Reports a single syntax error for the first invalid or repeated
/// flag and stops.
pub(crate) fn validate_regex_flags(&mut self, flags: &[u16]) {
    const VALID_FLAGS: &[u8] = b"dgimsuvy";

    // Indexed by code unit; only values below 128 ever reach the index.
    let mut seen = [false; 128];

    for &flag in flags {
        let is_valid = flag < 128 && VALID_FLAGS.iter().any(|&valid| ch(valid) == flag);
        if !is_valid {
            let shown = char::from_u32(flag as u32).unwrap_or('?');
            self.syntax_error(&format!("Invalid RegExp flag '{}'", shown));
            return;
        }

        let already_seen = std::mem::replace(&mut seen[flag as usize], true);
        if already_seen {
            let shown = char::from_u32(flag as u32).unwrap_or('?');
            self.syntax_error(&format!("Repeated RegExp flag '{}'", shown));
            return;
        }
    }
}
/// Reports whether any syntax error has been recorded.
pub fn has_errors(&self) -> bool {
    !self.errors().is_empty()
}

/// Borrows the syntax errors recorded so far.
pub fn errors(&self) -> &[ParseError] {
    self.errors.as_slice()
}

/// Moves the recorded syntax errors out of the parser, leaving it with an
/// empty error list.
pub fn take_errors(&mut self) -> Vec<ParseError> {
    let mut taken = Vec::new();
    std::mem::swap(&mut taken, &mut self.errors);
    taken
}

/// Formats every recorded error as `line:column: message`.
pub fn error_messages(&self) -> Vec<String> {
    let mut formatted = Vec::with_capacity(self.errors.len());
    for error in &self.errors {
        formatted.push(format!(
            "{}:{}: {}",
            error.line, error.column, error.message
        ));
    }
    formatted
}
// === State save/restore for backtracking ===
/// Pushes a snapshot of the parser state for later backtracking.
///
/// The lexer saves its own state first; the snapshot then records the
/// current token, the error and deferred-regex list lengths, the flags and
/// the scope collector state.
pub(crate) fn save_state(&mut self) {
    self.lexer.save_state();

    let snapshot = SavedState {
        token: self.current_token.clone(),
        errors_len: self.errors.len(),
        flags: self.flags,
        scope_collector_state: self.scope_collector.save_state(),
        deferred_regexes_len: self.deferred_regexes.len(),
    };
    self.saved_states.push(snapshot);
}
pub(crate) fn load_state(&mut self) {
let state = self.saved_states.pop().expect("No saved state to restore");
self.current_token = state.token;
self.errors.truncate(state.errors_len);
self.deferred_regexes.truncate(state.deferred_regexes_len);
self.flags = state.flags;
self.scope_collector.load_state(state.scope_collector_state);
self.lexer.load_state();
}
/// Drops the most recent snapshot without restoring it, and tells the lexer
/// to discard its matching saved state. A missing parser snapshot is ignored.
pub(crate) fn discard_saved_state(&mut self) {
    drop(self.saved_states.pop());
    self.lexer.discard_saved_state();
}
// === Token matching helpers ===
/// Reports whether the current token can be used as an identifier in the
/// present context.
pub(crate) fn match_identifier(&self) -> bool {
    let current = &self.current_token;
    self.token_is_identifier(current)
}
/// Reports whether `token` can be used as an identifier given the current
/// parser flags and program type.
///
/// - `Identifier` and `async` always qualify.
/// - `let` qualifies outside strict mode.
/// - `yield` qualifies outside strict mode and outside generator functions.
/// - `await` qualifies only in scripts, when await expressions are not
///   valid and the parser is not inside a class static init block.
/// - An escaped keyword qualifies unless `match_invalid_escaped_keyword()`
///   rejects it.
///
/// NOTE(review): `match_invalid_escaped_keyword()` inspects
/// `self.current_token`, not `token`. Confirm that this is intended for
/// callers that pass a lookahead token.
pub(crate) fn token_is_identifier(&self, token: &Token) -> bool {
    use TokenType::*;
    let flags = &self.flags;
    match token.token_type {
        Identifier | Async => true,
        EscapedKeyword => !self.match_invalid_escaped_keyword(),
        Let => !flags.strict_mode,
        Yield => !(flags.strict_mode || flags.in_generator_function_context),
        Await => {
            !(flags.await_expression_is_valid
                || self.program_type == ProgramType::Module
                || flags.in_class_static_init_block)
        }
        _ => false,
    }
}
/// Reports whether the current token is an identifier name: either its
/// token type says so, or it is usable as an identifier in this context.
pub(crate) fn match_identifier_name(&self) -> bool {
    let type_is_identifier_name = self.current_token.token_type.is_identifier_name();
    type_is_identifier_name || self.match_identifier()
}
/// Reports whether the current token is an escaped keyword that may NOT be
/// used as an identifier in the present context.
///
/// Returns `false` when the current token is not an escaped keyword at all.
pub(crate) fn match_invalid_escaped_keyword(&self) -> bool {
    if !self.match_token(TokenType::EscapedKeyword) {
        return false;
    }

    let keyword = self.token_value(&self.current_token);
    if keyword == utf16!("await") {
        self.program_type == ProgramType::Module
            || self.flags.await_expression_is_valid
            || self.flags.in_class_static_init_block
    } else if keyword == utf16!("async") {
        false
    } else if keyword == utf16!("yield") {
        self.flags.in_generator_function_context
    } else if self.flags.strict_mode {
        true
    } else {
        // In non-strict mode, "let" and "static" are context-sensitive
        // keywords that are valid as escaped identifiers. All other
        // escaped keywords (break, for, etc.) are always invalid.
        !(keyword == utf16!("let") || keyword == utf16!("static"))
    }
}
/// Reports a syntax error if `name` may not be used as a binding target in
/// strict mode.
///
/// Does nothing unless the parser is in strict mode or `force_strict` is
/// set. `arguments` and `eval` get a dedicated message; other strict-mode
/// reserved words get the generic reserved-word message.
pub(crate) fn check_identifier_name_for_assignment_validity(
    &mut self,
    name: &[u16],
    force_strict: bool,
) {
    let treat_as_strict = self.flags.strict_mode || force_strict;
    if !treat_as_strict {
        return;
    }

    let is_arguments_or_eval = name == utf16!("arguments") || name == utf16!("eval");
    if is_arguments_or_eval {
        self.syntax_error(
            "Binding pattern target may not be called 'arguments' or 'eval' in strict mode",
        );
        return;
    }

    if is_strict_reserved_word(name) {
        let message = format!(
            "Identifier must not be a reserved word in strict mode ('{}')",
            String::from_utf16_lossy(name)
        );
        self.syntax_error(&message);
    }
}
/// Reports a syntax error for every repeated parameter name of an arrow
/// function. Arrow functions always reject duplicates, regardless of
/// strict mode. Entries with an empty name are ignored.
pub(crate) fn check_arrow_duplicate_parameters(&mut self, parameter_info: &[ParamInfo]) {
    let mut seen: HashSet<&[u16]> = HashSet::new();
    let named = parameter_info.iter().filter(|info| !info.name.is_empty());

    for info in named {
        let name: &[u16] = &info.name;
        let is_first_occurrence = seen.insert(name);
        if is_first_occurrence {
            continue;
        }
        let message = format!(
            "Duplicate parameter '{}' not allowed in arrow function",
            String::from_utf16_lossy(name)
        );
        self.syntax_error(&message);
    }
}
/// Re-validates parameter names after the function body has been parsed,
/// for when 'use strict' was found in the body or the function is a
/// generator/async.
///
/// Every non-empty name is checked for strict-mode binding validity
/// (honouring `force_strict`), and each repeated name is reported as a
/// duplicate. `_kind` is currently unused.
pub(crate) fn check_parameters_post_body(
    &mut self,
    parameter_info: &[ParamInfo],
    force_strict: bool,
    _kind: FunctionKind,
) {
    let mut seen: HashSet<&[u16]> = HashSet::new();
    let named = parameter_info.iter().filter(|info| !info.name.is_empty());

    for info in named {
        let name: &[u16] = &info.name;
        self.check_identifier_name_for_assignment_validity(name, force_strict);

        let is_first_occurrence = seen.insert(name);
        if is_first_occurrence {
            continue;
        }
        let message = format!(
            "Duplicate parameter '{}' not allowed in strict mode",
            String::from_utf16_lossy(name)
        );
        self.syntax_error(&message);
    }
}
pub(crate) fn token_value<'b>(&'b self, token: &'b Token) -> &'b [u16] {
if let Some(ref value) = token.identifier_value {
return value;
}
let start = token.value_start as usize;
let end = start + token.value_len as usize;
assert!(
end <= self.source.len(),
"token_value: bounds [{start}..{end}) exceed source length {}",
self.source.len()
);
&self.source[start..end]
}
/// Returns the raw source text of `token`, ignoring any `identifier_value`
/// the token carries.
///
/// The returned slice borrows from the source buffer, not from the parser.
///
/// # Panics
///
/// Panics if the token's range does not fit inside the source.
pub(crate) fn token_original_value(&self, token: &Token) -> &'a [u16] {
    // Widen each operand to `usize` before adding, as `token_value` does,
    // so the sum cannot overflow the token's narrower integer type.
    let start = token.value_start as usize;
    let end = start + token.value_len as usize;
    assert!(
        end <= self.source.len(),
        "token_original_value: bounds [{start}..{end}) exceed source length {}",
        self.source.len()
    );
    &self.source[start..end]
}
/// Re-parse the source range starting at `start` as a binding pattern
/// with member expressions allowed (for destructuring assignment patterns).
///
/// The parser's lexer, current token and `allow_member_expressions` flag are
/// swapped out for the duration of the re-parse and put back afterwards, so
/// the caller continues from where it was. As written, this always returns
/// `Some`.
pub(crate) fn synthesize_binding_pattern(&mut self, start: Position) -> Option<BindingPattern> {
// Clear any syntax errors that occurred in the range of the expression
// being reinterpreted as a binding pattern. This matches C++'s behavior
// where errors like duplicate __proto__ in object literals are cleared
// when the object is reinterpreted as an assignment target.
let end_line = self.current_token.line_number;
let end_column = self.current_token.line_column;
// Keep an error only if it lies before `start` or at/after the current
// token; errors located in [start, current token) are dropped.
self.errors.retain(|e| {
!(e.line > start.line || (e.line == start.line && e.column >= start.column))
|| (e.line > end_line || (e.line == end_line && e.column >= end_column))
});
// Swap in a fresh lexer positioned at `start`, keeping the real one aside.
let saved_lexer = std::mem::replace(
&mut self.lexer,
Lexer::new_at_offset(self.source, start.offset as usize, start.line, start.column),
);
// The Eof token is only a placeholder; it is overwritten just below.
let saved_token = std::mem::replace(&mut self.current_token, Token::new(TokenType::Eof));
let saved_allow = self.allow_member_expressions;
// Prime the temporary lexer's first token and enable member expressions
// as binding targets for the re-parse.
self.current_token = self.lexer.next();
self.allow_member_expressions = true;
let pattern = self.parse_binding_pattern();
// Restore everything that was swapped out above.
self.lexer = saved_lexer;
self.current_token = saved_token;
self.allow_member_expressions = saved_allow;
Some(pattern)
}
/// Reports whether `expression` is a simple assignment target.
///
/// Identifiers and member expressions always qualify; call expressions
/// qualify only when `allow_call_expression` is set.
pub(crate) fn is_simple_assignment_target(
    expression: &Expression,
    allow_call_expression: bool,
) -> bool {
    match &expression.inner {
        ExpressionKind::Identifier(_) | ExpressionKind::Member { .. } => true,
        ExpressionKind::Call(_) => allow_call_expression,
        _ => false,
    }
}
/// Reports whether `expression` is an object expression.
fn is_object_expression(expression: &Expression) -> bool {
    let kind = &expression.inner;
    matches!(kind, ExpressionKind::Object(_))
}

/// Reports whether `expression` is an array expression.
fn is_array_expression(expression: &Expression) -> bool {
    let kind = &expression.inner;
    matches!(kind, ExpressionKind::Array(_))
}

/// Reports whether `expression` is an identifier.
fn is_identifier(expression: &Expression) -> bool {
    let kind = &expression.inner;
    matches!(kind, ExpressionKind::Identifier(_))
}

/// Reports whether `expression` is a member expression.
fn is_member_expression(expression: &Expression) -> bool {
    let kind = &expression.inner;
    matches!(kind, ExpressionKind::Member { .. })
}

/// Reports whether `expression` is a call expression.
fn is_call_expression(expression: &Expression) -> bool {
    let kind = &expression.inner;
    matches!(kind, ExpressionKind::Call(_))
}

/// Reports whether `expression` is an update expression.
fn is_update_expression(expression: &Expression) -> bool {
    let kind = &expression.inner;
    matches!(kind, ExpressionKind::Update { .. })
}
// === Main entry point ===
/// Parses the whole source as a script or module (per the parser's program
/// type) and returns the resulting `Program` statement.
///
/// `starts_in_strict_mode` only affects scripts; modules are always
/// reported as strict. `has_top_level_await` is always `false` for scripts.
pub fn parse_program(&mut self, starts_in_strict_mode: bool) -> Statement {
    let start = self.position();

    let (children, program_type, is_strict_mode, has_top_level_await) =
        if self.program_type == ProgramType::Script {
            let (children, is_strict) = self.parse_script(starts_in_strict_mode);
            (children, ProgramType::Script, is_strict, false)
        } else {
            let (children, has_top_level_await) = self.parse_module();
            (children, ProgramType::Module, true, has_top_level_await)
        };

    // The program scope was opened by parse_script / parse_module.
    // Close it now that the children are known.
    let scope = ScopeData::shared_with_children(children);
    self.scope_collector.set_scope_node(scope.clone());
    self.scope_collector.close_scope();

    let program = ProgramData {
        scope,
        program_type,
        is_strict_mode,
        has_top_level_await,
    };
    self.statement(start, StatementKind::Program(program))
}
/// Parses the body of a script.
///
/// Returns the top-level statements and whether the script ended up in
/// strict mode (requested by the caller or via a `"use strict"` directive).
/// The parser's `strict_mode` flag is put back to its prior value before
/// returning. The program scope opened here is left open for the caller.
fn parse_script(&mut self, starts_in_strict_mode: bool) -> (Vec<Statement>, bool) {
    // Open program scope — will be closed in parse_program after ScopeData is created.
    self.scope_collector.open_program_scope(ProgramType::Script);

    let previous_strict_mode = self.flags.strict_mode;
    self.flags.strict_mode |= starts_in_strict_mode;

    let (has_use_strict, mut statements) = self.parse_directive();
    self.flags.strict_mode |= has_use_strict;

    statements.extend(self.parse_statement_list(true));

    // Anything left over is a token the statement list could not handle.
    if !self.done() {
        let what = if self.flags.in_function_context {
            "CurlyClose"
        } else {
            "statement or declaration"
        };
        self.expected(what);
        self.consume();
    }

    let is_strict = self.flags.strict_mode;
    self.flags.strict_mode = previous_strict_mode;
    (statements, is_strict)
}
// https://tc39.es/ecma262/#sec-modules
// Module code is always strict mode code.
/// Parses the body of a module.
///
/// Strict mode and `await` are enabled while parsing and put back to their
/// prior values afterwards. Returns the top-level statements and whether the
/// scope collector saw an await expression. The program scope opened here
/// is left open for the caller.
fn parse_module(&mut self) -> (Vec<Statement>, bool) {
    // Open program scope — will be closed in parse_program after ScopeData is created.
    self.scope_collector.open_program_scope(ProgramType::Module);

    let previous_strict_mode = self.flags.strict_mode;
    let previous_await_validity = self.flags.await_expression_is_valid;
    self.flags.strict_mode = true;
    self.flags.await_expression_is_valid = true;

    let mut statements = Vec::new();
    loop {
        if self.done() {
            break;
        }
        statements.extend(self.parse_statement_list(true));
        if self.done() {
            break;
        }

        // The statement list stopped early: the current token is either an
        // import/export or something unexpected.
        if !self.match_export_or_import() {
            self.expected("statement or declaration");
            self.consume();
            continue;
        }
        let statement = if self.match_token(TokenType::Export) {
            self.parse_export_statement()
        } else {
            self.parse_import_statement()
        };
        statements.push(statement);
    }

    // Check that all exported bindings are declared in the module.
    self.check_undeclared_exports(&statements);

    self.flags.strict_mode = previous_strict_mode;
    self.flags.await_expression_is_valid = previous_await_validity;

    let has_top_level_await = self.scope_collector.contains_await_expression();
    (statements, has_top_level_await)
}
fn check_undeclared_exports(&mut self, children: &[Statement]) {
use crate::ast::*;
// Collect all declared names at module level.
let mut declared_names: HashSet<Utf16String> = HashSet::new();
for child in children {
match &child.inner {
StatementKind::VariableDeclaration { declarations, .. } => {
for decl in declarations {
collect_binding_names(&decl.target, &mut declared_names);
}
}
StatementKind::FunctionDeclaration {
name: Some(name), ..
} => {
declared_names.insert(name.name.clone());
}
StatementKind::ClassDeclaration(data) => {
if let Some(ref name) = data.name {
declared_names.insert(name.name.clone());
}
}
StatementKind::Import(data) => {
for entry in &data.entries {
declared_names.insert(entry.local_name.clone());
}
}
StatementKind::Export(data) => {
if let Some(ref statement) = data.statement {
match &statement.inner {
StatementKind::VariableDeclaration { declarations, .. } => {
for decl in declarations {
collect_binding_names(&decl.target, &mut declared_names);
}
}
StatementKind::FunctionDeclaration {
name: Some(name), ..
} => {
declared_names.insert(name.name.clone());
}
StatementKind::ClassDeclaration(class_data) => {
if let Some(ref name) = class_data.name {
declared_names.insert(name.name.clone());
}
}
_ => {}
}
}
}
_ => {}
}
}
// Check each export's local bindings.
for child in children {
if let StatementKind::Export(data) = &child.inner {
if data.statement.is_some() {
continue;
}
for entry in &data.entries {
if data.module_request.is_some() {
continue;
}
if entry.kind == ExportEntryKind::EmptyNamedExport {
continue;
}
if let Some(ref local_name) = entry.local_or_import_name
&& !declared_names.contains(local_name.as_slice())
{
self.syntax_error_at_position(
&format!(
"'{}' in export is not declared",
String::from_utf16_lossy(local_name.as_slice())
),
child.range.start,
);
}
}
}
}
}
// https://tc39.es/ecma262/#sec-directive-prologues-and-the-use-strict-directive
// A Directive Prologue is a sequence of ExpressionStatements at the beginning
// of a FunctionBody, ScriptBody, or ModuleBody that each consist entirely of
// a StringLiteral followed by semicolon. A "use strict" directive causes
// subsequent code to be interpreted in strict mode.
/// Parses the leading run of statements that start with a string literal.
///
/// Returns `(found_use_strict, statements)`. Parsing of the prologue stops
/// right after the first "use strict" directive, or at the first statement
/// that does not begin with a string literal. The legacy-octal-escape flag is
/// cleared before returning.
pub(crate) fn parse_directive(&mut self) -> (bool, Vec<Statement>) {
    let mut directives = Vec::new();
    let mut saw_use_strict = false;

    loop {
        if self.done() || !self.match_token(TokenType::StringLiteral) {
            break;
        }

        // Capture the literal's original source text before the statement is consumed.
        let literal_source = self.token_original_value(&self.current_token);
        directives.push(self.parse_statement(false));

        if !is_use_strict(literal_source) {
            continue;
        }

        saw_use_strict = true;
        // A legacy octal escape seen so far is an error once "use strict" is found.
        if self.flags.string_legacy_octal_escape_sequence_in_scope {
            self.syntax_error(
                "Octal escape sequence in string literal not allowed in strict mode",
            );
        }
        break;
    }

    self.flags.string_legacy_octal_escape_sequence_in_scope = false;
    (saw_use_strict, directives)
}
/// Parses declarations and statements until input ends, an import/export
/// statement is reached, or the current token can start neither a
/// declaration nor a statement.
///
/// `allow_labelled_functions` is forwarded to `parse_statement` for every
/// non-declaration statement.
pub(crate) fn parse_statement_list(
    &mut self,
    allow_labelled_functions: bool,
) -> Vec<Statement> {
    let mut list = Vec::new();
    while !self.done() && !self.match_export_or_import() {
        // Declarations are tried before statements.
        let item = if self.match_declaration() {
            self.parse_declaration()
        } else if self.match_statement() {
            self.parse_statement(allow_labelled_functions)
        } else {
            break;
        };
        list.push(item);
    }
    list
}
/// Returns `true` when the current token can begin a statement: either one
/// of the statement-introducing tokens listed below, or anything that
/// `match_expression` accepts.
pub(crate) fn match_statement(&mut self) -> bool {
    match self.current_token_type() {
        TokenType::CurlyOpen
        | TokenType::Semicolon
        | TokenType::Var
        | TokenType::If
        | TokenType::For
        | TokenType::While
        | TokenType::Do
        | TokenType::Switch
        | TokenType::Break
        | TokenType::Continue
        | TokenType::Return
        | TokenType::Throw
        | TokenType::Try
        | TokenType::With
        | TokenType::Debugger
        | TokenType::Slash
        | TokenType::SlashEquals => true,
        // Everything else is a statement only if it starts an expression.
        _ => self.match_expression(),
    }
}
/// Returns `true` when the current token begins a declaration.
///
/// - `function`, `class` and `const` always do.
/// - `let` always does in strict mode; otherwise the following token decides
///   (see `try_match_let_declaration`).
/// - `async` does only when directly followed by `function` with no line
///   terminator in between.
/// - An identifier does only when it is `using` followed, without a line
///   terminator, by an identifier name.
pub(crate) fn match_declaration(&mut self) -> bool {
    match self.current_token_type() {
        TokenType::Function | TokenType::Class | TokenType::Const => true,
        // Short-circuits: the lookahead only runs outside strict mode.
        TokenType::Let => self.flags.strict_mode || self.try_match_let_declaration(),
        TokenType::Async => {
            let lookahead = self.next_token();
            !lookahead.trivia_has_line_terminator
                && lookahead.token_type == TokenType::Function
        }
        TokenType::Identifier => {
            let is_using = self.token_value(&self.current_token) == utf16!("using");
            if !is_using {
                return false;
            }
            let lookahead = self.next_token();
            lookahead.token_type.is_identifier_name() && !lookahead.trivia_has_line_terminator
        }
        _ => false,
    }
}
/// Decides, from the token after `let`, whether `let` starts a declaration.
///
/// It does when the next token is an identifier name other than `in`, or
/// when it opens a destructuring pattern (`{` or `[`).
fn try_match_let_declaration(&mut self) -> bool {
    let lookahead = self.next_token();

    let names_a_binding = lookahead.token_type.is_identifier_name()
        && self.token_value(&lookahead) != utf16!("in");

    names_a_binding
        || matches!(
            lookahead.token_type,
            TokenType::CurlyOpen | TokenType::BracketOpen
        )
}
/// Returns `true` when the current token is `for`, `while` or `do`.
fn match_iteration_start(&self) -> bool {
    let token_type = self.current_token_type();
    matches!(
        token_type,
        TokenType::Do | TokenType::While | TokenType::For
    )
}
/// Returns `true` when the current token starts an export or import
/// statement.
///
/// `export` always qualifies. `import` qualifies unless the next token is
/// `(` or `.`, in which case it is not treated as an import statement.
pub(crate) fn match_export_or_import(&mut self) -> bool {
    if self.match_token(TokenType::Export) {
        return true;
    }
    if !self.match_token(TokenType::Import) {
        return false;
    }
    let lookahead = self.next_token();
    !matches!(
        lookahead.token_type,
        TokenType::ParenOpen | TokenType::Period
    )
}
// === Operator precedence ===
/// Returns the precedence level of `tt` when used as an operator.
///
/// Levels run from 1 (comma) up to 20 (member access and call tokens); a
/// larger number is a higher precedence. Tokens that are not in the table
/// yield 0.
pub(crate) fn operator_precedence(tt: TokenType) -> i32 {
    // Listed from lowest to highest level; the arms are disjoint, so their
    // order does not affect the result.
    match tt {
        TokenType::Comma => 1,
        TokenType::Yield => 2,
        // `=` and the compound assignment tokens.
        TokenType::Equals
        | TokenType::PlusEquals
        | TokenType::MinusEquals
        | TokenType::AsteriskEquals
        | TokenType::SlashEquals
        | TokenType::PercentEquals
        | TokenType::DoubleAsteriskEquals
        | TokenType::ShiftLeftEquals
        | TokenType::ShiftRightEquals
        | TokenType::UnsignedShiftRightEquals
        | TokenType::AmpersandEquals
        | TokenType::CaretEquals
        | TokenType::PipeEquals
        | TokenType::DoubleAmpersandEquals
        | TokenType::DoublePipeEquals
        | TokenType::DoubleQuestionMarkEquals => 3,
        TokenType::QuestionMark => 4,
        TokenType::DoublePipe => 5,
        TokenType::DoubleAmpersand => 6,
        TokenType::DoubleQuestionMark => 7,
        TokenType::Pipe => 8,
        TokenType::Caret => 9,
        TokenType::Ampersand => 10,
        // Equality tokens.
        TokenType::EqualsEquals
        | TokenType::ExclamationMarkEquals
        | TokenType::EqualsEqualsEquals
        | TokenType::ExclamationMarkEqualsEquals => 11,
        // Relational tokens, plus `in` and `instanceof`.
        TokenType::LessThan
        | TokenType::LessThanEquals
        | TokenType::GreaterThan
        | TokenType::GreaterThanEquals
        | TokenType::In
        | TokenType::Instanceof => 12,
        TokenType::ShiftLeft | TokenType::ShiftRight | TokenType::UnsignedShiftRight => 13,
        TokenType::Plus | TokenType::Minus => 14,
        TokenType::Asterisk | TokenType::Slash | TokenType::Percent => 15,
        TokenType::DoubleAsterisk => 16,
        // Prefix tokens.
        TokenType::ExclamationMark
        | TokenType::Tilde
        | TokenType::Typeof
        | TokenType::Void
        | TokenType::Delete
        | TokenType::Await => 17,
        TokenType::PlusPlus | TokenType::MinusMinus => 18,
        TokenType::New => 19,
        // Member access and call tokens.
        TokenType::Period
        | TokenType::BracketOpen
        | TokenType::ParenOpen
        | TokenType::QuestionMarkPeriod => 20,
        _ => 0,
    }
}
/// Returns the associativity of `tt` when used as an operator.
///
/// The tokens listed below are left-associative; every other token is
/// reported as right-associative.
pub(crate) fn operator_associativity(tt: TokenType) -> Associativity {
    let is_left_associative = matches!(
        tt,
        // Member access and call tokens.
        TokenType::Period
            | TokenType::BracketOpen
            | TokenType::ParenOpen
            | TokenType::QuestionMarkPeriod
            // Prefix keyword tokens.
            | TokenType::Typeof
            | TokenType::Void
            | TokenType::Delete
            | TokenType::Await
            // Arithmetic tokens.
            | TokenType::Asterisk
            | TokenType::Slash
            | TokenType::Percent
            | TokenType::Plus
            | TokenType::Minus
            // Shift tokens.
            | TokenType::ShiftLeft
            | TokenType::ShiftRight
            | TokenType::UnsignedShiftRight
            // Relational tokens.
            | TokenType::LessThan
            | TokenType::LessThanEquals
            | TokenType::GreaterThan
            | TokenType::GreaterThanEquals
            | TokenType::In
            | TokenType::Instanceof
            // Equality tokens.
            | TokenType::EqualsEquals
            | TokenType::ExclamationMarkEquals
            | TokenType::EqualsEqualsEquals
            | TokenType::ExclamationMarkEqualsEquals
            // Bitwise tokens.
            | TokenType::Ampersand
            | TokenType::Caret
            | TokenType::Pipe
            // Logical and coalescing tokens.
            | TokenType::DoubleQuestionMark
            | TokenType::DoubleAmpersand
            | TokenType::DoublePipe
            // Comma.
            | TokenType::Comma
    );

    if is_left_associative {
        Associativity::Left
    } else {
        Associativity::Right
    }
}
}
// === Helpers ===
/// Returns `true` when `raw` is exactly the text `'use strict'` or
/// `"use strict"`, quotes included.
fn is_use_strict(raw: &[u16]) -> bool {
    let single_quoted = raw == utf16!("'use strict'");
    let double_quoted = raw == utf16!("\"use strict\"");
    single_quoted || double_quoted
}
/// Collect all binding names introduced by a variable declarator target.
fn collect_binding_names(
target: &crate::ast::VariableDeclaratorTarget,
names: &mut HashSet<Utf16String>,
) {
match target {
crate::ast::VariableDeclaratorTarget::Identifier(identifier) => {
names.insert(identifier.name.clone());
}
crate::ast::VariableDeclaratorTarget::BindingPattern(pattern) => {
collect_binding_pattern_names(pattern, names);
}
}
}
/// Collect all binding names from a binding pattern (object or array destructuring).
fn collect_binding_pattern_names(
pattern: &crate::ast::BindingPattern,
names: &mut HashSet<Utf16String>,
) {
for entry in &pattern.entries {
if let Some(ref alias) = entry.alias {
match alias {
crate::ast::BindingEntryAlias::Identifier(identifier) => {
names.insert(identifier.name.clone());
}
crate::ast::BindingEntryAlias::BindingPattern(nested) => {
collect_binding_pattern_names(nested, names);
}
crate::ast::BindingEntryAlias::MemberExpression(_) => {}
}
} else if let Some(crate::ast::BindingEntryName::Identifier(identifier)) = &entry.name {
names.insert(identifier.name.clone());
}
}
}
// https://tc39.es/ecma262/#sec-keywords-and-reserved-words
// In strict mode code, the following tokens are also reserved:
// `implements` `interface` `let` `package` `private` `protected` `public` `static` `yield`
/// Returns `true` when `name` is one of the nine words listed above.
pub(crate) fn is_strict_reserved_word(name: &[u16]) -> bool {
    // The comparisons are split into two groups purely for readability.
    let is_type_or_visibility_word = name == utf16!("implements")
        || name == utf16!("interface")
        || name == utf16!("package")
        || name == utf16!("private")
        || name == utf16!("protected")
        || name == utf16!("public");

    let is_other_reserved_word =
        name == utf16!("let") || name == utf16!("static") || name == utf16!("yield");

    is_type_or_visibility_word || is_other_reserved_word
}