mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-04-25 17:25:08 +02:00
LibJS: Add alternative source-to-bytecode pipeline in Rust
Implement a complete Rust reimplementation of the LibJS frontend: lexer, parser, AST, scope collector, and bytecode code generator.

The Rust pipeline is built via Corrosion (CMake-Cargo bridge) and linked into LibJS as a static library. It is gated behind a build flag (ENABLE_RUST, on by default except on Windows) and two runtime environment variables:
- LIBJS_CPP: Use the C++ pipeline instead of Rust.
- LIBJS_COMPARE_PIPELINES=1: Run both pipelines in lockstep, aborting on any difference in the generated AST or bytecode.

The C++ side communicates with Rust through a C FFI layer (RustIntegration.cpp/h) that passes source text to Rust and receives a populated Executable back via a BytecodeFactory interface.
This commit is contained in:
committed by
Andreas Kling
parent
8bf1d749a1
commit
6cdfbd01a6
Notes:
github-actions[bot]
2026-02-24 08:41:00 +00:00
Author: https://github.com/awesomekling Commit: https://github.com/LadybirdBrowser/ladybird/commit/6cdfbd01a6b Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/8104
53
Cargo.lock
generated
Normal file
53
Cargo.lock
generated
Normal file
@@ -0,0 +1,53 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||
|
||||
[[package]]
|
||||
name = "libjs_rust"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"num-bigint",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
|
||||
dependencies = [
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.46"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
|
||||
5
Cargo.toml
Normal file
5
Cargo.toml
Normal file
@@ -0,0 +1,5 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"Libraries/LibJS/Rust",
|
||||
]
|
||||
resolver = "2"
|
||||
@@ -128,11 +128,13 @@ ThrowCompletionOr<Value> Interpreter::run(Script& script_record, GC::Ptr<Environ
|
||||
GC::Ptr<Executable> executable = script_record.cached_executable();
|
||||
if (!executable && result.type() == Completion::Type::Normal) {
|
||||
executable = JS::Bytecode::Generator::generate_from_ast_node(vm, *script_record.parse_node(), {});
|
||||
script_record.cache_executable(*executable);
|
||||
script_record.drop_ast();
|
||||
if (g_dump_bytecode)
|
||||
executable->dump();
|
||||
if (executable) {
|
||||
script_record.cache_executable(*executable);
|
||||
script_record.drop_ast();
|
||||
}
|
||||
}
|
||||
if (executable && g_dump_bytecode)
|
||||
executable->dump();
|
||||
|
||||
u32 registers_and_locals_count = 0;
|
||||
u32 constants_count = 0;
|
||||
|
||||
464
Libraries/LibJS/BytecodeFactory.h
Normal file
464
Libraries/LibJS/BytecodeFactory.h
Normal file
@@ -0,0 +1,464 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef ENABLE_RUST
|
||||
|
||||
# include <stddef.h>
|
||||
# include <stdint.h>
|
||||
|
||||
// FFI types for creating a Bytecode::Executable from Rust.
|
||||
//
|
||||
// The Rust bytecode generator assembles instructions into a byte buffer
|
||||
// matching C++ layout. This FFI layer creates the C++ Executable from
|
||||
// that data.
|
||||
|
||||
// Constant value tags (matches Rust ConstantValue enum discriminants)
|
||||
# define CONSTANT_TAG_NUMBER 0
|
||||
# define CONSTANT_TAG_BOOLEAN_TRUE 1
|
||||
# define CONSTANT_TAG_BOOLEAN_FALSE 2
|
||||
# define CONSTANT_TAG_NULL 3
|
||||
# define CONSTANT_TAG_UNDEFINED 4
|
||||
# define CONSTANT_TAG_EMPTY 5
|
||||
# define CONSTANT_TAG_STRING 6
|
||||
# define CONSTANT_TAG_BIGINT 7
|
||||
# define CONSTANT_TAG_RAW_VALUE 8
|
||||
|
||||
// One exception-handler table entry handed across the FFI boundary.
//
// Plain-old-data: three consecutive 32-bit offsets, so the layout is the
// same on both sides of the boundary as long as the field order is kept.
// NOTE(review): the offsets presumably index into the bytecode buffer and
// [start_offset, end_offset) is presumably the protected range — confirm
// against the Rust emitter.
struct FFIExceptionHandler {
    uint32_t start_offset;
    uint32_t end_offset;
    uint32_t handler_offset;
};
|
||||
|
||||
// One source-map entry handed across the FFI boundary: associates a
// bytecode offset with a source range.
//
// Plain-old-data: three consecutive 32-bit values; the field order is part
// of the FFI contract.
// NOTE(review): source_start/source_end are presumably offsets into the
// source text — confirm units (code units vs. bytes) against the producer.
struct FFISourceMapEntry {
    uint32_t bytecode_offset;
    uint32_t source_start;
    uint32_t source_end;
};
|
||||
|
||||
// A UTF-16 string slice (pointer + length).
//
// `data` points at `length` UTF-16 code units. The struct only borrows the
// storage; it carries no ownership information of its own.
struct FFIUtf16Slice {
    uint16_t const* data;
    size_t length;
};
|
||||
|
||||
// An optional uint32_t for FFI (replaces -1 sentinel values).
//
// `value` is only meaningful when `has_value` is true.
struct FFIOptionalU32 {
    uint32_t value;
    bool has_value;
};
|
||||
|
||||
// Class element descriptor for FFI (matches ClassElementDescriptor::Kind).
|
||||
// Kind values: 0=Method, 1=Getter, 2=Setter, 3=Field, 4=StaticInitializer
|
||||
struct FFIClassElement {
|
||||
uint8_t kind;
|
||||
bool is_static;
|
||||
bool is_private;
|
||||
uint16_t const* private_identifier;
|
||||
size_t private_identifier_len;
|
||||
FFIOptionalU32 shared_function_data_index;
|
||||
bool has_initializer;
|
||||
uint8_t literal_value_kind; // 0=none, 1=number, 2=boolean_true, 3=boolean_false, 4=null, 5=string
|
||||
double literal_value_number;
|
||||
uint16_t const* literal_value_string;
|
||||
size_t literal_value_string_len;
|
||||
};
|
||||
|
||||
# ifdef __cplusplus
|
||||
extern "C" {
|
||||
# endif
|
||||
|
||||
// Callback for reporting parse errors from Rust.
|
||||
// message is UTF-8, line and column are 1-based.
|
||||
typedef void (*RustParseErrorCallback)(void* ctx, char const* message, size_t message_len, uint32_t line, uint32_t column);
|
||||
|
||||
// Parse, compile, and extract GDI (GlobalDeclarationInstantiation) metadata for a script using the Rust
|
||||
// parser. Populates gdi_context (a ScriptGdiBuilder*) via callbacks.
|
||||
// On parse failure, calls error_callback for each error, then returns nullptr.
|
||||
// Returns a Bytecode::Executable* cast to void*, or nullptr on failure.
|
||||
void* rust_compile_script(
|
||||
uint16_t const* source,
|
||||
size_t source_len,
|
||||
void* vm_ptr,
|
||||
void const* source_code_ptr,
|
||||
void* gdi_context,
|
||||
bool dump_ast,
|
||||
bool use_color,
|
||||
void* error_context,
|
||||
RustParseErrorCallback error_callback,
|
||||
uint8_t** ast_dump_output,
|
||||
size_t* ast_dump_output_len,
|
||||
size_t initial_line_number);
|
||||
|
||||
// Parse and compile a JavaScript program using the Rust parser and
|
||||
// bytecode generator. Returns a Bytecode::Executable* cast to void*,
|
||||
// or nullptr on failure.
|
||||
void* rust_compile_program(
|
||||
uint16_t const* source,
|
||||
size_t source_len,
|
||||
void* vm_ptr,
|
||||
void const* source_code_ptr,
|
||||
uint8_t program_type,
|
||||
bool starts_in_strict_mode,
|
||||
bool initiated_by_eval,
|
||||
bool in_eval_function_context,
|
||||
bool allow_super_property_lookup,
|
||||
bool allow_super_constructor_call,
|
||||
bool in_class_field_initializer);
|
||||
|
||||
// All the data needed to create a Bytecode::Executable from Rust.
|
||||
struct FFIExecutableData {
|
||||
// Bytecode
|
||||
uint8_t const* bytecode;
|
||||
size_t bytecode_length;
|
||||
// Tables: arrays of UTF-16 string slices
|
||||
FFIUtf16Slice const* identifier_table;
|
||||
size_t identifier_count;
|
||||
FFIUtf16Slice const* property_key_table;
|
||||
size_t property_key_count;
|
||||
FFIUtf16Slice const* string_table;
|
||||
size_t string_count;
|
||||
// Constants: tagged byte array
|
||||
uint8_t const* constants_data;
|
||||
size_t constants_data_length;
|
||||
size_t constants_count;
|
||||
// Exception handlers
|
||||
FFIExceptionHandler const* exception_handlers;
|
||||
size_t exception_handler_count;
|
||||
// Source map
|
||||
FFISourceMapEntry const* source_map;
|
||||
size_t source_map_count;
|
||||
// Basic block start offsets
|
||||
size_t const* basic_block_offsets;
|
||||
size_t basic_block_count;
|
||||
// Local variable names
|
||||
FFIUtf16Slice const* local_variable_names;
|
||||
size_t local_variable_count;
|
||||
// Cache counts
|
||||
uint32_t property_lookup_cache_count;
|
||||
uint32_t global_variable_cache_count;
|
||||
uint32_t template_object_cache_count;
|
||||
uint32_t object_shape_cache_count;
|
||||
// Register and mode
|
||||
uint32_t number_of_registers;
|
||||
bool is_strict;
|
||||
// Length identifier: PropertyKeyTableIndex for "length"
|
||||
FFIOptionalU32 length_identifier;
|
||||
// Shared function data (inner functions)
|
||||
void const* const* shared_function_data;
|
||||
size_t shared_function_data_count;
|
||||
// Class blueprints (heap-allocated, ownership transfers)
|
||||
void* const* class_blueprints;
|
||||
size_t class_blueprint_count;
|
||||
// Regex table (pre-compiled, ownership transfers)
|
||||
void* const* compiled_regexes;
|
||||
size_t regex_count;
|
||||
};
|
||||
|
||||
// Create a C++ Bytecode::Executable from assembled Rust bytecode data.
|
||||
//
|
||||
// The source_code_ptr parameter is a SourceCode const* cast to void*.
|
||||
// Returns a Bytecode::Executable* cast to void*, or nullptr on failure.
|
||||
void* rust_create_executable(
|
||||
void* vm_ptr,
|
||||
void* source_code_ptr,
|
||||
FFIExecutableData const* data);
|
||||
|
||||
// All the data needed to create a SharedFunctionInstanceData from Rust.
|
||||
struct FFISharedFunctionData {
|
||||
// Function name (UTF-16)
|
||||
uint16_t const* name;
|
||||
size_t name_len;
|
||||
// Metadata
|
||||
uint8_t function_kind;
|
||||
int32_t function_length;
|
||||
uint32_t formal_parameter_count;
|
||||
bool strict;
|
||||
bool is_arrow;
|
||||
bool has_simple_parameter_list;
|
||||
// Parameter names for mapped arguments (only for simple parameter lists)
|
||||
FFIUtf16Slice const* parameter_names;
|
||||
size_t parameter_name_count;
|
||||
// Source text range (for Function.prototype.toString)
|
||||
size_t source_text_offset;
|
||||
size_t source_text_length;
|
||||
// Opaque Rust AST pointer (Box<FunctionData>)
|
||||
void* rust_function_ast;
|
||||
// Parsing insights needed before lazy compilation
|
||||
bool uses_this;
|
||||
bool uses_this_from_environment;
|
||||
};
|
||||
|
||||
// Create a SharedFunctionInstanceData from pre-computed metadata (Rust pipeline).
|
||||
// Stores an opaque Rust AST pointer for lazy compilation.
|
||||
//
|
||||
// Returns a SharedFunctionInstanceData* cast to void*.
|
||||
void* rust_create_sfd(
|
||||
void* vm_ptr,
|
||||
void const* source_code_ptr,
|
||||
FFISharedFunctionData const* data);
|
||||
|
||||
// Set class_field_initializer_name on a SharedFunctionInstanceData.
|
||||
// Called after rust_create_sfd for class field initializer functions.
|
||||
void rust_sfd_set_class_field_initializer_name(
|
||||
void* sfd_ptr,
|
||||
uint16_t const* name,
|
||||
size_t name_len,
|
||||
bool is_private);
|
||||
|
||||
// Compile a function body using the Rust pipeline.
|
||||
// Takes ownership of the Rust AST (frees it after compilation).
|
||||
//
|
||||
// Writes FDI (FunctionDeclarationInstantiation) runtime metadata to the SFD (SharedFunctionInstanceData) via the sfd_ptr parameter.
|
||||
// Returns a Bytecode::Executable* cast to void*, or nullptr on failure.
|
||||
void* rust_compile_function(
|
||||
void* vm_ptr,
|
||||
void const* source_code_ptr,
|
||||
uint16_t const* source,
|
||||
size_t source_len,
|
||||
void* sfd_ptr,
|
||||
void* rust_function_ast,
|
||||
bool builtin_abstract_operations_enabled);
|
||||
|
||||
// Free a Rust Box<FunctionData> (called from SFD destructor).
|
||||
void rust_free_function_ast(void* ast);
|
||||
|
||||
// Set FDI runtime metadata on a SharedFunctionInstanceData.
|
||||
// Called from Rust after compiling a function body.
|
||||
void rust_sfd_set_metadata(
|
||||
void* sfd_ptr,
|
||||
bool uses_this,
|
||||
bool function_environment_needed,
|
||||
size_t function_environment_bindings_count,
|
||||
bool might_need_arguments_object,
|
||||
bool contains_direct_call_to_eval);
|
||||
|
||||
// Create a ClassBlueprint on the heap. Ownership transfers to the
|
||||
// caller; pass the pointer to rust_create_executable which will move
|
||||
// the blueprint into the Executable.
|
||||
//
|
||||
// Returns a heap-allocated ClassBlueprint* cast to void*.
|
||||
void* rust_create_class_blueprint(
|
||||
// VM pointer for creating GC objects (e.g. PrimitiveString)
|
||||
void* vm_ptr,
|
||||
// Source code object for substring_view
|
||||
void const* source_code_ptr,
|
||||
// Class name (empty for anonymous)
|
||||
uint16_t const* name,
|
||||
size_t name_len,
|
||||
// Source text of the entire class (for Function.prototype.toString)
|
||||
size_t source_text_offset,
|
||||
size_t source_text_len,
|
||||
// Index into shared_function_data for the constructor
|
||||
uint32_t constructor_sfd_index,
|
||||
bool has_super_class,
|
||||
bool has_name,
|
||||
// Array of class element descriptors
|
||||
FFIClassElement const* elements,
|
||||
size_t element_count);
|
||||
|
||||
// Callbacks used by rust_compile_script to populate GDI metadata.
|
||||
void script_gdi_push_lexical_name(void* ctx, uint16_t const* name, size_t len);
|
||||
void script_gdi_push_var_name(void* ctx, uint16_t const* name, size_t len);
|
||||
void script_gdi_push_function(void* ctx, void* sfd_ptr, uint16_t const* name, size_t len);
|
||||
void script_gdi_push_var_scoped_name(void* ctx, uint16_t const* name, size_t len);
|
||||
void script_gdi_push_annex_b_name(void* ctx, uint16_t const* name, size_t len);
|
||||
void script_gdi_push_lexical_binding(void* ctx, uint16_t const* name, size_t len, bool is_constant);
|
||||
|
||||
// Parse, compile, and extract EDI (EvalDeclarationInstantiation) metadata for eval using the Rust
|
||||
// parser. Populates gdi_context (an EvalGdiBuilder*) via callbacks.
|
||||
// Returns a Bytecode::Executable* cast to void*, or nullptr on failure.
|
||||
void* rust_compile_eval(
|
||||
uint16_t const* source,
|
||||
size_t source_len,
|
||||
void* vm_ptr,
|
||||
void const* source_code_ptr,
|
||||
void* gdi_context,
|
||||
bool starts_in_strict_mode,
|
||||
bool in_eval_function_context,
|
||||
bool allow_super_property_lookup,
|
||||
bool allow_super_constructor_call,
|
||||
bool in_class_field_initializer,
|
||||
void* error_context,
|
||||
RustParseErrorCallback error_callback,
|
||||
uint8_t** ast_dump_output,
|
||||
size_t* ast_dump_output_len);
|
||||
|
||||
// Parse and compile a dynamically-created function (new Function()).
|
||||
// Validates parameters and body separately per spec, then parses the
|
||||
// full synthetic source to create a SharedFunctionInstanceData.
|
||||
//
|
||||
// Returns a SharedFunctionInstanceData* cast to void*, or nullptr on
|
||||
// parse failure (caller should throw SyntaxError).
|
||||
//
|
||||
// function_kind: 0=Normal, 1=Generator, 2=Async, 3=AsyncGenerator
|
||||
void* rust_compile_dynamic_function(
|
||||
uint16_t const* full_source,
|
||||
size_t full_source_len,
|
||||
uint16_t const* params_source,
|
||||
size_t params_source_len,
|
||||
uint16_t const* body_source,
|
||||
size_t body_source_len,
|
||||
void* vm_ptr,
|
||||
void const* source_code_ptr,
|
||||
uint8_t function_kind,
|
||||
void* error_context,
|
||||
RustParseErrorCallback error_callback,
|
||||
uint8_t** ast_dump_output,
|
||||
size_t* ast_dump_output_len);
|
||||
|
||||
// Callbacks used by rust_compile_eval to populate EDI metadata.
|
||||
void eval_gdi_set_strict(void* ctx, bool is_strict);
|
||||
void eval_gdi_push_var_name(void* ctx, uint16_t const* name, size_t len);
|
||||
void eval_gdi_push_function(void* ctx, void* sfd, uint16_t const* name, size_t len);
|
||||
void eval_gdi_push_var_scoped_name(void* ctx, uint16_t const* name, size_t len);
|
||||
void eval_gdi_push_annex_b_name(void* ctx, uint16_t const* name, size_t len);
|
||||
void eval_gdi_push_lexical_binding(void* ctx, uint16_t const* name, size_t len, bool is_constant);
|
||||
|
||||
// Parse a builtin JS file in strict mode, extract top-level function
|
||||
// declarations, and create SharedFunctionInstanceData for each via the
|
||||
// Rust pipeline. Calls push_function for each function found.
|
||||
typedef void (*RustBuiltinFunctionCallback)(void* ctx, void* sfd_ptr, uint16_t const* name, size_t name_len);
|
||||
void rust_compile_builtin_file(
|
||||
uint16_t const* source,
|
||||
size_t source_len,
|
||||
void* vm_ptr,
|
||||
void const* source_code_ptr,
|
||||
void* ctx,
|
||||
RustBuiltinFunctionCallback push_function,
|
||||
uint8_t** ast_dump_output,
|
||||
size_t* ast_dump_output_len);
|
||||
|
||||
// Module compilation callback table (matches Rust ModuleCallbacks struct).
|
||||
struct ModuleCallbacks {
|
||||
void (*set_has_top_level_await)(void* ctx, bool value);
|
||||
void (*push_import_entry)(
|
||||
void* ctx,
|
||||
uint16_t const* import_name,
|
||||
size_t import_name_len,
|
||||
bool is_namespace,
|
||||
uint16_t const* local_name,
|
||||
size_t local_name_len,
|
||||
uint16_t const* module_specifier,
|
||||
size_t specifier_len,
|
||||
FFIUtf16Slice const* attribute_keys,
|
||||
FFIUtf16Slice const* attribute_values,
|
||||
size_t attribute_count);
|
||||
void (*push_local_export)(
|
||||
void* ctx,
|
||||
uint8_t kind,
|
||||
uint16_t const* export_name,
|
||||
size_t export_name_len,
|
||||
uint16_t const* local_or_import_name,
|
||||
size_t local_or_import_name_len,
|
||||
uint16_t const* module_specifier,
|
||||
size_t specifier_len,
|
||||
FFIUtf16Slice const* attribute_keys,
|
||||
FFIUtf16Slice const* attribute_values,
|
||||
size_t attribute_count);
|
||||
void (*push_indirect_export)(
|
||||
void* ctx,
|
||||
uint8_t kind,
|
||||
uint16_t const* export_name,
|
||||
size_t export_name_len,
|
||||
uint16_t const* local_or_import_name,
|
||||
size_t local_or_import_name_len,
|
||||
uint16_t const* module_specifier,
|
||||
size_t specifier_len,
|
||||
FFIUtf16Slice const* attribute_keys,
|
||||
FFIUtf16Slice const* attribute_values,
|
||||
size_t attribute_count);
|
||||
void (*push_star_export)(
|
||||
void* ctx,
|
||||
uint8_t kind,
|
||||
uint16_t const* export_name,
|
||||
size_t export_name_len,
|
||||
uint16_t const* local_or_import_name,
|
||||
size_t local_or_import_name_len,
|
||||
uint16_t const* module_specifier,
|
||||
size_t specifier_len,
|
||||
FFIUtf16Slice const* attribute_keys,
|
||||
FFIUtf16Slice const* attribute_values,
|
||||
size_t attribute_count);
|
||||
void (*push_requested_module)(
|
||||
void* ctx,
|
||||
uint16_t const* specifier,
|
||||
size_t specifier_len,
|
||||
FFIUtf16Slice const* attribute_keys,
|
||||
FFIUtf16Slice const* attribute_values,
|
||||
size_t attribute_count);
|
||||
void (*set_default_export_binding)(void* ctx, uint16_t const* name, size_t name_len);
|
||||
void (*push_var_name)(void* ctx, uint16_t const* name, size_t name_len);
|
||||
void (*push_function)(void* ctx, void* sfd_ptr, uint16_t const* name, size_t name_len);
|
||||
void (*push_lexical_binding)(void* ctx, uint16_t const* name, size_t name_len, bool is_constant, int32_t function_index);
|
||||
};
|
||||
|
||||
// Parse, compile, and extract module metadata using the Rust parser.
|
||||
// Populates module_context (a ModuleBuilder*) via callbacks.
|
||||
// On parse failure, calls error_callback for each error, then returns nullptr.
|
||||
//
|
||||
// Returns Executable* for non-TLA modules (tla_executable_out is null).
|
||||
// For TLA modules, returns nullptr and sets tla_executable_out to the
|
||||
// async wrapper Executable*.
|
||||
void* rust_compile_module(
|
||||
uint16_t const* source,
|
||||
size_t source_len,
|
||||
void* vm_ptr,
|
||||
void const* source_code_ptr,
|
||||
void* module_context,
|
||||
ModuleCallbacks const* callbacks,
|
||||
bool dump_ast,
|
||||
bool use_color,
|
||||
void* error_context,
|
||||
RustParseErrorCallback error_callback,
|
||||
void** tla_executable_out,
|
||||
uint8_t** ast_dump_output,
|
||||
size_t* ast_dump_output_len);
|
||||
|
||||
// Set the name on a SharedFunctionInstanceData (used for module default
|
||||
// export renaming from "*default*" to "default").
|
||||
void module_sfd_set_name(void* sfd_ptr, uint16_t const* name, size_t name_len);
|
||||
|
||||
// Compile a regex pattern+flags. On success, returns a heap-allocated
|
||||
// opaque object (RustCompiledRegex*) and sets *error_out to nullptr.
|
||||
// On failure, returns nullptr and sets *error_out to a heap-allocated
|
||||
// error string (caller must free with rust_free_error_string).
|
||||
// Successful results must be freed with rust_free_compiled_regex or
|
||||
// passed to rust_create_executable (which takes ownership).
|
||||
void* rust_compile_regex(
|
||||
uint16_t const* pattern_data, size_t pattern_len, uint16_t const* flags_data, size_t flags_len, char const** error_out);
|
||||
void rust_free_compiled_regex(void* ptr);
|
||||
void rust_free_error_string(char const* str);
|
||||
|
||||
// Convert a JS number to its UTF-16 string representation using the
|
||||
// ECMA-262 Number::toString algorithm. Writes up to buffer_len code
|
||||
// units into buffer and returns the actual length.
|
||||
size_t rust_number_to_utf16(double value, uint16_t* buffer, size_t buffer_len);
|
||||
|
||||
// FIXME: This FFI workaround exists only to match C++ float-to-string
|
||||
// formatting in the Rust AST dump. Once the C++ pipeline is
|
||||
// removed, this can be deleted and the Rust side can use its own
|
||||
// formatting without needing to match C++.
|
||||
// Format a double using AK's shortest-representation algorithm.
|
||||
// Writes up to buffer_len bytes into buffer and returns the actual length.
|
||||
size_t rust_format_double(double value, uint8_t* buffer, size_t buffer_len);
|
||||
|
||||
// Get a well-known symbol as an encoded JS::Value.
|
||||
// symbol_id: 0 = Symbol.iterator, 1 = Symbol.asyncIterator
|
||||
uint64_t get_well_known_symbol(void* vm_ptr, uint32_t symbol_id);
|
||||
|
||||
// Get an intrinsic abstract operation function as an encoded JS::Value.
|
||||
uint64_t get_abstract_operation_function(void* vm_ptr, uint16_t const* name, size_t name_len);
|
||||
|
||||
// Free a string allocated by Rust (e.g. AST dump output).
|
||||
void rust_free_string(uint8_t* ptr, size_t len);
|
||||
|
||||
# ifdef __cplusplus
|
||||
}
|
||||
# endif
|
||||
|
||||
#endif // ENABLE_RUST
|
||||
@@ -27,7 +27,9 @@ set(SOURCES
|
||||
Module.cpp
|
||||
Parser.cpp
|
||||
ParserError.cpp
|
||||
PipelineComparison.cpp
|
||||
Print.cpp
|
||||
RustIntegration.cpp
|
||||
Runtime/AbstractOperations.cpp
|
||||
Runtime/Accessor.cpp
|
||||
Runtime/Agent.cpp
|
||||
@@ -302,6 +304,29 @@ endif()
|
||||
|
||||
target_link_libraries(LibJS PUBLIC JSClangPlugin)
|
||||
|
||||
if (ENABLE_RUST)
|
||||
corrosion_import_crate(MANIFEST_PATH Rust/Cargo.toml)
|
||||
target_link_libraries(LibJS PRIVATE libjs_rust)
|
||||
target_compile_definitions(LibJS PRIVATE ENABLE_RUST)
|
||||
install(TARGETS libjs_rust EXPORT LagomTargets
|
||||
ARCHIVE COMPONENT Lagom_Development
|
||||
)
|
||||
# The Rust library and LibJS have a circular dependency (C++ calls Rust
|
||||
# entry points, Rust calls C++ callbacks). For static builds, merge the
|
||||
# Rust archive into the LibJS archive so all symbols are in one place.
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
add_custom_command(TARGET LibJS POST_BUILD
|
||||
COMMAND ${CMAKE_AR} -x $<TARGET_FILE:libjs_rust-static>
|
||||
COMMAND ${CMAKE_AR} -qS $<TARGET_FILE:LibJS> *.o
|
||||
COMMAND ${CMAKE_RANLIB} $<TARGET_FILE:LibJS>
|
||||
COMMAND ${CMAKE_COMMAND} -E remove -f *.o
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/rust_merge_tmp
|
||||
COMMENT "Merging Rust archive into LibJS"
|
||||
)
|
||||
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/rust_merge_tmp)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WIN32 AND ENABLE_ADDRESS_SANITIZER)
|
||||
# FIXME: Fix address sanitizer stack-overflow error when running test-js.
|
||||
# Even tripling the stack size for this target to 24MB didn't fix it, so it is most likely some ASAN related bug/quirk given test-js passes using the 8MB stack without ASAN
|
||||
|
||||
44
Libraries/LibJS/PipelineComparison.cpp
Normal file
44
Libraries/LibJS/PipelineComparison.cpp
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibJS/PipelineComparison.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
// Returns whether lockstep comparison of the Rust and C++ pipelines was
// requested through the LIBJS_COMPARE_PIPELINES environment variable.
//
// The environment is consulted once, on the first call; the result is
// cached in a function-local static for all later calls. Only the presence
// of the variable is checked, so any value (including an empty string or
// "0") enables comparison.
bool compare_pipelines_enabled()
{
    static bool const enabled = getenv("LIBJS_COMPARE_PIPELINES") != nullptr;
    return enabled;
}
|
||||
|
||||
// Reports a difference between the two pipelines and stops the process.
//
// kind:      label for what was compared; used in the headings.
// rust_dump: textual dump produced by the Rust pipeline.
// cpp_dump:  textual dump produced by the C++ pipeline.
// context:   caller-supplied description of what was being compiled.
//
// The whole report is assembled first and emitted with a single warnln()
// call. The function then hits VERIFY_NOT_REACHED(), so it does not return
// normally.
static void report_mismatch(StringView kind, StringView rust_dump, StringView cpp_dump, StringView context)
{
    StringBuilder message;
    message.appendff("PIPELINE MISMATCH ({}) in: {}\n", kind, context);
    message.appendff("\n=== Rust {} ===\n{}\n", kind, rust_dump);
    message.appendff("\n=== C++ {} ===\n{}\n", kind, cpp_dump);
    warnln("{}", message.string_view());
    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Compares the AST dumps of both pipelines as text.
//
// rust_ast / cpp_ast: AST dumps from the Rust and C++ pipelines.
// context:            description of the source being compiled, passed
//                     through to the mismatch report.
//
// Returns normally when the dumps are equal; otherwise hands off to
// report_mismatch().
void compare_pipeline_asts(StringView rust_ast, StringView cpp_ast, StringView context)
{
    if (rust_ast != cpp_ast)
        report_mismatch("AST"sv, rust_ast, cpp_ast, context);
}
|
||||
|
||||
// Compares the bytecode dumps of both pipelines as text.
//
// rust_bytecode / cpp_bytecode: bytecode dumps from the Rust and C++
//                               pipelines.
// context:                      description of the source being compiled,
//                               passed through to the mismatch report.
// ast_dump:                     optional AST dump; when non-empty it is
//                               printed ahead of the mismatch report to aid
//                               debugging.
//
// Returns normally when the dumps are equal; otherwise hands off to
// report_mismatch().
void compare_pipeline_bytecode(StringView rust_bytecode, StringView cpp_bytecode, StringView context, StringView ast_dump)
{
    if (rust_bytecode != cpp_bytecode) {
        if (!ast_dump.is_empty())
            warnln("\n=== AST (both identical) ===\n{}", ast_dump);
        report_mismatch("Bytecode"sv, rust_bytecode, cpp_bytecode, context);
    }
}
|
||||
|
||||
}
|
||||
18
Libraries/LibJS/PipelineComparison.h
Normal file
18
Libraries/LibJS/PipelineComparison.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/StringView.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
bool compare_pipelines_enabled();
|
||||
|
||||
void compare_pipeline_asts(StringView rust_ast, StringView cpp_ast, StringView context);
|
||||
void compare_pipeline_bytecode(StringView rust_bytecode, StringView cpp_bytecode, StringView context, StringView ast_dump = {});
|
||||
|
||||
}
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <AK/Function.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/Utf16View.h>
|
||||
#include <LibJS/Bytecode/Generator.h>
|
||||
#include <LibJS/Bytecode/Interpreter.h>
|
||||
#include <LibJS/ModuleLoading.h>
|
||||
#include <LibJS/Parser.h>
|
||||
@@ -39,6 +40,8 @@
|
||||
#include <LibJS/Runtime/SuppressedError.h>
|
||||
#include <LibJS/Runtime/Temporal/AbstractOperations.h>
|
||||
#include <LibJS/Runtime/ValueInlines.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
#include <LibJS/SourceCode.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
@@ -651,30 +654,52 @@ ThrowCompletionOr<Value> perform_eval(VM& vm, Value x, CallerMode strict_caller,
|
||||
// f. If inMethod is false, and body Contains SuperProperty, throw a SyntaxError exception.
|
||||
// g. If inDerivedConstructor is false, and body Contains SuperCall, throw a SyntaxError exception.
|
||||
// h. If inClassFieldInitializer is true, and ContainsArguments of body is true, throw a SyntaxError exception.
|
||||
Parser::EvalInitialState initial_state {
|
||||
.in_eval_function_context = in_function,
|
||||
.allow_super_property_lookup = in_method,
|
||||
.allow_super_constructor_call = in_derived_constructor,
|
||||
.in_class_field_initializer = in_class_field_initializer,
|
||||
};
|
||||
|
||||
Parser parser(Lexer(SourceCode::create({}, code_string->utf16_string())), Program::Type::Script, move(initial_state));
|
||||
auto program = parser.parse_program(strict_caller == CallerMode::Strict);
|
||||
GC::Ptr<Bytecode::Executable> executable;
|
||||
bool strict_eval = false;
|
||||
EvalDeclarationData eval_declaration_data;
|
||||
|
||||
// b. If script is a List of errors, throw a SyntaxError exception.
|
||||
if (parser.has_errors()) {
|
||||
auto& error = parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
auto rust_compilation = RustIntegration::compile_eval(*code_string, vm, strict_caller, in_function, in_method, in_derived_constructor, in_class_field_initializer);
|
||||
if (rust_compilation.has_value()) {
|
||||
if (rust_compilation->is_error())
|
||||
return vm.throw_completion<SyntaxError>(rust_compilation->release_error());
|
||||
auto& eval_result = rust_compilation->value();
|
||||
executable = eval_result.executable;
|
||||
strict_eval = eval_result.is_strict_mode;
|
||||
eval_declaration_data = move(eval_result.declaration_data);
|
||||
}
|
||||
|
||||
bool strict_eval = false;
|
||||
RefPtr<Program> cpp_program;
|
||||
|
||||
// 14. If strictCaller is true, let strictEval be true.
|
||||
if (strict_caller == CallerMode::Strict)
|
||||
strict_eval = true;
|
||||
// 15. Else, let strictEval be IsStrict of script.
|
||||
else
|
||||
strict_eval = program->is_strict_mode();
|
||||
if (!executable) {
|
||||
Parser::EvalInitialState initial_state {
|
||||
.in_eval_function_context = in_function,
|
||||
.allow_super_property_lookup = in_method,
|
||||
.allow_super_constructor_call = in_derived_constructor,
|
||||
.in_class_field_initializer = in_class_field_initializer,
|
||||
};
|
||||
|
||||
Parser parser(Lexer(SourceCode::create({}, code_string->utf16_string())), Program::Type::Script, move(initial_state));
|
||||
cpp_program = parser.parse_program(strict_caller == CallerMode::Strict);
|
||||
|
||||
// b. If script is a List of errors, throw a SyntaxError exception.
|
||||
if (parser.has_errors()) {
|
||||
auto& error = parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
}
|
||||
|
||||
// 14. If strictCaller is true, let strictEval be true.
|
||||
if (strict_caller == CallerMode::Strict)
|
||||
strict_eval = true;
|
||||
// 15. Else, let strictEval be IsStrict of script.
|
||||
else
|
||||
strict_eval = cpp_program->is_strict_mode();
|
||||
|
||||
eval_declaration_data = EvalDeclarationData::create(vm, *cpp_program, strict_eval);
|
||||
|
||||
// NB: Bytecode compilation is deferred until after EvalDeclarationInstantiation,
|
||||
// which sets annex B flags on AST nodes that affect codegen.
|
||||
}
|
||||
|
||||
// 16. Let runningContext be the running execution context.
|
||||
// 17. NOTE: If direct is true, runningContext will be the execution context that performed the direct eval. If direct is false, runningContext will be the execution context for the invocation of the eval function.
|
||||
@@ -724,13 +749,14 @@ ThrowCompletionOr<Value> perform_eval(VM& vm, Value x, CallerMode strict_caller,
|
||||
// NOTE: Spec steps are rearranged in order to compute number of registers+constants+locals before construction of the execution context.
|
||||
|
||||
// 30. Let result be Completion(EvalDeclarationInstantiation(body, varEnv, lexEnv, privateEnv, strictEval)).
|
||||
auto eval_declaration_data = EvalDeclarationData::create(vm, program, strict_eval);
|
||||
TRY(eval_declaration_instantiation(vm, eval_declaration_data, variable_environment, lexical_environment, private_environment, strict_eval));
|
||||
|
||||
// 31. If result.[[Type]] is normal, then
|
||||
// a. Set result to the result of evaluating body.
|
||||
auto executable = Bytecode::Generator::generate_from_ast_node(vm, program, {});
|
||||
executable->name = "eval"_utf16_fly_string;
|
||||
// Compile C++ AST after EDI, since EDI sets annex B flags on AST nodes.
|
||||
if (cpp_program) {
|
||||
executable = Bytecode::Generator::generate_from_ast_node(vm, *cpp_program, {});
|
||||
executable->name = "eval"_utf16_fly_string;
|
||||
}
|
||||
|
||||
if (Bytecode::g_dump_bytecode)
|
||||
executable->dump();
|
||||
|
||||
@@ -808,7 +834,8 @@ EvalDeclarationData EvalDeclarationData::create(VM& vm, Program const& program,
|
||||
// Pre-compute AnnexB candidates.
|
||||
if (!strict) {
|
||||
MUST(program.for_each_function_hoistable_with_annexB_extension([&](FunctionDeclaration& function_declaration) -> ThrowCompletionOr<void> {
|
||||
data.annex_b_candidates.append(function_declaration);
|
||||
data.annex_b_candidate_names.append(function_declaration.name());
|
||||
data.annex_b_function_declarations.append(function_declaration);
|
||||
return {};
|
||||
}));
|
||||
}
|
||||
@@ -909,9 +936,9 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, EvalDeclarationDa
|
||||
HashTable<Utf16FlyString> hoisted_functions;
|
||||
|
||||
// b. For each FunctionDeclaration f that is directly contained in the StatementList of a Block, CaseClause, or DefaultClause Contained within body, do
|
||||
for (auto& function_declaration : data.annex_b_candidates) {
|
||||
for (size_t i = 0; i < data.annex_b_candidate_names.size(); ++i) {
|
||||
// i. Let F be StringValue of the BindingIdentifier of f.
|
||||
auto function_name = function_declaration->name();
|
||||
auto& function_name = data.annex_b_candidate_names[i];
|
||||
|
||||
// ii. If replacing the FunctionDeclaration f with a VariableStatement that has F as a BindingIdentifier would not produce any Early Errors for body, then
|
||||
// Note: This is checked during parsing and for_each_function_hoistable_with_annexB_extension so it always passes here.
|
||||
@@ -993,7 +1020,8 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, EvalDeclarationDa
|
||||
// iii. Let fobj be ! benv.GetBindingValue(F, false).
|
||||
// iv. Perform ? genv.SetMutableBinding(F, fobj, false).
|
||||
// v. Return unused.
|
||||
function_declaration->set_should_do_additional_annexB_steps();
|
||||
if (i < data.annex_b_function_declarations.size())
|
||||
data.annex_b_function_declarations[i]->set_should_do_additional_annexB_steps();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1045,7 +1073,7 @@ ThrowCompletionOr<void> eval_declaration_instantiation(VM& vm, EvalDeclarationDa
|
||||
// b. Let fo be InstantiateFunctionObject of f with arguments lexEnv and privateEnv.
|
||||
auto function = ECMAScriptFunctionObject::create_from_function_data(
|
||||
realm,
|
||||
function_to_initialize.shared_data,
|
||||
*function_to_initialize.shared_data,
|
||||
lexical_environment,
|
||||
private_environment);
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ struct EvalDeclarationData {
|
||||
Vector<Utf16FlyString> var_names;
|
||||
|
||||
struct FunctionToInitialize {
|
||||
GC::Ref<SharedFunctionInstanceData> shared_data;
|
||||
GC::Root<SharedFunctionInstanceData> shared_data;
|
||||
Utf16FlyString name;
|
||||
};
|
||||
Vector<FunctionToInitialize> functions_to_initialize;
|
||||
@@ -105,7 +105,8 @@ struct EvalDeclarationData {
|
||||
|
||||
Vector<Utf16FlyString> var_scoped_names;
|
||||
|
||||
Vector<NonnullRefPtr<FunctionDeclaration>> annex_b_candidates;
|
||||
Vector<Utf16FlyString> annex_b_candidate_names;
|
||||
Vector<NonnullRefPtr<FunctionDeclaration>> annex_b_function_declarations;
|
||||
|
||||
struct LexicalBinding {
|
||||
Utf16FlyString name;
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include <AK/Debug.h>
|
||||
#include <AK/Function.h>
|
||||
#include <LibGC/DeferGC.h>
|
||||
#include <LibJS/AST.h>
|
||||
#include <LibJS/Bytecode/BasicBlock.h>
|
||||
#include <LibJS/Bytecode/Generator.h>
|
||||
@@ -29,6 +30,7 @@
|
||||
#include <LibJS/Runtime/PromiseConstructor.h>
|
||||
#include <LibJS/Runtime/Value.h>
|
||||
#include <LibJS/Runtime/ValueInlines.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
@@ -217,7 +219,13 @@ void ECMAScriptFunctionObject::get_stack_frame_size(size_t& registers_and_locals
|
||||
{
|
||||
auto& executable = shared_data().m_executable;
|
||||
if (!executable) {
|
||||
if (is_module_wrapper()) {
|
||||
auto rust_executable = RustIntegration::compile_function(vm(), *m_shared_data, false);
|
||||
if (rust_executable) {
|
||||
executable = rust_executable;
|
||||
executable->name = m_shared_data->m_name;
|
||||
if (Bytecode::g_dump_bytecode)
|
||||
executable->dump();
|
||||
} else if (is_module_wrapper()) {
|
||||
executable = Bytecode::compile(vm(), ecmascript_code(), kind(), name());
|
||||
} else {
|
||||
executable = Bytecode::compile(vm(), shared_data(), Bytecode::BuiltinAbstractOperationsEnabled::No);
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
#include <LibJS/Runtime/GlobalEnvironment.h>
|
||||
#include <LibJS/Runtime/GlobalObject.h>
|
||||
#include <LibJS/Runtime/Realm.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
#include <LibJS/SourceCode.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
@@ -148,46 +150,75 @@ ThrowCompletionOr<GC::Ref<ECMAScriptFunctionObject>> FunctionConstructor::create
|
||||
// 16. Perform ? HostEnsureCanCompileStrings(currentRealm, parameterStrings, bodyString, sourceString, FUNCTION, parameterArgs, bodyArg).
|
||||
TRY(vm.host_ensure_can_compile_strings(realm, parameter_strings, body_string, source_text, CompilationType::Function, parameter_args, body_arg));
|
||||
|
||||
u8 parse_options = FunctionNodeParseOptions::CheckForFunctionAndName;
|
||||
if (kind == FunctionKind::Async || kind == FunctionKind::AsyncGenerator)
|
||||
parse_options |= FunctionNodeParseOptions::IsAsyncFunction;
|
||||
if (kind == FunctionKind::Generator || kind == FunctionKind::AsyncGenerator)
|
||||
parse_options |= FunctionNodeParseOptions::IsGeneratorFunction;
|
||||
GC::Ptr<SharedFunctionInstanceData> function_data;
|
||||
|
||||
// 17. Let parameters be ParseText(P, parameterSym).
|
||||
i32 function_length = 0;
|
||||
auto parameters_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(parameters_string))));
|
||||
auto parameters = parameters_parser.parse_formal_parameters(function_length, parse_options);
|
||||
|
||||
// 18. If parameters is a List of errors, throw a SyntaxError exception.
|
||||
if (parameters_parser.has_errors()) {
|
||||
auto error = parameters_parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
auto rust_compilation = RustIntegration::compile_dynamic_function(vm, source_text, parameters_string, body_parse_string, kind);
|
||||
if (rust_compilation.has_value()) {
|
||||
if (rust_compilation->is_error())
|
||||
return vm.throw_completion<SyntaxError>(rust_compilation->release_error());
|
||||
function_data = rust_compilation->value();
|
||||
}
|
||||
|
||||
// 19. Let body be ParseText(bodyParseString, bodySym).
|
||||
FunctionParsingInsights parsing_insights;
|
||||
auto body_parser = Parser::parse_function_body_from_string(body_parse_string, parse_options, parameters, kind, parsing_insights);
|
||||
if (!function_data) {
|
||||
u8 parse_options = FunctionNodeParseOptions::CheckForFunctionAndName;
|
||||
if (kind == FunctionKind::Async || kind == FunctionKind::AsyncGenerator)
|
||||
parse_options |= FunctionNodeParseOptions::IsAsyncFunction;
|
||||
if (kind == FunctionKind::Generator || kind == FunctionKind::AsyncGenerator)
|
||||
parse_options |= FunctionNodeParseOptions::IsGeneratorFunction;
|
||||
|
||||
// 20. If body is a List of errors, throw a SyntaxError exception.
|
||||
if (body_parser.has_errors()) {
|
||||
auto error = body_parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
}
|
||||
// 17. Let parameters be ParseText(P, parameterSym).
|
||||
i32 function_length = 0;
|
||||
auto parameters_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(parameters_string))));
|
||||
auto parameters = parameters_parser.parse_formal_parameters(function_length, parse_options);
|
||||
|
||||
// 21. NOTE: The parameters and body are parsed separately to ensure that each is valid alone. For example, new Function("/*", "*/ ) {") does not evaluate to a function.
|
||||
// 22. NOTE: If this step is reached, sourceText must have the syntax of exprSym (although the reverse implication does not hold). The purpose of the next two steps is to enforce any Early Error rules which apply to exprSym directly.
|
||||
// 18. If parameters is a List of errors, throw a SyntaxError exception.
|
||||
if (parameters_parser.has_errors()) {
|
||||
auto error = parameters_parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
}
|
||||
|
||||
// 23. Let expr be ParseText(sourceText, exprSym).
|
||||
auto source_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(source_text))));
|
||||
// This doesn't need any parse_options, it determines those & the function type based on the tokens that were found.
|
||||
auto expr = source_parser.parse_function_node<FunctionExpression>();
|
||||
source_parser.run_scope_analysis();
|
||||
// 19. Let body be ParseText(bodyParseString, bodySym).
|
||||
FunctionParsingInsights parsing_insights;
|
||||
auto body_parser = Parser::parse_function_body_from_string(body_parse_string, parse_options, parameters, kind, parsing_insights);
|
||||
|
||||
// 24. If expr is a List of errors, throw a SyntaxError exception.
|
||||
if (source_parser.has_errors()) {
|
||||
auto error = source_parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
// 20. If body is a List of errors, throw a SyntaxError exception.
|
||||
if (body_parser.has_errors()) {
|
||||
auto error = body_parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
}
|
||||
|
||||
// 21. NOTE: The parameters and body are parsed separately to ensure that each is valid alone. For example, new Function("/*", "*/ ) {") does not evaluate to a function.
|
||||
// 22. NOTE: If this step is reached, sourceText must have the syntax of exprSym (although the reverse implication does not hold). The purpose of the next two steps is to enforce any Early Error rules which apply to exprSym directly.
|
||||
|
||||
// 23. Let expr be ParseText(sourceText, exprSym).
|
||||
auto source_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(source_text))));
|
||||
// This doesn't need any parse_options, it determines those & the function type based on the tokens that were found.
|
||||
auto expr = source_parser.parse_function_node<FunctionExpression>();
|
||||
source_parser.run_scope_analysis();
|
||||
|
||||
// 24. If expr is a List of errors, throw a SyntaxError exception.
|
||||
if (source_parser.has_errors()) {
|
||||
auto error = source_parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
}
|
||||
|
||||
// 28. Let F be OrdinaryFunctionCreate(proto, sourceText, parameters, body, non-lexical-this, env, privateEnv).
|
||||
parsing_insights.might_need_arguments_object = true;
|
||||
|
||||
function_data = vm.heap().allocate<SharedFunctionInstanceData>(
|
||||
vm,
|
||||
expr->kind(),
|
||||
"anonymous"_utf16_fly_string,
|
||||
expr->function_length(),
|
||||
expr->parameters(),
|
||||
expr->body(),
|
||||
Utf16View {},
|
||||
expr->is_strict_mode(),
|
||||
false,
|
||||
parsing_insights,
|
||||
expr->local_variables_names());
|
||||
function_data->m_source_text_owner = Utf16String::from_utf8(source_text);
|
||||
function_data->m_source_text = function_data->m_source_text_owner.utf16_view();
|
||||
}
|
||||
|
||||
// 25. Let proto be ? GetPrototypeFromConstructor(newTarget, fallbackProto).
|
||||
@@ -199,27 +230,9 @@ ThrowCompletionOr<GC::Ref<ECMAScriptFunctionObject>> FunctionConstructor::create
|
||||
// 27. Let privateEnv be null.
|
||||
PrivateEnvironment* private_environment = nullptr;
|
||||
|
||||
// 28. Let F be OrdinaryFunctionCreate(proto, sourceText, parameters, body, non-lexical-this, env, privateEnv).
|
||||
parsing_insights.might_need_arguments_object = true;
|
||||
|
||||
auto function_data = vm.heap().allocate<SharedFunctionInstanceData>(
|
||||
vm,
|
||||
expr->kind(),
|
||||
"anonymous"_utf16_fly_string,
|
||||
expr->function_length(),
|
||||
expr->parameters(),
|
||||
expr->body(),
|
||||
Utf16View {},
|
||||
expr->is_strict_mode(),
|
||||
false,
|
||||
parsing_insights,
|
||||
expr->local_variables_names());
|
||||
function_data->m_source_text_owner = Utf16String::from_utf8(source_text);
|
||||
function_data->m_source_text = function_data->m_source_text_owner.utf16_view();
|
||||
|
||||
auto function = ECMAScriptFunctionObject::create_from_function_data(
|
||||
realm,
|
||||
function_data,
|
||||
*function_data,
|
||||
&environment,
|
||||
private_environment,
|
||||
*prototype);
|
||||
|
||||
@@ -136,6 +136,7 @@
|
||||
#include <LibJS/Runtime/WeakSetConstructor.h>
|
||||
#include <LibJS/Runtime/WeakSetPrototype.h>
|
||||
#include <LibJS/Runtime/WrapForValidIteratorPrototype.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
|
||||
// FIXME: Remove this asm hack when we upgrade to GCC 15.
|
||||
#define INCLUDE_FILE_WITH_ASSEMBLY(name, file_path) \
|
||||
@@ -208,8 +209,13 @@ GC::Ref<Intrinsics> Intrinsics::create(Realm& realm)
|
||||
|
||||
static Vector<GC::Root<SharedFunctionInstanceData>> parse_builtin_file(unsigned char const* script_text, VM& vm)
|
||||
{
|
||||
auto rust_compilation = RustIntegration::compile_builtin_file(script_text, vm);
|
||||
if (rust_compilation.has_value())
|
||||
return move(rust_compilation.value());
|
||||
|
||||
auto script_text_as_utf16 = Utf16String::from_utf8_without_validation({ script_text, strlen(reinterpret_cast<char const*>(script_text)) });
|
||||
auto code = SourceCode::create("BuiltinFile"_string, move(script_text_as_utf16));
|
||||
|
||||
auto lexer = Lexer { move(code) };
|
||||
auto parser = Parser { move(lexer) };
|
||||
VERIFY(!parser.has_errors());
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <LibJS/Runtime/AsyncGenerator.h>
|
||||
#include <LibJS/Runtime/GeneratorObject.h>
|
||||
#include <LibJS/Runtime/NativeJavaScriptBackedFunction.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
@@ -98,7 +99,15 @@ Bytecode::Executable& NativeJavaScriptBackedFunction::bytecode_executable()
|
||||
{
|
||||
auto& executable = m_shared_function_instance_data->m_executable;
|
||||
if (!executable) {
|
||||
executable = Bytecode::compile(vm(), m_shared_function_instance_data, Bytecode::BuiltinAbstractOperationsEnabled::Yes);
|
||||
auto rust_executable = RustIntegration::compile_function(vm(), *m_shared_function_instance_data, true);
|
||||
if (rust_executable) {
|
||||
executable = rust_executable;
|
||||
executable->name = m_shared_function_instance_data->m_name;
|
||||
if (Bytecode::g_dump_bytecode)
|
||||
executable->dump();
|
||||
} else {
|
||||
executable = Bytecode::compile(vm(), m_shared_function_instance_data, Bytecode::BuiltinAbstractOperationsEnabled::Yes);
|
||||
}
|
||||
m_shared_function_instance_data->clear_compile_inputs();
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
#include <LibJS/Runtime/PromiseConstructor.h>
|
||||
#include <LibJS/Runtime/ShadowRealm.h>
|
||||
#include <LibJS/Runtime/WrappedFunction.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
#include <LibJS/SourceCode.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
@@ -122,35 +124,53 @@ ThrowCompletionOr<Value> perform_shadow_realm_eval(VM& vm, Value source, Realm&
|
||||
|
||||
// 2. Perform the following substeps in an implementation-defined order, possibly interleaving parsing and error detection:
|
||||
|
||||
// a. Let script be ParseText(StringToCodePoints(sourceText), Script).
|
||||
auto parser = Parser(Lexer(SourceCode::create({}, source_text->utf16_string())), Program::Type::Script, Parser::EvalInitialState {});
|
||||
auto program = parser.parse_program();
|
||||
GC::Ptr<Bytecode::Executable> executable;
|
||||
bool strict_eval = false;
|
||||
EvalDeclarationData eval_declaration_data;
|
||||
|
||||
// b. If script is a List of errors, throw a SyntaxError exception.
|
||||
if (parser.has_errors()) {
|
||||
auto& error = parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
auto rust_compilation = RustIntegration::compile_shadow_realm_eval(*source_text, vm);
|
||||
if (rust_compilation.has_value()) {
|
||||
if (rust_compilation->is_error())
|
||||
return vm.throw_completion<SyntaxError>(rust_compilation->release_error());
|
||||
auto& eval_result = rust_compilation->value();
|
||||
executable = eval_result.executable;
|
||||
strict_eval = eval_result.is_strict_mode;
|
||||
eval_declaration_data = move(eval_result.declaration_data);
|
||||
}
|
||||
|
||||
// c. If script Contains ScriptBody is false, return undefined.
|
||||
if (program->children().is_empty())
|
||||
return js_undefined();
|
||||
if (!executable) {
|
||||
// a. Let script be ParseText(StringToCodePoints(sourceText), Script).
|
||||
auto parser = Parser(Lexer(SourceCode::create({}, source_text->utf16_string())), Program::Type::Script, Parser::EvalInitialState {});
|
||||
auto program = parser.parse_program();
|
||||
|
||||
// d. Let body be the ScriptBody of script.
|
||||
// e. If body Contains NewTarget is true, throw a SyntaxError exception.
|
||||
// f. If body Contains SuperProperty is true, throw a SyntaxError exception.
|
||||
// g. If body Contains SuperCall is true, throw a SyntaxError exception.
|
||||
// FIXME: Implement these, we probably need a generic way of scanning the AST for certain nodes.
|
||||
// b. If script is a List of errors, throw a SyntaxError exception.
|
||||
if (parser.has_errors()) {
|
||||
auto& error = parser.errors()[0];
|
||||
return vm.throw_completion<SyntaxError>(error.to_string());
|
||||
}
|
||||
|
||||
// 3. Let strictEval be IsStrict of script.
|
||||
auto strict_eval = program->is_strict_mode();
|
||||
// c. If script Contains ScriptBody is false, return undefined.
|
||||
if (program->children().is_empty())
|
||||
return js_undefined();
|
||||
|
||||
// d. Let body be the ScriptBody of script.
|
||||
// e. If body Contains NewTarget is true, throw a SyntaxError exception.
|
||||
// f. If body Contains SuperProperty is true, throw a SyntaxError exception.
|
||||
// g. If body Contains SuperCall is true, throw a SyntaxError exception.
|
||||
// FIXME: Implement these, we probably need a generic way of scanning the AST for certain nodes.
|
||||
|
||||
// 3. Let strictEval be IsStrict of script.
|
||||
strict_eval = program->is_strict_mode();
|
||||
|
||||
eval_declaration_data = EvalDeclarationData::create(vm, program, strict_eval);
|
||||
|
||||
executable = Bytecode::compile(vm, program, FunctionKind::Normal, "ShadowRealmEval"_utf16_fly_string);
|
||||
}
|
||||
|
||||
// 4. Let runningContext be the running execution context.
|
||||
// 5. If runningContext is not already suspended, suspend runningContext.
|
||||
// NOTE: This would be unused due to step 9 and is omitted for that reason.
|
||||
|
||||
auto executable = Bytecode::compile(vm, program, FunctionKind::Normal, "ShadowRealmEval"_utf16_fly_string);
|
||||
|
||||
// 6. Let evalContext be GetShadowRealmContext(evalRealm, strictEval).
|
||||
auto eval_context = get_shadow_realm_context(eval_realm, strict_eval, executable->registers_and_locals_count, executable->constants.size());
|
||||
|
||||
@@ -164,7 +184,6 @@ ThrowCompletionOr<Value> perform_shadow_realm_eval(VM& vm, Value source, Realm&
|
||||
TRY(vm.push_execution_context(*eval_context, {}));
|
||||
|
||||
// 10. Let result be Completion(EvalDeclarationInstantiation(body, varEnv, lexEnv, null, strictEval)).
|
||||
auto eval_declaration_data = EvalDeclarationData::create(vm, program, strict_eval);
|
||||
auto eval_result = eval_declaration_instantiation(vm, eval_declaration_data, variable_environment, lexical_environment, nullptr, strict_eval);
|
||||
|
||||
Completion result;
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <LibJS/AST.h>
|
||||
#include <LibJS/Runtime/SharedFunctionInstanceData.h>
|
||||
#include <LibJS/Runtime/VM.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
@@ -307,6 +308,43 @@ void SharedFunctionInstanceData::visit_edges(Visitor& visitor)
|
||||
|
||||
SharedFunctionInstanceData::~SharedFunctionInstanceData() = default;
|
||||
|
||||
void SharedFunctionInstanceData::finalize()
|
||||
{
|
||||
Base::finalize();
|
||||
RustIntegration::free_function_ast(m_rust_function_ast);
|
||||
m_rust_function_ast = nullptr;
|
||||
}
|
||||
|
||||
// Constructor used by the Rust pipeline. It receives pre-computed function
// metadata plus an opaque pointer to the Rust-side function AST instead of a
// C++ AST, and marks the instance for Rust compilation.
SharedFunctionInstanceData::SharedFunctionInstanceData(
    VM&,
    FunctionKind kind,
    Utf16FlyString name,
    i32 function_length,
    u32 formal_parameter_count,
    bool strict,
    bool is_arrow_function,
    bool has_simple_parameter_list,
    Vector<Utf16FlyString> parameter_names_for_mapped_arguments,
    void* rust_function_ast)
    : m_name(move(name))
    , m_function_length(function_length)
    , m_formal_parameter_count(formal_parameter_count)
    , m_parameter_names_for_mapped_arguments(move(parameter_names_for_mapped_arguments))
    , m_kind(kind)
    , m_strict(strict)
    , m_is_arrow_function(is_arrow_function)
    , m_has_simple_parameter_list(has_simple_parameter_list)
    , m_rust_function_ast(rust_function_ast)
    , m_use_rust_compilation(true)
{
    // Arrow functions take a lexical `this`; otherwise strictness decides
    // between the strict and global this-modes.
    m_this_mode = m_is_arrow_function
        ? ThisMode::Lexical
        : (m_strict ? ThisMode::Strict : ThisMode::Global);
}
|
||||
|
||||
GC::Ref<SharedFunctionInstanceData> SharedFunctionInstanceData::create_for_function_node(VM& vm, FunctionNode const& node)
|
||||
{
|
||||
return create_for_function_node(vm, node, node.name());
|
||||
@@ -342,6 +380,8 @@ void SharedFunctionInstanceData::clear_compile_inputs()
|
||||
m_functions_to_initialize.clear();
|
||||
m_var_names_to_initialize_binding.clear();
|
||||
m_lexical_bindings.clear();
|
||||
RustIntegration::free_function_ast(m_rust_function_ast);
|
||||
m_rust_function_ast = nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -50,9 +50,11 @@ class FunctionNode;
|
||||
class JS_API SharedFunctionInstanceData final : public GC::Cell {
|
||||
GC_CELL(SharedFunctionInstanceData, GC::Cell);
|
||||
GC_DECLARE_ALLOCATOR(SharedFunctionInstanceData);
|
||||
static constexpr bool OVERRIDES_FINALIZE = true;
|
||||
|
||||
public:
|
||||
virtual ~SharedFunctionInstanceData() override;
|
||||
virtual void finalize() override;
|
||||
|
||||
static GC::Ref<SharedFunctionInstanceData> create_for_function_node(VM&, FunctionNode const&);
|
||||
static GC::Ref<SharedFunctionInstanceData> create_for_function_node(VM&, FunctionNode const&, Utf16FlyString name);
|
||||
@@ -70,6 +72,21 @@ public:
|
||||
FunctionParsingInsights const&,
|
||||
Vector<LocalVariable> local_variables_names);
|
||||
|
||||
// NB: Constructor for the Rust pipeline. Takes pre-computed metadata
|
||||
// instead of a C++ AST. FDI fields are populated later during
|
||||
// lazy compilation by rust_compile_function.
|
||||
SharedFunctionInstanceData(
|
||||
VM& vm,
|
||||
FunctionKind,
|
||||
Utf16FlyString name,
|
||||
i32 function_length,
|
||||
u32 formal_parameter_count,
|
||||
bool strict,
|
||||
bool is_arrow_function,
|
||||
bool has_simple_parameter_list,
|
||||
Vector<Utf16FlyString> parameter_names_for_mapped_arguments,
|
||||
void* rust_function_ast);
|
||||
|
||||
mutable GC::Ptr<Bytecode::Executable> m_executable;
|
||||
|
||||
RefPtr<FunctionParameters const> m_formal_parameters; // [[FormalParameters]]
|
||||
@@ -113,7 +130,7 @@ public:
|
||||
No,
|
||||
Yes,
|
||||
};
|
||||
HashMap<Utf16FlyString, ParameterIsLocal> m_parameter_names;
|
||||
OrderedHashMap<Utf16FlyString, ParameterIsLocal> m_parameter_names;
|
||||
struct FunctionToInitialize {
|
||||
GC::Ref<SharedFunctionInstanceData> shared_data;
|
||||
Utf16FlyString name;
|
||||
@@ -142,6 +159,11 @@ public:
|
||||
ConstructorKind m_constructor_kind : 1 { ConstructorKind::Base }; // [[ConstructorKind]]
|
||||
bool m_is_class_constructor : 1 { false }; // [[IsClassConstructor]]
|
||||
|
||||
// NB: When non-null, points to a Rust Box<FunctionData> used for
|
||||
// lazy compilation through the Rust pipeline.
|
||||
void* m_rust_function_ast { nullptr };
|
||||
bool m_use_rust_compilation { false };
|
||||
|
||||
void clear_compile_inputs();
|
||||
|
||||
private:
|
||||
|
||||
13
Libraries/LibJS/Rust/Cargo.toml
Normal file
13
Libraries/LibJS/Rust/Cargo.toml
Normal file
@@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "libjs_rust"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
crate-type = ["staticlib"]
|
||||
|
||||
[dependencies]
|
||||
unicode-ident = "1.0"
|
||||
num-bigint = "0.4"
|
||||
num-traits = "0.2"
|
||||
num-integer = "0.1"
|
||||
674
Libraries/LibJS/Rust/build.rs
Normal file
674
Libraries/LibJS/Rust/build.rs
Normal file
@@ -0,0 +1,674 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
//! Build script that generates Rust bytecode instruction types from Bytecode.def.
|
||||
//!
|
||||
//! This mirrors Meta/generate-libjs-bytecode-def-derived.py but generates Rust
|
||||
//! code instead of C++. The generated code lives in $OUT_DIR/instruction_generated.rs
|
||||
//! and is pulled in with `include!` from src/bytecode/instruction.rs.
|
||||
|
||||
use std::env;
|
||||
use std::fmt::Write;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// .def file parser (mirrors Meta/libjs_bytecode_def.py)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// One `name: type` line declared inside an `op` block of Bytecode.def.
#[derive(Clone, Debug)]
struct Field {
    /// Field name as written to the left of the `:` (trimmed).
    name: String,
    /// Declared type to the right of the `:`, with any trailing `[]` removed.
    ty: String,
    /// `true` when the declared type ended in `[]`.
    is_array: bool,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct OpDef {
|
||||
name: String,
|
||||
fields: Vec<Field>,
|
||||
is_terminator: bool,
|
||||
}
|
||||
|
||||
fn parse_bytecode_def(path: &std::path::Path) -> Vec<OpDef> {
|
||||
let content = fs::read_to_string(path).expect("Failed to read Bytecode.def");
|
||||
let mut ops = Vec::new();
|
||||
let mut current: Option<OpDef> = None;
|
||||
let mut in_op = false;
|
||||
|
||||
for raw_line in content.lines() {
|
||||
let stripped = raw_line.trim();
|
||||
if stripped.is_empty() || stripped.starts_with("//") || stripped.starts_with('#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
if stripped.starts_with("op ") {
|
||||
assert!(!in_op, "Nested op blocks");
|
||||
in_op = true;
|
||||
let rest = stripped.strip_prefix("op ").unwrap().trim();
|
||||
let name = if let Some(idx) = rest.find('<') {
|
||||
rest[..idx].trim().to_string()
|
||||
} else {
|
||||
rest.to_string()
|
||||
};
|
||||
current = Some(OpDef {
|
||||
name,
|
||||
fields: Vec::new(),
|
||||
is_terminator: false,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if stripped == "endop" {
|
||||
assert!(in_op && current.is_some(), "endop without op");
|
||||
ops.push(current.take().unwrap());
|
||||
in_op = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if !in_op {
|
||||
continue;
|
||||
}
|
||||
|
||||
if stripped.starts_with('@') {
|
||||
if stripped == "@terminator" {
|
||||
current.as_mut().unwrap().is_terminator = true;
|
||||
}
|
||||
// @nothrow is C++-only, ignore
|
||||
continue;
|
||||
}
|
||||
|
||||
let (lhs, rhs) = stripped.split_once(':').expect("Malformed field line");
|
||||
let field_name = lhs.trim().to_string();
|
||||
let mut field_type = rhs.trim().to_string();
|
||||
let is_array = field_type.ends_with("[]");
|
||||
if is_array {
|
||||
field_type = field_type[..field_type.len() - 2].trim().to_string();
|
||||
}
|
||||
current.as_mut().unwrap().fields.push(Field {
|
||||
name: field_name,
|
||||
ty: field_type,
|
||||
is_array,
|
||||
});
|
||||
}
|
||||
assert!(!in_op, "Unclosed op block");
|
||||
|
||||
// Remove the base "Instruction" definition (not an actual opcode).
|
||||
ops.retain(|op| op.name != "Instruction");
|
||||
ops
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Type mapping: C++ types → Rust types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Looks up the layout description of a Bytecode.def field type.
///
/// Returns `(rust_type, c_alignment, c_size, encoding_kind)`.
///
/// # Panics
///
/// Panics with `Unknown field type: ...` when `ty` is not in the table.
fn field_type_info(ty: &str) -> (&'static str, usize, usize, &'static str) {
    // (def type, (rust type, C alignment, C size, encoding kind))
    const TABLE: &[(&str, (&str, usize, usize, &str))] = &[
        ("bool", ("bool", 1, 1, "bool")),
        ("u32", ("u32", 4, 4, "u32")),
        ("Operand", ("Operand", 4, 4, "operand")),
        ("Optional<Operand>", ("Option<Operand>", 4, 4, "optional_operand")),
        ("Label", ("Label", 4, 4, "label")),
        ("Optional<Label>", ("Option<Label>", 4, 8, "optional_label")),
        ("IdentifierTableIndex", ("IdentifierTableIndex", 4, 4, "u32_newtype")),
        (
            "Optional<IdentifierTableIndex>",
            ("Option<IdentifierTableIndex>", 4, 4, "optional_u32_newtype"),
        ),
        ("PropertyKeyTableIndex", ("PropertyKeyTableIndex", 4, 4, "u32_newtype")),
        ("StringTableIndex", ("StringTableIndex", 4, 4, "u32_newtype")),
        (
            "Optional<StringTableIndex>",
            ("Option<StringTableIndex>", 4, 4, "optional_u32_newtype"),
        ),
        ("RegexTableIndex", ("RegexTableIndex", 4, 4, "u32_newtype")),
        ("EnvironmentCoordinate", ("EnvironmentCoordinate", 4, 8, "env_coord")),
        ("Builtin", ("u8", 1, 1, "u8")),
        ("Completion::Type", ("u32", 4, 4, "u32")),
        ("IteratorHint", ("u32", 4, 4, "u32")),
        ("EnvironmentMode", ("u32", 4, 4, "u32")),
        ("ArgumentsKind", ("u32", 4, 4, "u32")),
        ("Value", ("u64", 8, 8, "u64")),
    ];

    TABLE
        .iter()
        .find_map(|&(name, info)| (name == ty).then_some(info))
        .unwrap_or_else(|| panic!("Unknown field type: {ty}"))
}
|
||||
|
||||
/// Converts a C++ member name to its Rust field name by dropping one leading
/// `m_` prefix, if present; other names are returned unchanged.
fn rust_field_name(name: &str) -> String {
    name.strip_prefix("m_").unwrap_or(name).to_owned()
}
|
||||
|
||||
/// Rounds `value` up to the nearest multiple of `align`.
///
/// Works for any non-zero `align`, not only powers of two; for power-of-two
/// alignments the result is identical to the usual
/// `(value + align - 1) & !(align - 1)` bit trick.
///
/// # Panics
///
/// Panics if `align` is zero.
fn round_up(value: usize, align: usize) -> usize {
    value.next_multiple_of(align)
}
|
||||
|
||||
/// Alignment, in bytes, of the C++ `Instruction` base class, which is declared
/// `alignas(void*)`. Hard-coded to 8, i.e. `alignof(void*)` on a 64-bit target.
// NOTE(review): presumably a 32-bit target would need 4 here; confirm whether
// such targets are expected to build this crate.
const STRUCT_ALIGN: usize = 8;
|
||||
|
||||
/// Compute the byte offset of the m_length field within the C++ struct.
|
||||
fn find_m_length_offset(fields: &[Field]) -> usize {
|
||||
let mut offset: usize = 2; // after m_type + m_strict
|
||||
for f in fields {
|
||||
if f.is_array {
|
||||
continue;
|
||||
}
|
||||
if f.name == "m_type" || f.name == "m_strict" {
|
||||
continue;
|
||||
}
|
||||
let (_, align, size, _) = field_type_info(&f.ty);
|
||||
offset = round_up(offset, align);
|
||||
if f.name == "m_length" {
|
||||
return offset;
|
||||
}
|
||||
offset += size;
|
||||
}
|
||||
panic!("m_length field not found");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Code generation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn generate_rust_code(ops: &[OpDef]) -> String {
|
||||
let mut out = String::with_capacity(64 * 1024);
|
||||
|
||||
writeln!(out, "// @generated from Libraries/LibJS/Bytecode/Bytecode.def").unwrap();
|
||||
writeln!(out, "// Do not edit manually.").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
writeln!(out, "use super::operand::*;").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
|
||||
// --- OpCode enum ---
|
||||
generate_opcode_enum(&mut out, ops);
|
||||
|
||||
// --- Instruction enum ---
|
||||
generate_instruction_enum(&mut out, ops);
|
||||
|
||||
// --- impl Instruction ---
|
||||
generate_instruction_impl(&mut out, ops);
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
fn generate_opcode_enum(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, "/// Bytecode opcode (u8), matching the C++ `Instruction::Type` enum.").unwrap();
|
||||
writeln!(out, "#[derive(Debug, Clone, Copy, PartialEq, Eq)]").unwrap();
|
||||
writeln!(out, "#[repr(u8)]").unwrap();
|
||||
writeln!(out, "pub enum OpCode {{").unwrap();
|
||||
for (i, op) in ops.iter().enumerate() {
|
||||
writeln!(out, " {} = {},", op.name, i).unwrap();
|
||||
}
|
||||
writeln!(out, "}}").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
}
|
||||
|
||||
fn generate_instruction_enum(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, "/// A bytecode instruction with typed fields.").unwrap();
|
||||
writeln!(out, "///").unwrap();
|
||||
writeln!(out, "/// Each variant corresponds to one C++ instruction class.").unwrap();
|
||||
writeln!(out, "/// During codegen, instructions are stored as these typed variants.").unwrap();
|
||||
writeln!(out, "/// During flattening, they are serialized to bytes matching C++ layout.").unwrap();
|
||||
writeln!(out, "#[derive(Debug, Clone)]").unwrap();
|
||||
writeln!(out, "pub enum Instruction {{").unwrap();
|
||||
for op in ops {
|
||||
let fields = user_fields(op);
|
||||
if fields.is_empty() {
|
||||
writeln!(out, " {},", op.name).unwrap();
|
||||
} else {
|
||||
writeln!(out, " {} {{", op.name).unwrap();
|
||||
for f in &fields {
|
||||
let (rust_ty, _, _, _) = field_type_info(&f.ty);
|
||||
let rname = rust_field_name(&f.name);
|
||||
if f.is_array {
|
||||
writeln!(out, " {}: Vec<{}>,", rname, rust_ty).unwrap();
|
||||
} else {
|
||||
writeln!(out, " {}: {},", rname, rust_ty).unwrap();
|
||||
}
|
||||
}
|
||||
writeln!(out, " }},").unwrap();
|
||||
}
|
||||
}
|
||||
writeln!(out, "}}").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
}
|
||||
|
||||
/// Returns the user-visible fields (excludes m_type, m_strict, m_length).
|
||||
fn user_fields(op: &OpDef) -> Vec<&Field> {
|
||||
op.fields
|
||||
.iter()
|
||||
.filter(|f| f.name != "m_type" && f.name != "m_strict" && f.name != "m_length")
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn generate_instruction_impl(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, "impl Instruction {{").unwrap();
|
||||
|
||||
// opcode()
|
||||
generate_opcode_method(out, ops);
|
||||
|
||||
// is_terminator()
|
||||
generate_is_terminator_method(out, ops);
|
||||
|
||||
// encode()
|
||||
generate_encode_method(out, ops);
|
||||
|
||||
// encoded_size()
|
||||
generate_encoded_size_method(out, ops);
|
||||
|
||||
// visit_operands()
|
||||
generate_visit_operands_method(out, ops);
|
||||
|
||||
// visit_labels()
|
||||
generate_visit_labels_method(out, ops);
|
||||
|
||||
writeln!(out, "}}").unwrap();
|
||||
}
|
||||
|
||||
fn generate_opcode_method(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, " pub fn opcode(&self) -> OpCode {{").unwrap();
|
||||
writeln!(out, " match self {{").unwrap();
|
||||
for op in ops {
|
||||
let fields = user_fields(op);
|
||||
if fields.is_empty() {
|
||||
writeln!(out, " Instruction::{} => OpCode::{},", op.name, op.name).unwrap();
|
||||
} else {
|
||||
writeln!(out, " Instruction::{} {{ .. }} => OpCode::{},", op.name, op.name).unwrap();
|
||||
}
|
||||
}
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
}
|
||||
|
||||
fn generate_is_terminator_method(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, " pub fn is_terminator(&self) -> bool {{").unwrap();
|
||||
writeln!(out, " matches!(self, ").unwrap();
|
||||
let terminators: Vec<&OpDef> = ops.iter().filter(|op| op.is_terminator).collect();
|
||||
for (i, op) in terminators.iter().enumerate() {
|
||||
let sep = if i + 1 < terminators.len() { " |" } else { "" };
|
||||
let fields = user_fields(op);
|
||||
if fields.is_empty() {
|
||||
writeln!(out, " Instruction::{}{}", op.name, sep).unwrap();
|
||||
} else {
|
||||
writeln!(out, " Instruction::{} {{ .. }}{}", op.name, sep).unwrap();
|
||||
}
|
||||
}
|
||||
writeln!(out, " )").unwrap();
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
}
|
||||
|
||||
fn generate_encoded_size_method(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, " /// Returns the encoded size of this instruction in bytes.").unwrap();
|
||||
writeln!(out, " pub fn encoded_size(&self) -> usize {{").unwrap();
|
||||
writeln!(out, " match self {{").unwrap();
|
||||
|
||||
for op in ops {
|
||||
let fields = user_fields(op);
|
||||
let has_array = op.fields.iter().any(|f| f.is_array);
|
||||
|
||||
if !has_array {
|
||||
// Fixed-length: compute size statically
|
||||
let mut offset: usize = 2; // header
|
||||
for f in &op.fields {
|
||||
if f.is_array || f.name == "m_type" || f.name == "m_strict" {
|
||||
continue;
|
||||
}
|
||||
let (_, align, size, _) = field_type_info(&f.ty);
|
||||
offset = round_up(offset, align);
|
||||
offset += size;
|
||||
}
|
||||
let final_size = round_up(offset, 8);
|
||||
let pat = if fields.is_empty() {
|
||||
format!("Instruction::{}", op.name)
|
||||
} else {
|
||||
format!("Instruction::{} {{ .. }}", op.name)
|
||||
};
|
||||
writeln!(out, " {} => {},", pat, final_size).unwrap();
|
||||
} else {
|
||||
// Variable-length: depends on array size
|
||||
// Compute fixed part size
|
||||
let mut fixed_offset: usize = 2;
|
||||
for f in &op.fields {
|
||||
if f.is_array || f.name == "m_type" || f.name == "m_strict" {
|
||||
continue;
|
||||
}
|
||||
let (_, align, size, _) = field_type_info(&f.ty);
|
||||
fixed_offset = round_up(fixed_offset, align);
|
||||
fixed_offset += size;
|
||||
}
|
||||
|
||||
// Find the array field and its element size
|
||||
let array_field = op.fields.iter().find(|f| f.is_array).unwrap();
|
||||
let (_, _elem_align, elem_size, _) = field_type_info(&array_field.ty);
|
||||
let arr_name = rust_field_name(&array_field.name);
|
||||
// C++ computes m_length as:
|
||||
// round_up(alignof(void*), sizeof(*this) + sizeof(elem) * count)
|
||||
// sizeof(*this) = round_up(fixed_offset, STRUCT_ALIGN) due to alignas(void*).
|
||||
let sizeof_this = round_up(fixed_offset, STRUCT_ALIGN);
|
||||
|
||||
// Bind only the array field
|
||||
let bindings: Vec<String> = fields
|
||||
.iter()
|
||||
.map(|f| {
|
||||
let rname = rust_field_name(&f.name);
|
||||
if rname == arr_name {
|
||||
rname
|
||||
} else {
|
||||
format!("{}: _", rname)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
writeln!(out, " Instruction::{} {{ {} }} => {{", op.name, bindings.join(", ")).unwrap();
|
||||
writeln!(out, " let base = {} + {}.len() * {};", sizeof_this, arr_name, elem_size).unwrap();
|
||||
writeln!(out, " (base + 7) & !7 // round up to 8").unwrap();
|
||||
writeln!(out, " }}").unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
}
|
||||
|
||||
fn generate_encode_method(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, " /// Encode this instruction into bytes matching the C++ struct layout.").unwrap();
|
||||
writeln!(out, " pub fn encode(&self, strict: bool, buf: &mut Vec<u8>) {{").unwrap();
|
||||
writeln!(out, " let start = buf.len();").unwrap();
|
||||
writeln!(out, " match self {{").unwrap();
|
||||
|
||||
for op in ops {
|
||||
let fields = user_fields(op);
|
||||
let has_array = op.fields.iter().any(|f| f.is_array);
|
||||
let has_m_length = op.fields.iter().any(|f| f.name == "m_length");
|
||||
|
||||
// Generate match arm with field bindings
|
||||
if fields.is_empty() {
|
||||
writeln!(out, " Instruction::{} => {{", op.name).unwrap();
|
||||
} else {
|
||||
let bindings: Vec<String> = fields
|
||||
.iter()
|
||||
.map(|f| rust_field_name(&f.name))
|
||||
.collect();
|
||||
writeln!(out, " Instruction::{} {{ {} }} => {{", op.name, bindings.join(", ")).unwrap();
|
||||
}
|
||||
|
||||
// Write header: opcode (u8) + strict (u8) = 2 bytes
|
||||
writeln!(out, " buf.push(OpCode::{} as u8);", op.name).unwrap();
|
||||
writeln!(out, " buf.push(strict as u8);").unwrap();
|
||||
|
||||
// Track offset for C++ struct layout.
|
||||
// We iterate ALL fields (including m_type, m_strict, m_length) for
|
||||
// accurate alignment but only emit writes for user fields.
|
||||
let mut offset: usize = 2;
|
||||
|
||||
// Iterate all non-array fields in declaration order
|
||||
for f in &op.fields {
|
||||
if f.is_array {
|
||||
continue;
|
||||
}
|
||||
// m_type and m_strict are already written as the header
|
||||
if f.name == "m_type" || f.name == "m_strict" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (_, align, size, kind) = field_type_info(&f.ty);
|
||||
|
||||
// Pad to alignment
|
||||
let aligned_offset = round_up(offset, align);
|
||||
let pad = aligned_offset - offset;
|
||||
if pad > 0 {
|
||||
writeln!(out, " buf.extend_from_slice(&[0u8; {}]);", pad).unwrap();
|
||||
}
|
||||
offset = aligned_offset;
|
||||
|
||||
if f.name == "m_length" {
|
||||
// Write placeholder (patched at end for variable-length instructions)
|
||||
writeln!(out, " buf.extend_from_slice(&[0u8; 4]); // m_length placeholder").unwrap();
|
||||
} else {
|
||||
let rname = rust_field_name(&f.name);
|
||||
emit_field_write(out, &rname, kind, false);
|
||||
}
|
||||
offset += size;
|
||||
}
|
||||
|
||||
// Write trailing array elements
|
||||
if has_array {
|
||||
// sizeof(*this) in C++ = round_up(fixed_offset, STRUCT_ALIGN)
|
||||
let sizeof_this = round_up(offset, STRUCT_ALIGN);
|
||||
|
||||
for f in &op.fields {
|
||||
if !f.is_array {
|
||||
continue;
|
||||
}
|
||||
let (_, elem_align, elem_size, elem_kind) = field_type_info(&f.ty);
|
||||
let rname = rust_field_name(&f.name);
|
||||
|
||||
// Pad before first element if needed
|
||||
let aligned_offset = round_up(offset, elem_align);
|
||||
let pad = aligned_offset - offset;
|
||||
if pad > 0 {
|
||||
writeln!(out, " buf.extend_from_slice(&[0u8; {}]);", pad).unwrap();
|
||||
}
|
||||
|
||||
writeln!(out, " for item in {} {{", rname).unwrap();
|
||||
emit_field_write(out, "item", elem_kind, true);
|
||||
writeln!(out, " }}").unwrap();
|
||||
|
||||
// Compute target size matching C++:
|
||||
// round_up(STRUCT_ALIGN, sizeof(*this) + count * elem_size)
|
||||
writeln!(out, " let target = ({} + {}.len() * {} + 7) & !7;",
|
||||
sizeof_this, rname, elem_size).unwrap();
|
||||
writeln!(out, " while (buf.len() - start) < target {{ buf.push(0); }}").unwrap();
|
||||
}
|
||||
|
||||
if has_m_length {
|
||||
// Patch m_length: it's the first u32 field after the header
|
||||
let m_length_offset = find_m_length_offset(&op.fields);
|
||||
writeln!(out, " let total_len = (buf.len() - start) as u32;").unwrap();
|
||||
writeln!(out, " buf[start + {}..start + {}].copy_from_slice(&total_len.to_ne_bytes());",
|
||||
m_length_offset, m_length_offset + 4).unwrap();
|
||||
}
|
||||
} else {
|
||||
// Fixed-length: pad statically
|
||||
let final_size = round_up(offset, 8);
|
||||
let tail_pad = final_size - offset;
|
||||
if tail_pad > 0 {
|
||||
writeln!(out, " buf.extend_from_slice(&[0u8; {}]);", tail_pad).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, " }}").unwrap();
|
||||
}
|
||||
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
}
|
||||
|
||||
/// Appends generated statements that serialize one field value into `buf`.
///
/// `name` is the binding to read from and `kind` selects the encoding.
/// Bindings produced by pattern matching and by loop iteration are references,
/// so the generated code dereferences explicitly where a cast or a direct
/// `push` needs the value, and relies on auto-deref for method calls.
///
/// # Panics
/// Panics if `kind` is not a known encoding kind.
fn emit_field_write(out: &mut String, name: &str, kind: &str, is_loop_item: bool) {
    // NOTE(review): both branches currently yield the same prefix; confirm
    // whether loop items were meant to be indented more deeply.
    let prefix = match is_loop_item {
        true => " ",
        false => " ",
    };

    // Writes one generated line: prefix, text, newline.
    let mut line = |text: &str| {
        out.push_str(prefix);
        out.push_str(text);
        out.push('\n');
    };

    match kind {
        "bool" => line(&format!("buf.push(*{} as u8);", name)),
        "u8" => line(&format!("buf.push(*{});", name)),
        "u32" | "u64" => line(&format!("buf.extend_from_slice(&{}.to_ne_bytes());", name)),
        "operand" => line(&format!("buf.extend_from_slice(&{}.raw().to_ne_bytes());", name)),
        "optional_operand" => {
            line(&format!("match {} {{", name));
            line(" Some(op) => buf.extend_from_slice(&op.raw().to_ne_bytes()),");
            line(" None => buf.extend_from_slice(&Operand::INVALID.to_ne_bytes()),");
            line("}");
        }
        "label" | "u32_newtype" => {
            line(&format!("buf.extend_from_slice(&{}.0.to_ne_bytes());", name))
        }
        "optional_label" => {
            // C++ Optional<Label> layout: u32 value, bool has_value, 3 bytes padding = 8 bytes total
            line(&format!("match {} {{", name));
            line(" Some(lbl) => {");
            line(" buf.extend_from_slice(&lbl.0.to_ne_bytes());");
            line(" buf.push(1); buf.push(0); buf.push(0); buf.push(0);");
            line(" }");
            line(" None => {");
            line(" buf.extend_from_slice(&0u32.to_ne_bytes());");
            line(" buf.push(0); buf.push(0); buf.push(0); buf.push(0);");
            line(" }");
            line("}");
        }
        "optional_u32_newtype" => {
            line(&format!("match {} {{", name));
            line(" Some(idx) => buf.extend_from_slice(&idx.0.to_ne_bytes()),");
            line(" None => buf.extend_from_slice(&0xFFFF_FFFFu32.to_ne_bytes()),");
            line("}");
        }
        "env_coord" => {
            line(&format!("buf.extend_from_slice(&{}.hops.to_ne_bytes());", name));
            line(&format!("buf.extend_from_slice(&{}.index.to_ne_bytes());", name));
        }
        other => panic!("Unknown encoding kind: {other}"),
    }
}
|
||||
|
||||
fn generate_visit_operands_method(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, " /// Visit all `Operand` fields (for operand rewriting).").unwrap();
|
||||
writeln!(out, " pub fn visit_operands(&mut self, visitor: &mut dyn FnMut(&mut Operand)) {{").unwrap();
|
||||
writeln!(out, " match self {{").unwrap();
|
||||
|
||||
for op in ops {
|
||||
let fields = user_fields(op);
|
||||
let operand_fields: Vec<&&Field> = fields
|
||||
.iter()
|
||||
.filter(|f| f.ty == "Operand" || f.ty == "Optional<Operand>")
|
||||
.collect();
|
||||
|
||||
if operand_fields.is_empty() {
|
||||
let pat = if fields.is_empty() {
|
||||
format!("Instruction::{}", op.name)
|
||||
} else {
|
||||
format!("Instruction::{} {{ .. }}", op.name)
|
||||
};
|
||||
writeln!(out, " {} => {{}}", pat).unwrap();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Bind the operand fields
|
||||
let bindings: Vec<String> = fields
|
||||
.iter()
|
||||
.map(|f| {
|
||||
let rname = rust_field_name(&f.name);
|
||||
if f.ty == "Operand" || f.ty == "Optional<Operand>" {
|
||||
rname
|
||||
} else {
|
||||
format!("{}: _", rname)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
writeln!(out, " Instruction::{} {{ {} }} => {{", op.name, bindings.join(", ")).unwrap();
|
||||
|
||||
for f in &operand_fields {
|
||||
let rname = rust_field_name(&f.name);
|
||||
if f.is_array {
|
||||
if f.ty == "Optional<Operand>" {
|
||||
writeln!(out, " for op in {}.iter_mut().flatten() {{ visitor(op); }}", rname).unwrap();
|
||||
} else {
|
||||
writeln!(out, " for item in {}.iter_mut() {{ visitor(item); }}", rname).unwrap();
|
||||
}
|
||||
} else if f.ty == "Optional<Operand>" {
|
||||
writeln!(out, " if let Some(op) = {} {{ visitor(op); }}", rname).unwrap();
|
||||
} else {
|
||||
writeln!(out, " visitor({});", rname).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, " }}").unwrap();
|
||||
}
|
||||
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out).unwrap();
|
||||
}
|
||||
|
||||
fn generate_visit_labels_method(out: &mut String, ops: &[OpDef]) {
|
||||
writeln!(out, " /// Visit all `Label` fields (for label linking).").unwrap();
|
||||
writeln!(out, " pub fn visit_labels(&mut self, visitor: &mut dyn FnMut(&mut Label)) {{").unwrap();
|
||||
writeln!(out, " match self {{").unwrap();
|
||||
|
||||
for op in ops {
|
||||
let fields = user_fields(op);
|
||||
let label_fields: Vec<&&Field> = fields
|
||||
.iter()
|
||||
.filter(|f| f.ty == "Label" || f.ty == "Optional<Label>")
|
||||
.collect();
|
||||
|
||||
if label_fields.is_empty() {
|
||||
let pat = if fields.is_empty() {
|
||||
format!("Instruction::{}", op.name)
|
||||
} else {
|
||||
format!("Instruction::{} {{ .. }}", op.name)
|
||||
};
|
||||
writeln!(out, " {} => {{}}", pat).unwrap();
|
||||
continue;
|
||||
}
|
||||
|
||||
let bindings: Vec<String> = fields
|
||||
.iter()
|
||||
.map(|f| {
|
||||
let rname = rust_field_name(&f.name);
|
||||
if f.ty == "Label" || f.ty == "Optional<Label>" {
|
||||
rname
|
||||
} else {
|
||||
format!("{}: _", rname)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
writeln!(out, " Instruction::{} {{ {} }} => {{", op.name, bindings.join(", ")).unwrap();
|
||||
|
||||
for f in &label_fields {
|
||||
let rname = rust_field_name(&f.name);
|
||||
if f.is_array {
|
||||
if f.ty == "Optional<Label>" {
|
||||
writeln!(out, " for item in {}.iter_mut() {{", rname).unwrap();
|
||||
writeln!(out, " if let Some(lbl) = item {{ visitor(lbl); }}").unwrap();
|
||||
writeln!(out, " }}").unwrap();
|
||||
} else {
|
||||
writeln!(out, " for item in {}.iter_mut() {{ visitor(item); }}", rname).unwrap();
|
||||
}
|
||||
} else if f.ty == "Optional<Label>" {
|
||||
writeln!(out, " if let Some(lbl) = {} {{ visitor(lbl); }}", rname).unwrap();
|
||||
} else {
|
||||
writeln!(out, " visitor({});", rname).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(out, " }}").unwrap();
|
||||
}
|
||||
|
||||
writeln!(out, " }}").unwrap();
|
||||
writeln!(out, " }}").unwrap();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn main() {
|
||||
let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
|
||||
let def_path = manifest_dir.join("../Bytecode/Bytecode.def");
|
||||
|
||||
println!("cargo:rerun-if-changed={}", def_path.display());
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
|
||||
let ops = parse_bytecode_def(&def_path);
|
||||
let code = generate_rust_code(&ops);
|
||||
|
||||
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
||||
fs::write(out_dir.join("instruction_generated.rs"), &code).unwrap();
|
||||
}
|
||||
1491
Libraries/LibJS/Rust/src/ast.rs
Normal file
1491
Libraries/LibJS/Rust/src/ast.rs
Normal file
File diff suppressed because it is too large
Load Diff
1491
Libraries/LibJS/Rust/src/ast_dump.rs
Normal file
1491
Libraries/LibJS/Rust/src/ast_dump.rs
Normal file
File diff suppressed because it is too large
Load Diff
53
Libraries/LibJS/Rust/src/bytecode/basic_block.rs
Normal file
53
Libraries/LibJS/Rust/src/bytecode/basic_block.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
use super::instruction::Instruction;
|
||||
use super::operand::Label;
|
||||
|
||||
/// Associates one bytecode offset with the source range it was generated from.
#[derive(Debug, Clone, Copy)]
pub struct SourceMapEntry {
    /// Offset into the bytecode that this entry describes.
    pub bytecode_offset: u32,
    /// Start of the corresponding source range.
    pub source_start: u32,
    /// End of the corresponding source range.
    pub source_end: u32,
}
|
||||
|
||||
/// A basic block in the bytecode generator.
|
||||
///
|
||||
/// During codegen, instructions are appended as typed `Instruction` enum
|
||||
/// variants. During flattening (compile/assemble), instructions are
|
||||
/// serialized into the final byte stream.
|
||||
pub struct BasicBlock {
|
||||
pub index: u32,
|
||||
pub instructions: Vec<(Instruction, SourceMapEntry)>,
|
||||
pub handler: Option<Label>,
|
||||
pub terminated: bool,
|
||||
pub resolved_this: bool,
|
||||
}
|
||||
|
||||
impl BasicBlock {
|
||||
pub fn new(index: u32) -> Self {
|
||||
Self {
|
||||
index,
|
||||
instructions: Vec::new(),
|
||||
handler: None,
|
||||
terminated: false,
|
||||
resolved_this: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn append(&mut self, instruction: Instruction, source_map: SourceMapEntry) {
|
||||
let is_terminator = instruction.is_terminator();
|
||||
self.instructions.push((instruction, source_map));
|
||||
if is_terminator {
|
||||
self.terminated = true;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.instructions.is_empty()
|
||||
}
|
||||
}
|
||||
8687
Libraries/LibJS/Rust/src/bytecode/codegen.rs
Normal file
8687
Libraries/LibJS/Rust/src/bytecode/codegen.rs
Normal file
File diff suppressed because it is too large
Load Diff
545
Libraries/LibJS/Rust/src/bytecode/ffi.rs
Normal file
545
Libraries/LibJS/Rust/src/bytecode/ffi.rs
Normal file
@@ -0,0 +1,545 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
//! FFI bridge between the codegen and C++ runtime.
|
||||
//!
|
||||
//! This module handles the boundary between the bytecode generator
|
||||
//! and the C++ `Bytecode::Executable` / `SharedFunctionInstanceData` types.
|
||||
//!
|
||||
//! ## Key operations
|
||||
//!
|
||||
//! - `create_executable()` -- packages assembled bytecode, tables, and
|
||||
//! metadata into a C++ `Bytecode::Executable` via `rust_create_executable()`
|
||||
//! - `create_shared_function_data()` -- creates a C++ `SharedFunctionInstanceData`
|
||||
//! for a parsed function, transferring ownership of the AST
|
||||
//! - `compile_regex()` -- delegates regex compilation to the C++ regex engine
|
||||
//!
|
||||
//! ## FFI types
|
||||
//!
|
||||
//! All `FFI*` structs are `#[repr(C)]` and must match their counterparts
|
||||
//! in `BytecodeFactory.h`. Changes to field order or types here require
|
||||
//! corresponding changes on the C++ side.
|
||||
|
||||
use std::ffi::c_void;
|
||||
|
||||
use super::generator::{AssembledBytecode, ConstantValue, Generator};
|
||||
use crate::ast::Utf16String;
|
||||
use crate::u32_from_usize;
|
||||
|
||||
/// Opaque handle to the object returned by `rust_create_executable`.
pub type ExecutableHandle = *mut c_void;
|
||||
|
||||
// FFI types matching BytecodeFactory.h.
|
||||
|
||||
/// Exception handler range (C++ `BytecodeFactory::ExceptionHandlerData`).
///
/// Plain `#[repr(C)]` data; the field order must match the C++ definition.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct FFIExceptionHandler {
    /// Start offset of the range.
    pub start_offset: u32,
    /// End offset of the range.
    pub end_offset: u32,
    /// Offset of the handler for this range.
    pub handler_offset: u32,
}
|
||||
|
||||
/// Source map entry mapping a bytecode offset to a source range.
///
/// Plain `#[repr(C)]` data; the field order must match the C++ definition.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct FFISourceMapEntry {
    /// Offset into the bytecode that this entry describes.
    pub bytecode_offset: u32,
    /// Start of the corresponding source range.
    pub source_start: u32,
    /// End of the corresponding source range.
    pub source_end: u32,
}
|
||||
|
||||
/// Borrowed view of UTF-16 code units for passing across the FFI boundary.
///
/// The pointer refers to Rust-owned memory and is only valid for the
/// duration of the FFI call it is passed to.
#[repr(C)]
pub struct FFIUtf16Slice {
    /// Pointer to the first code unit.
    pub data: *const u16,
    /// Number of `u16` code units.
    pub length: usize,
}

impl From<&[u16]> for FFIUtf16Slice {
    /// Captures the slice's pointer and element count without copying.
    fn from(slice: &[u16]) -> Self {
        let length = slice.len();
        let data = slice.as_ptr();
        FFIUtf16Slice { data, length }
    }
}
|
||||
|
||||
impl From<&Utf16String> for FFIUtf16Slice {
|
||||
fn from(s: &Utf16String) -> Self {
|
||||
Self { data: s.as_ptr(), length: s.len() }
|
||||
}
|
||||
}
|
||||
|
||||
/// C-compatible stand-in for `Optional<u32>` (C++ has no standard ABI for
/// optionals).
#[repr(C)]
#[derive(Clone, Copy)]
pub struct FFIOptionalU32 {
    /// The payload; `0` when constructed via [`FFIOptionalU32::none`].
    pub value: u32,
    /// Whether `value` holds a real value.
    pub has_value: bool,
}

impl FFIOptionalU32 {
    /// The empty optional: `value` is 0 and `has_value` is false.
    pub fn none() -> Self {
        FFIOptionalU32 { value: 0, has_value: false }
    }

    /// An optional holding `value`.
    pub fn some(value: u32) -> Self {
        FFIOptionalU32 { value, has_value: true }
    }
}

impl From<Option<u32>> for FFIOptionalU32 {
    /// Maps `Some(v)` to [`FFIOptionalU32::some`] and `None` to
    /// [`FFIOptionalU32::none`].
    fn from(opt: Option<u32>) -> Self {
        opt.map_or_else(Self::none, Self::some)
    }
}
|
||||
|
||||
/// Class element descriptor for ClassBlueprint creation
|
||||
/// (C++ `BytecodeFactory::ClassElementData`).
|
||||
#[repr(C)]
|
||||
pub struct FFIClassElement {
|
||||
pub kind: u8, // ClassElementKind
|
||||
pub is_static: bool,
|
||||
pub is_private: bool,
|
||||
pub private_identifier: *const u16,
|
||||
pub private_identifier_len: usize,
|
||||
pub shared_function_data_index: FFIOptionalU32,
|
||||
pub has_initializer: bool,
|
||||
pub literal_value_kind: u8, // LiteralValueKind
|
||||
pub literal_value_number: f64,
|
||||
pub literal_value_string: *const u16,
|
||||
pub literal_value_string_len: usize,
|
||||
}
|
||||
|
||||
/// Data for creating a C++ `SharedFunctionInstanceData`.
|
||||
/// Passed to `rust_create_sfd()`.
|
||||
#[repr(C)]
|
||||
pub struct FFISharedFunctionData {
|
||||
pub name: *const u16,
|
||||
pub name_len: usize,
|
||||
pub function_kind: u8,
|
||||
pub function_length: i32,
|
||||
pub formal_parameter_count: u32,
|
||||
pub strict: bool,
|
||||
pub is_arrow: bool,
|
||||
pub has_simple_parameter_list: bool,
|
||||
pub parameter_names: *const FFIUtf16Slice,
|
||||
pub parameter_name_count: usize,
|
||||
pub source_text_offset: usize,
|
||||
pub source_text_length: usize,
|
||||
pub rust_function_ast: *mut c_void,
|
||||
pub uses_this: bool,
|
||||
pub uses_this_from_environment: bool,
|
||||
}
|
||||
|
||||
/// All data needed to create a C++ `Bytecode::Executable`.
|
||||
/// Passed to `rust_create_executable()`.
|
||||
#[repr(C)]
|
||||
pub struct FFIExecutableData {
|
||||
pub bytecode: *const u8,
|
||||
pub bytecode_length: usize,
|
||||
pub identifier_table: *const FFIUtf16Slice,
|
||||
pub identifier_count: usize,
|
||||
pub property_key_table: *const FFIUtf16Slice,
|
||||
pub property_key_count: usize,
|
||||
pub string_table: *const FFIUtf16Slice,
|
||||
pub string_count: usize,
|
||||
pub constants_data: *const u8,
|
||||
pub constants_data_length: usize,
|
||||
pub constants_count: usize,
|
||||
pub exception_handlers: *const FFIExceptionHandler,
|
||||
pub exception_handler_count: usize,
|
||||
pub source_map: *const FFISourceMapEntry,
|
||||
pub source_map_count: usize,
|
||||
pub basic_block_offsets: *const usize,
|
||||
pub basic_block_count: usize,
|
||||
pub local_variable_names: *const FFIUtf16Slice,
|
||||
pub local_variable_count: usize,
|
||||
pub property_lookup_cache_count: u32,
|
||||
pub global_variable_cache_count: u32,
|
||||
pub template_object_cache_count: u32,
|
||||
pub object_shape_cache_count: u32,
|
||||
pub number_of_registers: u32,
|
||||
pub is_strict: bool,
|
||||
pub length_identifier: FFIOptionalU32,
|
||||
pub shared_function_data: *const *const c_void,
|
||||
pub shared_function_data_count: usize,
|
||||
pub class_blueprints: *const *mut c_void,
|
||||
pub class_blueprint_count: usize,
|
||||
pub compiled_regexes: *const *mut c_void,
|
||||
pub regex_count: usize,
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn rust_create_executable(
|
||||
vm_ptr: *mut c_void,
|
||||
source_code_ptr: *const c_void,
|
||||
data: *const FFIExecutableData,
|
||||
) -> *mut c_void;
|
||||
|
||||
pub fn rust_create_sfd(
|
||||
vm_ptr: *mut c_void,
|
||||
source_code_ptr: *const c_void,
|
||||
data: *const FFISharedFunctionData,
|
||||
) -> *mut c_void;
|
||||
|
||||
pub fn rust_sfd_set_class_field_initializer_name(
|
||||
sfd_ptr: *mut c_void,
|
||||
name: *const u16,
|
||||
name_len: usize,
|
||||
is_private: bool,
|
||||
);
|
||||
|
||||
pub fn rust_create_class_blueprint(
|
||||
vm_ptr: *mut c_void,
|
||||
source_code_ptr: *const c_void,
|
||||
name: *const u16,
|
||||
name_len: usize,
|
||||
source_text_offset: usize,
|
||||
source_text_len: usize,
|
||||
constructor_sfd_index: u32,
|
||||
has_super_class: bool,
|
||||
has_name: bool,
|
||||
elements: *const FFIClassElement,
|
||||
element_count: usize,
|
||||
) -> *mut c_void;
|
||||
|
||||
// Callbacks for populating Script GDI data from Rust.
|
||||
pub fn script_gdi_push_lexical_name(ctx: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn script_gdi_push_var_name(ctx: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn script_gdi_push_function(ctx: *mut c_void, sfd: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn script_gdi_push_var_scoped_name(ctx: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn script_gdi_push_annex_b_name(ctx: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn script_gdi_push_lexical_binding(ctx: *mut c_void, name: *const u16, len: usize, is_constant: bool);
|
||||
|
||||
// Callbacks for populating eval EDI data from Rust.
|
||||
pub fn eval_gdi_set_strict(ctx: *mut c_void, is_strict: bool);
|
||||
pub fn eval_gdi_push_var_name(ctx: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn eval_gdi_push_function(ctx: *mut c_void, sfd: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn eval_gdi_push_var_scoped_name(ctx: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn eval_gdi_push_annex_b_name(ctx: *mut c_void, name: *const u16, len: usize);
|
||||
pub fn eval_gdi_push_lexical_binding(ctx: *mut c_void, name: *const u16, len: usize, is_constant: bool);
|
||||
|
||||
pub fn rust_compile_regex(
|
||||
pattern_data: *const u16,
|
||||
pattern_len: usize,
|
||||
flags_data: *const u16,
|
||||
flags_len: usize,
|
||||
error_out: *mut *const std::os::raw::c_char,
|
||||
) -> *mut c_void;
|
||||
|
||||
pub fn rust_free_error_string(str: *const std::os::raw::c_char);
|
||||
|
||||
pub fn rust_number_to_utf16(value: f64, buffer: *mut u16, buffer_len: usize) -> usize;
|
||||
|
||||
// Get a well-known symbol as an opaque Value.
|
||||
// symbol_id: 0 = Symbol.iterator, 1 = Symbol.asyncIterator
|
||||
pub fn get_well_known_symbol(vm_ptr: *mut c_void, symbol_id: u32) -> u64;
|
||||
|
||||
// Get an intrinsic abstract operation function as an opaque Value.
|
||||
// name/name_len is the function name (e.g. "GetMethod").
|
||||
pub fn get_abstract_operation_function(vm_ptr: *mut c_void, name: *const u16, name_len: usize) -> u64;
|
||||
}
|
||||
|
||||
/// Create a SharedFunctionInstanceData from a FunctionData.
|
||||
///
|
||||
/// Computes has_simple_parameter_list, builds parameter name slices,
|
||||
/// transfers ownership of the Box to C++ via `Box::into_raw`, and
|
||||
/// calls `rust_create_sfd`.
|
||||
///
|
||||
/// Used by both `emit_new_function` in codegen.rs (for function
|
||||
/// expressions/declarations) and `create_sfd_for_gdi` below (for
|
||||
/// top-level GDI function initialization).
|
||||
///
|
||||
/// # Safety
|
||||
/// `vm_ptr` and `source_code_ptr` must be valid pointers.
|
||||
#[allow(clippy::boxed_local)] // Callers produce Box<FunctionData>; unboxing would copy a large struct.
|
||||
pub unsafe fn create_shared_function_data(
|
||||
function_data: Box<crate::ast::FunctionData>,
|
||||
subtable: crate::ast::FunctionTable,
|
||||
vm_ptr: *mut c_void,
|
||||
source_code_ptr: *const c_void,
|
||||
is_strict: bool,
|
||||
name_override: Option<&[u16]>,
|
||||
) -> *mut c_void {
|
||||
use crate::ast::FunctionParameterBinding;
|
||||
|
||||
let source_start = function_data.source_text_start as usize;
|
||||
let source_end = function_data.source_text_end as usize;
|
||||
let source_text_len = source_end - source_start;
|
||||
|
||||
let (name_ptr, name_len) = if let Some(name) = name_override {
|
||||
(name.as_ptr(), name.len())
|
||||
} else if let Some(ref name_ident) = function_data.name {
|
||||
(name_ident.name.as_ptr(), name_ident.name.len())
|
||||
} else {
|
||||
(std::ptr::null(), 0)
|
||||
};
|
||||
|
||||
let has_simple_parameter_list = function_data.parameters.iter().all(|p| {
|
||||
!p.is_rest
|
||||
&& p.default_value.is_none()
|
||||
&& matches!(p.binding, FunctionParameterBinding::Identifier(_))
|
||||
});
|
||||
|
||||
let parameter_name_slices: Vec<FFIUtf16Slice> = if has_simple_parameter_list {
|
||||
function_data
|
||||
.parameters
|
||||
.iter()
|
||||
.map(|p| {
|
||||
if let FunctionParameterBinding::Identifier(ref id) = p.binding {
|
||||
FFIUtf16Slice::from(id.name.as_ref())
|
||||
} else {
|
||||
unreachable!("has_simple_parameter_list guarantees all bindings are identifiers")
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let function_kind = function_data.kind as u8;
|
||||
let strict = function_data.is_strict_mode || is_strict;
|
||||
let function_length = function_data.function_length;
|
||||
let formal_parameter_count = u32_from_usize(function_data.parameters.len());
|
||||
let is_arrow = function_data.is_arrow_function;
|
||||
let uses_this = function_data.parsing_insights.uses_this;
|
||||
let uses_this_from_environment = function_data.parsing_insights.uses_this_from_environment;
|
||||
|
||||
let payload = Box::new(crate::ast::FunctionPayload {
|
||||
data: *function_data,
|
||||
function_table: subtable,
|
||||
});
|
||||
let rust_ast_ptr = Box::into_raw(payload) as *mut c_void;
|
||||
|
||||
let ffi_data = FFISharedFunctionData {
|
||||
name: name_ptr,
|
||||
name_len,
|
||||
function_kind,
|
||||
function_length,
|
||||
formal_parameter_count,
|
||||
strict,
|
||||
is_arrow,
|
||||
has_simple_parameter_list,
|
||||
parameter_names: parameter_name_slices.as_ptr(),
|
||||
parameter_name_count: parameter_name_slices.len(),
|
||||
source_text_offset: source_start,
|
||||
source_text_length: source_text_len,
|
||||
rust_function_ast: rust_ast_ptr,
|
||||
uses_this,
|
||||
uses_this_from_environment,
|
||||
};
|
||||
|
||||
let sfd_ptr = rust_create_sfd(vm_ptr, source_code_ptr, &ffi_data);
|
||||
|
||||
assert!(!sfd_ptr.is_null(), "create_shared_function_data: rust_create_sfd returned null");
|
||||
sfd_ptr
|
||||
}
|
||||
|
||||
/// Create a SharedFunctionInstanceData for GDI use (no name override).
|
||||
///
|
||||
/// # Safety
|
||||
/// `vm_ptr` and `source_code_ptr` must be valid pointers.
|
||||
pub unsafe fn create_sfd_for_gdi(
|
||||
function_data: Box<crate::ast::FunctionData>,
|
||||
subtable: crate::ast::FunctionTable,
|
||||
vm_ptr: *mut c_void,
|
||||
source_code_ptr: *const c_void,
|
||||
is_strict: bool,
|
||||
) -> *mut c_void {
|
||||
create_shared_function_data(function_data, subtable, vm_ptr, source_code_ptr, is_strict, None)
|
||||
}
|
||||
|
||||
/// One-byte tags that prefix each entry of the FFI constant buffer
/// (ABI-compatible with BytecodeFactory).
///
/// The numeric values are part of the wire format produced by
/// `encode_constants`; they must not be renumbered.
#[repr(u8)]
enum ConstantTag {
    /// A number; the tag is followed by the value's little-endian bytes.
    Number = 0,
    /// The boolean `true`; no payload.
    BooleanTrue = 1,
    /// The boolean `false`; no payload.
    BooleanFalse = 2,
    /// `null`; no payload.
    Null = 3,
    /// `undefined`; no payload.
    Undefined = 4,
    /// The empty value; no payload.
    Empty = 5,
    /// A UTF-16 string; the tag is followed by a length and the code units.
    String = 6,
    /// A BigInt; the tag is followed by a length and the textual bytes.
    BigInt = 7,
    /// An already-encoded value; the tag is followed by its little-endian bytes.
    RawValue = 8,
}
|
||||
|
||||
/// Encode constants into a tagged byte buffer for FFI.
|
||||
fn encode_constants(constants: &[ConstantValue]) -> Vec<u8> {
|
||||
let mut buffer = Vec::new();
|
||||
for c in constants {
|
||||
match c {
|
||||
ConstantValue::Number(v) => {
|
||||
buffer.push(ConstantTag::Number as u8);
|
||||
buffer.extend_from_slice(&v.to_le_bytes());
|
||||
}
|
||||
ConstantValue::Boolean(true) => buffer.push(ConstantTag::BooleanTrue as u8),
|
||||
ConstantValue::Boolean(false) => buffer.push(ConstantTag::BooleanFalse as u8),
|
||||
ConstantValue::Null => buffer.push(ConstantTag::Null as u8),
|
||||
ConstantValue::Undefined => buffer.push(ConstantTag::Undefined as u8),
|
||||
ConstantValue::Empty => buffer.push(ConstantTag::Empty as u8),
|
||||
ConstantValue::String(s) => {
|
||||
buffer.push(ConstantTag::String as u8);
|
||||
let len = u32_from_usize(s.len());
|
||||
buffer.extend_from_slice(&len.to_le_bytes());
|
||||
for &code_unit in s {
|
||||
buffer.extend_from_slice(&code_unit.to_le_bytes());
|
||||
}
|
||||
}
|
||||
ConstantValue::BigInt(s) => {
|
||||
buffer.push(ConstantTag::BigInt as u8);
|
||||
let len = u32_from_usize(s.len());
|
||||
buffer.extend_from_slice(&len.to_le_bytes());
|
||||
buffer.extend_from_slice(s.as_bytes());
|
||||
}
|
||||
ConstantValue::RawValue(encoded) => {
|
||||
buffer.push(ConstantTag::RawValue as u8);
|
||||
buffer.extend_from_slice(&encoded.to_le_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
buffer
|
||||
}
|
||||
|
||||
/// Create a C++ Executable from the generator's assembled output.
|
||||
///
|
||||
/// # Safety
|
||||
/// `vm_ptr` must be a valid `JS::VM*` and `source_code_ptr` a valid
|
||||
/// `JS::SourceCode const*`.
|
||||
pub unsafe fn create_executable(
|
||||
gen: &Generator,
|
||||
assembled: &AssembledBytecode,
|
||||
vm_ptr: *mut c_void,
|
||||
source_code_ptr: *const c_void,
|
||||
) -> ExecutableHandle {
|
||||
// Build FFI slices for tables
|
||||
let ident_slices: Vec<FFIUtf16Slice> = gen
|
||||
.identifier_table
|
||||
.iter()
|
||||
.map(|s| FFIUtf16Slice::from(s.as_ref()))
|
||||
.collect();
|
||||
|
||||
let property_key_slices: Vec<FFIUtf16Slice> = gen
|
||||
.property_key_table
|
||||
.iter()
|
||||
.map(|s| FFIUtf16Slice::from(s.as_ref()))
|
||||
.collect();
|
||||
|
||||
let string_slices: Vec<FFIUtf16Slice> = gen
|
||||
.string_table
|
||||
.iter()
|
||||
.map(|s| FFIUtf16Slice::from(s.as_ref()))
|
||||
.collect();
|
||||
|
||||
// Encode constants
|
||||
let constants_buffer = encode_constants(&gen.constants);
|
||||
|
||||
// Build FFI exception handlers
|
||||
let ffi_handlers: Vec<FFIExceptionHandler> = assembled
|
||||
.exception_handlers
|
||||
.iter()
|
||||
.map(|h| FFIExceptionHandler {
|
||||
start_offset: h.start_offset,
|
||||
end_offset: h.end_offset,
|
||||
handler_offset: h.handler_offset,
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Build FFI source map
|
||||
let ffi_source_map: Vec<FFISourceMapEntry> = assembled
|
||||
.source_map
|
||||
.iter()
|
||||
.map(|e| FFISourceMapEntry {
|
||||
bytecode_offset: e.bytecode_offset,
|
||||
source_start: e.source_start,
|
||||
source_end: e.source_end,
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Build local variable name slices
|
||||
let local_var_slices: Vec<FFIUtf16Slice> = gen
|
||||
.local_variables
|
||||
.iter()
|
||||
.map(|v| FFIUtf16Slice::from(v.name.as_ref()))
|
||||
.collect();
|
||||
|
||||
// Collect shared function data pointers
|
||||
let sfd_ptrs: Vec<*const c_void> = gen
|
||||
.shared_function_data
|
||||
.iter()
|
||||
.map(|ptr| *ptr as *const c_void)
|
||||
.collect();
|
||||
|
||||
// Collect class blueprint pointers
|
||||
let bp_ptrs = &gen.class_blueprints;
|
||||
|
||||
let ffi_data = FFIExecutableData {
|
||||
bytecode: assembled.bytecode.as_ptr(),
|
||||
bytecode_length: assembled.bytecode.len(),
|
||||
identifier_table: ident_slices.as_ptr(),
|
||||
identifier_count: ident_slices.len(),
|
||||
property_key_table: property_key_slices.as_ptr(),
|
||||
property_key_count: property_key_slices.len(),
|
||||
string_table: string_slices.as_ptr(),
|
||||
string_count: string_slices.len(),
|
||||
constants_data: constants_buffer.as_ptr(),
|
||||
constants_data_length: constants_buffer.len(),
|
||||
constants_count: gen.constants.len(),
|
||||
exception_handlers: ffi_handlers.as_ptr(),
|
||||
exception_handler_count: ffi_handlers.len(),
|
||||
source_map: ffi_source_map.as_ptr(),
|
||||
source_map_count: ffi_source_map.len(),
|
||||
basic_block_offsets: assembled.basic_block_start_offsets.as_ptr(),
|
||||
basic_block_count: assembled.basic_block_start_offsets.len(),
|
||||
local_variable_names: local_var_slices.as_ptr(),
|
||||
local_variable_count: local_var_slices.len(),
|
||||
property_lookup_cache_count: gen.next_property_lookup_cache,
|
||||
global_variable_cache_count: gen.next_global_variable_cache,
|
||||
template_object_cache_count: gen.next_template_object_cache,
|
||||
object_shape_cache_count: gen.next_object_shape_cache,
|
||||
number_of_registers: assembled.number_of_registers,
|
||||
is_strict: gen.strict,
|
||||
length_identifier: FFIOptionalU32::from(gen.length_identifier.map(|index| index.0)),
|
||||
shared_function_data: sfd_ptrs.as_ptr(),
|
||||
shared_function_data_count: sfd_ptrs.len(),
|
||||
class_blueprints: bp_ptrs.as_ptr(),
|
||||
class_blueprint_count: bp_ptrs.len(),
|
||||
compiled_regexes: gen.compiled_regexes.as_ptr(),
|
||||
regex_count: gen.compiled_regexes.len(),
|
||||
};
|
||||
|
||||
rust_create_executable(vm_ptr, source_code_ptr, &ffi_data)
|
||||
}
|
||||
|
||||
/// Convert a JS number to its UTF-16 string representation using the
|
||||
/// ECMA-262 Number::toString algorithm (via C++ runtime).
|
||||
pub fn js_number_to_utf16(value: f64) -> Utf16String {
|
||||
let mut buffer = [0u16; 64];
|
||||
let len = unsafe { rust_number_to_utf16(value, buffer.as_mut_ptr(), buffer.len()) };
|
||||
Utf16String(buffer[..len].to_vec())
|
||||
}
|
||||
|
||||
/// Compile a regex pattern+flags using the C++ regex engine.
|
||||
///
|
||||
/// On success, returns an opaque handle to the compiled regex (a C++
|
||||
/// RustCompiledRegex*). On failure, returns the error message.
|
||||
pub fn compile_regex(pattern: &[u16], flags: &[u16]) -> Result<*mut c_void, String> {
|
||||
unsafe {
|
||||
let mut error: *const std::os::raw::c_char = std::ptr::null();
|
||||
let handle = rust_compile_regex(
|
||||
pattern.as_ptr(), pattern.len(),
|
||||
flags.as_ptr(), flags.len(),
|
||||
&mut error,
|
||||
);
|
||||
if error.is_null() {
|
||||
Ok(handle)
|
||||
} else {
|
||||
let msg = std::ffi::CStr::from_ptr(error).to_string_lossy().into_owned();
|
||||
rust_free_error_string(error);
|
||||
Err(msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
1548
Libraries/LibJS/Rust/src/bytecode/generator.rs
Normal file
1548
Libraries/LibJS/Rust/src/bytecode/generator.rs
Normal file
File diff suppressed because it is too large
Load Diff
13
Libraries/LibJS/Rust/src/bytecode/instruction.rs
Normal file
13
Libraries/LibJS/Rust/src/bytecode/instruction.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
//! Bytecode instruction types generated from Bytecode.def.
|
||||
//!
|
||||
//! The `OpCode` enum, `Instruction` enum, and all encoding/visiting methods
|
||||
//! are generated by build.rs from Bytecode.def. This is the single source
|
||||
//! of truth for instruction definitions.
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/instruction_generated.rs"));
|
||||
27
Libraries/LibJS/Rust/src/bytecode/mod.rs
Normal file
27
Libraries/LibJS/Rust/src/bytecode/mod.rs
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
//! Bytecode generator infrastructure for the parser.
|
||||
//!
|
||||
//! This module contains the types and machinery needed to generate
|
||||
//! bytecode from the AST. The generated bytecode is binary-
|
||||
//! compatible with the C++ `Bytecode::Executable` format.
|
||||
//!
|
||||
//! ## Submodules
|
||||
//!
|
||||
//! - `operand` -- Register, Operand, Label, and table index types
|
||||
//! - `instruction` -- Instruction enum (generated from Bytecode.def by build.rs)
|
||||
//! - `basic_block` -- BasicBlock: list of instructions with control flow metadata
|
||||
//! - `generator` -- Generator: manages registers, constants, tables, and assembly
|
||||
//! - `codegen` -- AST-walking code that emits instructions via the Generator
|
||||
//! - `ffi` -- FFI bridge to create C++ Executable and SharedFunctionInstanceData
|
||||
|
||||
pub mod basic_block;
|
||||
pub mod codegen;
|
||||
pub mod ffi;
|
||||
pub mod generator;
|
||||
pub mod instruction;
|
||||
pub mod operand;
|
||||
172
Libraries/LibJS/Rust/src/bytecode/operand.rs
Normal file
172
Libraries/LibJS/Rust/src/bytecode/operand.rs
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
/// A bytecode register index.
///
/// Reserved registers:
/// - 0: accumulator
/// - 1: exception
/// - 2: this_value
/// - 3: return_value
/// - 4: saved_lexical_environment
/// - 5+: user registers
#[derive(Debug, Clone, Copy)]
pub struct Register(pub u32);

impl Register {
    pub const ACCUMULATOR: Register = Register(0);
    pub const EXCEPTION: Register = Register(1);
    pub const THIS_VALUE: Register = Register(2);
    pub const RETURN_VALUE: Register = Register(3);
    pub const SAVED_LEXICAL_ENVIRONMENT: Register = Register(4);
    /// Number of reserved registers; user registers start at this index.
    pub const RESERVED_COUNT: u32 = 5;
}

/// A bytecode operand.
///
/// Encoded as a single `u32` with a 3-bit type tag in the top 3 bits
/// and a 29-bit index in the lower 29 bits:
///
///   `raw = (type << 29) | index`
///
/// This encoding is ABI-compatible with the VM's operand format.
///
/// The constructors reject indices that do not fit in 29 bits, so a
/// constructed operand always carries one of the four valid type tags and
/// can never collide with [`Operand::INVALID`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Operand(u32);

impl Operand {
    const TYPE_SHIFT: u32 = 29;
    const INDEX_MASK: u32 = 0x1FFF_FFFF;
    /// Raw encoding of the "no operand" sentinel.
    pub const INVALID: u32 = 0xFFFF_FFFF;

    /// Combine a type tag with an index, verifying that the index fits in
    /// the 29-bit index field. Without this check an oversized index would
    /// silently spill into the type tag and alias a different operand.
    fn tagged(operand_type: OperandType, index: u32) -> Self {
        assert!(
            index <= Self::INDEX_MASK,
            "operand index {} does not fit in 29 bits",
            index
        );
        Self(((operand_type as u32) << Self::TYPE_SHIFT) | index)
    }

    /// Operand referring to the given register.
    ///
    /// # Panics
    /// Panics if the register index does not fit in 29 bits.
    pub fn register(reg: Register) -> Self {
        Self::tagged(OperandType::Register, reg.0)
    }

    /// Operand referring to the local variable slot `index`.
    ///
    /// # Panics
    /// Panics if `index` does not fit in 29 bits.
    pub fn local(index: u32) -> Self {
        Self::tagged(OperandType::Local, index)
    }

    /// Operand referring to the constant `index`.
    ///
    /// # Panics
    /// Panics if `index` does not fit in 29 bits.
    pub fn constant(index: u32) -> Self {
        Self::tagged(OperandType::Constant, index)
    }

    /// Operand referring to the argument `index`.
    ///
    /// # Panics
    /// Panics if `index` does not fit in 29 bits.
    pub fn argument(index: u32) -> Self {
        Self::tagged(OperandType::Argument, index)
    }

    /// The "no operand" sentinel.
    pub fn invalid() -> Self {
        Self(Self::INVALID)
    }

    pub fn is_invalid(self) -> bool {
        self.0 == Self::INVALID
    }

    pub fn is_register(self) -> bool {
        !self.is_invalid() && self.operand_type() == OperandType::Register
    }

    pub fn is_local(self) -> bool {
        !self.is_invalid() && self.operand_type() == OperandType::Local
    }

    pub fn is_constant(self) -> bool {
        !self.is_invalid() && self.operand_type() == OperandType::Constant
    }

    /// Decode the type tag.
    ///
    /// # Panics
    /// Panics if called on the invalid operand.
    pub fn operand_type(self) -> OperandType {
        assert!(!self.is_invalid(), "operand_type() called on INVALID operand");
        match (self.0 >> Self::TYPE_SHIFT) & 0x7 {
            0 => OperandType::Register,
            1 => OperandType::Local,
            2 => OperandType::Constant,
            3 => OperandType::Argument,
            _ => unreachable!("operand type bits can only be 0-3"),
        }
    }

    /// The 29-bit index without the type tag.
    pub fn index(self) -> u32 {
        self.0 & Self::INDEX_MASK
    }

    /// The raw encoded word.
    pub fn raw(self) -> u32 {
        self.0
    }

    /// Offset the index by the given amount, stripping the type tag and
    /// leaving a flat index into the combined
    /// [registers | locals | constants | arguments] array.
    /// Used during operand rewriting in the assembler.
    ///
    /// # Panics
    /// Panics if adding `offset` overflows `u32`.
    pub fn offset_index_by(&mut self, offset: u32) {
        self.0 &= Self::INDEX_MASK;
        self.0 = self.0.checked_add(offset).expect("operand index overflow");
    }
}

/// The kind of storage an [`Operand`] refers to.
///
/// The discriminant of each variant is the 3-bit tag stored in the top bits
/// of the encoded operand.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OperandType {
    Register = 0,
    Local = 1,
    Constant = 2,
    Argument = 3,
}
|
||||
|
||||
/// A jump target in the bytecode.
///
/// The wrapped `u32` has two meanings over the label's lifetime: while code
/// is being generated it is the index of a basic block, and once the
/// bytecode has been linked it is the final byte offset in the flat
/// bytecode stream.
#[derive(Debug, Clone, Copy)]
pub struct Label(pub u32);

impl Label {
    /// The wrapped value, widened to `usize` for use as a basic block index.
    pub fn basic_block_index(self) -> usize {
        let Label(raw) = self;
        raw as usize
    }
}
|
||||
|
||||
/// Position of an entry in the string table, stored as a plain `u32`.
#[derive(Debug, Clone, Copy)]
pub struct StringTableIndex(pub u32);

impl StringTableIndex {
    /// Raw value reserved to mean "no string table entry" (all bits set).
    pub const INVALID: u32 = u32::MAX;
}
|
||||
|
||||
/// Position of an entry in the identifier table, stored as a plain `u32`.
#[derive(Debug, Clone, Copy)]
pub struct IdentifierTableIndex(pub u32);

impl IdentifierTableIndex {
    /// Raw value reserved to mean "no identifier table entry" (all bits set).
    pub const INVALID: u32 = u32::MAX;
}
|
||||
|
||||
/// Position of an entry in the property key table, stored as a plain `u32`.
#[derive(Debug, Clone, Copy)]
pub struct PropertyKeyTableIndex(pub u32);
|
||||
|
||||
/// Position of an entry in the regex table, stored as a plain `u32`.
#[derive(Debug, Clone, Copy)]
pub struct RegexTableIndex(pub u32);
|
||||
|
||||
/// Environment coordinate used as a mutable cache in some instructions.
///
/// Layout: two `u32` fields, `hops` followed by `index`.
#[derive(Debug, Clone, Copy)]
pub struct EnvironmentCoordinate {
    pub hops: u32,
    pub index: u32,
}

impl EnvironmentCoordinate {
    /// Sentinel stored in both fields of an empty coordinate (`0xFFFF_FFFE`).
    pub const INVALID: u32 = u32::MAX - 1;

    /// A coordinate whose `hops` and `index` are both [`Self::INVALID`].
    pub fn empty() -> Self {
        let sentinel = Self::INVALID;
        Self {
            hops: sentinel,
            index: sentinel,
        }
    }
}
|
||||
1312
Libraries/LibJS/Rust/src/lexer.rs
Normal file
1312
Libraries/LibJS/Rust/src/lexer.rs
Normal file
File diff suppressed because it is too large
Load Diff
2097
Libraries/LibJS/Rust/src/lib.rs
Normal file
2097
Libraries/LibJS/Rust/src/lib.rs
Normal file
File diff suppressed because it is too large
Load Diff
1273
Libraries/LibJS/Rust/src/parser.rs
Normal file
1273
Libraries/LibJS/Rust/src/parser.rs
Normal file
File diff suppressed because it is too large
Load Diff
1951
Libraries/LibJS/Rust/src/parser/declarations.rs
Normal file
1951
Libraries/LibJS/Rust/src/parser/declarations.rs
Normal file
File diff suppressed because it is too large
Load Diff
2193
Libraries/LibJS/Rust/src/parser/expressions.rs
Normal file
2193
Libraries/LibJS/Rust/src/parser/expressions.rs
Normal file
File diff suppressed because it is too large
Load Diff
849
Libraries/LibJS/Rust/src/parser/statements.rs
Normal file
849
Libraries/LibJS/Rust/src/parser/statements.rs
Normal file
@@ -0,0 +1,849 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
//! Statement parsing: if, for, while, switch, try, etc.
|
||||
|
||||
use std::rc::Rc;
|
||||
|
||||
use crate::ast::*;
|
||||
use crate::parser::{Associativity, ForbiddenTokens, Parser, Position, PRECEDENCE_COMMA};
|
||||
use crate::token::TokenType;
|
||||
|
||||
/// Used locally during for-statement parsing before converting to `ast::ForInit`.
|
||||
enum LocalForInit {
|
||||
Declaration(Statement),
|
||||
Expression(Expression),
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub(crate) fn parse_statement(&mut self, allow_labelled_function: bool) -> Statement {
|
||||
let start = self.position();
|
||||
let tt = self.current_token_type();
|
||||
|
||||
match tt {
|
||||
TokenType::CurlyOpen => self.parse_block_statement(),
|
||||
TokenType::Return => self.parse_return_statement(),
|
||||
TokenType::Var => self.parse_variable_declaration(false),
|
||||
TokenType::For => self.parse_for_statement(),
|
||||
TokenType::If => self.parse_if_statement(),
|
||||
TokenType::Throw => self.parse_throw_statement(),
|
||||
TokenType::Try => self.parse_try_statement(),
|
||||
TokenType::Break => self.parse_break_statement(),
|
||||
TokenType::Continue => self.parse_continue_statement(),
|
||||
TokenType::Switch => self.parse_switch_statement(),
|
||||
TokenType::Do => self.parse_do_while_statement(),
|
||||
TokenType::While => self.parse_while_statement(),
|
||||
TokenType::With => {
|
||||
if self.flags.strict_mode {
|
||||
self.syntax_error("'with' statement not allowed in strict mode");
|
||||
}
|
||||
self.parse_with_statement()
|
||||
}
|
||||
TokenType::Debugger => self.parse_debugger_statement(),
|
||||
TokenType::Semicolon => {
|
||||
self.consume();
|
||||
self.statement(start, StatementKind::Empty)
|
||||
}
|
||||
TokenType::Slash | TokenType::SlashEquals => {
|
||||
let token = self.lexer.force_slash_as_regex();
|
||||
self.current_token = token;
|
||||
self.parse_expression_statement()
|
||||
}
|
||||
_ => {
|
||||
if self.match_invalid_escaped_keyword() {
|
||||
self.syntax_error("Keyword must not contain escaped characters");
|
||||
}
|
||||
if self.match_identifier_name() {
|
||||
if let Some(labelled) = self.try_parse_labelled_statement(allow_labelled_function) {
|
||||
return labelled;
|
||||
}
|
||||
}
|
||||
if self.match_expression() {
|
||||
self.parse_expression_statement()
|
||||
} else {
|
||||
self.expected("statement");
|
||||
self.consume();
|
||||
self.statement(start, StatementKind::Empty)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_block_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::CurlyOpen);
|
||||
|
||||
self.scope_collector.open_block_scope(None);
|
||||
|
||||
let mut children = Vec::new();
|
||||
|
||||
while !self.match_token(TokenType::CurlyClose) && !self.done() {
|
||||
if self.match_declaration() {
|
||||
children.push(self.parse_declaration());
|
||||
} else {
|
||||
children.push(self.parse_statement(true));
|
||||
}
|
||||
}
|
||||
|
||||
self.consume_token(TokenType::CurlyClose);
|
||||
let scope = ScopeData::shared_with_children(children);
|
||||
self.scope_collector.set_scope_node(scope.clone());
|
||||
self.scope_collector.close_scope();
|
||||
self.statement(start, StatementKind::Block(scope))
|
||||
}
|
||||
|
||||
fn parse_expression_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
|
||||
if self.match_token(TokenType::Async) {
|
||||
let lookahead = self.next_token();
|
||||
if lookahead.token_type == TokenType::Function && !lookahead.trivia_has_line_terminator {
|
||||
self.syntax_error("Async function declaration not allowed in single-statement context");
|
||||
}
|
||||
} else if self.match_token(TokenType::Function) || self.match_token(TokenType::Class) {
|
||||
let name = self.current_token.token_type.name();
|
||||
self.syntax_error(&format!("{} declaration not allowed in single-statement context", name));
|
||||
} else if self.match_token(TokenType::Let) && self.next_token().token_type == TokenType::BracketOpen {
|
||||
self.syntax_error("let followed by [ is not allowed in single-statement context");
|
||||
}
|
||||
|
||||
let expression = self.parse_expression_any();
|
||||
self.consume_or_insert_semicolon();
|
||||
self.statement(start, StatementKind::Expression(Box::new(expression)))
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-return-statement
|
||||
// ReturnStatement : `return` [no LineTerminator here] Expression? `;`
|
||||
fn parse_return_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
if !self.flags.in_function_context {
|
||||
self.syntax_error("'return' not allowed outside of a function");
|
||||
}
|
||||
self.consume_token(TokenType::Return);
|
||||
|
||||
// [no LineTerminator here]: if a line terminator follows `return`,
|
||||
// ASI inserts a semicolon and the return has no argument.
|
||||
// NB: Don't consume the next token — it may be `;` which should
|
||||
// become an EmptyStatement, not be swallowed by the return.
|
||||
if self.current_token.trivia_has_line_terminator {
|
||||
return self.statement(start, StatementKind::Return(None));
|
||||
}
|
||||
if self.match_token(TokenType::Semicolon)
|
||||
|| self.match_token(TokenType::CurlyClose)
|
||||
|| self.done()
|
||||
{
|
||||
self.consume_or_insert_semicolon();
|
||||
return self.statement(start, StatementKind::Return(None));
|
||||
}
|
||||
|
||||
let argument = self.parse_expression_any();
|
||||
self.consume_or_insert_semicolon();
|
||||
self.statement(start, StatementKind::Return(Some(Box::new(argument))))
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-throw-statement
|
||||
// ThrowStatement : `throw` [no LineTerminator here] Expression `;`
|
||||
fn parse_throw_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::Throw);
|
||||
|
||||
// [no LineTerminator here]: unlike `return`, a line terminator after
|
||||
// `throw` is always an error because `throw;` is never valid.
|
||||
if self.current_token.trivia_has_line_terminator {
|
||||
self.syntax_error("No line break is allowed between 'throw' and its expression");
|
||||
}
|
||||
|
||||
let argument = self.parse_expression_any();
|
||||
self.consume_or_insert_semicolon();
|
||||
self.statement(start, StatementKind::Throw(Box::new(argument)))
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-break-statement
|
||||
// BreakStatement : `break` [no LineTerminator here] LabelIdentifier? `;`
|
||||
fn parse_break_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::Break);
|
||||
|
||||
let label = if self.match_token(TokenType::Semicolon) {
|
||||
self.consume();
|
||||
None
|
||||
} else if !self.current_token.trivia_has_line_terminator
|
||||
&& !self.match_token(TokenType::CurlyClose)
|
||||
&& !self.done()
|
||||
&& self.match_identifier()
|
||||
{
|
||||
let token = self.consume();
|
||||
let label_value = Utf16String::from(self.token_value(&token));
|
||||
|
||||
if !self.labels_in_scope.contains_key(label_value.as_slice()) {
|
||||
let label_str = String::from_utf16_lossy(&label_value);
|
||||
self.syntax_error(&format!("Label '{}' not found", label_str));
|
||||
}
|
||||
|
||||
self.consume_or_insert_semicolon();
|
||||
Some(label_value)
|
||||
} else {
|
||||
self.consume_or_insert_semicolon();
|
||||
None
|
||||
};
|
||||
|
||||
if label.is_none() && !self.flags.in_break_context {
|
||||
self.syntax_error("Unlabeled 'break' not allowed outside of a loop or switch statement");
|
||||
}
|
||||
|
||||
self.statement(start, StatementKind::Break { target_label: label })
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-continue-statement
|
||||
// ContinueStatement : `continue` [no LineTerminator here] LabelIdentifier? `;`
|
||||
fn parse_continue_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
if !self.flags.in_continue_context {
|
||||
self.syntax_error("'continue' not allowed outside of a loop");
|
||||
}
|
||||
self.consume_token(TokenType::Continue);
|
||||
|
||||
let label = if self.match_token(TokenType::Semicolon) {
|
||||
None
|
||||
} else if !self.current_token.trivia_has_line_terminator
|
||||
&& !self.match_token(TokenType::CurlyClose)
|
||||
&& !self.done()
|
||||
&& self.match_identifier()
|
||||
{
|
||||
let label_line = self.current_token.line_number;
|
||||
let label_col = self.current_token.line_column;
|
||||
let token = self.consume();
|
||||
let label_value = Utf16String::from(self.token_value(&token));
|
||||
|
||||
if let Some(entry) = self.labels_in_scope.get_mut(label_value.as_slice()) {
|
||||
*entry = Some((label_line, label_col));
|
||||
} else {
|
||||
let label_str = String::from_utf16_lossy(&label_value);
|
||||
self.syntax_error(&format!("Label '{}' not found or invalid", label_str));
|
||||
}
|
||||
|
||||
Some(label_value)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.consume_or_insert_semicolon();
|
||||
|
||||
self.statement(start, StatementKind::Continue { target_label: label })
|
||||
}
|
||||
|
||||
fn parse_debugger_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::Debugger);
|
||||
self.consume_or_insert_semicolon();
|
||||
self.statement(start, StatementKind::Debugger)
|
||||
}
|
||||
|
||||
fn parse_if_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::If);
|
||||
self.consume_token(TokenType::ParenOpen);
|
||||
let predicate = self.parse_expression_any();
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
|
||||
let consequent = if !self.flags.strict_mode && self.match_token(TokenType::Function) {
|
||||
self.parse_function_declaration_as_block_statement(start)
|
||||
} else {
|
||||
self.parse_statement(false)
|
||||
};
|
||||
|
||||
let alternate = if self.match_token(TokenType::Else) {
|
||||
self.consume();
|
||||
if !self.flags.strict_mode && self.match_token(TokenType::Function) {
|
||||
Some(Box::new(self.parse_function_declaration_as_block_statement(start)))
|
||||
} else {
|
||||
Some(Box::new(self.parse_statement(false)))
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.statement(start, StatementKind::If {
|
||||
test: Box::new(predicate),
|
||||
consequent: Box::new(consequent),
|
||||
alternate,
|
||||
})
|
||||
}
|
||||
|
||||
/// Annex B: Parse a function declaration as if wrapped in a synthetic block.
|
||||
/// See https://tc39.es/ecma262/#sec-functiondeclarations-in-ifstatement-statement-clauses
|
||||
fn parse_function_declaration_as_block_statement(&mut self, if_start: Position) -> Statement {
|
||||
// C++ uses rule_start from the enclosing if-statement for the block position.
|
||||
let start = if_start;
|
||||
self.scope_collector.open_block_scope(None);
|
||||
let declaration = self.parse_function_declaration();
|
||||
let scope = ScopeData::shared_with_children(vec![declaration]);
|
||||
self.scope_collector.set_scope_node(scope.clone());
|
||||
self.scope_collector.close_scope();
|
||||
self.statement(start, StatementKind::Block(scope))
|
||||
}
|
||||
|
||||
/// Parse a statement in a loop body context (break and continue allowed).
|
||||
fn parse_loop_body(&mut self) -> Statement {
|
||||
let break_before = self.flags.in_break_context;
|
||||
let continue_before = self.flags.in_continue_context;
|
||||
self.flags.in_break_context = true;
|
||||
self.flags.in_continue_context = true;
|
||||
let body = self.parse_statement(false);
|
||||
self.flags.in_break_context = break_before;
|
||||
self.flags.in_continue_context = continue_before;
|
||||
body
|
||||
}
|
||||
|
||||
fn parse_while_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::While);
|
||||
self.consume_token(TokenType::ParenOpen);
|
||||
let test = self.parse_expression_any();
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
|
||||
let body = self.parse_loop_body();
|
||||
|
||||
self.statement(start, StatementKind::While {
|
||||
test: Box::new(test),
|
||||
body: Box::new(body),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_do_while_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::Do);
|
||||
|
||||
let body = self.parse_loop_body();
|
||||
|
||||
self.consume_token(TokenType::While);
|
||||
self.consume_token(TokenType::ParenOpen);
|
||||
let test = self.parse_expression_any();
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
|
||||
// Since ES 2015 a missing semicolon is inserted here, despite
|
||||
// the regular ASI rules not applying.
|
||||
self.eat(TokenType::Semicolon);
|
||||
|
||||
self.statement(start, StatementKind::DoWhile {
|
||||
test: Box::new(test),
|
||||
body: Box::new(body),
|
||||
})
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-for-statement
|
||||
// https://tc39.es/ecma262/#sec-for-in-and-for-of-statements
|
||||
fn parse_for_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::For);
|
||||
|
||||
// Open for-loop scope (for let/const/using declarations).
|
||||
// scope_data set after the for-loop body is parsed.
|
||||
self.scope_collector.open_for_loop_scope(None);
|
||||
|
||||
let is_await = if self.match_token(TokenType::Await) {
|
||||
if !self.flags.await_expression_is_valid {
|
||||
self.syntax_error("for-await-of not allowed outside of async context");
|
||||
}
|
||||
self.consume();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
self.consume_token(TokenType::ParenOpen);
|
||||
|
||||
if self.match_token(TokenType::Semicolon) && !is_await {
|
||||
self.consume();
|
||||
let result = self.parse_standard_for_loop(start, None);
|
||||
return self.close_for_loop_scope(start, result);
|
||||
}
|
||||
|
||||
let init_start = self.position();
|
||||
let is_var_init = self.match_token(TokenType::Var);
|
||||
let is_using = self.match_for_using_declaration();
|
||||
let is_let = self.match_token(TokenType::Let) && (self.flags.strict_mode || self.try_match_let_declaration());
|
||||
let is_declaration = is_var_init || is_using || is_let || self.match_token(TokenType::Const);
|
||||
let init = if is_using {
|
||||
LocalForInit::Declaration(self.parse_using_declaration(true))
|
||||
} else if is_declaration {
|
||||
LocalForInit::Declaration(self.parse_variable_declaration(true))
|
||||
} else {
|
||||
let forbidden = ForbiddenTokens::with_in();
|
||||
LocalForInit::Expression(self.parse_expression(PRECEDENCE_COMMA, Associativity::Right, forbidden))
|
||||
};
|
||||
|
||||
// Check for in
|
||||
// https://tc39.es/ecma262/#sec-for-in-and-for-of-statements
|
||||
// It is a Syntax Error if IsDestructuring of ForBinding is false and
|
||||
// Initializer is present. (Only a single, non-initialized ForBinding
|
||||
// is allowed; except in Annex B sloppy var with one declarator.)
|
||||
if self.match_token(TokenType::In) && !is_await {
|
||||
// C++ captures ForInStatement position at the `in` keyword.
|
||||
let forin_start = self.position();
|
||||
if is_using {
|
||||
self.syntax_error("Using declaration not allowed in for-in loop");
|
||||
} else if is_declaration {
|
||||
if self.for_loop_declaration_count > 1 {
|
||||
self.syntax_error("Multiple declarations not allowed in for..in/of");
|
||||
}
|
||||
if self.for_loop_declaration_has_init {
|
||||
// https://tc39.es/ecma262/#sec-initializers-in-forin-statement-heads
|
||||
// Annex B: In sloppy mode, a single `var` with an initializer is permitted.
|
||||
if !(self.for_loop_declaration_is_var && self.for_loop_declaration_count == 1 && !self.flags.strict_mode) {
|
||||
self.syntax_error("Variable initializer not allowed in for..in/of");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.validate_for_in_of_lhs(&init);
|
||||
}
|
||||
self.consume();
|
||||
let rhs = self.parse_expression_any();
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
|
||||
let body = self.parse_loop_body();
|
||||
|
||||
let lhs = self.synthesize_for_in_of_lhs(init, init_start);
|
||||
let result = self.statement(forin_start, StatementKind::ForInOf {
|
||||
kind: ForInOfKind::ForIn,
|
||||
lhs,
|
||||
rhs: Box::new(rhs),
|
||||
body: Box::new(body),
|
||||
});
|
||||
return self.close_for_loop_scope(start, result);
|
||||
}
|
||||
|
||||
// Check for of (keyword must not contain escapes)
|
||||
if self.match_identifier_name() {
|
||||
let value = self.token_original_value(&self.current_token);
|
||||
if value == utf16!("of") {
|
||||
// C++ captures ForOfStatement position at the `of` keyword.
|
||||
let forof_start = self.position();
|
||||
if is_declaration {
|
||||
if self.for_loop_declaration_count > 1 {
|
||||
self.syntax_error("Multiple declarations not allowed in for..in/of");
|
||||
}
|
||||
if self.for_loop_declaration_has_init {
|
||||
self.syntax_error("Variable initializer not allowed in for..of");
|
||||
}
|
||||
} else {
|
||||
self.validate_for_in_of_lhs(&init);
|
||||
// https://tc39.es/ecma262/#sec-for-in-and-for-of-statements
|
||||
if let LocalForInit::Expression(ref expression) = init {
|
||||
if let ExpressionKind::Member { ref object, .. } = expression.inner {
|
||||
if let ExpressionKind::Identifier(ref ident) = object.inner {
|
||||
if ident.name == utf16!("let") {
|
||||
self.syntax_error("For of statement may not start with let.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.consume();
|
||||
let rhs = self.parse_assignment_expression();
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
|
||||
let body = self.parse_loop_body();
|
||||
|
||||
let lhs = self.synthesize_for_in_of_lhs(init, init_start);
|
||||
let for_of_kind = if is_await { ForInOfKind::ForAwaitOf } else { ForInOfKind::ForOf };
|
||||
let result = self.statement(forof_start, StatementKind::ForInOf {
|
||||
kind: for_of_kind,
|
||||
lhs,
|
||||
rhs: Box::new(rhs),
|
||||
body: Box::new(body),
|
||||
});
|
||||
return self.close_for_loop_scope(start, result);
|
||||
}
|
||||
}
|
||||
|
||||
// Standard for loop — const requires initializer.
|
||||
if let LocalForInit::Declaration(ref declaration) = init {
|
||||
if let StatementKind::VariableDeclaration { kind: DeclarationKind::Const, ref declarations } = declaration.inner {
|
||||
for d in declarations {
|
||||
if d.init.is_none() {
|
||||
self.syntax_error("Missing initializer in const declaration");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.consume_token(TokenType::Semicolon);
|
||||
let for_init = match init {
|
||||
LocalForInit::Declaration(declaration) => Some(ForInit::Declaration(Box::new(declaration))),
|
||||
LocalForInit::Expression(expression) => Some(ForInit::Expression(Box::new(expression))),
|
||||
};
|
||||
let result = self.parse_standard_for_loop(start, for_init);
|
||||
self.close_for_loop_scope(start, result)
|
||||
}
|
||||
|
||||
/// Close the for-loop scope and wrap the for-loop statement in a Block
|
||||
/// with scope data.
|
||||
fn close_for_loop_scope(&mut self, start: Position, inner: Statement) -> Statement {
|
||||
let scope = ScopeData::shared_with_children(vec![inner]);
|
||||
self.scope_collector.set_scope_node(scope.clone());
|
||||
self.scope_collector.close_scope();
|
||||
self.statement(start, StatementKind::Block(scope))
|
||||
}
|
||||
|
||||
fn parse_standard_for_loop(&mut self, start: Position, init: Option<ForInit>) -> Statement {
|
||||
let test = if self.match_token(TokenType::Semicolon) {
|
||||
None
|
||||
} else {
|
||||
Some(Box::new(self.parse_expression_any()))
|
||||
};
|
||||
self.consume_token(TokenType::Semicolon);
|
||||
|
||||
let update = if self.match_token(TokenType::ParenClose) {
|
||||
None
|
||||
} else {
|
||||
Some(Box::new(self.parse_expression_any()))
|
||||
};
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
|
||||
let body = self.parse_loop_body();
|
||||
|
||||
self.statement(start, StatementKind::For {
|
||||
init,
|
||||
test,
|
||||
update,
|
||||
body: Box::new(body),
|
||||
})
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-with-statement
|
||||
// NOTE: The with statement is forbidden in strict mode code.
|
||||
fn parse_with_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::With);
|
||||
self.consume_token(TokenType::ParenOpen);
|
||||
let object = self.parse_expression_any();
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
self.scope_collector.open_with_scope(None);
|
||||
let body = self.parse_statement(false);
|
||||
self.scope_collector.close_scope();
|
||||
self.statement(start, StatementKind::With {
|
||||
object: Box::new(object),
|
||||
body: Box::new(body),
|
||||
})
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-switch-statement
|
||||
// All case clauses in a switch statement share a single block scope.
|
||||
fn parse_switch_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::Switch);
|
||||
self.consume_token(TokenType::ParenOpen);
|
||||
let discriminant = self.parse_expression_any();
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
|
||||
self.consume_token(TokenType::CurlyOpen);
|
||||
|
||||
self.scope_collector.open_block_scope(None);
|
||||
|
||||
let break_before = self.flags.in_break_context;
|
||||
self.flags.in_break_context = true;
|
||||
|
||||
let mut cases = Vec::new();
|
||||
let mut has_default = false;
|
||||
while !self.match_token(TokenType::CurlyClose) && !self.done() {
|
||||
let case = self.parse_switch_case();
|
||||
if case.test.is_none() {
|
||||
if has_default {
|
||||
self.syntax_error("Multiple 'default' clauses in switch statement");
|
||||
}
|
||||
has_default = true;
|
||||
}
|
||||
cases.push(case);
|
||||
}
|
||||
|
||||
self.flags.in_break_context = break_before;
|
||||
|
||||
self.consume_token(TokenType::CurlyClose);
|
||||
|
||||
let scope = ScopeData::new_shared();
|
||||
self.scope_collector.set_scope_node(scope.clone());
|
||||
self.scope_collector.close_scope();
|
||||
|
||||
self.statement(start, StatementKind::Switch(SwitchStatementData {
|
||||
scope,
|
||||
discriminant: Box::new(discriminant),
|
||||
cases,
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse_switch_case(&mut self) -> SwitchCase {
|
||||
let start = self.position();
|
||||
let test = if self.match_token(TokenType::Case) {
|
||||
self.consume();
|
||||
Some(self.parse_expression_any())
|
||||
} else if self.match_token(TokenType::Default) {
|
||||
self.consume();
|
||||
None
|
||||
} else {
|
||||
self.expected("'case' or 'default'");
|
||||
None
|
||||
};
|
||||
|
||||
self.consume_token(TokenType::Colon);
|
||||
|
||||
let mut children = Vec::new();
|
||||
while !self.match_token(TokenType::CurlyClose)
|
||||
&& !self.match_token(TokenType::Case)
|
||||
&& !self.match_token(TokenType::Default)
|
||||
&& !self.done()
|
||||
{
|
||||
if self.match_declaration() {
|
||||
children.push(self.parse_declaration());
|
||||
} else {
|
||||
children.push(self.parse_statement(true));
|
||||
}
|
||||
}
|
||||
|
||||
SwitchCase {
|
||||
range: self.range_from(start),
|
||||
scope: ScopeData::shared_with_children(children),
|
||||
test,
|
||||
}
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-try-statement
|
||||
// TryStatement :
|
||||
// `try` Block Catch
|
||||
// `try` Block Finally
|
||||
// `try` Block Catch Finally
|
||||
fn parse_try_statement(&mut self) -> Statement {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::Try);
|
||||
|
||||
let block = self.parse_block_statement();
|
||||
|
||||
let handler = if self.match_token(TokenType::Catch) {
|
||||
Some(self.parse_catch_clause())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let finalizer = if self.match_token(TokenType::Finally) {
|
||||
self.consume();
|
||||
Some(Box::new(self.parse_block_statement()))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if handler.is_none() && finalizer.is_none() {
|
||||
self.syntax_error("try statement must have a catch or finally clause");
|
||||
}
|
||||
|
||||
self.statement(start, StatementKind::Try(TryStatementData {
|
||||
block: Box::new(block),
|
||||
handler,
|
||||
finalizer,
|
||||
}))
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-try-statement
|
||||
// Catch : `catch` `(` CatchParameter `)` Block
|
||||
// | `catch` Block
|
||||
// The catch parameter creates its own scope that wraps the block body.
|
||||
fn parse_catch_clause(&mut self) -> CatchClause {
|
||||
let start = self.position();
|
||||
self.consume_token(TokenType::Catch);
|
||||
|
||||
self.scope_collector.open_catch_scope();
|
||||
|
||||
let parameter = if self.match_token(TokenType::ParenOpen) {
|
||||
self.consume();
|
||||
let parameter = if self.match_token(TokenType::CurlyOpen) || self.match_token(TokenType::BracketOpen) {
|
||||
self.pattern_bound_names.clear();
|
||||
let pattern = self.parse_binding_pattern();
|
||||
let names_to_check: Vec<Utf16String> = self.pattern_bound_names.iter().map(|(n, _)| n.clone()).collect();
|
||||
for name in &names_to_check {
|
||||
self.check_identifier_name_for_assignment_validity(name, false);
|
||||
}
|
||||
let bound_names: Vec<&[u16]> = self.pattern_bound_names.iter().map(|(n, _)| n.as_slice()).collect();
|
||||
self.scope_collector.add_catch_parameter_pattern(&bound_names);
|
||||
// Register each binding pattern identifier for scope analysis
|
||||
// so they get is_local() annotations (matching variable declarations).
|
||||
for (name, id) in &self.pattern_bound_names {
|
||||
self.scope_collector.register_identifier(id.clone(), name, None);
|
||||
}
|
||||
Some(CatchBinding::BindingPattern(pattern))
|
||||
} else if self.match_identifier() {
|
||||
let parameter_start = self.position();
|
||||
let token = self.consume();
|
||||
let value = self.token_value(&token).to_vec();
|
||||
self.check_identifier_name_for_assignment_validity(&value, false);
|
||||
let id = Rc::new(Identifier::new(self.range_from(parameter_start), value.clone().into()));
|
||||
self.scope_collector.register_identifier(id.clone(), &value, None);
|
||||
self.scope_collector.add_catch_parameter_identifier(&value, id.clone());
|
||||
Some(CatchBinding::Identifier(id))
|
||||
} else {
|
||||
self.expected("catch parameter");
|
||||
None
|
||||
};
|
||||
self.consume_token(TokenType::ParenClose);
|
||||
parameter
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let body = self.parse_block_statement();
|
||||
|
||||
self.scope_collector.close_scope();
|
||||
|
||||
CatchClause {
|
||||
range: self.range_from(start),
|
||||
parameter,
|
||||
body: Box::new(body),
|
||||
}
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-labelled-statements
|
||||
// It is a Syntax Error if any source text is matched by this production
|
||||
// and a `continue` statement with label targets that production.
|
||||
// (i.e., `continue` with a label can only target an iteration statement.)
|
||||
fn try_parse_labelled_statement(&mut self, allow_labelled_function: bool) -> Option<Statement> {
|
||||
let start = self.position();
|
||||
|
||||
if !self.match_identifier_name() {
|
||||
return None;
|
||||
}
|
||||
|
||||
self.save_state();
|
||||
let token = self.consume();
|
||||
let label = Utf16String::from(self.token_value(&token));
|
||||
|
||||
if !self.match_token(TokenType::Colon) {
|
||||
self.load_state();
|
||||
return None;
|
||||
}
|
||||
self.discard_saved_state();
|
||||
self.consume(); // consume :
|
||||
|
||||
if self.flags.strict_mode && (label == utf16!("let") || crate::parser::is_strict_reserved_word(&label)) {
|
||||
self.syntax_error("Strict mode reserved word is not allowed in label");
|
||||
}
|
||||
if self.flags.in_generator_function_context && label == utf16!("yield") {
|
||||
self.syntax_error("'yield' label is not allowed in generator function context");
|
||||
}
|
||||
if self.flags.await_expression_is_valid && label == utf16!("await") {
|
||||
self.syntax_error("'await' label is not allowed in async function context");
|
||||
}
|
||||
|
||||
if self.labels_in_scope.contains_key(label.as_slice()) {
|
||||
let label_str = String::from_utf16_lossy(&label);
|
||||
self.syntax_error(&format!("Label '{}' has already been declared", label_str));
|
||||
}
|
||||
|
||||
if self.match_token(TokenType::Function) && (!allow_labelled_function || self.flags.strict_mode) {
|
||||
self.syntax_error("Not allowed to declare a function here");
|
||||
}
|
||||
if self.match_token(TokenType::Async) {
|
||||
let next = self.next_token();
|
||||
if next.token_type == TokenType::Function && !next.trivia_has_line_terminator {
|
||||
self.syntax_error("Async functions cannot be defined in labelled statements");
|
||||
}
|
||||
}
|
||||
|
||||
self.labels_in_scope.insert(label.clone(), None);
|
||||
|
||||
let break_before = self.flags.in_break_context;
|
||||
self.flags.in_break_context = true;
|
||||
|
||||
let body_starts_iteration = self.match_iteration_start();
|
||||
self.last_inner_label_is_iteration = false;
|
||||
let body = if self.match_token(TokenType::Function) {
|
||||
let fn_decl = self.parse_function_declaration();
|
||||
if let StatementKind::FunctionDeclaration { kind, .. } = fn_decl.inner {
|
||||
match kind {
|
||||
FunctionKind::Generator | FunctionKind::AsyncGenerator => {
|
||||
self.syntax_error("Generator functions cannot be defined in labelled statements");
|
||||
}
|
||||
FunctionKind::Async => {
|
||||
self.syntax_error("Async functions cannot be defined in labelled statements");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
fn_decl
|
||||
} else {
|
||||
self.parse_statement(allow_labelled_function)
|
||||
};
|
||||
|
||||
let is_iteration = body_starts_iteration || self.last_inner_label_is_iteration;
|
||||
if !is_iteration {
|
||||
if let Some(Some((line, col))) = self.labels_in_scope.get(label.as_slice()) {
|
||||
self.syntax_error_at(
|
||||
"labelled continue statement cannot use non iterating statement",
|
||||
*line, *col);
|
||||
}
|
||||
}
|
||||
|
||||
self.labels_in_scope.remove(label.as_slice());
|
||||
self.flags.in_break_context = break_before;
|
||||
self.last_inner_label_is_iteration = is_iteration;
|
||||
|
||||
Some(self.statement(start, StatementKind::Labelled {
|
||||
label,
|
||||
item: Box::new(body),
|
||||
}))
|
||||
}
|
||||
|
||||
fn match_for_using_declaration(&mut self) -> bool {
|
||||
if !self.match_token(TokenType::Identifier) {
|
||||
return false;
|
||||
}
|
||||
if self.token_value(&self.current_token) != utf16!("using") {
|
||||
return false;
|
||||
}
|
||||
let next = self.next_token();
|
||||
if next.trivia_has_line_terminator {
|
||||
return false;
|
||||
}
|
||||
if next.token_type == TokenType::Identifier {
|
||||
let next_val = self.token_original_value(&next);
|
||||
if next_val == utf16!("of") {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Must be an actual identifier, not just an identifier-name keyword
|
||||
// like `in`. This matches C++ token_is_identifier().
|
||||
self.token_is_identifier(&next)
|
||||
}
|
||||
|
||||
/// Validate that an expression-form LHS is valid for for-in/for-of.
|
||||
fn validate_for_in_of_lhs(&mut self, init: &LocalForInit) {
|
||||
if let LocalForInit::Expression(ref expression) = *init {
|
||||
if !Self::is_identifier(expression) && !Self::is_member_expression(expression)
|
||||
&& !Self::is_call_expression(expression)
|
||||
&& !Self::is_object_expression(expression) && !Self::is_array_expression(expression) {
|
||||
self.syntax_error("Invalid left-hand side in for-loop");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a `LocalForInit` into a `ForInOfLhs`, synthesizing a binding
|
||||
/// pattern when the LHS is an array or object expression.
|
||||
fn synthesize_for_in_of_lhs(&mut self, init: LocalForInit, init_start: Position) -> ForInOfLhs {
|
||||
match init {
|
||||
LocalForInit::Declaration(declaration) => ForInOfLhs::Declaration(Box::new(declaration)),
|
||||
LocalForInit::Expression(expression) => {
|
||||
if Self::is_array_expression(&expression) || Self::is_object_expression(&expression) {
|
||||
if let Some(pattern) = self.synthesize_binding_pattern(init_start) {
|
||||
for (name, id) in self.pattern_bound_names.drain(..) {
|
||||
self.scope_collector.register_identifier(id, &name, None);
|
||||
}
|
||||
ForInOfLhs::Pattern(pattern)
|
||||
} else {
|
||||
ForInOfLhs::Expression(Box::new(expression))
|
||||
}
|
||||
} else {
|
||||
ForInOfLhs::Expression(Box::new(expression))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1344
Libraries/LibJS/Rust/src/scope_collector.rs
Normal file
1344
Libraries/LibJS/Rust/src/scope_collector.rs
Normal file
File diff suppressed because it is too large
Load Diff
217
Libraries/LibJS/Rust/src/token.rs
Normal file
217
Libraries/LibJS/Rust/src/token.rs
Normal file
@@ -0,0 +1,217 @@
|
||||
/*
|
||||
* Copyright (c) 2026-present, the Ladybird developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
//! Token types and Token struct for the lexer.
|
||||
|
||||
/// Coarse classification assigned to every `TokenType` by the token table.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenCategory {
    /// Tokens that are not valid input; the token table also files `Eof` here.
    Invalid,
    /// The `Trivia` token type.
    Trivia,
    /// Numeric and BigInt literals.
    Number,
    /// String, template and regex literal pieces, including unterminated ones.
    String,
    /// Brackets, braces, parentheses, `,`, `:`, `;` and template expression delimiters.
    Punctuation,
    /// Operator tokens.
    Operator,
    /// Keywords that are not control-flow keywords.
    Keyword,
    /// Control-flow keywords such as `if`, `for`, `return` and `yield`.
    ControlKeyword,
    /// Identifiers, private identifiers and escaped keywords.
    Identifier,
}
|
||||
|
||||
/// Generates the `TokenType` enum from a `Variant => Category` table.
///
/// Two methods are generated alongside the enum:
/// - `category()` returns the `TokenCategory` listed for the variant;
/// - `name()` returns the variant's identifier as a string, produced with
///   `stringify!`.
///
/// Entries are comma-separated; a trailing comma is accepted.
macro_rules! define_tokens {
    ( $( $variant:ident => $category:ident ),* $(,)? ) => {
        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
        pub enum TokenType {
            $( $variant, )*
        }

        impl TokenType {
            pub fn category(self) -> TokenCategory {
                match self {
                    $( Self::$variant => TokenCategory::$category, )*
                }
            }

            pub fn name(self) -> &'static str {
                match self {
                    $( Self::$variant => stringify!($variant), )*
                }
            }
        }
    };
}
|
||||
|
||||
// The token table: one `Variant => Category` entry per token type, kept in
// alphabetical order. The order of entries is the declaration order of the
// generated `TokenType` variants.
define_tokens! {
    Ampersand                   => Operator,
    AmpersandEquals             => Operator,
    Arrow                       => Operator,
    Asterisk                    => Operator,
    AsteriskEquals              => Operator,
    Async                       => Keyword,
    Await                       => Keyword,
    BigIntLiteral               => Number,
    BoolLiteral                 => Keyword,
    BracketClose                => Punctuation,
    BracketOpen                 => Punctuation,
    Break                       => ControlKeyword,
    Caret                       => Operator,
    CaretEquals                 => Operator,
    Case                        => ControlKeyword,
    Catch                       => ControlKeyword,
    Class                       => Keyword,
    Colon                       => Punctuation,
    Comma                       => Punctuation,
    Const                       => Keyword,
    Continue                    => ControlKeyword,
    CurlyClose                  => Punctuation,
    CurlyOpen                   => Punctuation,
    Debugger                    => Keyword,
    Default                     => ControlKeyword,
    Delete                      => Keyword,
    Do                          => ControlKeyword,
    DoubleAmpersand             => Operator,
    DoubleAmpersandEquals       => Operator,
    DoubleAsterisk              => Operator,
    DoubleAsteriskEquals        => Operator,
    DoublePipe                  => Operator,
    DoublePipeEquals            => Operator,
    DoubleQuestionMark          => Operator,
    DoubleQuestionMarkEquals    => Operator,
    Else                        => ControlKeyword,
    Enum                        => Keyword,
    Eof                         => Invalid,
    Equals                      => Operator,
    EqualsEquals                => Operator,
    EqualsEqualsEquals          => Operator,
    EscapedKeyword              => Identifier,
    ExclamationMark             => Operator,
    ExclamationMarkEquals       => Operator,
    ExclamationMarkEqualsEquals => Operator,
    Export                      => Keyword,
    Extends                     => Keyword,
    Finally                     => ControlKeyword,
    For                         => ControlKeyword,
    Function                    => Keyword,
    GreaterThan                 => Operator,
    GreaterThanEquals           => Operator,
    Identifier                  => Identifier,
    If                          => ControlKeyword,
    Implements                  => Keyword,
    Import                      => Keyword,
    In                          => Keyword,
    Instanceof                  => Keyword,
    Interface                   => Keyword,
    Invalid                     => Invalid,
    LessThan                    => Operator,
    LessThanEquals              => Operator,
    Let                         => Keyword,
    Minus                       => Operator,
    MinusEquals                 => Operator,
    MinusMinus                  => Operator,
    New                         => Keyword,
    NullLiteral                 => Keyword,
    NumericLiteral              => Number,
    Package                     => Keyword,
    ParenClose                  => Punctuation,
    ParenOpen                   => Punctuation,
    Percent                     => Operator,
    PercentEquals               => Operator,
    Period                      => Operator,
    Pipe                        => Operator,
    PipeEquals                  => Operator,
    Plus                        => Operator,
    PlusEquals                  => Operator,
    PlusPlus                    => Operator,
    Private                     => Keyword,
    PrivateIdentifier           => Identifier,
    Protected                   => Keyword,
    Public                      => Keyword,
    QuestionMark                => Operator,
    QuestionMarkPeriod          => Operator,
    RegexFlags                  => String,
    RegexLiteral                => String,
    Return                      => ControlKeyword,
    Semicolon                   => Punctuation,
    ShiftLeft                   => Operator,
    ShiftLeftEquals             => Operator,
    ShiftRight                  => Operator,
    ShiftRightEquals            => Operator,
    Slash                       => Operator,
    SlashEquals                 => Operator,
    Static                      => Keyword,
    StringLiteral               => String,
    Super                       => Keyword,
    Switch                      => ControlKeyword,
    TemplateLiteralEnd          => String,
    TemplateLiteralExprEnd      => Punctuation,
    TemplateLiteralExprStart    => Punctuation,
    TemplateLiteralStart        => String,
    TemplateLiteralString       => String,
    This                        => Keyword,
    Throw                       => ControlKeyword,
    Tilde                       => Operator,
    TripleDot                   => Operator,
    Trivia                      => Trivia,
    Try                         => ControlKeyword,
    Typeof                      => Keyword,
    UnsignedShiftRight          => Operator,
    UnsignedShiftRightEquals    => Operator,
    UnterminatedRegexLiteral    => String,
    UnterminatedStringLiteral   => String,
    UnterminatedTemplateLiteral => String,
    Var                         => Keyword,
    Void                        => Keyword,
    While                       => ControlKeyword,
    With                        => ControlKeyword,
    Yield                       => ControlKeyword,
}
|
||||
|
||||
impl TokenType {
|
||||
pub fn is_identifier_name(self) -> bool {
|
||||
self != TokenType::PrivateIdentifier
|
||||
&& matches!(
|
||||
self.category(),
|
||||
TokenCategory::Identifier | TokenCategory::Keyword | TokenCategory::ControlKeyword
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token {
|
||||
pub token_type: TokenType,
|
||||
pub trivia_start: u32,
|
||||
pub trivia_len: u32,
|
||||
pub value_start: u32,
|
||||
pub value_len: u32,
|
||||
pub line_number: u32,
|
||||
pub line_column: u32,
|
||||
pub offset: u32,
|
||||
pub trivia_has_line_terminator: bool,
|
||||
/// Decoded identifier value, set when the identifier contains unicode
|
||||
/// escape sequences (e.g. `l\u0065t` → `let`).
|
||||
pub identifier_value: Option<crate::ast::Utf16String>,
|
||||
/// Error message for Invalid tokens (e.g. "Unterminated multi-line comment").
|
||||
pub message: Option<String>,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn new(token_type: TokenType) -> Self {
|
||||
Token {
|
||||
token_type,
|
||||
trivia_start: 0,
|
||||
trivia_len: 0,
|
||||
value_start: 0,
|
||||
value_len: 0,
|
||||
line_number: 0,
|
||||
line_column: 0,
|
||||
offset: 0,
|
||||
trivia_has_line_terminator: false,
|
||||
identifier_value: None,
|
||||
message: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
1339
Libraries/LibJS/RustIntegration.cpp
Normal file
1339
Libraries/LibJS/RustIntegration.cpp
Normal file
File diff suppressed because it is too large
Load Diff
109
Libraries/LibJS/RustIntegration.h
Normal file
109
Libraries/LibJS/RustIntegration.h
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/HashTable.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/Result.h>
|
||||
#include <AK/Utf16FlyString.h>
|
||||
#include <LibGC/Ptr.h>
|
||||
#include <LibGC/Root.h>
|
||||
#include <LibJS/Forward.h>
|
||||
#include <LibJS/ParserError.h>
|
||||
#include <LibJS/Runtime/AbstractOperations.h>
|
||||
#include <LibJS/Runtime/FunctionKind.h>
|
||||
#include <LibJS/Script.h>
|
||||
#include <LibJS/SourceTextModule.h>
|
||||
|
||||
namespace JS::RustIntegration {
|
||||
|
||||
// Result type for compile_script().
|
||||
// NB: Uses GC::Root to prevent collection while the result is in transit
|
||||
// between compile_script() and the Script constructor.
|
||||
struct ScriptResult {
|
||||
GC::Root<Bytecode::Executable> executable;
|
||||
bool is_strict_mode { false };
|
||||
Vector<Utf16FlyString> lexical_names;
|
||||
Vector<Utf16FlyString> var_names;
|
||||
struct FunctionToInitialize {
|
||||
GC::Root<SharedFunctionInstanceData> shared_data;
|
||||
Utf16FlyString name;
|
||||
};
|
||||
Vector<FunctionToInitialize> functions_to_initialize;
|
||||
HashTable<Utf16FlyString> declared_function_names;
|
||||
Vector<Utf16FlyString> var_scoped_names;
|
||||
Vector<Utf16FlyString> annex_b_candidate_names;
|
||||
Vector<Script::LexicalBinding> lexical_bindings;
|
||||
};
|
||||
|
||||
// Result type for compile_eval() and compile_shadow_realm_eval().
|
||||
// NB: Uses GC::Root to prevent collection while the result is in transit.
|
||||
struct EvalResult {
|
||||
GC::Root<Bytecode::Executable> executable;
|
||||
bool is_strict_mode { false };
|
||||
EvalDeclarationData declaration_data;
|
||||
};
|
||||
|
||||
// Result type for compile_module().
|
||||
// NB: Uses GC::Root to prevent collection while the result is in transit.
|
||||
struct ModuleResult {
|
||||
bool has_top_level_await { false };
|
||||
Vector<ModuleRequest> requested_modules;
|
||||
Vector<ImportEntry> import_entries;
|
||||
Vector<ExportEntry> local_export_entries;
|
||||
Vector<ExportEntry> indirect_export_entries;
|
||||
Vector<ExportEntry> star_export_entries;
|
||||
Optional<Utf16FlyString> default_export_binding_name;
|
||||
Vector<Utf16FlyString> var_declared_names;
|
||||
Vector<SourceTextModule::LexicalBinding> lexical_bindings;
|
||||
struct FunctionToInitialize {
|
||||
GC::Root<SharedFunctionInstanceData> shared_data;
|
||||
Utf16FlyString name;
|
||||
};
|
||||
Vector<FunctionToInitialize> functions_to_initialize;
|
||||
GC::Root<Bytecode::Executable> executable;
|
||||
GC::Root<SharedFunctionInstanceData> tla_shared_data;
|
||||
};
|
||||
|
||||
// Compile a script. Returns nullopt if Rust is not available.
|
||||
Optional<Result<ScriptResult, Vector<ParserError>>> compile_script(
|
||||
StringView source_text, Realm& realm, StringView filename, size_t line_number_offset);
|
||||
|
||||
// Compile eval code. Returns nullopt if Rust is not available.
|
||||
// On success, the executable's name is set to "eval".
|
||||
Optional<Result<EvalResult, String>> compile_eval(
|
||||
PrimitiveString& code_string, VM& vm,
|
||||
CallerMode strict_caller, bool in_function, bool in_method,
|
||||
bool in_derived_constructor, bool in_class_field_initializer);
|
||||
|
||||
// Compile ShadowRealm eval code. Returns nullopt if Rust is not available.
|
||||
// On success, the executable's name is set to "ShadowRealmEval".
|
||||
Optional<Result<EvalResult, String>> compile_shadow_realm_eval(
|
||||
PrimitiveString& source_text, VM& vm);
|
||||
|
||||
// Compile a module. Returns nullopt if Rust is not available.
|
||||
Optional<Result<ModuleResult, Vector<ParserError>>> compile_module(
|
||||
StringView source_text, Realm& realm, StringView filename);
|
||||
|
||||
// Compile a dynamic function (new Function()). Returns nullopt if Rust is not available.
|
||||
// On success, returns a SharedFunctionInstanceData with source_text set.
|
||||
Optional<Result<GC::Ref<SharedFunctionInstanceData>, String>> compile_dynamic_function(
|
||||
VM& vm, StringView source_text, StringView parameters_string, StringView body_parse_string,
|
||||
FunctionKind kind);
|
||||
|
||||
// Compile a builtin JS file. Returns nullopt if Rust is not available.
|
||||
Optional<Vector<GC::Root<SharedFunctionInstanceData>>> compile_builtin_file(
|
||||
unsigned char const* script_text, VM& vm);
|
||||
|
||||
// Compile a function body for lazy compilation.
|
||||
// Returns nullptr if Rust is not available or the SFD doesn't use Rust compilation.
|
||||
GC::Ptr<Bytecode::Executable> compile_function(VM& vm, SharedFunctionInstanceData& shared_data, bool builtin_abstract_operations_enabled);
|
||||
|
||||
// Free a Rust function AST pointer. No-op if Rust is not available.
|
||||
void free_function_ast(void* ast);
|
||||
|
||||
}
|
||||
@@ -5,21 +5,34 @@
|
||||
*/
|
||||
|
||||
#include <LibJS/AST.h>
|
||||
#include <LibJS/Bytecode/Executable.h>
|
||||
#include <LibJS/Lexer.h>
|
||||
#include <LibJS/Parser.h>
|
||||
#include <LibJS/Runtime/ECMAScriptFunctionObject.h>
|
||||
#include <LibJS/Runtime/GlobalEnvironment.h>
|
||||
#include <LibJS/Runtime/SharedFunctionInstanceData.h>
|
||||
#include <LibJS/Runtime/VM.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
#include <LibJS/Script.h>
|
||||
#include <LibJS/SourceCode.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
bool g_dump_ast = false;
|
||||
bool g_dump_ast_use_color = false;
|
||||
|
||||
GC_DEFINE_ALLOCATOR(Script);
|
||||
|
||||
// 16.1.5 ParseScript ( sourceText, realm, hostDefined ), https://tc39.es/ecma262/#sec-parse-script
|
||||
Result<GC::Ref<Script>, Vector<ParserError>> Script::parse(StringView source_text, Realm& realm, StringView filename, HostDefined* host_defined, size_t line_number_offset)
|
||||
{
|
||||
auto rust_compilation = RustIntegration::compile_script(source_text, realm, filename, line_number_offset);
|
||||
if (rust_compilation.has_value()) {
|
||||
if (rust_compilation->is_error())
|
||||
return rust_compilation->release_error();
|
||||
return realm.heap().allocate<Script>(realm, filename, move(rust_compilation->value()), host_defined);
|
||||
}
|
||||
|
||||
// 1. Let script be ParseText(sourceText, Script).
|
||||
auto parser = Parser(Lexer(SourceCode::create(String::from_utf8(filename).release_value_but_fixme_should_propagate_errors(), Utf16String::from_utf8(source_text)), line_number_offset));
|
||||
auto script = parser.parse_program();
|
||||
@@ -75,7 +88,8 @@ Script::Script(Realm& realm, StringView filename, RefPtr<Program> parse_node, Ho
|
||||
// Pre-compute AnnexB candidates (GDI step 13).
|
||||
if (!m_is_strict_mode) {
|
||||
MUST(program.for_each_function_hoistable_with_annexB_extension([&](FunctionDeclaration& function_declaration) -> ThrowCompletionOr<void> {
|
||||
m_annex_b_candidates.append(function_declaration);
|
||||
m_annex_b_candidate_names.append(function_declaration.name());
|
||||
m_annex_b_function_declarations.append(function_declaration);
|
||||
return {};
|
||||
}));
|
||||
}
|
||||
@@ -89,6 +103,24 @@ Script::Script(Realm& realm, StringView filename, RefPtr<Program> parse_node, Ho
|
||||
}));
|
||||
}
|
||||
|
||||
// Builds a Script from a RustIntegration::ScriptResult instead of a parsed
// Program. The name lists and lexical bindings are moved out of `result`;
// the executable handle and the strict-mode flag are copied. m_parse_node is
// not initialized here, so no AST is attached on this path.
Script::Script(Realm& realm, StringView filename, RustIntegration::ScriptResult&& result, HostDefined* host_defined)
    : m_realm(realm)
    , m_executable(result.executable)
    , m_lexical_names(move(result.lexical_names))
    , m_var_names(move(result.var_names))
    , m_declared_function_names(move(result.declared_function_names))
    , m_var_scoped_names(move(result.var_scoped_names))
    , m_annex_b_candidate_names(move(result.annex_b_candidate_names))
    , m_lexical_bindings(move(result.lexical_bindings))
    , m_is_strict_mode(result.is_strict_mode)
    , m_filename(filename)
    , m_host_defined(host_defined)
{
    // Convert each entry of the result into this class's own
    // functions-to-initialize record: dereference shared_data and take over
    // the name. Capacity is reserved up front for the whole list.
    auto& pending_functions = result.functions_to_initialize;
    m_functions_to_initialize.ensure_capacity(pending_functions.size());
    for (auto& pending : pending_functions) {
        m_functions_to_initialize.append({ *pending.shared_data, move(pending.name) });
    }
}
|
||||
|
||||
// 16.1.7 GlobalDeclarationInstantiation ( script, env ), https://tc39.es/ecma262/#sec-globaldeclarationinstantiation
|
||||
ThrowCompletionOr<void> Script::global_declaration_instantiation(VM& vm, GlobalEnvironment& global_environment)
|
||||
{
|
||||
@@ -157,9 +189,9 @@ ThrowCompletionOr<void> Script::global_declaration_instantiation(VM& vm, GlobalE
|
||||
if (!m_is_strict_mode) {
|
||||
// a. Let declaredFunctionOrVarNames be the list-concatenation of declaredFunctionNames and declaredVarNames.
|
||||
// b. For each FunctionDeclaration f that is directly contained in the StatementList of a Block, CaseClause, or DefaultClause Contained within script, do
|
||||
for (auto& function_declaration : m_annex_b_candidates) {
|
||||
for (size_t i = 0; i < m_annex_b_candidate_names.size(); ++i) {
|
||||
// i. Let F be StringValue of the BindingIdentifier of f.
|
||||
auto function_name = function_declaration->name();
|
||||
auto& function_name = m_annex_b_candidate_names[i];
|
||||
|
||||
// 1. If env.HasLexicalDeclaration(F) is false, then
|
||||
if (global_environment.has_lexical_declaration(function_name))
|
||||
@@ -178,7 +210,8 @@ ThrowCompletionOr<void> Script::global_declaration_instantiation(VM& vm, GlobalE
|
||||
}
|
||||
|
||||
// iii. When the FunctionDeclaration f is evaluated, perform the following steps in place of the FunctionDeclaration Evaluation algorithm provided in 15.2.6:
|
||||
function_declaration->set_should_do_additional_annexB_steps();
|
||||
if (i < m_annex_b_function_declarations.size())
|
||||
m_annex_b_function_declarations[i]->set_should_do_additional_annexB_steps();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -228,7 +261,7 @@ ThrowCompletionOr<void> Script::global_declaration_instantiation(VM& vm, GlobalE
|
||||
// Releases the AST held by this Script: resets m_parse_node to null and
// empties the vectors of FunctionDeclaration references kept for Annex B.
// NOTE(review): this text comes from a diff rendering without +/- markers, so
// one of the two clear() calls below may be the pre-change line — confirm
// against the actual file.
void Script::drop_ast()
|
||||
{
|
||||
m_parse_node = nullptr;
|
||||
m_annex_b_candidates.clear();
|
||||
m_annex_b_function_declarations.clear();
|
||||
}
|
||||
|
||||
Script::~Script()
|
||||
|
||||
@@ -18,8 +18,17 @@
|
||||
|
||||
namespace JS {
|
||||
|
||||
JS_API extern bool g_dump_ast;
|
||||
JS_API extern bool g_dump_ast_use_color;
|
||||
|
||||
class FunctionDeclaration;
|
||||
|
||||
namespace RustIntegration {
|
||||
|
||||
struct ScriptResult;
|
||||
|
||||
}
|
||||
|
||||
// 16.1.4 Script Records, https://tc39.es/ecma262/#sec-script-records
|
||||
class JS_API Script final : public Cell {
|
||||
GC_CELL(Script, Cell);
|
||||
@@ -58,17 +67,6 @@ public:
|
||||
|
||||
void drop_ast();
|
||||
|
||||
private:
|
||||
Script(Realm&, StringView filename, RefPtr<Program>, HostDefined*);
|
||||
|
||||
virtual void visit_edges(Cell::Visitor&) override;
|
||||
|
||||
GC::Ptr<Realm> m_realm; // [[Realm]]
|
||||
RefPtr<Program> m_parse_node; // [[ECMAScriptCode]]
|
||||
Vector<LoadedModuleRequest> m_loaded_modules; // [[LoadedModules]]
|
||||
|
||||
mutable GC::Ptr<Bytecode::Executable> m_executable;
|
||||
|
||||
// Pre-computed global declaration instantiation data.
|
||||
// These are extracted from the AST at parse time so that GDI can run
|
||||
// without needing to walk the AST.
|
||||
@@ -80,12 +78,26 @@ private:
|
||||
Utf16FlyString name;
|
||||
bool is_constant { false };
|
||||
};
|
||||
|
||||
private:
|
||||
Script(Realm&, StringView filename, RefPtr<Program>, HostDefined*);
|
||||
Script(Realm&, StringView filename, RustIntegration::ScriptResult&&, HostDefined*);
|
||||
|
||||
virtual void visit_edges(Cell::Visitor&) override;
|
||||
|
||||
GC::Ptr<Realm> m_realm; // [[Realm]]
|
||||
RefPtr<Program> m_parse_node; // [[ECMAScriptCode]]
|
||||
Vector<LoadedModuleRequest> m_loaded_modules; // [[LoadedModules]]
|
||||
|
||||
mutable GC::Ptr<Bytecode::Executable> m_executable;
|
||||
|
||||
Vector<Utf16FlyString> m_lexical_names;
|
||||
Vector<Utf16FlyString> m_var_names;
|
||||
Vector<FunctionToInitialize> m_functions_to_initialize;
|
||||
HashTable<Utf16FlyString> m_declared_function_names;
|
||||
Vector<Utf16FlyString> m_var_scoped_names;
|
||||
Vector<NonnullRefPtr<FunctionDeclaration>> m_annex_b_candidates;
|
||||
Vector<Utf16FlyString> m_annex_b_candidate_names;
|
||||
Vector<NonnullRefPtr<FunctionDeclaration>> m_annex_b_function_declarations;
|
||||
Vector<LexicalBinding> m_lexical_bindings;
|
||||
bool m_is_strict_mode { false };
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <AK/Debug.h>
|
||||
#include <AK/QuickSort.h>
|
||||
#include <LibJS/Bytecode/Executable.h>
|
||||
#include <LibJS/Bytecode/Interpreter.h>
|
||||
#include <LibJS/Parser.h>
|
||||
#include <LibJS/Runtime/AsyncFunctionDriverWrapper.h>
|
||||
@@ -15,6 +16,9 @@
|
||||
#include <LibJS/Runtime/ModuleEnvironment.h>
|
||||
#include <LibJS/Runtime/PromiseCapability.h>
|
||||
#include <LibJS/Runtime/SharedFunctionInstanceData.h>
|
||||
#include <LibJS/RustIntegration.h>
|
||||
#include <LibJS/Script.h>
|
||||
#include <LibJS/SourceCode.h>
|
||||
#include <LibJS/SourceTextModule.h>
|
||||
|
||||
namespace JS {
|
||||
@@ -164,6 +168,29 @@ SourceTextModule::SourceTextModule(Realm& realm, StringView filename, Script::Ho
|
||||
}
|
||||
}
|
||||
|
||||
// Constructor for the Rust pipeline: every piece of module metadata arrives
// pre-computed and is moved straight into the corresponding member, so there
// is no AST to walk and the body is empty. m_ecmascript_code is not
// initialized here. The execution context is created with (0, 0, 0), and the
// executable and top-level-await shared data handles are stored as given.
SourceTextModule::SourceTextModule(
    Realm& realm,
    StringView filename,
    Script::HostDefined* host_defined,
    bool has_top_level_await,
    Vector<ModuleRequest> requested_modules,
    Vector<ImportEntry> import_entries,
    Vector<ExportEntry> local_export_entries,
    Vector<ExportEntry> indirect_export_entries,
    Vector<ExportEntry> star_export_entries,
    Optional<Utf16FlyString> default_export_binding_name,
    Vector<Utf16FlyString> var_declared_names,
    Vector<LexicalBinding> lexical_bindings,
    Vector<FunctionToInitialize> functions_to_initialize,
    GC::Ptr<Bytecode::Executable> executable,
    GC::Ptr<SharedFunctionInstanceData> tla_shared_data)
    : CyclicModule(realm, filename, has_top_level_await, move(requested_modules), host_defined)
    , m_execution_context(ExecutionContext::create(0, 0, 0))
    , m_import_entries(move(import_entries))
    , m_local_export_entries(move(local_export_entries))
    , m_indirect_export_entries(move(indirect_export_entries))
    , m_star_export_entries(move(star_export_entries))
    , m_var_declared_names(move(var_declared_names))
    , m_lexical_bindings(move(lexical_bindings))
    , m_functions_to_initialize(move(functions_to_initialize))
    , m_default_export_binding_name(move(default_export_binding_name))
    , m_executable(executable)
    , m_tla_shared_data(tla_shared_data)
{
}
|
||||
|
||||
SourceTextModule::~SourceTextModule() = default;
|
||||
|
||||
void SourceTextModule::visit_edges(Cell::Visitor& visitor)
|
||||
@@ -180,6 +207,25 @@ void SourceTextModule::visit_edges(Cell::Visitor& visitor)
|
||||
// 16.2.1.7.1 ParseModule ( sourceText, realm, hostDefined ), https://tc39.es/ecma262/#sec-parsemodule
|
||||
Result<GC::Ref<SourceTextModule>, Vector<ParserError>> SourceTextModule::parse(StringView source_text, Realm& realm, StringView filename, Script::HostDefined* host_defined)
|
||||
{
|
||||
auto rust_result = RustIntegration::compile_module(source_text, realm, filename);
|
||||
if (rust_result.has_value()) {
|
||||
if (rust_result->is_error())
|
||||
return rust_result->release_error();
|
||||
auto& module_result = rust_result->value();
|
||||
Vector<FunctionToInitialize> functions_to_initialize;
|
||||
functions_to_initialize.ensure_capacity(module_result.functions_to_initialize.size());
|
||||
for (auto& f : module_result.functions_to_initialize)
|
||||
functions_to_initialize.append({ *f.shared_data, move(f.name) });
|
||||
return realm.heap().allocate<SourceTextModule>(
|
||||
realm, filename, host_defined, module_result.has_top_level_await,
|
||||
move(module_result.requested_modules), move(module_result.import_entries),
|
||||
move(module_result.local_export_entries), move(module_result.indirect_export_entries),
|
||||
move(module_result.star_export_entries), move(module_result.default_export_binding_name),
|
||||
move(module_result.var_declared_names), move(module_result.lexical_bindings),
|
||||
move(functions_to_initialize),
|
||||
module_result.executable.ptr(), module_result.tla_shared_data.ptr());
|
||||
}
|
||||
|
||||
// 1. Let body be ParseText(sourceText, Module).
|
||||
auto parser = Parser(Lexer(SourceCode::create(String::from_utf8(filename).release_value_but_fixme_should_propagate_errors(), Utf16String::from_utf8(source_text))), Program::Type::Module);
|
||||
auto body = parser.parse_program();
|
||||
|
||||
@@ -32,23 +32,6 @@ public:
|
||||
Object* import_meta() { return m_import_meta; }
|
||||
void set_import_meta(Badge<VM>, Object* import_meta) { m_import_meta = import_meta; }
|
||||
|
||||
protected:
|
||||
virtual ThrowCompletionOr<void> initialize_environment(VM& vm) override;
|
||||
virtual ThrowCompletionOr<void> execute_module(VM& vm, GC::Ptr<PromiseCapability> capability) override;
|
||||
|
||||
private:
|
||||
SourceTextModule(Realm&, StringView filename, Script::HostDefined* host_defined, bool has_top_level_await, NonnullRefPtr<Program> body, Vector<ModuleRequest> requested_modules, Vector<ImportEntry> import_entries, Vector<ExportEntry> local_export_entries, Vector<ExportEntry> indirect_export_entries, Vector<ExportEntry> star_export_entries, Optional<Utf16FlyString> default_export_binding_name);
|
||||
|
||||
virtual void visit_edges(Cell::Visitor&) override;
|
||||
|
||||
RefPtr<Program> m_ecmascript_code; // [[ECMAScriptCode]]
|
||||
NonnullOwnPtr<ExecutionContext> m_execution_context; // [[Context]]
|
||||
GC::Ptr<Object> m_import_meta; // [[ImportMeta]]
|
||||
Vector<ImportEntry> m_import_entries; // [[ImportEntries]]
|
||||
Vector<ExportEntry> m_local_export_entries; // [[LocalExportEntries]]
|
||||
Vector<ExportEntry> m_indirect_export_entries; // [[IndirectExportEntries]]
|
||||
Vector<ExportEntry> m_star_export_entries; // [[StarExportEntries]]
|
||||
|
||||
// Pre-computed module declaration instantiation data.
|
||||
// These are extracted from the AST at construction time so that
|
||||
// initialize_environment() can run without walking the AST.
|
||||
@@ -61,6 +44,27 @@ private:
|
||||
bool is_constant { false };
|
||||
i32 function_index { -1 }; // index into m_functions_to_initialize, -1 if not a function
|
||||
};
|
||||
|
||||
protected:
|
||||
virtual ThrowCompletionOr<void> initialize_environment(VM& vm) override;
|
||||
virtual ThrowCompletionOr<void> execute_module(VM& vm, GC::Ptr<PromiseCapability> capability) override;
|
||||
|
||||
private:
|
||||
SourceTextModule(Realm&, StringView filename, Script::HostDefined* host_defined, bool has_top_level_await, NonnullRefPtr<Program> body, Vector<ModuleRequest> requested_modules, Vector<ImportEntry> import_entries, Vector<ExportEntry> local_export_entries, Vector<ExportEntry> indirect_export_entries, Vector<ExportEntry> star_export_entries, Optional<Utf16FlyString> default_export_binding_name);
|
||||
|
||||
// Constructor for the Rust pipeline (pre-computed metadata, no AST).
|
||||
SourceTextModule(Realm&, StringView filename, Script::HostDefined* host_defined, bool has_top_level_await, Vector<ModuleRequest> requested_modules, Vector<ImportEntry> import_entries, Vector<ExportEntry> local_export_entries, Vector<ExportEntry> indirect_export_entries, Vector<ExportEntry> star_export_entries, Optional<Utf16FlyString> default_export_binding_name, Vector<Utf16FlyString> var_declared_names, Vector<LexicalBinding> lexical_bindings, Vector<FunctionToInitialize> functions_to_initialize, GC::Ptr<Bytecode::Executable> executable, GC::Ptr<SharedFunctionInstanceData> tla_shared_data);
|
||||
|
||||
virtual void visit_edges(Cell::Visitor&) override;
|
||||
|
||||
RefPtr<Program> m_ecmascript_code; // [[ECMAScriptCode]]
|
||||
NonnullOwnPtr<ExecutionContext> m_execution_context; // [[Context]]
|
||||
GC::Ptr<Object> m_import_meta; // [[ImportMeta]]
|
||||
Vector<ImportEntry> m_import_entries; // [[ImportEntries]]
|
||||
Vector<ExportEntry> m_local_export_entries; // [[LocalExportEntries]]
|
||||
Vector<ExportEntry> m_indirect_export_entries; // [[IndirectExportEntries]]
|
||||
Vector<ExportEntry> m_star_export_entries; // [[StarExportEntries]]
|
||||
|
||||
Vector<Utf16FlyString> m_var_declared_names;
|
||||
Vector<LexicalBinding> m_lexical_bindings;
|
||||
Vector<FunctionToInitialize> m_functions_to_initialize;
|
||||
|
||||
@@ -6,9 +6,9 @@ JS bytecode executable ""
|
||||
|
||||
JS bytecode executable "f"
|
||||
[ 0] 0: GetLexicalEnvironment dst:reg4
|
||||
[ 8] CreateVariable identifier:c, is_immutable:false, is_global:false, is_strict:false
|
||||
[ 18] CreateVariable identifier:a, is_immutable:false, is_global:false, is_strict:false
|
||||
[ 28] CreateVariable identifier:b, is_immutable:false, is_global:false, is_strict:false
|
||||
[ 8] CreateVariable identifier:a, is_immutable:false, is_global:false, is_strict:false
|
||||
[ 18] CreateVariable identifier:b, is_immutable:false, is_global:false, is_strict:false
|
||||
[ 28] CreateVariable identifier:c, is_immutable:false, is_global:false, is_strict:false
|
||||
[ 38] InitializeLexicalBinding identifier:a, src:arg0
|
||||
[ 50] InitializeLexicalBinding identifier:b, src:arg1
|
||||
[ 68] InitializeLexicalBinding identifier:c, src:arg2
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <LibJS/Runtime/JSONObject.h>
|
||||
#include <LibJS/Runtime/StringPrototype.h>
|
||||
#include <LibJS/Runtime/ValueInlines.h>
|
||||
#include <LibJS/Script.h>
|
||||
#include <LibJS/SourceTextModule.h>
|
||||
#include <LibMain/Main.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
@@ -213,7 +214,7 @@ static ErrorOr<bool> parse_and_run(JS::Realm& realm, StringView source, StringVi
|
||||
result = vm.throw_completion<JS::SyntaxError>(move(error_string));
|
||||
} else {
|
||||
auto script = script_or_error.release_value();
|
||||
if (s_dump_ast)
|
||||
if (s_dump_ast && script->parse_node())
|
||||
dump_ast(*script->parse_node());
|
||||
if (!parse_only)
|
||||
result = vm.bytecode_interpreter().run(*script);
|
||||
@@ -846,6 +847,9 @@ ErrorOr<int> ladybird_main(Main::Arguments arguments)
|
||||
|
||||
[[maybe_unused]] bool syntax_highlight = !disable_syntax_highlight;
|
||||
|
||||
JS::g_dump_ast = s_dump_ast;
|
||||
JS::g_dump_ast_use_color = !s_strip_ansi;
|
||||
|
||||
AK::set_debug_enabled(!disable_debug_printing);
|
||||
s_history_path = TRY(String::formatted("{}/.js-history", Core::StandardPaths::home_directory()));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user