diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000000..5bf2a53db3a --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,53 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "libjs_rust" +version = "0.1.0" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", + "unicode-ident", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000000..f9f8398c6d1 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,5 @@ +[workspace] +members = [ + "Libraries/LibJS/Rust", +] +resolver = "2" diff --git a/Libraries/LibJS/Bytecode/Interpreter.cpp b/Libraries/LibJS/Bytecode/Interpreter.cpp index eac49b79aeb..eded5c5eb25 100644 --- a/Libraries/LibJS/Bytecode/Interpreter.cpp +++ b/Libraries/LibJS/Bytecode/Interpreter.cpp @@ -128,11 +128,13 @@ ThrowCompletionOr Interpreter::run(Script& script_record, GC::Ptr executable = script_record.cached_executable(); if (!executable && result.type() == Completion::Type::Normal) { executable = JS::Bytecode::Generator::generate_from_ast_node(vm, *script_record.parse_node(), {}); - script_record.cache_executable(*executable); - script_record.drop_ast(); - if (g_dump_bytecode) - executable->dump(); + if (executable) { + script_record.cache_executable(*executable); + script_record.drop_ast(); + } } + if (executable && g_dump_bytecode) + executable->dump(); u32 registers_and_locals_count = 0; u32 constants_count = 0; diff --git a/Libraries/LibJS/BytecodeFactory.h b/Libraries/LibJS/BytecodeFactory.h new file mode 100644 index 00000000000..0707d1b1a8d --- /dev/null +++ b/Libraries/LibJS/BytecodeFactory.h @@ -0,0 +1,464 @@ +/* + * Copyright (c) 2026, Andreas Kling + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#ifdef ENABLE_RUST + +# include +# include + +// FFI types for creating a Bytecode::Executable from Rust. +// +// The Rust bytecode generator assembles instructions into a byte buffer +// matching C++ layout. This FFI layer creates the C++ Executable from +// that data. + +// Constant value tags (matches Rust ConstantValue enum discriminants) +# define CONSTANT_TAG_NUMBER 0 +# define CONSTANT_TAG_BOOLEAN_TRUE 1 +# define CONSTANT_TAG_BOOLEAN_FALSE 2 +# define CONSTANT_TAG_NULL 3 +# define CONSTANT_TAG_UNDEFINED 4 +# define CONSTANT_TAG_EMPTY 5 +# define CONSTANT_TAG_STRING 6 +# define CONSTANT_TAG_BIGINT 7 +# define CONSTANT_TAG_RAW_VALUE 8 + +struct FFIExceptionHandler { + uint32_t start_offset; + uint32_t end_offset; + uint32_t handler_offset; +}; + +struct FFISourceMapEntry { + uint32_t bytecode_offset; + uint32_t source_start; + uint32_t source_end; +}; + +// A UTF-16 string slice (pointer + length). +struct FFIUtf16Slice { + uint16_t const* data; + size_t length; +}; + +// An optional uint32_t for FFI (replaces -1 sentinel values). +struct FFIOptionalU32 { + uint32_t value; + bool has_value; +}; + +// Class element descriptor for FFI (matches ClassElementDescriptor::Kind). +// Kind values: 0=Method, 1=Getter, 2=Setter, 3=Field, 4=StaticInitializer +struct FFIClassElement { + uint8_t kind; + bool is_static; + bool is_private; + uint16_t const* private_identifier; + size_t private_identifier_len; + FFIOptionalU32 shared_function_data_index; + bool has_initializer; + uint8_t literal_value_kind; // 0=none, 1=number, 2=boolean_true, 3=boolean_false, 4=null, 5=string + double literal_value_number; + uint16_t const* literal_value_string; + size_t literal_value_string_len; +}; + +# ifdef __cplusplus +extern "C" { +# endif + +// Callback for reporting parse errors from Rust. +// message is UTF-8, line and column are 1-based. +typedef void (*RustParseErrorCallback)(void* ctx, char const* message, size_t message_len, uint32_t line, uint32_t column); + +// Parse, compile, and extract GDI metadata for a script using the Rust +// parser. Populates gdi_context (a ScriptGdiBuilder*) via callbacks. +// On parse failure, calls error_callback for each error, then returns nullptr. +// Returns a Bytecode::Executable* cast to void*, or nullptr on failure. +void* rust_compile_script( + uint16_t const* source, + size_t source_len, + void* vm_ptr, + void const* source_code_ptr, + void* gdi_context, + bool dump_ast, + bool use_color, + void* error_context, + RustParseErrorCallback error_callback, + uint8_t** ast_dump_output, + size_t* ast_dump_output_len, + size_t initial_line_number); + +// Parse and compile a JavaScript program using the Rust parser and +// bytecode generator. Returns a Bytecode::Executable* cast to void*, +// or nullptr on failure. +void* rust_compile_program( + uint16_t const* source, + size_t source_len, + void* vm_ptr, + void const* source_code_ptr, + uint8_t program_type, + bool starts_in_strict_mode, + bool initiated_by_eval, + bool in_eval_function_context, + bool allow_super_property_lookup, + bool allow_super_constructor_call, + bool in_class_field_initializer); + +// All the data needed to create a Bytecode::Executable from Rust. +struct FFIExecutableData { + // Bytecode + uint8_t const* bytecode; + size_t bytecode_length; + // Tables: arrays of UTF-16 string slices + FFIUtf16Slice const* identifier_table; + size_t identifier_count; + FFIUtf16Slice const* property_key_table; + size_t property_key_count; + FFIUtf16Slice const* string_table; + size_t string_count; + // Constants: tagged byte array + uint8_t const* constants_data; + size_t constants_data_length; + size_t constants_count; + // Exception handlers + FFIExceptionHandler const* exception_handlers; + size_t exception_handler_count; + // Source map + FFISourceMapEntry const* source_map; + size_t source_map_count; + // Basic block start offsets + size_t const* basic_block_offsets; + size_t basic_block_count; + // Local variable names + FFIUtf16Slice const* local_variable_names; + size_t local_variable_count; + // Cache counts + uint32_t property_lookup_cache_count; + uint32_t global_variable_cache_count; + uint32_t template_object_cache_count; + uint32_t object_shape_cache_count; + // Register and mode + uint32_t number_of_registers; + bool is_strict; + // Length identifier: PropertyKeyTableIndex for "length" + FFIOptionalU32 length_identifier; + // Shared function data (inner functions) + void const* const* shared_function_data; + size_t shared_function_data_count; + // Class blueprints (heap-allocated, ownership transfers) + void* const* class_blueprints; + size_t class_blueprint_count; + // Regex table (pre-compiled, ownership transfers) + void* const* compiled_regexes; + size_t regex_count; +}; + +// Create a C++ Bytecode::Executable from assembled Rust bytecode data. +// +// The source_code parameter is a SourceCode const* cast to void*. +// Returns a GC::Ptr cast to void*, or nullptr on failure. +void* rust_create_executable( + void* vm_ptr, + void* source_code_ptr, + FFIExecutableData const* data); + +// All the data needed to create a SharedFunctionInstanceData from Rust. +struct FFISharedFunctionData { + // Function name (UTF-16) + uint16_t const* name; + size_t name_len; + // Metadata + uint8_t function_kind; + int32_t function_length; + uint32_t formal_parameter_count; + bool strict; + bool is_arrow; + bool has_simple_parameter_list; + // Parameter names for mapped arguments (only for simple parameter lists) + FFIUtf16Slice const* parameter_names; + size_t parameter_name_count; + // Source text range (for Function.prototype.toString) + size_t source_text_offset; + size_t source_text_length; + // Opaque Rust AST pointer (Box) + void* rust_function_ast; + // Parsing insights needed before lazy compilation + bool uses_this; + bool uses_this_from_environment; +}; + +// Create a SharedFunctionInstanceData from pre-computed metadata (Rust pipeline). +// Stores an opaque Rust AST pointer for lazy compilation. +// +// Returns a SharedFunctionInstanceData* cast to void*. +void* rust_create_sfd( + void* vm_ptr, + void const* source_code_ptr, + FFISharedFunctionData const* data); + +// Set class_field_initializer_name on a SharedFunctionInstanceData. +// Called after rust_create_sfd for class field initializer functions. +void rust_sfd_set_class_field_initializer_name( + void* sfd_ptr, + uint16_t const* name, + size_t name_len, + bool is_private); + +// Compile a function body using the Rust pipeline. +// Takes ownership of the Rust AST (frees it after compilation). +// +// Writes FDI runtime metadata to the SFD via the sfd_ptr parameter. +// Returns a Bytecode::Executable* cast to void*, or nullptr on failure. +void* rust_compile_function( + void* vm_ptr, + void const* source_code_ptr, + uint16_t const* source, + size_t source_len, + void* sfd_ptr, + void* rust_function_ast, + bool builtin_abstract_operations_enabled); + +// Free a Rust Box (called from SFD destructor). +void rust_free_function_ast(void* ast); + +// Set FDI runtime metadata on a SharedFunctionInstanceData. +// Called from Rust after compiling a function body. +void rust_sfd_set_metadata( + void* sfd_ptr, + bool uses_this, + bool function_environment_needed, + size_t function_environment_bindings_count, + bool might_need_arguments_object, + bool contains_direct_call_to_eval); + +// Create a ClassBlueprint on the heap. Ownership transfers to the +// caller; pass the pointer to rust_create_executable which will move +// the blueprint into the Executable. +// +// Returns a heap-allocated ClassBlueprint* cast to void*. +void* rust_create_class_blueprint( + // VM pointer for creating GC objects (e.g. PrimitiveString) + void* vm_ptr, + // Source code object for substring_view + void const* source_code_ptr, + // Class name (empty for anonymous) + uint16_t const* name, + size_t name_len, + // Source text of the entire class (for Function.prototype.toString) + size_t source_text_offset, + size_t source_text_len, + // Index into shared_function_data for the constructor + uint32_t constructor_sfd_index, + bool has_super_class, + bool has_name, + // Array of class element descriptors + FFIClassElement const* elements, + size_t element_count); + +// Callbacks used by rust_compile_script to populate GDI metadata. +void script_gdi_push_lexical_name(void* ctx, uint16_t const* name, size_t len); +void script_gdi_push_var_name(void* ctx, uint16_t const* name, size_t len); +void script_gdi_push_function(void* ctx, void* sfd_ptr, uint16_t const* name, size_t len); +void script_gdi_push_var_scoped_name(void* ctx, uint16_t const* name, size_t len); +void script_gdi_push_annex_b_name(void* ctx, uint16_t const* name, size_t len); +void script_gdi_push_lexical_binding(void* ctx, uint16_t const* name, size_t len, bool is_constant); + +// Parse, compile, and extract EDI metadata for eval using the Rust +// parser. Populates gdi_context (an EvalGdiBuilder*) via callbacks. +// Returns a Bytecode::Executable* cast to void*, or nullptr on failure. +void* rust_compile_eval( + uint16_t const* source, + size_t source_len, + void* vm_ptr, + void const* source_code_ptr, + void* gdi_context, + bool starts_in_strict_mode, + bool in_eval_function_context, + bool allow_super_property_lookup, + bool allow_super_constructor_call, + bool in_class_field_initializer, + void* error_context, + RustParseErrorCallback error_callback, + uint8_t** ast_dump_output, + size_t* ast_dump_output_len); + +// Parse and compile a dynamically-created function (new Function()). +// Validates parameters and body separately per spec, then parses the +// full synthetic source to create a SharedFunctionInstanceData. +// +// Returns a SharedFunctionInstanceData* cast to void*, or nullptr on +// parse failure (caller should throw SyntaxError). +// +// function_kind: 0=Normal, 1=Generator, 2=Async, 3=AsyncGenerator +void* rust_compile_dynamic_function( + uint16_t const* full_source, + size_t full_source_len, + uint16_t const* params_source, + size_t params_source_len, + uint16_t const* body_source, + size_t body_source_len, + void* vm_ptr, + void const* source_code_ptr, + uint8_t function_kind, + void* error_context, + RustParseErrorCallback error_callback, + uint8_t** ast_dump_output, + size_t* ast_dump_output_len); + +// Callbacks used by rust_compile_eval to populate EDI metadata. +void eval_gdi_set_strict(void* ctx, bool is_strict); +void eval_gdi_push_var_name(void* ctx, uint16_t const* name, size_t len); +void eval_gdi_push_function(void* ctx, void* sfd, uint16_t const* name, size_t len); +void eval_gdi_push_var_scoped_name(void* ctx, uint16_t const* name, size_t len); +void eval_gdi_push_annex_b_name(void* ctx, uint16_t const* name, size_t len); +void eval_gdi_push_lexical_binding(void* ctx, uint16_t const* name, size_t len, bool is_constant); + +// Parse a builtin JS file in strict mode, extract top-level function +// declarations, and create SharedFunctionInstanceData for each via the +// Rust pipeline. Calls push_function for each function found. +typedef void (*RustBuiltinFunctionCallback)(void* ctx, void* sfd_ptr, uint16_t const* name, size_t name_len); +void rust_compile_builtin_file( + uint16_t const* source, + size_t source_len, + void* vm_ptr, + void const* source_code_ptr, + void* ctx, + RustBuiltinFunctionCallback push_function, + uint8_t** ast_dump_output, + size_t* ast_dump_output_len); + +// Module compilation callback table (matches Rust ModuleCallbacks struct). +struct ModuleCallbacks { + void (*set_has_top_level_await)(void* ctx, bool value); + void (*push_import_entry)( + void* ctx, + uint16_t const* import_name, + size_t import_name_len, + bool is_namespace, + uint16_t const* local_name, + size_t local_name_len, + uint16_t const* module_specifier, + size_t specifier_len, + FFIUtf16Slice const* attribute_keys, + FFIUtf16Slice const* attribute_values, + size_t attribute_count); + void (*push_local_export)( + void* ctx, + uint8_t kind, + uint16_t const* export_name, + size_t export_name_len, + uint16_t const* local_or_import_name, + size_t local_or_import_name_len, + uint16_t const* module_specifier, + size_t specifier_len, + FFIUtf16Slice const* attribute_keys, + FFIUtf16Slice const* attribute_values, + size_t attribute_count); + void (*push_indirect_export)( + void* ctx, + uint8_t kind, + uint16_t const* export_name, + size_t export_name_len, + uint16_t const* local_or_import_name, + size_t local_or_import_name_len, + uint16_t const* module_specifier, + size_t specifier_len, + FFIUtf16Slice const* attribute_keys, + FFIUtf16Slice const* attribute_values, + size_t attribute_count); + void (*push_star_export)( + void* ctx, + uint8_t kind, + uint16_t const* export_name, + size_t export_name_len, + uint16_t const* local_or_import_name, + size_t local_or_import_name_len, + uint16_t const* module_specifier, + size_t specifier_len, + FFIUtf16Slice const* attribute_keys, + FFIUtf16Slice const* attribute_values, + size_t attribute_count); + void (*push_requested_module)( + void* ctx, + uint16_t const* specifier, + size_t specifier_len, + FFIUtf16Slice const* attribute_keys, + FFIUtf16Slice const* attribute_values, + size_t attribute_count); + void (*set_default_export_binding)(void* ctx, uint16_t const* name, size_t name_len); + void (*push_var_name)(void* ctx, uint16_t const* name, size_t name_len); + void (*push_function)(void* ctx, void* sfd_ptr, uint16_t const* name, size_t name_len); + void (*push_lexical_binding)(void* ctx, uint16_t const* name, size_t name_len, bool is_constant, int32_t function_index); +}; + +// Parse, compile, and extract module metadata using the Rust parser. +// Populates module_context (a ModuleBuilder*) via callbacks. +// On parse failure, calls error_callback for each error, then returns nullptr. +// +// Returns Executable* for non-TLA modules (tla_executable_out is null). +// For TLA modules, returns nullptr and sets tla_executable_out to the +// async wrapper Executable*. +void* rust_compile_module( + uint16_t const* source, + size_t source_len, + void* vm_ptr, + void const* source_code_ptr, + void* module_context, + ModuleCallbacks const* callbacks, + bool dump_ast, + bool use_color, + void* error_context, + RustParseErrorCallback error_callback, + void** tla_executable_out, + uint8_t** ast_dump_output, + size_t* ast_dump_output_len); + +// Set the name on a SharedFunctionInstanceData (used for module default +// export renaming from "*default*" to "default"). +void module_sfd_set_name(void* sfd_ptr, uint16_t const* name, size_t name_len); + +// Compile a regex pattern+flags. On success, returns a heap-allocated +// opaque object (RustCompiledRegex*) and sets *error_out to nullptr. +// On failure, returns nullptr and sets *error_out to a heap-allocated +// error string (caller must free with rust_free_error_string). +// Successful results must be freed with rust_free_compiled_regex or +// passed to rust_create_executable (which takes ownership). +void* rust_compile_regex( + uint16_t const* pattern_data, size_t pattern_len, uint16_t const* flags_data, size_t flags_len, char const** error_out); +void rust_free_compiled_regex(void* ptr); +void rust_free_error_string(char const* str); + +// Convert a JS number to its UTF-16 string representation using the +// ECMA-262 Number::toString algorithm. Writes up to buffer_len code +// units into buffer and returns the actual length. +size_t rust_number_to_utf16(double value, uint16_t* buffer, size_t buffer_len); + +// FIXME: This FFI workaround exists only to match C++ float-to-string +// formatting in the Rust AST dump. Once the C++ pipeline is +// removed, this can be deleted and the Rust side can use its own +// formatting without needing to match C++. +// Format a double using AK's shortest-representation algorithm. +// Writes up to buffer_len bytes into buffer and returns the actual length. +size_t rust_format_double(double value, uint8_t* buffer, size_t buffer_len); + +// Get a well-known symbol as an encoded JS::Value. +// symbol_id: 0 = Symbol.iterator, 1 = Symbol.asyncIterator +uint64_t get_well_known_symbol(void* vm_ptr, uint32_t symbol_id); + +// Get an intrinsic abstract operation function as an encoded JS::Value. +uint64_t get_abstract_operation_function(void* vm_ptr, uint16_t const* name, size_t name_len); + +// Free a string allocated by Rust (e.g. AST dump output). +void rust_free_string(uint8_t* ptr, size_t len); + +# ifdef __cplusplus +} +# endif + +#endif // ENABLE_RUST diff --git a/Libraries/LibJS/CMakeLists.txt b/Libraries/LibJS/CMakeLists.txt index 87076479b64..44063811c0c 100644 --- a/Libraries/LibJS/CMakeLists.txt +++ b/Libraries/LibJS/CMakeLists.txt @@ -27,7 +27,9 @@ set(SOURCES Module.cpp Parser.cpp ParserError.cpp + PipelineComparison.cpp Print.cpp + RustIntegration.cpp Runtime/AbstractOperations.cpp Runtime/Accessor.cpp Runtime/Agent.cpp @@ -302,6 +304,29 @@ endif() target_link_libraries(LibJS PUBLIC JSClangPlugin) +if (ENABLE_RUST) + corrosion_import_crate(MANIFEST_PATH Rust/Cargo.toml) + target_link_libraries(LibJS PRIVATE libjs_rust) + target_compile_definitions(LibJS PRIVATE ENABLE_RUST) + install(TARGETS libjs_rust EXPORT LagomTargets + ARCHIVE COMPONENT Lagom_Development + ) + # The Rust library and LibJS have a circular dependency (C++ calls Rust + # entry points, Rust calls C++ callbacks). For static builds, merge the + # Rust archive into the LibJS archive so all symbols are in one place. + if(NOT BUILD_SHARED_LIBS) + add_custom_command(TARGET LibJS POST_BUILD + COMMAND ${CMAKE_AR} -x $ + COMMAND ${CMAKE_AR} -qS $ *.o + COMMAND ${CMAKE_RANLIB} $ + COMMAND ${CMAKE_COMMAND} -E remove -f *.o + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/rust_merge_tmp + COMMENT "Merging Rust archive into LibJS" + ) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/rust_merge_tmp) + endif() +endif() + if (WIN32 AND ENABLE_ADDRESS_SANITIZER) # FIXME: Fix address sanitizer stack-overflow error when running test-js. # Even tripling the stack size for this target to 24MB didn't fix it, so it is most likely some ASAN related bug/quirk given test-js passes using the 8MB stack without ASAN diff --git a/Libraries/LibJS/PipelineComparison.cpp b/Libraries/LibJS/PipelineComparison.cpp new file mode 100644 index 00000000000..8457287451e --- /dev/null +++ b/Libraries/LibJS/PipelineComparison.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026, Andreas Kling + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +namespace JS { + +bool compare_pipelines_enabled() +{ + static bool const enabled = getenv("LIBJS_COMPARE_PIPELINES") != nullptr; + return enabled; +} + +static void report_mismatch(StringView kind, StringView rust_dump, StringView cpp_dump, StringView context) +{ + StringBuilder message; + message.appendff("PIPELINE MISMATCH ({}) in: {}\n", kind, context); + message.appendff("\n=== Rust {} ===\n{}\n", kind, rust_dump); + message.appendff("\n=== C++ {} ===\n{}\n", kind, cpp_dump); + warnln("{}", message.string_view()); + VERIFY_NOT_REACHED(); +} + +void compare_pipeline_asts(StringView rust_ast, StringView cpp_ast, StringView context) +{ + if (rust_ast != cpp_ast) + report_mismatch("AST"sv, rust_ast, cpp_ast, context); +} + +void compare_pipeline_bytecode(StringView rust_bytecode, StringView cpp_bytecode, StringView context, StringView ast_dump) +{ + if (rust_bytecode != cpp_bytecode) { + if (!ast_dump.is_empty()) + warnln("\n=== AST (both identical) ===\n{}", ast_dump); + report_mismatch("Bytecode"sv, rust_bytecode, cpp_bytecode, context); + } +} + +} diff --git a/Libraries/LibJS/PipelineComparison.h b/Libraries/LibJS/PipelineComparison.h new file mode 100644 index 00000000000..b889d49b981 --- /dev/null +++ b/Libraries/LibJS/PipelineComparison.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2026, Andreas Kling + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include + +namespace JS { + +bool compare_pipelines_enabled(); + +void compare_pipeline_asts(StringView rust_ast, StringView cpp_ast, StringView context); +void compare_pipeline_bytecode(StringView rust_bytecode, StringView cpp_bytecode, StringView context, StringView ast_dump = {}); + +} diff --git a/Libraries/LibJS/Runtime/AbstractOperations.cpp b/Libraries/LibJS/Runtime/AbstractOperations.cpp index f9cb5f0e47b..801ec6f752d 100644 --- a/Libraries/LibJS/Runtime/AbstractOperations.cpp +++ b/Libraries/LibJS/Runtime/AbstractOperations.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,8 @@ #include #include #include +#include +#include namespace JS { @@ -651,30 +654,52 @@ ThrowCompletionOr perform_eval(VM& vm, Value x, CallerMode strict_caller, // f. If inMethod is false, and body Contains SuperProperty, throw a SyntaxError exception. // g. If inDerivedConstructor is false, and body Contains SuperCall, throw a SyntaxError exception. // h. If inClassFieldInitializer is true, and ContainsArguments of body is true, throw a SyntaxError exception. - Parser::EvalInitialState initial_state { - .in_eval_function_context = in_function, - .allow_super_property_lookup = in_method, - .allow_super_constructor_call = in_derived_constructor, - .in_class_field_initializer = in_class_field_initializer, - }; - Parser parser(Lexer(SourceCode::create({}, code_string->utf16_string())), Program::Type::Script, move(initial_state)); - auto program = parser.parse_program(strict_caller == CallerMode::Strict); + GC::Ptr executable; + bool strict_eval = false; + EvalDeclarationData eval_declaration_data; - // b. If script is a List of errors, throw a SyntaxError exception. - if (parser.has_errors()) { - auto& error = parser.errors()[0]; - return vm.throw_completion(error.to_string()); + auto rust_compilation = RustIntegration::compile_eval(*code_string, vm, strict_caller, in_function, in_method, in_derived_constructor, in_class_field_initializer); + if (rust_compilation.has_value()) { + if (rust_compilation->is_error()) + return vm.throw_completion(rust_compilation->release_error()); + auto& eval_result = rust_compilation->value(); + executable = eval_result.executable; + strict_eval = eval_result.is_strict_mode; + eval_declaration_data = move(eval_result.declaration_data); } - bool strict_eval = false; + RefPtr cpp_program; - // 14. If strictCaller is true, let strictEval be true. - if (strict_caller == CallerMode::Strict) - strict_eval = true; - // 15. Else, let strictEval be IsStrict of script. - else - strict_eval = program->is_strict_mode(); + if (!executable) { + Parser::EvalInitialState initial_state { + .in_eval_function_context = in_function, + .allow_super_property_lookup = in_method, + .allow_super_constructor_call = in_derived_constructor, + .in_class_field_initializer = in_class_field_initializer, + }; + + Parser parser(Lexer(SourceCode::create({}, code_string->utf16_string())), Program::Type::Script, move(initial_state)); + cpp_program = parser.parse_program(strict_caller == CallerMode::Strict); + + // b. If script is a List of errors, throw a SyntaxError exception. + if (parser.has_errors()) { + auto& error = parser.errors()[0]; + return vm.throw_completion(error.to_string()); + } + + // 14. If strictCaller is true, let strictEval be true. + if (strict_caller == CallerMode::Strict) + strict_eval = true; + // 15. Else, let strictEval be IsStrict of script. + else + strict_eval = cpp_program->is_strict_mode(); + + eval_declaration_data = EvalDeclarationData::create(vm, *cpp_program, strict_eval); + + // NB: Bytecode compilation is deferred until after EvalDeclarationInstantiation, + // which sets annex B flags on AST nodes that affect codegen. + } // 16. Let runningContext be the running execution context. // 17. NOTE: If direct is true, runningContext will be the execution context that performed the direct eval. If direct is false, runningContext will be the execution context for the invocation of the eval function. @@ -724,13 +749,14 @@ ThrowCompletionOr perform_eval(VM& vm, Value x, CallerMode strict_caller, // NOTE: Spec steps are rearranged in order to compute number of registers+constants+locals before construction of the execution context. // 30. Let result be Completion(EvalDeclarationInstantiation(body, varEnv, lexEnv, privateEnv, strictEval)). - auto eval_declaration_data = EvalDeclarationData::create(vm, program, strict_eval); TRY(eval_declaration_instantiation(vm, eval_declaration_data, variable_environment, lexical_environment, private_environment, strict_eval)); - // 31. If result.[[Type]] is normal, then - // a. Set result to the result of evaluating body. - auto executable = Bytecode::Generator::generate_from_ast_node(vm, program, {}); - executable->name = "eval"_utf16_fly_string; + // Compile C++ AST after EDI, since EDI sets annex B flags on AST nodes. + if (cpp_program) { + executable = Bytecode::Generator::generate_from_ast_node(vm, *cpp_program, {}); + executable->name = "eval"_utf16_fly_string; + } + if (Bytecode::g_dump_bytecode) executable->dump(); @@ -808,7 +834,8 @@ EvalDeclarationData EvalDeclarationData::create(VM& vm, Program const& program, // Pre-compute AnnexB candidates. if (!strict) { MUST(program.for_each_function_hoistable_with_annexB_extension([&](FunctionDeclaration& function_declaration) -> ThrowCompletionOr { - data.annex_b_candidates.append(function_declaration); + data.annex_b_candidate_names.append(function_declaration.name()); + data.annex_b_function_declarations.append(function_declaration); return {}; })); } @@ -909,9 +936,9 @@ ThrowCompletionOr eval_declaration_instantiation(VM& vm, EvalDeclarationDa HashTable hoisted_functions; // b. For each FunctionDeclaration f that is directly contained in the StatementList of a Block, CaseClause, or DefaultClause Contained within body, do - for (auto& function_declaration : data.annex_b_candidates) { + for (size_t i = 0; i < data.annex_b_candidate_names.size(); ++i) { // i. Let F be StringValue of the BindingIdentifier of f. - auto function_name = function_declaration->name(); + auto& function_name = data.annex_b_candidate_names[i]; // ii. If replacing the FunctionDeclaration f with a VariableStatement that has F as a BindingIdentifier would not produce any Early Errors for body, then // Note: This is checked during parsing and for_each_function_hoistable_with_annexB_extension so it always passes here. @@ -993,7 +1020,8 @@ ThrowCompletionOr eval_declaration_instantiation(VM& vm, EvalDeclarationDa // iii. Let fobj be ! benv.GetBindingValue(F, false). // iv. Perform ? genv.SetMutableBinding(F, fobj, false). // v. Return unused. - function_declaration->set_should_do_additional_annexB_steps(); + if (i < data.annex_b_function_declarations.size()) + data.annex_b_function_declarations[i]->set_should_do_additional_annexB_steps(); } } @@ -1045,7 +1073,7 @@ ThrowCompletionOr eval_declaration_instantiation(VM& vm, EvalDeclarationDa // b. Let fo be InstantiateFunctionObject of f with arguments lexEnv and privateEnv. auto function = ECMAScriptFunctionObject::create_from_function_data( realm, - function_to_initialize.shared_data, + *function_to_initialize.shared_data, lexical_environment, private_environment); diff --git a/Libraries/LibJS/Runtime/AbstractOperations.h b/Libraries/LibJS/Runtime/AbstractOperations.h index 4554039fcc2..d038c63d1d9 100644 --- a/Libraries/LibJS/Runtime/AbstractOperations.h +++ b/Libraries/LibJS/Runtime/AbstractOperations.h @@ -97,7 +97,7 @@ struct EvalDeclarationData { Vector var_names; struct FunctionToInitialize { - GC::Ref shared_data; + GC::Root shared_data; Utf16FlyString name; }; Vector functions_to_initialize; @@ -105,7 +105,8 @@ struct EvalDeclarationData { Vector var_scoped_names; - Vector> annex_b_candidates; + Vector annex_b_candidate_names; + Vector> annex_b_function_declarations; struct LexicalBinding { Utf16FlyString name; diff --git a/Libraries/LibJS/Runtime/ECMAScriptFunctionObject.cpp b/Libraries/LibJS/Runtime/ECMAScriptFunctionObject.cpp index b912a71e430..8947d0d4985 100644 --- a/Libraries/LibJS/Runtime/ECMAScriptFunctionObject.cpp +++ b/Libraries/LibJS/Runtime/ECMAScriptFunctionObject.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include namespace JS { @@ -217,7 +219,13 @@ void ECMAScriptFunctionObject::get_stack_frame_size(size_t& registers_and_locals { auto& executable = shared_data().m_executable; if (!executable) { - if (is_module_wrapper()) { + auto rust_executable = RustIntegration::compile_function(vm(), *m_shared_data, false); + if (rust_executable) { + executable = rust_executable; + executable->name = m_shared_data->m_name; + if (Bytecode::g_dump_bytecode) + executable->dump(); + } else if (is_module_wrapper()) { executable = Bytecode::compile(vm(), ecmascript_code(), kind(), name()); } else { executable = Bytecode::compile(vm(), shared_data(), Bytecode::BuiltinAbstractOperationsEnabled::No); diff --git a/Libraries/LibJS/Runtime/FunctionConstructor.cpp b/Libraries/LibJS/Runtime/FunctionConstructor.cpp index ca670002c4e..48d169fb505 100644 --- a/Libraries/LibJS/Runtime/FunctionConstructor.cpp +++ b/Libraries/LibJS/Runtime/FunctionConstructor.cpp @@ -15,6 +15,8 @@ #include #include #include +#include +#include namespace JS { @@ -148,46 +150,75 @@ ThrowCompletionOr> FunctionConstructor::create // 16. Perform ? HostEnsureCanCompileStrings(currentRealm, parameterStrings, bodyString, sourceString, FUNCTION, parameterArgs, bodyArg). TRY(vm.host_ensure_can_compile_strings(realm, parameter_strings, body_string, source_text, CompilationType::Function, parameter_args, body_arg)); - u8 parse_options = FunctionNodeParseOptions::CheckForFunctionAndName; - if (kind == FunctionKind::Async || kind == FunctionKind::AsyncGenerator) - parse_options |= FunctionNodeParseOptions::IsAsyncFunction; - if (kind == FunctionKind::Generator || kind == FunctionKind::AsyncGenerator) - parse_options |= FunctionNodeParseOptions::IsGeneratorFunction; + GC::Ptr function_data; - // 17. Let parameters be ParseText(P, parameterSym). - i32 function_length = 0; - auto parameters_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(parameters_string)))); - auto parameters = parameters_parser.parse_formal_parameters(function_length, parse_options); - - // 18. If parameters is a List of errors, throw a SyntaxError exception. - if (parameters_parser.has_errors()) { - auto error = parameters_parser.errors()[0]; - return vm.throw_completion(error.to_string()); + auto rust_compilation = RustIntegration::compile_dynamic_function(vm, source_text, parameters_string, body_parse_string, kind); + if (rust_compilation.has_value()) { + if (rust_compilation->is_error()) + return vm.throw_completion(rust_compilation->release_error()); + function_data = rust_compilation->value(); } - // 19. Let body be ParseText(bodyParseString, bodySym). - FunctionParsingInsights parsing_insights; - auto body_parser = Parser::parse_function_body_from_string(body_parse_string, parse_options, parameters, kind, parsing_insights); + if (!function_data) { + u8 parse_options = FunctionNodeParseOptions::CheckForFunctionAndName; + if (kind == FunctionKind::Async || kind == FunctionKind::AsyncGenerator) + parse_options |= FunctionNodeParseOptions::IsAsyncFunction; + if (kind == FunctionKind::Generator || kind == FunctionKind::AsyncGenerator) + parse_options |= FunctionNodeParseOptions::IsGeneratorFunction; - // 20. If body is a List of errors, throw a SyntaxError exception. - if (body_parser.has_errors()) { - auto error = body_parser.errors()[0]; - return vm.throw_completion(error.to_string()); - } + // 17. Let parameters be ParseText(P, parameterSym). + i32 function_length = 0; + auto parameters_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(parameters_string)))); + auto parameters = parameters_parser.parse_formal_parameters(function_length, parse_options); - // 21. NOTE: The parameters and body are parsed separately to ensure that each is valid alone. For example, new Function("/*", "*/ ) {") does not evaluate to a function. - // 22. NOTE: If this step is reached, sourceText must have the syntax of exprSym (although the reverse implication does not hold). The purpose of the next two steps is to enforce any Early Error rules which apply to exprSym directly. + // 18. If parameters is a List of errors, throw a SyntaxError exception. + if (parameters_parser.has_errors()) { + auto error = parameters_parser.errors()[0]; + return vm.throw_completion(error.to_string()); + } - // 23. Let expr be ParseText(sourceText, exprSym). - auto source_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(source_text)))); - // This doesn't need any parse_options, it determines those & the function type based on the tokens that were found. - auto expr = source_parser.parse_function_node(); - source_parser.run_scope_analysis(); + // 19. Let body be ParseText(bodyParseString, bodySym). + FunctionParsingInsights parsing_insights; + auto body_parser = Parser::parse_function_body_from_string(body_parse_string, parse_options, parameters, kind, parsing_insights); - // 24. If expr is a List of errors, throw a SyntaxError exception. - if (source_parser.has_errors()) { - auto error = source_parser.errors()[0]; - return vm.throw_completion(error.to_string()); + // 20. If body is a List of errors, throw a SyntaxError exception. + if (body_parser.has_errors()) { + auto error = body_parser.errors()[0]; + return vm.throw_completion(error.to_string()); + } + + // 21. NOTE: The parameters and body are parsed separately to ensure that each is valid alone. For example, new Function("/*", "*/ ) {") does not evaluate to a function. + // 22. NOTE: If this step is reached, sourceText must have the syntax of exprSym (although the reverse implication does not hold). The purpose of the next two steps is to enforce any Early Error rules which apply to exprSym directly. + + // 23. Let expr be ParseText(sourceText, exprSym). + auto source_parser = Parser(Lexer(SourceCode::create({}, Utf16String::from_utf8(source_text)))); + // This doesn't need any parse_options, it determines those & the function type based on the tokens that were found. + auto expr = source_parser.parse_function_node(); + source_parser.run_scope_analysis(); + + // 24. If expr is a List of errors, throw a SyntaxError exception. + if (source_parser.has_errors()) { + auto error = source_parser.errors()[0]; + return vm.throw_completion(error.to_string()); + } + + // 28. Let F be OrdinaryFunctionCreate(proto, sourceText, parameters, body, non-lexical-this, env, privateEnv). + parsing_insights.might_need_arguments_object = true; + + function_data = vm.heap().allocate( + vm, + expr->kind(), + "anonymous"_utf16_fly_string, + expr->function_length(), + expr->parameters(), + expr->body(), + Utf16View {}, + expr->is_strict_mode(), + false, + parsing_insights, + expr->local_variables_names()); + function_data->m_source_text_owner = Utf16String::from_utf8(source_text); + function_data->m_source_text = function_data->m_source_text_owner.utf16_view(); } // 25. Let proto be ? GetPrototypeFromConstructor(newTarget, fallbackProto). @@ -199,27 +230,9 @@ ThrowCompletionOr> FunctionConstructor::create // 27. Let privateEnv be null. PrivateEnvironment* private_environment = nullptr; - // 28. Let F be OrdinaryFunctionCreate(proto, sourceText, parameters, body, non-lexical-this, env, privateEnv). - parsing_insights.might_need_arguments_object = true; - - auto function_data = vm.heap().allocate( - vm, - expr->kind(), - "anonymous"_utf16_fly_string, - expr->function_length(), - expr->parameters(), - expr->body(), - Utf16View {}, - expr->is_strict_mode(), - false, - parsing_insights, - expr->local_variables_names()); - function_data->m_source_text_owner = Utf16String::from_utf8(source_text); - function_data->m_source_text = function_data->m_source_text_owner.utf16_view(); - auto function = ECMAScriptFunctionObject::create_from_function_data( realm, - function_data, + *function_data, &environment, private_environment, *prototype); diff --git a/Libraries/LibJS/Runtime/Intrinsics.cpp b/Libraries/LibJS/Runtime/Intrinsics.cpp index 1405835304b..bb864c97bc9 100644 --- a/Libraries/LibJS/Runtime/Intrinsics.cpp +++ b/Libraries/LibJS/Runtime/Intrinsics.cpp @@ -136,6 +136,7 @@ #include #include #include +#include // FIXME: Remove this asm hack when we upgrade to GCC 15. #define INCLUDE_FILE_WITH_ASSEMBLY(name, file_path) \ @@ -208,8 +209,13 @@ GC::Ref Intrinsics::create(Realm& realm) static Vector> parse_builtin_file(unsigned char const* script_text, VM& vm) { + auto rust_compilation = RustIntegration::compile_builtin_file(script_text, vm); + if (rust_compilation.has_value()) + return move(rust_compilation.value()); + auto script_text_as_utf16 = Utf16String::from_utf8_without_validation({ script_text, strlen(reinterpret_cast(script_text)) }); auto code = SourceCode::create("BuiltinFile"_string, move(script_text_as_utf16)); + auto lexer = Lexer { move(code) }; auto parser = Parser { move(lexer) }; VERIFY(!parser.has_errors()); diff --git a/Libraries/LibJS/Runtime/NativeJavaScriptBackedFunction.cpp b/Libraries/LibJS/Runtime/NativeJavaScriptBackedFunction.cpp index 64eb11d7001..e214dc19e6a 100644 --- a/Libraries/LibJS/Runtime/NativeJavaScriptBackedFunction.cpp +++ b/Libraries/LibJS/Runtime/NativeJavaScriptBackedFunction.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace JS { @@ -98,7 +99,15 @@ Bytecode::Executable& NativeJavaScriptBackedFunction::bytecode_executable() { auto& executable = m_shared_function_instance_data->m_executable; if (!executable) { - executable = Bytecode::compile(vm(), m_shared_function_instance_data, Bytecode::BuiltinAbstractOperationsEnabled::Yes); + auto rust_executable = RustIntegration::compile_function(vm(), *m_shared_function_instance_data, true); + if (rust_executable) { + executable = rust_executable; + executable->name = m_shared_function_instance_data->m_name; + if (Bytecode::g_dump_bytecode) + executable->dump(); + } else { + executable = Bytecode::compile(vm(), m_shared_function_instance_data, Bytecode::BuiltinAbstractOperationsEnabled::Yes); + } m_shared_function_instance_data->clear_compile_inputs(); } diff --git a/Libraries/LibJS/Runtime/ShadowRealm.cpp b/Libraries/LibJS/Runtime/ShadowRealm.cpp index b04be5514f9..dca4bcaa8be 100644 --- a/Libraries/LibJS/Runtime/ShadowRealm.cpp +++ b/Libraries/LibJS/Runtime/ShadowRealm.cpp @@ -17,6 +17,8 @@ #include #include #include +#include +#include namespace JS { @@ -122,35 +124,53 @@ ThrowCompletionOr perform_shadow_realm_eval(VM& vm, Value source, Realm& // 2. Perform the following substeps in an implementation-defined order, possibly interleaving parsing and error detection: - // a. Let script be ParseText(StringToCodePoints(sourceText), Script). - auto parser = Parser(Lexer(SourceCode::create({}, source_text->utf16_string())), Program::Type::Script, Parser::EvalInitialState {}); - auto program = parser.parse_program(); + GC::Ptr executable; + bool strict_eval = false; + EvalDeclarationData eval_declaration_data; - // b. If script is a List of errors, throw a SyntaxError exception. - if (parser.has_errors()) { - auto& error = parser.errors()[0]; - return vm.throw_completion(error.to_string()); + auto rust_compilation = RustIntegration::compile_shadow_realm_eval(*source_text, vm); + if (rust_compilation.has_value()) { + if (rust_compilation->is_error()) + return vm.throw_completion(rust_compilation->release_error()); + auto& eval_result = rust_compilation->value(); + executable = eval_result.executable; + strict_eval = eval_result.is_strict_mode; + eval_declaration_data = move(eval_result.declaration_data); } - // c. If script Contains ScriptBody is false, return undefined. - if (program->children().is_empty()) - return js_undefined(); + if (!executable) { + // a. Let script be ParseText(StringToCodePoints(sourceText), Script). + auto parser = Parser(Lexer(SourceCode::create({}, source_text->utf16_string())), Program::Type::Script, Parser::EvalInitialState {}); + auto program = parser.parse_program(); - // d. Let body be the ScriptBody of script. - // e. If body Contains NewTarget is true, throw a SyntaxError exception. - // f. If body Contains SuperProperty is true, throw a SyntaxError exception. - // g. If body Contains SuperCall is true, throw a SyntaxError exception. - // FIXME: Implement these, we probably need a generic way of scanning the AST for certain nodes. + // b. If script is a List of errors, throw a SyntaxError exception. + if (parser.has_errors()) { + auto& error = parser.errors()[0]; + return vm.throw_completion(error.to_string()); + } - // 3. Let strictEval be IsStrict of script. - auto strict_eval = program->is_strict_mode(); + // c. If script Contains ScriptBody is false, return undefined. + if (program->children().is_empty()) + return js_undefined(); + + // d. Let body be the ScriptBody of script. + // e. If body Contains NewTarget is true, throw a SyntaxError exception. + // f. If body Contains SuperProperty is true, throw a SyntaxError exception. + // g. If body Contains SuperCall is true, throw a SyntaxError exception. + // FIXME: Implement these, we probably need a generic way of scanning the AST for certain nodes. + + // 3. Let strictEval be IsStrict of script. + strict_eval = program->is_strict_mode(); + + eval_declaration_data = EvalDeclarationData::create(vm, program, strict_eval); + + executable = Bytecode::compile(vm, program, FunctionKind::Normal, "ShadowRealmEval"_utf16_fly_string); + } // 4. Let runningContext be the running execution context. // 5. If runningContext is not already suspended, suspend runningContext. // NOTE: This would be unused due to step 9 and is omitted for that reason. - auto executable = Bytecode::compile(vm, program, FunctionKind::Normal, "ShadowRealmEval"_utf16_fly_string); - // 6. Let evalContext be GetShadowRealmContext(evalRealm, strictEval). auto eval_context = get_shadow_realm_context(eval_realm, strict_eval, executable->registers_and_locals_count, executable->constants.size()); @@ -164,7 +184,6 @@ ThrowCompletionOr perform_shadow_realm_eval(VM& vm, Value source, Realm& TRY(vm.push_execution_context(*eval_context, {})); // 10. Let result be Completion(EvalDeclarationInstantiation(body, varEnv, lexEnv, null, strictEval)). - auto eval_declaration_data = EvalDeclarationData::create(vm, program, strict_eval); auto eval_result = eval_declaration_instantiation(vm, eval_declaration_data, variable_environment, lexical_environment, nullptr, strict_eval); Completion result; diff --git a/Libraries/LibJS/Runtime/SharedFunctionInstanceData.cpp b/Libraries/LibJS/Runtime/SharedFunctionInstanceData.cpp index 5e6e58e1e2d..5843db40cf1 100644 --- a/Libraries/LibJS/Runtime/SharedFunctionInstanceData.cpp +++ b/Libraries/LibJS/Runtime/SharedFunctionInstanceData.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace JS { @@ -307,6 +308,43 @@ void SharedFunctionInstanceData::visit_edges(Visitor& visitor) SharedFunctionInstanceData::~SharedFunctionInstanceData() = default; +void SharedFunctionInstanceData::finalize() +{ + Base::finalize(); + RustIntegration::free_function_ast(m_rust_function_ast); + m_rust_function_ast = nullptr; +} + +SharedFunctionInstanceData::SharedFunctionInstanceData( + VM&, + FunctionKind kind, + Utf16FlyString name, + i32 function_length, + u32 formal_parameter_count, + bool strict, + bool is_arrow_function, + bool has_simple_parameter_list, + Vector parameter_names_for_mapped_arguments, + void* rust_function_ast) + : m_name(move(name)) + , m_function_length(function_length) + , m_formal_parameter_count(formal_parameter_count) + , m_parameter_names_for_mapped_arguments(move(parameter_names_for_mapped_arguments)) + , m_kind(kind) + , m_strict(strict) + , m_is_arrow_function(is_arrow_function) + , m_has_simple_parameter_list(has_simple_parameter_list) + , m_rust_function_ast(rust_function_ast) + , m_use_rust_compilation(true) +{ + if (m_is_arrow_function) + m_this_mode = ThisMode::Lexical; + else if (m_strict) + m_this_mode = ThisMode::Strict; + else + m_this_mode = ThisMode::Global; +} + GC::Ref SharedFunctionInstanceData::create_for_function_node(VM& vm, FunctionNode const& node) { return create_for_function_node(vm, node, node.name()); @@ -342,6 +380,8 @@ void SharedFunctionInstanceData::clear_compile_inputs() m_functions_to_initialize.clear(); m_var_names_to_initialize_binding.clear(); m_lexical_bindings.clear(); + RustIntegration::free_function_ast(m_rust_function_ast); + m_rust_function_ast = nullptr; } } diff --git a/Libraries/LibJS/Runtime/SharedFunctionInstanceData.h b/Libraries/LibJS/Runtime/SharedFunctionInstanceData.h index ebdc405f3b7..eb45a4f5f36 100644 --- a/Libraries/LibJS/Runtime/SharedFunctionInstanceData.h +++ b/Libraries/LibJS/Runtime/SharedFunctionInstanceData.h @@ -50,9 +50,11 @@ class FunctionNode; class JS_API SharedFunctionInstanceData final : public GC::Cell { GC_CELL(SharedFunctionInstanceData, GC::Cell); GC_DECLARE_ALLOCATOR(SharedFunctionInstanceData); + static constexpr bool OVERRIDES_FINALIZE = true; public: virtual ~SharedFunctionInstanceData() override; + virtual void finalize() override; static GC::Ref create_for_function_node(VM&, FunctionNode const&); static GC::Ref create_for_function_node(VM&, FunctionNode const&, Utf16FlyString name); @@ -70,6 +72,21 @@ public: FunctionParsingInsights const&, Vector local_variables_names); + // NB: Constructor for the Rust pipeline. Takes pre-computed metadata + // instead of a C++ AST. FDI fields are populated later during + // lazy compilation by rust_compile_function. + SharedFunctionInstanceData( + VM& vm, + FunctionKind, + Utf16FlyString name, + i32 function_length, + u32 formal_parameter_count, + bool strict, + bool is_arrow_function, + bool has_simple_parameter_list, + Vector parameter_names_for_mapped_arguments, + void* rust_function_ast); + mutable GC::Ptr m_executable; RefPtr m_formal_parameters; // [[FormalParameters]] @@ -113,7 +130,7 @@ public: No, Yes, }; - HashMap m_parameter_names; + OrderedHashMap m_parameter_names; struct FunctionToInitialize { GC::Ref shared_data; Utf16FlyString name; @@ -142,6 +159,11 @@ public: ConstructorKind m_constructor_kind : 1 { ConstructorKind::Base }; // [[ConstructorKind]] bool m_is_class_constructor : 1 { false }; // [[IsClassConstructor]] + // NB: When non-null, points to a Rust Box used for + // lazy compilation through the Rust pipeline. + void* m_rust_function_ast { nullptr }; + bool m_use_rust_compilation { false }; + void clear_compile_inputs(); private: diff --git a/Libraries/LibJS/Rust/Cargo.toml b/Libraries/LibJS/Rust/Cargo.toml new file mode 100644 index 00000000000..be3db4b3e62 --- /dev/null +++ b/Libraries/LibJS/Rust/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "libjs_rust" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["staticlib"] + +[dependencies] +unicode-ident = "1.0" +num-bigint = "0.4" +num-traits = "0.2" +num-integer = "0.1" diff --git a/Libraries/LibJS/Rust/build.rs b/Libraries/LibJS/Rust/build.rs new file mode 100644 index 00000000000..8013bae4814 --- /dev/null +++ b/Libraries/LibJS/Rust/build.rs @@ -0,0 +1,674 @@ +/* + * Copyright (c) 2026-present, the Ladybird developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +//! Build script that generates Rust bytecode instruction types from Bytecode.def. +//! +//! This mirrors Meta/generate-libjs-bytecode-def-derived.py but generates Rust +//! code instead of C++. The generated code lives in $OUT_DIR/instruction_generated.rs +//! and is included! from src/bytecode/instruction.rs. + +use std::env; +use std::fmt::Write; +use std::fs; +use std::path::PathBuf; + +// --------------------------------------------------------------------------- +// .def file parser (mirrors Meta/libjs_bytecode_def.py) +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone)] +struct Field { + name: String, + ty: String, + is_array: bool, +} + +#[derive(Debug)] +struct OpDef { + name: String, + fields: Vec, + is_terminator: bool, +} + +fn parse_bytecode_def(path: &std::path::Path) -> Vec { + let content = fs::read_to_string(path).expect("Failed to read Bytecode.def"); + let mut ops = Vec::new(); + let mut current: Option = None; + let mut in_op = false; + + for raw_line in content.lines() { + let stripped = raw_line.trim(); + if stripped.is_empty() || stripped.starts_with("//") || stripped.starts_with('#') { + continue; + } + + if stripped.starts_with("op ") { + assert!(!in_op, "Nested op blocks"); + in_op = true; + let rest = stripped.strip_prefix("op ").unwrap().trim(); + let name = if let Some(idx) = rest.find('<') { + rest[..idx].trim().to_string() + } else { + rest.to_string() + }; + current = Some(OpDef { + name, + fields: Vec::new(), + is_terminator: false, + }); + continue; + } + + if stripped == "endop" { + assert!(in_op && current.is_some(), "endop without op"); + ops.push(current.take().unwrap()); + in_op = false; + continue; + } + + if !in_op { + continue; + } + + if stripped.starts_with('@') { + if stripped == "@terminator" { + current.as_mut().unwrap().is_terminator = true; + } + // @nothrow is C++-only, ignore + continue; + } + + let (lhs, rhs) = stripped.split_once(':').expect("Malformed field line"); + let field_name = lhs.trim().to_string(); + let mut field_type = rhs.trim().to_string(); + let is_array = field_type.ends_with("[]"); + if is_array { + field_type = field_type[..field_type.len() - 2].trim().to_string(); + } + current.as_mut().unwrap().fields.push(Field { + name: field_name, + ty: field_type, + is_array, + }); + } + assert!(!in_op, "Unclosed op block"); + + // Remove the base "Instruction" definition (not an actual opcode). + ops.retain(|op| op.name != "Instruction"); + ops +} + +// --------------------------------------------------------------------------- +// Type mapping: C++ types → Rust types +// --------------------------------------------------------------------------- + +/// Returns (rust_type, c_alignment, c_size, encoding_kind). +fn field_type_info(ty: &str) -> (&'static str, usize, usize, &'static str) { + match ty { + "bool" => ("bool", 1, 1, "bool"), + "u32" => ("u32", 4, 4, "u32"), + "Operand" => ("Operand", 4, 4, "operand"), + "Optional" => ("Option", 4, 4, "optional_operand"), + "Label" => ("Label", 4, 4, "label"), + "Optional