/* * Copyright (c) 2026-present, the Ladybird developers. * * SPDX-License-Identifier: BSD-2-Clause */ //! Build script that generates Rust bytecode instruction types from Bytecode.def. //! //! This mirrors Meta/generate-libjs-bytecode-def-derived.py but generates Rust //! code instead of C++. The generated code lives in $OUT_DIR/instruction_generated.rs //! and is included! from src/bytecode/instruction.rs. use bytecode_def::{ Field, OpDef, STRUCT_ALIGN, field_type_info, find_m_length_offset, round_up, user_fields, }; use std::env; use std::fs; use std::io::Write; use std::path::PathBuf; fn rust_field_name(name: &str) -> String { if let Some(stripped) = name.strip_prefix("m_") { stripped.to_string() } else { name.to_string() } } fn generate_rust_code(mut w: impl Write, ops: &[OpDef]) -> Result<(), Box> { writeln!( w, "// @generated from Libraries/LibJS/Bytecode/Bytecode.def" )?; writeln!(w, "// Do not edit manually.")?; writeln!(w)?; writeln!(w, "use super::operand::*;")?; writeln!(w)?; generate_opcode_enum(&mut w, ops)?; generate_instruction_enum(&mut w, ops)?; generate_instruction_impl(&mut w, ops)?; Ok(()) } fn generate_opcode_enum( mut w: impl Write, ops: &[OpDef], ) -> Result<(), Box> { writeln!( w, "/// Bytecode opcode (u8), matching the C++ `Instruction::Type` enum." )?; writeln!(w, "#[derive(Debug, Clone, Copy, PartialEq, Eq)]")?; writeln!(w, "#[repr(u8)]")?; writeln!(w, "pub enum OpCode {{")?; for (i, op) in ops.iter().enumerate() { writeln!(w, " {} = {},", op.name, i)?; } writeln!(w, "}}")?; writeln!(w)?; Ok(()) } fn generate_instruction_enum( mut w: impl Write, ops: &[OpDef], ) -> Result<(), Box> { writeln!(w, "/// A bytecode instruction with typed fields.")?; writeln!(w, "///")?; writeln!( w, "/// Each variant corresponds to one C++ instruction class." )?; writeln!( w, "/// During codegen, instructions are stored as these typed variants." )?; writeln!( w, "/// During flattening, they are serialized to bytes matching C++ layw." )?; writeln!(w, "#[derive(Debug, Clone)]")?; writeln!(w, "pub enum Instruction {{")?; for op in ops { let fields = user_fields(op); if fields.is_empty() { writeln!(w, " {},", op.name)?; } else { writeln!(w, " {} {{", op.name)?; for f in &fields { let info = field_type_info(&f.ty); let r_name = rust_field_name(&f.name); if f.is_array { writeln!(w, " {}: Vec<{}>,", r_name, info.rust_type)?; } else { writeln!(w, " {}: {},", r_name, info.rust_type)?; } } writeln!(w, " }},")?; } } writeln!(w, "}}")?; writeln!(w)?; Ok(()) } fn generate_instruction_impl( mut w: impl Write, ops: &[OpDef], ) -> Result<(), Box> { writeln!(w, "impl Instruction {{").unwrap(); generate_opcode_method(&mut w, ops)?; generate_is_terminator_method(&mut w, ops)?; generate_encode_method(&mut w, ops)?; generate_encoded_size_method(&mut w, ops)?; generate_visit_operands_method(&mut w, ops)?; generate_visit_labels_method(&mut w, ops)?; writeln!(w, " }}")?; Ok(()) } fn generate_opcode_method( mut w: impl Write, ops: &[OpDef], ) -> Result<(), Box> { writeln!(w, " pub fn opcode(&self) -> OpCode {{")?; writeln!(w, " match self {{")?; for op in ops { let fields = user_fields(op); if fields.is_empty() { writeln!( w, " Instruction::{} => OpCode::{},", op.name, op.name )?; } else { writeln!( w, " Instruction::{} {{ .. }} => OpCode::{},", op.name, op.name )?; } } writeln!(w, " }}")?; writeln!(w, " }}")?; writeln!(w)?; Ok(()) } fn generate_is_terminator_method( mut w: impl Write, ops: &[OpDef], ) -> Result<(), Box> { writeln!(w, " pub fn is_terminator(&self) -> bool {{")?; writeln!(w, " matches!(self, ")?; let terminators: Vec<&OpDef> = ops.iter().filter(|op| op.is_terminator).collect(); for (i, op) in terminators.iter().enumerate() { let sep = if i + 1 < terminators.len() { " |" } else { "" }; let fields = user_fields(op); if fields.is_empty() { writeln!(w, " Instruction::{}{}", op.name, sep)?; } else { writeln!(w, " Instruction::{} {{ .. }}{}", op.name, sep)?; } } writeln!(w, " )")?; writeln!(w, " }}")?; writeln!(w)?; Ok(()) } fn generate_encoded_size_method( mut w: impl Write, ops: &[OpDef], ) -> Result<(), Box> { writeln!( w, " /// Returns the encoded size of this instruction in bytes." )?; writeln!(w, " pub fn encoded_size(&self) -> usize {{")?; writeln!(w, " match self {{")?; for op in ops { let fields = user_fields(op); let has_array = op.fields.iter().any(|f| f.is_array); if !has_array { // Fixed-length: compute size statically let mut offset: usize = 2; // header for f in &op.fields { if f.is_array || f.name == "m_type" || f.name == "m_strict" { continue; } let info = field_type_info(&f.ty); offset = round_up(offset, info.align); offset += info.size; } let final_size = round_up(offset, 8); let pat = if fields.is_empty() { format!("Instruction::{}", op.name) } else { format!("Instruction::{} {{ .. }}", op.name) }; writeln!(w, " {} => {},", pat, final_size)?; } else { // Variable-length: depends on array size // Compute fixed part size let mut fixed_offset: usize = 2; for f in &op.fields { if f.is_array || f.name == "m_type" || f.name == "m_strict" { continue; } let info = field_type_info(&f.ty); fixed_offset = round_up(fixed_offset, info.align); fixed_offset += info.size; } // Find the array field and its element size let Some(array_field) = op.fields.iter().find(|f| f.is_array) else { continue; }; let info = field_type_info(&array_field.ty); let arr_name = rust_field_name(&array_field.name); // C++ computes m_length as: // round_up(alignof(void*), sizeof(*this) + sizeof(elem) * count) // sizeof(*this) = round_up(fixed_offset, STRUCT_ALIGN) due to alignas(void*). let sizeof_this = round_up(fixed_offset, STRUCT_ALIGN); // Bind only the array field let bindings: Vec = fields .iter() .map(|f| { let rname = rust_field_name(&f.name); if rname == arr_name { rname } else { format!("{}: _", rname) } }) .collect(); writeln!( w, " Instruction::{} {{ {} }} => {{", op.name, bindings.join(", ") )?; writeln!( w, " let base = {} + {}.len() * {};", sizeof_this, arr_name, info.size )?; writeln!(w, " (base + 7) & !7 // round up to 8")?; writeln!(w, " }}")?; } } writeln!(w, " }}")?; writeln!(w, " }}")?; writeln!(w)?; Ok(()) } fn generate_encode_method( mut w: impl Write, ops: &[OpDef], ) -> Result<(), Box> { writeln!( w, " /// Encode this instruction into bytes matching the C++ struct layout." )?; writeln!( w, " pub fn encode(&self, strict: bool, buf: &mut Vec) {{" )?; writeln!(w, " let start = buf.len();")?; writeln!(w, " match self {{")?; for op in ops { let fields = user_fields(op); let has_array = op.fields.iter().any(|f| f.is_array); let has_m_length = op.fields.iter().any(|f| f.name == "m_length"); // Generate match arm with field bindings if fields.is_empty() { writeln!(w, " Instruction::{} => {{", op.name)?; } else { let bindings: Vec = fields.iter().map(|f| rust_field_name(&f.name)).collect(); writeln!( w, " Instruction::{} {{ {} }} => {{", op.name, bindings.join(", ") )?; } // Write header: opcode (u8) + strict (u8) = 2 bytes writeln!(w, " buf.push(OpCode::{} as u8);", op.name)?; writeln!(w, " buf.push(strict as u8);")?; // Track offset for C++ struct layw. // We iterate ALL fields (including m_type, m_strict, m_length) for // accurate alignment but only emit writes for user fields. let mut offset: usize = 2; // Iterate all non-array fields in declaration order for f in &op.fields { if f.is_array { continue; } // m_type and m_strict are already written as the header if f.name == "m_type" || f.name == "m_strict" { continue; } let info = field_type_info(&f.ty); // Pad to alignment let aligned_offset = round_up(offset, info.align); let pad = aligned_offset - offset; if pad > 0 { writeln!(w, " buf.extend_from_slice(&[0u8; {}]);", pad)?; } offset = aligned_offset; if f.name == "m_length" { // Write placeholder (patched at end for variable-length instructions) writeln!( w, " buf.extend_from_slice(&[0u8; 4]); // m_length placeholder" )?; } else { let rname = rust_field_name(&f.name); emit_field_write(&mut w, &rname, info.kind, false)?; } offset += info.size; } // Write trailing array elements if has_array { // sizeof(*this) in C++ = round_up(fixed_offset, STRUCT_ALIGN) let sizeof_this = round_up(offset, STRUCT_ALIGN); for f in &op.fields { if !f.is_array { continue; } let info = field_type_info(&f.ty); let rname = rust_field_name(&f.name); // Pad before first element if needed let aligned_offset = round_up(offset, info.align); let pad = aligned_offset - offset; if pad > 0 { writeln!(w, " buf.extend_from_slice(&[0u8; {}]);", pad)?; } writeln!(w, " for item in {} {{", rname)?; emit_field_write(&mut w, "item", info.kind, true)?; writeln!(w, " }}")?; // Compute target size matching C++: // round_up(STRUCT_ALIGN, sizeof(*this) + count * elem_size) writeln!( w, " let target = ({} + {}.len() * {} + 7) & !7;", sizeof_this, rname, info.size )?; writeln!( w, " while (buf.len() - start) < target {{ buf.push(0); }}" )?; } if has_m_length { // Patch m_length: it's the first u32 field after the header let m_length_offset = find_m_length_offset(&op.fields); writeln!( w, " let total_len = (buf.len() - start) as u32;" )?; writeln!( w, " buf[start + {}..start + {}].copy_from_slice(&total_len.to_ne_bytes());", m_length_offset, m_length_offset + 4 )?; } } else { // Fixed-length: pad statically let final_size = round_up(offset, 8); let tail_pad = final_size - offset; if tail_pad > 0 { writeln!( w, " buf.extend_from_slice(&[0u8; {}]);", tail_pad )?; } } writeln!(w, " }}")?; } writeln!(w, " }}")?; writeln!(w, " }}")?; writeln!(w)?; Ok(()) } /// Emit code to write a field value into `w`. /// /// All bindings from pattern matching and loop iteration are references (`&T`). /// Rust auto-derefs for method calls, but explicit `*` is needed for casts /// and direct pushes of Copy types. fn emit_field_write( mut w: impl Write, name: &str, kind: &str, is_loop_item: bool, ) -> Result<(), Box> { let prefix = " ".repeat(if is_loop_item { 20 } else { 16 }); match kind { "bool" => writeln!(w, "{}buf.push(*{} as u8);", prefix, name)?, "u8" => writeln!(w, "{}buf.push(*{});", prefix, name)?, "u32" => writeln!( w, "{}buf.extend_from_slice(&{}.to_ne_bytes());", prefix, name )?, "u64" => writeln!( w, "{}buf.extend_from_slice(&{}.to_ne_bytes());", prefix, name )?, "operand" => writeln!( w, "{}buf.extend_from_slice(&{}.raw().to_ne_bytes());", prefix, name )?, "optional_operand" => { writeln!(w, "{}match {} {{", prefix, name)?; writeln!( w, "{} Some(op) => buf.extend_from_slice(&op.raw().to_ne_bytes()),", prefix )?; writeln!( w, "{} None => buf.extend_from_slice(&Operand::INVALID.to_ne_bytes()),", prefix )?; writeln!(w, "{}}}", prefix)?; } "label" => writeln!( w, "{}buf.extend_from_slice(&{}.0.to_ne_bytes());", prefix, name )?, "optional_label" => { // C++ Optional