// ladybird/Libraries/LibJS/AsmIntGen/src/codegen_aarch64.rs
//
// Commit 6492c88ad8 (Andreas Kling, 2026-03-07):
// AsmIntGen: Elide redundant FP comparisons in consecutive branch_fp_*
//
// When consecutive branch_fp_* instructions use the same operands (e.g.
// branch_fp_unordered followed by branch_fp_equal), the second ucomisd/fcmp
// is redundant since the flags are still valid from the first comparison.
//
// Track the last FP comparison operands in HandlerState and skip the
// comparison instruction when it would be identical. This is common in
// the double_equality_compare macro, which checks for unordered (NaN)
// before testing equality.

/*
* Copyright (c) 2026, the Ladybird developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
use crate::parser::{AsmInstruction, Handler, ObjectFormat, Operand, Program};
use crate::registers::{resolve_register, Arch};
use crate::shared::{get_immediate_value, resolve_field_ref, resolve_label, substitute_macro, w, HandlerState};
use std::collections::HashMap;
use std::fmt::Write;
/// Emit `adrp`+`add` for a symbol address, using platform-appropriate relocations.
fn emit_symbol_addr(out: &mut String, dst: &str, symbol: &str, fmt: ObjectFormat) {
match fmt {
ObjectFormat::MachO => {
w!(out, " adrp {dst}, {symbol}@PAGE");
w!(out, " add {dst}, {dst}, {symbol}@PAGEOFF");
}
ObjectFormat::Elf => {
w!(out, " adrp {dst}, {symbol}");
w!(out, " add {dst}, {dst}, :lo12:{symbol}");
}
}
}
pub fn generate(program: &Program) -> String {
let mut out = String::new();
w!(out, "// Generated by asmintgen -- DO NOT EDIT");
w!(out);
// On macOS, symbols need an underscore prefix
w!(out, "#ifdef __APPLE__");
w!(out, "#define CSYM(name) _##name");
w!(out, "#else");
w!(out, "#define CSYM(name) name");
w!(out, "#endif");
w!(out);
w!(out, ".text");
w!(out);
// Generate dispatch table
// The table contains absolute addresses that need relocation, so on Linux
// it must go in .data.rel.ro (not .rodata) to avoid PIC errors in shared libs.
w!(out, "#ifdef __APPLE__");
w!(out, ".section __DATA,__const");
w!(out, "#else");
w!(out, ".section .data.rel.ro");
w!(out, "#endif");
w!(out, ".p2align 3");
w!(out, "asm_dispatch_table:");
let handler_names: HashMap<String, usize> = program
.handlers
.iter()
.enumerate()
.map(|(i, h)| (h.name.clone(), i))
.collect();
// The opcode type is u8, so we need exactly 256 entries. Known opcodes
// get their handler, everything else (including out-of-range values) gets
// the fallback handler.
for opcode in &program.opcode_list {
if handler_names.contains_key(opcode.as_str()) {
w!(out, " .quad asm_handler_{opcode}");
} else {
w!(out, " .quad asm_handler_fallback");
}
}
for _ in program.opcode_list.len()..256 {
w!(out, " .quad asm_handler_fallback");
}
w!(out);
w!(out, ".text");
w!(out);
// Generate entry point
generate_entry_point(&mut out, program);
// Generate fallback handler
generate_fallback_handler(&mut out, program);
// Generate each handler
for handler in &program.handlers {
generate_handler(&mut out, handler, program);
}
// Mark the stack as non-executable (avoids executable-stack warnings from GNU ld)
w!(out, "#ifndef __APPLE__");
w!(out, ".section .note.GNU-stack,\"\",@progbits");
w!(out, "#endif");
out
}
fn generate_entry_point(out: &mut String, program: &Program) {
// void asm_interpreter_entry(u8 const* bytecode, u32 entry_point, Value* values, Interpreter* interp)
// AAPCS64: x0=bytecode, w1=entry_point, x2=values, x3=interp
w!(out, ".globl CSYM(asm_interpreter_entry)");
w!(out, ".p2align 4");
w!(out, "CSYM(asm_interpreter_entry):");
// Save callee-saved registers and link register.
// Pinned: x19(dispatch), x20(interp), x25(pc), x26(pb), x27(values), x28(exec_ctx)
// Also save x21-x24 (callee-saved, not used by us, but must be preserved).
w!(out, " stp x29, x30, [sp, #-96]!");
w!(out, " mov x29, sp");
w!(out, " stp x25, x26, [sp, #16]");
w!(out, " stp x27, x28, [sp, #32]");
w!(out, " stp x19, x20, [sp, #48]");
w!(out, " stp x21, x22, [sp, #64]");
w!(out, " stp x23, x24, [sp, #80]");
// Set up pinned registers
// x0=bytecode (pb), w1=entry_point (pc), x2=values, x3=interp
let interp_ctx = program
.constants
.get("INTERPRETER_RUNNING_EXECUTION_CONTEXT")
.copied()
.unwrap_or(0);
w!(out, " mov x26, x0 // pb = bytecode base");
w!(out, " mov w25, w1 // pc = entry_point");
w!(out, " mov x27, x2 // values = values array");
// Store Interpreter* in x20 (callee-saved) for C++ calls, pin exec_ctx in x28
w!(out, " mov x20, x3 // interp = Interpreter*");
emit_ldr64(out, "x28", "x3", interp_ctx);
w!(out, " // x28 = exec_ctx");
emit_symbol_addr(out, "x19", "asm_dispatch_table", program.object_format);
w!(out, " // x19 = dispatch table");
// Dispatch to first instruction
w!(out, " add x9, x26, x25 // x9 = pb + pc");
w!(out, " ldrb w9, [x9] // w9 = opcode byte");
w!(out, " ldr x10, [x19, x9, lsl #3]");
w!(out, " br x10");
w!(out);
}
fn generate_fallback_handler(out: &mut String, program: &Program) {
w!(out, ".p2align 4");
w!(out, "asm_handler_fallback:");
// Set up args: x0=interp (x20), w1=pc (w25)
w!(out, " mov x0, x20");
w!(out, " mov w1, w25");
w!(out, " bl CSYM(asm_fallback_handler)");
// Check for exit (return < 0)
w!(out, " tbnz x0, #63, .Lexit");
// Reload exec_ctx, pb, and values
emit_state_reload(out, program);
// New pc from return value
w!(out, " mov w25, w0");
emit_dispatch(out);
w!(out);
// Exit path: restore callee-saved registers and return
w!(out, ".Lexit:");
w!(out, " ldp x25, x26, [sp, #16]");
w!(out, " ldp x27, x28, [sp, #32]");
w!(out, " ldp x19, x20, [sp, #48]");
w!(out, " ldp x21, x22, [sp, #64]");
w!(out, " ldp x23, x24, [sp, #80]");
w!(out, " ldp x29, x30, [sp], #96");
w!(out, " ret");
w!(out);
}
/// Emit instructions to reload exec_ctx (x28), pb (x26), and values (x27)
/// from the Interpreter* in x20. Uses x9 as scratch.
fn emit_state_reload(out: &mut String, program: &Program) {
let interp_ctx = program
.constants
.get("INTERPRETER_RUNNING_EXECUTION_CONTEXT")
.copied()
.unwrap_or(0);
let exec_executable = program
.constants
.get("EXECUTION_CONTEXT_EXECUTABLE")
.copied()
.unwrap_or(0);
let exec_bytecode = program
.constants
.get("EXECUTABLE_BYTECODE_DATA")
.copied()
.unwrap_or(0);
let sizeof_execctx = program
.constants
.get("SIZEOF_EXECUTION_CONTEXT")
.copied()
.unwrap_or(0);
emit_ldr64(out, "x28", "x20", interp_ctx);
emit_ldr64(out, "x9", "x28", exec_executable);
emit_ldr64(out, "x26", "x9", exec_bytecode);
emit_add_imm(out, "x27", "x28", sizeof_execctx);
}
/// Emit a dispatch sequence: load opcode from [pb + pc], look up in table, branch.
fn emit_dispatch(out: &mut String) {
w!(out, " add x9, x26, x25"); // x9 = pb + pc
w!(out, " ldrb w9, [x9]"); // w9 = opcode
emit_dispatch_tail(out);
}
/// Emit the tail of a dispatch: given opcode in w9, look up handler and branch.
fn emit_dispatch_tail(out: &mut String) {
w!(out, " ldr x10, [x19, x9, lsl #3]");
w!(out, " br x10");
}
fn emit_dispatch_with_size(out: &mut String, size: u32) {
emit_add_imm32(out, "w25", "w25", size as i64);
emit_dispatch(out);
}
/// Get the handler's instruction size: from explicit size= attribute, or from Bytecode.def.
fn handler_size(handler: &Handler, program: &Program) -> u32 {
if let Some(size) = handler.size {
return size;
}
let layout = program.op_layouts.get(&handler.name).unwrap_or_else(|| {
panic!(
"No Bytecode.def layout for handler '{}' and no explicit size",
handler.name
)
});
layout.size.unwrap_or_else(|| {
panic!(
"Handler '{}' is variable-length; use dispatch_variable instead of bare dispatch",
handler.name
)
}) as u32
}
fn generate_handler(out: &mut String, handler: &Handler, program: &Program) {
w!(out, ".p2align 4");
w!(out, "asm_handler_{}:", handler.name);
let mut state = HandlerState::new();
for insn in &handler.instructions {
emit_instruction(out, insn, handler, program, &mut state);
}
// Emit cold fixup blocks after the main handler body
if !state.cold_blocks.is_empty() {
out.push_str(&state.cold_blocks);
}
w!(out);
}
fn resolve_op(op: &Operand, handler: &Handler, program: &Program) -> String {
match op {
Operand::Register(name) => {
resolve_register(name, Arch::Aarch64).unwrap_or_else(|| name.clone())
}
Operand::Immediate(val) => format!("{val}"),
Operand::Constant(name) => {
if let Some(val) = program.constants.get(name) {
format!("{val}")
} else {
name.clone()
}
}
Operand::Memory { base, index, scale } => {
// Return a structured string that we'll parse in load/store emitters
let base_r = resolve_register(base, Arch::Aarch64).unwrap_or_else(|| base.clone());
match (index, scale) {
(Some(idx), Some(sc)) => {
let idx_r = resolve_register(idx, Arch::Aarch64).unwrap_or_else(|| idx.clone());
let sc_val = program
.constants
.get(sc.as_str())
.copied()
.unwrap_or_else(|| sc.parse().unwrap_or(1));
format!("MEM:{base_r}:{idx_r}:{sc_val}")
}
(Some(idx), None) => {
if let Some(val) = resolve_field_ref(idx, handler, program) {
format!("MEM:{base_r}:#{val}")
} else if let Some(val) = program.constants.get(idx.as_str()) {
format!("MEM:{base_r}:#{val}")
} else if let Ok(val) = idx.parse::<i64>() {
format!("MEM:{base_r}:#{val}")
} else {
let idx_r =
resolve_register(idx, Arch::Aarch64).unwrap_or_else(|| idx.clone());
format!("MEM:{base_r}:{idx_r}")
}
}
(None, _) => format!("MEM:{base_r}"),
}
}
Operand::Label(name) => name.clone(),
Operand::FieldRef(name) => {
let layout = program
.op_layouts
.get(&handler.name)
.unwrap_or_else(|| panic!("No Bytecode.def layout for handler '{}'", handler.name));
let offset = layout.field_offsets.get(name).unwrap_or_else(|| {
panic!(
"No field '{}' in Bytecode.def for opcode '{}'",
name, handler.name
)
});
format!("{offset}")
}
}
}
/// Parse a MEM: string back into components.
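/// Examples of the strings built in `resolve_op`: "MEM:x28" (base only),
/// "MEM:x28:#16" (base + immediate offset), "MEM:x27:x9" (base + register
/// index), and "MEM:x27:x9:8" (base + index scaled by 8).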
struct MemOp {
base: String,
index: MemIndex,
}
enum MemIndex {
None,
Imm(i64),
Reg(String),
RegScale(String, i64),
}
fn parse_mem(s: &str) -> Option<MemOp> {
let s = s.strip_prefix("MEM:")?;
let parts: Vec<&str> = s.splitn(3, ':').collect();
let base = parts[0].to_string();
match parts.len() {
1 => Some(MemOp {
base,
index: MemIndex::None,
}),
2 => {
if let Some(imm) = parts[1].strip_prefix('#') {
Some(MemOp {
base,
index: MemIndex::Imm(imm.parse().unwrap_or(0)),
})
} else {
Some(MemOp {
base,
index: MemIndex::Reg(parts[1].to_string()),
})
}
}
3 => {
let scale: i64 = parts[2].parse().unwrap_or(1);
Some(MemOp {
base,
index: MemIndex::RegScale(parts[1].to_string(), scale),
})
}
_ => None,
}
}
/// Emit an ldr (64-bit) from [base + offset].
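/// Offsets that fit the scaled unsigned-immediate form use `ldr`; small
/// (possibly negative) unscaled offsets use `ldur`; anything else first
/// materializes the offset in the x9 scratch register. The other sized
/// load/store emitters below follow the same three-way split.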
fn emit_ldr64(out: &mut String, dst: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " ldr {dst}, [{base}]");
    } else if (0..=32760).contains(&offset) && offset % 8 == 0 {
w!(out, " ldr {dst}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " ldur {dst}, [{base}, #{offset}]");
} else {
// Need to materialize offset in a scratch register
emit_mov_imm(out, "x9", offset);
w!(out, " ldr {dst}, [{base}, x9]");
}
}
/// Emit an ldr (32-bit, zero-extending) from [base + offset].
fn emit_ldr32(out: &mut String, dst: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " ldr {dst}, [{base}]");
    } else if (0..=16380).contains(&offset) && offset % 4 == 0 {
w!(out, " ldr {dst}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " ldur {dst}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " ldr {dst}, [{base}, x9]");
}
}
/// Emit an ldrb (8-bit, zero-extending) from [base + offset].
fn emit_ldrb(out: &mut String, dst: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " ldrb {dst}, [{base}]");
} else if (0..=4095).contains(&offset) {
w!(out, " ldrb {dst}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " ldurb {dst}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " ldrb {dst}, [{base}, x9]");
}
}
/// Emit an ldrh (16-bit, zero-extending) from [base + offset].
fn emit_ldrh(out: &mut String, dst: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " ldrh {dst}, [{base}]");
    } else if (0..=8190).contains(&offset) && offset % 2 == 0 {
w!(out, " ldrh {dst}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " ldurh {dst}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " ldrh {dst}, [{base}, x9]");
}
}
/// Emit an ldrsb (8-bit, sign-extending to 32-bit) from [base + offset].
fn emit_ldrsb(out: &mut String, dst: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " ldrsb {dst}, [{base}]");
} else if (0..=4095).contains(&offset) {
w!(out, " ldrsb {dst}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " ldursb {dst}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " ldrsb {dst}, [{base}, x9]");
}
}
/// Emit an ldrsh (16-bit, sign-extending to 32-bit) from [base + offset].
fn emit_ldrsh(out: &mut String, dst: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " ldrsh {dst}, [{base}]");
    } else if (0..=8190).contains(&offset) && offset % 2 == 0 {
w!(out, " ldrsh {dst}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " ldursh {dst}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " ldrsh {dst}, [{base}, x9]");
}
}
/// Emit a str (8-bit) to [base + offset].
fn emit_strb(out: &mut String, src: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " strb {src}, [{base}]");
} else if (0..=4095).contains(&offset) {
w!(out, " strb {src}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " sturb {src}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " strb {src}, [{base}, x9]");
}
}
/// Emit a str (16-bit) to [base + offset].
fn emit_strh(out: &mut String, src: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " strh {src}, [{base}]");
    } else if (0..=8190).contains(&offset) && offset % 2 == 0 {
w!(out, " strh {src}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " sturh {src}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " strh {src}, [{base}, x9]");
}
}
/// Emit a str (32-bit) to [base + offset].
fn emit_str32(out: &mut String, src: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " str {src}, [{base}]");
    } else if (0..=16380).contains(&offset) && offset % 4 == 0 {
w!(out, " str {src}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " stur {src}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " str {src}, [{base}, x9]");
}
}
/// Emit a str (64-bit) to [base + offset].
fn emit_str64(out: &mut String, src: &str, base: &str, offset: i64) {
if offset == 0 {
w!(out, " str {src}, [{base}]");
    } else if (0..=32760).contains(&offset) && offset % 8 == 0 {
w!(out, " str {src}, [{base}, #{offset}]");
} else if (-256..=255).contains(&offset) {
w!(out, " stur {src}, [{base}, #{offset}]");
} else {
emit_mov_imm(out, "x9", offset);
w!(out, " str {src}, [{base}, x9]");
}
}
/// Emit a mov of an immediate into a register. Handles arbitrary 64-bit values
/// using movz/movk sequences.
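/// For example, 0x7FF8_0000_0000_0000 emits the single instruction
/// `movz dst, #0x7ff8, lsl #48`, while 0x0001_2345_6789_ABCD emits a `movz`
/// for the low halfword followed by three `movk`s.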
fn emit_mov_imm(out: &mut String, dst: &str, val: i64) {
let uval = val as u64;
// Check if it fits in a single movz (16-bit value at any position)
if uval == 0 {
w!(out, " mov {dst}, #0");
return;
}
// Check if it's a small positive value (fits in movz)
if uval <= 0xFFFF {
w!(out, " mov {dst}, #{uval}");
return;
}
// Check if it's a small negative value (use movn)
let neg = !uval;
if neg <= 0xFFFF {
w!(out, " movn {dst}, #{neg}");
return;
}
// General case: movz + movk sequence
let hw0 = uval & 0xFFFF;
let hw1 = (uval >> 16) & 0xFFFF;
let hw2 = (uval >> 32) & 0xFFFF;
let hw3 = (uval >> 48) & 0xFFFF;
// Find the first non-zero halfword to use movz, then movk for the rest
let halfwords = [(hw0, 0), (hw1, 16), (hw2, 32), (hw3, 48)];
let mut first = true;
for &(hw, shift) in &halfwords {
if hw != 0 {
if first {
w!(out, " movz {dst}, #0x{hw:x}, lsl #{shift}");
first = false;
} else {
w!(out, " movk {dst}, #0x{hw:x}, lsl #{shift}");
}
}
}
}
/// Emit a mov of a 32-bit immediate into a w register.
fn emit_mov_imm32(out: &mut String, dst: &str, val: i64) {
let uval = (val as u64) & 0xFFFFFFFF;
if uval == 0 {
w!(out, " mov {dst}, #0");
return;
}
if uval <= 0xFFFF {
w!(out, " mov {dst}, #{uval}");
return;
}
let neg = (!uval) & 0xFFFFFFFF;
if neg <= 0xFFFF {
w!(out, " movn {dst}, #{neg}");
return;
}
let hw0 = uval & 0xFFFF;
let hw1 = (uval >> 16) & 0xFFFF;
if hw0 != 0 && hw1 != 0 {
w!(out, " movz {dst}, #0x{hw0:x}");
w!(out, " movk {dst}, #0x{hw1:x}, lsl #16");
} else if hw0 != 0 {
w!(out, " mov {dst}, #0x{hw0:x}");
} else {
w!(out, " movz {dst}, #0x{hw1:x}, lsl #16");
}
}
/// Check if an immediate can be encoded in an ARM64 logical instruction.
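/// For example, 0x00FF_00FF_00FF_00FF (a run of eight ones repeating every
/// 16 bits) is encodable, while 0b101 (scattered bits) is not.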
fn is_logical_immediate(val: u64) -> bool {
if val == 0 || val == u64::MAX {
return false;
}
    // ARM64 logical immediates are bitmask patterns. Fast path: common
    // patterns used in this codebase. Anything not listed falls through
    // to the full bitmask-encoding check below.
let patterns: &[u64] = &[
0xFFFF,
0xFFFFFFFF,
0xFFFF_FFFF_FFFF,
0xFFFE,
0x1,
0x3,
0x7,
0xF,
0xFF,
0x1FF,
0x3FF,
0x7FF,
0xFFF,
];
if patterns.contains(&val) {
return true;
}
// Check if it's a repeating pattern that ARM64 can encode
is_valid_logical_imm_full(val)
}
/// Full check for ARM64 logical immediate encoding.
fn is_valid_logical_imm_full(val: u64) -> bool {
if val == 0 || val == u64::MAX {
return false;
}
// Try all element sizes: 2, 4, 8, 16, 32, 64
for size in [2u32, 4, 8, 16, 32, 64] {
let mask = if size == 64 {
u64::MAX
} else {
(1u64 << size) - 1
};
let elem = val & mask;
// Check if the pattern repeats across 64 bits
let mut repeats = true;
let mut pos = size;
while pos < 64 {
if ((val >> pos) & mask) != elem {
repeats = false;
break;
}
pos += size;
}
if !repeats {
continue;
}
        // Check that the element is a single contiguous run of ones,
        // possibly rotated (including runs that wrap around the element).
        let ones = elem.count_ones();
        if ones == 0 || ones == size {
            continue;
        }
        // A circular bit pattern holding exactly one run of ones has exactly
        // two 0<->1 transitions, so popcount(elem ^ rotr(elem, 1)) == 2.
        // Checking transitions handles wrap-around runs as well, which a
        // plain "rotate until LSB is 1" scan would miss.
        let rotr1 = ((elem >> 1) | ((elem & 1) << (size - 1))) & mask;
        if (elem ^ rotr1).count_ones() == 2 {
            return true;
        }
}
false
}
/// Emit add immediate. Handles large immediates by materializing in x9.
fn emit_add_imm(out: &mut String, dst: &str, src: &str, imm: i64) {
if imm == 0 {
if dst != src {
w!(out, " mov {dst}, {src}");
}
} else if imm > 0 && imm <= 4095 {
w!(out, " add {dst}, {src}, #{imm}");
} else if imm > 0 && imm <= 0xFFF000 && imm & 0xFFF == 0 {
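        // Shifted 12-bit immediate form: e.g. #0x5000 encodes as
        // `add dst, src, #5, lsl #12`.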
let shifted = imm >> 12;
w!(out, " add {dst}, {src}, #{shifted}, lsl #12");
} else if (-4095..0).contains(&imm) {
let neg = -imm;
w!(out, " sub {dst}, {src}, #{neg}");
} else {
emit_mov_imm(out, "x9", imm);
w!(out, " add {dst}, {src}, x9");
}
}
/// Emit add immediate for 32-bit registers.
fn emit_add_imm32(out: &mut String, dst: &str, src: &str, imm: i64) {
let imm = imm & 0xFFFFFFFF;
if imm == 0 {
if dst != src {
w!(out, " mov {dst}, {src}");
}
} else if imm <= 4095 {
w!(out, " add {dst}, {src}, #{imm}");
} else {
emit_mov_imm32(out, "w9", imm);
w!(out, " add {dst}, {src}, w9");
}
}
/// Convert a 64-bit register name (x*) to its 32-bit counterpart (w*).
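/// ("xzr" also maps correctly, to "wzr", via the same prefix rewrite.)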
fn to_w_reg(reg: &str) -> String {
if let Some(n) = reg.strip_prefix('x') {
format!("w{n}")
} else if reg == "sp" {
"wsp".to_string()
} else {
reg.to_string()
}
}
fn emit_instruction(
out: &mut String,
insn: &AsmInstruction,
handler: &Handler,
program: &Program,
state: &mut HandlerState,
) {
let m = &insn.mnemonic;
match m.as_str() {
"label" => {
if let Some(Operand::Label(name)) = insn.operands.first() {
if name.starts_with('.') {
w!(out, ".Lasm_{}{name}:", handler.name);
} else {
w!(out, "{name}:");
}
}
}
// Macro invocations
_ if program.macros.contains_key(m) => {
let mac = program.macros[m].clone();
let mut param_map: HashMap<String, String> = HashMap::new();
for (i, param) in mac.params.iter().enumerate() {
if let Some(op) = insn.operands.get(i) {
param_map.insert(param.clone(), resolve_op(op, handler, program));
}
}
for body_insn in &mac.body {
let expanded = substitute_macro(body_insn, &param_map);
emit_instruction(out, &expanded, handler, program, state);
}
}
// reload_exec_ctx: reload the pinned exec_ctx register from Interpreter* (x20)
"reload_exec_ctx" => {
let interp_ctx = program
.constants
.get("INTERPRETER_RUNNING_EXECUTION_CONTEXT")
.copied()
.unwrap_or(0);
emit_ldr64(out, "x28", "x20", interp_ctx);
}
// dispatch_variable: advance pc by value in register and dispatch
"dispatch_variable" => {
if let Some(op) = insn.operands.first() {
let reg = resolve_op(op, handler, program);
let wreg = to_w_reg(&reg);
w!(out, " add w25, w25, {wreg}");
emit_dispatch(out);
}
}
// dispatch_next: advance pc by the handler's instruction size and dispatch
"dispatch_next" => {
let size = handler_size(handler, program);
emit_dispatch_with_size(out, size);
}
// call_slow_path: TERMINAL call to C++ slow path
"call_slow_path" => {
if let Some(Operand::Register(func_name)) = insn.operands.first() {
w!(out, " mov x0, x20"); // interp
w!(out, " mov w1, w25"); // pc
w!(out, " bl CSYM({func_name})");
w!(out, " tbnz x0, #63, .Lexit");
emit_state_reload(out, program);
w!(out, " mov w25, w0");
emit_dispatch(out);
}
}
// call_helper: NON-TERMINAL call to C++ helper
// Passes t1 (x1 on aarch64) as first argument to the helper.
"call_helper" => {
if let Some(Operand::Register(func_name)) = insn.operands.first() {
// t1 on aarch64 is x1. Move to x0 for the call.
w!(out, " mov x0, x1");
w!(out, " bl CSYM({func_name})");
// Result is in x0 (= t0 on aarch64), which is correct.
}
}
// call_interp: NON-TERMINAL call with (Interpreter*, u32 pc)
"call_interp" => {
if let Some(Operand::Register(func_name)) = insn.operands.first() {
w!(out, " mov x0, x20");
w!(out, " mov w1, w25");
w!(out, " bl CSYM({func_name})");
// Result in x0 (= t0)
}
}
// double_to_int32 dst, src_fpr, fail_label
// Truncate double to int32 with strict round-trip check.
// Fails (branches to fail_label) if the value is fractional,
// out of i32 range, or NaN. Unlike js_to_int32, this never
// applies modular reduction -- it's a simple "is this an integer
// that fits in i32?" check.
"double_to_int32" => {
if insn.operands.len() >= 3 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
let fail = resolve_label(&insn.operands[2], handler);
let wdst = to_w_reg(&dst);
w!(out, " fcvtzs {wdst}, {src}");
w!(out, " scvtf d16, {wdst}");
w!(out, " fcmp {src}, d16");
w!(out, " b.ne {fail}");
}
}
// js_to_int32 dst, src_fpr, fail_label
// Convert double to int32 using JS ToInt32 semantics.
"js_to_int32" => {
if insn.operands.len() >= 3 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
let wdst = to_w_reg(&dst);
if program.has_jscvt {
// ARMv8.3 FEAT_JSCVT: single instruction, handles all
// cases (fractional, overflow, NaN) per JS semantics.
w!(out, " fjcvtzs {wdst}, {src}");
} else {
// Portable fallback: truncate and round-trip check.
// Values that don't survive (fractional, out of i32
// range, NaN) fall through to the slow path.
let fail = resolve_label(&insn.operands[2], handler);
w!(out, " fcvtzs {wdst}, {src}");
w!(out, " scvtf d16, {wdst}");
w!(out, " fcmp {src}, d16");
w!(out, " b.ne {fail}");
}
}
}
// divmod quot, rem, dividend, divisor
// Signed integer divide: quot = dividend / divisor, rem = dividend % divisor.
"divmod" => {
if insn.operands.len() == 4 {
let quot = resolve_op(&insn.operands[0], handler, program);
let rem = resolve_op(&insn.operands[1], handler, program);
let dividend = resolve_op(&insn.operands[2], handler, program);
let divisor = resolve_op(&insn.operands[3], handler, program);
w!(out, " sdiv {quot}, {dividend}, {divisor}");
w!(out, " msub {rem}, {quot}, {divisor}, {dividend}");
}
}
// fp_floor dst, src - Round toward negative infinity
"fp_floor" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " frintm {dst}, {src}");
}
}
// fp_ceil dst, src - Round toward positive infinity
"fp_ceil" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " frintp {dst}, {src}");
}
}
// toggle_bit dst, bit - Toggle (flip) a single bit: dst ^= (1 << bit)
"toggle_bit" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
let mask = 1u64 << val;
if is_logical_immediate(mask) {
w!(out, " eor {dst}, {dst}, #0x{mask:x}");
} else {
emit_mov_imm(out, "x9", mask as i64);
w!(out, " eor {dst}, {dst}, x9");
}
}
}
}
// clear_bit dst, bit - Clear a single bit: dst &= ~(1 << bit)
"clear_bit" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
let mask = 1u64 << val;
if is_logical_immediate(mask) {
w!(out, " bic {dst}, {dst}, #0x{mask:x}");
} else {
emit_mov_imm(out, "x9", mask as i64);
w!(out, " bic {dst}, {dst}, x9");
}
}
}
}
// canonicalize_nan dst_gpr, src_fpr
// If src is NaN, write CANON_NAN_BITS to dst. Otherwise bitwise-copy src to dst.
// Uses a cold fixup block to keep the constant load off the hot path, since NaN
// results are extremely rare. The hot path is just: fmov + fcmp + b.vs.
"canonicalize_nan" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
let canon = program
.constants
.get("CANON_NAN_BITS")
.copied()
.expect("CANON_NAN_BITS constant required for canonicalize_nan");
let id = state.unique_counter;
state.unique_counter += 1;
let fixup_label = format!(".Lasm_{}.canon_nan_{id}", handler.name);
let ret_label = format!(".Lasm_{}.canon_nan_{id}_ret", handler.name);
// Hot path: move to GPR, check for NaN, branch to cold fixup if NaN
w!(out, " fmov {dst}, {src}");
w!(out, " fcmp {src}, {src}");
w!(out, " b.vs {fixup_label}");
w!(out, "{ret_label}:");
// Cold fixup block: only reached when result is NaN
w!(state.cold_blocks, "{fixup_label}:");
emit_mov_imm(&mut state.cold_blocks, &dst, canon);
w!(state.cold_blocks, " b {ret_label}");
}
}
"exit" => {
w!(out, " b .Lexit");
}
// load64 dst_reg, [base, offset]
"load64" => {
if insn.operands.len() >= 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let mem_str = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
emit_mem_load(out, &dst, &mem, 8, false);
}
}
}
// load32 dst_reg, [base, offset]
"load32" => {
if insn.operands.len() >= 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let mem_str = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
let wdst = to_w_reg(&dst);
emit_mem_load(out, &wdst, &mem, 4, false);
}
}
}
// load8 dst_reg, [base, offset]
"load8" => {
if insn.operands.len() >= 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let mem_str = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
let wdst = to_w_reg(&dst);
emit_mem_load(out, &wdst, &mem, 1, false);
}
}
}
// load16 dst_reg, [base, offset]
"load16" => {
if insn.operands.len() >= 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let mem_str = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
let wdst = to_w_reg(&dst);
emit_mem_load(out, &wdst, &mem, 2, false);
}
}
}
// load8s dst_reg, [base, offset] - sign-extending
"load8s" => {
if insn.operands.len() >= 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let mem_str = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
let wdst = to_w_reg(&dst);
emit_mem_load(out, &wdst, &mem, 1, true);
}
}
}
// load16s dst_reg, [base, offset] - sign-extending
"load16s" => {
if insn.operands.len() >= 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let mem_str = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
let wdst = to_w_reg(&dst);
emit_mem_load(out, &wdst, &mem, 2, true);
}
}
}
// store8 [base, offset], src_reg
"store8" => {
if insn.operands.len() >= 2 {
let mem_str = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
let wsrc = to_w_reg(&src);
emit_mem_store(out, &wsrc, &mem, 1);
}
}
}
// store16 [base, offset], src_reg
"store16" => {
if insn.operands.len() >= 2 {
let mem_str = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
let wsrc = to_w_reg(&src);
emit_mem_store(out, &wsrc, &mem, 2);
}
}
}
// store32 [base, offset], src_reg
"store32" => {
if insn.operands.len() >= 2 {
let mem_str = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
let wsrc = to_w_reg(&src);
emit_mem_store(out, &wsrc, &mem, 4);
}
}
}
// store64 [base, offset], src_reg
"store64" => {
if insn.operands.len() >= 2 {
let mem_str = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
emit_mem_store(out, &src, &mem, 8);
}
}
}
// load_operand dst_reg, byte_offset
// Load u32 operand from bytecode, use as index into values array
"load_operand" => {
if insn.operands.len() >= 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let offset = resolve_op(&insn.operands[1], handler, program);
let offset_val: i64 = offset.parse().unwrap_or(0);
// x9 = pb + pc + offset -> load w9 from there (operand index)
w!(out, " add x9, x26, x25");
emit_ldr32(out, "w9", "x9", offset_val);
// dst = values[w9] (scaled by 8)
w!(out, " ldr {dst}, [x27, x9, lsl #3]");
}
}
// store_operand byte_offset, src_reg
"store_operand" => {
if insn.operands.len() >= 2 {
let offset = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
let offset_val: i64 = offset.parse().unwrap_or(0);
// x9 = pb + pc + offset -> load w9 from there (operand index)
w!(out, " add x9, x26, x25");
emit_ldr32(out, "w9", "x9", offset_val);
// values[w9] = src
w!(out, " str {src}, [x27, x9, lsl #3]");
}
}
// load_label dst_reg, byte_offset
"load_label" => {
if insn.operands.len() >= 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let offset = resolve_op(&insn.operands[1], handler, program);
let offset_val: i64 = offset.parse().unwrap_or(0);
let wdst = to_w_reg(&dst);
w!(out, " add x9, x26, x25");
emit_ldr32(out, &wdst, "x9", offset_val);
}
}
// goto_handler reg - set pc and dispatch
"goto_handler" => {
if let Some(op) = insn.operands.first() {
let reg = resolve_op(op, handler, program);
let wreg = to_w_reg(&reg);
w!(out, " mov w25, {wreg}");
emit_dispatch(out);
}
}
// mov
"mov" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
emit_mov_imm(out, &dst, val);
} else {
w!(out, " mov {dst}, {src}");
}
}
}
// movsxd: sign-extend 32-bit to 64-bit
"movsxd" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
let wsrc = to_w_reg(&src);
w!(out, " sxtw {dst}, {wsrc}");
}
}
// and
"and" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
let uval = val as u64;
                    if uval == 0xFFFFFFFF {
                        // Writing a w register zeroes the upper 32 bits, so
                        // `mov wN, wN` zero-extends in place.
                        let wdst = to_w_reg(&dst);
                        w!(out, " mov {wdst}, {wdst}");
} else if is_logical_immediate(uval) {
w!(out, " and {dst}, {dst}, #0x{uval:x}");
} else {
emit_mov_imm(out, "x9", val);
w!(out, " and {dst}, {dst}, x9");
}
} else {
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " and {dst}, {dst}, {src}");
}
}
}
// or
"or" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
let uval = val as u64;
if is_logical_immediate(uval) {
w!(out, " orr {dst}, {dst}, #0x{uval:x}");
} else {
emit_mov_imm(out, "x9", val);
w!(out, " orr {dst}, {dst}, x9");
}
} else {
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " orr {dst}, {dst}, {src}");
}
}
}
// xor
"xor" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
if dst == src {
// xor reg, reg -> zero register
w!(out, " mov {dst}, #0");
} else if let Some(val) = get_immediate_value(&insn.operands[1], program) {
let uval = val as u64;
if is_logical_immediate(uval) {
w!(out, " eor {dst}, {dst}, #0x{uval:x}");
} else {
emit_mov_imm(out, "x9", val);
w!(out, " eor {dst}, {dst}, x9");
}
} else {
w!(out, " eor {dst}, {dst}, {src}");
}
}
}
// add
"add" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
emit_add_imm(out, &dst, &dst, val);
} else {
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " adds {dst}, {dst}, {src}");
}
}
}
// sub
"sub" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
if val > 0 && val <= 4095 {
w!(out, " subs {dst}, {dst}, #{val}");
} else {
emit_mov_imm(out, "x9", val);
w!(out, " subs {dst}, {dst}, x9");
}
} else {
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " subs {dst}, {dst}, {src}");
}
}
}
// neg
"neg" => {
if insn.operands.len() == 1 {
let dst = resolve_op(&insn.operands[0], handler, program);
w!(out, " neg {dst}, {dst}");
}
}
// not (bitwise)
"not" => {
if insn.operands.len() == 1 {
let dst = resolve_op(&insn.operands[0], handler, program);
w!(out, " mvn {dst}, {dst}");
}
}
// 32-bit arithmetic with overflow detection.
// These perform the operation on the low 32 bits and branch on signed overflow.
"add32_overflow" | "sub32_overflow" => {
if insn.operands.len() == 3 {
let dst = resolve_op(&insn.operands[0], handler, program);
let wdst = to_w_reg(&dst);
let label = resolve_label(&insn.operands[2], handler);
let op = if m == "add32_overflow" { "adds" } else { "subs" };
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
if val > 0 && val <= 4095 {
w!(out, " {op} {wdst}, {wdst}, #{val}");
} else {
emit_mov_imm(out, "w9", val);
w!(out, " {op} {wdst}, {wdst}, w9");
}
} else {
let src = resolve_op(&insn.operands[1], handler, program);
let wsrc = to_w_reg(&src);
w!(out, " {op} {wdst}, {wdst}, {wsrc}");
}
w!(out, " b.vs {label}");
}
}
"mul32_overflow" => {
if insn.operands.len() == 3 {
let dst = resolve_op(&insn.operands[0], handler, program);
let wdst = to_w_reg(&dst);
let label = resolve_label(&insn.operands[2], handler);
let src = resolve_op(&insn.operands[1], handler, program);
let wsrc = to_w_reg(&src);
// ARM64 mul doesn't set overflow flag, so we use smull + check.
// smull gives full 64-bit result of 32x32 signed multiply.
w!(out, " smull {dst}, {wdst}, {wsrc}");
w!(out, " sxtw x9, {wdst}");
w!(out, " cmp {dst}, x9");
w!(out, " b.ne {label}");
}
}
"neg32_overflow" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let wdst = to_w_reg(&dst);
let label = resolve_label(&insn.operands[1], handler);
w!(out, " negs {wdst}, {wdst}");
w!(out, " b.vs {label}");
}
}
// mul: 2-operand (dst *= src, with overflow detection) or 3-operand (dst = src * imm)
"mul" => {
if insn.operands.len() == 3 {
// 3-operand: simple multiply for index scaling, no overflow check
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[2], program) {
emit_mov_imm(out, "x9", val);
w!(out, " mul {dst}, {src}, x9");
} else {
let imm_reg = resolve_op(&insn.operands[2], handler, program);
w!(out, " mul {dst}, {src}, {imm_reg}");
}
} else if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
// ARM64 mul doesn't set overflow flag, so we use smull + check.
// smull gives full 64-bit result of 32x32 signed multiply.
// If sign-extending the low 32 bits doesn't match the full
// result, the multiply overflowed.
let wdst = to_w_reg(&dst);
let wsrc = to_w_reg(&src);
w!(out, " smull {dst}, {wdst}, {wsrc}");
// Check: does sxtw(low32) == full result?
w!(out, " sxtw x9, {wdst}");
w!(out, " cmp {dst}, x9");
// The DSL uses `jo` (jump on overflow) after imul.
// We map jo -> b.vs. Use CCMP to set V flag based on the
// comparison result:
// - If EQ (no overflow): perform cmp xzr,xzr -> V=0
// - If NE (overflow): set NZCV=0001 -> V=1
w!(out, " ccmp xzr, xzr, #1, eq");
}
}
// Shift instructions
"shl" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
w!(out, " lsl {dst}, {dst}, #{val}");
} else {
let count = resolve_op(&insn.operands[1], handler, program);
w!(out, " lsl {dst}, {dst}, {count}");
}
}
}
"shr" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
w!(out, " lsr {dst}, {dst}, #{val}");
} else {
let count = resolve_op(&insn.operands[1], handler, program);
w!(out, " lsr {dst}, {dst}, {count}");
}
}
}
"sar" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
w!(out, " asr {dst}, {dst}, #{val}");
} else {
let count = resolve_op(&insn.operands[1], handler, program);
w!(out, " asr {dst}, {dst}, {count}");
}
}
}
// Unconditional jump
"jmp" => {
if let Some(Operand::Label(label)) = insn.operands.first() {
if label.starts_with('.') {
w!(out, " b .Lasm_{}{label}", handler.name);
} else {
w!(out, " b {label}");
}
} else if let Some(op) = insn.operands.first() {
// Indirect jump (e.g., jmp [t1, t0, 8])
let resolved = resolve_op(op, handler, program);
if let Some(mem) = parse_mem(&resolved) {
// Load address from memory, then branch
emit_mem_load_raw(out, "x9", &mem, 8);
w!(out, " br x9");
} else {
w!(out, " br {resolved}");
}
}
}
// lea: load effective address. On ARM64, this becomes add.
"lea" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let mem_str = resolve_op(&insn.operands[1], handler, program);
if let Some(mem) = parse_mem(&mem_str) {
match &mem.index {
MemIndex::None => {
w!(out, " mov {dst}, {}", mem.base);
}
MemIndex::Imm(offset) => {
emit_add_imm(out, &dst, &mem.base, *offset);
}
MemIndex::Reg(idx) => {
// Check if idx is "rip" equivalent (PC-relative)
// In the DSL, `lea t1, [rip, asm_dispatch_table]` means
// load address of asm_dispatch_table. The idx here will
// be "asm_dispatch_table" since "rip" isn't a DSL register.
if mem.base == "rip" || idx == "rip" {
// PC-relative address load
let symbol = if mem.base == "rip" { idx } else { &mem.base };
emit_symbol_addr(out, &dst, symbol, program.object_format);
} else {
w!(out, " add {dst}, {}, {idx}", mem.base);
}
}
MemIndex::RegScale(idx, scale) => {
let shift = match scale {
1 => 0,
2 => 1,
4 => 2,
8 => 3,
_ => {
// General case: multiply and add
emit_mov_imm(out, "x9", *scale);
writeln!(out, " madd {dst}, {idx}, x9, {}", mem.base)
.unwrap();
return;
}
};
if shift == 0 {
w!(out, " add {dst}, {}, {idx}", mem.base);
} else {
writeln!(out, " add {dst}, {}, {idx}, lsl #{shift}", mem.base)
.unwrap();
}
}
}
}
}
}
// Floating point instructions
"fp_add" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " fadd {dst}, {dst}, {src}");
}
}
"fp_sub" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " fsub {dst}, {dst}, {src}");
}
}
"fp_mul" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " fmul {dst}, {dst}, {src}");
}
}
"fp_div" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " fdiv {dst}, {dst}, {src}");
}
}
"fp_sqrt" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " fsqrt {dst}, {src}");
}
}
// int_to_double: convert signed integer to double
"int_to_double" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
w!(out, " scvtf {dst}, {src}");
}
}
// fp_mov: move between GP and FP registers, or FP to FP
"fp_mov" => {
if insn.operands.len() == 2 {
let dst = resolve_op(&insn.operands[0], handler, program);
let src = resolve_op(&insn.operands[1], handler, program);
let dst_is_fp = dst.starts_with('d') || dst.starts_with('v');
let src_is_fp = src.starts_with('d') || src.starts_with('v');
if dst_is_fp || src_is_fp {
w!(out, " fmov {dst}, {src}");
} else {
w!(out, " mov {dst}, {src}");
}
}
}
// Multi-value equality branch: branch if register equals any of the given values.
// branch_any_eq reg, val1, val2, ..., label
// On aarch64: uses ccmp chain to test all values with a single branch.
// cmp reg, val1; ccmp reg, val2, #4, ne; ccmp reg, val3, #4, ne; b.eq label
// The #4 sets the Z flag in the NZCV field when the ccmp is skipped
// (i.e., when a previous comparison was EQ), so b.eq still triggers.
"branch_any_eq" => {
if insn.operands.len() >= 3 {
let reg = resolve_op(&insn.operands[0], handler, program);
let label = resolve_label(insn.operands.last().unwrap(), handler);
let values: Vec<_> = insn.operands[1..insn.operands.len() - 1]
.iter()
.map(|op| get_immediate_value(op, program))
.collect();
for (i, val) in values.iter().enumerate() {
if i == 0 {
// First comparison: regular cmp
if let Some(v) = val {
let uv = *v as u64;
if uv <= 4095 {
w!(out, " cmp {reg}, #{v}");
} else {
emit_mov_imm(out, "x9", *v);
w!(out, " cmp {reg}, x9");
}
} else {
let op_str = resolve_op(&insn.operands[1], handler, program);
w!(out, " cmp {reg}, {op_str}");
}
} else {
// Subsequent comparisons: ccmp (only executes if NE so far)
// nzcv=#4 means Z=1 when skipped (previous matched), so b.eq works
if let Some(v) = val {
let uv = *v as u64;
if uv <= 31 {
w!(out, " ccmp {reg}, #{v}, #4, ne");
} else {
emit_mov_imm(out, "x9", *v);
w!(out, " ccmp {reg}, x9, #4, ne");
}
} else {
let op_str = resolve_op(&insn.operands[1 + i], handler, program);
w!(out, " ccmp {reg}, {op_str}, #4, ne");
}
}
}
w!(out, " b.eq {label}");
}
}
// Architecture-neutral branch operations.
"branch_eq" | "branch_ne" | "branch_ge_unsigned"
| "branch_lt_signed" | "branch_le_signed"
| "branch_gt_signed" | "branch_ge_signed" => {
if insn.operands.len() == 3 {
let a = resolve_op(&insn.operands[0], handler, program);
let b = resolve_op(&insn.operands[1], handler, program);
let label = resolve_label(&insn.operands[2], handler);
let cc = match m.as_str() {
"branch_eq" => "b.eq",
"branch_ne" => "b.ne",
"branch_ge_unsigned" => "b.hs",
"branch_lt_signed" => "b.lt",
"branch_le_signed" => "b.le",
"branch_gt_signed" => "b.gt",
"branch_ge_signed" => "b.ge",
_ => unreachable!(),
};
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
let uval = val as u64;
if uval <= 4095 {
w!(out, " cmp {a}, #{val}");
} else if is_cmn_candidate(val) {
let neg = (-val) as u64;
w!(out, " cmn {a}, #{neg}");
} else {
emit_mov_imm(out, "x9", val);
w!(out, " cmp {a}, x9");
}
} else {
w!(out, " cmp {a}, {b}");
}
w!(out, " {cc} {label}");
}
}
"branch_zero" | "branch_nonzero" | "branch_negative" | "branch_not_negative" => {
if insn.operands.len() == 2 {
let a = resolve_op(&insn.operands[0], handler, program);
let label = resolve_label(&insn.operands[1], handler);
match m.as_str() {
"branch_zero" => {
w!(out, " cbz {a}, {label}");
}
"branch_nonzero" => {
w!(out, " cbnz {a}, {label}");
}
"branch_negative" => {
w!(out, " tbnz {a}, #63, {label}");
}
"branch_not_negative" => {
w!(out, " tbz {a}, #63, {label}");
}
_ => unreachable!(),
}
}
}
"branch_bits_set" | "branch_bits_clear" => {
if insn.operands.len() == 3 {
let a = resolve_op(&insn.operands[0], handler, program);
let mask = resolve_op(&insn.operands[1], handler, program);
let label = resolve_label(&insn.operands[2], handler);
let cc = match m.as_str() {
"branch_bits_set" => "b.ne",
"branch_bits_clear" => "b.eq",
_ => unreachable!(),
};
if let Some(val) = get_immediate_value(&insn.operands[1], program) {
let uval = val as u64;
if is_logical_immediate(uval) {
w!(out, " tst {a}, #0x{uval:x}");
} else {
emit_mov_imm(out, "x9", val);
w!(out, " tst {a}, x9");
}
} else {
w!(out, " tst {a}, {mask}");
}
w!(out, " {cc} {label}");
}
}
"branch_bit_set" => {
if insn.operands.len() == 3 {
let a = resolve_op(&insn.operands[0], handler, program);
let label = resolve_label(&insn.operands[2], handler);
if let Some(bit) = get_immediate_value(&insn.operands[1], program) {
w!(out, " tbnz {a}, #{bit}, {label}");
} else {
let bit = resolve_op(&insn.operands[1], handler, program);
w!(out, " mov x9, #1");
w!(out, " lsl x9, x9, {bit}");
w!(out, " tst {a}, x9");
w!(out, " b.ne {label}");
}
}
}
// Floating-point compare-and-branch operations.
// Consecutive branch_fp_* with the same operands share one fcmp,
// since fcmp sets all the flags these branches test.
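        // For example (operand registers illustrative), the pair
        //     branch_fp_unordered d0, d1, L1
        //     branch_fp_equal d0, d1, L2
        // emits a single `fcmp d0, d1` followed by `b.vs` and `b.eq`.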
"branch_fp_unordered" | "branch_fp_equal" | "branch_fp_less"
| "branch_fp_less_or_equal" | "branch_fp_greater"
| "branch_fp_greater_or_equal" => {
if insn.operands.len() == 3 {
let a = resolve_op(&insn.operands[0], handler, program);
let b = resolve_op(&insn.operands[1], handler, program);
let label = resolve_label(&insn.operands[2], handler);
let cc = match m.as_str() {
"branch_fp_unordered" => "b.vs",
"branch_fp_equal" => "b.eq",
"branch_fp_less" => "b.mi",
"branch_fp_less_or_equal" => "b.ls",
"branch_fp_greater" => "b.gt",
"branch_fp_greater_or_equal" => "b.ge",
_ => unreachable!(),
};
let need_compare = match &state.last_fp_compare {
Some((prev_a, prev_b)) => *prev_a != a || *prev_b != b,
None => true,
};
if need_compare {
w!(out, " fcmp {a}, {b}");
state.last_fp_compare = Some((a, b));
}
w!(out, " {cc} {label}");
}
return;
}
_ => {
panic!("Unknown instruction '{m}' in handler '{}'", handler.name);
}
}
// Any non-branch_fp instruction may clobber flags, invalidating the
// cached FP comparison. branch_fp_* returns early above to skip this.
state.last_fp_compare = None;
}
/// Emit a memory load with the appropriate ARM64 instruction based on size.
fn emit_mem_load(out: &mut String, dst: &str, mem: &MemOp, size: u32, sign_extend: bool) {
match &mem.index {
MemIndex::None => match (size, sign_extend) {
(8, _) => emit_ldr64(out, dst, &mem.base, 0),
(4, false) => emit_ldr32(out, dst, &mem.base, 0),
(4, true) => w!(out, " ldrsw {dst}, [{}]", mem.base),
(2, false) => emit_ldrh(out, dst, &mem.base, 0),
(2, true) => emit_ldrsh(out, dst, &mem.base, 0),
(1, false) => emit_ldrb(out, dst, &mem.base, 0),
(1, true) => emit_ldrsb(out, dst, &mem.base, 0),
_ => {}
},
MemIndex::Imm(offset) => match (size, sign_extend) {
(8, _) => emit_ldr64(out, dst, &mem.base, *offset),
(4, false) => emit_ldr32(out, dst, &mem.base, *offset),
(4, true) => {
                if *offset >= 0 && *offset <= 16380 && *offset % 4 == 0 {
w!(out, " ldrsw {dst}, [{}, #{offset}]", mem.base);
} else if *offset >= -256 && *offset <= 255 {
w!(out, " ldursw {dst}, [{}, #{offset}]", mem.base);
} else {
emit_mov_imm(out, "x9", *offset);
w!(out, " ldrsw {dst}, [{}, x9]", mem.base);
}
}
(2, false) => emit_ldrh(out, dst, &mem.base, *offset),
(2, true) => emit_ldrsh(out, dst, &mem.base, *offset),
(1, false) => emit_ldrb(out, dst, &mem.base, *offset),
(1, true) => emit_ldrsb(out, dst, &mem.base, *offset),
_ => {}
},
MemIndex::Reg(idx) => {
let instr = match (size, sign_extend) {
(8, _) => "ldr",
(4, false) => "ldr",
(4, true) => "ldrsw",
(2, false) => "ldrh",
(2, true) => "ldrsh",
(1, false) => "ldrb",
(1, true) => "ldrsb",
_ => "ldr",
};
w!(out, " {instr} {dst}, [{}, {idx}]", mem.base);
}
MemIndex::RegScale(idx, scale) => {
let shift = match scale {
1 => None,
2 => Some(1),
4 => Some(2),
8 => Some(3),
_ => None,
};
if let Some(shift_amt) = shift {
let instr = match (size, sign_extend) {
(8, _) => "ldr",
(4, false) => "ldr",
(4, true) => "ldrsw",
(2, false) => "ldrh",
(2, true) => "ldrsh",
(1, false) => "ldrb",
(1, true) => "ldrsb",
_ => "ldr",
};
writeln!(
out,
" {instr} {dst}, [{}, {idx}, lsl #{shift_amt}]",
mem.base
)
.unwrap();
} else {
// Non-power-of-2 scale: compute address manually
emit_mov_imm(out, "x9", *scale);
w!(out, " madd x9, {idx}, x9, {}", mem.base);
let instr = match (size, sign_extend) {
(8, _) => "ldr",
(4, false) => "ldr",
(4, true) => "ldrsw",
(2, false) => "ldrh",
(2, true) => "ldrsh",
(1, false) => "ldrb",
(1, true) => "ldrsb",
_ => "ldr",
};
w!(out, " {instr} {dst}, [x9]");
}
}
}
}
/// Emit a raw (zero-extending) memory load of `size` bytes, for indirect jumps etc.
fn emit_mem_load_raw(out: &mut String, dst: &str, mem: &MemOp, size: u32) {
emit_mem_load(out, dst, mem, size, false);
}
/// Emit a memory store with the appropriate ARM64 instruction based on size.
fn emit_mem_store(out: &mut String, src: &str, mem: &MemOp, size: u32) {
match &mem.index {
MemIndex::None => match size {
8 => emit_str64(out, src, &mem.base, 0),
4 => emit_str32(out, src, &mem.base, 0),
2 => emit_strh(out, src, &mem.base, 0),
1 => emit_strb(out, src, &mem.base, 0),
_ => {}
},
MemIndex::Imm(offset) => match size {
8 => emit_str64(out, src, &mem.base, *offset),
4 => emit_str32(out, src, &mem.base, *offset),
2 => emit_strh(out, src, &mem.base, *offset),
1 => emit_strb(out, src, &mem.base, *offset),
_ => {}
},
MemIndex::Reg(idx) => {
let instr = match size {
8 => "str",
4 => "str",
2 => "strh",
1 => "strb",
_ => "str",
};
w!(out, " {instr} {src}, [{}, {idx}]", mem.base);
}
MemIndex::RegScale(idx, scale) => {
let shift = match scale {
1 => None,
2 => Some(1),
4 => Some(2),
8 => Some(3),
_ => None,
};
if let Some(shift_amt) = shift {
let instr = match size {
8 => "str",
4 => "str",
2 => "strh",
1 => "strb",
_ => "str",
};
writeln!(
out,
" {instr} {src}, [{}, {idx}, lsl #{shift_amt}]",
mem.base
)
.unwrap();
} else {
emit_mov_imm(out, "x9", *scale);
w!(out, " madd x9, {idx}, x9, {}", mem.base);
let instr = match size {
8 => "str",
4 => "str",
2 => "strh",
1 => "strb",
_ => "str",
};
w!(out, " {instr} {src}, [x9]");
}
}
}
}
/// Check if a negative immediate can use CMN instead of CMP.
fn is_cmn_candidate(val: i64) -> bool {
    // checked_neg avoids the i64::MIN negation overflow.
    val.checked_neg()
        .is_some_and(|neg| (0..=4095).contains(&neg))
}