mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-05-05 06:32:30 +02:00
LibJS: Pack asm Call shared-data metadata
Pack the asm Call fast path metadata next to the executable pointer so the interpreter can fetch both values with one paired load. This removes several dependent shared-data loads from the hot path. Keep the executable pointer and packed metadata in separate registers through this binding so the fast path can still use the paired-load layout after any non-strict this adjustment. Lower the packed metadata flag checks correctly on x86_64 as well. Those bits now live above bit 31, so the generator uses bt for single- bit high masks and covers that path with a unit test. Add a runtime test that exercises both object and global this binding through the asm Call fast path.
This commit is contained in:
committed by
Andreas Kling
parent
50c497c59b
commit
517812647a
Notes:
github-actions[bot]
2026-04-14 10:39:09 +00:00
Author: https://github.com/awesomekling Commit: https://github.com/LadybirdBrowser/ladybird/commit/517812647a5 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/8912
@@ -2012,7 +2012,8 @@ handler Call
|
||||
#
|
||||
# Register usage within this handler:
|
||||
# t3 = callee ECMAScriptFunctionObject*
|
||||
# t2 = SharedFunctionInstanceData* / later callee ExecutionContext*
|
||||
# t2 = asm-call metadata / later callee ExecutionContext*
|
||||
# t7 = callee Executable* carried across `this` binding
|
||||
# t8 = boxed `this` value carried into the callee
|
||||
load_operand t0, m_callee
|
||||
extract_tag t1, t0
|
||||
@@ -2026,12 +2027,11 @@ handler Call
|
||||
branch_bits_clear t1, OBJECT_FLAG_IS_ECMASCRIPT_FUNCTION_OBJECT, .call_slow
|
||||
|
||||
load64 t2, [t3, ECMASCRIPT_FUNCTION_OBJECT_SHARED_DATA]
|
||||
load8 t1, [t2, SHARED_FUNCTION_INSTANCE_DATA_CAN_INLINE_CALL]
|
||||
branch_zero t1, .call_slow
|
||||
load_pair64 t7, t2, [t2, SHARED_FUNCTION_INSTANCE_DATA_EXECUTABLE], [t2, SHARED_FUNCTION_INSTANCE_DATA_ASM_CALL_METADATA]
|
||||
branch_bits_clear t2, SHARED_FUNCTION_INSTANCE_DATA_ASM_CALL_METADATA_CAN_INLINE_CALL, .call_slow
|
||||
# NewFunctionEnvironment() allocates and has to stay out of the pure asm
|
||||
# path, but we still preserve inline-call semantics via .call_interp_inline.
|
||||
load8 t1, [t2, SHARED_FUNCTION_INSTANCE_DATA_FUNCTION_ENVIRONMENT_NEEDED]
|
||||
branch_nonzero t1, .call_interp_inline
|
||||
branch_bits_set t2, SHARED_FUNCTION_INSTANCE_DATA_ASM_CALL_METADATA_FUNCTION_ENVIRONMENT_NEEDED, .call_interp_inline
|
||||
|
||||
# Bind this without allocations. Sloppy primitive this-values still need
|
||||
# ToObject(), so they use the C++ inline-frame helper.
|
||||
@@ -2039,11 +2039,9 @@ handler Call
|
||||
# t8 starts as "empty" to match the normal interpreter behavior for
|
||||
# callees that never observe `this`.
|
||||
mov t8, EMPTY_TAG_SHIFTED
|
||||
load8 t1, [t2, SHARED_FUNCTION_INSTANCE_DATA_USES_THIS]
|
||||
branch_zero t1, .this_ready
|
||||
branch_bits_clear t2, SHARED_FUNCTION_INSTANCE_DATA_ASM_CALL_METADATA_USES_THIS, .this_ready
|
||||
load_operand t8, m_this_value
|
||||
load8 t1, [t2, SHARED_FUNCTION_INSTANCE_DATA_STRICT]
|
||||
branch_nonzero t1, .this_ready
|
||||
branch_bits_set t2, SHARED_FUNCTION_INSTANCE_DATA_ASM_CALL_METADATA_STRICT, .this_ready
|
||||
|
||||
# Sloppy null/undefined binds the callee realm's global object.
|
||||
# Sloppy primitive receivers need ToObject(), which may allocate wrappers,
|
||||
@@ -2067,16 +2065,15 @@ handler Call
|
||||
or t8, t1
|
||||
|
||||
.this_ready:
|
||||
# can_inline_call already implies that shared_data has compiled bytecode.
|
||||
load64 t0, [t2, SHARED_FUNCTION_INSTANCE_DATA_EXECUTABLE]
|
||||
# The low 32 bits of the packed metadata word hold the formal parameter count.
|
||||
and t2, 0xFFFFFFFF
|
||||
|
||||
load32 t7, [pb, pc, m_argument_count]
|
||||
load32 t4, [t2, SHARED_FUNCTION_INSTANCE_DATA_FORMAL_PARAMETER_COUNT]
|
||||
branch_ge_unsigned t4, t7, .arg_count_ready
|
||||
mov t4, t7
|
||||
load32 t6, [pb, pc, m_argument_count]
|
||||
mov t4, t2
|
||||
branch_ge_unsigned t4, t6, .arg_count_ready
|
||||
mov t4, t6
|
||||
.arg_count_ready:
|
||||
load32 t5, [t0, EXECUTABLE_REGISTERS_AND_LOCALS_COUNT]
|
||||
load32 t1, [t0, EXECUTABLE_REGISTERS_AND_LOCALS_AND_CONSTANTS_COUNT]
|
||||
load_pair32 t5, t1, [t7, EXECUTABLE_REGISTERS_AND_LOCALS_COUNT], [t7, EXECUTABLE_REGISTERS_AND_LOCALS_AND_CONSTANTS_COUNT]
|
||||
|
||||
# Inline InterpreterStack::allocate().
|
||||
# t1 = total Value slots, t2 = new stack top, t6 = current frame base.
|
||||
|
||||
Reference in New Issue
Block a user