LibJS: Add a charCodeAt builtin bytecode fast path

Teach builtin call specialization to recognize non-computed
member calls to charCodeAt() and emit a dedicated builtin opcode.
Mark String.prototype.charCodeAt with that builtin tag, then add
an asm interpreter fast path for primitive-string receivers whose
UTF-16 data is already resident.

The asm path handles both ASCII-backed and UTF-16-backed resident
strings, returns NaN for out-of-bounds Int32 indices, and falls
back to the generic builtin call path for everything else. This
keeps the optimistic case in asm while preserving the ordinary
method call semantics when charCodeAt has been replaced or when
string resolution would be required.
This commit is contained in:
Andreas Kling
2026-04-12 14:06:57 +02:00
committed by Andreas Kling
parent 7ffe01cee3
commit d31750a43c
Notes: github-actions[bot] 2026-04-12 17:17:01 +00:00
7 changed files with 150 additions and 25 deletions

View File

@@ -367,6 +367,55 @@ macro validate_callee_builtin(expected_builtin, fail)
branch_ne t3, expected_builtin, fail
end
# Load a UTF-16 code unit from a primitive string with resident UTF-16 data.
# Input:
# t2 = PrimitiveString*
# t4 = non-negative code-unit index
# Output:
# t0 = zero-extended code unit
# Clobbers:
# t3, t5
# Jumps to out_of_bounds if index >= string length.
# Jumps to fail if the string would require resolving deferred data, i.e. if
# its deferred kind is not NONE or if the 64-bit word at
# PRIMITIVE_STRING_UTF16_STRING is zero (presumably: no UTF-16 string has been
# materialised yet -- confirm against PrimitiveString).
# t0 is written only after every check has passed, so on the out_of_bounds
# and fail exits it still holds whatever the caller had in it.
# Three storage layouts are distinguished:
#   1. inline short string  -> bytes stored inside the PrimitiveString itself
#   2. long, 8-bit storage   -> bytes stored behind the pointer in t5
#   3. long, 16-bit storage  -> 16-bit units stored behind the pointer in t5
macro load_primitive_string_utf16_code_unit(out_of_bounds, fail)
# Bail out unless the deferred kind is NONE.
load8 t3, [t2, PRIMITIVE_STRING_DEFERRED_KIND]
branch_ne t3, PRIMITIVE_STRING_DEFERRED_KIND_NONE, fail
# t5 = raw 64-bit contents of the UTF-16 string field; zero means nothing to read.
load64 t5, [t2, PRIMITIVE_STRING_UTF16_STRING]
branch_zero t5, fail
# Test the short-string flag in the field's byte-count-and-flag byte.
load8 t3, [t2, PRIMITIVE_STRING_UTF16_SHORT_STRING_BYTE_COUNT_AND_FLAG]
and t3, UTF16_SHORT_STRING_FLAG
branch_zero t3, .long_storage
# Short string: the `and` above destroyed t3, so reload the byte and shift
# the flag bits away to obtain the byte count.
load8 t3, [t2, PRIMITIVE_STRING_UTF16_SHORT_STRING_BYTE_COUNT_AND_FLAG]
shr t3, UTF16_SHORT_STRING_BYTE_COUNT_SHIFT_COUNT
# The byte count doubles as the length here because this path reads one byte
# per code unit.
branch_ge_unsigned t4, t3, out_of_bounds
# t0 = address of the inline storage, then t0 = storage[index].
mov t0, t2
add t0, PRIMITIVE_STRING_UTF16_SHORT_STRING_STORAGE
load8 t0, [t0, t4]
jmp .done
.long_storage:
# t5 is used as a pointer to the out-of-line string data from here on.
load64 t3, [t5, UTF16_STRING_DATA_LENGTH_IN_CODE_UNITS]
# A set top bit in the length word selects the 16-bit storage path.
branch_negative t3, .utf16_storage
# 8-bit storage: t3 is the length as-is.
branch_ge_unsigned t4, t3, out_of_bounds
add t5, UTF16_STRING_DATA_STRING_STORAGE
load8 t0, [t5, t4]
jmp .done
.utf16_storage:
# Drop the top (flag) bit so t3 holds only the length in code units.
shl t3, 1
shr t3, 1
branch_ge_unsigned t4, t3, out_of_bounds
# t0 = index * 2 = byte offset of the requested 16-bit code unit.
mov t0, t4
add t0, t4
add t5, UTF16_STRING_DATA_STRING_STORAGE
load16 t0, [t5, t0]
.done:
end
# Dispatch the instruction at current pc (without advancing).
# Clobbers t0.
macro dispatch_current()
@@ -2082,6 +2131,34 @@ handler CallBuiltinStringIteratorPrototypeNext
call_slow_path asm_slow_path_call_builtin_string_iterator_prototype_next
end
# Fast path for the dedicated charCodeAt builtin call instruction.
# Handles only: callee still tagged as the charCodeAt builtin, `this` tagged as
# a string, and an Int32-tagged argument. Anything else -- including strings
# the code-unit loader refuses to read -- is handed to the slow path.
# Register roles:
#   t1 = scratch for the boxed operand / boxed result
#   t2 = unboxed string pointer (input to the code-unit loader)
#   t3 = scratch for tag checks
#   t4 = unboxed Int32 index (input to the code-unit loader)
#   t0 = code unit produced by the loader, or the NaN bit pattern
handler CallBuiltinStringPrototypeCharCodeAt
# Go slow if the callee's builtin tag is not charCodeAt.
validate_callee_builtin BUILTIN_STRING_PROTOTYPE_CHAR_CODE_AT, .slow
# Receiver must carry the string tag; t2 = the unboxed pointer.
load_operand t1, m_this_value
extract_tag t3, t1
branch_ne t3, STRING_TAG, .slow
unbox_object t2, t1
# Argument must carry the Int32 tag; t4 = the index.
load_operand t1, m_argument
extract_tag t3, t1
branch_ne t3, INT32_TAG, .slow
unbox_int32 t4, t1
# Negative indices are out of range; the loader requires a non-negative t4.
branch_negative t4, .out_of_bounds
load_primitive_string_utf16_code_unit .out_of_bounds, .slow
# Box the code unit as an Int32 and store it as the result.
box_int32_clean t1, t0
store_operand m_dst, t1
dispatch_next
.out_of_bounds:
# Out-of-range index: the result is NaN.
mov t0, CANON_NAN_BITS
store_operand m_dst, t0
dispatch_next
.slow:
call_slow_path asm_slow_path_call_builtin_string_prototype_char_code_at
end
# ============================================================================
# Slow-path-only handlers
# ============================================================================

View File

@@ -8,6 +8,8 @@
// Compiled with the same flags as LibJS so layouts match exactly.
#include <AK/Format.h>
#include <AK/StringBase.h>
#include <AK/Utf16StringData.h>
#include <LibJS/Bytecode/Builtins.h>
#include <LibJS/Bytecode/Executable.h>
#include <LibJS/Bytecode/Interpreter.h>
@@ -19,6 +21,7 @@
#include <LibJS/Runtime/FunctionObject.h>
#include <LibJS/Runtime/IndexedProperties.h>
#include <LibJS/Runtime/Object.h>
#include <LibJS/Runtime/PrimitiveString.h>
#include <LibJS/Runtime/Realm.h>
#include <LibJS/Runtime/Shape.h>
#include <LibJS/Runtime/TypedArray.h>
@@ -34,6 +37,12 @@ int main()
using namespace JS;
using namespace JS::Bytecode;
// Layout-probing shim: derives from PrimitiveString and re-exports the
// DeferredKind type and the m_deferred_kind / m_utf16_string members through
// using-declarations, so the offset-emitting code below can name them via this
// type. Presumably those names are not publicly accessible on PrimitiveString
// itself -- confirm against PrimitiveString.h. The struct adds no members of
// its own.
struct PrimitiveStringLayoutAccessor : PrimitiveString {
using PrimitiveString::DeferredKind;
using PrimitiveString::m_deferred_kind;
using PrimitiveString::m_utf16_string;
};
outln("# Generated by gen_asm_offsets -- DO NOT EDIT\n");
// Object layout
@@ -191,6 +200,7 @@ int main()
outln("const BUILTIN_MATH_ROUND = {}", static_cast<u8>(Bytecode::Builtin::MathRound));
outln("const BUILTIN_MATH_SQRT = {}", static_cast<u8>(Bytecode::Builtin::MathSqrt));
outln("const BUILTIN_MATH_EXP = {}", static_cast<u8>(Bytecode::Builtin::MathExp));
outln("const BUILTIN_STRING_PROTOTYPE_CHAR_CODE_AT = {}", static_cast<u8>(Bytecode::Builtin::StringPrototypeCharCodeAt));
// FunctionObject layout
outln("\n# FunctionObject layout");
@@ -208,6 +218,26 @@ int main()
outln("const FUNCTION_OBJECT_BUILTIN_HAS_VALUE = {}", base + 1);
}
// PrimitiveString layout
outln("\n# PrimitiveString layout");
EMIT_OFFSET(PRIMITIVE_STRING_DEFERRED_KIND, PrimitiveStringLayoutAccessor, m_deferred_kind);
EMIT_OFFSET(PRIMITIVE_STRING_UTF16_STRING, PrimitiveStringLayoutAccessor, m_utf16_string);
outln("const PRIMITIVE_STRING_DEFERRED_KIND_NONE = {}", static_cast<u8>(PrimitiveStringLayoutAccessor::DeferredKind::None));
// Utf16String / ShortString layout
outln("\n# Utf16String layout");
outln("const UTF16_SHORT_STRING_FLAG = {}", AK::Detail::StringBase::SHORT_STRING_FLAG);
outln("const UTF16_SHORT_STRING_BYTE_COUNT_SHIFT_COUNT = {}", AK::Detail::StringBase::SHORT_STRING_BYTE_COUNT_SHIFT_COUNT);
EMIT_OFFSET(UTF16_SHORT_STRING_BYTE_COUNT_AND_FLAG, AK::Detail::ShortString, byte_count_and_short_string_flag);
EMIT_OFFSET(UTF16_SHORT_STRING_STORAGE, AK::Detail::ShortString, storage);
outln("const PRIMITIVE_STRING_UTF16_SHORT_STRING_BYTE_COUNT_AND_FLAG = {}", offsetof(PrimitiveStringLayoutAccessor, m_utf16_string) + offsetof(AK::Detail::ShortString, byte_count_and_short_string_flag));
outln("const PRIMITIVE_STRING_UTF16_SHORT_STRING_STORAGE = {}", offsetof(PrimitiveStringLayoutAccessor, m_utf16_string) + offsetof(AK::Detail::ShortString, storage));
// Utf16StringData layout
outln("\n# Utf16StringData layout");
EMIT_OFFSET(UTF16_STRING_DATA_LENGTH_IN_CODE_UNITS, AK::Detail::Utf16StringData, m_length_in_code_units);
outln("const UTF16_STRING_DATA_STRING_STORAGE = {}", AK::Detail::Utf16StringData::offset_of_string_storage());
// Environment layout
outln("\n# Environment layout");
EMIT_OFFSET(ENVIRONMENT_SCREWED_BY_EVAL, Environment, m_permanently_screwed_by_eval);