AK: Fix Utf16View::operator<=> code-unit ordering on little-endian

The !has_ascii_storage() && !other.has_ascii_storage() branch did a
byte-wise __builtin_memcmp over a char16_t array. On little-endian
targets that does not give code-unit order, because the low byte of each
code unit is compared first: 0xD83D (bytes [0x3D, 0xD8]) spuriously
compared less than 0x2764 (bytes [0x64, 0x27]) even though the code unit
0xD83D is greater.

No in-tree caller currently uses operator<=> for Utf16View ordering,
so this bug is dormant; the follow-up LibJS change exposes it.

Remove the memcmp branch so that this case falls through to the
existing per-code-unit loop, which the compiler can auto-vectorize and
which mirrors what is_code_unit_less_than already does.
This commit is contained in:
Aliaksandr Kalenik
2026-04-22 15:02:57 +02:00
committed by Shannon Booth
parent 1a806e1b8e
commit eb4038fa83
Notes: github-actions[bot] 2026-04-22 17:14:10 +00:00
2 changed files with 10 additions and 2 deletions

View File

@@ -261,8 +261,6 @@ public:
if (has_ascii_storage() && other.has_ascii_storage()) {
result = __builtin_memcmp(m_string.ascii, other.m_string.ascii, length);
} else if (!has_ascii_storage() && !other.has_ascii_storage()) {
result = __builtin_memcmp(m_string.utf16, other.m_string.utf16, length * sizeof(char16_t));
} else {
for (size_t i = 0; i < length; ++i) {
auto this_code_unit = code_unit_at(i);

View File

@@ -482,6 +482,16 @@ TEST_CASE(comparison)
EXPECT(u"😂"sv > u"😀"sv);
EXPECT(!(u"😂"sv <= u"😀"sv));
EXPECT(u"😂"sv >= u"😀"sv);
EXPECT(u"ÿ"sv < u"Ā"sv);
EXPECT(!(u"ÿ"sv > u"Ā"sv));
EXPECT(u"Ā"sv > u"ÿ"sv);
EXPECT(!(u"Ā"sv < u"ÿ"sv));
EXPECT(u""sv < u"😀"sv);
EXPECT(!(u""sv > u"😀"sv));
EXPECT(u"😀"sv > u""sv);
EXPECT(!(u"😀"sv < u""sv));
}
TEST_CASE(equals_ignoring_case)