AK: Fix Utf16View::operator<=> code-unit ordering on little-endian

The !has_ascii_storage() && !other.has_ascii_storage() branch did a byte-wise __builtin_memcmp over a char16_t array, which on little-endian hosts does not give code-unit order: the low byte of each code unit is compared first, so 0xD83D (bytes [0x3D, 0xD8]) spuriously compared less than 0x2764 (bytes [0x64, 0x27]) even though the code unit 0xD83D is greater. No in-tree caller currently uses operator<=> for Utf16View ordering, so this bug is dormant; the follow-up LibJS change exposes it. Remove the memcmp branch so that two UTF-16-backed views fall through to the existing per-code-unit loop, which the compiler can auto-vectorize and which mirrors what is_code_unit_less_than already does.
Author: https://github.com/kalenikaliaksandr Commit: https://github.com/LadybirdBrowser/ladybird/commit/eb4038fa830 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/9036 Reviewed-by: https://github.com/shannonbooth
2026-04-25 17:25:08 +02:00 · 2026-04-22 15:02:57 +02:00 · 2026-04-22 17:14:10 +00:00
parent 1a806e1b8e
commit eb4038fa83
2 changed files with 10 additions and 2 deletions
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -261,8 +261,6 @@ public:

        if (has_ascii_storage() && other.has_ascii_storage()) {
            result = __builtin_memcmp(m_string.ascii, other.m_string.ascii, length);
-        } else if (!has_ascii_storage() && !other.has_ascii_storage()) {
-            result = __builtin_memcmp(m_string.utf16, other.m_string.utf16, length * sizeof(char16_t));
        } else {
            for (size_t i = 0; i < length; ++i) {
                auto this_code_unit = code_unit_at(i);
--- a/Tests/AK/TestUtf16View.cpp
+++ b/Tests/AK/TestUtf16View.cpp
@@ -482,6 +482,16 @@ TEST_CASE(comparison)
    EXPECT(u"😂"sv > u"😀"sv);
    EXPECT(!(u"😂"sv <= u"😀"sv));
    EXPECT(u"😂"sv >= u"😀"sv);
+
+    EXPECT(u"ÿ"sv < u"Ā"sv);
+    EXPECT(!(u"ÿ"sv > u"Ā"sv));
+    EXPECT(u"Ā"sv > u"ÿ"sv);
+    EXPECT(!(u"Ā"sv < u"ÿ"sv));
+
+    EXPECT(u"❤"sv < u"😀"sv);
+    EXPECT(!(u"❤"sv > u"😀"sv));
+    EXPECT(u"😀"sv > u"❤"sv);
+    EXPECT(!(u"😀"sv < u"❤"sv));
 }

 TEST_CASE(equals_ignoring_case)