LibLine: Correctly count multi-code-point glyphs towards line length

This also adds some tests to make sure things don't break.
This commit is contained in:
Ali Mohammad Pur
2025-12-04 15:19:28 +01:00
committed by Nico Weber
parent 30630e5ae0
commit b476fcd733
5 changed files with 120 additions and 1 deletions

View File

@@ -691,6 +691,7 @@ if (BUILD_LAGOM)
LibHID
LibHTTP
LibIMAP
LibLine
LibLocale
LibMarkdown
LibMedia

View File

@@ -17,6 +17,7 @@ add_subdirectory(LibGfx)
add_subdirectory(LibHID)
add_subdirectory(LibIMAP)
add_subdirectory(LibJS)
add_subdirectory(LibLine)
add_subdirectory(LibLocale)
add_subdirectory(LibMarkdown)
add_subdirectory(LibMedia)

View File

@@ -0,0 +1,7 @@
# Sources that make up the LibLine test suite.
set(TEST_SOURCES
    TestMetrics.cpp
)

# Hand each listed source to serenity_test() on its own, under the LibLine
# test group, linking against LibLine and LibUnicode.
foreach(test_source IN LISTS TEST_SOURCES)
    serenity_test("${test_source}" LibLine LIBS LibLine LibUnicode)
endforeach()

View File

@@ -0,0 +1,99 @@
/*
* Copyright (c) 2025, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <LibLine/Editor.h>
// Pure ASCII given as an `sv` literal: bytes, code points and glyphs all coincide,
// so the grapheme break count and both line lengths are expected to be 13.
TEST_CASE(count_ascii_glyphs_u8)
{
    constexpr auto input = "Hello, World!"sv; // 13 bytes, 13 code points, 13 glyphs
    auto const result = Line::Editor::actual_rendered_string_metrics(input);

    EXPECT_EQ(result.grapheme_breaks.size(), 13u);
    EXPECT_EQ(result.line_metrics.size(), 1u);

    // Only look at the first line after its existence has been checked above.
    auto const& first_line = result.line_metrics[0];
    EXPECT_EQ(first_line.length, 13u);
    EXPECT_EQ(first_line.visible_length, 13u);
}
// Same ASCII text as the u8 variant, but supplied as UTF-32 code points through a Utf32View.
// One code point per glyph, so every count is expected to be 13.
TEST_CASE(count_ascii_glyphs_u32)
{
    // 13 code points, 13 glyphs
    constexpr u32 code_points[] = { 'H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!' };
    auto const result = Line::Editor::actual_rendered_string_metrics(Utf32View(code_points));

    EXPECT_EQ(result.grapheme_breaks.size(), 13u);
    EXPECT_EQ(result.line_metrics.size(), 1u);

    // Only look at the first line after its existence has been checked above.
    auto const& first_line = result.line_metrics[0];
    EXPECT_EQ(first_line.length, 13u);
    EXPECT_EQ(first_line.visible_length, 13u);
}
// Text with two-byte UTF-8 characters (é, ö): the byte length (15) differs from the
// code point count, but each code point is still a single glyph, so all counts are 13.
TEST_CASE(count_simple_multibyte_glyphs_u8)
{
    constexpr auto input = "Héllo, Wörld!"sv; // 15 bytes, 13 code points, 13 glyphs
    auto const result = Line::Editor::actual_rendered_string_metrics(input);

    EXPECT_EQ(result.grapheme_breaks.size(), 13u);
    EXPECT_EQ(result.line_metrics.size(), 1u);

    // Only look at the first line after its existence has been checked above.
    auto const& first_line = result.line_metrics[0];
    EXPECT_EQ(first_line.length, 13u);
    EXPECT_EQ(first_line.visible_length, 13u);
}
// UTF-32 counterpart of the "Héllo, Wörld!" test: 0xe9 is é and 0xf6 is ö.
// Each code point is its own glyph, so every count is expected to be 13.
TEST_CASE(count_simple_multibyte_glyphs_u32)
{
    // 13 code points, 13 glyphs
    constexpr u32 code_points[] = { 'H', 0xe9, 'l', 'l', 'o', ',', ' ', 'W', 0xf6, 'r', 'l', 'd', '!' };
    auto const result = Line::Editor::actual_rendered_string_metrics(Utf32View(code_points));

    EXPECT_EQ(result.grapheme_breaks.size(), 13u);
    EXPECT_EQ(result.line_metrics.size(), 1u);

    // Only look at the first line after its existence has been checked above.
    auto const& first_line = result.line_metrics[0];
    EXPECT_EQ(first_line.length, 13u);
    EXPECT_EQ(first_line.visible_length, 13u);
}
// A glyph built from several code points: the trailing emoji is the ZWJ sequence
// U+1F469 U+200D U+1F4BB (3 code points, 1 glyph). The grapheme break count is therefore
// 15, while the line lengths are expected to count all 17 code points.
TEST_CASE(count_multi_codepoint_glyphs_u8)
{
    constexpr auto input = "Héllo, Wörld! 👩‍💻"sv; // 27 bytes, 17 code points, 15 glyphs
    auto const result = Line::Editor::actual_rendered_string_metrics(input);

    EXPECT_EQ(result.grapheme_breaks.size(), 15u);
    EXPECT_EQ(result.line_metrics.size(), 1u);

    // Only look at the first line after its existence has been checked above.
    auto const& first_line = result.line_metrics[0];
    EXPECT_EQ(first_line.length, 17u);
    EXPECT_EQ(first_line.visible_length, 17u);
}
// Japanese text, with and without combining marks.
//
// The second and third inputs rely on kana written in decomposed form: a base kana followed
// by U+3099 (combining voiced sound mark). That is the only way to get 5 glyphs out of
// 10 code points, or 3 glyphs out of 4. The decomposed and precomposed spellings look
// the same in an editor, and any tool that normalizes the file to NFC would silently change
// the code point counts asserted below. Those literals are therefore written with
// universal-character-name escapes, which encode to the same bytes as the decomposed text.
TEST_CASE(count_jp_glyphs_u8)
{
    {
        // Every code point is its own glyph here (byte length is not asserted).
        constexpr auto string = "コンニチハ、ワールド!"sv; // code points: 11, glyphs: 11
        auto metrics = Line::Editor::actual_rendered_string_metrics(string);
        EXPECT_EQ(metrics.grapheme_breaks.size(), 11u);
        EXPECT_EQ(metrics.line_metrics.size(), 1u);
        EXPECT_EQ(metrics.line_metrics[0].length, 11u);
        EXPECT_EQ(metrics.line_metrics[0].visible_length, 11u);
    }
    {
        // "がぎぐげご" as か, き, く, け, こ each followed by U+3099.
        // length in bytes: 30, code points: 10, glyphs: 5
        constexpr auto string = "\u304B\u3099\u304D\u3099\u304F\u3099\u3051\u3099\u3053\u3099"sv;
        auto metrics = Line::Editor::actual_rendered_string_metrics(string);
        EXPECT_EQ(metrics.grapheme_breaks.size(), 5u);
        EXPECT_EQ(metrics.line_metrics.size(), 1u);
        EXPECT_EQ(metrics.line_metrics[0].length, 10u);
        EXPECT_EQ(metrics.line_metrics[0].visible_length, 10u);
    }
    {
        // "食べる" as 食, へ + U+3099, る.
        // length in bytes: 12, code points: 4, glyphs: 3
        constexpr auto string = "\u98DF\u3078\u3099\u308B"sv;
        auto metrics = Line::Editor::actual_rendered_string_metrics(string);
        EXPECT_EQ(metrics.grapheme_breaks.size(), 3u);
        EXPECT_EQ(metrics.line_metrics.size(), 1u);
        EXPECT_EQ(metrics.line_metrics[0].length, 4u);
        EXPECT_EQ(metrics.line_metrics[0].visible_length, 4u);
    }
}
// Mixed scripts on one line: Latin with accents, katakana, a ZWJ emoji sequence
// (3 code points, 1 glyph) and a Persian word. The grapheme break count is 34, while the
// line lengths are expected to count all 36 code points.
TEST_CASE(count_multi_codepoint_glyphs_mixed_u8)
{
    constexpr auto input = "Héllo, コンニチハ! 👩‍💻 persian word: کتاب"sv; // 59 bytes, 36 code points, 34 glyphs
    auto const result = Line::Editor::actual_rendered_string_metrics(input);

    EXPECT_EQ(result.grapheme_breaks.size(), 34u);
    EXPECT_EQ(result.line_metrics.size(), 1u);

    // Only look at the first line after its existence has been checked above.
    auto const& first_line = result.line_metrics[0];
    EXPECT_EQ(first_line.length, 36u);
    EXPECT_EQ(first_line.visible_length, 36u);
}

View File

@@ -1940,7 +1940,6 @@ StringMetrics Editor::actual_rendered_string_metrics(Utf32View const& view, RedB
for (size_t break_index = 0; break_index < grapheme_breaks.size(); ++break_index) {
auto i = grapheme_breaks[break_index];
auto c = view[i];
if (!mask_it.is_end() && mask_it.key() <= i)
mask = *mask_it;
@@ -1950,8 +1949,20 @@ StringMetrics Editor::actual_rendered_string_metrics(Utf32View const& view, RedB
continue;
}
auto next_grapheme_start = break_index + 1 < grapheme_breaks.size() ? grapheme_breaks[break_index + 1] : view.length();
auto next_c = break_index + 1 < grapheme_breaks.size() ? view.code_points()[grapheme_breaks[break_index + 1]] : 0;
auto c = view[i];
state = actual_rendered_string_length_step(metrics, i, current_line, c, next_c, state, mask, maximum_line_width, last_return);
for (size_t j = i + 1; j < next_grapheme_start; ++j) {
// Consume the rest of the code points in this grapheme cluster without updating the state; this is just to account for their length properly.
current_line.length++;
current_line.visible_length++;
metrics.total_length++;
if (current_line.bit_length.has_value())
current_line.bit_length.value() += code_point_length_in_utf8(view[j]);
}
if (!mask_it.is_end() && mask_it.key() <= i) {
auto mask_it_peek = mask_it;
++mask_it_peek;