mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-04-26 01:35:08 +02:00
LibRegex: Properly track code units in u-v modes
Previously, both `string_position` and `view_index` held code unit offsets regardless of mode. Now, in Unicode mode, both variables track code point positions, while `string_position_in_code_units` is derived from them so that it always reflects the corresponding code unit offset.
This commit is contained in:
committed by
Ali Mohammad Pur
parent
fb258639d1
commit
5632a52531
Notes:
github-actions[bot]
2025-10-24 19:24:41 +00:00
Author: https://github.com/aplefull; Commit: https://github.com/LadybirdBrowser/ladybird/commit/5632a52531c; Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6567
@@ -237,10 +237,17 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
||||
input.view = view;
|
||||
dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view);
|
||||
|
||||
auto view_length = view.length_in_code_units();
|
||||
auto view_length = view.length();
|
||||
size_t view_index = m_pattern->start_offset;
|
||||
state.string_position = view_index;
|
||||
state.string_position_in_code_units = view_index;
|
||||
if (view.unicode()) {
|
||||
if (view_index < view_length)
|
||||
state.string_position_in_code_units = view.code_unit_offset_of(view_index);
|
||||
else
|
||||
state.string_position_in_code_units = view.length_in_code_units();
|
||||
} else {
|
||||
state.string_position_in_code_units = view_index;
|
||||
}
|
||||
bool succeeded = false;
|
||||
|
||||
if (view_index == view_length && m_pattern->parser_result.match_length_minimum == 0) {
|
||||
@@ -303,7 +310,14 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
||||
input.match_index = match_count;
|
||||
|
||||
state.string_position = view_index;
|
||||
state.string_position_in_code_units = view_index;
|
||||
if (input.view.unicode()) {
|
||||
if (view_index < view_length)
|
||||
state.string_position_in_code_units = input.view.code_unit_offset_of(view_index);
|
||||
else
|
||||
state.string_position_in_code_units = input.view.length_in_code_units();
|
||||
} else {
|
||||
state.string_position_in_code_units = view_index;
|
||||
}
|
||||
state.instruction_position = 0;
|
||||
state.repetition_marks.clear();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user