script: More thoroughly convert between UTF-16 and UTF-8 offsets in text inputs (#41588)

DOM APIs for interacting with selection and text in text inputs
(`<input type=text>` and `<textarea>`) all accept offsets and lengths in
UTF-16 code units. Servo was not converting all of these offsets into
UTF-8 code units. This change makes it so that this conversion is done
more thoroughly and makes it clear when the code is dealing with UTF-8
offsets and UTF-16 offsets.

Helper functions are added for doing this conversion in both directions
as necessary. In addition, a `char` iterator is added for
`TextInput` as it is useful for doing this conversion. It will be used
more completely in the future when a `Rope` data structure is extracted
from `TextInput`.

Finally, specification text is added to all of the DOM implementation
touched here.

Testing: This change includes a new WPT crash test as well as a series
of unit tests to verify conversion between UTF-8 and UTF-16 offsets.
Fixes #36719.
Fixes #20028.
Fixes #39184.

Signed-off-by: Martin Robinson <mrobinson@igalia.com>
This commit is contained in:
Martin Robinson
2025-12-31 10:29:25 +01:00
committed by GitHub
parent b974a6685c
commit e4822c9c5d
8 changed files with 563 additions and 184 deletions

View File

@@ -394,8 +394,7 @@ impl HTMLTextAreaElementMethods<crate::DomTypeHolder> for HTMLTextAreaElement {
/// <https://html.spec.whatwg.org/multipage/#dom-textarea-textlength>
fn TextLength(&self) -> u32 {
let Utf16CodeUnitLength(num_units) = self.textinput.borrow().utf16_len();
num_units as u32
self.textinput.borrow().len_utf16().0 as u32
}
// https://html.spec.whatwg.org/multipage/#dom-lfe-labels
@@ -408,22 +407,24 @@ impl HTMLTextAreaElementMethods<crate::DomTypeHolder> for HTMLTextAreaElement {
/// <https://html.spec.whatwg.org/multipage/#dom-textarea/input-selectionstart>
fn GetSelectionStart(&self) -> Option<u32> {
self.selection().dom_start()
self.selection().dom_start().map(|start| start.0 as u32)
}
/// <https://html.spec.whatwg.org/multipage/#dom-textarea/input-selectionstart>
fn SetSelectionStart(&self, start: Option<u32>) -> ErrorResult {
self.selection().set_dom_start(start)
self.selection()
.set_dom_start(start.map(Utf16CodeUnitLength::from))
}
/// <https://html.spec.whatwg.org/multipage/#dom-textarea/input-selectionend>
fn GetSelectionEnd(&self) -> Option<u32> {
self.selection().dom_end()
self.selection().dom_end().map(|end| end.0 as u32)
}
/// <https://html.spec.whatwg.org/multipage/#dom-textarea/input-selectionend>
fn SetSelectionEnd(&self, end: Option<u32>) -> ErrorResult {
self.selection().set_dom_end(end)
self.selection()
.set_dom_end(end.map(Utf16CodeUnitLength::from))
}
/// <https://html.spec.whatwg.org/multipage/#dom-textarea/input-selectiondirection>
@@ -438,7 +439,11 @@ impl HTMLTextAreaElementMethods<crate::DomTypeHolder> for HTMLTextAreaElement {
/// <https://html.spec.whatwg.org/multipage/#dom-textarea/input-setselectionrange>
fn SetSelectionRange(&self, start: u32, end: u32, direction: Option<DOMString>) -> ErrorResult {
self.selection().set_dom_range(start, end, direction)
self.selection().set_dom_range(
Utf16CodeUnitLength::from(start),
Utf16CodeUnitLength::from(end),
direction,
)
}
/// <https://html.spec.whatwg.org/multipage/#dom-textarea/input-setrangetext>
@@ -455,8 +460,12 @@ impl HTMLTextAreaElementMethods<crate::DomTypeHolder> for HTMLTextAreaElement {
end: u32,
selection_mode: SelectionMode,
) -> ErrorResult {
self.selection()
.set_dom_range_text(replacement, Some(start), Some(end), selection_mode)
self.selection().set_dom_range_text(
replacement,
Some(Utf16CodeUnitLength::from(start)),
Some(Utf16CodeUnitLength::from(end)),
selection_mode,
)
}
/// <https://html.spec.whatwg.org/multipage/#dom-cva-willvalidate>
@@ -823,7 +832,7 @@ impl Validatable for HTMLTextAreaElement {
let mut failed_flags = ValidationFlags::empty();
let textinput = self.textinput.borrow();
let Utf16CodeUnitLength(value_len) = textinput.utf16_len();
let Utf16CodeUnitLength(value_len) = textinput.len_utf16();
let last_edit_by_user = !textinput.was_last_change_by_set_content();
let value_dirty = self.value_dirty.get();