layout: Split hard line breaks into their own TextRunItem (#44436)

This change makes it so that hard line breaks are not shaped during
inline formatting context creation. Instead they are a separate variant
of a new `TextRunItem` enum. This will make it easier to start
linebreaking later in the flow of inline layout.

Testing: This fixes three WPT tests, likely because we are now properly
using the BiDi level of BiDi control characters. One test starts
failing, but
it depends on tab rendering, which we do not currently support properly.

Signed-off-by: Martin Robinson <mrobinson@igalia.com>
This commit is contained in:
Martin Robinson
2026-04-23 21:29:20 +02:00
committed by GitHub
parent 464ae8204f
commit 25ef3121a7
8 changed files with 200 additions and 195 deletions

View File

@@ -470,7 +470,7 @@ impl Font {
/// Fast path for ASCII text that only needs simple horizontal LTR kerning.
fn shape_text_fast(&self, text: &str, options: &ShapingOptions) -> GlyphStore {
let mut glyph_store = GlyphStore::new(text, text.len(), options);
let mut glyph_store = GlyphStore::new(text.len(), options);
let mut prev_glyph_id = None;
for (string_byte_offset, byte) in text.bytes().enumerate() {
let character = byte as char;

View File

@@ -243,10 +243,6 @@ pub struct GlyphStore {
/// but that may not be the case with `white-space: break-spaces`.
ends_with_whitespace: bool,
/// Whether or not this glyph store contains only a single glyph for a single
/// preserved newline.
is_single_preserved_newline: bool,
/// Whether or not this [`GlyphStore`] has right-to-left text, which has implications
/// about the order of the glyphs in the store.
is_rtl: bool,
@@ -256,7 +252,7 @@ impl GlyphStore {
/// Initializes the glyph store with the given capacity, but doesn't actually add any glyphs.
///
/// Use the `add_*` methods to store glyph data.
pub(crate) fn new(text: &str, length: usize, options: &ShapingOptions) -> Self {
pub(crate) fn new(length: usize, options: &ShapingOptions) -> Self {
Self {
glyphs: Vec::with_capacity(length),
detailed_glyphs: Default::default(),
@@ -269,7 +265,6 @@ impl GlyphStore {
ends_with_whitespace: options
.flags
.contains(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG),
is_single_preserved_newline: text.len() == 1 && text.starts_with('\n'),
is_rtl: options.flags.contains(ShapingFlags::RTL_FLAG),
}
}
@@ -303,7 +298,7 @@ impl GlyphStore {
};
let mut previous_character_offset = None;
let mut glyph_store = GlyphStore::new(text, shaped_glyph_data.len(), options);
let mut glyph_store = GlyphStore::new(shaped_glyph_data.len(), options);
for mut shaped_glyph in shaped_glyph_data.iter() {
// The glyph "cluster" (HarfBuzz terminology) is the byte offset in the string that
// this glyph corresponds to. More than one glyph can share a cluster.
@@ -389,12 +384,6 @@ impl GlyphStore {
self.is_whitespace
}
/// Whether or not this [`GlyphStore`] is a single preserved newline.
#[inline]
pub fn is_single_preserved_newline(&self) -> bool {
self.is_single_preserved_newline
}
/// Whether or not this [`GlyphStore`] ends with whitespace.
#[inline]
pub fn ends_with_whitespace(&self) -> bool {

View File

@@ -122,7 +122,7 @@ use crate::dom::WeakLayoutBox;
use crate::dom_traversal::NodeAndStyleInfo;
use crate::flow::float::{FloatBox, SequentialLayoutState};
use crate::flow::inline::line::TextRunOffsets;
use crate::flow::inline::text_run::FontAndScriptInfo;
use crate::flow::inline::text_run::{FontAndScriptInfo, TextRunItem, TextRunSegment};
use crate::flow::{
BlockLevelBox, CollapsibleWithParentStartMargin, FloatSide, PlacementState,
compute_inline_content_sizes_for_block_level_boxes, layout_block_level_child,
@@ -2132,15 +2132,15 @@ impl InlineFormattingContext {
InlineItem::TextRun(text_run) => {
let text_run = &*text_run.borrow();
let parent_style = text_run.inline_styles.style.borrow();
text_run.shaped_text.iter().all(|segment| {
segment.runs.iter().all(|run| {
text_run.items.iter().all(|item| match item {
TextRunItem::LineBreak { .. } => false,
TextRunItem::TextSegment(segment) => segment.runs.iter().all(|run| {
run.is_whitespace() &&
!run.is_single_preserved_newline() &&
!matches!(
parent_style.get_inherited_text().white_space_collapse,
WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces
)
})
}),
})
},
InlineItem::OutOfFlowAbsolutelyPositionedBox(..) => true,
@@ -2751,64 +2751,16 @@ impl<'layout_data> ContentSizesComputation<'layout_data> {
InlineItem::TextRun(text_run) => {
let text_run = &*text_run.borrow();
let parent_style = text_run.inline_styles.style.borrow();
for segment in text_run.shaped_text.iter() {
let style_text = parent_style.get_inherited_text();
let can_wrap = style_text.text_wrap_mode == TextWrapMode::Wrap;
// TODO: This should take account whether or not the first and last character prevent
// linebreaks after atomics as in layout.
let break_at_start =
segment.break_at_start && self.had_content_yet_for_min_content;
for (run_index, run) in segment.runs.iter().enumerate() {
// Break before each unbreakable run in this TextRun, except the first unless the
// linebreaker was set to break before the first run.
if can_wrap && (run_index != 0 || break_at_start) {
self.line_break_opportunity();
}
let advance = run.total_advance();
if run.is_whitespace() {
for item in text_run.items.iter() {
match item {
TextRunItem::LineBreak { .. } => {
// If this run is a forced line break, we *must* break the line
// and start measuring from the inline origin once more.
if run.is_single_preserved_newline() {
self.forced_line_break();
continue;
}
if !matches!(
style_text.white_space_collapse,
WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces
) {
if self.had_content_yet_for_min_content {
if can_wrap {
self.line_break_opportunity();
} else {
self.pending_whitespace.min_content += advance;
}
}
if self.had_content_yet_for_max_content {
self.pending_whitespace.max_content += advance;
}
continue;
}
if can_wrap {
self.pending_whitespace.max_content += advance;
self.commit_pending_whitespace();
self.line_break_opportunity();
continue;
}
}
self.commit_pending_whitespace();
self.add_inline_size(advance);
// Typically whitespace glyphs are placed in a separate store,
// but for `white-space: break-spaces` we place the first whitespace
// with the preceding text. That prevents a line break before that
// first space, but we still need to allow a line break after it.
if can_wrap && run.ends_with_whitespace() {
self.line_break_opportunity();
}
self.forced_line_break();
},
TextRunItem::TextSegment(segment) => {
self.process_text_segment(&parent_style, segment)
},
}
}
},
@@ -2865,6 +2817,64 @@ impl<'layout_data> ContentSizesComputation<'layout_data> {
}
}
fn process_text_segment(
&mut self,
parent_style: &atomic_refcell::AtomicRef<'_, ServoArc<ComputedValues>>,
segment: &TextRunSegment,
) {
let style_text = parent_style.get_inherited_text();
let can_wrap = style_text.text_wrap_mode == TextWrapMode::Wrap;
// TODO: This should take account whether or not the first and last character prevent
// linebreaks after atomics as in layout.
let break_at_start = segment.break_at_start && self.had_content_yet_for_min_content;
for (run_index, run) in segment.runs.iter().enumerate() {
// Break before each unbreakable run in this TextRun, except the first unless the
// linebreaker was set to break before the first run.
if can_wrap && (run_index != 0 || break_at_start) {
self.line_break_opportunity();
}
let advance = run.total_advance();
if run.is_whitespace() {
if !matches!(
style_text.white_space_collapse,
WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces
) {
if self.had_content_yet_for_min_content {
if can_wrap {
self.line_break_opportunity();
} else {
self.pending_whitespace.min_content += advance;
}
}
if self.had_content_yet_for_max_content {
self.pending_whitespace.max_content += advance;
}
continue;
}
if can_wrap {
self.pending_whitespace.max_content += advance;
self.commit_pending_whitespace();
self.line_break_opportunity();
continue;
}
}
self.commit_pending_whitespace();
self.add_inline_size(advance);
// Typically whitespace glyphs are placed in a separate store,
// but for `white-space: break-spaces` we place the first whitespace
// with the preceding text. That prevents a line break before that
// first space, but we still need to allow a line break after it.
if can_wrap && run.ends_with_whitespace() {
self.line_break_opportunity();
}
}
}
fn add_inline_size(&mut self, l: Au) {
self.current_line.min_content += l;
self.current_line.max_content += l;

View File

@@ -91,7 +91,14 @@ impl From<&FontAndScriptInfo> for ShapingOptions {
if info.bidi_level.is_rtl() {
flags.insert(ShapingFlags::RTL_FLAG);
}
if info.letter_spacing.is_some() {
// From https://www.w3.org/TR/css-text-3/#cursive-script:
// Cursive scripts do not admit gaps between their letters for either
// justification or letter-spacing.
let letter_spacing = info
.letter_spacing
.filter(|_| !is_cursive_script(info.script));
if letter_spacing.is_some() {
flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
};
if info.text_rendering == TextRendering::Optimizespeed {
@@ -99,7 +106,7 @@ impl From<&FontAndScriptInfo> for ShapingOptions {
flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
}
Self {
letter_spacing: info.letter_spacing,
letter_spacing,
word_spacing: info.word_spacing,
script: info.script,
language: info.language,
@@ -116,7 +123,7 @@ pub(crate) struct TextRunSegment {
pub info: Arc<FontAndScriptInfo>,
/// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
pub range: Range<usize>,
pub byte_range: Range<usize>,
/// The range of characters in the parent [`super::InlineFormattingContext`]'s text content.
pub character_range: Range<usize>,
@@ -133,41 +140,52 @@ pub(crate) struct TextRunSegment {
impl TextRunSegment {
fn new(
info: Arc<FontAndScriptInfo>,
start_offset: usize,
start_character_offset: usize,
byte_range: Range<usize>,
character_range: Range<usize>,
) -> Self {
Self {
info,
range: start_offset..start_offset,
character_range: start_character_offset..start_character_offset,
byte_range,
character_range,
runs: Vec::new(),
break_at_start: false,
}
}
/// Update this segment if the Font and Script are compatible. The update will only
/// ever make the Script specific. Returns true if the new Font and Script are
/// compatible with this segment or false otherwise.
fn update_if_compatible(
&mut self,
new_font: &FontRef,
/// Returns true if the new `Font`, `Script` and BiDi `Level` are compatible with this segment
/// or false otherwise.
fn is_compatible(
&self,
new_font: &Option<FontRef>,
new_script: Script,
new_bidi_level: Level,
) -> bool {
if self.info.bidi_level != new_bidi_level || !Arc::ptr_eq(&self.info.font, new_font) {
if self.info.bidi_level != new_bidi_level {
return false;
}
if new_font
.as_ref()
.is_some_and(|new_font| !Arc::ptr_eq(&self.info.font, new_font))
{
return false;
}
fn is_specific(script: Script) -> bool {
script != Script::Common && script != Script::Inherited
}
if !is_specific(self.info.script) && is_specific(new_script) {
!script_is_specific(self.info.script) ||
!script_is_specific(new_script) ||
self.info.script == new_script
}
/// Update this segment to end at the given byte and character index. The update will only ever
/// make the Script specific and will not change it otherwise.
fn update(&mut self, next_byte_index: usize, next_character_index: usize, new_script: Script) {
if !script_is_specific(self.info.script) && script_is_specific(new_script) {
self.info = Arc::new(FontAndScriptInfo {
script: new_script,
..(*self.info).clone()
});
}
new_script == self.info.script || !is_specific(new_script)
self.character_range.end = next_character_index;
self.byte_range.end = next_byte_index;
}
fn layout_into_line_items(
@@ -195,15 +213,6 @@ impl TextRunSegment {
character_range: character_range_start..new_character_range_end,
});
// If this whitespace forces a line break, queue up a hard line break the next time we
// see any content. We don't line break immediately, because we'd like to finish processing
// any ongoing inline boxes before ending the line.
if run.is_single_preserved_newline() {
ifc.defer_forced_line_break_at_character_offset(character_range_start);
character_range_start = new_character_range_end;
continue;
}
// Break before each unbreakable run in this TextRun, except the first unless the
// linebreaker was set to break before the first run.
if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
@@ -242,8 +251,8 @@ impl TextRunSegment {
// Gather the linebreaks that apply to this segment from the inline formatting context's collection
// of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
// piece of text is processed.
let range = self.range.clone();
let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
let range = self.byte_range.clone();
let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.byte_range.clone());
let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));
self.runs.clear();
@@ -255,10 +264,10 @@ impl TextRunSegment {
text_style.overflow_wrap == OverflowWrap::Anywhere ||
text_style.overflow_wrap == OverflowWrap::BreakWord;
let mut last_slice = self.range.start..self.range.start;
let mut last_slice = self.byte_range.start..self.byte_range.start;
for break_index in linebreak_iter {
let mut options = options;
if *break_index == self.range.start {
if *break_index == self.byte_range.start {
self.break_at_start = true;
continue;
}
@@ -304,7 +313,7 @@ impl TextRunSegment {
// If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
// TODO: This should only happen for CJK text.
if !ends_with_whitespace &&
*break_index != self.range.end &&
*break_index != self.byte_range.end &&
text_style.word_break == WordBreak::KeepAll &&
!can_break_anywhere
{
@@ -366,6 +375,16 @@ impl TextRunSegment {
}
}
/// A single item in a [`TextRun`].
#[derive(Debug, MallocSizeOf)]
pub(crate) enum TextRunItem {
/// A hard line break i.e. a "\n" as other types line breaks are normalized to "\n".
LineBreak { character_index: usize },
/// Any other text for which a font can be matched. We store a `Box` here as [`TextRunSegment`]
/// is quite a bit larger than the other enum variants.
TextSegment(Box<TextRunSegment>),
}
/// A single [`TextRun`] for the box tree. These are all descendants of
/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`]. During
/// box tree construction, text is split into [`TextRun`]s based on their font, script,
@@ -396,9 +415,10 @@ pub(crate) struct TextRun {
/// UTF-8 offsets.
pub character_range: Range<usize>,
/// The text of this [`TextRun`] with a font selected, broken into unbreakable
/// The [`TextRunItem`]s of this text run. This is produced by segmenting the incoming text
/// by things such as font and script as well as separating out hard line breaks.
/// segments, and shaped.
pub shaped_text: Vec<TextRunSegment>,
pub items: Vec<TextRunItem>,
}
impl TextRun {
@@ -414,7 +434,7 @@ impl TextRun {
inline_styles,
text_range,
character_range,
shaped_text: Vec::new(),
items: Vec::new(),
}
}
@@ -426,16 +446,17 @@ impl TextRun {
bidi_info: &BidiInfo,
) {
let parent_style = self.inline_styles.style.borrow().clone();
let mut segments = self.segment_text_by_font(
self.items = self.segment_text_by_font(
layout_context,
formatting_context_text,
bidi_info,
&parent_style,
);
for segment in segments.iter_mut() {
segment.shape_text(&parent_style, formatting_context_text, linebreaker);
for item in self.items.iter_mut() {
if let TextRunItem::TextSegment(text_segment) = item {
text_segment.shape_text(&parent_style, formatting_context_text, linebreaker)
}
}
let _ = std::mem::replace(&mut self.shaped_text, segments);
}
/// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
@@ -447,17 +468,11 @@ impl TextRun {
formatting_context_text: &str,
bidi_info: &BidiInfo,
parent_style: &ServoArc<ComputedValues>,
) -> Vec<TextRunSegment> {
) -> Vec<TextRunItem> {
let font_style = parent_style.clone_font();
let language = font_style._x_lang.0.parse().unwrap_or(Language::UND);
let font_size = font_style.font_size.computed_size().into();
let font_group = layout_context.font_context.font_group(font_style);
let mut current: Option<TextRunSegment> = None;
let mut results = Vec::new();
let text_run_text = &formatting_context_text[self.text_range.clone()];
let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
let inherited_text_style = parent_style.get_inherited_text().clone();
let word_spacing = Some(inherited_text_style.word_spacing.to_used_value(font_size));
let letter_spacing = inherited_text_style
@@ -471,7 +486,18 @@ impl TextRun {
};
let text_rendering = inherited_text_style.text_rendering;
// The next bytes index of the charcter within the entire inline formatting context's text.
let mut current: Option<TextRunSegment> = None;
let mut results = Vec::new();
let finish_current_segment =
|current: &mut Option<TextRunSegment>, results: &mut Vec<TextRunItem>| {
if let Some(current) = current.take() {
results.push(TextRunItem::TextSegment(Box::new(current)));
}
};
let text_run_text = &formatting_context_text[self.text_range.clone()];
let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
// The next bytes index of the character within the entire inline formatting context's text.
let mut next_byte_index = self.text_range.start;
for (relative_character_index, (character, next_character)) in char_iterator.enumerate() {
// The current character index within the entire inline formatting context's text.
@@ -480,38 +506,40 @@ impl TextRun {
let current_byte_index = next_byte_index;
next_byte_index += character.len_utf8();
if char_does_not_change_font(character) {
if character == '\n' {
finish_current_segment(&mut current, &mut results);
results.push(TextRunItem::LineBreak {
character_index: current_character_index,
});
continue;
}
let Some(font) = font_group.find_by_codepoint(
&layout_context.font_context,
character,
next_character,
language,
) else {
continue;
let (font, script, bidi_level) = if character_cannot_change_font(character) {
(None, Script::Common, bidi_info.levels[current_byte_index])
} else {
(
font_group.find_by_codepoint(
&layout_context.font_context,
character,
next_character,
language,
),
Script::from(character),
bidi_info.levels[current_byte_index],
)
};
let script = Script::from(character);
let bidi_level = bidi_info.levels[current_byte_index];
// If the existing segment is compatible with the character, keep going.
// If the existing segment is compatible with the character, just merge the character into it.
if let Some(current) = current.as_mut() {
if current.update_if_compatible(&font, script, bidi_level) {
if current.is_compatible(&font, script, bidi_level) {
current.update(next_byte_index, current_character_index + 1, script);
continue;
}
}
// From https://www.w3.org/TR/css-text-3/#cursive-script:
// Cursive scripts do not admit gaps between their letters for either
// justification or letter-spacing.
let letter_spacing = if is_cursive_script(script) {
None
} else {
letter_spacing
let Some(font) = font.or_else(|| font_group.first(&layout_context.font_context)) else {
continue;
};
let info = FontAndScriptInfo {
font,
script,
@@ -522,50 +550,17 @@ impl TextRun {
text_rendering,
};
// Add the new segment and finish the existing one, if we had one. If the first
// characters in the run were control characters we may be creating the first
// segment in the middle of the run (ie the start should be the start of this
// text run's text).
let (start_byte_index, start_character_index) = match current {
Some(_) => (current_byte_index, current_character_index),
None => (self.text_range.start, self.character_range.start),
};
let new = TextRunSegment::new(Arc::new(info), start_byte_index, start_character_index);
if let Some(mut finished) = current.replace(new) {
// The end of the previous segment is the start of the next one.
finished.range.end = current_byte_index;
finished.character_range.end = current_character_index;
results.push(finished);
}
}
// Either we have a current segment or we only had control characters and whitespace. In both
// of those cases, just use the first font.
if current.is_none() {
current = font_group.first(&layout_context.font_context).map(|font| {
TextRunSegment::new(
Arc::new(FontAndScriptInfo {
font,
script: Script::Common,
language,
bidi_level: Level::ltr(),
letter_spacing,
word_spacing,
text_rendering,
}),
self.text_range.start,
self.character_range.start,
)
})
}
// Extend the last segment to the end of the string and add it to the results.
if let Some(mut last_segment) = current.take() {
last_segment.range.end = self.text_range.end;
last_segment.character_range.end = self.character_range.end;
results.push(last_segment);
finish_current_segment(&mut current, &mut results);
assert!(current.is_none());
current = Some(TextRunSegment::new(
Arc::new(info),
current_byte_index..next_byte_index,
current_character_index..current_character_index + 1,
));
}
finish_current_segment(&mut current, &mut results);
results
}
@@ -584,8 +579,19 @@ impl TextRun {
false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
};
for segment in self.shaped_text.iter() {
segment.layout_into_line_items(self, soft_wrap_policy, ifc);
for item in self.items.iter() {
match item {
// If this whitespace forces a line break, queue up a hard line break the next time we
// see any content. We don't line break immediately, because we'd like to finish processing
// any ongoing inline boxes before ending the line.
TextRunItem::LineBreak { character_index } => {
ifc.possibly_flush_deferred_forced_line_break();
ifc.defer_forced_line_break_at_character_offset(*character_index);
},
TextRunItem::TextSegment(segment) => {
segment.layout_into_line_items(self, soft_wrap_policy, ifc)
},
}
soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
}
}
@@ -611,7 +617,7 @@ fn is_cursive_script(script: Script) -> bool {
/// Whether or not this character should be able to change the font during segmentation. Certain
/// character are not rendered at all, so it doesn't matter what font we use to render them. They
/// should just be added to the current segment.
fn char_does_not_change_font(character: char) -> bool {
fn character_cannot_change_font(character: char) -> bool {
if character.is_control() {
return true;
}
@@ -676,3 +682,7 @@ where
Some((character, self.next_character))
}
}
fn script_is_specific(script: Script) -> bool {
script != Script::Common && script != Script::Inherited
}

View File

@@ -1,2 +0,0 @@
[line-breaking-bidi-003.xht]
expected: FAIL

View File

@@ -0,0 +1,2 @@
[bidi-tab-001.html]
expected: FAIL

View File

@@ -1,2 +0,0 @@
[shaping-no-join-003.html]
expected: FAIL

View File

@@ -1,2 +0,0 @@
[trailing-space-and-text-alignment-rtl-004.html]
expected: FAIL