diff --git a/components/fonts/font.rs b/components/fonts/font.rs index d991c93c57c..baacb55442f 100644 --- a/components/fonts/font.rs +++ b/components/fonts/font.rs @@ -470,7 +470,7 @@ impl Font { /// Fast path for ASCII text that only needs simple horizontal LTR kerning. fn shape_text_fast(&self, text: &str, options: &ShapingOptions) -> GlyphStore { - let mut glyph_store = GlyphStore::new(text, text.len(), options); + let mut glyph_store = GlyphStore::new(text.len(), options); let mut prev_glyph_id = None; for (string_byte_offset, byte) in text.bytes().enumerate() { let character = byte as char; diff --git a/components/fonts/glyph.rs b/components/fonts/glyph.rs index 0c1ed19cd04..2bc5484bf04 100644 --- a/components/fonts/glyph.rs +++ b/components/fonts/glyph.rs @@ -243,10 +243,6 @@ pub struct GlyphStore { /// but that may not be the case with `white-space: break-spaces`. ends_with_whitespace: bool, - /// Whether or not this glyph store contains only a single glyph for a single - /// preserved newline. - is_single_preserved_newline: bool, - /// Whether or not this [`GlyphStore`] has right-to-left text, which has implications /// about the order of the glyphs in the store. is_rtl: bool, @@ -256,7 +252,7 @@ impl GlyphStore { /// Initializes the glyph store with the given capacity, but doesn't actually add any glyphs. /// /// Use the `add_*` methods to store glyph data. - pub(crate) fn new(text: &str, length: usize, options: &ShapingOptions) -> Self { + pub(crate) fn new(length: usize, options: &ShapingOptions) -> Self { Self { glyphs: Vec::with_capacity(length), detailed_glyphs: Default::default(), @@ -269,7 +265,6 @@ impl GlyphStore { ends_with_whitespace: options .flags .contains(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG), - is_single_preserved_newline: text.len() == 1 && text.starts_with('\n'), is_rtl: options.flags.contains(ShapingFlags::RTL_FLAG), } } @@ -303,7 +298,7 @@ impl GlyphStore { }; let mut previous_character_offset = None; - let mut glyph_store = GlyphStore::new(text, shaped_glyph_data.len(), options); + let mut glyph_store = GlyphStore::new(shaped_glyph_data.len(), options); for mut shaped_glyph in shaped_glyph_data.iter() { // The glyph "cluster" (HarfBuzz terminology) is the byte offset in the string that // this glyph corresponds to. More than one glyph can share a cluster. @@ -389,12 +384,6 @@ impl GlyphStore { self.is_whitespace } - /// Whether or not this [`GlyphStore`] is a single preserved newline. - #[inline] - pub fn is_single_preserved_newline(&self) -> bool { - self.is_single_preserved_newline - } - /// Whether or not this [`GlyphStore`] ends with whitespace. #[inline] pub fn ends_with_whitespace(&self) -> bool { diff --git a/components/layout/flow/inline/mod.rs b/components/layout/flow/inline/mod.rs index 499476c3129..72c3c0b0cb9 100644 --- a/components/layout/flow/inline/mod.rs +++ b/components/layout/flow/inline/mod.rs @@ -122,7 +122,7 @@ use crate::dom::WeakLayoutBox; use crate::dom_traversal::NodeAndStyleInfo; use crate::flow::float::{FloatBox, SequentialLayoutState}; use crate::flow::inline::line::TextRunOffsets; -use crate::flow::inline::text_run::FontAndScriptInfo; +use crate::flow::inline::text_run::{FontAndScriptInfo, TextRunItem, TextRunSegment}; use crate::flow::{ BlockLevelBox, CollapsibleWithParentStartMargin, FloatSide, PlacementState, compute_inline_content_sizes_for_block_level_boxes, layout_block_level_child, @@ -2132,15 +2132,15 @@ impl InlineFormattingContext { InlineItem::TextRun(text_run) => { let text_run = &*text_run.borrow(); let parent_style = text_run.inline_styles.style.borrow(); - text_run.shaped_text.iter().all(|segment| { - segment.runs.iter().all(|run| { + text_run.items.iter().all(|item| match item { + TextRunItem::LineBreak { .. } => false, + TextRunItem::TextSegment(segment) => segment.runs.iter().all(|run| { run.is_whitespace() && - !run.is_single_preserved_newline() && !matches!( parent_style.get_inherited_text().white_space_collapse, WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces ) - }) + }), }) }, InlineItem::OutOfFlowAbsolutelyPositionedBox(..) => true, @@ -2751,64 +2751,16 @@ impl<'layout_data> ContentSizesComputation<'layout_data> { InlineItem::TextRun(text_run) => { let text_run = &*text_run.borrow(); let parent_style = text_run.inline_styles.style.borrow(); - for segment in text_run.shaped_text.iter() { - let style_text = parent_style.get_inherited_text(); - let can_wrap = style_text.text_wrap_mode == TextWrapMode::Wrap; - - // TODO: This should take account whether or not the first and last character prevent - // linebreaks after atomics as in layout. - let break_at_start = - segment.break_at_start && self.had_content_yet_for_min_content; - - for (run_index, run) in segment.runs.iter().enumerate() { - // Break before each unbreakable run in this TextRun, except the first unless the - // linebreaker was set to break before the first run. - if can_wrap && (run_index != 0 || break_at_start) { - self.line_break_opportunity(); - } - - let advance = run.total_advance(); - if run.is_whitespace() { + for item in text_run.items.iter() { + match item { + TextRunItem::LineBreak { .. } => { // If this run is a forced line break, we *must* break the line // and start measuring from the inline origin once more. - if run.is_single_preserved_newline() { - self.forced_line_break(); - continue; - } - if !matches!( - style_text.white_space_collapse, - WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces - ) { - if self.had_content_yet_for_min_content { - if can_wrap { - self.line_break_opportunity(); - } else { - self.pending_whitespace.min_content += advance; - } - } - if self.had_content_yet_for_max_content { - self.pending_whitespace.max_content += advance; - } - continue; - } - if can_wrap { - self.pending_whitespace.max_content += advance; - self.commit_pending_whitespace(); - self.line_break_opportunity(); - continue; - } - } - - self.commit_pending_whitespace(); - self.add_inline_size(advance); - - // Typically whitespace glyphs are placed in a separate store, - // but for `white-space: break-spaces` we place the first whitespace - // with the preceding text. That prevents a line break before that - // first space, but we still need to allow a line break after it. - if can_wrap && run.ends_with_whitespace() { - self.line_break_opportunity(); - } + self.forced_line_break(); + }, + TextRunItem::TextSegment(segment) => { + self.process_text_segment(&parent_style, segment) + }, } } }, @@ -2865,6 +2817,64 @@ impl<'layout_data> ContentSizesComputation<'layout_data> { } } + fn process_text_segment( + &mut self, + parent_style: &atomic_refcell::AtomicRef<'_, ServoArc>, + segment: &TextRunSegment, + ) { + let style_text = parent_style.get_inherited_text(); + let can_wrap = style_text.text_wrap_mode == TextWrapMode::Wrap; + + // TODO: This should take account whether or not the first and last character prevent + // linebreaks after atomics as in layout. + let break_at_start = segment.break_at_start && self.had_content_yet_for_min_content; + + for (run_index, run) in segment.runs.iter().enumerate() { + // Break before each unbreakable run in this TextRun, except the first unless the + // linebreaker was set to break before the first run. + if can_wrap && (run_index != 0 || break_at_start) { + self.line_break_opportunity(); + } + + let advance = run.total_advance(); + if run.is_whitespace() { + if !matches!( + style_text.white_space_collapse, + WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces + ) { + if self.had_content_yet_for_min_content { + if can_wrap { + self.line_break_opportunity(); + } else { + self.pending_whitespace.min_content += advance; + } + } + if self.had_content_yet_for_max_content { + self.pending_whitespace.max_content += advance; + } + continue; + } + if can_wrap { + self.pending_whitespace.max_content += advance; + self.commit_pending_whitespace(); + self.line_break_opportunity(); + continue; + } + } + + self.commit_pending_whitespace(); + self.add_inline_size(advance); + + // Typically whitespace glyphs are placed in a separate store, + // but for `white-space: break-spaces` we place the first whitespace + // with the preceding text. That prevents a line break before that + // first space, but we still need to allow a line break after it. + if can_wrap && run.ends_with_whitespace() { + self.line_break_opportunity(); + } + } + } + fn add_inline_size(&mut self, l: Au) { self.current_line.min_content += l; self.current_line.max_content += l; diff --git a/components/layout/flow/inline/text_run.rs b/components/layout/flow/inline/text_run.rs index 3ade93398fd..02dec28a49e 100644 --- a/components/layout/flow/inline/text_run.rs +++ b/components/layout/flow/inline/text_run.rs @@ -91,7 +91,14 @@ impl From<&FontAndScriptInfo> for ShapingOptions { if info.bidi_level.is_rtl() { flags.insert(ShapingFlags::RTL_FLAG); } - if info.letter_spacing.is_some() { + + // From https://www.w3.org/TR/css-text-3/#cursive-script: + // Cursive scripts do not admit gaps between their letters for either + // justification or letter-spacing. + let letter_spacing = info + .letter_spacing + .filter(|_| !is_cursive_script(info.script)); + if letter_spacing.is_some() { flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG); }; if info.text_rendering == TextRendering::Optimizespeed { @@ -99,7 +106,7 @@ impl From<&FontAndScriptInfo> for ShapingOptions { flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG) } Self { - letter_spacing: info.letter_spacing, + letter_spacing, word_spacing: info.word_spacing, script: info.script, language: info.language, @@ -116,7 +123,7 @@ pub(crate) struct TextRunSegment { pub info: Arc, /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content. - pub range: Range, + pub byte_range: Range, /// The range of characters in the parent [`super::InlineFormattingContext`]'s text content. pub character_range: Range, @@ -133,41 +140,52 @@ pub(crate) struct TextRunSegment { impl TextRunSegment { fn new( info: Arc, - start_offset: usize, - start_character_offset: usize, + byte_range: Range, + character_range: Range, ) -> Self { Self { info, - range: start_offset..start_offset, - character_range: start_character_offset..start_character_offset, + byte_range, + character_range, runs: Vec::new(), break_at_start: false, } } - /// Update this segment if the Font and Script are compatible. The update will only - /// ever make the Script specific. Returns true if the new Font and Script are - /// compatible with this segment or false otherwise. - fn update_if_compatible( - &mut self, - new_font: &FontRef, + /// Returns true if the new `Font`, `Script` and BiDi `Level` are compatible with this segment + /// or false otherwise. + fn is_compatible( + &self, + new_font: &Option, new_script: Script, new_bidi_level: Level, ) -> bool { - if self.info.bidi_level != new_bidi_level || !Arc::ptr_eq(&self.info.font, new_font) { + if self.info.bidi_level != new_bidi_level { + return false; + } + if new_font + .as_ref() + .is_some_and(|new_font| !Arc::ptr_eq(&self.info.font, new_font)) + { return false; } - fn is_specific(script: Script) -> bool { - script != Script::Common && script != Script::Inherited - } - if !is_specific(self.info.script) && is_specific(new_script) { + !script_is_specific(self.info.script) || + !script_is_specific(new_script) || + self.info.script == new_script + } + + /// Update this segment to end at the given byte and character index. The update will only ever + /// make the Script specific and will not change it otherwise. + fn update(&mut self, next_byte_index: usize, next_character_index: usize, new_script: Script) { + if !script_is_specific(self.info.script) && script_is_specific(new_script) { self.info = Arc::new(FontAndScriptInfo { script: new_script, ..(*self.info).clone() }); } - new_script == self.info.script || !is_specific(new_script) + self.character_range.end = next_character_index; + self.byte_range.end = next_byte_index; } fn layout_into_line_items( @@ -195,15 +213,6 @@ impl TextRunSegment { character_range: character_range_start..new_character_range_end, }); - // If this whitespace forces a line break, queue up a hard line break the next time we - // see any content. We don't line break immediately, because we'd like to finish processing - // any ongoing inline boxes before ending the line. - if run.is_single_preserved_newline() { - ifc.defer_forced_line_break_at_character_offset(character_range_start); - character_range_start = new_character_range_end; - continue; - } - // Break before each unbreakable run in this TextRun, except the first unless the // linebreaker was set to break before the first run. if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force { @@ -242,8 +251,8 @@ impl TextRunSegment { // Gather the linebreaks that apply to this segment from the inline formatting context's collection // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final // piece of text is processed. - let range = self.range.clone(); - let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone()); + let range = self.byte_range.clone(); + let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.byte_range.clone()); let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end)); self.runs.clear(); @@ -255,10 +264,10 @@ impl TextRunSegment { text_style.overflow_wrap == OverflowWrap::Anywhere || text_style.overflow_wrap == OverflowWrap::BreakWord; - let mut last_slice = self.range.start..self.range.start; + let mut last_slice = self.byte_range.start..self.byte_range.start; for break_index in linebreak_iter { let mut options = options; - if *break_index == self.range.start { + if *break_index == self.byte_range.start { self.break_at_start = true; continue; } @@ -304,7 +313,7 @@ impl TextRunSegment { // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice. // TODO: This should only happen for CJK text. if !ends_with_whitespace && - *break_index != self.range.end && + *break_index != self.byte_range.end && text_style.word_break == WordBreak::KeepAll && !can_break_anywhere { @@ -366,6 +375,16 @@ impl TextRunSegment { } } +/// A single item in a [`TextRun`]. +#[derive(Debug, MallocSizeOf)] +pub(crate) enum TextRunItem { + /// A hard line break i.e. a "\n" as other types line breaks are normalized to "\n". + LineBreak { character_index: usize }, + /// Any other text for which a font can be matched. We store a `Box` here as [`TextRunSegment`] + /// is quite a bit larger than the other enum variants. + TextSegment(Box), +} + /// A single [`TextRun`] for the box tree. These are all descendants of /// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`]. During /// box tree construction, text is split into [`TextRun`]s based on their font, script, @@ -396,9 +415,10 @@ pub(crate) struct TextRun { /// UTF-8 offsets. pub character_range: Range, - /// The text of this [`TextRun`] with a font selected, broken into unbreakable + /// The [`TextRunItem`]s of this text run. This is produced by segmenting the incoming text + /// by things such as font and script as well as separating out hard line breaks. /// segments, and shaped. - pub shaped_text: Vec, + pub items: Vec, } impl TextRun { @@ -414,7 +434,7 @@ impl TextRun { inline_styles, text_range, character_range, - shaped_text: Vec::new(), + items: Vec::new(), } } @@ -426,16 +446,17 @@ impl TextRun { bidi_info: &BidiInfo, ) { let parent_style = self.inline_styles.style.borrow().clone(); - let mut segments = self.segment_text_by_font( + self.items = self.segment_text_by_font( layout_context, formatting_context_text, bidi_info, &parent_style, ); - for segment in segments.iter_mut() { - segment.shape_text(&parent_style, formatting_context_text, linebreaker); + for item in self.items.iter_mut() { + if let TextRunItem::TextSegment(text_segment) = item { + text_segment.shape_text(&parent_style, formatting_context_text, linebreaker) + } } - let _ = std::mem::replace(&mut self.shaped_text, segments); } /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched @@ -447,17 +468,11 @@ impl TextRun { formatting_context_text: &str, bidi_info: &BidiInfo, parent_style: &ServoArc, - ) -> Vec { + ) -> Vec { let font_style = parent_style.clone_font(); let language = font_style._x_lang.0.parse().unwrap_or(Language::UND); let font_size = font_style.font_size.computed_size().into(); let font_group = layout_context.font_context.font_group(font_style); - let mut current: Option = None; - let mut results = Vec::new(); - - let text_run_text = &formatting_context_text[self.text_range.clone()]; - let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars()); - let inherited_text_style = parent_style.get_inherited_text().clone(); let word_spacing = Some(inherited_text_style.word_spacing.to_used_value(font_size)); let letter_spacing = inherited_text_style @@ -471,7 +486,18 @@ impl TextRun { }; let text_rendering = inherited_text_style.text_rendering; - // The next bytes index of the charcter within the entire inline formatting context's text. + let mut current: Option = None; + let mut results = Vec::new(); + let finish_current_segment = + |current: &mut Option, results: &mut Vec| { + if let Some(current) = current.take() { + results.push(TextRunItem::TextSegment(Box::new(current))); + } + }; + + let text_run_text = &formatting_context_text[self.text_range.clone()]; + let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars()); + // The next bytes index of the character within the entire inline formatting context's text. let mut next_byte_index = self.text_range.start; for (relative_character_index, (character, next_character)) in char_iterator.enumerate() { // The current character index within the entire inline formatting context's text. @@ -480,38 +506,40 @@ impl TextRun { let current_byte_index = next_byte_index; next_byte_index += character.len_utf8(); - if char_does_not_change_font(character) { + if character == '\n' { + finish_current_segment(&mut current, &mut results); + results.push(TextRunItem::LineBreak { + character_index: current_character_index, + }); continue; } - let Some(font) = font_group.find_by_codepoint( - &layout_context.font_context, - character, - next_character, - language, - ) else { - continue; + let (font, script, bidi_level) = if character_cannot_change_font(character) { + (None, Script::Common, bidi_info.levels[current_byte_index]) + } else { + ( + font_group.find_by_codepoint( + &layout_context.font_context, + character, + next_character, + language, + ), + Script::from(character), + bidi_info.levels[current_byte_index], + ) }; - let script = Script::from(character); - let bidi_level = bidi_info.levels[current_byte_index]; - - // If the existing segment is compatible with the character, keep going. + // If the existing segment is compatible with the character, just merge the character into it. if let Some(current) = current.as_mut() { - if current.update_if_compatible(&font, script, bidi_level) { + if current.is_compatible(&font, script, bidi_level) { + current.update(next_byte_index, current_character_index + 1, script); continue; } } - // From https://www.w3.org/TR/css-text-3/#cursive-script: - // Cursive scripts do not admit gaps between their letters for either - // justification or letter-spacing. - let letter_spacing = if is_cursive_script(script) { - None - } else { - letter_spacing + let Some(font) = font.or_else(|| font_group.first(&layout_context.font_context)) else { + continue; }; - let info = FontAndScriptInfo { font, script, @@ -522,50 +550,17 @@ impl TextRun { text_rendering, }; - // Add the new segment and finish the existing one, if we had one. If the first - // characters in the run were control characters we may be creating the first - // segment in the middle of the run (ie the start should be the start of this - // text run's text). - let (start_byte_index, start_character_index) = match current { - Some(_) => (current_byte_index, current_character_index), - None => (self.text_range.start, self.character_range.start), - }; - let new = TextRunSegment::new(Arc::new(info), start_byte_index, start_character_index); - if let Some(mut finished) = current.replace(new) { - // The end of the previous segment is the start of the next one. - finished.range.end = current_byte_index; - finished.character_range.end = current_character_index; - results.push(finished); - } - } - - // Either we have a current segment or we only had control characters and whitespace. In both - // of those cases, just use the first font. - if current.is_none() { - current = font_group.first(&layout_context.font_context).map(|font| { - TextRunSegment::new( - Arc::new(FontAndScriptInfo { - font, - script: Script::Common, - language, - bidi_level: Level::ltr(), - letter_spacing, - word_spacing, - text_rendering, - }), - self.text_range.start, - self.character_range.start, - ) - }) - } - - // Extend the last segment to the end of the string and add it to the results. - if let Some(mut last_segment) = current.take() { - last_segment.range.end = self.text_range.end; - last_segment.character_range.end = self.character_range.end; - results.push(last_segment); + finish_current_segment(&mut current, &mut results); + assert!(current.is_none()); + + current = Some(TextRunSegment::new( + Arc::new(info), + current_byte_index..next_byte_index, + current_character_index..current_character_index + 1, + )); } + finish_current_segment(&mut current, &mut results); results } @@ -584,8 +579,19 @@ impl TextRun { false => SegmentStartSoftWrapPolicy::FollowLinebreaker, }; - for segment in self.shaped_text.iter() { - segment.layout_into_line_items(self, soft_wrap_policy, ifc); + for item in self.items.iter() { + match item { + // If this whitespace forces a line break, queue up a hard line break the next time we + // see any content. We don't line break immediately, because we'd like to finish processing + // any ongoing inline boxes before ending the line. + TextRunItem::LineBreak { character_index } => { + ifc.possibly_flush_deferred_forced_line_break(); + ifc.defer_forced_line_break_at_character_offset(*character_index); + }, + TextRunItem::TextSegment(segment) => { + segment.layout_into_line_items(self, soft_wrap_policy, ifc) + }, + } soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker; } } @@ -611,7 +617,7 @@ fn is_cursive_script(script: Script) -> bool { /// Whether or not this character should be able to change the font during segmentation. Certain /// character are not rendered at all, so it doesn't matter what font we use to render them. They /// should just be added to the current segment. -fn char_does_not_change_font(character: char) -> bool { +fn character_cannot_change_font(character: char) -> bool { if character.is_control() { return true; } @@ -676,3 +682,7 @@ where Some((character, self.next_character)) } } + +fn script_is_specific(script: Script) -> bool { + script != Script::Common && script != Script::Inherited +} diff --git a/tests/wpt/meta/css/CSS2/bidi-text/line-breaking-bidi-003.xht.ini b/tests/wpt/meta/css/CSS2/bidi-text/line-breaking-bidi-003.xht.ini deleted file mode 100644 index 60004f55a76..00000000000 --- a/tests/wpt/meta/css/CSS2/bidi-text/line-breaking-bidi-003.xht.ini +++ /dev/null @@ -1,2 +0,0 @@ -[line-breaking-bidi-003.xht] - expected: FAIL diff --git a/tests/wpt/meta/css/css-text/bidi/bidi-tab-001.html.ini b/tests/wpt/meta/css/css-text/bidi/bidi-tab-001.html.ini new file mode 100644 index 00000000000..a7cb98aad02 --- /dev/null +++ b/tests/wpt/meta/css/css-text/bidi/bidi-tab-001.html.ini @@ -0,0 +1,2 @@ +[bidi-tab-001.html] + expected: FAIL diff --git a/tests/wpt/meta/css/css-text/text-encoding/shaping-no-join-003.html.ini b/tests/wpt/meta/css/css-text/text-encoding/shaping-no-join-003.html.ini deleted file mode 100644 index 01697530439..00000000000 --- a/tests/wpt/meta/css/css-text/text-encoding/shaping-no-join-003.html.ini +++ /dev/null @@ -1,2 +0,0 @@ -[shaping-no-join-003.html] - expected: FAIL diff --git a/tests/wpt/meta/css/css-text/white-space/trailing-space-and-text-alignment-rtl-004.html.ini b/tests/wpt/meta/css/css-text/white-space/trailing-space-and-text-alignment-rtl-004.html.ini deleted file mode 100644 index ad7a302c005..00000000000 --- a/tests/wpt/meta/css/css-text/white-space/trailing-space-and-text-alignment-rtl-004.html.ini +++ /dev/null @@ -1,2 +0,0 @@ -[trailing-space-and-text-alignment-rtl-004.html] - expected: FAIL