/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ use std::mem; use std::ops::Range; use std::sync::Arc; use app_units::Au; use fonts::{ FontContext, FontRef, GlyphStore, LAST_RESORT_GLYPH_ADVANCE, ShapingFlags, ShapingOptions, }; use icu_locid::subtags::Language; use log::warn; use malloc_size_of_derive::MallocSizeOf; use servo_arc::Arc as ServoArc; use servo_base::text::is_bidi_control; use style::computed_values::text_rendering::T as TextRendering; use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse; use style::computed_values::word_break::T as WordBreak; use style::properties::ComputedValues; use style::str::char_is_whitespace; use style::values::computed::OverflowWrap; use unicode_bidi::{BidiInfo, Level}; use unicode_script::Script; use xi_unicode::linebreak_property; use super::line_breaker::LineBreaker; use super::{InlineFormattingContextLayout, SharedInlineStyles}; use crate::context::LayoutContext; use crate::dom::WeakLayoutBox; use crate::flow::inline::line::TextRunOffsets; use crate::fragment_tree::BaseFragmentInfo; // These constants are the xi-unicode line breaking classes that are defined in // `table.rs`. Unfortunately, they are only identified by number. pub(crate) const XI_LINE_BREAKING_CLASS_CM: u8 = 9; pub(crate) const XI_LINE_BREAKING_CLASS_GL: u8 = 12; pub(crate) const XI_LINE_BREAKING_CLASS_ZW: u8 = 28; pub(crate) const XI_LINE_BREAKING_CLASS_WJ: u8 = 30; pub(crate) const XI_LINE_BREAKING_CLASS_ZWJ: u8 = 42; // There are two reasons why we might want to break at the start: // // 1. The line breaker told us that a break was necessary between two separate // instances of sending text to it. // 2. We are following replaced content ie `have_deferred_soft_wrap_opportunity`. // // In both cases, we don't want to do this if the first character prevents a // soft wrap opportunity. #[derive(PartialEq)] enum SegmentStartSoftWrapPolicy { Force, FollowLinebreaker, } /// A data structure which contains information used when shaping a [`TextRunSegment`]. #[derive(Clone, Debug, MallocSizeOf)] pub(crate) struct FontAndScriptInfo { /// The font used when shaping a [`TextRunSegment`]. pub font: FontRef, /// The script used when shaping a [`TextRunSegment`]. pub script: Script, /// The BiDi [`Level`] used when shaping a [`TextRunSegment`]. pub bidi_level: Level, /// The [`Language`] used when shaping a [`TextRunSegment`]. pub language: Language, /// Spacing to add between each letter. Corresponds to the CSS 2.1 `letter-spacing` property. /// NB: You will probably want to set the `IGNORE_LIGATURES_SHAPING_FLAG` if this is non-null. /// /// Letter spacing is not applied to all characters. Use [Self::letter_spacing_for_character] to /// determine the amount of spacing to apply. pub letter_spacing: Option, /// Spacing to add between each word. Corresponds to the CSS 2.1 `word-spacing` property. pub word_spacing: Option, /// The [`TextRendering`] value from the original style. pub text_rendering: TextRendering, } impl From<&FontAndScriptInfo> for ShapingOptions { fn from(info: &FontAndScriptInfo) -> Self { let mut flags = ShapingFlags::empty(); if info.bidi_level.is_rtl() { flags.insert(ShapingFlags::RTL_FLAG); } if info.letter_spacing.is_some() { flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG); }; if info.text_rendering == TextRendering::Optimizespeed { flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG); flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG) } Self { letter_spacing: info.letter_spacing, word_spacing: info.word_spacing, script: info.script, language: info.language, flags, } } } #[derive(Debug, MallocSizeOf)] pub(crate) struct TextRunSegment { /// Information about the font and language used in this text run. This is produced by /// segmenting the inline formatting context's text content by font, script, and bidi level. #[conditional_malloc_size_of] pub info: Arc, /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content. pub range: Range, /// The range of characters in the parent [`super::InlineFormattingContext`]'s text content. pub character_range: Range, /// Whether or not the linebreaker said that we should allow a line break at the start of this /// segment. pub break_at_start: bool, /// The shaped runs within this segment. #[conditional_malloc_size_of] pub runs: Vec>, } impl TextRunSegment { fn new( info: Arc, start_offset: usize, start_character_offset: usize, ) -> Self { Self { info, range: start_offset..start_offset, character_range: start_character_offset..start_character_offset, runs: Vec::new(), break_at_start: false, } } /// Update this segment if the Font and Script are compatible. The update will only /// ever make the Script specific. Returns true if the new Font and Script are /// compatible with this segment or false otherwise. fn update_if_compatible( &mut self, new_font: &FontRef, new_script: Script, new_bidi_level: Level, ) -> bool { if self.info.bidi_level != new_bidi_level || !Arc::ptr_eq(&self.info.font, new_font) { return false; } fn is_specific(script: Script) -> bool { script != Script::Common && script != Script::Inherited } if !is_specific(self.info.script) && is_specific(new_script) { self.info = Arc::new(FontAndScriptInfo { script: new_script, ..(*self.info).clone() }); } new_script == self.info.script || !is_specific(new_script) } fn layout_into_line_items( &self, text_run: &TextRun, mut soft_wrap_policy: SegmentStartSoftWrapPolicy, ifc: &mut InlineFormattingContextLayout, ) { if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker { soft_wrap_policy = SegmentStartSoftWrapPolicy::Force; } let mut character_range_start = self.character_range.start; for (run_index, run) in self.runs.iter().enumerate() { ifc.possibly_flush_deferred_forced_line_break(); let new_character_range_end = character_range_start + run.character_count(); let offsets = ifc .ifc .shared_selection .clone() .map(|shared_selection| TextRunOffsets { shared_selection, character_range: character_range_start..new_character_range_end, }); // If this whitespace forces a line break, queue up a hard line break the next time we // see any content. We don't line break immediately, because we'd like to finish processing // any ongoing inline boxes before ending the line. if run.is_single_preserved_newline() { ifc.possibly_push_empty_text_run_to_unbreakable_segment( text_run, &self.info, offsets, ); character_range_start = new_character_range_end; ifc.defer_forced_line_break(); continue; } // Break before each unbreakable run in this TextRun, except the first unless the // linebreaker was set to break before the first run. if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force { ifc.process_soft_wrap_opportunity(); } ifc.push_glyph_store_to_unbreakable_segment(run.clone(), text_run, &self.info, offsets); character_range_start = new_character_range_end; } } fn shape_and_push_range( &mut self, range: &Range, formatting_context_text: &str, options: &ShapingOptions, ) { self.runs.push( self.info .font .shape_text(&formatting_context_text[range.clone()], options), ); } /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered, /// based on the style of the parent inline box. fn shape_text( &mut self, parent_style: &ComputedValues, formatting_context_text: &str, linebreaker: &mut LineBreaker, ) { let options: ShapingOptions = (&*self.info).into(); // Gather the linebreaks that apply to this segment from the inline formatting context's collection // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final // piece of text is processed. let range = self.range.clone(); let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone()); let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end)); self.runs.clear(); self.runs.reserve(linebreaks.len()); self.break_at_start = false; let text_style = parent_style.get_inherited_text().clone(); let can_break_anywhere = text_style.word_break == WordBreak::BreakAll || text_style.overflow_wrap == OverflowWrap::Anywhere || text_style.overflow_wrap == OverflowWrap::BreakWord; let mut last_slice = self.range.start..self.range.start; for break_index in linebreak_iter { let mut options = options; if *break_index == self.range.start { self.break_at_start = true; continue; } // Extend the slice to the next UAX#14 line break opportunity. let mut slice = last_slice.end..*break_index; let word = &formatting_context_text[slice.clone()]; // Split off any trailing whitespace into a separate glyph run. let mut whitespace = slice.end..slice.end; let mut rev_char_indices = word.char_indices().rev().peekable(); let mut ends_with_whitespace = false; let ends_with_newline = rev_char_indices .peek() .is_some_and(|&(_, character)| character == '\n'); if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices .take_while(|&(_, character)| char_is_whitespace(character)) .last() { ends_with_whitespace = true; whitespace.start = slice.start + first_white_space_index; // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there // is a line break opportunity *after* every preserved space, but not before. This means // that we should not split off the first whitespace, unless that white-space is a preserved // newline. // // An exception to this is if the style tells us that we can break in the middle of words. if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces && first_white_space_character != '\n' && !can_break_anywhere { whitespace.start += first_white_space_character.len_utf8(); options .flags .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG); } slice.end = whitespace.start; } // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice. // TODO: This should only happen for CJK text. if !ends_with_whitespace && *break_index != self.range.end && text_style.word_break == WordBreak::KeepAll && !can_break_anywhere { continue; } // Only advance the last slice if we are not going to try to expand the slice. last_slice = slice.start..*break_index; // Push the non-whitespace part of the range. if !slice.is_empty() { self.shape_and_push_range(&slice, formatting_context_text, &options); } if whitespace.is_empty() { continue; } options.flags.insert( ShapingFlags::IS_WHITESPACE_SHAPING_FLAG | ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG, ); // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity // between each white space character in the white space that we trimmed off. if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces { let start_index = whitespace.start; for (index, character) in formatting_context_text[whitespace].char_indices() { let index = start_index + index; self.shape_and_push_range( &(index..index + character.len_utf8()), formatting_context_text, &options, ); } continue; } // The breaker breaks after every newline, so either there is none, // or there is exactly one at the very end. In the latter case, // split it into a different run. That's because shaping considers // a newline to have the same advance as a space, but during layout // we want to treat the newline as having no advance. if ends_with_newline && whitespace.len() > 1 { self.shape_and_push_range( &(whitespace.start..whitespace.end - 1), formatting_context_text, &options, ); self.shape_and_push_range( &(whitespace.end - 1..whitespace.end), formatting_context_text, &options, ); } else { self.shape_and_push_range(&whitespace, formatting_context_text, &options); } } } } /// A single [`TextRun`] for the box tree. These are all descendants of /// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`]. During /// box tree construction, text is split into [`TextRun`]s based on their font, script, /// etc. When these are created text is already shaped. /// /// #[derive(Debug, MallocSizeOf)] pub(crate) struct TextRun { /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the /// original text node in the DOM for the text. pub base_fragment_info: BaseFragmentInfo, /// A weak reference to the parent of this layout box. This becomes valid as soon /// as the *parent* of this box is added to the tree. pub parent_box: Option, /// The [`crate::SharedStyle`] from this [`TextRun`]s parent element. This is /// shared so that incremental layout can simply update the parent element and /// this [`TextRun`] will be updated automatically. pub inline_styles: SharedInlineStyles, /// The range of text in [`super::InlineFormattingContext::text_content`] of the /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets. pub text_range: Range, /// The range of characters in this text in [`super::InlineFormattingContext::text_content`] /// of the [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are *not* /// UTF-8 offsets. pub character_range: Range, /// The text of this [`TextRun`] with a font selected, broken into unbreakable /// segments, and shaped. pub shaped_text: Vec, } impl TextRun { pub(crate) fn new( base_fragment_info: BaseFragmentInfo, inline_styles: SharedInlineStyles, text_range: Range, character_range: Range, ) -> Self { Self { base_fragment_info, parent_box: None, inline_styles, text_range, character_range, shaped_text: Vec::new(), } } pub(super) fn segment_and_shape( &mut self, formatting_context_text: &str, layout_context: &LayoutContext, linebreaker: &mut LineBreaker, bidi_info: &BidiInfo, ) { let parent_style = self.inline_styles.style.borrow().clone(); let mut segments = self.segment_text_by_font( layout_context, formatting_context_text, bidi_info, &parent_style, ); for segment in segments.iter_mut() { segment.shape_text(&parent_style, formatting_context_text, linebreaker); } let _ = std::mem::replace(&mut self.shaped_text, segments); } /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched /// font and script. Fonts may differ when glyphs are found in fallback fonts. /// [`super::InlineFormattingContext`]. fn segment_text_by_font( &mut self, layout_context: &LayoutContext, formatting_context_text: &str, bidi_info: &BidiInfo, parent_style: &ServoArc, ) -> Vec { let font_group = layout_context .font_context .font_group(parent_style.clone_font()); let mut current: Option = None; let mut results = Vec::new(); let x_lang = parent_style.get_font()._x_lang.clone(); let language = x_lang.0.parse().unwrap_or(Language::UND); let text_run_text = &formatting_context_text[self.text_range.clone()]; let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars()); let parent_style = self.inline_styles.style.borrow().clone(); let inherited_text_style = parent_style.get_inherited_text().clone(); let letter_spacing = inherited_text_style .letter_spacing .0 .resolve(parent_style.clone_font().font_size.computed_size()); let letter_spacing = if letter_spacing.px() != 0. { Some(app_units::Au::from(letter_spacing)) } else { None }; let text_rendering = inherited_text_style.text_rendering; let word_spacing = inherited_text_style.word_spacing.to_length().map(Au::from); // The next current character index within the entire inline formatting context's text. let mut next_character_index = self.character_range.start; // The next bytes index of the charcter within the entire inline formatting context's text. let mut next_byte_index = self.text_range.start; let resolve_word_spacing_for_font = |font: &FontRef| { word_spacing.unwrap_or_else(|| { let space_width = font .glyph_index(' ') .map(|glyph_id| font.glyph_h_advance(glyph_id)) .unwrap_or(LAST_RESORT_GLYPH_ADVANCE); inherited_text_style .word_spacing .to_used_value(Au::from_f64_px(space_width)) }) }; for (character, next_character) in char_iterator { let current_character_index = next_character_index; next_character_index += 1; let current_byte_index = next_byte_index; next_byte_index += character.len_utf8(); if char_does_not_change_font(character) { continue; } let Some(font) = font_group.find_by_codepoint( &layout_context.font_context, character, next_character, x_lang.clone(), ) else { continue; }; let script = Script::from(character); let bidi_level = bidi_info.levels[current_byte_index]; // If the existing segment is compatible with the character, keep going. if let Some(current) = current.as_mut() { if current.update_if_compatible(&font, script, bidi_level) { continue; } } // From https://www.w3.org/TR/css-text-3/#cursive-script: // Cursive scripts do not admit gaps between their letters for either // justification or letter-spacing. let letter_spacing = if is_cursive_script(script) { None } else { letter_spacing }; let word_spacing = Some(resolve_word_spacing_for_font(&font)); let info = FontAndScriptInfo { font, script, bidi_level, language, word_spacing, letter_spacing, text_rendering, }; // Add the new segment and finish the existing one, if we had one. If the first // characters in the run were control characters we may be creating the first // segment in the middle of the run (ie the start should be the start of this // text run's text). let (start_byte_index, start_character_index) = match current { Some(_) => (current_byte_index, current_character_index), None => (self.text_range.start, self.character_range.start), }; let new = TextRunSegment::new(Arc::new(info), start_byte_index, start_character_index); if let Some(mut finished) = current.replace(new) { // The end of the previous segment is the start of the next one. finished.range.end = current_byte_index; finished.character_range.end = current_character_index; results.push(finished); } } // Either we have a current segment or we only had control characters and whitespace. In both // of those cases, just use the first font. if current.is_none() { current = font_group.first(&layout_context.font_context).map(|font| { let word_spacing = Some(resolve_word_spacing_for_font(&font)); TextRunSegment::new( Arc::new(FontAndScriptInfo { font, script: Script::Common, language, bidi_level: Level::ltr(), letter_spacing, word_spacing, text_rendering, }), self.text_range.start, self.character_range.start, ) }) } // Extend the last segment to the end of the string and add it to the results. if let Some(mut last_segment) = current.take() { last_segment.range.end = self.text_range.end; last_segment.character_range.end = self.character_range.end; results.push(last_segment); } results } pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) { if self.text_range.is_empty() { return; } // If we are following replaced content, we should have a soft wrap opportunity, unless the // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a // character it should also override the LineBreaker's indication to break at the start. let have_deferred_soft_wrap_opportunity = mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false); let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity { true => SegmentStartSoftWrapPolicy::Force, false => SegmentStartSoftWrapPolicy::FollowLinebreaker, }; for segment in self.shaped_text.iter() { segment.layout_into_line_items(self, soft_wrap_policy, ifc); soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker; } } } /// From : /// Cursive scripts do not admit gaps between their letters for either justification /// or letter-spacing. The following Unicode scripts are included: Arabic, Hanifi /// Rohingya, Mandaic, Mongolian, N’Ko, Phags Pa, Syriac fn is_cursive_script(script: Script) -> bool { matches!( script, Script::Arabic | Script::Hanifi_Rohingya | Script::Mandaic | Script::Mongolian | Script::Nko | Script::Phags_Pa | Script::Syriac ) } /// Whether or not this character should be able to change the font during segmentation. Certain /// character are not rendered at all, so it doesn't matter what font we use to render them. They /// should just be added to the current segment. fn char_does_not_change_font(character: char) -> bool { if character.is_control() { return true; } if character == '\u{00A0}' { return true; } if is_bidi_control(character) { return false; } let class = linebreak_property(character); class == XI_LINE_BREAKING_CLASS_CM || class == XI_LINE_BREAKING_CLASS_GL || class == XI_LINE_BREAKING_CLASS_ZW || class == XI_LINE_BREAKING_CLASS_WJ || class == XI_LINE_BREAKING_CLASS_ZWJ } pub(super) fn get_font_for_first_font_for_style( style: &ComputedValues, font_context: &FontContext, ) -> Option { let font = font_context .font_group(style.clone_font()) .first(font_context); if font.is_none() { warn!("Could not find font for style: {:?}", style.clone_font()); } font } pub(crate) struct TwoCharsAtATimeIterator { /// The input character iterator. iterator: InputIterator, /// The first character to produce in the next run of the iterator. next_character: Option, } impl TwoCharsAtATimeIterator { fn new(iterator: InputIterator) -> Self { Self { iterator, next_character: None, } } } impl Iterator for TwoCharsAtATimeIterator where InputIterator: Iterator, { type Item = (char, Option); fn next(&mut self) -> Option { // If the iterator isn't initialized do that now. if self.next_character.is_none() { self.next_character = self.iterator.next(); } let character = self.next_character?; self.next_character = self.iterator.next(); Some((character, self.next_character)) } }