Files
servo/components/script/dom/textdecoder.rs
Narfinger 423800eec4 Script: Lazily transform the DOMString into Rust String instead of immediately. (#39509)
This implements LazyDOMString (from now on DOMString) as outlined in
https://github.com/servo/servo/issues/39479.
When constructing from a *mut JSString, we keep it in a
RootedTraceableBox<Heap<*mut JSString>> and transform
the string into a rust string if necessary via the `make_rust_string`
method.
Methods used in script are implemented on this string. Currently we
transform the string at all times.
But in the future more efficient implementations are possible.

We implement the safety critical sections in a separate module
DOMStringInner which allows simple constructors, `make_rust_string` and
the `bytes` method.
This method returns the new type `EncodedBytes` which contains the
reference to the underlying string in either format.

Testing: WPT tests still seem to work, so this should test this
functionality.

---------

Signed-off-by: Narfinger <Narfinger@users.noreply.github.com>
2025-10-09 18:18:03 +00:00

152 lines
5.3 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::borrow::ToOwned;
use std::cell::Cell;
use dom_struct::dom_struct;
use encoding_rs::Encoding;
use js::rust::HandleObject;
use crate::dom::bindings::codegen::Bindings::TextDecoderBinding;
use crate::dom::bindings::codegen::Bindings::TextDecoderBinding::{
TextDecodeOptions, TextDecoderMethods,
};
use crate::dom::bindings::codegen::UnionTypes::ArrayBufferViewOrArrayBuffer;
use crate::dom::bindings::error::{Error, Fallible};
use crate::dom::bindings::reflector::{Reflector, reflect_dom_object_with_proto};
use crate::dom::bindings::root::DomRoot;
use crate::dom::bindings::str::{DOMString, USVString};
use crate::dom::globalscope::GlobalScope;
use crate::dom::textdecodercommon::TextDecoderCommon;
use crate::script_runtime::CanGc;
/// DOM-side implementation of the `TextDecoder` interface.
///
/// <https://encoding.spec.whatwg.org/#textdecoder>
#[dom_struct]
// NOTE(review): every field name below is already snake_case, so this `allow`
// looks redundant on the struct itself — confirm before removing.
#[allow(non_snake_case)]
pub(crate) struct TextDecoder {
/// Reflector for this DOM object; created with `Reflector::new()` in
/// `new_inherited`.
reflector_: Reflector,
/// State shared through `TextDecoderCommon`, built from the encoding and the
/// `fatal` / `ignoreBOM` options. The methods of this type read the encoding,
/// the flags, the current decoder and the I/O queue through it.
///
/// <https://encoding.spec.whatwg.org/#textdecodercommon>
decoder: TextDecoderCommon,
/// Set to `options.stream` on every `Decode` call. When it is `false` at the
/// start of a `Decode` call, the decoder and the I/O queue are reset first.
///
/// <https://encoding.spec.whatwg.org/#textdecoder-do-not-flush-flag>
do_not_flush: Cell<bool>,
}
#[allow(non_snake_case)]
impl TextDecoder {
fn new_inherited(encoding: &'static Encoding, fatal: bool, ignoreBOM: bool) -> TextDecoder {
let decoder = TextDecoderCommon::new_inherited(encoding, fatal, ignoreBOM);
TextDecoder {
reflector_: Reflector::new(),
decoder,
do_not_flush: Cell::new(false),
}
}
fn make_range_error() -> Fallible<DomRoot<TextDecoder>> {
Err(Error::Range(
"The given encoding is not supported.".to_owned(),
))
}
fn new(
global: &GlobalScope,
proto: Option<HandleObject>,
encoding: &'static Encoding,
fatal: bool,
ignoreBOM: bool,
can_gc: CanGc,
) -> DomRoot<TextDecoder> {
reflect_dom_object_with_proto(
Box::new(TextDecoder::new_inherited(encoding, fatal, ignoreBOM)),
global,
proto,
can_gc,
)
}
}
#[allow(non_snake_case)]
impl TextDecoderMethods<crate::DomTypeHolder> for TextDecoder {
    /// Looks up the encoding named by `label` and, if one is found, creates a
    /// decoder configured with `options.fatal` and `options.ignoreBOM`.
    /// Fails with a range error when the lookup yields no encoding.
    ///
    /// <https://encoding.spec.whatwg.org/#dom-textdecoder>
    fn Constructor(
        global: &GlobalScope,
        proto: Option<HandleObject>,
        can_gc: CanGc,
        label: DOMString,
        options: &TextDecoderBinding::TextDecoderOptions,
    ) -> Fallible<DomRoot<TextDecoder>> {
        let Some(encoding) = Encoding::for_label_no_replacement(&label.as_bytes()) else {
            return TextDecoder::make_range_error();
        };

        let fatal = options.fatal;
        let ignore_bom = options.ignoreBOM;
        Ok(TextDecoder::new(
            global, proto, encoding, fatal, ignore_bom, can_gc,
        ))
    }

    /// Returns the name of the decoder's encoding, converted to ASCII lowercase.
    ///
    /// <https://encoding.spec.whatwg.org/#dom-textdecoder-encoding>
    fn Encoding(&self) -> DOMString {
        let name = self.decoder.encoding().name();
        DOMString::from(name.to_ascii_lowercase())
    }

    /// Returns the `fatal` flag held by the shared decoder state.
    ///
    /// <https://encoding.spec.whatwg.org/#dom-textdecoder-fatal>
    fn Fatal(&self) -> bool {
        self.decoder.fatal()
    }

    /// Returns the `ignoreBOM` flag held by the shared decoder state.
    ///
    /// <https://encoding.spec.whatwg.org/#dom-textdecoder-ignorebom>
    fn IgnoreBOM(&self) -> bool {
        self.decoder.ignore_bom()
    }

    /// Decodes `input` (if any) and returns the result wrapped in a `USVString`.
    /// Any error from the shared decoder's `decode` is passed through unchanged.
    ///
    /// <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
    fn Decode(
        &self,
        input: Option<ArrayBufferViewOrArrayBuffer>,
        options: &TextDecodeOptions,
    ) -> Fallible<USVString> {
        // Step 1. If this's do not flush is false, then set this's decoder to a new
        // instance of this's encoding's decoder, this's I/O queue to the I/O queue
        // of bytes « end-of-queue », and this's BOM seen to false.
        if !self.do_not_flush.get() {
            // The ignoreBOM flag selects which kind of fresh decoder is installed.
            let fresh_decoder = if self.decoder.ignore_bom() {
                self.decoder.encoding().new_decoder_without_bom_handling()
            } else {
                self.decoder.encoding().new_decoder_with_bom_removal()
            };
            self.decoder.decoder().replace(fresh_decoder);
            self.decoder.io_queue().replace(Vec::new());
        }

        // Step 2. Set this's do not flush to options["stream"].
        let streaming = options.stream;
        self.do_not_flush.set(streaming);

        // The remaining steps are delegated to `TextDecoderCommon::decode`:
        //
        // Step 3. If input is given, then push a copy of input to this's I/O queue.
        // Step 4. Let output be the I/O queue of scalar values « end-of-queue ».
        // Step 5. While true:
        // Step 5.1 Let item be the result of reading from this's I/O queue.
        // Step 5.2 If item is end-of-queue and this's do not flush is true,
        //      then return the result of running serialize I/O queue with this and output.
        // Step 5.3 Otherwise:
        // Step 5.3.1 Let result be the result of processing an item with item, this's
        //      decoder, this's I/O queue, output, and this's error mode.
        // Step 5.3.2 If result is finished, then return the result of running serialize
        //      I/O queue with this and output.
        //
        // The second argument is `true` exactly when the caller is not streaming.
        let decoded = self.decoder.decode(input.as_ref(), !streaming);
        decoded.map(USVString)
    }
}