mirror of
https://github.com/servo/servo
synced 2026-04-25 17:15:48 +02:00
Removes some unneeded lints. Testing: Refactor Part of: #40383 Signed-off-by: WaterWhisperer <waterwhisperer24@qq.com>
1465 lines
47 KiB
Rust
1465 lines
47 KiB
Rust
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
|
|
|
#![allow(clippy::non_canonical_partial_ord_impl)]
|
|
use std::borrow::{Cow, ToOwned};
|
|
use std::cell::{Ref, RefCell};
|
|
use std::default::Default;
|
|
use std::ops::Deref;
|
|
use std::ptr::{self, NonNull};
|
|
use std::str::{Chars, FromStr};
|
|
use std::sync::LazyLock;
|
|
use std::{fmt, slice, str};
|
|
|
|
use html5ever::{LocalName, Namespace};
|
|
use js::conversions::{ToJSValConvertible, jsstr_to_string};
|
|
use js::gc::MutableHandleValue;
|
|
use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
|
|
use js::jsval::StringValue;
|
|
use js::rust::{Runtime, Trace};
|
|
use malloc_size_of::MallocSizeOfOps;
|
|
use num_traits::{ToPrimitive, Zero};
|
|
use regex::Regex;
|
|
use style::Atom;
|
|
use style::str::HTML_SPACE_CHARACTERS;
|
|
|
|
use crate::script_runtime::JSContext as SafeJSContext;
|
|
use crate::trace::RootedTraceableBox;
|
|
|
|
const ASCII_END: u8 = 0x7E;
|
|
const ASCII_CAPITAL_A: u8 = 0x41;
|
|
const ASCII_CAPITAL_Z: u8 = 0x5A;
|
|
const ASCII_LOWERCASE_A: u8 = 0x61;
|
|
const ASCII_LOWERCASE_Z: u8 = 0x7A;
|
|
const ASCII_TAB: u8 = 0x09;
|
|
const ASCII_NEWLINE: u8 = 0x0A;
|
|
const ASCII_FORMFEED: u8 = 0x0C;
|
|
const ASCII_CR: u8 = 0x0D;
|
|
const ASCII_SPACE: u8 = 0x20;
|
|
|
|
/// Gets the latin1 bytes from the js engine.
|
|
/// Safety: Make sure the *mut JSString is not null.
|
|
unsafe fn get_latin1_string_bytes(
|
|
rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
|
|
) -> &[u8] {
|
|
debug_assert!(!rooted_traceable_box.get().is_null());
|
|
let mut length = 0;
|
|
unsafe {
|
|
let chars = JS_GetLatin1StringCharsAndLength(
|
|
Runtime::get().expect("JS runtime has shut down").as_ptr(),
|
|
ptr::null(),
|
|
rooted_traceable_box.get(),
|
|
&mut length,
|
|
);
|
|
assert!(!chars.is_null());
|
|
slice::from_raw_parts(chars, length)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
/// A type representing the underlying encoded bytes. Either Latin1 or Utf8.
|
|
pub enum EncodedBytes<'a> {
|
|
/// These bytes are Latin1 encoded.
|
|
Latin1Bytes(&'a [u8]),
|
|
/// This is a normal utf8 string given in bytes.
|
|
Utf8Bytes(&'a [u8]),
|
|
}
|
|
|
|
enum DOMStringType {
|
|
/// A simple rust string
|
|
Rust(String),
|
|
/// A JS String stored in mozjs.
|
|
JSString(RootedTraceableBox<Heap<*mut JSString>>),
|
|
#[cfg(test)]
|
|
/// This is used for testing of the bindings to give
|
|
/// a raw u8 Latin1 encoded string without having a js engine.
|
|
Latin1Vec(Vec<u8>),
|
|
}
|
|
|
|
impl DOMStringType {
|
|
/// Returns the str if Rust and otherwise panic. You need to call `make_rust`.
|
|
fn str(&self) -> &str {
|
|
match self {
|
|
DOMStringType::Rust(s) => s,
|
|
DOMStringType::JSString(_rooted_traceable_box) => {
|
|
panic!("Cannot do a string")
|
|
},
|
|
#[cfg(test)]
|
|
&DOMStringType::Latin1Vec(_) => panic!("Cannot do a string"),
|
|
}
|
|
}
|
|
|
|
/// Warning:
|
|
/// This function does not checking and just returns the raw bytes of teh string,
|
|
/// independently if they are utf8 or latin1.
|
|
/// The caller needs to take care that these make sense in context.
|
|
fn as_raw_bytes(&self) -> &[u8] {
|
|
match self {
|
|
DOMStringType::Rust(s) => s.as_bytes(),
|
|
DOMStringType::JSString(rooted_traceable_box) => unsafe {
|
|
get_latin1_string_bytes(rooted_traceable_box)
|
|
},
|
|
#[cfg(test)]
|
|
DOMStringType::Latin1Vec(items) => items,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
/// A view of the underlying string. This is always converted to Utf8.
|
|
pub struct StringView<'a>(Ref<'a, DOMStringType>);
|
|
|
|
impl<'a> StringView<'a> {
|
|
pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
|
|
self.0
|
|
.str()
|
|
.split(HTML_SPACE_CHARACTERS)
|
|
.filter(|s| !s.is_empty())
|
|
}
|
|
|
|
pub fn strip_prefix(&self, needle: &str) -> Option<&str> {
|
|
self.0.str().strip_prefix(needle)
|
|
}
|
|
|
|
pub fn chars(&self) -> Chars<'_> {
|
|
self.0.str().chars()
|
|
}
|
|
|
|
pub fn as_bytes(&self) -> &[u8] {
|
|
self.0.str().as_bytes()
|
|
}
|
|
}
|
|
|
|
impl Deref for StringView<'_> {
|
|
type Target = str;
|
|
fn deref(&self) -> &str {
|
|
self.0.str()
|
|
}
|
|
}
|
|
|
|
impl AsRef<str> for StringView<'_> {
|
|
fn as_ref(&self) -> &str {
|
|
self.deref()
|
|
}
|
|
}
|
|
|
|
impl PartialEq for StringView<'_> {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.0.str() == other.0.str()
|
|
}
|
|
}
|
|
|
|
impl PartialEq<&str> for StringView<'_> {
|
|
fn eq(&self, other: &&str) -> bool {
|
|
self.0.str() == *other
|
|
}
|
|
}
|
|
|
|
impl Eq for StringView<'_> {}
|
|
|
|
impl PartialOrd for StringView<'_> {
|
|
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
|
self.0.str().partial_cmp(other.0.str())
|
|
}
|
|
}
|
|
|
|
impl Ord for StringView<'_> {
|
|
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
|
self.0.str().cmp(other.0.str())
|
|
}
|
|
}
|
|
|
|
impl From<StringView<'_>> for String {
|
|
fn from(value: StringView<'_>) -> Self {
|
|
String::from(value.0.str())
|
|
}
|
|
}
|
|
|
|
/// Safety comment:
|
|
///
|
|
/// This method will _not_ trace the pointer if the rust string exists.
|
|
/// The js string could be garbage collected and, hence, violating this
|
|
/// could lead to undefined behavior
|
|
unsafe impl Trace for DOMStringType {
|
|
unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
|
|
unsafe {
|
|
match self {
|
|
DOMStringType::Rust(_s) => {},
|
|
DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
|
|
#[cfg(test)]
|
|
DOMStringType::Latin1Vec(_s) => {},
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl malloc_size_of::MallocSizeOf for DOMStringType {
|
|
fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
|
|
match self {
|
|
DOMStringType::Rust(s) => s.size_of(ops),
|
|
DOMStringType::JSString(_rooted_traceable_box) => {
|
|
// Managed by JS Engine
|
|
0
|
|
},
|
|
#[cfg(test)]
|
|
DOMStringType::Latin1Vec(s) => s.size_of(ops),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Debug for DOMStringType {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
|
|
DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
|
|
#[cfg(test)]
|
|
DOMStringType::Latin1Vec(s) => f
|
|
.debug_struct("DOMString")
|
|
.field("latin1_string", s)
|
|
.finish(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
/// A view of the underlying string. This is never converted to Utf8
|
|
pub struct EncodedBytesView<'a>(Ref<'a, DOMStringType>);
|
|
|
|
impl EncodedBytesView<'_> {
|
|
/// Get the bytes of the string in either latin1 or utf8 without costly conversion.
|
|
pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
|
|
match *self.0 {
|
|
DOMStringType::Rust(ref s) => EncodedBytes::Utf8Bytes(s.as_bytes()),
|
|
DOMStringType::JSString(ref rooted_traceable_box) => {
|
|
EncodedBytes::Latin1Bytes(unsafe { get_latin1_string_bytes(rooted_traceable_box) })
|
|
},
|
|
#[cfg(test)]
|
|
DOMStringType::Latin1Vec(ref s) => EncodedBytes::Latin1Bytes(s),
|
|
}
|
|
}
|
|
|
|
fn is_empty(&self) -> bool {
|
|
match self.encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => items.is_empty(),
|
|
EncodedBytes::Utf8Bytes(s) => s.is_empty(),
|
|
}
|
|
}
|
|
|
|
fn len(&self) -> usize {
|
|
match self.encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => items
|
|
.iter()
|
|
.map(|b| if *b <= ASCII_END { 1 } else { 2 })
|
|
.sum(),
|
|
EncodedBytes::Utf8Bytes(s) => s.len(),
|
|
}
|
|
}
|
|
}
|
|
|
|
////// A DOMString.
|
|
///
|
|
/// This type corresponds to the [`DOMString`] type in WebIDL.
|
|
///
|
|
/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
|
|
///
|
|
/// Conceptually, a DOMString has the same value space as a JavaScript String,
|
|
/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
|
|
/// unpaired surrogates present (also sometimes called WTF-16).
|
|
///
|
|
/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
|
|
/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
|
|
/// can not be represented as a Rust `String`). This introduces the question of
|
|
/// what to do with values being passed from JavaScript to Rust that contain
|
|
/// unpaired surrogates.
|
|
///
|
|
/// The hypothesis is that it does not matter much how exactly those values are
|
|
/// transformed, because passing unpaired surrogates into the DOM is very rare.
|
|
/// Instead Servo withh replace the unpaired surrogate by a U+FFFD replacement
|
|
/// character.
|
|
///
|
|
/// Currently, the lack of crash reports about this issue provides some
|
|
/// evidence to support the hypothesis. This evidence will hopefully be used to
|
|
/// convince other browser vendors that it would be safe to replace unpaired
|
|
/// surrogates at the boundary between JavaScript and native code. (This would
|
|
/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
|
|
/// and in Servo.)
|
|
///
|
|
/// This string class will keep either the Reference to the mozjs object alive
|
|
/// or will have an internal rust string.
|
|
/// We currently default to doing most of the string operation on the rust side.
|
|
/// You should use `str()` to get the Rust string (represented by a `StringView`
|
|
/// which you can deref to a string). You should assume that this conversion costs.
|
|
/// You should assume that all the functions incur the conversion cost.
|
|
///
|
|
#[repr(transparent)]
|
|
#[derive(Debug, MallocSizeOf, JSTraceable)]
|
|
pub struct DOMString(RefCell<DOMStringType>);
|
|
|
|
impl Clone for DOMString {
|
|
fn clone(&self) -> Self {
|
|
self.make_rust();
|
|
if let DOMStringType::Rust(ref s) = *self.0.borrow() {
|
|
DOMString::from_string(s.to_owned())
|
|
} else {
|
|
unreachable!()
|
|
}
|
|
}
|
|
}
|
|
|
|
pub enum DOMStringErrorType {
|
|
JSConversionError,
|
|
}
|
|
|
|
impl DOMString {
|
|
/// Creates a new `DOMString`.
|
|
pub fn new() -> DOMString {
|
|
DOMString(RefCell::new(DOMStringType::Rust(String::new())))
|
|
}
|
|
|
|
/// Creates the string from js. If the string can be encoded in latin1, just take the reference
|
|
/// to the JSString. Otherwise do the conversion to utf8 now.
|
|
pub fn from_js_string(
|
|
cx: SafeJSContext,
|
|
value: js::gc::HandleValue,
|
|
) -> Result<DOMString, DOMStringErrorType> {
|
|
let string_ptr = unsafe { js::rust::ToString(*cx, value) };
|
|
if string_ptr.is_null() {
|
|
debug!("ToString failed");
|
|
Err(DOMStringErrorType::JSConversionError)
|
|
} else {
|
|
let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
|
|
let inner = if latin1 {
|
|
let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
|
|
DOMStringType::JSString(h)
|
|
} else {
|
|
// We need to convert the string anyway as it is not just latin1
|
|
DOMStringType::Rust(unsafe {
|
|
jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
|
|
})
|
|
};
|
|
Ok(DOMString(RefCell::new(inner)))
|
|
}
|
|
}
|
|
|
|
pub fn from_string(s: String) -> DOMString {
|
|
DOMString(RefCell::new(DOMStringType::Rust(s)))
|
|
}
|
|
|
|
/// Transforms the string into rust string if not yet a rust string.
|
|
fn make_rust(&self) {
|
|
let string = {
|
|
let inner = self.0.borrow();
|
|
match *inner {
|
|
DOMStringType::Rust(_) => return,
|
|
DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
|
|
jsstr_to_string(
|
|
Runtime::get().expect("JS runtime has shut down").as_ptr(),
|
|
NonNull::new(rooted_traceable_box.get()).unwrap(),
|
|
)
|
|
},
|
|
#[cfg(test)]
|
|
DOMStringType::Latin1Vec(ref items) => {
|
|
let mut v = vec![0; items.len() * 2];
|
|
let real_size = tendril::encoding_rs::mem::convert_latin1_to_utf8(
|
|
items.as_slice(),
|
|
v.as_mut_slice(),
|
|
);
|
|
v.truncate(real_size);
|
|
|
|
// Safety: convert_latin1_to_utf8 converts the raw bytes to utf8 and the
|
|
// buffer is the size specified in the documentation, so this should be safe.
|
|
unsafe { String::from_utf8_unchecked(v) }
|
|
},
|
|
}
|
|
};
|
|
*self.0.borrow_mut() = DOMStringType::Rust(string);
|
|
}
|
|
|
|
/// Debug the current state of the string without modifying it.
|
|
#[expect(unused)]
|
|
fn debug_js(&self) {
|
|
match *self.0.borrow() {
|
|
DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
|
|
DOMStringType::JSString(ref rooted_traceable_box) => {
|
|
let s = unsafe {
|
|
jsstr_to_string(
|
|
Runtime::get().expect("JS runtime has shut down").as_ptr(),
|
|
ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
|
|
)
|
|
};
|
|
info!("JSString ({})", s);
|
|
},
|
|
#[cfg(test)]
|
|
DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
|
|
}
|
|
}
|
|
|
|
/// Returns the underlying rust string.
|
|
pub fn str(&self) -> StringView<'_> {
|
|
self.make_rust();
|
|
StringView(self.0.borrow())
|
|
}
|
|
|
|
/// Use this if you want to work on the `EncodedBytes` directly.
|
|
/// This will not do any conversions for you.
|
|
pub fn view(&self) -> EncodedBytesView<'_> {
|
|
EncodedBytesView(self.0.borrow())
|
|
}
|
|
|
|
pub fn clear(&mut self) {
|
|
*self.0.borrow_mut() = DOMStringType::Rust(String::new())
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.view().is_empty()
|
|
}
|
|
|
|
/// This length (as rust spec) is in bytes if the string would be utf8 not chars.
|
|
pub fn len(&self) -> usize {
|
|
self.view().len()
|
|
}
|
|
|
|
pub fn make_ascii_lowercase(&mut self) {
|
|
self.make_rust();
|
|
if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
|
|
s.make_ascii_lowercase();
|
|
}
|
|
}
|
|
|
|
pub fn push_str(&mut self, s: &str) {
|
|
self.make_rust();
|
|
if let DOMStringType::Rust(ref mut string) = *self.0.borrow_mut() {
|
|
string.push_str(s)
|
|
}
|
|
}
|
|
|
|
pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
|
|
if self.is_empty() {
|
|
return;
|
|
}
|
|
|
|
self.make_rust();
|
|
if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
|
|
let trailing_whitespace_len = s
|
|
.trim_end_matches(|ref c| char::is_ascii_whitespace(c))
|
|
.len();
|
|
s.truncate(trailing_whitespace_len);
|
|
if s.is_empty() {
|
|
return;
|
|
}
|
|
|
|
let first_non_whitespace = s.find(|ref c| !char::is_ascii_whitespace(c)).unwrap();
|
|
s.replace_range(0..first_non_whitespace, "");
|
|
}
|
|
}
|
|
|
|
/// This is a dom spec
|
|
pub fn is_valid_floating_point_number_string(&self) -> bool {
|
|
static RE: LazyLock<Regex> = LazyLock::new(|| {
|
|
Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
|
|
});
|
|
self.make_rust();
|
|
|
|
if let DOMStringType::Rust(ref s) = *self.0.borrow() {
|
|
RE.is_match(s) && self.parse_floating_point_number().is_some()
|
|
} else {
|
|
unreachable!()
|
|
}
|
|
}
|
|
|
|
pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
|
|
self.make_rust();
|
|
self.str().parse::<T>()
|
|
}
|
|
|
|
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
|
|
pub fn parse_floating_point_number(&self) -> Option<f64> {
|
|
self.make_rust();
|
|
parse_floating_point_number(&self.str())
|
|
}
|
|
|
|
/// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
|
|
pub fn set_best_representation_of_the_floating_point_number(&mut self) {
|
|
if let Some(val) = self.parse_floating_point_number() {
|
|
// [tc39] Step 2: If x is either +0 or -0, return "0".
|
|
let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
|
|
|
|
*self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
|
|
}
|
|
}
|
|
|
|
pub fn to_lowercase(&self) -> String {
|
|
self.make_rust();
|
|
self.str().to_lowercase()
|
|
}
|
|
|
|
pub fn to_uppercase(&self) -> String {
|
|
self.make_rust();
|
|
self.str().to_uppercase()
|
|
}
|
|
|
|
pub fn strip_newlines(&mut self) {
|
|
// > To strip newlines from a string, remove any U+000A LF and U+000D CR code
|
|
// > points from the string.
|
|
self.make_rust();
|
|
if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
|
|
s.retain(|c| c != '\r' && c != '\n');
|
|
}
|
|
}
|
|
|
|
/// Normalize newlines according to <https://infra.spec.whatwg.org/#normalize-newlines>.
|
|
pub fn normalize_newlines(&mut self) {
|
|
self.make_rust();
|
|
// > To normalize newlines in a string, replace every U+000D CR U+000A LF code point
|
|
// > pair with a single U+000A LF code point, and then replace every remaining
|
|
// > U+000D CR code point with a U+000A LF code point.
|
|
if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
|
|
*s = s.replace("\r\n", "\n").replace("\r", "\n")
|
|
}
|
|
}
|
|
|
|
pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
|
|
self.make_rust();
|
|
let new_string = self.str().to_owned();
|
|
DOMString(RefCell::new(DOMStringType::Rust(
|
|
new_string.replace(needle, replace_char),
|
|
)))
|
|
}
|
|
|
|
/// Pattern is not yet stable in rust, hence, we need different methods for str and char
|
|
pub fn starts_with(&self, c: char) -> bool {
|
|
if !c.is_ascii() {
|
|
self.make_rust();
|
|
self.str().starts_with(c)
|
|
} else {
|
|
match self.view().encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => items,
|
|
EncodedBytes::Utf8Bytes(s) => s,
|
|
}
|
|
// For both cases as we tested the char being ascii we can safely convert to a single u8.
|
|
.starts_with(&[c as u8])
|
|
}
|
|
}
|
|
|
|
pub fn starts_with_str(&self, needle: &str) -> bool {
|
|
self.make_rust();
|
|
self.str().starts_with(needle)
|
|
}
|
|
|
|
pub fn contains(&self, needle: &str) -> bool {
|
|
self.make_rust();
|
|
self.str().contains(needle)
|
|
}
|
|
|
|
pub fn to_ascii_lowercase(&self) -> String {
|
|
let conversion = match self.view().encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => {
|
|
if items.iter().all(|c| *c <= ASCII_END) {
|
|
// We are just simple ascii
|
|
Some(unsafe {
|
|
String::from_utf8_unchecked(
|
|
items
|
|
.iter()
|
|
.map(|c| {
|
|
if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
|
|
c + 32
|
|
} else {
|
|
*c
|
|
}
|
|
})
|
|
.collect(),
|
|
)
|
|
})
|
|
} else {
|
|
None
|
|
}
|
|
},
|
|
EncodedBytes::Utf8Bytes(s) => unsafe {
|
|
// Save because we know it was a utf8 string
|
|
Some(str::from_utf8_unchecked(s).to_ascii_lowercase())
|
|
},
|
|
};
|
|
// We otherwise would double borrow the refcell
|
|
if let Some(conversion) = conversion {
|
|
conversion
|
|
} else {
|
|
self.make_rust();
|
|
self.str().to_ascii_lowercase()
|
|
}
|
|
}
|
|
|
|
pub fn contains_html_space_characters(&self) -> bool {
|
|
const SPACE_BYTES: [u8; 5] = [
|
|
ASCII_TAB,
|
|
ASCII_NEWLINE,
|
|
ASCII_FORMFEED,
|
|
ASCII_CR,
|
|
ASCII_SPACE,
|
|
];
|
|
match self.view().encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => SPACE_BYTES.iter().any(|byte| items.contains(byte)),
|
|
EncodedBytes::Utf8Bytes(s) => {
|
|
// Save because we know it was a utf8 string
|
|
let s = unsafe { str::from_utf8_unchecked(s) };
|
|
s.contains(HTML_SPACE_CHARACTERS)
|
|
},
|
|
}
|
|
}
|
|
|
|
/// This returns the string in utf8 bytes, i.e., `[u8]` encoded with utf8.
|
|
pub fn as_bytes(&self) -> BytesView<'_> {
|
|
// BytesView will just give the raw bytes on dereference.
|
|
// If we are ascii this is the same for latin1 and utf8.
|
|
// Otherwise we convert to rust.
|
|
if self.is_ascii() {
|
|
BytesView(self.0.borrow())
|
|
} else {
|
|
self.make_rust();
|
|
BytesView(self.0.borrow())
|
|
}
|
|
}
|
|
|
|
/// Tests if there are only ascii lowercase characters. Does not include special characters.
|
|
pub fn is_ascii_lowercase(&self) -> bool {
|
|
match self.view().encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => items
|
|
.iter()
|
|
.all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
|
|
EncodedBytes::Utf8Bytes(s) => s
|
|
.iter()
|
|
.map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
|
|
.all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
|
|
}
|
|
}
|
|
|
|
/// Is the string only ascii characters
|
|
pub fn is_ascii(&self) -> bool {
|
|
match self.view().encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => items,
|
|
EncodedBytes::Utf8Bytes(items) => items,
|
|
}
|
|
.is_ascii()
|
|
}
|
|
}
|
|
|
|
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
|
|
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
|
|
// Steps 15-16 are telling us things about IEEE rounding modes
|
|
// for floating-point significands; this code assumes the Rust
|
|
// compiler already matches them in any cases where
|
|
// that actually matters. They are not
|
|
// related to f64::round(), which is for rounding to integers.
|
|
input.trim().parse::<f64>().ok().filter(|value| {
|
|
// A valid number is the same as what rust considers to be valid,
|
|
// except for +1., NaN, and Infinity.
|
|
!(value.is_infinite() || value.is_nan() || input.ends_with('.') || input.starts_with('+'))
|
|
})
|
|
}
|
|
|
|
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
|
|
|
|
impl Deref for BytesView<'_> {
|
|
type Target = [u8];
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
// This does the correct thing by the construction of BytesView in `DOMString::as_bytes`.
|
|
self.0.as_raw_bytes()
|
|
}
|
|
}
|
|
|
|
impl Ord for DOMString {
|
|
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
|
self.make_rust();
|
|
other.make_rust();
|
|
self.str().cmp(&other.str())
|
|
}
|
|
}
|
|
|
|
impl PartialOrd for DOMString {
|
|
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
|
self.make_rust();
|
|
other.make_rust();
|
|
self.str().partial_cmp(&other.str())
|
|
}
|
|
}
|
|
|
|
impl Extend<char> for DOMString {
|
|
fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
|
|
self.make_rust();
|
|
if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
|
|
s.extend(iter)
|
|
}
|
|
}
|
|
}
|
|
|
|
impl ToJSValConvertible for DOMString {
|
|
unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
|
|
let val = self.0.borrow();
|
|
match *val {
|
|
DOMStringType::Rust(ref s) => unsafe {
|
|
s.to_jsval(cx, rval);
|
|
},
|
|
DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
|
|
rval.set(StringValue(&*rooted_traceable_box.get()));
|
|
},
|
|
#[cfg(test)]
|
|
DOMStringType::Latin1Vec(ref items) => {
|
|
let mut v = vec![0; items.len() * 2];
|
|
let real_size = tendril::encoding_rs::mem::convert_latin1_to_utf8(
|
|
items.as_slice(),
|
|
v.as_mut_slice(),
|
|
);
|
|
v.truncate(real_size);
|
|
|
|
String::from_utf8(v)
|
|
.expect("Error in constructin test string")
|
|
.to_jsval(cx, rval);
|
|
},
|
|
};
|
|
}
|
|
}
|
|
|
|
impl std::hash::Hash for DOMString {
|
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
|
self.make_rust();
|
|
self.str().hash(state);
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for DOMString {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
self.make_rust();
|
|
fmt::Display::fmt(self.str().deref(), f)
|
|
}
|
|
}
|
|
|
|
impl Default for DOMString {
|
|
fn default() -> Self {
|
|
DOMString::new()
|
|
}
|
|
}
|
|
|
|
impl std::cmp::PartialEq<str> for DOMString {
|
|
fn eq(&self, other: &str) -> bool {
|
|
if other.is_ascii() {
|
|
other.as_bytes() ==
|
|
match self.view().encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => items,
|
|
EncodedBytes::Utf8Bytes(s) => s,
|
|
}
|
|
} else {
|
|
self.make_rust();
|
|
self.str().deref() == other
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::cmp::PartialEq<&str> for DOMString {
|
|
fn eq(&self, other: &&str) -> bool {
|
|
if other.is_ascii() {
|
|
other.as_bytes() ==
|
|
match self.view().encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => items,
|
|
EncodedBytes::Utf8Bytes(s) => s,
|
|
}
|
|
} else {
|
|
self.make_rust();
|
|
self.str().deref() == *other
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::cmp::PartialEq<String> for DOMString {
|
|
fn eq(&self, other: &String) -> bool {
|
|
if other.is_ascii() {
|
|
other.as_bytes() ==
|
|
match self.view().encoded_bytes() {
|
|
EncodedBytes::Latin1Bytes(items) => items,
|
|
EncodedBytes::Utf8Bytes(s) => s,
|
|
}
|
|
} else {
|
|
self.make_rust();
|
|
self.str().deref() == other
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::cmp::PartialEq<DOMString> for String {
|
|
fn eq(&self, other: &DOMString) -> bool {
|
|
other.eq(self)
|
|
}
|
|
}
|
|
|
|
impl std::cmp::PartialEq<DOMString> for str {
|
|
fn eq(&self, other: &DOMString) -> bool {
|
|
other.eq(self)
|
|
}
|
|
}
|
|
|
|
impl std::cmp::PartialEq for DOMString {
|
|
fn eq(&self, other: &DOMString) -> bool {
|
|
let result = match (self.view().encoded_bytes(), other.view().encoded_bytes()) {
|
|
(EncodedBytes::Latin1Bytes(items), EncodedBytes::Latin1Bytes(other_items)) => {
|
|
Some(items == other_items)
|
|
},
|
|
(EncodedBytes::Latin1Bytes(items), EncodedBytes::Utf8Bytes(other_s))
|
|
if other_s.is_ascii() =>
|
|
{
|
|
Some(items == other_s)
|
|
},
|
|
(EncodedBytes::Utf8Bytes(s), EncodedBytes::Latin1Bytes(other_items))
|
|
if s.is_ascii() =>
|
|
{
|
|
Some(s == other_items)
|
|
},
|
|
(EncodedBytes::Utf8Bytes(s), EncodedBytes::Utf8Bytes(other_s)) => Some(s == other_s),
|
|
_ => None,
|
|
};
|
|
|
|
if let Some(eq_result) = result {
|
|
eq_result
|
|
} else {
|
|
self.make_rust();
|
|
other.make_rust();
|
|
self.str() == other.str()
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::cmp::Eq for DOMString {}
|
|
|
|
impl From<std::string::String> for DOMString {
|
|
fn from(value: String) -> Self {
|
|
DOMString::from_string(value)
|
|
}
|
|
}
|
|
|
|
impl From<DOMString> for LocalName {
|
|
fn from(contents: DOMString) -> LocalName {
|
|
{
|
|
let view = contents.view();
|
|
let bytes = view.encoded_bytes();
|
|
let str = match bytes {
|
|
EncodedBytes::Latin1Bytes(items) => {
|
|
if items.iter().all(|c| c.is_ascii()) {
|
|
unsafe { Some(str::from_utf8_unchecked(items)) }
|
|
} else {
|
|
None
|
|
}
|
|
},
|
|
EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
|
|
};
|
|
if let Some(s) = str {
|
|
return LocalName::from(s);
|
|
}
|
|
}
|
|
contents.make_rust();
|
|
LocalName::from(contents.str().deref())
|
|
}
|
|
}
|
|
|
|
impl From<&DOMString> for LocalName {
|
|
fn from(contents: &DOMString) -> LocalName {
|
|
{
|
|
let view = contents.view();
|
|
let bytes = view.encoded_bytes();
|
|
let str = match bytes {
|
|
EncodedBytes::Latin1Bytes(items) => {
|
|
if items.iter().all(|c| c.is_ascii()) {
|
|
// This is safe as the string is ascii and it comes from a DOMString
|
|
unsafe { Some(str::from_utf8_unchecked(items)) }
|
|
} else {
|
|
None
|
|
}
|
|
},
|
|
EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
|
|
};
|
|
if let Some(s) = str {
|
|
return LocalName::from(s);
|
|
}
|
|
}
|
|
contents.make_rust();
|
|
LocalName::from(contents.str().deref())
|
|
}
|
|
}
|
|
|
|
impl From<DOMString> for Namespace {
|
|
fn from(contents: DOMString) -> Namespace {
|
|
{
|
|
let view = contents.view();
|
|
let bytes = view.encoded_bytes();
|
|
let str = match bytes {
|
|
EncodedBytes::Latin1Bytes(items) => {
|
|
if items.iter().all(|c| c.is_ascii()) {
|
|
// This is safe as the string is ascii and it comes from a DOMString
|
|
unsafe { Some(str::from_utf8_unchecked(items)) }
|
|
} else {
|
|
None
|
|
}
|
|
},
|
|
EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
|
|
};
|
|
if let Some(s) = str {
|
|
return Namespace::from(s);
|
|
}
|
|
}
|
|
contents.make_rust();
|
|
Namespace::from(contents.str().deref())
|
|
}
|
|
}
|
|
|
|
impl From<DOMString> for Atom {
|
|
fn from(contents: DOMString) -> Atom {
|
|
{
|
|
let view = contents.view();
|
|
let bytes = view.encoded_bytes();
|
|
let str = match bytes {
|
|
EncodedBytes::Latin1Bytes(items) => {
|
|
if items.iter().all(|c| c.is_ascii()) {
|
|
// Safety: The string only has ascii chars, hence this is ok.
|
|
unsafe { Some(str::from_utf8_unchecked(items)) }
|
|
} else {
|
|
None
|
|
}
|
|
},
|
|
EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
|
|
};
|
|
if let Some(s) = str {
|
|
return Atom::from(s);
|
|
}
|
|
}
|
|
contents.make_rust();
|
|
Atom::from(contents.str().deref())
|
|
}
|
|
}
|
|
|
|
impl From<&str> for DOMString {
|
|
fn from(contents: &str) -> DOMString {
|
|
DOMString(RefCell::new(DOMStringType::Rust(String::from(contents))))
|
|
}
|
|
}
|
|
|
|
impl From<DOMString> for String {
|
|
fn from(val: DOMString) -> Self {
|
|
val.make_rust();
|
|
val.str().to_owned()
|
|
}
|
|
}
|
|
|
|
impl From<DOMString> for Vec<u8> {
|
|
fn from(value: DOMString) -> Self {
|
|
value.make_rust();
|
|
value.str().as_bytes().to_vec()
|
|
}
|
|
}
|
|
|
|
impl From<Cow<'_, str>> for DOMString {
|
|
fn from(value: Cow<'_, str>) -> Self {
|
|
DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
|
|
}
|
|
}
|
|
|
|
#[macro_export]
|
|
macro_rules! match_domstring_ascii_inner {
|
|
($variant: expr, $input: expr, $p: literal => $then: expr, $($rest:tt)*) => {
|
|
if {
|
|
debug_assert!(($p).is_ascii());
|
|
$variant($p.as_bytes())
|
|
} == $input {
|
|
$then
|
|
} else {
|
|
match_domstring_ascii_inner!($variant, $input, $($rest)*)
|
|
}
|
|
|
|
};
|
|
($variant: expr, $input: expr, $p: pat => $then: expr,) => {
|
|
match $input {
|
|
$p => $then
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Use this to match &str against lazydomstring efficiently.
|
|
/// You are only allowed to match ascii strings otherwise this macro will
|
|
/// lead to wrong results.
|
|
/// ```ignore
|
|
/// let s = DOMString::from_string(String::from("test"));
|
|
/// let value = match_domstring!(s,
|
|
/// "test1" => 1,
|
|
/// "test2" => 2,
|
|
/// "test" => 3,
|
|
/// _ => 4,
|
|
/// );
|
|
/// assert_eq!(value, 3);
|
|
/// ```
|
|
#[macro_export]
|
|
macro_rules! match_domstring_ascii {
|
|
($input:expr, $($tail:tt)*) => {
|
|
{
|
|
use $crate::match_domstring_ascii_inner;
|
|
use $crate::domstring::EncodedBytes;
|
|
|
|
let view = $input.view();
|
|
let s = view.encoded_bytes();
|
|
if matches!(s, EncodedBytes::Latin1Bytes(_)) {
|
|
match_domstring_ascii_inner!(EncodedBytes::Latin1Bytes, s, $($tail)*)
|
|
} else {
|
|
match_domstring_ascii_inner!(EncodedBytes::Utf8Bytes, s, $($tail)*)
|
|
}
|
|
}
|
|
};
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
const LATIN1_PILLCROW: u8 = 0xB6;
|
|
const UTF8_PILLCROW: [u8; 2] = [194, 182];
|
|
const LATIN1_POWER2: u8 = 0xB2;
|
|
|
|
fn from_latin1(l1vec: Vec<u8>) -> DOMString {
|
|
DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
|
|
}
|
|
|
|
#[test]
|
|
fn string_functions() {
|
|
let s = DOMString::from("AbBcC❤&%$#");
|
|
let s_copy = s.clone();
|
|
assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
|
|
assert_eq!(s, s_copy);
|
|
assert_eq!(s.len(), 12);
|
|
assert_eq!(s_copy.len(), 12);
|
|
assert!(s.starts_with('A'));
|
|
let s2 = DOMString::from("");
|
|
assert!(s2.is_empty());
|
|
}
|
|
|
|
#[test]
|
|
fn string_functions_latin1() {
|
|
{
|
|
let s = from_latin1(vec![
|
|
b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
|
|
]);
|
|
assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
|
|
}
|
|
{
|
|
let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
|
|
assert_eq!(s.to_ascii_lowercase(), "abbcc");
|
|
}
|
|
{
|
|
let s = from_latin1(vec![
|
|
b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
|
|
]);
|
|
assert_eq!(s.len(), 11);
|
|
assert!(s.starts_with('A'));
|
|
}
|
|
{
|
|
let s = from_latin1(vec![]);
|
|
assert!(s.is_empty());
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_length() {
|
|
let s1 = from_latin1(vec![
|
|
0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
|
|
0xAE, 0xAF,
|
|
]);
|
|
let s2 = from_latin1(vec![
|
|
0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
|
|
0xBE, 0xBF,
|
|
]);
|
|
let s3 = from_latin1(vec![
|
|
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
|
|
0xCE, 0xCF,
|
|
]);
|
|
let s4 = from_latin1(vec![
|
|
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
|
|
0xDE, 0xDF,
|
|
]);
|
|
let s5 = from_latin1(vec![
|
|
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
|
|
0xEE, 0xEF,
|
|
]);
|
|
let s6 = from_latin1(vec![
|
|
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
|
|
0xFE, 0xFF,
|
|
]);
|
|
|
|
let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
|
|
let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
|
|
let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
|
|
let s4_utf8 = String::from("ÐÑÒÓÔÕÖרÙÚÛÜÝÞß");
|
|
let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
|
|
let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
|
|
|
|
assert_eq!(s1.len(), s1_utf8.len());
|
|
assert_eq!(s2.len(), s2_utf8.len());
|
|
assert_eq!(s3.len(), s3_utf8.len());
|
|
assert_eq!(s4.len(), s4_utf8.len());
|
|
assert_eq!(s5.len(), s5_utf8.len());
|
|
assert_eq!(s6.len(), s6_utf8.len());
|
|
|
|
s1.make_rust();
|
|
s2.make_rust();
|
|
s3.make_rust();
|
|
s4.make_rust();
|
|
s5.make_rust();
|
|
s6.make_rust();
|
|
assert_eq!(s1.len(), s1_utf8.len());
|
|
assert_eq!(s2.len(), s2_utf8.len());
|
|
assert_eq!(s3.len(), s3_utf8.len());
|
|
assert_eq!(s4.len(), s4_utf8.len());
|
|
assert_eq!(s5.len(), s5_utf8.len());
|
|
assert_eq!(s6.len(), s6_utf8.len());
|
|
}
|
|
|
|
#[test]
|
|
fn test_convert() {
|
|
let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
|
|
s.make_rust();
|
|
assert_eq!(&*s.str(), "abc%$");
|
|
}
|
|
|
|
#[test]
|
|
fn partial_eq() {
|
|
let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
|
|
let string = String::from("abc%$");
|
|
let s2 = DOMString::from_string(string.clone());
|
|
assert_eq!(s, s2);
|
|
assert_eq!(s, string);
|
|
}
|
|
|
|
#[test]
|
|
fn encoded_bytes() {
|
|
let bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
|
|
let s = from_latin1(bytes.clone());
|
|
if let EncodedBytes::Latin1Bytes(s) = s.view().encoded_bytes() {
|
|
assert_eq!(s, bytes)
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn testing_stringview() {
|
|
let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
|
|
|
|
assert_eq!(
|
|
s.str().chars().collect::<Vec<char>>(),
|
|
vec!['a', 'b', 'c', '%', '$', '²']
|
|
);
|
|
assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
|
|
}
|
|
|
|
// We need to be extra careful here as two strings that have different
|
|
// representation need to have the same hash.
|
|
// Additionally, the interior mutability is only used for the conversion
|
|
// which is forced by Hash. Hence, it is safe to have this interior mutability.
|
|
#[test]
|
|
fn test_hash() {
|
|
use std::hash::{DefaultHasher, Hash, Hasher};
|
|
fn hash_value(d: &DOMString) -> u64 {
|
|
let mut hasher = DefaultHasher::new();
|
|
d.hash(&mut hasher);
|
|
hasher.finish()
|
|
}
|
|
|
|
let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
|
|
let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
|
|
s_converted.make_rust();
|
|
let s2 = DOMString::from_string(String::from("abc%$²"));
|
|
|
|
let hash_s = hash_value(&s);
|
|
let hash_s_converted = hash_value(&s_converted);
|
|
let hash_s2 = hash_value(&s2);
|
|
|
|
assert_eq!(hash_s, hash_s2);
|
|
assert_eq!(hash_s, hash_s_converted);
|
|
}
|
|
|
|
// Testing match_lazydomstring if it executes the statements in the match correctly
|
|
#[test]
|
|
fn test_match_executing() {
|
|
// executing
|
|
{
|
|
let s = from_latin1(vec![b'a', b'b', b'c']);
|
|
match_domstring_ascii!( s,
|
|
"abc" => assert!(true),
|
|
"bcd" => assert!(false),
|
|
_ => (),
|
|
);
|
|
}
|
|
|
|
{
|
|
let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
|
|
match_domstring_ascii!( s,
|
|
"abc/" => assert!(true),
|
|
"bcd" => assert!(false),
|
|
_ => (),
|
|
);
|
|
}
|
|
|
|
{
|
|
let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
|
|
match_domstring_ascii!( s,
|
|
"bcd" => assert!(false),
|
|
"abc%$" => assert!(true),
|
|
_ => (),
|
|
);
|
|
}
|
|
|
|
{
|
|
let s = DOMString::from_string(String::from("abcde"));
|
|
match_domstring_ascii!( s,
|
|
"abc" => assert!(false),
|
|
"bcd" => assert!(false),
|
|
_ => assert!(true),
|
|
);
|
|
}
|
|
{
|
|
let s = DOMString::from_string(String::from("abc%$"));
|
|
match_domstring_ascii!( s,
|
|
"bcd" => assert!(false),
|
|
"abc%$" => assert!(true),
|
|
_ => (),
|
|
);
|
|
}
|
|
{
|
|
let s = from_latin1(vec![b'a', b'b', b'c']);
|
|
match_domstring_ascii!( s,
|
|
"abcdd" => assert!(false),
|
|
"bcd" => assert!(false),
|
|
_ => (),
|
|
);
|
|
}
|
|
}
|
|
|
|
// Testing match_lazydomstring if it evaluates to the correct expression
|
|
#[test]
|
|
fn test_match_returning_result() {
|
|
{
|
|
let s = from_latin1(vec![b'a', b'b', b'c']);
|
|
let res = match_domstring_ascii!( s,
|
|
"abc" => true,
|
|
"bcd" => false,
|
|
_ => false,
|
|
);
|
|
assert_eq!(res, true);
|
|
}
|
|
{
|
|
let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
|
|
let res = match_domstring_ascii!( s,
|
|
"abc/" => true,
|
|
"bcd" => false,
|
|
_ => false,
|
|
);
|
|
assert_eq!(res, true);
|
|
}
|
|
{
|
|
let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
|
|
let res = match_domstring_ascii!( s,
|
|
"bcd" => false,
|
|
"abc%$" => true,
|
|
_ => false,
|
|
);
|
|
assert_eq!(res, true);
|
|
}
|
|
|
|
{
|
|
let s = DOMString::from_string(String::from("abcde"));
|
|
let res = match_domstring_ascii!( s,
|
|
"abc" => false,
|
|
"bcd" => false,
|
|
_ => true,
|
|
);
|
|
assert_eq!(res, true);
|
|
}
|
|
{
|
|
let s = DOMString::from_string(String::from("abc%$"));
|
|
let res = match_domstring_ascii!( s,
|
|
"bcd" => false,
|
|
"abc%$" => true,
|
|
_ => false,
|
|
);
|
|
assert_eq!(res, true);
|
|
}
|
|
{
|
|
let s = from_latin1(vec![b'a', b'b', b'c']);
|
|
let res = match_domstring_ascii!( s,
|
|
"abcdd" => false,
|
|
"bcd" => false,
|
|
_ => true,
|
|
);
|
|
assert_eq!(res, true);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
#[should_panic]
|
|
fn test_match_panic() {
|
|
let s = DOMString::from_string(String::from("abcd"));
|
|
let _res = match_domstring_ascii!(s,
|
|
"❤" => true,
|
|
_ => false,);
|
|
}
|
|
|
|
#[test]
|
|
#[should_panic]
|
|
fn test_match_panic2() {
|
|
let s = DOMString::from_string(String::from("abcd"));
|
|
let _res = match_domstring_ascii!(s,
|
|
"abc" => false,
|
|
"❤" => true,
|
|
_ => false,
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_strip_whitespace() {
|
|
{
|
|
let mut s = from_latin1(vec![
|
|
b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
|
|
]);
|
|
|
|
s.strip_leading_and_trailing_ascii_whitespace();
|
|
s.make_rust();
|
|
assert_eq!(&*s.str(), "abc%$²");
|
|
}
|
|
{
|
|
let mut s = DOMString::from_string(String::from(" \n abc%$ "));
|
|
|
|
s.strip_leading_and_trailing_ascii_whitespace();
|
|
s.make_rust();
|
|
assert_eq!(&*s.str(), "abc%$");
|
|
}
|
|
}
|
|
|
|
// https://infra.spec.whatwg.org/#ascii-whitespace
|
|
#[test]
|
|
fn contains_html_space_characters() {
|
|
let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']); // TAB
|
|
assert!(s.contains_html_space_characters());
|
|
s.make_rust();
|
|
assert!(s.contains_html_space_characters());
|
|
|
|
let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']); // NEWLINE
|
|
assert!(s.contains_html_space_characters());
|
|
s.make_rust();
|
|
assert!(s.contains_html_space_characters());
|
|
|
|
let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']); // FF
|
|
assert!(s.contains_html_space_characters());
|
|
s.make_rust();
|
|
assert!(s.contains_html_space_characters());
|
|
|
|
let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']); // Carriage Return
|
|
assert!(s.contains_html_space_characters());
|
|
s.make_rust();
|
|
assert!(s.contains_html_space_characters());
|
|
|
|
let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']); // SPACE
|
|
assert!(s.contains_html_space_characters());
|
|
s.make_rust();
|
|
assert!(s.contains_html_space_characters());
|
|
|
|
let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
|
|
assert!(!s.contains_html_space_characters());
|
|
s.make_rust();
|
|
assert!(!s.contains_html_space_characters());
|
|
}
|
|
|
|
#[test]
|
|
fn atom() {
|
|
let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
|
|
let atom1 = Atom::from(s);
|
|
let s2 = DOMString::from_string(String::from("aaa aa"));
|
|
let atom2 = Atom::from(s2);
|
|
assert_eq!(atom1, atom2);
|
|
let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
|
|
let atom3 = Atom::from(s3);
|
|
assert_ne!(atom1, atom3);
|
|
}
|
|
|
|
#[test]
|
|
fn namespace() {
|
|
let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
|
|
let atom1 = Namespace::from(s);
|
|
let s2 = DOMString::from_string(String::from("aaa aa"));
|
|
let atom2 = Namespace::from(s2);
|
|
assert_eq!(atom1, atom2);
|
|
let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
|
|
let atom3 = Namespace::from(s3);
|
|
assert_ne!(atom1, atom3);
|
|
}
|
|
|
|
#[test]
|
|
fn localname() {
|
|
let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
|
|
let atom1 = LocalName::from(s);
|
|
let s2 = DOMString::from_string(String::from("aaa aa"));
|
|
let atom2 = LocalName::from(s2);
|
|
assert_eq!(atom1, atom2);
|
|
let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
|
|
let atom3 = LocalName::from(s3);
|
|
assert_ne!(atom1, atom3);
|
|
}
|
|
|
|
#[test]
|
|
fn is_ascii_lowercase() {
|
|
let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
|
|
assert!(!s.is_ascii_lowercase());
|
|
let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
|
|
assert!(!s.is_ascii_lowercase());
|
|
let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
|
|
assert!(s.is_ascii_lowercase());
|
|
let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
|
|
assert!(!s.is_ascii_lowercase());
|
|
let s = DOMString::from_string(String::from("`aaaz"));
|
|
assert!(!s.is_ascii_lowercase());
|
|
let s = DOMString::from_string(String::from("aaaz"));
|
|
assert!(s.is_ascii_lowercase());
|
|
}
|
|
|
|
#[test]
|
|
fn test_as_bytes() {
|
|
const ASCII_SMALL_A: u8 = b'a';
|
|
const ASCII_SMALL_Z: u8 = b'z';
|
|
|
|
let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
|
|
let s = from_latin1(v1.clone());
|
|
assert_eq!(
|
|
*s.as_bytes(),
|
|
[
|
|
ASCII_SMALL_A,
|
|
ASCII_SMALL_A,
|
|
ASCII_SMALL_A,
|
|
UTF8_PILLCROW[0],
|
|
UTF8_PILLCROW[1],
|
|
ASCII_SMALL_A,
|
|
ASCII_SMALL_A
|
|
]
|
|
);
|
|
|
|
let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
|
|
let s = from_latin1(v2.clone());
|
|
assert_eq!(
|
|
*s.as_bytes(),
|
|
[
|
|
ASCII_SMALL_A,
|
|
ASCII_SMALL_A,
|
|
ASCII_SMALL_A,
|
|
ASCII_SMALL_A,
|
|
ASCII_SMALL_Z
|
|
]
|
|
);
|
|
|
|
let str = "abc%$²".to_owned();
|
|
let s = DOMString::from(str.clone());
|
|
assert_eq!(&*s.as_bytes(), str.as_bytes());
|
|
let str = "AbBcC❤&%$#".to_owned();
|
|
let s = DOMString::from(str.clone());
|
|
assert_eq!(&*s.as_bytes(), str.as_bytes());
|
|
}
|
|
}
|