mirror of
https://github.com/servo/servo
synced 2026-05-12 18:06:32 +02:00
Servo currently completely ignores `<meta charset>` tags. When we find one with an encoding that is incompatible with the current one, then we should reload the page and start over with the new encoding. A common optimization that has even made its way into the specification is to wait for a few bytes to arrive and inspect them for `meta` tags, so the browser is able to use the correct encoding from the very beginning. In practice, I've run into problems with our WPT harness when reloading the page after `meta` tags. Therefore, this change implements the optimization first, so we never have to reload when running WPT. I've implemented prescanning in a way where we wait for 1024 bytes to arrive or for one second to pass, whichever one happens first. This causes a large number of web platform tests to flip around. I've looked at most of the new failures and I believe they're reasonable. Testing: New tests start to pass. Part of https://github.com/servo/servo/issues/6414 --------- Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
85 lines
2.2 KiB
Rust
85 lines
2.2 KiB
Rust
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
|
|
|
use encoding_rs::{UTF_8, UTF_16BE, UTF_16LE};
|
|
use script::test::encoding_detection::{
|
|
get_xml_encoding, prescan_the_byte_stream_to_determine_the_encoding,
|
|
};
|
|
|
|
/// A stream that opens with the characters `<?x` laid out as 16-bit code units
/// is expected to be reported as UTF-16 with the matching byte order.
#[test]
fn html_encoding_with_xml_declaration() {
    // `<`, `?`, `x` with the zero byte second (little endian), then a stray 0x42.
    let little_endian: [u8; 7] = [0x3C, 0x0, 0x3F, 0x0, 0x78, 0x0, 0x42];
    let detected = prescan_the_byte_stream_to_determine_the_encoding(&little_endian);
    assert_eq!(detected, Some(UTF_16LE));

    // The same characters with the zero byte first (big endian), then a stray 0x42.
    let big_endian: [u8; 7] = [0x0, 0x3C, 0x0, 0x3F, 0x0, 0x78, 0x42];
    let detected = prescan_the_byte_stream_to_determine_the_encoding(&big_endian);
    assert_eq!(detected, Some(UTF_16BE));
}
|
|
|
|
/// A `<meta charset>` that only occurs inside a comment is expected to yield
/// no encoding.
#[test]
fn meta_charset_within_comment() {
    let markup: &[u8] = b"<!-- <meta charset='utf8'> -->";
    let detected = prescan_the_byte_stream_to_determine_the_encoding(markup);
    assert_eq!(detected, None);
}
|
|
|
|
/// A `<meta charset>` that follows an already closed comment is expected to
/// be picked up.
#[test]
fn meta_charset_with_preceding_comment() {
    // Ordinary comment, closed by `-->`.
    let after_regular_comment: &[u8] = b"<!-- --> <meta charset='utf8'>";
    let detected = prescan_the_byte_stream_to_determine_the_encoding(after_regular_comment);
    assert_eq!(detected, Some(UTF_8));

    // The terse `<!-->` form is expected to count as a complete comment as well.
    let after_terse_comment: &[u8] = b"<!--> <meta charset='utf8'>";
    let detected = prescan_the_byte_stream_to_determine_the_encoding(after_terse_comment);
    assert_eq!(detected, Some(UTF_8));
}
|
|
|
|
/// Input that does not begin with `<?xml` (here: `<?xmX`) is expected to
/// produce no encoding.
#[test]
fn xml_encoding_invalid_start() {
    let declaration: &[u8] = b"<?xmX encoding='UTF8'>";
    let detected = get_xml_encoding(declaration);
    assert_eq!(detected, None);
}
|
|
|
|
/// An `encoding=` that only shows up after the closing `>` of the declaration
/// is expected to be ignored.
#[test]
fn xml_encoding_outside_of_declaration() {
    let declaration: &[u8] = b"<?xml> encoding='UTF8'";
    let detected = get_xml_encoding(declaration);
    assert_eq!(detected, None);
}
|
|
|
|
/// An encoding label that is not enclosed by a quote on both sides is
/// expected to be rejected.
#[test]
fn xml_encoding_missing_quotes() {
    // No quote in front of the label.
    let without_opening_quote: &[u8] = b"<?xml encoding=UTF8'>";
    assert_eq!(get_xml_encoding(without_opening_quote), None);

    // No quote behind the label.
    let without_closing_quote: &[u8] = b"<?xml encoding='UTF8>";
    assert_eq!(get_xml_encoding(without_closing_quote), None);
}
|
|
|
|
/// A quoted label that contains a space (here: a leading one) is expected to
/// produce no encoding.
#[test]
fn xml_encoding_containing_whitespace_within_quotes() {
    let declaration: &[u8] = b"<?xml encoding=' UTF8'>";
    let detected = get_xml_encoding(declaration);
    assert_eq!(detected, None);
}
|
|
|
|
/// A label wrapped in single quotes is expected to be accepted.
#[test]
fn xml_encoding_single_quotes() {
    let declaration: &[u8] = b"<?xml encoding='UTF8'>";
    let detected = get_xml_encoding(declaration);
    assert_eq!(detected, Some(UTF_8));
}
|
|
|
|
/// A label wrapped in double quotes is expected to be accepted.
#[test]
fn xml_encoding_double_quotes() {
    let declaration: &[u8] = b"<?xml encoding=\"UTF8\">";
    let detected = get_xml_encoding(declaration);
    assert_eq!(detected, Some(UTF_8));
}
|
|
|
|
/// Spaces and NUL bytes on either side of the `=` are expected not to prevent
/// the label from being found.
#[test]
fn xml_encoding_with_whitespace_around_equal_sign() {
    let declaration: &[u8] = b"<?xml encoding \x00 = \x00 \"UTF8\">";
    let detected = get_xml_encoding(declaration);
    assert_eq!(detected, Some(UTF_8));
}
|