url: Let origins of file:// URLs be potentially trustworthy (#43989)

The origin of a `file` URL is unspecified. Engines treat such origins as
opaque except in a few special cases, one of which is the "is
potentially trustworthy" algorithm. This change allows consumers of
`servo-url` to distinguish between regular opaque origins and file
origins. We then use that distinction to mark file origins as
"potentially trustworthy", which is what the spec wants.

For now we can get away without changes to the `url` crate (the one used
in the wider ecosystem, not just servo), but I'm unsure if that will be
the case in the future.

Testing: This change adds a test
Fixes: https://github.com/servo/servo/issues/42540

---------

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
Simon Wülker
2026-04-07 20:29:30 +02:00
committed by GitHub
parent f977c06f9d
commit 37a1f93b91
6 changed files with 100 additions and 38 deletions

View File

@@ -42,7 +42,7 @@ use serde::{Deserialize, Serialize};
use servo_arc::Arc as ServoArc;
use servo_base::generic_channel::CallbackSetter;
use servo_base::id::PipelineId;
use servo_url::{Host, ImmutableOrigin, ServoUrl};
use servo_url::{Host, ServoUrl};
use tokio::sync::Mutex as TokioMutex;
use tokio::sync::mpsc::{UnboundedReceiver as TokioReceiver, UnboundedSender as TokioSender};
@@ -652,9 +652,10 @@ pub async fn main_fetch(
// Step 14. If response is not a network error and response is not a filtered response, then:
let mut response = if !response.is_network_error() && response.internal_response.is_none() {
// Substep 1.
// Step 14.1 If request's response tainting is "cors", then:
if request.response_tainting == ResponseTainting::CorsTainting {
// Subsubstep 1.
// Step 14.1.1 Let headerNames be the result of extracting header list values given
// `Access-Control-Expose-Headers` and response's header list.
let header_names: Option<Vec<HeaderName>> = response
.headers
.typed_get::<AccessControlExposeHeaders>()
@@ -680,7 +681,8 @@ pub async fn main_fetch(
}
}
// Substep 2.
// Step 14.2 Set response to the following filtered response with response as its internal response,
// depending on request's response tainting:
let response_type = match request.response_tainting {
ResponseTainting::Basic => ResponseType::Basic,
ResponseTainting::CorsTainting => ResponseType::Cors,
@@ -1363,15 +1365,10 @@ pub enum MixedSecurityProhibited {
/// <https://w3c.github.io/webappsec-mixed-content/#categorize-settings-object>
fn do_settings_prohibit_mixed_security_contexts(request: &Request) -> MixedSecurityProhibited {
if let Origin::Origin(ref origin) = request.origin {
// Workers created from a data: url are secure if they were created from secure contexts
let is_origin_data_url_worker = matches!(
*origin,
ImmutableOrigin::Opaque(servo_url::OpaqueOrigin::SecureWorkerFromDataUrl(_))
);
// Step 1. If settings origin is a potentially trustworthy origin,
// then return "Prohibits Mixed Security Contexts".
if origin.is_potentially_trustworthy() || is_origin_data_url_worker {
// NOTE: Workers created from a data: url are secure if they were created from secure contexts
if origin.is_potentially_trustworthy() || origin.is_for_data_worker_from_secure_context() {
return MixedSecurityProhibited::Prohibited;
}
}

View File

@@ -1639,7 +1639,7 @@ fn test_fetch_compressed_response_update_count() {
fn test_origin_serialization_compatibility() {
let ensure_serialiations_match = |url_string| {
let url = Url::parse(url_string).unwrap();
let origin = ImmutableOrigin::new(url.origin());
let origin = ImmutableOrigin::new(&url);
let serialized = format!("{}", serialize_origin(&origin));
assert_eq!(serialized, origin.ascii_serialization());
};

View File

@@ -68,8 +68,7 @@ pub fn parse_blob_url(url: &ServoUrl) -> Result<(Uuid, ImmutableOrigin), &'stati
let origin = Url::parse(url.path())
.ok()
.filter(|url| matches!(url.scheme(), "http" | "https" | "file"))
.map(|url| url.origin())
.map(ImmutableOrigin::new)
.map(|url| ImmutableOrigin::new(&url))
.unwrap_or(ImmutableOrigin::new_opaque());
let id = Uuid::from_str(uuid).map_err(|_| "Failed to parse UUID from path segment")?;

View File

@@ -88,7 +88,7 @@ impl ServoUrl {
}
pub fn origin(&self) -> ImmutableOrigin {
ImmutableOrigin::new(self.0.origin())
ImmutableOrigin::new(self.as_url())
}
pub fn scheme(&self) -> &str {
@@ -235,15 +235,17 @@ impl ServoUrl {
/// <https://w3c.github.io/webappsec-secure-contexts/#potentially-trustworthy-url>
pub fn is_potentially_trustworthy(&self) -> bool {
// Step 1
// Step 1. If url is "about:blank" or "about:srcdoc", return "Potentially Trustworthy".
if self.as_str() == "about:blank" || self.as_str() == "about:srcdoc" {
return true;
}
// Step 2
// Step 2. If url's scheme is "data", return "Potentially Trustworthy".
if self.scheme() == "data" {
return true;
}
// Step 3
// Step 3. Return the result of executing §3.1 Is origin potentially trustworthy? on url's origin.
self.origin().is_potentially_trustworthy()
}

View File

@@ -9,7 +9,7 @@ use std::rc::Rc;
use malloc_size_of::malloc_size_of_is_0;
use malloc_size_of_derive::MallocSizeOf;
use serde::{Deserialize, Serialize};
use url::{Host, Origin};
use url::{Host, Origin, Url};
use uuid::Uuid;
/// The origin of an URL
@@ -46,8 +46,12 @@ impl DomainComparable for MutableOrigin {
}
impl ImmutableOrigin {
pub fn new(origin: Origin) -> ImmutableOrigin {
match origin {
pub fn new(url: &Url) -> ImmutableOrigin {
if url.scheme() == "file" {
return Self::new_opaque_for_file();
}
match url.origin() {
Origin::Opaque(_) => ImmutableOrigin::new_opaque(),
Origin::Tuple(scheme, host, port) => ImmutableOrigin::Tuple(scheme, host, port),
}
@@ -63,12 +67,28 @@ impl ImmutableOrigin {
/// Creates a new opaque origin that is only equal to itself.
pub fn new_opaque() -> ImmutableOrigin {
ImmutableOrigin::Opaque(OpaqueOrigin::Opaque(Uuid::new_v4()))
ImmutableOrigin::Opaque(OpaqueOrigin {
id: Uuid::new_v4(),
is_for_data_worker_from_secure_context: false,
is_file_origin: false,
})
}
// For use in mixed security context tests because data: URL workers inherit contexts
/// For use in mixed security context tests because data: URL workers inherit contexts
pub fn new_opaque_data_url_worker() -> ImmutableOrigin {
ImmutableOrigin::Opaque(OpaqueOrigin::SecureWorkerFromDataUrl(Uuid::new_v4()))
ImmutableOrigin::Opaque(OpaqueOrigin {
id: Uuid::new_v4(),
is_for_data_worker_from_secure_context: true,
is_file_origin: false,
})
}
/// Creates a new opaque origin that is flagged as belonging to a `file` URL.
///
/// The origin receives a freshly generated random id; of the two flags only
/// `is_file_origin` is set.
pub fn new_opaque_for_file() -> ImmutableOrigin {
    let file_origin = OpaqueOrigin {
        id: Uuid::new_v4(),
        is_file_origin: true,
        is_for_data_worker_from_secure_context: false,
    };
    ImmutableOrigin::Opaque(file_origin)
}
pub fn scheme(&self) -> Option<&str> {
@@ -102,16 +122,42 @@ impl ImmutableOrigin {
/// Return whether this origin is a (scheme, host, port) tuple
/// (as opposed to an opaque origin).
pub fn is_tuple(&self) -> bool {
match *self {
ImmutableOrigin::Opaque(..) => false,
ImmutableOrigin::Tuple(..) => true,
}
matches!(self, ImmutableOrigin::Tuple(..))
}
pub fn is_file_origin(&self) -> bool {
matches!(
self,
ImmutableOrigin::Opaque(OpaqueOrigin {
is_file_origin: true,
..
})
)
}
pub fn is_for_data_worker_from_secure_context(&self) -> bool {
matches!(
self,
ImmutableOrigin::Opaque(OpaqueOrigin {
is_for_data_worker_from_secure_context: true,
..
})
)
}
/// <https://w3c.github.io/webappsec-secure-contexts/#is-origin-trustworthy>
pub fn is_potentially_trustworthy(&self) -> bool {
// 1. If origin is an opaque origin return "Not Trustworthy"
if matches!(self, ImmutableOrigin::Opaque(_)) {
if let ImmutableOrigin::Opaque(opaque_origin) = self {
// The webappsec spec assumes that file:// urls have a tuple origin,
// which is implementation defined.
// See <https://github.com/w3c/webappsec-secure-contexts/issues/66>.
//
// They're not tuple origins in our implementation (which is the more correct choice),
// so we have to return here instead of Step 6.
if opaque_origin.is_file_origin {
return true;
}
return false;
}
@@ -120,10 +166,10 @@ impl ImmutableOrigin {
if scheme == "https" || scheme == "wss" {
return true;
}
// 6. If origins scheme is "file", return "Potentially Trustworthy".
if scheme == "file" {
return true;
}
// NOTE: The comment at Step 1 explains why this is unreachable here.
debug_assert_ne!(scheme, "file", "File URLs don't have a tuple origin");
// 4. If origins host matches one of the CIDR notations 127.0.0.0/8 or ::1/128,
// return "Potentially Trustworthy".
@@ -141,6 +187,7 @@ impl ImmutableOrigin {
}
}
}
// 9. Return "Not Trustworthy".
false
}
@@ -153,13 +200,19 @@ impl ImmutableOrigin {
/// Opaque identifier for URLs that have file or other schemes
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum OpaqueOrigin {
Opaque(Uuid),
// Workers created from `data:` urls will have opaque origins but need to be treated
// as inheriting the secure context they were created in. This tracks that the origin
// was created in such a context
SecureWorkerFromDataUrl(Uuid),
pub struct OpaqueOrigin {
id: Uuid,
/// Workers created from `data:` urls will have opaque origins but need to be treated
/// as inheriting the secure context they were created in. This tracks that the origin
/// was created in such a context
is_for_data_worker_from_secure_context: bool,
/// `file://` URLs are *usually* treated as opaque, but not always. This flag serves
/// as an indicator that they need special handling in certain cases.
///
/// See <https://github.com/whatwg/html/issues/3099>.
is_file_origin: bool,
}
malloc_size_of_is_0!(OpaqueOrigin);
/// A snapshot of a MutableOrigin at a moment in time.

View File

@@ -2,6 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::path::Path;
use std::str::FromStr;
use servo_url::ServoUrl;
@@ -78,3 +79,13 @@ fn test_matches_about_blank_does_not_match_invariants_maintained_from_url() {
let servo_url = ServoUrl::from_url(url);
assert!(!servo_url.matches_about_blank());
}
#[test]
fn test_file_urls_are_potentially_trustworthy() {
    // Build a `file://` URL from a path known at compile time: Cargo exposes the
    // crate manifest location through `CARGO_MANIFEST_PATH`.
    let path = Path::new(env!("CARGO_MANIFEST_PATH"))
        .canonicalize()
        .expect("the crate manifest path should be canonicalizable");
    let url = ServoUrl::from_file_path(path)
        .expect("a canonical path should convert to a file URL");

    // The origin of a file URL must be reported as potentially trustworthy.
    assert!(url.origin().is_potentially_trustworthy());
}