/* * Copyright (c) 2026, Aliaksandr Kalenik * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Web::HTML { GC_DEFINE_ALLOCATOR(SpeculativeHTMLParser); GC::Ref SpeculativeHTMLParser::create(JS::Realm& realm, GC::Ref document, String pending_input, URL::URL base_url) { return realm.create(document, move(pending_input), move(base_url)); } SpeculativeHTMLParser::SpeculativeHTMLParser(GC::Ref document, String pending_input, URL::URL base_url) : m_document(document) , m_input(move(pending_input)) , m_tokenizer(m_input.bytes_as_string_view(), "UTF-8"sv) , m_base_url(move(base_url)) { } SpeculativeHTMLParser::~SpeculativeHTMLParser() = default; void SpeculativeHTMLParser::visit_edges(JS::Cell::Visitor& visitor) { Base::visit_edges(visitor); visitor.visit(m_document); m_tokenizer.visit_edges(visitor); } void SpeculativeHTMLParser::stop() { // https://html.spec.whatwg.org/multipage/parsing.html#stop-the-speculative-html-parser // 3. Throw away any pending content in speculativeParser's input stream, and discard any future content // that would have been added to it. m_tokenizer.abort(); } void SpeculativeHTMLParser::run() { while (true) { auto token = m_tokenizer.next_token(); if (!token.has_value()) break; if (token->is_start_tag()) { process_start_tag(*token); } else if (token->is_end_tag()) { process_end_tag(*token); } else if (token->is_end_of_file()) { break; } } } namespace { Vector attributes_from_token(HTMLToken const& token) { Vector attributes; token.for_each_attribute([&](HTMLToken::Attribute const& attribute) { attributes.append(attribute); return IterationDecision::Continue; }); return attributes; } // https://html.spec.whatwg.org/multipage/parsing.html#speculative-fetch // Step 4 says "fetch url as if the element was processed normally". For the regular parser to // dedup against the in-flight speculative fetch, we follow the preload algorithm's shape: create // a preload entry, register it under create_a_preload_key(request), and supply a // processResponseConsumeBody callback that populates the entry. Then when the regular parser later // processes the element, fetch()'s consume_a_preloaded_resource() check joins the entry rather than // issuing a duplicate request. void issue_speculative_fetch(JS::Realm& realm, DOM::Document& document, URL::URL url, Optional destination, CORSSettingAttribute cors_setting) { auto& vm = realm.vm(); auto request = create_potential_CORS_request(vm, url, destination, cors_setting); request->set_client(&document.relevant_settings_object()); // Mirrors the preload algorithm step 6 ("Let entry be a new preload entry...") and step 7 // ("Let key be the result of creating a preload key given request"): // https://html.spec.whatwg.org/multipage/links.html#preload auto entry = realm.create(); auto key = create_a_preload_key(*request); Fetch::Infrastructure::FetchAlgorithms::Input fetch_algorithms_input {}; fetch_algorithms_input.process_response_consume_body = [&realm, entry](GC::Ref response, Fetch::Infrastructure::FetchAlgorithms::BodyBytes body_bytes) { // 1. If bodyBytes is a byte sequence, then set response's body to bodyBytes as a body. // 2. Otherwise, set response to a network error. // 5. If entry's on response available is null, then set entry's response to response; // otherwise call entry's on response available given response. // (No processResponse, no reportTiming — those steps only apply to .) (void)deliver_preload_response(realm, *entry, response, body_bytes.get_pointer()); }; auto algorithms = Fetch::Infrastructure::FetchAlgorithms::create(vm, move(fetch_algorithms_input)); // The fetch stays alive via ResourceLoader's GC::Root callbacks for the duration of the // network request, so we don't need to retain the FetchController. (void)Fetch::Fetching::fetch(realm, request, algorithms); // Mirrors the preload algorithm step 12.2 ("Set document's map of preloaded resources[key] to // entry"). Note: the insert happens *after* fetch() starts because fetch() runs // consume_a_preloaded_resource() synchronously — registering the entry beforehand would let // the speculative fetch short-circuit itself. document.map_of_preloaded_resources().set(key, entry); } bool rel_contains_keyword(StringView rel, StringView keyword) { for (auto token : rel.split_view_if(Infra::is_ascii_whitespace)) { if (token.equals_ignoring_ascii_case(keyword)) return true; } return false; } // https://html.spec.whatwg.org/multipage/parsing.html#speculative-fetch void speculative_fetch(SpeculativeMockElement& element, DOM::Document& document, URL::URL& base_url) { auto& realm = document.realm(); // 1. If the speculative HTML parser encounters one of the following elements, then act as if that // element is processed for the purpose of its effect on subsequent speculative fetches. // - A base element. if (element.local_name == HTML::TagNames::base) { if (auto href = element.attribute(HTML::AttributeNames::href); href.has_value() && !href->is_empty()) { if (auto parsed = document.encoding_parse_url(*href); parsed.has_value()) base_url = parsed.release_value(); } return; } // FIXME: A meta element whose http-equiv attribute is in the Content security policy state. // FIXME: A meta element whose name attribute is referrer. // FIXME: A meta element whose name attribute is viewport. // 2. Let url be the URL that element would fetch if it was processed normally. If there is no such // URL or if it is the empty string, then do nothing. // We resolve URLs against the speculative parser's tracked base_url (which may have been updated // by an earlier speculative ); this is why we use complete_url here rather than // document.encoding_parse_url, which would resolve against the document's base instead. Optional url; Optional destination; auto cors_setting = cors_setting_attribute_from_keyword(element.attribute(HTML::AttributeNames::crossorigin)); if (element.local_name == HTML::TagNames::script) { auto src = element.attribute(HTML::AttributeNames::src); if (!src.has_value() || src->is_empty()) return; url = base_url.complete_url(*src); destination = Fetch::Infrastructure::Request::Destination::Script; } else if (element.local_name == HTML::TagNames::link) { auto rel = element.attribute(HTML::AttributeNames::rel); auto href = element.attribute(HTML::AttributeNames::href); if (!href.has_value() || href->is_empty() || !rel.has_value()) return; auto rel_view = rel->bytes_as_string_view(); if (rel_contains_keyword(rel_view, "stylesheet"sv)) { url = base_url.complete_url(*href); destination = Fetch::Infrastructure::Request::Destination::Style; } else if (rel_contains_keyword(rel_view, "preload"sv)) { auto translated = translate_a_preload_destination(element.attribute(HTML::AttributeNames::as)); if (translated.has()) return; destination = translated.get>(); url = base_url.complete_url(*href); } else { return; } } else if (element.local_name == HTML::TagNames::img) { auto src = element.attribute(HTML::AttributeNames::src); if (!src.has_value() || src->is_empty()) return; url = base_url.complete_url(*src); destination = Fetch::Infrastructure::Request::Destination::Image; } else { return; } if (!url.has_value()) return; // 3. Otherwise, if url is already in the list of speculative fetch URLs, then do nothing. if (document.has_speculative_fetch_url(*url)) return; // 4. Otherwise, fetch url as if the element was processed normally, and add url to the list of // speculative fetch URLs. document.add_speculative_fetch_url(*url); issue_speculative_fetch(realm, document, *url, destination, cors_setting); } } void SpeculativeHTMLParser::process_start_tag(HTMLToken const& token) { auto const& tag_name = token.tag_name(); if (tag_name == HTML::TagNames::template_) { ++m_template_depth; return; } if (tag_name == HTML::TagNames::svg || tag_name == HTML::TagNames::math) { ++m_foreign_depth; return; } if (m_template_depth > 0 || m_foreign_depth > 0) return; if (!tag_name.is_one_of(HTML::TagNames::script, HTML::TagNames::link, HTML::TagNames::img, HTML::TagNames::base)) return; auto element = create_a_speculative_mock_element(tag_name, attributes_from_token(token)); // 6. Optionally, perform a speculative fetch for element. speculative_fetch(element, *m_document, m_base_url); } void SpeculativeHTMLParser::process_end_tag(HTMLToken const& token) { auto const& tag_name = token.tag_name(); if (tag_name == HTML::TagNames::template_ && m_template_depth > 0) { --m_template_depth; } else if ((tag_name == HTML::TagNames::svg || tag_name == HTML::TagNames::math) && m_foreign_depth > 0) { --m_foreign_depth; } } }