mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-05-11 01:22:43 +02:00
When the regular HTML parser is blocked on an external script, the speculative parser scans ahead and pre-fetches discoverable sub-resources. Previously those fetches were tracked only in the parser's own URL list and never registered in the document's preload map, so when the regular parser later reached each element, fetch()'s consume_a_preloaded_resource() lookup found nothing and issued a duplicate request — every parser-blocked sub-resource was fetched twice. issue_speculative_fetch now creates a PreloadEntry, registers it under create_a_preload_key(request) in the document's preload map, and supplies a processResponseConsumeBody callback that populates the entry. The map insertion happens after fetch() starts because fetch() runs consume_a_preloaded_resource() synchronously, so registering the entry beforehand would short-circuit the speculative fetch itself. The body-handling steps (1, 2, 5 of the preload algorithm's processResponseConsumeBody) are factored into a shared deliver_preload_response helper used by both the speculative parser and HTMLLinkElement::preload.
251 lines
10 KiB
C++
251 lines
10 KiB
C++
/*
|
|
* Copyright (c) 2026, Aliaksandr Kalenik <kalenik.aliaksandr@gmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <LibJS/Runtime/Realm.h>
|
|
#include <LibWeb/DOM/Document.h>
|
|
#include <LibWeb/Fetch/Fetching/Fetching.h>
|
|
#include <LibWeb/Fetch/Infrastructure/FetchAlgorithms.h>
|
|
#include <LibWeb/Fetch/Infrastructure/FetchController.h>
|
|
#include <LibWeb/Fetch/Infrastructure/HTTP/Requests.h>
|
|
#include <LibWeb/HTML/AttributeNames.h>
|
|
#include <LibWeb/HTML/CORSSettingAttribute.h>
|
|
#include <LibWeb/HTML/Parser/HTMLToken.h>
|
|
#include <LibWeb/HTML/Parser/SpeculativeHTMLParser.h>
|
|
#include <LibWeb/HTML/Parser/SpeculativeMockElement.h>
|
|
#include <LibWeb/HTML/PotentialCORSRequest.h>
|
|
#include <LibWeb/HTML/PreloadEntry.h>
|
|
#include <LibWeb/HTML/Scripting/Environments.h>
|
|
#include <LibWeb/HTML/TagNames.h>
|
|
#include <LibWeb/Infra/CharacterTypes.h>
|
|
|
|
namespace Web::HTML {
|
|
|
|
GC_DEFINE_ALLOCATOR(SpeculativeHTMLParser);
|
|
|
|
// Allocates a SpeculativeHTMLParser in the given realm, handing over ownership of the pending
// input text and the base URL to the new parser.
GC::Ref<SpeculativeHTMLParser> SpeculativeHTMLParser::create(JS::Realm& realm, GC::Ref<DOM::Document> document, String pending_input, URL::URL base_url)
{
    auto parser = realm.create<SpeculativeHTMLParser>(document, move(pending_input), move(base_url));
    return parser;
}
|
|
|
|
// Stores the document, takes ownership of the pending input and the base URL, and sets up the
// tokenizer over the stored input with "UTF-8" as the encoding name.
//
// The tokenizer is given a view into m_input (the member), not into the moved-from parameter.
// NOTE(review): this relies on m_input being declared before m_tokenizer in the class so that it
// is initialized first — confirm against the header.
SpeculativeHTMLParser::SpeculativeHTMLParser(GC::Ref<DOM::Document> document, String pending_input, URL::URL base_url)
|
|
: m_document(document)
|
|
, m_input(move(pending_input))
|
|
, m_tokenizer(m_input.bytes_as_string_view(), "UTF-8"sv)
|
|
, m_base_url(move(base_url))
|
|
{
|
|
}
|
|
|
|
// Defaulted destructor, defined out of line in this translation unit.
SpeculativeHTMLParser::~SpeculativeHTMLParser() = default;
|
|
|
|
// Reports this cell's outgoing references to the visitor: the base class's edges, the document,
// and whatever the tokenizer reports through its own visit_edges().
void SpeculativeHTMLParser::visit_edges(JS::Cell::Visitor& visitor)
|
|
{
|
|
Base::visit_edges(visitor);
|
|
visitor.visit(m_document);
|
|
m_tokenizer.visit_edges(visitor);
|
|
}
|
|
|
|
// Stops speculation. The only action taken here is aborting the tokenizer, which corresponds to
// step 3 of the spec algorithm quoted below; the other steps are not performed in this method.
void SpeculativeHTMLParser::stop()
|
|
{
|
|
// https://html.spec.whatwg.org/multipage/parsing.html#stop-the-speculative-html-parser
|
|
// 3. Throw away any pending content in speculativeParser's input stream, and discard any future content
|
|
// that would have been added to it.
|
|
m_tokenizer.abort();
|
|
}
|
|
|
|
void SpeculativeHTMLParser::run()
|
|
{
|
|
while (true) {
|
|
auto token = m_tokenizer.next_token();
|
|
if (!token.has_value())
|
|
break;
|
|
|
|
if (token->is_start_tag()) {
|
|
process_start_tag(*token);
|
|
} else if (token->is_end_tag()) {
|
|
process_end_tag(*token);
|
|
} else if (token->is_end_of_file()) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
|
|
// Copies every attribute carried by the token into a freshly built vector, in the order the
// token's for_each_attribute() reports them.
Vector<HTMLToken::Attribute> attributes_from_token(HTMLToken const& token)
{
    Vector<HTMLToken::Attribute> collected;

    token.for_each_attribute([&collected](HTMLToken::Attribute const& entry) {
        collected.append(entry);
        // Never stop early: every attribute is wanted.
        return IterationDecision::Continue;
    });

    return collected;
}
|
|
|
|
// https://html.spec.whatwg.org/multipage/parsing.html#speculative-fetch
|
|
// Step 4 says "fetch url as if the element was processed normally". For the regular parser to
|
|
// dedup against the in-flight speculative fetch, we follow the preload algorithm's shape: create
|
|
// a preload entry, register it under create_a_preload_key(request), and supply a
|
|
// processResponseConsumeBody callback that populates the entry. Then when the regular parser later
|
|
// processes the element, fetch()'s consume_a_preloaded_resource() check joins the entry rather than
|
|
// issuing a duplicate request.
|
|
void issue_speculative_fetch(JS::Realm& realm, DOM::Document& document, URL::URL url, Optional<Fetch::Infrastructure::Request::Destination> destination, CORSSettingAttribute cors_setting)
|
|
{
|
|
auto& vm = realm.vm();
|
|
auto request = create_potential_CORS_request(vm, url, destination, cors_setting);
|
|
request->set_client(&document.relevant_settings_object());
|
|
|
|
// Mirrors the preload algorithm step 6 ("Let entry be a new preload entry...") and step 7
|
|
// ("Let key be the result of creating a preload key given request"):
|
|
// https://html.spec.whatwg.org/multipage/links.html#preload
|
|
auto entry = realm.create<PreloadEntry>();
|
|
auto key = create_a_preload_key(*request);
|
|
|
|
Fetch::Infrastructure::FetchAlgorithms::Input fetch_algorithms_input {};
|
|
fetch_algorithms_input.process_response_consume_body = [&realm, entry](GC::Ref<Fetch::Infrastructure::Response> response, Fetch::Infrastructure::FetchAlgorithms::BodyBytes body_bytes) {
|
|
// 1. If bodyBytes is a byte sequence, then set response's body to bodyBytes as a body.
|
|
// 2. Otherwise, set response to a network error.
|
|
// 5. If entry's on response available is null, then set entry's response to response;
|
|
// otherwise call entry's on response available given response.
|
|
// (No processResponse, no reportTiming — those steps only apply to <link rel=preload>.)
|
|
(void)deliver_preload_response(realm, *entry, response, body_bytes.get_pointer<ByteBuffer>());
|
|
};
|
|
auto algorithms = Fetch::Infrastructure::FetchAlgorithms::create(vm, move(fetch_algorithms_input));
|
|
|
|
// The fetch stays alive via ResourceLoader's GC::Root callbacks for the duration of the
|
|
// network request, so we don't need to retain the FetchController.
|
|
(void)Fetch::Fetching::fetch(realm, request, algorithms);
|
|
|
|
// Mirrors the preload algorithm step 12.2 ("Set document's map of preloaded resources[key] to
|
|
// entry"). Note: the insert happens *after* fetch() starts because fetch() runs
|
|
// consume_a_preloaded_resource() synchronously — registering the entry beforehand would let
|
|
// the speculative fetch short-circuit itself.
|
|
document.map_of_preloaded_resources().set(key, entry);
|
|
}
|
|
|
|
// Returns true when `rel`, split on ASCII whitespace, contains a token equal to `keyword`
// under ASCII case-insensitive comparison.
bool rel_contains_keyword(StringView rel, StringView keyword)
{
    auto const candidates = rel.split_view_if(Infra::is_ascii_whitespace);
    for (auto const& candidate : candidates) {
        if (candidate.equals_ignoring_ascii_case(keyword))
            return true;
    }
    return false;
}
|
|
|
|
// https://html.spec.whatwg.org/multipage/parsing.html#speculative-fetch
|
|
void speculative_fetch(SpeculativeMockElement& element, DOM::Document& document, URL::URL& base_url)
|
|
{
|
|
auto& realm = document.realm();
|
|
|
|
// 1. If the speculative HTML parser encounters one of the following elements, then act as if that
|
|
// element is processed for the purpose of its effect on subsequent speculative fetches.
|
|
// - A base element.
|
|
if (element.local_name == HTML::TagNames::base) {
|
|
if (auto href = element.attribute(HTML::AttributeNames::href); href.has_value() && !href->is_empty()) {
|
|
if (auto parsed = document.encoding_parse_url(*href); parsed.has_value())
|
|
base_url = parsed.release_value();
|
|
}
|
|
return;
|
|
}
|
|
// FIXME: A meta element whose http-equiv attribute is in the Content security policy state.
|
|
// FIXME: A meta element whose name attribute is referrer.
|
|
// FIXME: A meta element whose name attribute is viewport.
|
|
|
|
// 2. Let url be the URL that element would fetch if it was processed normally. If there is no such
|
|
// URL or if it is the empty string, then do nothing.
|
|
// We resolve URLs against the speculative parser's tracked base_url (which may have been updated
|
|
// by an earlier speculative <base href>); this is why we use complete_url here rather than
|
|
// document.encoding_parse_url, which would resolve against the document's base instead.
|
|
Optional<URL::URL> url;
|
|
Optional<Fetch::Infrastructure::Request::Destination> destination;
|
|
auto cors_setting = cors_setting_attribute_from_keyword(element.attribute(HTML::AttributeNames::crossorigin));
|
|
|
|
if (element.local_name == HTML::TagNames::script) {
|
|
auto src = element.attribute(HTML::AttributeNames::src);
|
|
if (!src.has_value() || src->is_empty())
|
|
return;
|
|
url = base_url.complete_url(*src);
|
|
destination = Fetch::Infrastructure::Request::Destination::Script;
|
|
} else if (element.local_name == HTML::TagNames::link) {
|
|
auto rel = element.attribute(HTML::AttributeNames::rel);
|
|
auto href = element.attribute(HTML::AttributeNames::href);
|
|
if (!href.has_value() || href->is_empty() || !rel.has_value())
|
|
return;
|
|
auto rel_view = rel->bytes_as_string_view();
|
|
if (rel_contains_keyword(rel_view, "stylesheet"sv)) {
|
|
url = base_url.complete_url(*href);
|
|
destination = Fetch::Infrastructure::Request::Destination::Style;
|
|
} else if (rel_contains_keyword(rel_view, "preload"sv)) {
|
|
auto translated = translate_a_preload_destination(element.attribute(HTML::AttributeNames::as));
|
|
if (translated.has<Empty>())
|
|
return;
|
|
destination = translated.get<Optional<Fetch::Infrastructure::Request::Destination>>();
|
|
url = base_url.complete_url(*href);
|
|
} else {
|
|
return;
|
|
}
|
|
} else if (element.local_name == HTML::TagNames::img) {
|
|
auto src = element.attribute(HTML::AttributeNames::src);
|
|
if (!src.has_value() || src->is_empty())
|
|
return;
|
|
url = base_url.complete_url(*src);
|
|
destination = Fetch::Infrastructure::Request::Destination::Image;
|
|
} else {
|
|
return;
|
|
}
|
|
|
|
if (!url.has_value())
|
|
return;
|
|
|
|
// 3. Otherwise, if url is already in the list of speculative fetch URLs, then do nothing.
|
|
if (document.has_speculative_fetch_url(*url))
|
|
return;
|
|
|
|
// 4. Otherwise, fetch url as if the element was processed normally, and add url to the list of
|
|
// speculative fetch URLs.
|
|
document.add_speculative_fetch_url(*url);
|
|
issue_speculative_fetch(realm, document, *url, destination, cors_setting);
|
|
}
|
|
|
|
}
|
|
|
|
// Handles one start tag produced by the speculative tokenizer.
//
// Tracks nesting inside <template> and inside <svg>/<math> so that nothing within them triggers
// a speculative fetch, and forwards <script>, <link>, <img> and <base> to speculative_fetch()
// as mock elements. All other tags are ignored.
//
// NOTE(review): nothing in this function switches the tokenizer into a raw-text/script-data
// state after <script>, <style>, <textarea>, <title>, etc. — confirm that the tokenizer or a
// caller takes care of that, otherwise markup-looking text inside those elements would be
// scanned as tags.
void SpeculativeHTMLParser::process_start_tag(HTMLToken const& token)
{
    auto const& tag_name = token.tag_name();

    // The self-closing flag has no effect on HTML elements, so <template/> still opens a
    // template and is counted like <template>.
    if (tag_name == HTML::TagNames::template_) {
        ++m_template_depth;
        return;
    }

    if (tag_name == HTML::TagNames::svg || tag_name == HTML::TagNames::math) {
        // A self-closing <svg/> or <math/> is a complete foreign element: the tree builder pops
        // it immediately and no end tag will follow. Counting it would leave m_foreign_depth
        // above zero for the rest of the input and suppress every later speculative fetch.
        if (!token.is_self_closing())
            ++m_foreign_depth;
        return;
    }

    // Nothing inside a template or foreign (SVG/MathML) subtree is speculatively fetched.
    if (m_template_depth > 0 || m_foreign_depth > 0)
        return;

    // Only these elements are understood by speculative_fetch().
    if (!tag_name.is_one_of(HTML::TagNames::script, HTML::TagNames::link, HTML::TagNames::img, HTML::TagNames::base))
        return;

    auto element = create_a_speculative_mock_element(tag_name, attributes_from_token(token));

    // 6. Optionally, perform a speculative fetch for element.
    speculative_fetch(element, *m_document, m_base_url);
}
|
|
|
|
// Handles one end tag produced by the speculative tokenizer: unwinds the <template> and
// <svg>/<math> nesting counters. Each counter is only decremented while it is above zero, so a
// stray end tag never makes it wrap around.
void SpeculativeHTMLParser::process_end_tag(HTMLToken const& token)
{
    auto const& tag_name = token.tag_name();

    if (tag_name == HTML::TagNames::template_) {
        if (m_template_depth > 0)
            --m_template_depth;
        return;
    }

    bool const closes_foreign_root = tag_name == HTML::TagNames::svg || tag_name == HTML::TagNames::math;
    if (closes_foreign_root && m_foreign_depth > 0)
        --m_foreign_depth;
}
|
|
|
|
}
|