LibXML+LibWeb: Use existing HTML entities table for XML parsing too

For XHTML documents, resolve named character entities (e.g., &nbsp;)
using the HTML entity table via a getEntity SAX callback. This avoids
parsing a large embedded DTD on every document and matches the approach
used by Blink and WebKit.

This also removes the now-unused DTD infrastructure:

- Remove resolve_external_resource callback from Parser::Options
- Remove resolve_xml_resource() function and its ~60KB embedded DTD
- Remove all call sites passing the unused callback
This commit is contained in:
sideshowbarker
2026-01-09 04:32:12 +09:00
committed by Tim Ledbetter
parent 35bb1e20ee
commit 1b41659efd
Notes: github-actions[bot] 2026-01-09 19:14:36 +00:00
43 changed files with 28321 additions and 55 deletions

View File

@@ -0,0 +1,49 @@
<!DOCTYPE html>
<meta charset=utf-8>
<script>
// One DOMParser shared by every generated test. `parse(markup, mimeType)` is
// parseFromString pre-bound to that parser so it can be called as a plain
// function.
var parser = new DOMParser();
var parse = parser.parseFromString.bind(parser);
/**
 * Creates the body of a single entity-parsing test.
 *
 * The returned function parses a minimal document whose <body id='test'>
 * contains only `entitystring`, then uses the parent window's testharness
 * assertions to check that the body's first child is a text node whose
 * leading UTF-16 code units equal those of `expectedString`.
 *
 * @param {string} entitystring - Entity reference markup placed in the body.
 * @param {string} expectedString - Characters the reference should expand to.
 * @param {?string} publicId - Doctype public identifier, or null for none.
 * @param {?string} systemId - Doctype system identifier, or null for none.
 * @param {string} mimeType - MIME type passed to the parser.
 * @param {string} friendlyMime - Label prefixed to the assertion messages.
 * @returns {function(): void} Test body suitable for passing to test().
 */
function generateTestFunction(entitystring, expectedString, publicId, systemId, mimeType, friendlyMime) {
  return () => {
    // Pick the end of the doctype declaration from whichever identifiers exist.
    let doctypeTail;
    if (publicId != null) {
      doctypeTail = ` PUBLIC "${publicId}" "${systemId}">`;
    } else if (systemId != null) {
      doctypeTail = ` SYSTEM "${systemId}">`;
    } else {
      // Neither identifier was supplied.
      doctypeTail = '>';
    }

    const markup = `<!DOCTYPE html${doctypeTail}<html><head></head><body id='test'>${entitystring}</body></html>`;
    const testBody = parse(markup, mimeType).getElementById('test');
    const parseErrorMessage = `${friendlyMime} parsing the entity reference caused a parse error;`;

    parent.assert_not_equals(testBody, null, parseErrorMessage);
    parent.assert_true(!!testBody.firstChild);
    // Some browsers include the partially parsed result in the document
    // returned for a parser error, so also require the child to be a text node.
    parent.assert_equals(testBody.firstChild.nodeType, 3 /* Text */, parseErrorMessage);

    // Compare code unit by code unit over the length of the expected string.
    const actualText = testBody.firstChild.data;
    const expectedLength = expectedString.length;
    for (let index = 0; index < expectedLength; index += 1) {
      parent.assert_equals(actualText.charCodeAt(index), expectedString.charCodeAt(index));
    }
  };
}
/**
 * Registers one testharness test per applicable entity name.
 *
 * For "text/html" every key of `jsonEntities` is tested; for any other MIME
 * type only names ending in ";" are tested (presumably because XML has no
 * semicolon-less entity references - confirm against the entity list).
 *
 * @param {Object<string, {characters: string}>} jsonEntities - Map from entity
 *     reference text to an object holding its expected `characters`.
 * @param {?string} publicId - Doctype public identifier, or null.
 * @param {?string} systemId - Doctype system identifier, or null.
 * @param {string} mimeType - MIME type the documents are parsed as.
 * @param {string} friendlyMime - Label used as the prefix of each test name.
 */
function setupTests(jsonEntities, publicId, systemId, mimeType, friendlyMime) {
  const acceptsUnterminatedNames = mimeType === "text/html";
  // The loop variable is declared locally: the previous undeclared
  // `entityName` leaked a global and is a ReferenceError in strict mode.
  for (const entityName of Object.keys(jsonEntities)) {
    if (!acceptsUnterminatedNames && !entityName.endsWith(";")) {
      continue;
    }
    parent.test(
      generateTestFunction(entityName, jsonEntities[entityName].characters, publicId, systemId, mimeType, friendlyMime),
      friendlyMime + " parsing " + entityName
    );
  }
}
// Configure the parent window's harness with explicit_done, so the run stays
// open until run() calls parent.done() once the entity list has loaded.
parent.setup(function() {}, {explicit_done: true});
/**
 * Loads entities.json, registers the tests for one doctype/MIME-type
 * combination, and then signals completion to the parent window's harness.
 *
 * @param {Array} row - [mimeType, publicId, systemId, friendlyMime].
 */
function run(row) {
  const request = new XMLHttpRequest();
  request.open("GET", "entities.json");
  request.onload = () => {
    const [mimeType, publicId, systemId, friendlyMime] = row;
    const entityTable = JSON.parse(request.response);
    setupTests(entityTable, publicId, systemId, mimeType, friendlyMime);
    // Every test is registered synchronously above, so the run can finish.
    parent.done();
  };
  request.send();
}
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//W3C//DTD XHTML 1.0 Transitional//EN", "foo", "XHTML1.0 Transitional"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
// Both IDs are null here, so the generated doctype carries no identifiers.
onload = () => document.getElementById("test").contentWindow.run(
["text/html", null, null, "HTML"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//WAPFORUM//DTD XHTML Mobile 1.1//EN", "foo", "XHTML Mobile 1.1"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//WAPFORUM//DTD XHTML Mobile 1.2//EN", "foo", "XHTML Mobile 1.2"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//W3C//DTD XHTML 1.1//EN", "foo", "XHTML1.1"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//W3C//DTD XHTML 1.0 Strict//EN", "foo", "XHTML1.0 Strict"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//W3C//DTD XHTML 1.0 Frameset//EN", "foo", "XHTML1.0 Frameset"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//W3C//DTD XHTML Basic 1.0//EN", "foo", "XHTML Basic"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", "foo", "XHTML1.1+MathML"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN", "foo", "XHTML1.1+MathML+SVG"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//W3C//DTD MathML 2.0//EN", "foo", "MathML"]);
</script>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<meta name=timeout content=long>
<title>HTML entities for various XHTML Doctype</title>
<link rel=help href="http://w3c.github.io/html/xhtml.html#parsing-xhtml-documents">
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<iframe id="test" src="support/xhtml-mathml-dtd-entity.htm"></iframe>
<script>
// Once loaded, start the iframe's run() with one row:
// [MIME type, doctype public ID, doctype system ID, label used in test names].
onload = () => document.getElementById("test").contentWindow.run(
["application/xhtml+xml", "-//WAPFORUM//DTD XHTML Mobile 1.0//EN", "foo", "XHTML Mobile"]);
</script>

View File

@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/css" href="support/simple-style.css?pipe=trickle(d2)"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<script><![CDATA[
// Record the root element's computed z-index at the moment this script runs.
// The test in <body> expects "3", i.e. the xml-stylesheet requested above with
// ?pipe=trickle(d2) must already apply here (presumably simple-style.css sets
// z-index: 3 on the root - that file is not visible here).
window.observedZIndex = getComputedStyle(document.documentElement).zIndex;
]]></script>
<title>xml-stylesheet blocks script execution and rendering</title>
<link rel="help" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1986042" />
<link rel="author" title="Emilio Cobos Álvarez" href="mailto:emilio@crisal.io" />
<link rel="author" title="Mozilla" href="https://mozilla.org" />
<script src="../../../resources/testharness.js"/>
<script src="../../../resources/testharnessreport.js"/>
</head>
<body>
<script><![CDATA[
// Compare the z-index captured by the <head> script with the expected "3".
test(function() {
assert_equals(window.observedZIndex, "3", "XML processing instruction should've blocked script execution and rendering");
});
]]></script>
</body>
</html>