LibWeb: Make replaceData create new surrogate pairs

When inserting a new utf-16 surrogate next to an existing surrogate
with replaceData, the surrogates would not get merged correctly into a
single code point. This is because internally the text data is stored
as utf-8, and the two surrogates would be converted seperately. This
has now been fixed by first recreating the whole string in utf-16 and
then converting it back to utf-8.

It's not the most efficient solution, but this fixes at least 6 WPT
subtests.
This commit is contained in:
Milo van der Tier
2024-11-23 14:46:42 +01:00
committed by Tim Ledbetter
parent 01f4bbbba7
commit 15741350ba
Notes: github-actions[bot] 2024-11-24 13:05:02 +00:00
3 changed files with 104 additions and 9 deletions

View File

@@ -0,0 +1,74 @@
<!DOCTYPE html>
<meta charset=utf-8>
<title>Splitting and joining surrogate pairs in CharacterData methods</title>
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-substringdata">
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-replacedata">
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-insertdata">
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-deletedata">
<link rel=help href="https://dom.spec.whatwg.org/#dom-characterdata-data">
<script src="../../resources/testharness.js"></script>
<script src="../../resources/testharnessreport.js"></script>
<div id="log"></div>
<script>
function testNode(create, type) {
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
assert_equals(node.substringData(1, 8), "\uDF20 test \uD83C")
}, type + ".substringData() splitting surrogate pairs")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.replaceData(1, 4, "--");
assert_equals(node.data, "\uD83C--st 🌠 TEST");
node.replaceData(1, 2, "\uDF1F ");
assert_equals(node.data, "🌟 st 🌠 TEST");
node.replaceData(5, 2, "---");
assert_equals(node.data, "🌟 st---\uDF20 TEST");
node.replaceData(6, 2, " \uD83D");
assert_equals(node.data, "🌟 st- 🜠 TEST");
}, type + ".replaceData() splitting and creating surrogate pairs")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.deleteData(1, 4);
assert_equals(node.data, "\uD83Cst 🌠 TEST");
node.deleteData(1, 4);
assert_equals(node.data, "🌠 TEST");
}, type + ".deleteData() splitting and creating surrogate pairs")
test(function() {
var node = create()
assert_equals(node.data, "test")
node.data = "🌠 test 🌠 TEST"
node.insertData(1, "--");
assert_equals(node.data, "\uD83C--\uDF20 test 🌠 TEST");
node.insertData(1, "\uDF1F ");
assert_equals(node.data, "🌟 --\uDF20 test 🌠 TEST");
node.insertData(5, " \uD83D");
assert_equals(node.data, "🌟 -- 🜠 test 🌠 TEST");
}, type + ".insertData() splitting and creating surrogate pairs")
}
testNode(function() { return document.createTextNode("test") }, "Text")
testNode(function() { return document.createComment("test") }, "Comment")
</script>