mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-04-27 02:05:07 +02:00
LibWeb: Implement encoding sniffing algorithm
This patch implements the HTML specification's "encoding sniffing algorithm", which is used when no encoding can be obtained from the Content-Type header (either because it doesn't contain a charset=... value, or because the file has not been opened via HTTP, as with local files). It also modifies the creator of the HTMLDocumentParser to use the new HTMLDocumentParser::create_with_uncertain_encoding static method, which runs the encoding sniffing algorithm before instantiating the parser. This now allows us to load local HTML pages (or remote pages without a charset specified in the 'Content-Type' header) with a non-UTF-8 encoding such as 'windows-1252'. This would previously crash the browser. :^)
This commit is contained in:
committed by
Andreas Kling
parent
67a9ebc817
commit
f808279769
Notes:
sideshowbarker
2024-07-18 17:48:35 +09:00
Author: https://github.com/MaxWipfli Commit: https://github.com/SerenityOS/serenity/commit/f8082797694 Pull-request: https://github.com/SerenityOS/serenity/pull/7055 Issue: https://github.com/SerenityOS/serenity/issues/6910 Reviewed-by: https://github.com/Dexesttp Reviewed-by: https://github.com/awesomekling Reviewed-by: https://github.com/linusg
@@ -22,6 +22,7 @@
|
||||
#include <LibWeb/HTML/HTMLTableElement.h>
|
||||
#include <LibWeb/HTML/HTMLTemplateElement.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLDocumentParser.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLToken.h>
|
||||
#include <LibWeb/Namespace.h>
|
||||
#include <LibWeb/SVG/TagNames.h>
|
||||
@@ -3039,4 +3040,14 @@ NonnullRefPtrVector<DOM::Node> HTMLDocumentParser::parse_html_fragment(DOM::Elem
|
||||
}
|
||||
return children;
|
||||
}
|
||||
|
||||
// Builds an HTMLDocumentParser for `input` when the caller does not supply an encoding.
//
// If `document` already reports an encoding, that encoding is passed to the parser
// unchanged. Otherwise the encoding sniffing algorithm is run over `input`, its
// result is logged via dbgln(), and the parser is constructed with that result.
NonnullOwnPtr<HTMLDocumentParser> HTMLDocumentParser::create_with_uncertain_encoding(DOM::Document& document, const ByteBuffer& input)
{
    if (!document.has_encoding()) {
        // No encoding known for the document: determine one from the raw bytes.
        auto sniffed_encoding = run_encoding_sniffing_algorithm(input);
        dbgln("The encoding sniffing algorithm returned encoding '{}'", sniffed_encoding);
        return make<HTMLDocumentParser>(document, input, sniffed_encoding);
    }

    // The document already carries an encoding; use it as-is.
    return make<HTMLDocumentParser>(document, input, document.encoding().value());
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user