mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-05-01 20:17:13 +02:00
LibWeb: Start implementing character token parsing
Now that we've gotten rid of the misguided character buffering in the tokenizer, it actually spits out character tokens that we have to deal with in the parser. This patch implements enough to bring us back to speed with simple.html.
This commit is contained in:
Notes:
sideshowbarker
2024-07-19 06:10:26 +09:00
Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/31db3f21ae9
@@ -54,6 +54,31 @@ public:
|
||||
// Returns true when this token's type is Type::Character.
bool is_character() const
{
    return Type::Character == m_type;
}
|
||||
// Returns true when this token's type is Type::EndOfFile.
bool is_end_of_file() const
{
    return Type::EndOfFile == m_type;
}
|
||||
|
||||
// Returns the code point carried by a character token.
// Preconditions (enforced with ASSERT): the token is a character token and
// its buffered data has a length() of exactly 1.
u32 codepoint() const
{
    ASSERT(is_character());
    // FIXME: Handle non-ASCII codepoints properly.
    ASSERT(m_comment_or_character.data.length() == 1);
    // Widen through unsigned char so a stored value >= 0x80 is not
    // sign-extended into a huge u32 where plain char is signed.
    // NOTE(review): assumes string_view()[0] yields a char-sized value — confirm.
    return static_cast<u32>(static_cast<unsigned char>(m_comment_or_character.data.string_view()[0]));
}
|
||||
|
||||
bool is_parser_whitespace() const
|
||||
{
|
||||
// NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
|
||||
if (!is_character())
|
||||
return false;
|
||||
switch (codepoint()) {
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\f':
|
||||
case '\r':
|
||||
case ' ':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
String tag_name() const
|
||||
{
|
||||
ASSERT(is_start_tag() || is_end_tag());
|
||||
|
||||
Reference in New Issue
Block a user