mirror of
https://github.com/LadybirdBrowser/ladybird
synced 2026-05-11 17:37:33 +02:00
A stealth merge conflict meant consume_specific_string() and consume_specific_char() got combined into a single method and no longer exist.
171 lines
6.2 KiB
Python
171 lines
6.2 KiB
Python
from math import inf
|
|
|
|
from Utils.CSSGrammar.Parser.component_values import Keyword
|
|
from Utils.CSSGrammar.Parser.component_values import NumericTypeRangeRestriction
|
|
from Utils.CSSGrammar.Parser.component_values import Type
|
|
from Utils.CSSGrammar.Parser.component_values import is_dimension_percentage_mix_type
|
|
from Utils.CSSGrammar.Parser.component_values import is_dimension_type
|
|
from Utils.CSSGrammar.Parser.component_values import is_numeric_type
|
|
from Utils.CSSGrammar.Parser.token import Token
|
|
from Utils.CSSGrammar.Parser.token import TokenType
|
|
from Utils.lexer import Lexer
|
|
|
|
|
|
def is_identifier_character(ch: str) -> bool:
|
|
return ch.isascii() and (ch.isalnum() or ch == "-")
|
|
|
|
|
|
class Tokenizer:
|
|
def __init__(self, input: str) -> None:
|
|
self.lexer = Lexer(input)
|
|
|
|
@classmethod
|
|
def tokenize(cls, input: str) -> list[Token]:
|
|
return cls(input).tokenize_impl()
|
|
|
|
def tokenize_impl(self) -> list[Token]:
|
|
tokens = []
|
|
|
|
while True:
|
|
self.discard_whitespace()
|
|
if self.lexer.is_eof():
|
|
tokens.append(Token.create(TokenType.END_OF_FILE))
|
|
return tokens
|
|
|
|
tokens.append(self.consume_a_token())
|
|
|
|
def discard_whitespace(self) -> None:
|
|
self.lexer.ignore_while(lambda ch: ch.isspace() and ch.isascii())
|
|
|
|
def consume_an_identifier(self) -> str:
|
|
return self.lexer.consume_while(is_identifier_character)
|
|
|
|
def consume_a_token(self) -> Token:
|
|
match self.lexer.peek():
|
|
case "|":
|
|
self.lexer.consume()
|
|
return Token.create(TokenType.SINGLE_BAR)
|
|
case "<":
|
|
return self.consume_a_non_terminal_token()
|
|
|
|
if is_identifier_character(self.lexer.peek()):
|
|
return self.consume_a_keyword_token()
|
|
|
|
raise SyntaxError("CSSGrammar::Tokenizer: Unexpected character")
|
|
|
|
def consume_custom_ident_blacklist(self) -> list[str]:
|
|
# NB: This notation isn't yet included in the spec but we use it internally and the CSSWG has resolved to add it in
|
|
# https://github.com/w3c/csswg-drafts/issues/11924
|
|
|
|
self.discard_whitespace()
|
|
|
|
if not self.lexer.consume_specific("!["):
|
|
return []
|
|
|
|
blacklist = []
|
|
|
|
while True:
|
|
self.discard_whitespace()
|
|
|
|
ident = self.consume_an_identifier()
|
|
if not ident:
|
|
raise SyntaxError("Expected identifier in custom-ident blacklist")
|
|
|
|
blacklist.append(ident)
|
|
|
|
self.discard_whitespace()
|
|
if self.lexer.consume_specific("]"):
|
|
return blacklist
|
|
|
|
if not self.lexer.consume_specific(","):
|
|
raise SyntaxError("Expected ',' in custom-ident blacklist")
|
|
|
|
# https://drafts.csswg.org/css-values-4/#css-bracketed-range-notation
|
|
def consume_bracketed_range_notation(self, type_name: str) -> NumericTypeRangeRestriction:
|
|
self.discard_whitespace()
|
|
|
|
# If no range is indicated, either by using the bracketed range notation or in the property description, then
|
|
# [-∞,∞] is assumed.
|
|
if not self.lexer.consume_specific("["):
|
|
return NumericTypeRangeRestriction(-inf, inf)
|
|
|
|
self.discard_whitespace()
|
|
minimum = self.consume_bracketed_range_bound(type_name)
|
|
|
|
self.discard_whitespace()
|
|
if not self.lexer.consume_specific(","):
|
|
raise SyntaxError("Expected ',' in bracketed range notation")
|
|
|
|
self.discard_whitespace()
|
|
maximum = self.consume_bracketed_range_bound(type_name)
|
|
|
|
self.discard_whitespace()
|
|
if not self.lexer.consume_specific("]"):
|
|
raise SyntaxError("Expected ']' to close bracketed range notation")
|
|
|
|
return NumericTypeRangeRestriction(minimum, maximum)
|
|
|
|
def consume_bracketed_range_bound(self, type_name: str) -> float:
|
|
# Values of -∞ or ∞ must be written without units, even if the value type uses units.
|
|
if self.lexer.consume_specific("-∞"):
|
|
return -inf
|
|
|
|
if self.lexer.consume_specific("∞"):
|
|
return inf
|
|
|
|
# FIXME: Do we need to allow non-integer values?
|
|
bound_value = self.consume_decimal_integer()
|
|
|
|
if bound_value != 0 and (is_dimension_percentage_mix_type(type_name) or type_name == "length"):
|
|
raise SyntaxError("Types with units not resolvable at parse time only support zero and infinite bounds")
|
|
|
|
# FIXME: Validate and store the unit, for now we drop it and assume it was the relevant canonical unit.
|
|
unit = self.lexer.consume_while(is_identifier_character)
|
|
|
|
if unit and not is_dimension_type(type_name) and not is_dimension_percentage_mix_type(type_name):
|
|
raise SyntaxError("Unexpected unit for unitless bound value")
|
|
|
|
if not unit and bound_value != 0 and is_dimension_type(type_name):
|
|
raise SyntaxError("Expected unit for non-zero, non-infinite bound value")
|
|
|
|
return float(bound_value)
|
|
|
|
def consume_decimal_integer(self) -> int:
|
|
sign = 1
|
|
if self.lexer.consume_specific("-"):
|
|
sign = -1
|
|
|
|
digits = self.lexer.consume_while(lambda ch: ch.isdigit())
|
|
if not digits:
|
|
raise SyntaxError("Expected decimal integer")
|
|
|
|
return sign * int(digits)
|
|
|
|
def consume_a_non_terminal_token(self) -> Token:
|
|
assert self.lexer.consume_specific("<")
|
|
|
|
name = self.consume_an_identifier()
|
|
|
|
if not name:
|
|
raise SyntaxError("CSSGrammar::Tokenizer: Expected a type name")
|
|
|
|
custom_ident_blacklist = None
|
|
if name == "custom-ident":
|
|
custom_ident_blacklist = self.consume_custom_ident_blacklist()
|
|
|
|
numeric_type_accepted_range = None
|
|
if is_numeric_type(name) or is_dimension_percentage_mix_type(name):
|
|
numeric_type_accepted_range = self.consume_bracketed_range_notation(name)
|
|
|
|
if not self.lexer.consume_specific(">"):
|
|
raise SyntaxError("CSSGrammar::Tokenizer: Expected '>'")
|
|
|
|
return Token.create_component_value(Type(name, custom_ident_blacklist, numeric_type_accepted_range))
|
|
|
|
def consume_a_keyword_token(self) -> Token:
|
|
value = self.consume_an_identifier()
|
|
|
|
assert value
|
|
|
|
return Token.create_component_value(Keyword(value))
|