mirror of
https://github.com/servo/servo
synced 2026-05-14 19:06:31 +02:00
335 lines
15 KiB
Perl
335 lines
15 KiB
Perl
##############################################################################
|
|
# the processor #
|
|
##############################################################################
|
|
|
|
# This code is a mess and has numerous subtle bugs in its namespace
|
|
# handling. Do not expect it to pass any tests of its own.
|
|
|
|
package utils::parser;
|
|
use utils::helpers;
|
|
use strict;
|
|
|
|
my $NAMESPACE = 'http://www.example.org/css3tests';
|
|
my %months = (
|
|
'january' => 1,
|
|
'february' => 2,
|
|
'march' => 3,
|
|
'april' => 4,
|
|
'may' => 5,
|
|
'june' => 6,
|
|
'july' => 7,
|
|
'august' => 8,
|
|
'september' => 9,
|
|
'october' => 10,
|
|
'november' => 11,
|
|
'december' => 12,
|
|
);
|
|
|
|
sub Init {
|
|
my $parser = shift;
|
|
$parser->{'Walker Data'} = {};
|
|
}
|
|
|
|
# This is the big workhorse -- it gets called for each start tag.
|
|
sub Start {
|
|
my $parser = shift;
|
|
my($tagName, @attrs) = @_;
|
|
my @context = $parser->context;
|
|
my($qualifiedTagName, @qualifiedAttrs) = &utils::helpers::qualifyStartTag($parser, $tagName, @attrs);
|
|
my %qualifiedAttrs = (@qualifiedAttrs);
|
|
|
|
# The root element
|
|
if ((scalar(@context) == 0) and ($qualifiedTagName eq "{$NAMESPACE}csstest")) {
|
|
foreach my $name (qw(def module modulename number rev)) {
|
|
if (defined($qualifiedAttrs{$name})) {
|
|
$parser->{'Walker Data'}->{$name} = $qualifiedAttrs{$name};
|
|
}
|
|
}
|
|
if (defined($qualifiedAttrs{date})) {
|
|
my $date = $qualifiedAttrs{date};
|
|
$date =~ s/(.+)-(.+)-(.+)/sprintf('%02d-%02d-%02d', $3, $months{$2}, $1)/gose;
|
|
$parser->{'Walker Data'}->{date} = $date;
|
|
}
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest']]) and
|
|
($qualifiedTagName eq "{$NAMESPACE}author")) {
|
|
if (defined($parser->{'Walker Data'}->{'author'})) {
|
|
push(@{$parser->{'Walker Data'}->{'author'}}, '');
|
|
} else {
|
|
$parser->{'Walker Data'}->{'author'} = [''];
|
|
}
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest']]) and
|
|
($qualifiedTagName eq "{$NAMESPACE}cssrules")) {
|
|
# ok
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest']]) and
|
|
($qualifiedTagName eq "{$NAMESPACE}userinteraction")) {
|
|
$parser->{'Walker Data'}->{'interactive'} = 1;
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest']]) and
|
|
($qualifiedTagName eq "{$NAMESPACE}dynamic")) {
|
|
$parser->{'Walker Data'}->{'dynamic'} = 1;
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest']]) and
|
|
($qualifiedTagName eq "{$NAMESPACE}historyneeded")) {
|
|
$parser->{'Walker Data'}->{'historyneeded'} = 1;
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest']]) and
|
|
($qualifiedTagName eq "{$NAMESPACE}code") and
|
|
(not defined($parser->{'Walker Data'}->{'prefixes'}))) {
|
|
# here we must begin to take stuff into account
|
|
$parser->{'Walker Data'}->{'code-xml'} = '';
|
|
$parser->{'Walker Data'}->{'code-xhtml'} = '';
|
|
$parser->{'Walker Data'}->{'code-html'} = '';
|
|
# first, all the namespace prefixes in scope
|
|
$parser->{'Walker Data'}->{'prefixes'} = {};
|
|
$parser->{'Walker Data'}->{'prefixesUsed'} = {};
|
|
foreach my $prefix ($parser->current_ns_prefixes) {
|
|
if ($prefix ne '#default') {
|
|
$parser->{'Walker Data'}->{'prefixes'}->{$prefix} = $parser->expand_ns_prefix($prefix);
|
|
$parser->{'Walker Data'}->{'prefixesUsed'}->{$prefix} = 0;
|
|
}
|
|
}
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest'],
|
|
[$NAMESPACE, 'code']])) { # child of code element
|
|
$parser->xpcroak('restrict cannot be a child of code') if $qualifiedTagName eq "{$NAMESPACE}restrict";
|
|
&processElement($parser, $tagName, 1, @attrs);
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest'],
|
|
[$NAMESPACE, 'code']], 1) and
|
|
($qualifiedTagName eq "{$NAMESPACE}restrict")) { # <restrict>, descendant of code element (must not be child)
|
|
if (defined($parser->{'Walker Data'}->{'restrict'})) {
|
|
$parser->xpcroak('<restrict> may not be nested');
|
|
}
|
|
if (defined($qualifiedAttrs{'for'})) {
|
|
$parser->{'Walker Data'}->{'restrict'} = $qualifiedAttrs{'for'};
|
|
} else {
|
|
$parser->xpcroak('required attribute \'for\' missing');
|
|
}
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest'],
|
|
[$NAMESPACE, 'code']], 1)) { # descendant of code element
|
|
&processElement($parser, $tagName, 0, @attrs);
|
|
} else {
|
|
$parser->xpcroak("unexpected element $tagName in namespace ".$parser->namespace($tagName));
|
|
}
|
|
}
|
|
|
|
sub CdataStart {
|
|
my $parser = shift;
|
|
if (&utils::helpers::matchContext($parser, [
|
|
[$NAMESPACE, 'csstest'], [$NAMESPACE, 'code']], 1)) {
|
|
$parser->{'Walker Data'}->{'code-xml'} .= '<![CDATA[' if applicable($parser, 'xml');
|
|
$parser->{'Walker Data'}->{'code-xhtml'} .= '<![CDATA[' if applicable($parser, 'xhtml');
|
|
# $parser->{'Walker Data'}->{'code-html'} .= '' if applicable($parser, 'html'); # HTML has no CDATA blocks
|
|
$parser->{'Walker Data'}->{'cdata'} = 1;
|
|
} else {
|
|
# not technically invalid...
|
|
}
|
|
}
|
|
|
|
sub CdataEnd {
|
|
my $parser = shift;
|
|
if (&utils::helpers::matchContext($parser, [
|
|
[$NAMESPACE, 'csstest'], [$NAMESPACE, 'code']], 1)) {
|
|
$parser->{'Walker Data'}->{'code-xml'} .= ']]>' if applicable($parser, 'xml');
|
|
$parser->{'Walker Data'}->{'code-xhtml'} .= ']]>' if applicable($parser, 'xhtml');
|
|
# $parser->{'Walker Data'}->{'code-html'} .= '' if applicable($parser, 'html'); # HTML has no CDATA blocks
|
|
$parser->{'Walker Data'}->{'cdata'} = 0;
|
|
} else {
|
|
# not technically invalid...
|
|
}
|
|
}
|
|
|
|
sub Comment {
|
|
my $parser = shift;
|
|
my($comment) = @_;
|
|
if (&utils::helpers::matchContext($parser, [
|
|
[$NAMESPACE, 'csstest'], [$NAMESPACE, 'code']], 1)) {
|
|
$parser->{'Walker Data'}->{'code-xml'} .= "<!--$comment-->" if applicable($parser, 'xml');
|
|
$parser->{'Walker Data'}->{'code-xhtml'} .= "<!--$comment-->" if applicable($parser, 'xhtml');
|
|
$parser->{'Walker Data'}->{'code-html'} .= "<!--$comment-->" if applicable($parser, 'html');
|
|
} else {
|
|
# not technically invalid...
|
|
}
|
|
}
|
|
|
|
sub Proc {
|
|
my $parser = shift;
|
|
my($target, $data) = @_;
|
|
if (&utils::helpers::matchContext($parser, [
|
|
[$NAMESPACE, 'csstest'], [$NAMESPACE, 'code']], 1)) {
|
|
$parser->{'Walker Data'}->{'code-xml'} .= "<?$target $data?>" if applicable($parser, 'xml');
|
|
$parser->{'Walker Data'}->{'code-xhtml'} .= "<?$target $data?>" if applicable($parser, 'xhtml');
|
|
$parser->{'Walker Data'}->{'code-html'} .= "<?$target $data>" if applicable($parser, 'html');
|
|
} else {
|
|
# not technically invalid...
|
|
}
|
|
}
|
|
|
|
# This is called for each line of a string of text (as well as the contents of any CDATA blocks, etc)
|
|
sub Char {
|
|
my $parser = shift;
|
|
my($text) = @_;
|
|
if (&utils::helpers::matchContext($parser, [
|
|
[$NAMESPACE, 'csstest'], [$NAMESPACE, 'author']])) {
|
|
$parser->{'Walker Data'}->{'author'}->[$#{$parser->{'Walker Data'}->{'author'}}] .= $text;
|
|
} elsif (&utils::helpers::matchContext($parser, [
|
|
[$NAMESPACE, 'csstest'], [$NAMESPACE, 'cssrules']])) {
|
|
$parser->{'Walker Data'}->{'cssrules'} .= $text;
|
|
} elsif (&utils::helpers::matchContext($parser, [
|
|
[$NAMESPACE, 'csstest']]) and ($text =~ /^\s+$/os)) {
|
|
# ok
|
|
} elsif (&utils::helpers::matchContext($parser, [
|
|
[$NAMESPACE, 'csstest'], [$NAMESPACE, 'code']], 1)) {
|
|
if (not $parser->{'Walker Data'}->{'cdata'}) {
|
|
$text = &utils::helpers::escape($text);
|
|
}
|
|
$parser->{'Walker Data'}->{'code-xml'} .= $text if applicable($parser, 'xml');
|
|
$parser->{'Walker Data'}->{'code-xhtml'} .= $text if applicable($parser, 'xhtml');
|
|
$parser->{'Walker Data'}->{'code-html'} .= $text if applicable($parser, 'html');
|
|
} else {
|
|
$parser->xpcroak("found unexpected text");
|
|
}
|
|
}
|
|
|
|
sub End {
|
|
my $parser = shift;
|
|
my($tagName) = @_;
|
|
if (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest'],
|
|
[$NAMESPACE, 'code']], 1) and
|
|
(($tagName eq 'restrict') and ($parser->namespace($tagName) eq $NAMESPACE))) { # <restrict>, descendant of code element
|
|
delete($parser->{'Walker Data'}->{'restrict'});
|
|
} elsif (&utils::helpers::matchContext($parser, [[$NAMESPACE, 'csstest'],
|
|
[$NAMESPACE, 'code']], 1)) {
|
|
if ($parser->recognized_string ne '') {
|
|
$parser->{'Walker Data'}->{'endTag'} = $parser->recognized_string;
|
|
} else {
|
|
# This was an empty tag with the short form <foo/>. This
|
|
# guarentees that the element can have no children, so we
|
|
# don't need to ensure the endTag bit is propagated
|
|
# correctly across children.
|
|
}
|
|
# XML output
|
|
$parser->{'Walker Data'}->{'code-xml'} .= $parser->{'Walker Data'}->{'endTag'} if applicable($parser, 'xml');
|
|
# XHTML output
|
|
$parser->{'Walker Data'}->{'code-xhtml'} .= $parser->{'Walker Data'}->{'endTag'} if applicable($parser, 'xhtml');
|
|
# HTML output
|
|
if (($parser->{'Walker Data'}->{'endTag'} ne '</input>') and
|
|
($parser->{'Walker Data'}->{'endTag'} ne '</br>')) {
|
|
$parser->{'Walker Data'}->{'code-html'} .= $parser->{'Walker Data'}->{'endTag'} if applicable($parser, 'html');
|
|
} # else HTML doesn't allow end tags for those
|
|
} else {
|
|
# ok
|
|
}
|
|
}
|
|
|
|
sub Final {
|
|
my $parser = shift;
|
|
my $data = $parser->{'Walker Data'};
|
|
$data->{'escapedcode-xml'} = &utils::helpers::escape($data->{'code-xml'});
|
|
$data->{'escapedcode-xhtml'} = &utils::helpers::escape($data->{'code-xhtml'});
|
|
$data->{'escapedcode-html'} = &utils::helpers::escape($data->{'code-html'});
|
|
$data->{'escapedcode-css'} = &utils::helpers::escape($data->{'cssrules'});
|
|
$data->{'namespaces'} = '';
|
|
foreach my $prefix (keys %{$data->{'prefixes'}}) {
|
|
if ($data->{'prefixesUsed'}->{$prefix}) {
|
|
$data->{'namespaces'} .= " xmlns:${prefix}=\"$data->{'prefixes'}->{$prefix}\"";
|
|
}
|
|
}
|
|
delete($parser->{'Walker Data'});
|
|
return $data;
|
|
}
|
|
|
|
sub processElement {
|
|
my $parser = shift;
|
|
my($tagName, $child, @attrs) = @_;
|
|
# $child is true if the element should declare its own default namespace if needed
|
|
# (i.e. if element is a direct child of the <code> element)
|
|
my @prefixes = $parser->current_ns_prefixes;
|
|
# get the element stuff
|
|
my $prefix = '';
|
|
if ($parser->recognized_string =~ m/<([^\s:]+):/o) {
|
|
$prefix = $1;
|
|
}
|
|
$parser->{'Walker Data'}->{'prefixesUsed'}->{$prefix} += 1 if exists $parser->{'Walker Data'}->{'prefixesUsed'}->{$prefix};
|
|
my $default = $parser->expand_ns_prefix('#default');
|
|
my $defaultXML = '';
|
|
my $defaultXHTML = '';
|
|
if ($child) {
|
|
if (defined($default)) {
|
|
if ($default ne 'http://www.w3.org/1999/xhtml') {
|
|
$defaultXHTML = ' xmlns="'.&utils::helpers::escape($default).'"';
|
|
}
|
|
$defaultXML = ' xmlns="'.&utils::helpers::escape($default).'"';
|
|
} else {
|
|
$defaultXHTML = ' xmlns=""';
|
|
}
|
|
} # else handled as part of the new_ns_prefix fixup
|
|
my $newNamespaces = '';
|
|
my $newNamespacePrefixes = {};
|
|
foreach my $newPrefix ($parser->new_ns_prefixes) {
|
|
my $namespace = $parser->expand_ns_prefix($newPrefix);
|
|
if (not defined($namespace)) {
|
|
$namespace = '';
|
|
}
|
|
if ($newPrefix ne '#default') {
|
|
$newNamespaces .= " xmlns:$newPrefix=\"".&utils::helpers::escape($namespace).'"';
|
|
$newNamespacePrefixes->{$newPrefix} = $namespace;
|
|
} elsif (not $child) {
|
|
$newNamespaces .= ' xmlns="'.&utils::helpers::escape($namespace).'"';
|
|
}
|
|
}
|
|
my %prefixLookup = map { if ($_ ne '#default') { $parser->expand_ns_prefix($_) => $_ } else { (); } } @prefixes;
|
|
my $attributes = '';
|
|
my $isName = 1;
|
|
foreach my $attribute (@attrs) {
|
|
if ($isName) {
|
|
# we currently lose the actual prefix used and look it back up... this can be wrong if
|
|
# there are multiple prefixes defined for the same namespace.
|
|
my $attrNamespace;
|
|
if ($attribute =~ s/^\|//o) {
|
|
# this handles a bug in XML::Parser::Expat with attributes of the form:
|
|
# <element xmlns="" xmlns:none="" none:this="will be called '|this' and not 'this' in $attribute" />
|
|
# XXX actually the bug is that that doesn't throw a well-formedness exception XXX
|
|
$attrNamespace = '';
|
|
} else {
|
|
$attrNamespace = $parser->namespace($attribute);
|
|
}
|
|
my $attrPrefix;
|
|
if (defined($attrNamespace)) {
|
|
$parser->{'Walker Data'}->{'namespaced'} = 1;
|
|
$parser->{'Walker Data'}->{'only-xml'} = 1 if applicable($parser, 'html');
|
|
if ($attrNamespace eq 'http://www.w3.org/XML/1998/namespace') {
|
|
$attrPrefix = 'xml';
|
|
} else {
|
|
$attrPrefix = $prefixLookup{$attrNamespace};
|
|
$parser->{'Walker Data'}->{'prefixesUsed'}->{$attrPrefix} += 1 if exists $parser->{'Walker Data'}->{'prefixesUsed'}->{$attrPrefix};
|
|
}
|
|
$attrPrefix .= ':';
|
|
} else {
|
|
$attrPrefix = '';
|
|
}
|
|
$attributes .= " $attrPrefix$attribute=\"";
|
|
} else {
|
|
$attributes .= &utils::helpers::escape($attribute).'"';
|
|
}
|
|
$isName = not($isName);
|
|
}
|
|
$prefix .= ':' if $prefix ne '';
|
|
# XML output:
|
|
$parser->{'Walker Data'}->{'code-xml'} .= "<$prefix$tagName$defaultXML$newNamespaces$attributes>" if applicable($parser, 'xml');
|
|
# XHTML output
|
|
$parser->{'Walker Data'}->{'code-xhtml'} .= "<$prefix$tagName$defaultXHTML$newNamespaces$attributes>" if applicable($parser, 'xhtml');
|
|
# HTML output is same as XHTML output except for namespaces - flag if there are any
|
|
$parser->{'Walker Data'}->{'code-html'} .= "<$tagName$attributes>" if applicable($parser, 'html');
|
|
|
|
if ($prefix ne '' or $defaultXHTML ne '' or $newNamespaces ne '') {
|
|
$parser->{'Walker Data'}->{'namespaced'} = 1;
|
|
$parser->{'Walker Data'}->{'only-xml'} = 1 if applicable($parser, 'html');
|
|
}
|
|
$parser->{'Walker Data'}->{'endTag'} = "</$prefix$tagName>"; # used to regenerate the end tag if required (i.e. if this was originally an empty start tag)
|
|
}
|
|
|
|
sub applicable {
|
|
my($parser, $for) = @_;
|
|
return ((not defined($parser->{'Walker Data'}->{'restrict'})) or
|
|
($parser->{'Walker Data'}->{'restrict'} =~ m/\b # word boundary
|
|
\Q$for\E # quote $for string (so that $for is not treated as regexp)
|
|
\b # word boundary
|
|
/x));
|
|
}
|