xarxaprod-wp-theme/vendor/mck89/peast/lib/Peast/Selector/Parser.php

615 lines
17 KiB
PHP
Raw Permalink Normal View History

2024-01-09 16:13:20 +01:00
<?php
/**
* This file is part of the Peast package
*
* (c) Marco Marchiò <marco.mm89@gmail.com>
*
* For the full copyright and license information refer to the LICENSE file
* distributed with this source code
*/
namespace Peast\Selector;
/**
* Selector parser class
*
* @author Marco Marchiò <marco.mm89@gmail.com>
*/
class Parser
{
/**
* Selector string
*
* @var string
*/
protected $selector;
/**
* Current parser index
*
* @var int
*/
protected $index = 0;
/**
* Selector length
*
* @var int
*/
protected $length;
/**
* Whitespaces
*
* @var array
*/
protected $whitespaces = array(" ", "\t", "\n", "\r", "\f");
/**
* Combinators
*
* @var array
*/
protected $combinators = array(">", "+", "~");
/**
* Attribute selector operator characters
*
* @var array
*/
protected $attrOperatorChars = array("=", "<", ">", "^", "$", "*");
/**
* Attribute selector operators
*
* @var array
*/
protected $attrOperators = array("=", "<", ">", "<=", ">=", "^=", "$=", "*=");
/**
* Valid pseudo selectors. The value indicates the argument type:
* - 0: no arguments
* - 1: index formula (An+B syntax)
* - 2: selector
* @var array
*/
protected $validPseudo = array(
"pattern" => 0, "statement" => 0, "expression" => 0, "declaration" => 0,
"first-child" => 0, "last-child" => 0,
"nth-child" => 1, "nth-last-child" => 1,
"has" => 2, "is" => 2, "not" => 2
);
/**
* Class constructor
*
* @param string $selector Selector string
* @param array $options Options array. See Query class
* documentation for available options
*/
public function __construct($selector, $options = array())
{
$encoding = isset($options["encoding"]) ? $options["encoding"] : null;
if ($encoding && !preg_match("/UTF-?8/i", $encoding)) {
$selector = mb_convert_encoding($selector, "UTF-8", $encoding);
}
$this->selector = $selector;
$this->length = strlen($selector);
}
/**
* Starts the parsing and returns the parsed selector
*
* @param bool $filter True if the selector must be used for a filter
*
* @return Node\Selector
*
* @throws Exception
*/
public function parse($filter = false)
{
$selector = $this->parseSelector($filter);
//Throw an exception if the end has not been reached
if (($char = $this->getChar()) !== null) {
throw new Exception("Invalid syntax '$char'");
}
return $selector;
}
/**
* Parses a selector
*
* @param bool $filter True if the selector must be used for a filter
*
* @return Node\Selector
*
* @throws Exception
*/
public function parseSelector($filter = false)
{
$selector = new Node\Selector;
do {
$first = true;
$group = new Node\Group;
while (true) {
$combinator = $this->consumeCombinator();
if (!$first && !$combinator) {
break;
}
$parts = $this->parseSelectorParts();
if (!count($parts)) {
throw new Exception("Missing selector after combinator");
}
$first = false;
$selCombinator = new Node\Combinator;
$selCombinator->setOperator(
$combinator ?: ($filter ? null : " ")
);
foreach ($parts as $part) {
$selCombinator->addPart($part);
}
$group->addCombinator($selCombinator);
}
$selector->addGroup($group);
$this->consumeWhitespaces();
} while ($this->consume(","));
return $selector;
}
/**
* Parses a set of selector pats
*
* @return array
*
* @throws Exception
*/
protected function parseSelectorParts()
{
$parts = array();
while (true) {
if (
($part = $this->parseSelectorPartType()) ||
($part = $this->parseSelectorPartAttribute()) ||
($part = $this->parseSelectorPartPseudo())
) {
$parts[] = $part;
} else {
break;
}
}
return $parts;
}
/**
* Parses a type selector part
*
* @return Node\Part\Type|null
*/
protected function parseSelectorPartType()
{
$type = $this->consumeWord();
if ($type) {
$part = new Node\Part\Type;
$part->setType($type);
return $part;
}
return null;
}
/**
* Parses an attribute selector part
*
* @return Node\Part\Attribute|null
*
* @throws Exception
*/
protected function parseSelectorPartAttribute()
{
if (!$this->consume("[")) {
return null;
}
$this->consumeWhitespaces();
$part = new Node\Part\Attribute;
if (!($name = $this->consumeWord())) {
throw new Exception("Missing attribute name");
}
$part->addName($name);
while ($this->consume(".")) {
if (!($name = $this->consumeWord())) {
throw new Exception("Missing attribute name after dot");
}
$part->addName($name);
}
$this->consumeWhitespaces();
$operator = $this->consumeAny($this->attrOperatorChars);
if ($operator) {
if (!in_array($operator, $this->attrOperators)) {
throw new Exception("Invalid attribute operator '$operator'");
}
$part->setOperator($operator);
$this->consumeWhitespaces();
if (!($value = $this->parseLiteral())) {
throw new Exception("Missing attribute value");
}
$part->setValue($value[0]);
if ($value[1]) {
if ($operator != "=") {
throw new Exception(
"Only '=' operator is valid for attribute regex match"
);
}
$part->setRegex(true);
}
$this->consumeWhitespaces();
if ($this->consume("i")) {
if (!is_string($value[0]) || $value[1]) {
throw new Exception(
"Case insensitive flag can be used only for string values"
);
}
$part->setCaseInsensitive(true);
$this->consumeWhitespaces();
}
}
if (!$this->consume("]")) {
throw new Exception("Unterminated attribute selector");
}
return $part;
}
/**
* Parses a pseudo selector part
*
* @return Node\Part\Pseudo|null
*
* @throws Exception
*/
protected function parseSelectorPartPseudo()
{
if (!$this->consume(":")) {
return null;
}
$name = $this->consumeWord("-");
if (!isset($this->validPseudo[$name])) {
throw new Exception("Unsupported pseudo selector '$name'");
}
$argsType = $this->validPseudo[$name];
$error = false;
if ($argsType === 1) {
$part = new Node\Part\PseudoIndex;
if (!$this->consume("(")) {
$error = true;
}
if (!$error) {
$this->consumeWhitespaces();
if ($indices = $this->consumeRegex("-?\d*n(?:\+\d+)?|\d+")) {
$indices = explode("n", $indices);
if (count($indices) === 1) {
$part->setOffset((int) $indices[0]);
} else {
switch ($indices[0]) {
case "":
$part->setStep(1);
break;
case "-":
$part->setStep(-1);
break;
default:
$part->setStep((int) $indices[0]);
break;
}
if ($indices[1] !== "") {
$part->setOffset((int) $indices[1]);
}
}
} elseif (
($word = $this->consumeWord()) &&
($word === "even" || $word === "odd")
) {
$part->setStep(2);
if ($word === "odd") {
$part->setOffset(1);
}
} else {
$error = true;
}
$this->consumeWhitespaces();
if (!$error && !$this->consume(")")) {
$error = true;
}
}
} elseif ($argsType === 2) {
$part = new Node\Part\PseudoSelector;
if (
$this->consume("(") &&
($selector = $this->parseSelector($name !== "has")) &&
$this->consume(")")
) {
$part->setSelector($selector);
} else {
$error = true;
}
} else {
$part = new Node\Part\PseudoSimple;
}
if ($error) {
throw new Exception(
"Invalid argument for pseudo selector '$name'"
);
}
$part->setName($name);
return $part;
}
/**
* Parses a literal value
*
* @return array|null
*
* @throws Exception
*/
protected function parseLiteral()
{
if (
($literal = $this->parseLiteralBoolNull()) !== 0 ||
($literal = $this->parseLiteralString()) !== null ||
($literal = $this->parseLiteralNumber()) !== null
) {
return array($literal, false);
} elseif ($literal = $this->parseLiteralRegex()) {
return array($literal, true);
}
return null;
}
/**
* Parses a literal boolean or null value
*
* @return int|bool|null
*
* @throws Exception
*/
protected function parseLiteralBoolNull()
{
$word = $this->consumeWord();
if (!$word) {
return 0;
} elseif ($word === "true") {
return true;
} elseif ($word === "false") {
return false;
} elseif ($word === "null") {
return null;
}
throw new Exception("Invalid attribute value '$word'");
}
/**
* Parses a literal string
*
* @return string|null
*
* @throws Exception
*/
protected function parseLiteralString()
{
if (!($quote = $this->consumeAny(array("'", '"'), true))) {
return null;
}
if (($str = $this->consumeUntil($quote)) === null) {
throw new Exception("Unterminated string in attribute value");
}
return $str;
}
/**
* Parses a literal number
*
* @return int|float|null
*/
protected function parseLiteralNumber()
{
if (
$this->getChar() === "0" &&
($val = $this->consumeRegex("0[xX][a-fA-F]+|0[bB][01]+|0[oO][0-7]+"))
) {
$form = strtolower($val[1]);
$val = substr($val, 2);
if ($form === "x") {
return hexdec($val);
} elseif ($form === "o") {
return octdec($val);
}
return bindec($val);
}
$reg = "-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|-?\.\d+(?:[eE][+-]?\d+)?";
if (!($val = $this->consumeRegex($reg))) {
return null;
}
return (float) $val;
}
/**
* Parses a literal regex
*
* @return string|null
*
* @throws Exception
*/
protected function parseLiteralRegex()
{
if (!($sep = $this->consume("/"))) {
return null;
}
if (($reg = $this->consumeUntil($sep, false, true)) === null) {
throw new Exception("Unterminated regex in attribute value");
}
$modifiers = $this->consumeWord();
return $sep . $reg . ($modifiers ?: "");
}
/**
* Consumes the given regex
*
* @param string $regex Regex to consume
*
* @return mixed|null
*/
protected function consumeRegex($regex)
{
if ($this->getChar() === null) {
return null;
}
if (!preg_match("#^($regex)#", substr($this->selector, $this->index), $matches)) {
return null;
}
$this->index += strlen($matches[1]);
return $matches[1];
}
/**
* Consumes all the characters until the given one is reached
*
* @param string $stop Stop character
* @param bool $removeEscapes If false escape characters won't be removed
* @param false $includeStop If true stop character will be returned
*
* @return string|null
*/
protected function consumeUntil($stop, $removeEscapes = true, $includeStop = false)
{
$buffer = "";
$escaped = false;
while (($char = $this->getChar()) !== null) {
$this->index += 1;
if (!$escaped) {
if ($char === "\\") {
$escaped = true;
if (!$removeEscapes) {
$buffer .= $char;
}
continue;
} elseif ($char === $stop) {
if ($includeStop) {
$buffer .= $char;
}
return $buffer;
}
}
$buffer .= $char;
$escaped = false;
}
return null;
}
/**
* Consumes a word composed by characters a-z
*
* @param null|string $extraChar Extra character to match
*
* @return string
*/
protected function consumeWord($extraChar = null)
{
$buffer = "";
while ($char = $this->getChar()) {
if (
($char >= "a" && $char <= "z") ||
($char >= "A" && $char <= "Z") ||
($extraChar !== null && $char === $extraChar)
) {
$buffer .= $char;
$this->index += 1;
} else {
break;
}
}
return $buffer;
}
/**
* Consumes a combinator
*
* @return string|null
*/
protected function consumeCombinator()
{
//Initial ws can be trimmed if followed by another combinator
$ws = $this->consumeWhitespaces();
if ($combinator = $this->consumeAny($this->combinators, true)) {
$this->consumeWhitespaces();
} elseif ($ws) {
//If there's no other combinators use the space
$combinator = " ";
} else {
$combinator = null;
}
return $combinator;
}
/**
* Consumes as much whitespaces as possible
*
* @return string
*/
protected function consumeWhitespaces()
{
return $this->consumeAny($this->whitespaces);
}
/**
* Consumes the given characters
*
* @param array $chars Characters to consume
* @param false $stopAtFirst If true only the first matching character
* is consumed
*
* @return string
*/
protected function consumeAny($chars, $stopAtFirst = false)
{
$buffer = "";
while (($char = $this->getChar()) !== null) {
if (in_array($char, $chars)) {
$buffer .= $char;
$this->index++;
if ($stopAtFirst) {
break;
}
} else {
break;
}
}
return $buffer;
}
/**
* Consumes the current character if it is equal to the
* given one
*
* @param string $char Character to compare
*
* @return string|null
*/
protected function consume($char)
{
if ($this->getChar() === $char) {
$this->index++;
return $char;
}
return null;
}
/**
* Returns the current character or null if the end
* have been reached
*
* @return string|null
*/
protected function getChar()
{
if ($this->index < $this->length) {
return $this->selector[$this->index];
}
return null;
}
}