oficinasuport-wp-theme/vendor/mck89/peast/lib/Peast/Syntax/Scanner.php

1820 lines
56 KiB
PHP
Raw Permalink Normal View History

2022-10-24 23:58:16 +02:00
<?php
/**
* This file is part of the Peast package
*
* (c) Marco Marchiò <marco.mm89@gmail.com>
*
* For the full copyright and license information refer to the LICENSE file
* distributed with this source code
*/
namespace Peast\Syntax;
/**
* Base class for scanners.
*
* @author Marco Marchiò <marco.mm89@gmail.com>
*/
class Scanner
{
use JSX\Scanner;
/**
* Scanner features
*
* @var Features
*/
protected $features;
/**
* Current column
*
* @var int
*/
protected $column = 0;
/**
* Current line
*
* @var int
*/
protected $line = 1;
/**
* Current index
*
* @var int
*/
protected $index = 0;
/**
* Source length
*
* @var int
*/
protected $length;
/**
* Source characters
*
* @var array
*/
protected $source;
/**
* Consumed position
*
* @var Position
*/
protected $position;
/**
* Current token
*
* @var Token
*/
protected $currentToken;
/**
* Next token
*
* @var Token
*/
protected $nextToken;
/**
* Strict mode flag
*
* @var bool
*/
protected $strictMode = false;
/**
* True to register tokens in the tokens array
*
* @var bool
*/
protected $registerTokens = false;
/**
* Module mode
*
* @var bool
*/
protected $isModule = false;
/**
* Comments handling
*
* @var bool
*/
protected $comments = false;
/**
* Internal JSX scan flag
*
* @var bool
*/
protected $jsx = false;
/**
* Registered tokens array
*
* @var array
*/
protected $tokens = array();
/**
* Comments to tokens map
*
* @var array
*/
protected $commentsMap = array();
/**
* Events emitter
*
* @var EventsEmitter
*/
protected $eventsEmitter;
/**
* Regex to match identifiers starts
*
* @var string
*/
protected $idStartRegex = "/[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\x{1885}\x{1886}\x{2118}\x{212E}\x{309B}\x{309C}]/u";
/**
* Regex to match identifiers parts
*
* @var string
*/
protected $idPartRegex = "/[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\x{1885}\x{1886}\x{2118}\x{212E}\x{309B}\x{309C}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{00B7}\x{0387}\x{1369}\x{136A}\x{136B}\x{136C}\x{136D}\x{136E}\x{136F}\x{1370}\x{1371}\x{19DA}\x{200C}\x{200D}]/u";
/**
* Keywords array
*
* @var array
*/
protected $keywords = array(
"break", "do", "in", "typeof", "case", "else", "instanceof", "var",
"catch", "export", "new", "void", "class", "extends", "return", "while",
"const", "finally", "super", "with", "continue", "for", "switch",
"debugger", "function", "this", "default", "if", "throw",
"delete", "import", "try", "enum", "await"
);
/**
* Array of words that are keywords only in strict mode
*
* @var array
*/
protected $strictModeKeywords = array(
"implements", "interface", "package", "private", "protected", "public",
"static", "let", "yield"
);
/**
* Punctuators array
*
* @var array
*/
protected $punctuators = array(
".", ";", ",", "<", ">", "<=", ">=", "==", "!=", "===", "!==", "+",
"-", "*", "%", "++", "--", "<<", ">>", ">>>", "&", "|", "^", "!", "~",
"&&", "||", "?", ":", "=", "+=", "-=", "*=", "%=", "<<=", ">>=", ">>>=",
"&=", "|=", "^=", "=>", "...", "/", "/=", "**", "**=", "??", "?.",
"&&=", "||=", "??="
);
/**
* Punctuators LSM
*
* @var LSM
*/
protected $punctuatorsLSM;
/**
* Strings stops LSM
*
* @var LSM
*/
protected $stringsStopsLSM;
/**
* Brackets array
*
* @var array
*/
protected $brackets = array(
"(" => "", "[" => "", "{" => "", ")" => "(", "]" => "[", "}" => "{"
);
/**
* Open brackets array
*
* @var array
*/
protected $openBrackets = array();
/**
* Open templates array
*
* @var array
*/
protected $openTemplates = array();
/**
* Whitespaces array. Integer entries are Unicode code points: the constructor
* replaces each of them with the corresponding UTF-8 character. Note that
* 0x00A0 is listed twice, the duplicate is harmless since this array is only
* used for membership checks in this file.
*
* @var array
*/
protected $whitespaces = array(
" ", "\t", "\n", "\r", "\f", "\v", 0x00A0, 0xFEFF, 0x00A0,
0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006,
0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000, 0x2028,
0x2029
);
/**
* Line terminators characters array
*
* @var array
*
* @static
*/
public static $lineTerminatorsChars = array("\n", "\r", 0x2028, 0x2029);
/**
* Line terminators sequences array
*
* @var array
*
* @static
*/
public static $lineTerminatorsSequences = array("\r\n");
/**
* Regex to split texts using valid ES line terminators. It is built in the
* constructor by joining all the line terminators with "|".
*
* @var string
*/
protected $linesSplitter;
/**
* Concatenation of line terminators characters and line terminators
* sequences
*
* @var array
*/
protected $lineTerminators;
/**
* Properties to copy when getting the scanner state
*
* @var array
*/
protected $stateProps = array("position", "index", "column", "line",
"currentToken", "nextToken", "strictMode",
"openBrackets", "openTemplates",
"commentsMap");
/**
* Decimal numbers
*
* @var array
*/
protected $numbers = array("0", "1", "2", "3", "4", "5", "6", "7", "8",
"9");
/**
* Hexadecimal numbers
*
* @var array
*/
protected $xnumbers = array("0", "1", "2", "3", "4", "5", "6", "7", "8",
"9", "a", "b", "c", "d", "e", "f", "A", "B",
"C", "D", "E", "F");
/**
* Octal numbers
*
* @var array
*/
protected $onumbers = array("0", "1", "2", "3", "4", "5", "6", "7");
/**
* Binary numbers
*
* @var array
*/
protected $bnumbers = array("0", "1");
/**
 * Class constructor. Normalizes the source to an array of UTF-8 characters
 * and prepares the lookup structures (punctuators, strings stops, keywords,
 * line terminators) according to the enabled features.
 *
 * @param string   $source   Source code
 * @param Features $features Scanner features
 * @param array    $options  Parsing options. The keys read here are
 *                           "sourceEncoding" and "strictEncoding"
 */
function __construct(
    $source, Features $features, $options
) {
    $this->features = $features;

    $encoding = null;
    if (isset($options["sourceEncoding"])) {
        $encoding = $options["sourceEncoding"];
    }

    //Remove the BOM, this can also fill the encoding if it was not given
    $this->stripBOM($source, $encoding);

    //Sources that are not in UTF-8 must be converted
    if ($encoding && !preg_match("/UTF-?8/i", $encoding)) {
        $source = mb_convert_encoding($source, "UTF-8", $encoding);
    }

    //The source is stored as an array of UTF-8 characters because it is
    //faster than calling mb_substr for every character
    $strictEncoding = !isset($options["strictEncoding"]) ||
                      $options["strictEncoding"];
    $this->source = Utils::stringToUTF8Array($source, $strictEncoding);
    $this->length = count($this->source);

    //Line terminators are the concatenation of sequences and characters
    $this->lineTerminators = array_merge(
        self::$lineTerminatorsSequences, self::$lineTerminatorsChars
    );

    //Replace code points with UTF-8 characters in whitespaces and line
    //terminators
    foreach (array("whitespaces", "lineTerminators") as $prop) {
        $converted = $this->$prop;
        foreach ($converted as $idx => $entry) {
            if (is_int($entry)) {
                $converted[$idx] = Utils::unicodeToUtf8($entry);
            }
        }
        $this->$prop = $converted;
    }

    //Collect the punctuators that belong to disabled features
    $disabledPunctuators = array();
    if (!$this->features->exponentiationOperator) {
        $disabledPunctuators[] = "**";
        $disabledPunctuators[] = "**=";
    }
    if (!$this->features->optionalChaining) {
        $disabledPunctuators[] = "?.";
    }
    if (!$this->features->logicalAssignmentOperators) {
        $disabledPunctuators[] = "&&=";
        $disabledPunctuators[] = "||=";
        $disabledPunctuators[] = "??=";
    }
    foreach ($disabledPunctuators as $punctuator) {
        Utils::removeArrayValue($this->punctuators, $punctuator);
    }

    //LSM for punctuators and for strings stops
    $this->punctuatorsLSM = new LSM($this->punctuators);
    $this->stringsStopsLSM = new LSM($this->lineTerminators, true);

    //Paragraph and line separators do not stop strings if the feature is
    //enabled
    if ($this->features->paragraphLineSepInStrings) {
        foreach (array(0x2028, 0x2029) as $codePoint) {
            $this->stringsStopsLSM->remove(
                Utils::unicodeToUtf8($codePoint)
            );
        }
    }

    //With async/await enabled "await" is not in the keywords list
    if ($this->features->asyncAwait) {
        Utils::removeArrayValue($this->keywords, "await");
    }

    $alternatives = implode("|", $this->lineTerminators);
    $this->linesSplitter = "/" . $alternatives . "/uS";

    $this->position = new Position(0, 0, 0);
}
/**
 * Removes a leading BOM from the source. If a BOM is found and no encoding
 * has been given, the encoding identified by the BOM is written to the
 * encoding argument. Both arguments are modified in place.
 *
 * @param string $source   Source
 * @param string $encoding User specified encoding
 */
public function stripBOM(&$source, &$encoding)
{
    //First BOM byte => (remaining BOM bytes, encoding name)
    $signatures = array(
        "\xEF" => array("\xBB\xBF", "UTF-8"),
        "\xFE" => array("\xFF", "UTF-16BE"),
        "\xFF" => array("\xFE", "UTF-16LE"),
    );

    if (!isset($source[0]) || !isset($signatures[$source[0]])) {
        return;
    }

    $signature = $signatures[$source[0]];
    $tail = $signature[0];
    $tailLength = strlen($tail);

    //All the bytes after the first one must match too
    if (substr($source, 1, $tailLength) !== $tail) {
        return;
    }

    $source = substr($source, $tailLength + 1);
    if (!$encoding) {
        $encoding = $signature[1];
    }
}
/**
 * Turns module scanning mode on or off. When module mode is on, the
 * HTML-like comments ("<!--" and "-->") are not skipped as comments.
 *
 * @param bool $enable True to enable module scanning mode, false to disable
 *                     it
 *
 * @return $this
 */
public function enableModuleMode($enable = true)
{
    $this->isModule = $enable;

    return $this;
}
/**
 * Turns comments handling on or off. When it is on, the comments found
 * while skipping whitespaces are turned into tokens and associated with the
 * token that follows them.
 *
 * @param bool $enable True to enable comments handling, false to disable it
 *
 * @return $this
 */
public function enableComments($enable = true)
{
    $this->comments = $enable;

    return $this;
}
/**
 * Turns tokens registration on or off. When it is on, every consumed token
 * is appended to the tokens array returned by getTokens.
 *
 * @param bool $enable True to enable token registration, false to disable
 *                     it
 *
 * @return $this
 */
public function enableTokenRegistration($enable = true)
{
    $this->registerTokens = $enable;

    return $this;
}
/**
 * Returns the array of tokens registered so far
 *
 * @return array
 */
public function getTokens()
{
    $registered = $this->tokens;

    return $registered;
}
/**
 * Returns the scanner's event emitter, creating it on the first call. The
 * emitter is created lazily so that it does not exist unless requested.
 *
 * @return EventsEmitter
 */
public function getEventsEmitter()
{
    if ($this->eventsEmitter) {
        return $this->eventsEmitter;
    }

    $this->eventsEmitter = new EventsEmitter;

    return $this->eventsEmitter;
}
/**
 * Sets the strict mode flag
 *
 * @param bool $strictMode True to enable strict mode, false to disable it
 *
 * @return $this
 */
public function setStrictMode($strictMode)
{
    $this->strictMode = $strictMode;

    return $this;
}
/**
 * Returns the current value of the strict mode flag
 *
 * @return bool
 */
public function getStrictMode()
{
    $enabled = $this->strictMode;

    return $enabled;
}
/**
 * Checks if the given token is a keyword in the current strict mode state.
 * The token must have the keyword type and its value must be in the
 * keywords list or, only when strict mode is on, in the list of strict mode
 * keywords.
 *
 * @param Token $token Token to check
 *
 * @return bool
 */
public function isStrictModeKeyword($token)
{
    if ($token->type !== Token::TYPE_KEYWORD) {
        return false;
    }

    if (in_array($token->value, $this->keywords)) {
        return true;
    }

    return $this->strictMode &&
           in_array($token->value, $this->strictModeKeywords);
}
/**
 * Returns a snapshot of the current scanner state that can be restored
 * with setState. The snapshot contains the properties listed in stateProps
 * and, when tokens registration is enabled, the number of registered
 * tokens ("tokensNum").
 *
 * @return array
 */
public function getState()
{
    //Current and next tokens are scanned before taking the snapshot so that
    //they won't be parsed again when the state is restored. The next token
    //is not scanned after a slash: this prevents edge cases where a regexp
    //containing something that looks like a comment causes its content to
    //be parsed as a real comment too
    $current = $this->currentToken ?: $this->getToken();
    if ($current && $current->value !== "/") {
        $this->getNextToken();
    }

    $snapshot = array();
    foreach ($this->stateProps as $name) {
        $snapshot[$name] = $this->$name;
    }

    if ($this->registerTokens) {
        $snapshot["tokensNum"] = count($this->tokens);
    }

    //Listeners of the FreezeState event receive the snapshot by reference
    //so that they can add their own data
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("FreezeState", array(&$snapshot));
    }

    return $snapshot;
}
/**
 * Restores a scanner state previously returned by getState. When tokens
 * registration is enabled, tokens registered after the snapshot are
 * discarded.
 *
 * @param array $state State
 *
 * @return $this
 */
public function setState($state)
{
    if ($this->registerTokens) {
        $keep = $state["tokensNum"];
        //If a token exists at the snapshot index, tokens have been added
        //since then and must be removed
        if (isset($this->tokens[$keep])) {
            while (count($this->tokens) > $keep) {
                array_pop($this->tokens);
            }
        }
        unset($state["tokensNum"]);
    }

    //Listeners of the ResetState event receive the state by reference
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("ResetState", array(&$state));
    }

    //Every remaining entry is copied back to the matching property
    foreach ($state as $name => $stored) {
        $this->$name = $stored;
    }

    return $this;
}
/**
 * Returns the current scanner position
 *
 * @param bool $scanPosition If false (default) the consumed position is
 *                           returned, if true a new Position built from the
 *                           scanned line, column and index is returned
 *
 * @return Position
 */
public function getPosition($scanPosition = false)
{
    return $scanPosition ?
           new Position($this->line, $this->column, $this->index) :
           $this->position;
}
/**
 * Moves the scan position (index, column and line) to the given position.
 * The consumed position is not changed.
 *
 * @param Position $position Position at which the scan position will be set
 *
 * @return $this
 */
public function setScanPosition(Position $position = null)
{
    $this->index = $position->getIndex();
    $this->column = $position->getColumn();
    $this->line = $position->getLine();

    return $this;
}
/**
 * Returns the character found at the given index of the source, or null
 * when the index is at or beyond the end of the source.
 *
 * @param int $index Index, the current scan index is used if omitted
 *
 * @return string|null
 */
public function charAt($index = null)
{
    $target = $index === null ? $this->index : $index;

    if ($target < $this->length) {
        return $this->source[$target];
    }

    return null;
}
/**
 * Throws a syntax error located at the current scan position
 *
 * @param string $message Error message. If omitted, the message is
 *                        "Unexpected " followed by the current character
 *
 * @return void
 *
 * @throws Exception
 */
protected function error($message = null)
{
    $text = $message ?: "Unexpected " . $this->charAt();

    throw new Exception($text, $this->getPosition(true));
}
/**
 * Consumes the current token: the consumed position is moved to the end of
 * the token, the token is registered and notified if required, then the
 * next token (if already scanned) becomes the current one.
 *
 * @return $this
 */
public function consumeToken()
{
    $consumed = $this->currentToken;

    //The consumed position becomes the end of the consumed token
    $this->position = $consumed->location->end;

    //Comments associated with the token are consumed before the token
    if ($this->comments) {
        $this->consumeCommentsForCurrentToken();
    }

    if ($this->registerTokens) {
        $this->tokens[] = $consumed;
    }

    //Notify the consumed token
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("TokenConsumed", array($consumed));
    }

    //Shift the tokens
    $this->currentToken = $this->nextToken;
    $this->nextToken = null;

    return $this;
}
/**
 * Consumes the current token if its value is identical to the given string
 *
 * @param string $expected String to check
 *
 * @return Token|null The consumed token or null if it does not match
 */
public function consume($expected)
{
    //getToken is called only if there is no pending token, for performance
    //reasons
    $token = $this->currentToken ?: $this->getToken();

    if (!$token || $token->value !== $expected) {
        return null;
    }

    $this->consumeToken();

    return $token;
}
/**
 * Consumes the current token if its value is one of the given strings
 *
 * @param array $expected Strings to check
 *
 * @return Token|null The consumed token or null if none of the strings
 *                    matches
 */
public function consumeOneOf($expected)
{
    //getToken is called only if there is no pending token, for performance
    //reasons
    $token = $this->currentToken ?: $this->getToken();

    if (!$token || !in_array($token->value, $expected)) {
        return null;
    }

    $this->consumeToken();

    return $token;
}
/**
 * Checks that there are no line terminators before the current token.
 * The start line of the current token is compared with a reference line:
 * by default the line of the consumed position, or the end line of the
 * next token when the parameter is true.
 *
 * NOTE(review): in the next token case the comparison uses the end line of
 * the next token and the start line of the current one, this is equivalent
 * to comparing adjacent boundaries only for single-line tokens - confirm
 * that this is intended.
 *
 * @param bool $nextToken True to do the check relative to the next token
 *                        instead of the consumed position
 *
 * @return bool
 */
public function noLineTerminators($nextToken = false)
{
    if ($nextToken) {
        $following = $this->getNextToken();
        $refLine = $following ?
                   $following->location->end->getLine() :
                   null;
    } else {
        $refLine = $this->getPosition()->getLine();
    }

    $token = $this->currentToken ?: $this->getToken();

    return $token &&
           $token->location->start->getLine() === $refLine;
}
/**
 * Checks if one of the given values follows the current scan position.
 * Plain values are compared with the current token value. When the second
 * parameter is true, array entries are checked too: the first element must
 * be the current token value and the second one is either true (the check
 * is delegated to noLineTerminators(true)) or the expected value of the
 * next token.
 *
 * @param string|array $expected  String or array of strings to check
 * @param bool         $nextToken This parameter must be true if the first
 *                                parameter contains arrays so that next
 *                                tokens are checked too
 *
 * @return bool
 */
public function isBefore($expected, $nextToken = false)
{
    $token = $this->currentToken ?: $this->getToken();
    if (!$token) {
        return false;
    }

    if (in_array($token->value, $expected)) {
        return true;
    }

    //Nothing else to check if the next token is not required or missing
    if (!$nextToken || !$this->getNextToken()) {
        return false;
    }

    foreach ($expected as $candidate) {
        if (!is_array($candidate) || $candidate[0] !== $token->value) {
            continue;
        }
        $matches = $candidate[1] === true ?
                   $this->noLineTerminators(true) :
                   $candidate[1] === $this->nextToken->value;
        if ($matches) {
            return true;
        }
    }

    return false;
}
/**
 * Returns the token that follows the current one, scanning it if it has not
 * been scanned yet
 *
 * @return Token|null
 */
public function getNextToken()
{
    if ($this->nextToken) {
        return $this->nextToken;
    }

    $current = $this->currentToken ?: $this->getToken();

    //getToken returns the current token while it is set, so it is
    //temporarily cleared to scan the following one. End of file checks are
    //skipped for this lookahead
    $this->currentToken = null;
    $this->nextToken = $this->getToken(true);
    $this->currentToken = $current;

    return $this->nextToken;
}
/**
* Returns the current token, scanning it from the source if there is no
* pending one. Null is returned when the end of the source is reached or
* when nothing can be scanned right after a slash (see isAfterSlash), in the
* latter case the scan position is restored to where the scan started.
*
* @param bool $skipEOFChecks True to skip end of file checks
* even if the end is reached
*
* @return Token|null
*
* @throws Exception If brackets or templates are still open at the end of
* the source or if no valid token can be scanned
*/
public function getToken($skipEOFChecks = false)
{
//The current token is returned until consumed
if ($this->currentToken) {
return $this->currentToken;
}
$comments = $this->skipWhitespacesAndComments();
//Emit the TokenCreated event for all the comments found
if ($comments) {
foreach ($comments as $comment) {
$this->eventsEmitter && $this->eventsEmitter->fire(
"TokenCreated", array($comment)
);
}
}
//When the end of the source is reached
if ($this->index >= $this->length) {
//Check if there are open brackets
if (!$skipEOFChecks) {
foreach ($this->openBrackets as $bracket => $num) {
if ($num) {
$this->error("Unclosed $bracket");
}
}
//Check if there are open templates
if (count($this->openTemplates)) {
$this->error("Unterminated template");
}
}
//Register comments and consume them. Since there is no current token
//here, they are stored under the empty key of the comments map
if ($this->comments && $comments) {
$this->commentsForCurrentToken($comments);
}
//Emit the EndReached event when at the end of the source
$this->eventsEmitter && $this->eventsEmitter->fire(
"EndReached"
);
return null;
}
$startPosition = $this->getPosition(true);
$origException = null;
try {
//Try to match a token. Scanners are tried in order and the first one
//that returns a token wins, JSX scanners are tried only when the
//internal JSX flag is on
if (
($this->jsx && ($token = $this->scanJSXIdentifier())) ||
($token = $this->scanTemplate()) ||
($token = $this->scanNumber()) ||
($this->jsx && ($token = $this->scanJSXPunctuator())) ||
($token = $this->scanPunctuator()) ||
($token = $this->scanKeywordOrIdentifier()) ||
($this->jsx && ($token = $this->scanJSXString())) ||
($token = $this->scanString())
) {
//Set the token start and end positions
$token->location->start = $startPosition;
$token->location->end = $this->getPosition(true);
$this->currentToken = $token;
//Register comments if required
if ($this->comments && $comments) {
$this->commentsForCurrentToken($comments);
}
//Emit the TokenCreated event for the token just created
$this->eventsEmitter && $this->eventsEmitter->fire(
"TokenCreated", array($this->currentToken)
);
return $this->currentToken;
}
} catch (Exception $e) {
//The exception is not rethrown immediately because the failure may
//be acceptable after a slash (see below)
$origException = $e;
}
//If the last token was "/" do not throw an error if the token has not
//been recognized since it can be the first character in a regexp and it
//will be consumed when the current token is reconsumed as a regexp
if ($this->isAfterSlash($startPosition)) {
$this->setScanPosition($startPosition);
return null;
}
//No valid token found. If there was a scan error, throw the same
//exception again, otherwise throw a new error
if ($origException) {
throw $origException;
}
$this->error();
}
/**
 * Executes the operations required when the scanning of the source ends:
 * pending comments are consumed and the EndReached event is emitted.
 *
 * @return $this
 */
public function consumeEnd()
{
    //Final comments
    if ($this->comments) {
        $this->consumeCommentsForCurrentToken();
    }

    //Notify that the end of the source has been reached
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("EndReached");
    }

    return $this;
}
/**
 * Gets or sets the comments for the current token. If an array is given it
 * is stored in the comments map for the current token, otherwise the
 * comments stored for the current token are removed from the map and
 * returned. When there is no current token the empty string is used as map
 * key.
 *
 * @param array $comments Comments array
 *
 * @return array|null The given comments, the stored ones or null if there
 *                    are no stored comments
 */
protected function commentsForCurrentToken($comments = null)
{
    $key = $this->currentToken ?
           spl_object_hash($this->currentToken) :
           "";

    //Setter
    if ($comments !== null) {
        $this->commentsMap[$key] = $comments;
        return $comments;
    }

    //Getter: stored comments are removed from the map once read
    if (!isset($this->commentsMap[$key])) {
        return null;
    }
    $stored = $this->commentsMap[$key];
    unset($this->commentsMap[$key]);

    return $stored;
}
/**
 * Consumes the comment tokens associated with the current token. Comments
 * are always removed from the comments map, then each of them is
 * registered in the tokens array and notified with the TokenConsumed event
 * when these features are active.
 *
 * @return $this
 */
protected function consumeCommentsForCurrentToken()
{
    $comments = $this->commentsForCurrentToken();

    //Nothing to do without comments or without anything to notify
    $mustNotify = $this->registerTokens || $this->eventsEmitter;
    if (!$comments || !$mustNotify) {
        return $this;
    }

    foreach ($comments as $comment) {
        if ($this->registerTokens) {
            $this->tokens[] = $comment;
        }
        if ($this->eventsEmitter) {
            $this->eventsEmitter->fire("TokenConsumed", array($comment));
        }
    }

    return $this;
}
/**
 * Checks if the consumed or the scanned position follow a slash. The
 * characters checked are the one at the consumed index, the one after it
 * and, if a position is given, the one right before that position.
 *
 * @param Position $position Additional position to check
 *
 * @return bool
 */
protected function isAfterSlash($position = null)
{
    $consumed = $this->getPosition()->getIndex();
    $candidates = array($consumed, $consumed + 1);
    if ($position) {
        $candidates[] = $position->getIndex() - 1;
    }

    foreach ($candidates as $candidate) {
        //Negative indices are outside the source
        if ($candidate < 0) {
            continue;
        }
        if ($this->charAt($candidate) === "/") {
            return true;
        }
    }

    return false;
}
/**
* Tries to reconsume the current token as a regexp if possible. The current
* token must start with "/": the scan position is moved back to the token
* start, the regexp body and its flags are scanned and the current token is
* replaced with a regular expression token. An already scanned next token
* is discarded.
*
* @return Token|null The regexp token or null if the current token does not
* start with "/"
*
* @throws Exception If the regexp or one of its character classes is not
* terminated
*/
public function reconsumeCurrentTokenAsRegexp()
{
$token = $this->currentToken ?: $this->getToken();
$value = $token ? $token->value : null;
//Check if the token starts with "/"
if (!$value || $value[0] !== "/") {
return null;
}
//Reset the scanner position to the token's start position
$startPosition = $token->location->start;
$this->setScanPosition($startPosition);
//Skip the opening slash
$buffer = "/";
$this->index++;
$this->column++;
$inClass = false;
while (true) {
//In a characters class the delimiter "/" is allowed without escape,
//so the characters class must be closed before closing the regexp
$stops = $inClass ? array("]") : array("/", "[");
$tempBuffer = $this->consumeUntil($stops);
if ($tempBuffer === null) {
if ($inClass) {
$this->error(
"Unterminated character class in regexp"
);
} else {
$this->error("Unterminated regexp");
}
}
//The first element is appended to the regexp source and the second one
//is compared with the stop characters
$buffer .= $tempBuffer[0];
if ($tempBuffer[1] === "/") {
break;
} else {
$inClass = $tempBuffer[1] === "[";
}
}
//Flags: consume every following character between "a" and "z", case
//insensitive
while (($char = $this->charAt()) !== null) {
$lower = strtolower($char);
if ($lower >= "a" && $lower <= "z") {
$buffer .= $char;
$this->index++;
$this->column++;
} else {
break;
}
}
//If next token has already been parsed and it's a bracket exclude it
//from the count of open brackets
//NOTE(review): scanPunctuator seems to key openBrackets by open brackets
//only, so for a closing bracket the isset check on openBrackets below
//presumably never passes - confirm
if ($this->nextToken) {
$nextVal = $this->nextToken->value;
if (isset($this->brackets[$nextVal]) &&
isset($this->openBrackets[$nextVal])
) {
if ($this->brackets[$nextVal]) {
$this->openBrackets[$nextVal]++;
} else {
$this->openBrackets[$nextVal]--;
}
}
$this->nextToken = null;
}
//If comments handling is enabled, get the comments associated with the
//current token
$comments = $this->comments ? $this->commentsForCurrentToken() : null;
//Replace the current token with a regexp token
$token = new Token(Token::TYPE_REGULAR_EXPRESSION, $buffer);
$token->location->start = $startPosition;
$token->location->end = $this->getPosition(true);
$this->currentToken = $token;
if ($comments) {
//Attach the comments to the new current token
$this->commentsForCurrentToken($comments);
}
return $this->currentToken;
}
/**
* Skips whitespaces and comments from the current scan position. If
* comments handling is enabled, the array of parsed comments is returned,
* otherwise the returned array is empty.
*
* The skipped text is accumulated in a buffer and passed to
* adjustColumnAndLine, only the index is advanced directly here.
*
* @return array
*
* @throws Exception If a multiline comment is not terminated
*/
protected function skipWhitespacesAndComments()
{
$comments = [];
$content = "";
//Index at which this skip operation starts, used to check if a close html
//comment is preceded by a line terminator
$secStartIdx = $this->index;
while (($char = $this->charAt()) !== null) {
//Whitespace
if (in_array($char, $this->whitespaces)) {
$content .= $char;
$this->index++;
} elseif ($char === "/" || $char === "#") {
$nextChar = $this->charAt($this->index + 1);
//Hashbang comment. This will be parsed only if hashbangs comments are enabled
//and if it appears at the beginning of the code
$hashBang = (
$char === "#" && $nextChar === "!" &&
$this->features->hashbangComments && !$this->index
);
//Comment
if ($nextChar === "/" || $nextChar === "*" || $hashBang) {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
//Everything that does not start with "/*" ends at the first line
//terminator
$inline = $nextChar !== "*";
$this->index += 2;
$content .= $char . $nextChar;
while (true) {
$char = $this->charAt();
if ($char === null) {
if (!$inline) {
//If the end of the source has been reached and
//a multiline comment is still open, it's an
//error
$this->error("Unterminated comment");
}
$isEnd = true;
} else {
$content .= $char;
$this->index++;
$isEnd = $inline ?
//Inline comment
in_array($char, $this->lineTerminators) :
//Multiline comment
$char === "*" && $this->charAt() === "/";
}
if ($isEnd) {
if (!$inline) {
//Consume the slash that closes the multiline comment
$content .= "/";
$this->index++;
}
if ($this->comments) {
//For inline comments the closing line
//terminator must be excluded from comment text
if ($inline && $char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//For inline comments the new content contains
//the closing line terminator since the char has
//already been processed
$content = "";
if ($inline && $char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
} else {
//A slash or a hash that does not start a comment
break;
}
} elseif (!$this->isModule && $char === "<" &&
$this->charAt($this->index + 1) === "!" &&
$this->charAt($this->index + 2) === "-" &&
$this->charAt($this->index + 3) === "-"
) {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
//Open html comment, it ends at the first line terminator or at the
//end of the source
$this->index += 4;
$content .= "<!--";
while (true) {
$char = $this->charAt();
if ($char === null) {
$isEnd = true;
} else {
$content .= $char;
$this->index++;
$isEnd = in_array($char, $this->lineTerminators);
}
if ($isEnd) {
if ($this->comments) {
//Remove the closing line terminator from the
//comment text
if ($char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//The closing line terminator starts the new content
$content = "";
if ($char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
} elseif (!$this->isModule && $char === "-" &&
$this->charAt($this->index + 1) === "-" &&
$this->charAt($this->index + 2) === ">"
) {
//Close html comment
//Check if it is on its own line: it is allowed if this skip operation
//started at the beginning of the source or if a line terminator has been
//skipped before reaching it
$allow = false;
if (!$secStartIdx) {
$allow = true;
} else {
for ($index = $this->index - 1; $index >= $secStartIdx; $index--) {
if (in_array($this->charAt($index), $this->lineTerminators)) {
$allow = true;
break;
}
}
}
if ($allow) {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
$this->index += 3;
$content .= "-->";
while (true) {
$char = $this->charAt();
if ($char === null) {
$isEnd = true;
} else {
$content .= $char;
$this->index++;
$isEnd = in_array($char, $this->lineTerminators);
}
if ($isEnd) {
if ($this->comments) {
//Remove the closing line terminator from the
//comment text
if ($char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//The closing line terminator starts the new content
$content = "";
if ($char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
} else {
break;
}
} else {
//Anything else is the beginning of a token
break;
}
}
//Update column and line using the remaining skipped content
if ($content !== "") {
$this->adjustColumnAndLine($content);
}
return $comments;
}
/**
 * String scanning method. A string starts with a single or a double quote
 * and must be closed by the same quote.
 *
 * @param bool $handleEscape True to handle escaping
 *
 * @return Token|null The string token, including its quotes, or null if
 *                    the current character is not a quote
 *
 * @throws Exception If the string is not terminated
 */
protected function scanString($handleEscape = true)
{
    $quote = $this->charAt();
    if ($quote !== "'" && $quote !== '"') {
        return null;
    }

    $this->index++;
    $this->column++;

    //The opening quote is a stop only while this string is consumed, so it
    //is added to the LSM and removed right after
    $this->stringsStopsLSM->add($quote);
    $consumed = $this->consumeUntil($this->stringsStopsLSM, $handleEscape);
    $this->stringsStopsLSM->remove($quote);

    //The string must end with the quote that opened it
    if ($consumed === null || $consumed[1] !== $quote) {
        $this->error("Unterminated string");
    }

    return new Token(Token::TYPE_STRING_LITERAL, $quote . $consumed[0]);
}
/**
 * Template scanning method. A template part starts with a backtick or with
 * the curly bracket that closes a template expression, and ends with a
 * backtick or with "${".
 *
 * @return Token|null
 *
 * @throws Exception If the template is not terminated
 */
protected function scanTemplate()
{
    $char = $this->charAt();

    //Current number of open curly brackets
    $openCurly = 0;
    if (isset($this->openBrackets["{"])) {
        $openCurly = $this->openBrackets["{"];
    }

    //A closing curly bracket ends a template expression if the number of
    //open curly brackets matches the last number registered in the open
    //templates stack
    $endExpression = false;
    if ($char === "}") {
        $depth = count($this->openTemplates);
        if ($depth && $this->openTemplates[$depth - 1] === $openCurly) {
            $endExpression = true;
            array_pop($this->openTemplates);
        }
    }

    if ($char !== "`" && !$endExpression) {
        return null;
    }

    $this->index++;
    $this->column++;
    $buffer = $char;

    //Consume until a backtick or "${" is found, a "$" that is not followed
    //by "{" is part of the template text
    do {
        $chunk = $this->consumeUntil(array("`", "$"));
        if (!$chunk) {
            $this->error("Unterminated template");
        }
        $buffer .= $chunk[0];
        $stop = $chunk[1];
        $isPlainDollar = $stop === "$" && $this->charAt() !== "{";
    } while ($isPlainDollar);

    //If "${" is found it's a new template expression: consume the curly
    //bracket and register the current number of open curly brackets in the
    //open templates stack
    if ($stop === "$") {
        $this->index++;
        $this->column++;
        $buffer .= "{";
        $this->openTemplates[] = $openCurly;
    }

    return new Token(Token::TYPE_TEMPLATE, $buffer);
}
/**
 * Number scanning method. It handles decimal numbers with optional decimal
 * and exponent parts, numbers starting with a dot, hexadecimal (0x), octal
 * (0o) and binary (0b) forms and, if the feature is enabled, BigInt
 * literals ("n" suffix).
 *
 * @return Token|null The numeric or BigInt token, or null if there is no
 *                    number at the current scan position
 *
 * @throws Exception If the number is malformed
 */
protected function scanNumber()
{
    //Numbers can start with a decimal number or with a dot (.5)
    $char = $this->charAt();
    if (!(($char >= "0" && $char <= "9") || $char === ".")) {
        return null;
    }

    $buffer = "";
    $allowedDecimals = true;

    //Parse the integer part
    if ($char !== ".") {

        //Consume all decimal numbers
        $buffer = $this->consumeNumbers();
        $char = $this->charAt();

        //Decimal BigInt literal
        if ($this->features->bigInt && $char === "n") {
            $this->index++;
            $this->column++;
            return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . $char);
        }

        $lower = $char !== null ? strtolower($char) : null;

        //Handle hexadecimal (0x), octal (0o) and binary (0b) forms. The
        //character after the zero selects the digits list to use
        //(xnumbers, onumbers or bnumbers)
        if ($buffer === "0" && $lower !== null &&
            isset($this->{$lower . "numbers"})
        ) {

            $this->index++;
            $this->column++;
            $tempBuffer = $this->consumeNumbers($lower);
            if ($tempBuffer === null) {
                $this->error("Missing numbers after 0$char");
            }
            $buffer .= $char . $tempBuffer;

            //Check that there are not numbers left
            if ($this->consumeNumbers() !== null) {
                $this->error();
            }

            //BigInt literal in hexadecimal, octal or binary form. The
            //suffix is appended explicitly because $char still contains
            //the form prefix (x, o or b) at this point
            if ($this->features->bigInt && $this->charAt() === "n") {
                $this->index++;
                $this->column++;
                return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . "n");
            }

            return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
        }

        //Consume exponent part if present, decimals are not allowed after
        //it
        if ($tempBuffer = $this->consumeExponentPart()) {
            $buffer .= $tempBuffer;
            $allowedDecimals = false;
        }
    }

    //Parse the decimal part
    if ($allowedDecimals && $this->charAt() === ".") {

        //Consume the dot
        $this->index++;
        $this->column++;
        $buffer .= ".";

        //Consume all decimal numbers
        $tempBuffer = $this->consumeNumbers();
        $buffer .= $tempBuffer;

        //If the buffer contains only the dot it should be parsed as
        //punctuator, so the scan position is moved back
        if ($buffer === ".") {
            $this->index--;
            $this->column--;
            return null;
        }

        //Consume exponent part if present
        if (($tempBuffer = $this->consumeExponentPart()) !== null) {
            $buffer .= $tempBuffer;
        }
    }

    return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
}
/**
 * Consumes the maximum number of digits of the given type. If numeric
 * literal separators are enabled, underscores are accepted after the first
 * digit but not at the end of the consumed sequence.
 *
 * @param string $type Digits type: empty string for decimal digits, "x"
 *                     for hexadecimal, "o" for octal and "b" for binary
 * @param int    $max  Maximum number of characters to match
 *
 * @return string|null The consumed characters or null if nothing has been
 *                     consumed
 *
 * @throws Exception If the consumed sequence ends with a separator
 */
protected function consumeNumbers($type = "", $max = null)
{
    $digits = $this->{$type . "numbers"};
    $separator = $this->features->numericLiteralSeparator ? "_" : "";
    $consumed = "";
    $count = 0;

    $char = $this->charAt();
    while (
        in_array($char, $digits) ||
        ($count && $char === $separator)
    ) {
        $consumed .= $char;
        $this->index++;
        $this->column++;
        $count++;
        if ($count === $max) {
            break;
        }
        $char = $this->charAt();
    }

    if ($count && substr($consumed, -1) === "_") {
        $this->error(
            "Numeric separators are not allowed at the end of a number"
        );
    }

    return $count ? $consumed : null;
}
/**
 * Consumes the exponent part of a number if the current character
 * starts one
 *
 * The exponent is made of "e" or "E", an optional sign and a run of
 * decimal digits. If the digits are missing the problem is reported
 * through error().
 *
 * @return string The consumed exponent, or an empty string if the current
 *                character is not an exponent marker
 */
protected function consumeExponentPart()
{
    $marker = $this->charAt();
    //Nothing to do if the exponent marker is not there
    if ($marker === null || strtolower($marker) !== "e") {
        return "";
    }
    $this->index++;
    $this->column++;
    $exponent = $marker;
    //Optional sign
    $sign = $this->charAt();
    if ($sign === "+" || $sign === "-") {
        $this->index++;
        $this->column++;
        $exponent .= $sign;
    }
    //Mandatory digits
    $digits = $this->consumeNumbers();
    if ($digits === null) {
        $this->error("Missing exponent");
    }
    return $exponent . $digits;
}
/**
 * Punctuator scanning method
 *
 * Brackets are handled first, so that the count of the currently open
 * brackets can be kept up to date, every other punctuator is searched
 * using the longest match available at the current position.
 *
 * @return Token|null The punctuator token or null if there is no
 *                    punctuator at the current position
 */
protected function scanPunctuator()
{
    $char = $this->charAt();
    if (!isset($this->brackets[$char])) {
        //Not a bracket, try to match the longest punctuator
        $found = $this->punctuatorsLSM->match($this, $this->index, $char);
        if (!$found) {
            return null;
        }
        $length = $found[0];
        $punctuator = $found[1];
        //Optional chaining punctuator cannot appear before a number, in
        //this case only the question mark must be consumed
        if ($punctuator === "?.") {
            $following = $this->charAt($this->index + $length);
            if ($following !== null &&
                $following >= "0" && $following <= "9"
            ) {
                $length = 1;
                $punctuator = "?";
            }
        }
        $this->index += $length;
        $this->column += $length;
        return new Token(Token::TYPE_PUNCTUATOR, $punctuator);
    }
    //The brackets map associates every closing bracket to its open
    //bracket, while open brackets have an empty value
    $openBracket = $this->brackets[$char];
    if (!$openBracket) {
        //Open bracket, increase its counter
        if (!isset($this->openBrackets[$char])) {
            $this->openBrackets[$char] = 0;
        }
        $this->openBrackets[$char]++;
    } elseif (isset($this->openBrackets[$openBracket]) &&
        $this->openBrackets[$openBracket]
    ) {
        //Closing bracket with a corresponding open bracket
        $this->openBrackets[$openBracket]--;
    } elseif (!$this->isAfterSlash($this->getPosition(true))) {
        //Unbalanced closing bracket, it is tolerated only when
        //isAfterSlash accepts the current position
        $this->error();
    }
    $this->index++;
    $this->column++;
    return new Token(Token::TYPE_PUNCTUATOR, $char);
}
/**
 * Keywords and identifiers scanning method
 *
 * Consumes an optional "#" (only if private methods and fields are
 * enabled) followed by the longest run of identifier characters and
 * unicode escape sequences that decode to identifier characters, then
 * classifies the result as private identifier, null literal, boolean
 * literal, keyword or identifier.
 *
 * @return Token|null The token or null if there is no identifier at the
 *                    current position, in that case nothing is consumed
 */
protected function scanKeywordOrIdentifier()
{
    //Check private identifier start character
    $private = $this->features->privateMethodsAndFields &&
               $this->charAt() === "#";
    if ($private) {
        $this->index++;
        $this->column++;
    }
    //Consume the maximum number of characters that are unicode escape
    //sequences or valid identifier starts (only the first character) or
    //parts
    $buffer = "";
    $start = true;
    while (($char = $this->charAt()) !== null) {
        if (
            ($char >= "a" && $char <= "z") ||
            ($char >= "A" && $char <= "Z") ||
            $char === "_" || $char === "$" ||
            (!$start && $char >= "0" && $char <= "9") ||
            $this->isIdentifierChar($char, $start)
        ) {
            $buffer .= $char;
            $this->index++;
            $this->column++;
        } elseif ($char === "\\") {
            //Remember where the escape sequence begins so that it can be
            //unconsumed if it can't be part of the identifier
            $escapeIndex = $this->index;
            $escapeColumn = $this->column;
            $seq = $this->consumeUnicodeEscapeSequence();
            if (!$seq) {
                break;
            }
            //Verify that it's a valid character. If it is not, the
            //sequence must be given back, otherwise it would disappear
            //from the source without being part of any token
            if (!$this->isIdentifierChar($seq[1], $start)) {
                $this->index = $escapeIndex;
                $this->column = $escapeColumn;
                break;
            }
            $buffer .= $seq[0];
        } else {
            break;
        }
        $start = false;
    }
    //Identify token type
    if ($buffer === "") {
        //Unconsume the hash if nothing was found after that
        if ($private) {
            $this->index--;
            $this->column--;
        }
        return null;
    } elseif ($private) {
        $type = Token::TYPE_PRIVATE_IDENTIFIER;
        $buffer = "#" . $buffer;
    } elseif ($buffer === "null") {
        $type = Token::TYPE_NULL_LITERAL;
    } elseif ($buffer === "true" || $buffer === "false") {
        $type = Token::TYPE_BOOLEAN_LITERAL;
    } elseif (in_array($buffer, $this->keywords) ||
        in_array($buffer, $this->strictModeKeywords)
    ) {
        $type = Token::TYPE_KEYWORD;
    } else {
        $type = Token::TYPE_IDENTIFIER;
    }
    return new Token($type, $buffer);
}
/**
 * Consumes an unicode escape sequence
 *
 * Two forms are recognized: "\u" followed by exactly 4 hexadecimal
 * digits and "\u{" followed by at least one hexadecimal digit and a
 * closing "}". If the sequence at the current position is malformed
 * nothing is consumed.
 *
 * @return array|null Null if there is no valid sequence, otherwise an
 *                    array where the first element is the matched
 *                    sequence and the second one is the decoded character
 */
protected function consumeUnicodeEscapeSequence()
{
    if ($this->charAt() !== "\\" ||
        $this->charAt($this->index + 1) !== "u"
    ) {
        return null;
    }
    $startIndex = $this->index;
    $startColumn = $this->column;
    $this->index += 2;
    $this->column += 2;
    $brackets = $this->charAt() === "{";
    if ($brackets) {
        //\u{FFF}
        $this->index++;
        $this->column++;
        $code = $this->consumeNumbers("x");
        //The code must be compared with null explicitly, since "0" is a
        //valid code but it is a falsy string
        if ($code !== null && $this->charAt() === "}") {
            $this->index++;
            $this->column++;
        } else {
            $code = null;
        }
    } else {
        //\uFFFF
        $code = $this->consumeNumbers("x", 4);
        if ($code !== null && strlen($code) !== 4) {
            $code = null;
        }
    }
    //Numeric separators can be consumed along with the digits when the
    //feature is enabled, but they are not allowed in escape sequences
    if ($code !== null && strpos($code, "_") !== false) {
        $code = null;
    }
    //Unconsume everything if the format is invalid
    if ($code === null) {
        $this->index = $startIndex;
        $this->column = $startColumn;
        return null;
    }
    //Return an array where the first element is the matched sequence
    //and the second one is the decoded character
    return array(
        $brackets ? "\\u{" . $code . "}" : "\\u" . $code,
        Utils::unicodeToUtf8(hexdec($code))
    );
}
/**
 * Tells if a character can be part of an identifier
 *
 * ASCII letters, underscore, dollar sign and (when the character is not
 * the first one) ASCII digits are accepted right away, anything else is
 * tested with the identifier start or identifier part regex.
 *
 * @param string $char  Character to check
 * @param bool   $start If true it will check that the character is
 *                      valid to start an identifier
 *
 * @return bool
 */
protected function isIdentifierChar($char, $start = true)
{
    if ($char === "_" || $char === "$") {
        return true;
    }
    $isLowercase = $char >= "a" && $char <= "z";
    $isUppercase = $char >= "A" && $char <= "Z";
    if ($isLowercase || $isUppercase) {
        return true;
    }
    //Digits are valid everywhere except at the beginning
    if (!$start && $char >= "0" && $char <= "9") {
        return true;
    }
    $regex = $start ? $this->idStartRegex : $this->idPartRegex;
    return (bool) preg_match($regex, $char);
}
/**
 * Updates the current line and column so that they reflect the
 * consumption of the given string
 *
 * The string is split using the lines splitter regex: every line break
 * found increases the line count and restarts the column from the length
 * of the text that follows the last break. Without line breaks the
 * length of the string is simply added to the current column.
 *
 * @param string $buffer String to analyze
 *
 * @return void
 */
protected function adjustColumnAndLine($buffer)
{
    $parts = preg_split($this->linesSplitter, $buffer);
    $breaks = count($parts) - 1;
    //Length in characters, not in bytes, of the text after the last break
    $tailLength = mb_strlen($parts[$breaks], "UTF-8");
    $this->line += $breaks;
    $this->column = $breaks ? $tailLength : $this->column + $tailLength;
}
/**
 * Consumes characters until one of the given stops is found
 *
 * Line and column are updated only when a stop is found. If the end of
 * the source is reached before that, null is returned and the index is
 * left where the scan ended.
 *
 * @param array|LSM $stops        Characters to search, as array of
 *                                characters or as LSM instance
 * @param bool      $handleEscape True to handle escaping, a stop that
 *                                follows an unescaped backslash is
 *                                consumed as a normal character
 * @param bool      $collectStop  True to include the stop character, if
 *                                false the stop is not consumed
 *
 * @return array|null Null if no stop is found or if the stop is not
 *                    collected and nothing precedes it, otherwise an
 *                    array where the first element is the consumed
 *                    string and the second one is the stop
 */
protected function consumeUntil(
    $stops, $handleEscape = true, $collectStop = true
) {
    $useLSM = $stops instanceof LSM;
    $consumed = "";
    $escaped = false;
    while (($char = $this->charAt()) !== null) {
        $step = 1;
        if ($useLSM) {
            //A match can be longer than one character, in that case the
            //whole match takes the place of the current character
            $found = $stops->match($this, $this->index, $char);
            $isStop = (bool) $found;
            if ($isStop) {
                $step = $found[0];
                $char = $found[1];
            }
        } else {
            $isStop = in_array($char, $stops);
        }
        if ($isStop && !$escaped) {
            if ($collectStop) {
                $this->index += $step;
                $consumed .= $char;
            } elseif ($consumed === "") {
                return null;
            }
            $this->adjustColumnAndLine($consumed);
            return array($consumed, $char);
        }
        $this->index += $step;
        $consumed .= $char;
        //A backslash escapes the next character only if it is not
        //escaped itself
        $escaped = !$escaped && $char === "\\" && $handleEscape;
    }
    return null;
}
}