<?php
|
|
/**
|
|
* This file is part of the Peast package
|
|
*
|
|
* (c) Marco Marchiò <marco.mm89@gmail.com>
|
|
*
|
|
* For the full copyright and license information refer to the LICENSE file
|
|
* distributed with this source code
|
|
*/
|
|
namespace Peast\Syntax;
|
|
|
|
/**
|
|
* Base class for scanners.
|
|
*
|
|
* @author Marco Marchiò <marco.mm89@gmail.com>
|
|
*/
|
|
class Scanner
|
|
{
|
|
use JSX\Scanner;
|
|
|
|
/**
 * Scanner features
 *
 * @var Features
 */
protected $features;

/**
 * Column of the current scan position
 *
 * @var int
 */
protected $column = 0;

/**
 * Line of the current scan position
 *
 * @var int
 */
protected $line = 1;

/**
 * Index of the current scan position in the source characters array
 *
 * @var int
 */
protected $index = 0;

/**
 * Number of characters in the source
 *
 * @var int
 */
protected $length;

/**
 * Source code split into an array of UTF8 characters
 *
 * @var array
 */
protected $source;

/**
 * Consumed position, that is the end of the last consumed token
 *
 * @var Position
 */
protected $position;

/**
 * Current token
 *
 * @var Token|null
 */
protected $currentToken;

/**
 * Next token
 *
 * @var Token|null
 */
protected $nextToken;

/**
 * Strict mode flag
 *
 * @var bool
 */
protected $strictMode = false;

/**
 * True to register consumed tokens in the tokens array
 *
 * @var bool
 */
protected $registerTokens = false;

/**
 * Module mode
 *
 * @var bool
 */
protected $isModule = false;

/**
 * Comments handling
 *
 * @var bool
 */
protected $comments = false;

/**
 * Internal JSX scan flag
 *
 * @var bool
 */
protected $jsx = false;

/**
 * Registered tokens array
 *
 * @var array
 */
protected $tokens = array();

/**
 * Comments to tokens map. Keys are the object hashes of the tokens (an
 * empty string when there is no current token), values are arrays of
 * comment tokens
 *
 * @var array
 */
protected $commentsMap = array();

/**
 * Events emitter, created only when requested
 *
 * @var EventsEmitter|null
 */
protected $eventsEmitter;

/**
 * Regex to match identifiers starts
 *
 * @var string
 */
protected $idStartRegex = "/[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\x{1885}\x{1886}\x{2118}\x{212E}\x{309B}\x{309C}]/u";

/**
 * Regex to match identifiers parts
 *
 * @var string
 */
protected $idPartRegex = "/[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\x{1885}\x{1886}\x{2118}\x{212E}\x{309B}\x{309C}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{00B7}\x{0387}\x{1369}\x{136A}\x{136B}\x{136C}\x{136D}\x{136E}\x{136F}\x{1370}\x{1371}\x{19DA}\x{200C}\x{200D}]/u";

/**
 * Keywords array. "await" is removed by the constructor when the
 * async/await feature is enabled
 *
 * @var array
 */
protected $keywords = array(
    "break", "do", "in", "typeof", "case", "else", "instanceof", "var",
    "catch", "export", "new", "void", "class", "extends", "return", "while",
    "const", "finally", "super", "with", "continue", "for", "switch",
    "debugger", "function", "this", "default", "if", "throw",
    "delete", "import", "try", "enum", "await"
);

/**
 * Array of words that are keywords only in strict mode
 *
 * @var array
 */
protected $strictModeKeywords = array(
    "implements", "interface", "package", "private", "protected", "public",
    "static", "let", "yield"
);

/**
 * Punctuators array. The constructor removes the punctuators that belong
 * to disabled features
 *
 * @var array
 */
protected $punctuators = array(
    ".", ";", ",", "<", ">", "<=", ">=", "==", "!=", "===", "!==", "+",
    "-", "*", "%", "++", "--", "<<", ">>", ">>>", "&", "|", "^", "!", "~",
    "&&", "||", "?", ":", "=", "+=", "-=", "*=", "%=", "<<=", ">>=", ">>>=",
    "&=", "|=", "^=", "=>", "...", "/", "/=", "**", "**=", "??", "?.",
    "&&=", "||=", "??="
);

/**
 * Punctuators LSM
 *
 * @var LSM
 */
protected $punctuatorsLSM;

/**
 * Strings stops LSM
 *
 * @var LSM
 */
protected $stringsStopsLSM;

/**
 * Brackets array. Each closing bracket maps to its opening bracket, while
 * opening brackets map to an empty string
 *
 * @var array
 */
protected $brackets = array(
    "(" => "", "[" => "", "{" => "", ")" => "(", "]" => "[", "}" => "{"
);

/**
 * Open brackets array, a counter for each bracket
 *
 * @var array
 */
protected $openBrackets = array();

/**
 * Open templates array
 *
 * @var array
 */
protected $openTemplates = array();

/**
 * Whitespaces array. Integer entries are character codes that the
 * constructor converts to UTF8 characters
 *
 * @var array
 */
protected $whitespaces = array(
    " ", "\t", "\n", "\r", "\f", "\v", 0x00A0, 0xFEFF,
    0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006,
    0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000, 0x2028,
    0x2029
);

/**
 * Line terminators characters array. Integer entries are character codes
 *
 * @var array
 *
 * @static
 */
public static $lineTerminatorsChars = array("\n", "\r", 0x2028, 0x2029);

/**
 * Line terminators sequences array
 *
 * @var array
 *
 * @static
 */
public static $lineTerminatorsSequences = array("\r\n");

/**
 * Regex to split texts using valid ES line terminators
 *
 * @var string
 */
protected $linesSplitter;

/**
 * Concatenation of line terminators sequences and line terminators
 * characters, with character codes converted to UTF8 characters
 *
 * @var array
 */
protected $lineTerminators;

/**
 * Properties to copy when getting the scanner state
 *
 * @var array
 */
protected $stateProps = array("position", "index", "column", "line",
                              "currentToken", "nextToken", "strictMode",
                              "openBrackets", "openTemplates",
                              "commentsMap");

/**
 * Decimal numbers
 *
 * @var array
 */
protected $numbers = array("0", "1", "2", "3", "4", "5", "6", "7", "8",
                           "9");

/**
 * Hexadecimal numbers
 *
 * @var array
 */
protected $xnumbers = array("0", "1", "2", "3", "4", "5", "6", "7", "8",
                            "9", "a", "b", "c", "d", "e", "f", "A", "B",
                            "C", "D", "E", "F");

/**
 * Octal numbers
 *
 * @var array
 */
protected $onumbers = array("0", "1", "2", "3", "4", "5", "6", "7");

/**
 * Binary numbers
 *
 * @var array
 */
protected $bnumbers = array("0", "1");
|
|
|
|
/**
 * Class constructor
 *
 * @param string   $source   Source code
 * @param Features $features Scanner features
 * @param array    $options  Parsing options. The keys read here are
 *                           "sourceEncoding" (encoding of the source) and
 *                           "strictEncoding" (treated as true when not set)
 */
public function __construct($source, Features $features, $options)
{
    $this->features = $features;

    $encoding = isset($options["sourceEncoding"]) ?
                $options["sourceEncoding"] :
                null;

    //Strip BOM characters from the source. Both variables are passed by
    //reference, so the encoding can be filled in from the BOM when the
    //user did not specify one
    $this->stripBOM($source, $encoding);

    //Convert to UTF8 if needed
    if ($encoding && !preg_match("/UTF-?8/i", $encoding)) {
        $source = mb_convert_encoding($source, "UTF-8", $encoding);
    }

    //Instead of using mb_substr for each character, split the source
    //into an array of UTF8 characters for performance reasons
    $strictEncoding = !isset($options["strictEncoding"]) ||
                      $options["strictEncoding"];
    $this->source = Utils::stringToUTF8Array($source, $strictEncoding);
    $this->length = count($this->source);

    //Convert character codes to UTF8 characters in whitespaces and line
    //terminators
    $this->lineTerminators = array_merge(
        self::$lineTerminatorsSequences, self::$lineTerminatorsChars
    );
    foreach (array("whitespaces", "lineTerminators") as $key) {
        foreach ($this->$key as $i => $char) {
            if (is_int($char)) {
                $this->{$key}[$i] = Utils::unicodeToUtf8($char);
            }
        }
    }

    //Remove exponentiation operator if the feature
    //is not enabled
    if (!$this->features->exponentiationOperator) {
        Utils::removeArrayValue($this->punctuators, "**");
        Utils::removeArrayValue($this->punctuators, "**=");
    }

    //Remove optional chaining operator if the feature
    //is not enabled
    if (!$this->features->optionalChaining) {
        Utils::removeArrayValue($this->punctuators, "?.");
    }

    //Remove logical assignment operators if the feature
    //is not enabled
    if (!$this->features->logicalAssignmentOperators) {
        Utils::removeArrayValue($this->punctuators, "&&=");
        Utils::removeArrayValue($this->punctuators, "||=");
        Utils::removeArrayValue($this->punctuators, "??=");
    }

    //Create a LSM for punctuators array
    $this->punctuatorsLSM = new LSM($this->punctuators);

    //Create a LSM for strings stops
    $this->stringsStopsLSM = new LSM($this->lineTerminators, true);

    //Allow paragraph and line separators in strings
    if ($this->features->paragraphLineSepInStrings) {
        $this->stringsStopsLSM->remove(Utils::unicodeToUtf8(0x2028));
        $this->stringsStopsLSM->remove(Utils::unicodeToUtf8(0x2029));
    }

    //Remove await as keyword if async/await is enabled
    if ($this->features->asyncAwait) {
        Utils::removeArrayValue($this->keywords, "await");
    }

    //Build the regex that splits texts on any line terminator
    $this->linesSplitter = "/" .
                           implode("|", $this->lineTerminators) .
                           "/uS";
    $this->position = new Position(0, 0, 0);
}
|
|
|
|
/**
 * Strips BOM characters from the source and detects source encoding if not
 * given by the user. Both arguments are modified in place.
 *
 * @param string      $source   Source, the BOM is removed from it if found
 * @param string|null $encoding User specified encoding, if empty it is set
 *                              to the encoding associated with the BOM
 *
 * @return void
 */
public function stripBOM(&$source, &$encoding)
{
    //Each BOM is indexed by its first byte and described by its remaining
    //bytes and the encoding that it represents
    $boms = array(
        "\xEF" => array(array("\xBB", "\xBF"), "UTF-8"),
        "\xFE" => array(array("\xFF"), "UTF-16BE"),
        "\xFF" => array(array("\xFE"), "UTF-16LE"),
    );
    if (!isset($source[0]) || !isset($boms[$source[0]])) {
        return;
    }
    list($tailBytes, $bomEncoding) = $boms[$source[0]];
    $tailLength = count($tailBytes);
    //All the remaining bytes must match, otherwise it is not a BOM
    for ($i = 0; $i < $tailLength; $i++) {
        if (!isset($source[$i + 1]) || $source[$i + 1] !== $tailBytes[$i]) {
            return;
        }
    }
    //Before PHP 7 substr returns false when the source contains nothing
    //but the BOM, the cast makes sure that the source remains a string
    $source = (string) substr($source, $tailLength + 1);
    if (!$encoding) {
        $encoding = $bomEncoding;
    }
}
|
|
|
|
/**
 * Turns module scanning mode on or off
 *
 * @param bool $enable True to scan the source as a module, false to scan
 *                     it as a script
 *
 * @return $this
 */
public function enableModuleMode($enable = true)
{
    $this->isModule = $enable;

    return $this;
}
|
|
|
|
/**
 * Turns comments handling on or off
 *
 * @param bool $enable True to handle comments, false to ignore them
 *
 * @return $this
 */
public function enableComments($enable = true)
{
    $this->comments = $enable;

    return $this;
}
|
|
|
|
/**
 * Turns the registration of consumed tokens in the tokens array on or off
 *
 * @param bool $enable True to register tokens, false to not register them
 *
 * @return $this
 */
public function enableTokenRegistration($enable = true)
{
    $this->registerTokens = $enable;

    return $this;
}
|
|
|
|
/**
 * Returns the array of tokens registered so far
 *
 * @return array
 */
public function getTokens()
{
    return $this->tokens;
}
|
|
|
|
/**
 * Returns the scanner's event emitter, creating it on the first call
 *
 * @return EventsEmitter
 */
public function getEventsEmitter()
{
    //The emitter is instantiated lazily so that it does not exist at all
    //when nobody asks for it
    if ($this->eventsEmitter) {
        return $this->eventsEmitter;
    }
    $this->eventsEmitter = new EventsEmitter;

    return $this->eventsEmitter;
}
|
|
|
|
/**
 * Sets the strict mode state
 *
 * @param bool $strictMode True to enable strict mode, false to disable it
 *
 * @return $this
 */
public function setStrictMode($strictMode)
{
    $this->strictMode = $strictMode;

    return $this;
}
|
|
|
|
/**
 * Returns the strict mode state
 *
 * @return bool
 */
public function getStrictMode()
{
    return $this->strictMode;
}
|
|
|
|
/**
 * Checks if the given token is a keyword in the current strict mode state
 *
 * @param Token $token Token to check
 *
 * @return bool
 */
public function isStrictModeKeyword($token)
{
    //Only keyword tokens can match
    if ($token->type !== Token::TYPE_KEYWORD) {
        return false;
    }
    //Words that are always keywords
    if (in_array($token->value, $this->keywords)) {
        return true;
    }
    //Words that are keywords only when strict mode is enabled
    return $this->strictMode &&
           in_array($token->value, $this->strictModeKeywords);
}
|
|
|
|
/**
 * Returns the current scanner state as an array that can be passed to
 * setState to restore it
 *
 * @return array
 */
public function getState()
{
    //Scan current and next tokens so that they won't be parsed again
    //if the state is restored. If the current token is a slash the next
    //token isn't parsed, this prevents some edge cases where a regexp
    //that contains something that can be interpreted as a comment causes
    //the content to be parsed as a real comment too
    $current = $this->currentToken ?: $this->getToken();
    if ($current && $current->value !== "/") {
        $this->getNextToken();
    }

    //Copy every state property
    $state = array();
    foreach ($this->stateProps as $name) {
        $state[$name] = $this->$name;
    }

    //Store the number of registered tokens so that tokens added after
    //this point can be removed when the state is restored
    if ($this->registerTokens) {
        $state["tokensNum"] = count($this->tokens);
    }

    //Emit the FreezeState event and pass the state by reference so that
    //listeners attached to this event can add data
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("FreezeState", array(&$state));
    }

    return $state;
}
|
|
|
|
/**
 * Restores the scanner to the given state
 *
 * @param array $state State, as returned by getState
 *
 * @return $this
 */
public function setState($state)
{
    if ($this->registerTokens) {
        $tokensNum = $state["tokensNum"];
        //If tokens have been added after the state was taken remove them
        if (isset($this->tokens[$tokensNum])) {
            while (count($this->tokens) > $tokensNum) {
                array_pop($this->tokens);
            }
        }
        //The tokens count is not a scanner property
        unset($state["tokensNum"]);
    }

    //Emit the ResetState event and pass the given state
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("ResetState", array(&$state));
    }

    //Copy every remaining entry back on the scanner
    foreach ($state as $name => $value) {
        $this->$name = $value;
    }

    return $this;
}
|
|
|
|
/**
 * Returns the current scanner position
 *
 * @param bool $scanPosition By default this method returns the consumed
 *                           position, if this parameter is true a new
 *                           position built from the current scan line,
 *                           column and index is returned instead
 *
 * @return Position
 */
public function getPosition($scanPosition = false)
{
    if (!$scanPosition) {
        return $this->position;
    }

    return new Position($this->line, $this->column, $this->index);
}
|
|
|
|
/**
 * Sets the current scan position at the given one
 *
 * @param Position|null $position Position at which the scan position will
 *                                be set. If it is null the scan position is
 *                                left untouched
 *
 * @return $this
 */
public function setScanPosition(Position $position = null)
{
    //The parameter is optional, so a missing position must not be
    //dereferenced
    if ($position === null) {
        return $this;
    }
    $this->line = $position->getLine();
    $this->column = $position->getColumn();
    $this->index = $position->getIndex();
    return $this;
}
|
|
|
|
/**
 * Return the character at the given index in the source code or null if the
 * index is outside the source.
 *
 * @param int $index Index, if not given it will use the current index
 *
 * @return string|null
 */
public function charAt($index = null)
{
    if ($index === null) {
        $index = $this->index;
    }
    //Indices before the beginning of the source are rejected too, so that
    //they don't cause an access to an undefined offset
    if ($index < 0 || $index >= $this->length) {
        return null;
    }
    return $this->source[$index];
}
|
|
|
|
/**
 * Throws a syntax error located at the current scan position
 *
 * @param string $message Error message, if empty a message that reports the
 *                        character at the current index as unexpected is
 *                        used
 *
 * @return void
 *
 * @throws Exception
 */
protected function error($message = null)
{
    $text = $message ?: "Unexpected " . $this->charAt();

    throw new Exception($text, $this->getPosition(true));
}
|
|
|
|
/**
 * Consumes the current token and makes the next token, if already scanned,
 * the current one
 *
 * @return $this
 */
public function consumeToken()
{
    $consumed = $this->currentToken;

    //Move the consumed position to the end of the token
    $this->position = $consumed->location->end;

    //Comments associated with the token are consumed before the token
    if ($this->comments) {
        $this->consumeCommentsForCurrentToken();
    }

    //Register the token if required
    if ($this->registerTokens) {
        $this->tokens[] = $this->currentToken;
    }

    //Emit the TokenConsumed event for the consumed token
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire(
            "TokenConsumed", array($this->currentToken)
        );
    }

    //Shift the tokens
    $this->currentToken = $this->nextToken;
    $this->nextToken = null;

    return $this;
}
|
|
|
|
/**
 * Consumes the current token if its value is the given string
 *
 * @param string $expected String to check
 *
 * @return Token|null The consumed token or null if it does not match
 */
public function consume($expected)
{
    //For performance reasons getToken is called only if there is no
    //pending token
    $token = $this->currentToken ?: $this->getToken();
    if (!$token || $token->value !== $expected) {
        return null;
    }

    $this->consumeToken();

    return $token;
}
|
|
|
|
/**
 * Checks if one of the given strings is matched, if so it consumes the
 * token
 *
 * @param array $expected Strings to check
 *
 * @return Token|null The consumed token or null if nothing matches
 */
public function consumeOneOf($expected)
{
    //Do not call getToken if there's already a pending token for
    //performance reasons
    $token = $this->currentToken ?: $this->getToken();
    //The comparison is strict so that numeric looking values are compared
    //as strings and not as numbers (for example "1e1" and "10")
    if ($token && in_array($token->value, $expected, true)) {
        $this->consumeToken();
        return $token;
    }
    return null;
}
|
|
|
|
/**
 * Checks that the current token starts on a reference line, which means
 * that no line terminators have been found in between
 *
 * @param bool $nextToken By default the reference line is the line of the
 *                        consumed position, if this parameter is true the
 *                        reference line is the line where the next token
 *                        ends
 *
 * @return bool
 */
public function noLineTerminators($nextToken = false)
{
    if ($nextToken) {
        $next = $this->getNextToken();
        //Without a next token there is no reference line
        $refLine = $next ? $next->location->end->getLine() : null;
    } else {
        $refLine = $this->getPosition()->getLine();
    }

    $current = $this->currentToken ?: $this->getToken();
    if (!$current) {
        return false;
    }

    return $current->location->start->getLine() === $refLine;
}
|
|
|
|
/**
 * Checks if one of the given strings follows the current scan position
 *
 * @param string|array $expected  String or array of values to check. An
 *                                entry can also be an array of two
 *                                elements: the value of the current token
 *                                and a condition on what follows it, which
 *                                is either the value of the next token or
 *                                true to require that the line terminators
 *                                check relative to the next token passes
 * @param bool         $nextToken This parameter must be true if the first
 *                                parameter contains array entries so that
 *                                it will check also next tokens
 *
 * @return bool
 */
public function isBefore($expected, $nextToken = false)
{
    //A single string is accepted too, handle it as a list of one element
    $expected = (array) $expected;

    $token = $this->currentToken ?: $this->getToken();
    if (!$token) {
        return false;
    }
    //The comparison is strict so that numeric looking values are compared
    //as strings and array entries never match here
    if (in_array($token->value, $expected, true)) {
        return true;
    }
    if (!$nextToken || !$this->getNextToken()) {
        return false;
    }

    foreach ($expected as $val) {
        //Consider only array entries that match the current token
        if (!is_array($val) || $val[0] !== $token->value) {
            continue;
        }
        if ($val[1] === true) {
            //The current token must pass the line terminators check
            //relative to the next token
            if ($this->noLineTerminators(true)) {
                return true;
            }
        } elseif ($val[1] === $this->nextToken->value) {
            //The next token must have the given value
            return true;
        }
    }
    return false;
}
|
|
|
|
/**
 * Returns the token that follows the current one, scanning it if it has
 * not been scanned yet
 *
 * @return Token|null
 */
public function getNextToken()
{
    if ($this->nextToken) {
        return $this->nextToken;
    }

    //getToken returns the current token until it is consumed, so the
    //current token is temporarily removed to scan the token after it
    $current = $this->currentToken ?: $this->getToken();
    $this->currentToken = null;
    $this->nextToken = $this->getToken(true);
    $this->currentToken = $current;

    return $this->nextToken;
}
|
|
|
|
/**
 * Returns the current token, scanning it from the source if there is no
 * pending token
 *
 * @param bool $skipEOFChecks True to skip the checks on unclosed brackets
 *                            and templates when the end of the source is
 *                            reached
 *
 * @return Token|null The token or null if the end of the source has been
 *                    reached or if an unrecognized token follows a slash
 *
 * @throws Exception
 */
public function getToken($skipEOFChecks = false)
{
    //The current token is returned until consumed
    if ($this->currentToken) {
        return $this->currentToken;
    }

    $comments = $this->skipWhitespacesAndComments();

    //Emit the TokenCreated event for all the comments found
    if ($comments && $this->eventsEmitter) {
        foreach ($comments as $comment) {
            $this->eventsEmitter->fire("TokenCreated", array($comment));
        }
    }

    //When the end of the source is reached
    if ($this->index >= $this->length) {
        if (!$skipEOFChecks) {
            //Check if there are open brackets
            foreach ($this->openBrackets as $bracket => $num) {
                if ($num) {
                    $this->error("Unclosed $bracket");
                }
            }
            //Check if there are open templates
            if (count($this->openTemplates)) {
                $this->error("Unterminated template");
            }
        }

        //Register the comments, there is no current token at this point
        if ($this->comments && $comments) {
            $this->commentsForCurrentToken($comments);
        }

        //Emit the EndReached event when at the end of the source
        if ($this->eventsEmitter) {
            $this->eventsEmitter->fire("EndReached");
        }

        return null;
    }

    $startPosition = $this->getPosition(true);
    $origException = null;
    try {
        //Try every scan method in order, the first one that returns a
        //token wins
        $found = (
            ($this->jsx && ($token = $this->scanJSXIdentifier())) ||
            ($token = $this->scanTemplate()) ||
            ($token = $this->scanNumber()) ||
            ($this->jsx && ($token = $this->scanJSXPunctuator())) ||
            ($token = $this->scanPunctuator()) ||
            ($token = $this->scanKeywordOrIdentifier()) ||
            ($this->jsx && ($token = $this->scanJSXString())) ||
            ($token = $this->scanString())
        );
        if ($found) {
            //Set the token start and end positions
            $token->location->start = $startPosition;
            $token->location->end = $this->getPosition(true);
            $this->currentToken = $token;

            //Register comments if required
            if ($this->comments && $comments) {
                $this->commentsForCurrentToken($comments);
            }

            //Emit the TokenCreated event for the token just created
            if ($this->eventsEmitter) {
                $this->eventsEmitter->fire(
                    "TokenCreated", array($this->currentToken)
                );
            }

            return $this->currentToken;
        }
    } catch (Exception $e) {
        //Keep the scan error, it is thrown again later if needed
        $origException = $e;
    }

    //If last token was "/" do not throw an error if the token has not been
    //recognized since it can be the first character in a regexp and it will
    //be consumed when the current token will be reconsumed as a regexp
    if ($this->isAfterSlash($startPosition)) {
        $this->setScanPosition($startPosition);
        return null;
    }

    //No valid token found. If there was a scan error, throw the same
    //exception again, otherwise throw a new error
    if ($origException) {
        throw $origException;
    }
    $this->error();
}
|
|
|
|
/**
 * Executes the operations needed when the scanning of the source ends
 *
 * @return $this
 */
public function consumeEnd()
{
    //Consume final comments
    if ($this->comments) {
        $this->consumeCommentsForCurrentToken();
    }

    //Emit the EndReached event when at the end of the source
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("EndReached");
    }

    return $this;
}
|
|
|
|
/**
 * Gets or sets comments for the current token. If the parameter is an
 * array it associates the given comments to the current token, otherwise
 * the comments stored for the current token are removed from the map and
 * returned
 *
 * @param array $comments Comments array
 *
 * @return array|null The given comments when setting, the stored comments
 *                    or null if there are none when getting
 */
protected function commentsForCurrentToken($comments = null)
{
    //Comments are indexed by the hash of the token, an empty string is
    //used when there is no current token
    $key = $this->currentToken ? spl_object_hash($this->currentToken) : "";

    //Setter
    if ($comments !== null) {
        $this->commentsMap[$key] = $comments;
        return $comments;
    }

    //Getter, nothing stored for the token
    if (!isset($this->commentsMap[$key])) {
        return null;
    }

    $stored = $this->commentsMap[$key];
    unset($this->commentsMap[$key]);

    return $stored;
}
|
|
|
|
/**
 * Consumes comment tokens associated with the current token, registering
 * them and emitting the TokenConsumed event when required
 *
 * @return $this
 */
protected function consumeCommentsForCurrentToken()
{
    //The comments are always taken out of the map, even if nothing has to
    //be done with them
    $comments = $this->commentsForCurrentToken();
    if (!$comments) {
        return $this;
    }
    if (!$this->registerTokens && !$this->eventsEmitter) {
        return $this;
    }

    foreach ($comments as $comment) {
        //Register the token if required
        if ($this->registerTokens) {
            $this->tokens[] = $comment;
        }
        //Emit the TokenConsumed event for the comment
        if ($this->eventsEmitter) {
            $this->eventsEmitter->fire("TokenConsumed", array($comment));
        }
    }

    return $this;
}
|
|
|
|
/**
 * Checks if there is a slash at the consumed index, at the index after it
 * or at the index before the given position.
 *
 * @param Position $position Additional position to check
 *
 * @return bool
 */
protected function isAfterSlash($position = null)
{
    $consumed = $this->getPosition()->getIndex();
    $candidates = array($consumed, $consumed + 1);
    if ($position) {
        $candidates[] = $position->getIndex() - 1;
    }

    foreach ($candidates as $candidate) {
        //Negative indices can result from the subtraction above
        if ($candidate < 0) {
            continue;
        }
        if ($this->charAt($candidate) === "/") {
            return true;
        }
    }

    return false;
}
|
|
|
|
/**
 * Tries to scan again the current token as a regexp. This is possible only
 * if the value of the current token starts with a slash
 *
 * @return Token|null The regexp token or null if the current token can't
 *                    be a regexp
 *
 * @throws Exception
 */
public function reconsumeCurrentTokenAsRegexp()
{
    $current = $this->currentToken ?: $this->getToken();
    $value = $current ? $current->value : null;

    //Check if the token starts with "/"
    if (!$value || $value[0] !== "/") {
        return null;
    }

    //Reset the scanner position to the token's start position
    $startPosition = $current->location->start;
    $this->setScanPosition($startPosition);

    //Skip the opening delimiter
    $buffer = "/";
    $this->index++;
    $this->column++;

    //Regexp body. In a characters class the delimiter "/" is allowed
    //without escape, so the characters class must be closed before
    //closing the regexp
    $inClass = false;
    do {
        $stops = $inClass ? array("]") : array("/", "[");
        $chunk = $this->consumeUntil($stops);
        if ($chunk === null) {
            $this->error(
                $inClass ?
                "Unterminated character class in regexp" :
                "Unterminated regexp"
            );
        }
        $buffer .= $chunk[0];
        $stop = $chunk[1];
        $inClass = $stop === "[";
    } while ($stop !== "/");

    //Flags, every letter that follows the closing delimiter
    while (($char = $this->charAt()) !== null) {
        $lower = strtolower($char);
        if ($lower < "a" || $lower > "z") {
            break;
        }
        $buffer .= $char;
        $this->index++;
        $this->column++;
    }

    //If next token has already been parsed and it's a bracket exclude it
    //from the count of open brackets, then discard it since it must be
    //scanned again after the regexp
    if ($this->nextToken) {
        $nextVal = $this->nextToken->value;
        if (isset($this->brackets[$nextVal]) &&
            isset($this->openBrackets[$nextVal])
        ) {
            if ($this->brackets[$nextVal]) {
                $this->openBrackets[$nextVal]++;
            } else {
                $this->openBrackets[$nextVal]--;
            }
        }
        $this->nextToken = null;
    }

    //If comments handling is enabled, take the comments associated with
    //the token that is going to be replaced
    $comments = $this->comments ? $this->commentsForCurrentToken() : null;

    //Replace the current token with a regexp token
    $regexp = new Token(Token::TYPE_REGULAR_EXPRESSION, $buffer);
    $regexp->location->start = $startPosition;
    $regexp->location->end = $this->getPosition(true);
    $this->currentToken = $regexp;

    //Attach the comments to the new current token
    if ($comments) {
        $this->commentsForCurrentToken($comments);
    }

    return $this->currentToken;
}
|
|
|
|
/**
 * Skips whitespaces and comments from the current scan position. If
 * comments handling is enabled, the array of parsed comments is returned,
 * otherwise the returned array is empty
 *
 * @return array
 *
 * @throws Exception
 */
protected function skipWhitespacesAndComments()
{
    $comments = array();
    //Skipped text for which line and column have not been updated yet
    $content = "";
    $secStartIdx = $this->index;
    while (($char = $this->charAt()) !== null) {

        //Whitespace
        if (in_array($char, $this->whitespaces)) {
            $content .= $char;
            $this->index++;
            continue;
        }

        //Find out if a comment begins at the current index
        $opener = null;
        $inline = true;
        if ($char === "/" || $char === "#") {

            $nextChar = $this->charAt($this->index + 1);

            //Hashbang comment. This will be parsed only if hashbangs
            //comments are enabled and if it appears at the beginning of
            //the code
            $hashBang = (
                $char === "#" && $nextChar === "!" &&
                $this->features->hashbangComments && !$this->index
            );

            //Inline and multiline comments must begin with a slash, a "#"
            //followed by "/" or "*" is not a comment
            if ($hashBang ||
                ($char === "/" && ($nextChar === "/" || $nextChar === "*"))
            ) {
                $opener = $char . $nextChar;
                $inline = $nextChar !== "*";
            }

        } elseif (!$this->isModule && $char === "<" &&
            $this->charAt($this->index + 1) === "!" &&
            $this->charAt($this->index + 2) === "-" &&
            $this->charAt($this->index + 3) === "-"
        ) {

            //Open html comment
            $opener = "<!--";

        } elseif (!$this->isModule && $char === "-" &&
            $this->charAt($this->index + 1) === "-" &&
            $this->charAt($this->index + 2) === ">"
        ) {

            //Close html comment, allowed only on its own line
            if ($this->canCloseHtmlComment($secStartIdx)) {
                $opener = "-->";
            }

        }

        //Anything else ends the skipped section
        if ($opener === null) {
            break;
        }

        $content = $this->skipComment($opener, $inline, $content, $comments);
    }

    if ($content !== "") {
        $this->adjustColumnAndLine($content);
    }

    return $comments;
}

/**
 * Checks if a close html comment is allowed at the current index: the scan
 * of the skipped section must have started at the beginning of the source
 * or a line terminator must have been skipped before the current index
 *
 * @param int $secStartIdx Index at which the skipped section begins
 *
 * @return bool
 */
private function canCloseHtmlComment($secStartIdx)
{
    if (!$secStartIdx) {
        return true;
    }
    for ($index = $this->index - 1; $index >= $secStartIdx; $index--) {
        if (in_array($this->charAt($index), $this->lineTerminators)) {
            return true;
        }
    }
    return false;
}

/**
 * Skips the comment that begins at the current index. If comments handling
 * is enabled a comment token is appended to the given array
 *
 * @param string $opener   Characters that open the comment
 * @param bool   $inline   True if the comment ends at the first line
 *                         terminator or at the end of the source, false if
 *                         it is a multiline comment that ends with the
 *                         closing sequence
 * @param string $content  Skipped text not yet counted in line and column
 * @param array  $comments Array where the comment token is appended
 *
 * @return string Skipped text not yet counted in line and column
 *
 * @throws Exception
 */
private function skipComment($opener, $inline, $content, array &$comments)
{
    //If comments must be handled, empty the current content too and get
    //the comment start position
    $start = null;
    if ($this->comments) {
        if ($content !== "") {
            $this->adjustColumnAndLine($content);
            $content = "";
        }
        $start = $this->getPosition(true);
    }

    //Openers contain only single byte characters
    $this->index += strlen($opener);
    $content .= $opener;

    while (true) {
        $char = $this->charAt();

        if ($char === null) {
            //If the end of the source has been reached and a multiline
            //comment is still open, it's an error
            if (!$inline) {
                $this->error("Unterminated comment");
            }
            $isEnd = true;
        } else {
            $content .= $char;
            $this->index++;
            $isEnd = $inline ?
                     in_array($char, $this->lineTerminators) :
                     ($char === "*" && $this->charAt() === "/");
        }

        if (!$isEnd) {
            continue;
        }

        //Add the second character of the multiline closing sequence
        if (!$inline) {
            $content .= "/";
            $this->index++;
        }

        if ($this->comments) {
            //For inline comments the closing line terminator must be
            //excluded from comment text
            $terminator = $inline ? $char : null;
            if ($terminator !== null) {
                $this->index--;
                $content = substr($content, 0, -strlen($terminator));
            }
            $this->adjustColumnAndLine($content);
            $token = new Token(Token::TYPE_COMMENT, $content);
            $token->location->start = $start;
            $token->location->end = $this->getPosition(true);
            $comments[] = $token;
            //The new content begins with the closing line terminator
            //since that character has already been processed
            $content = "";
            if ($terminator !== null) {
                $content = $terminator;
                $this->index++;
            }
        }

        return $content;
    }
}
|
|
|
|
/**
 * Scans a quoted string literal starting at the current position
 *
 * @param bool $handleEscape Forwarded to consumeUntil to enable or disable
 *                           backslash escaping
 *
 * @return Token|null String literal token, or null when the current
 *                    character is neither a single nor a double quote
 */
protected function scanString($handleEscape = true)
{
    $quote = $this->charAt();
    if ($quote !== "'" && $quote !== '"') {
        return null;
    }

    //Step over the opening quote
    $this->index++;
    $this->column++;

    //The opening quote is registered as a stop only for the duration of
    //this scan
    $stops = $this->stringsStopsLSM;
    $stops->add($quote);
    $consumed = $this->consumeUntil($stops, $handleEscape);
    $stops->remove($quote);

    //The scan must have ended exactly on the matching quote
    $terminated = $consumed !== null && $consumed[1] === $quote;
    if (!$terminated) {
        $this->error("Unterminated string");
    }

    return new Token(Token::TYPE_STRING_LITERAL, $quote . $consumed[0]);
}
|
|
|
|
/**
 * Scans a template chunk starting at the current position
 *
 * A chunk starts either with a backtick or with the "}" that closes an
 * open template expression and it ends at the next unescaped backtick or
 * at the next "${".
 *
 * @return Token|null Template token, or null when the current character
 *                    does not start a template chunk
 */
protected function scanTemplate()
{
    $char = $this->charAt();
    $isBacktick = $char === "`";
    if (!$isBacktick && $char !== "}") {
        return null;
    }

    //Number of curly brackets that are currently open
    $curlyDepth = 0;
    if (isset($this->openBrackets["{"])) {
        $curlyDepth = $this->openBrackets["{"];
    }

    //A "}" resumes a template only if the number of open curly brackets
    //matches the one registered on top of the open templates stack
    if (!$isBacktick) {
        $pending = count($this->openTemplates);
        if (!$pending ||
            $this->openTemplates[$pending - 1] !== $curlyDepth
        ) {
            return null;
        }
        array_pop($this->openTemplates);
    }

    //Consume the starting character
    $this->index++;
    $this->column++;
    $text = $char;

    do {
        $chunk = $this->consumeUntil(array("`", "$"));
        if (!$chunk) {
            $this->error("Unterminated template");
        }
        $text .= $chunk[0];

        $stoppedOnDollar = $chunk[1] === "$";
        $opensExpression = $stoppedOnDollar && $this->charAt() === "{";
        if ($opensExpression) {
            //"${" starts a new template expression: consume the bracket
            //and register the current number of open curly brackets in
            //the open templates stack
            $this->index++;
            $this->column++;
            $text .= "{";
            $this->openTemplates[] = $curlyDepth;
        }
        //A dollar sign that is not followed by "{" is plain text, keep
        //scanning
    } while ($stoppedOnDollar && !$opensExpression);

    return new Token(Token::TYPE_TEMPLATE, $text);
}
|
|
|
|
/**
 * Number scanning method
 *
 * Handles decimal literals with optional decimal and exponent parts,
 * hexadecimal (0x), octal (0o) and binary (0b) forms and, when the bigInt
 * feature is enabled, BigInt literals terminated by "n".
 *
 * @return Token|null Numeric or BigInt literal token, or null when the
 *                    current character cannot start a number or when
 *                    only a dot is found
 */
protected function scanNumber()
{
    //Numbers can start with a decimal number or with a dot (.5)
    $char = $this->charAt();
    if (!(($char >= "0" && $char <= "9") || $char === ".")) {
        return null;
    }

    $buffer = "";
    $allowedDecimals = true;

    //Parse the integer part
    if ($char !== ".") {

        //Consume all decimal numbers
        $buffer = $this->consumeNumbers();
        $char = $this->charAt();

        //Decimal BigInt literal
        if ($this->features->bigInt && $char === "n") {
            $this->index++;
            $this->column++;
            return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . $char);
        }

        $lower = $char !== null ? strtolower($char) : null;

        //Handle hexadecimal (0x), octal (0o) and binary (0b) forms. The
        //digits set is looked up in the property named after the prefix
        //letter
        if ($buffer === "0" && $lower !== null &&
            isset($this->{$lower . "numbers"})
        ) {

            $this->index++;
            $this->column++;
            $tempBuffer = $this->consumeNumbers($lower);
            if ($tempBuffer === null) {
                $this->error("Missing numbers after 0$char");
            }
            $buffer .= $char . $tempBuffer;

            //Check that there are not numbers left
            if ($this->consumeNumbers() !== null) {
                $this->error();
            }

            //Prefixed BigInt literal. At this point $char still contains
            //the prefix letter, so the "n" suffix must be appended
            //explicitly
            if ($this->features->bigInt && $this->charAt() === "n") {
                $this->index++;
                $this->column++;
                return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . "n");
            }

            return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
        }

        //Consume exponent part if present. A decimal part is not allowed
        //after the exponent
        if ($tempBuffer = $this->consumeExponentPart()) {
            $buffer .= $tempBuffer;
            $allowedDecimals = false;
        }
    }

    //Parse the decimal part
    if ($allowedDecimals && $this->charAt() === ".") {

        //Consume the dot
        $this->index++;
        $this->column++;
        $buffer .= ".";

        //Consume all decimal numbers
        $tempBuffer = $this->consumeNumbers();
        $buffer .= $tempBuffer;

        //If the buffer contains only the dot it should be parsed as
        //punctuator
        if ($buffer === ".") {
            $this->index--;
            $this->column--;
            return null;
        }

        //Consume exponent part if present
        if (($tempBuffer = $this->consumeExponentPart()) !== null) {
            $buffer .= $tempBuffer;
        }
    }

    return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
}
|
|
|
|
/**
 * Consumes as many consecutive digits as possible
 *
 * The allowed digits are read from the property whose name is the given
 * type followed by "numbers". When the numericLiteralSeparator feature is
 * enabled, underscores are accepted after the first consumed character.
 *
 * @param string $type Digits type prefix (empty string for decimal
 *                     digits)
 * @param int    $max  Maximum number of characters to consume, null for
 *                     no limit
 *
 * @return string|null Consumed characters, or null if nothing was consumed
 */
protected function consumeNumbers($type = "", $max = null)
{
    $separator = $this->features->numericLiteralSeparator ? "_" : "";
    $digits = $this->{$type . "numbers"};
    $consumed = "";
    $total = 0;

    for ($char = $this->charAt(); ; $char = $this->charAt()) {
        //A separator is accepted only after at least one character
        $accepted = in_array($char, $digits) ||
                    ($total && $char === $separator);
        if (!$accepted) {
            break;
        }
        $consumed .= $char;
        $this->index++;
        $this->column++;
        if (++$total === $max) {
            break;
        }
    }

    if (!$total) {
        return null;
    }

    if (substr($consumed, -1) === "_") {
        $this->error(
            "Numeric separators are not allowed at the end of a number"
        );
    }

    return $consumed;
}
|
|
|
|
/**
 * Consumes the exponent part of a number, if present
 *
 * The exponent is made of "e" or "E", an optional sign and at least one
 * decimal digit.
 *
 * @return string Consumed exponent, or an empty string when the current
 *                character does not start an exponent
 */
protected function consumeExponentPart()
{
    $marker = $this->charAt();
    if ($marker === null || strtolower($marker) !== "e") {
        return "";
    }

    //Consume the exponent marker
    $this->index++;
    $this->column++;
    $exponent = $marker;

    //Consume the optional sign
    $sign = $this->charAt();
    if ($sign === "+" || $sign === "-") {
        $this->index++;
        $this->column++;
        $exponent .= $sign;
    }

    //At least one digit is required after the marker and the sign
    $digits = $this->consumeNumbers();
    if ($digits === null) {
        $this->error("Missing exponent");
    }

    return $exponent . $digits;
}
|
|
|
|
/**
 * Punctuator scanning method
 *
 * Brackets are handled first so that the count of open brackets can be
 * kept up to date, every other punctuator is resolved by taking the
 * longest match.
 *
 * @return Token|null Punctuator token, or null if there is no punctuator
 *                    at the current position
 */
protected function scanPunctuator()
{
    $char = $this->charAt();

    if (!isset($this->brackets[$char])) {
        //Not a bracket, try to match the longest punctuator
        $found = $this->punctuatorsLSM->match($this, $this->index, $char);
        if (!$found) {
            return null;
        }
        //Optional chaining punctuator cannot appear before a number, in
        //this case only the question mark must be consumed
        if ($found[1] === "?.") {
            $following = $this->charAt($this->index + $found[0]);
            if ($following !== null &&
                $following >= "0" && $following <= "9"
            ) {
                $found = array(1, "?");
            }
        }
        $this->index += $found[0];
        $this->column += $found[0];
        return new Token(Token::TYPE_PUNCTUATOR, $found[1]);
    }

    //For closing brackets the map contains the matching open bracket,
    //for open brackets it contains a falsy value
    $opening = $this->brackets[$char];
    if (!$opening) {
        //Open bracket, increase its counter
        if (isset($this->openBrackets[$char])) {
            $this->openBrackets[$char]++;
        } else {
            $this->openBrackets[$char] = 1;
        }
    } elseif (!empty($this->openBrackets[$opening])) {
        //Closing bracket with a corresponding open bracket
        $this->openBrackets[$opening]--;
    } elseif (!$this->isAfterSlash($this->getPosition(true))) {
        //Closing bracket without a corresponding open bracket, this is
        //tolerated only when isAfterSlash reports true for the current
        //position
        $this->error();
    }

    $this->index++;
    $this->column++;
    return new Token(Token::TYPE_PUNCTUATOR, $char);
}
|
|
|
|
/**
 * Keywords and identifiers scanning method
 *
 * Consumes an optional "#" (when the privateMethodsAndFields feature is
 * enabled) followed by the longest run of identifier characters and
 * unicode escape sequences, then classifies the result as private
 * identifier, null literal, boolean literal, keyword or identifier.
 *
 * @return Token|null Scanned token, or null if no identifier character
 *                    is found at the current position
 */
protected function scanKeywordOrIdentifier()
{
    //Check private identifier start character
    $private = $this->features->privateMethodsAndFields &&
               $this->charAt() === "#";
    if ($private) {
        $this->index++;
        $this->column++;
    }

    //Consume the maximum number of characters that are unicode escape
    //sequences or valid identifier starts (only the first character) or
    //parts
    $buffer = "";
    $start = true;
    while (($char = $this->charAt()) !== null) {
        if (
            //Fast path for ASCII characters
            ($char >= "a" && $char <= "z") ||
            ($char >= "A" && $char <= "Z") ||
            $char === "_" || $char === "$" ||
            (!$start && $char >= "0" && $char <= "9") ||
            $this->isIdentifierChar($char, $start)
        ) {
            $buffer .= $char;
            $this->index++;
            $this->column++;
        } elseif ($char === "\\") {
            //Remember the position so that the sequence can be
            //unconsumed if it does not belong to the identifier
            $seqIndex = $this->index;
            $seqColumn = $this->column;
            $seq = $this->consumeUnicodeEscapeSequence();
            if (!$seq) {
                break;
            }
            //Verify that it's a valid character. If it is not, restore
            //the position before the sequence, otherwise its characters
            //would be silently dropped from the token stream
            if (!$this->isIdentifierChar($seq[1], $start)) {
                $this->index = $seqIndex;
                $this->column = $seqColumn;
                break;
            }
            //The sequence is stored as written in the source
            $buffer .= $seq[0];
        } else {
            break;
        }
        $start = false;
    }

    //Identify token type
    if ($buffer === "") {
        //Unconsume the hash if nothing was found after that
        if ($private) {
            $this->index--;
            $this->column--;
        }
        return null;
    } elseif ($private) {
        $type = Token::TYPE_PRIVATE_IDENTIFIER;
        $buffer = "#" . $buffer;
    } elseif ($buffer === "null") {
        $type = Token::TYPE_NULL_LITERAL;
    } elseif ($buffer === "true" || $buffer === "false") {
        $type = Token::TYPE_BOOLEAN_LITERAL;
    } elseif (in_array($buffer, $this->keywords) ||
        in_array($buffer, $this->strictModeKeywords)
    ) {
        $type = Token::TYPE_KEYWORD;
    } else {
        $type = Token::TYPE_IDENTIFIER;
    }

    return new Token($type, $buffer);
}
|
|
|
|
/**
 * Consumes a unicode escape sequence
 *
 * Two forms are recognised: "\u" followed by exactly 4 hexadecimal digits
 * and "\u{" followed by one or more hexadecimal digits and "}". If the
 * sequence is malformed nothing is consumed.
 *
 * @return array|null Array where the first element is the matched
 *                    sequence and the second one is the decoded
 *                    character, or null if there is no valid sequence at
 *                    the current position
 */
protected function consumeUnicodeEscapeSequence()
{
    if ($this->charAt() !== "\\" ||
        $this->charAt($this->index + 1) !== "u"
    ) {
        return null;
    }

    //Remember the position to unconsume everything on failure
    $startIndex = $this->index;
    $startColumn = $this->column;
    $this->index += 2;
    $this->column += 2;
    $brackets = false;
    if ($this->charAt() === "{") {
        //\u{FFF}
        $brackets = true;
        $this->index++;
        $this->column++;
        $code = $this->consumeNumbers("x");
        //The code is compared with null explicitly because "0" is a
        //valid code but it is a falsy string
        if ($code !== null) {
            if ($this->charAt() === "}") {
                $this->index++;
                $this->column++;
            } else {
                //Missing closing bracket
                $code = null;
            }
        }
    } else {
        //\uFFFF
        $code = $this->consumeNumbers("x", 4);
        //Exactly 4 characters are required, "0" included
        if ($code !== null && strlen($code) !== 4) {
            $code = null;
        }
    }

    //Unconsume everything if the format is invalid
    if ($code === null) {
        $this->index = $startIndex;
        $this->column = $startColumn;
        return null;
    }

    //Return an array where the first element is the matched sequence
    //and the second one is the decoded character
    return array(
        $brackets ? "\\u{" . $code . "}" : "\\u" . $code,
        Utils::unicodeToUtf8(hexdec($code))
    );
}
|
|
|
|
/**
 * Tells if the given character can be part of an identifier
 *
 * ASCII letters, underscore, dollar sign and (except at the start) ASCII
 * digits are checked directly, every other character is tested against
 * the identifier start or identifier part regular expression.
 *
 * @param string $char  Character to check
 * @param bool   $start If true the character must be valid as first
 *                      character of an identifier
 *
 * @return bool
 */
protected function isIdentifierChar($char, $start = true)
{
    $isLower = $char >= "a" && $char <= "z";
    $isUpper = $char >= "A" && $char <= "Z";
    if ($isLower || $isUpper || $char === "_" || $char === "$") {
        return true;
    }

    //Digits are not allowed as first character
    if (!$start && $char >= "0" && $char <= "9") {
        return true;
    }

    $regex = $start ? $this->idStartRegex : $this->idPartRegex;
    return (bool) preg_match($regex, $char);
}
|
|
|
|
/**
 * Updates the current line and column according to the given consumed
 * string
 *
 * The string is split with the lines splitter regular expression: the
 * line is increased by the number of line breaks found and, if there is at
 * least one of them, the column restarts from the length of the last
 * line, otherwise that length is added to the current column.
 *
 * @param string $buffer String to analyze
 *
 * @return void
 */
protected function adjustColumnAndLine($buffer)
{
    $segments = preg_split($this->linesSplitter, $buffer);
    $breaks = count($segments) - 1;

    //Length in characters, not in bytes, of the text after the last line
    //break
    $tailLength = mb_strlen($segments[$breaks], "UTF-8");

    $this->line += $breaks;
    $this->column = $breaks ? $tailLength : $this->column + $tailLength;
}
|
|
|
|
/**
 * Consumes characters until one of the given stops is found
 *
 * A stop preceded by an unescaped backslash is treated as plain text when
 * escaping is handled. Line and column are updated only when a valid stop
 * is reached.
 *
 * @param array|LSM $stops        Characters to search
 * @param bool      $handleEscape True to handle escaping
 * @param bool      $collectStop  True to consume the stop and include it
 *                                in the returned text
 *
 * @return array|null Array where the first element is the consumed text
 *                    and the second one is the matched stop. Null if the
 *                    end of the source is reached before any valid stop
 *                    or if the stop is not collected and nothing precedes
 *                    it
 */
protected function consumeUntil(
    $stops, $handleEscape = true, $collectStop = true
) {
    $useLSM = $stops instanceof LSM;
    $consumed = "";
    $afterBackslash = false;

    for (;;) {
        $char = $this->charAt();
        if ($char === null) {
            //End of source without finding a stop
            return null;
        }

        //Find out if there is a stop at the current position. A stop
        //matched by the LSM can be longer than one character
        $step = 1;
        if ($useLSM) {
            $hit = $stops->match($this, $this->index, $char);
            $stopFound = (bool) $hit;
            if ($stopFound) {
                $step = $hit[0];
                $char = $hit[1];
            }
        } else {
            $stopFound = in_array($char, $stops);
        }

        if ($stopFound && !$afterBackslash) {
            if ($collectStop) {
                $this->index += $step;
                $consumed .= $char;
            } elseif ($consumed === "") {
                return null;
            }
            $this->adjustColumnAndLine($consumed);
            return array($consumed, $char);
        }

        //Plain text or escaped stop
        $this->index += $step;
        $consumed .= $char;

        //A backslash escapes the next character only if it is not
        //escaped itself
        $afterBackslash = !$afterBackslash &&
                          $char === "\\" &&
                          $handleEscape;
    }
}
|
|
} |