<?php
/**
 * This file is part of the Peast package
 *
 * (c) Marco Marchiò <marco.mm89@gmail.com>
 *
 * For the full copyright and license information refer to the LICENSE file
 * distributed with this source code
 */
namespace Peast\Syntax;

/**
 * Base class for scanners.
 * 
 * @author Marco Marchiò <marco.mm89@gmail.com>
 */
class Scanner
{
    use JSX\Scanner;

    /**
     * Scanner features
     * 
     * @var Features
     */
    protected $features;

    /**
     * Current column
     * 
     * @var int
     */
    protected $column = 0;
    
    /**
     * Current line
     * 
     * @var int
     */
    protected $line = 1;
    
    /**
     * Current index
     * 
     * @var int
     */
    protected $index = 0;
    
    /**
     * Source length
     * 
     * @var int
     */
    protected $length;
    
    /**
     * Source characters
     * 
     * @var array
     */
    protected $source;
    
    /**
     * Consumed position
     * 
     * @var Position
     */
    protected $position;
    
    /**
     * Current token
     * 
     * @var Token 
     */
    protected $currentToken;
    
    /**
     * Next token
     * 
     * @var Token 
     */
    protected $nextToken;
    
    /**
     * Strict mode flag
     * 
     * @var bool 
     */
    protected $strictMode = false;
    
    /**
     * True to register tokens in the tokens array
     * 
     * @var bool 
     */
    protected $registerTokens = false;
    
    /**
     * Module mode
     * 
     * @var bool 
     */
    protected $isModule = false;
    
    /**
     * Comments handling
     * 
     * @var bool 
     */
    protected $comments = false;
    
    /**
     * Internal JSX scan flag
     * 
     * @var bool
     */
    protected $jsx = false;
    
    /**
     * Registered tokens array
     * 
     * @var array 
     */
    protected $tokens = array();
    
    /**
     * Comments to tokens map
     * 
     * @var array 
     */
    protected $commentsMap = array();
    
    /**
     * Events emitter
     *
     * @var EventsEmitter
     */
    protected $eventsEmitter;
    
    /**
     * Regex to match identifiers starts
     * 
     * @var string 
     */
    protected $idStartRegex = "/[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\x{1885}\x{1886}\x{2118}\x{212E}\x{309B}\x{309C}]/u";
    
    /**
     * Regex to match identifiers parts
     * 
     * @var string 
     */
    protected $idPartRegex = "/[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\x{1885}\x{1886}\x{2118}\x{212E}\x{309B}\x{309C}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{00B7}\x{0387}\x{1369}\x{136A}\x{136B}\x{136C}\x{136D}\x{136E}\x{136F}\x{1370}\x{1371}\x{19DA}\x{200C}\x{200D}]/u";
    
    /**
     * Keywords array
     * 
     * @var array 
     */
    protected $keywords = array(
        "break", "do", "in", "typeof", "case", "else", "instanceof", "var",
        "catch", "export", "new", "void", "class", "extends", "return", "while",
        "const", "finally", "super", "with", "continue", "for", "switch",
        "debugger", "function", "this", "default", "if", "throw",
        "delete", "import", "try", "enum", "await"
    );
    
    /**
     * Array of words that are keywords only in strict mode
     * 
     * @var array 
     */
    protected $strictModeKeywords = array(
        "implements", "interface", "package", "private", "protected", "public",
        "static", "let", "yield"
    );

    /**
     * Punctuators array
     * 
     * @var array 
     */
    protected $punctuators = array(
        ".", ";", ",", "<", ">", "<=", ">=", "==", "!=", "===", "!==", "+",
        "-", "*", "%", "++", "--", "<<", ">>", ">>>", "&", "|", "^", "!", "~",
        "&&", "||", "?", ":", "=", "+=", "-=", "*=", "%=", "<<=", ">>=", ">>>=",
        "&=", "|=", "^=", "=>", "...", "/", "/=", "**", "**=", "??", "?.",
        "&&=", "||=", "??="
    );
    
    /**
     * Punctuators LSM
     * 
     * @var LSM 
     */
    protected $punctuatorsLSM;
    
    /**
     * Strings stops LSM
     * 
     * @var LSM 
     */
    protected $stringsStopsLSM;
    
    /**
     * Brackets array
     * 
     * @var array 
     */
    protected $brackets = array(
        "(" => "", "[" => "", "{" => "", ")" => "(", "]" => "[", "}" => "{"
    );
    
    /**
     * Open brackets array
     * 
     * @var array 
     */
    protected $openBrackets = array();
    
    /**
     * Open templates array
     * 
     * @var array 
     */
    protected $openTemplates = array();
    
    /**
     * Whitespaces array
     * 
     * @var array 
     */
    protected $whitespaces = array(
        " ", "\t", "\n", "\r", "\f", "\v", 0x00A0, 0xFEFF, 0x00A0,
        0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006,
        0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000, 0x2028,
        0x2029
    );
    
    /**
     * Line terminators characters array
     * 
     * @var array 
     * 
     * @static
     */
    public static $lineTerminatorsChars = array("\n", "\r", 0x2028, 0x2029);
    
    /**
     * Line terminators sequences array
     * 
     * @var array
     * 
     * @static
     */
    public static $lineTerminatorsSequences = array("\r\n");
    
    /**
     * Regex to split texts using valid ES line terminators
     * 
     * @var array 
     */
    protected $linesSplitter;
    
    /**
     * Concatenation of line terminators characters and line terminators
     * sequences
     * 
     * @var array 
     */
    protected $lineTerminators;
    
    /**
     * Properties to copy when getting the scanner state
     * 
     * @var array
     */
    protected $stateProps = array("position", "index", "column", "line",
                                  "currentToken", "nextToken", "strictMode",
                                  "openBrackets", "openTemplates",
                                  "commentsMap");
    
    /**
     * Decimal numbers
     * 
     * @var array
     */
    protected $numbers = array("0", "1", "2", "3", "4", "5", "6", "7", "8",
                               "9");
    
    /**
     * Hexadecimal numbers
     * 
     * @var array
     */
    protected $xnumbers = array("0", "1", "2", "3", "4", "5", "6", "7", "8",
                                "9", "a", "b", "c", "d", "e", "f", "A", "B",
                                "C", "D", "E", "F");

    /**
     * Octal numbers
     *
     * @var array
     */
    protected $onumbers = array("0", "1", "2", "3", "4", "5", "6", "7");

    /**
     * Binary numbers
     *
     * @var array
     */
    protected $bnumbers = array("0", "1");

    /**
     * Class constructor
     * 
     * @param string   $source   Source code
     * @param Features $features Scanner features
     * @param array    $options  Parsing options
     */
    function __construct(
        $source, Features $features, $options
    ) {
        $this->features = $features;

        $encoding = isset($options["sourceEncoding"]) ?
                    $options["sourceEncoding"] :
                    null;
        
        //Strip BOM characters from the source
        $this->stripBOM($source, $encoding);
        
        //Convert to UTF8 if needed
        if ($encoding && !preg_match("/UTF-?8/i", $encoding)) {
            $source = mb_convert_encoding($source, "UTF-8", $encoding);
        }
        
        //Instead of using mb_substr for each character, split the source
        //into an array of UTF8 characters for performance reasons
        $this->source = Utils::stringToUTF8Array(
            $source,
            !isset($options["strictEncoding"]) || $options["strictEncoding"]
        );
        $this->length = count($this->source);
        
        //Convert character codes to UTF8 characters in whitespaces and line
        //terminators
        $this->lineTerminators = array_merge(
            self::$lineTerminatorsSequences, self::$lineTerminatorsChars
        );
        foreach (array("whitespaces", "lineTerminators") as $key) {
            foreach ($this->$key as $i => $char) {
                if (is_int($char)) {
                    $this->{$key}[$i] = Utils::unicodeToUtf8($char);
                }
            }
        }

        //Remove exponentiation operator if the feature
        //is not enabled
        if (!$this->features->exponentiationOperator) {
            Utils::removeArrayValue($this->punctuators, "**");
            Utils::removeArrayValue($this->punctuators, "**=");
        }

        if (!$this->features->optionalChaining) {
            Utils::removeArrayValue($this->punctuators, "?.");
        }

        //Remove logical assignment operators if the feature
        //is not enabled
        if (!$this->features->logicalAssignmentOperators) {
            Utils::removeArrayValue($this->punctuators, "&&=");
            Utils::removeArrayValue($this->punctuators, "||=");
            Utils::removeArrayValue($this->punctuators, "??=");
        }
        
        //Create a LSM for punctuators array
        $this->punctuatorsLSM = new LSM($this->punctuators);
        
        //Create a LSM for strings stops
        $this->stringsStopsLSM = new LSM($this->lineTerminators, true);
        
        //Allow paragraph and line separators in strings
        if ($this->features->paragraphLineSepInStrings) {
            $this->stringsStopsLSM->remove(Utils::unicodeToUtf8(0x2028));
            $this->stringsStopsLSM->remove(Utils::unicodeToUtf8(0x2029));
        }

        //Remove await as keyword if async/await is enabled
        if ($this->features->asyncAwait) {
            Utils::removeArrayValue($this->keywords, "await");
        }
        
        $this->linesSplitter = "/" .
                               implode("|", $this->lineTerminators) .
                               "/uS";
        $this->position = new Position(0, 0, 0);
    }
    
    /**
     * Strips BOM characters from the source and detects source encoding if not
     * given by the user
     * 
     * @param string $source   Source
     * @param string $encoding User specified encoding
     */
    public function stripBOM(&$source, &$encoding)
    {
        $boms = array(
            "\xEF" => array(array("\xBB", "\xBF"), "UTF-8"),
            "\xFE" => array(array("\xFF"), "UTF-16BE"),
            "\xFF" => array(array("\xFE"), "UTF-16LE"),
        );
        if (!isset($source[0]) || !isset($boms[$source[0]])) {
            return;
        }
        $bom = $boms[$source[0]];
        $l = count($bom[0]);
        for ($i = 0; $i < $l; $i++) {
            if (!isset($source[$i + 1]) || $source[$i + 1] !== $bom[0][$i]) {
                return;
            }
        }
        $source = substr($source, $l + 1);
        if (!$encoding) {
            $encoding = $bom[1];
        }
    }
    
    /**
     * Enables or disables module scanning mode
     * 
     * @param bool $enable True to enable module scanning mode, false to disable it
     * 
     * @return $this
     */
    public function enableModuleMode($enable = true)
    {
        $this->isModule = $enable;
        return $this;
    }
    
    /**
     * Enables or disables comments handling
     * 
     * @param bool $enable True to enable comments handling, false to disable it
     * 
     * @return $this
     */
    public function enableComments($enable = true)
    {
        $this->comments = $enable;
        return $this;
    }
    
    /**
     * Enables or disables tokens registration in the token array
     * 
     * @param bool $enable True to enable token registration, false to disable it
     * 
     * @return $this
     */
    public function enableTokenRegistration($enable = true)
    {
        $this->registerTokens = $enable;
        return $this;
    }
    
    /**
     * Return registered tokens
     * 
     * @return array
     */
    public function getTokens()
    {
        return $this->tokens;
    }
    
    /**
     * Returns the scanner's event emitter
     * 
     * @return EventsEmitter
     */
    public function getEventsEmitter()
    {
        if (!$this->eventsEmitter) {
            //The event emitter is created here so that it won't exist if not
            //necessary
            $this->eventsEmitter = new EventsEmitter;
        }
        return $this->eventsEmitter;
    }
    
    /**
     * Enables or disables strict mode
     * 
     * @param bool $strictMode Strict mode state
     * 
     * @return $this
     */
    public function setStrictMode($strictMode)
    {
        $this->strictMode = $strictMode;
        return $this;
    }
    
    /**
     * Return strict mode state
     * 
     * @return bool
     */
    public function getStrictMode()
    {
        return $this->strictMode;
    }
    
    /**
     * Checks if the given token is a keyword in the current strict mode state
     * 
     * @param Token $token Token to checks
     * 
     * @return bool
     */
    public function isStrictModeKeyword($token)
    {
        return $token->type === Token::TYPE_KEYWORD &&
               (in_array($token->value, $this->keywords) || (
                $this->strictMode &&
                in_array($token->value, $this->strictModeKeywords)));
    }
    
    /**
     * Returns the current scanner state
     * 
     * @return array
     */
    public function getState()
    {
        //Consume current and next tokens so that they wont' be parsed again
        //if the state is restored. If the current token is a slash the next
        //token isn't parsed, this prevents some edge cases where a regexp
        //that contains something that can be interpreted as a comment causes
        //the content to be parsed as a real comment too
        $token = $this->currentToken ?: $this->getToken();
        if ($token && $token->value !== "/") {
            $this->getNextToken();
        }
        $state = array();
        foreach ($this->stateProps as $prop) {
            $state[$prop] = $this->$prop;
        }
        if ($this->registerTokens) {
            $state["tokensNum"] = count($this->tokens);
        }
        //Emit the FreezeState event and pass the given state so that listeners
        //attached to this event can add data
        $this->eventsEmitter && $this->eventsEmitter->fire(
            "FreezeState", array(&$state)
        );
        return $state;
    }
    
    /**
     * Sets the current scanner state
     * 
     * @param array $state State
     * 
     * @return $this
     */
    public function setState($state)
    {
        if ($this->registerTokens) {
            //Check if tokens have been added
            if (isset($this->tokens[$state["tokensNum"]])) {
                //Remove all added tokens
                for ($i = count($this->tokens) - 1; $i >= $state["tokensNum"]; $i--) {
                    array_pop($this->tokens);
                }
            }
            unset($state["tokensNum"]);
        }
        //Emit the ResetState event and pass the given state
        $this->eventsEmitter && $this->eventsEmitter->fire(
            "ResetState", array(&$state)
        );
        foreach ($state as $key => $value) {
            $this->$key = $value;
        }
        return $this;
    }
    
    /**
     * Returns current scanner state
     * 
     * @param bool $scanPosition By default this method returns the scanner
     *                           consumed position, if this parameter is true
     *                           the scanned position will be returned
     * 
     * @return Position
     */
    public function getPosition($scanPosition = false)
    {
        if ($scanPosition) {
            return new Position($this->line, $this->column, $this->index);
        } else {
            return $this->position;
        }
    }
    
    /**
     * Sets the current scan position at the given one
     * 
     * @param Position $position Position at which the scan position will be set
     * 
     * @return $this
     */
    public function setScanPosition(Position $position = null)
    {
        $this->line = $position->getLine();
        $this->column = $position->getColumn();
        $this->index = $position->getIndex();
        return $this;
    }
    
    /**
     * Return the character at the given index in the source code or null if the
     * end is reached.
     * 
     * @param int $index Index, if not given it will use the current index
     * 
     * @return string|null
     */
    public function charAt($index = null)
    {
        if ($index === null) {
            $index = $this->index;
        }
        return $index < $this->length ? $this->source[$index] : null;
    }
    
    /**
     * Throws a syntax error
     * 
     * @param string $message Error message
     * 
     * @return void
     * 
     * @throws Exception
     */
    protected function error($message = null)
    {
        if (!$message) {
            $message = "Unexpected " . $this->charAt();
        }
        throw new Exception($message, $this->getPosition(true));
    }
    
    /**
     * Consumes the current token
     * 
     * @return $this
     */
    public function consumeToken()
    {
        //Move the scanner position to the end of the current position
        $this->position = $this->currentToken->location->end;
        
        //Before consume the token, consume comments associated with it
        if ($this->comments) {
            $this->consumeCommentsForCurrentToken();
        }
        
        //Register the token if required
        if ($this->registerTokens) {
            $this->tokens[] = $this->currentToken;
        }
        
        //Emit the TokenConsumed event for the consumed token
        $this->eventsEmitter && $this->eventsEmitter->fire(
            "TokenConsumed", array($this->currentToken)
        );
        
        $this->currentToken = $this->nextToken;
        $this->nextToken = null;
        return $this;
    }
    
    /**
     * Checks if the given string is matched, if so it consumes the token
     * 
     * @param string $expected String to check
     * 
     * @return Token|null
     */
    public function consume($expected)
    {
        //Do not call getToken if there's already a pending token for
        //performance reasons
        $token = $this->currentToken ?: $this->getToken();
        if ($token && $token->value === $expected) {
            $this->consumeToken();
            return $token;
        }
        return null;
    }
    
    /**
     * Checks if one of the given strings is matched, if so it consumes the
     * token
     * 
     * @param array $expected Strings to check
     * 
     * @return Token|null
     */
    public function consumeOneOf($expected)
    {
        //Do not call getToken if there's already a pending token for
        //performance reasons
        $token = $this->currentToken ?: $this->getToken();
        if ($token && in_array($token->value, $expected)) {
            $this->consumeToken();
            return $token;
        }
        return null;
    }
    
    /**
     * Checks that there are not line terminators following the current scan
     * position before next token
     * 
     * @param bool $nextToken By default it checks the current token position
     *                        relative to the current position, if this
     *                        parameter is true the check will be done relative
     *                        to the next token
     * 
     * @return bool
     */
    public function noLineTerminators($nextToken = false)
    {
        if ($nextToken) {
            $nextToken = $this->getNextToken();
            $refLine = !$nextToken ? null :
                        $nextToken->location->end->getLine();
        } else {
            $refLine = $this->getPosition()->getLine();
        }
        $token = $this->currentToken ?: $this->getToken();
        return $token &&
               $token->location->start->getLine() === $refLine;
    }
    
    /**
     * Checks if one of the given strings follows the current scan position
     * 
     * @param string|array $expected  String or array of strings to check
     * @param bool         $nextToken This parameter must be true if the first
     *                                parameter is an array so that it will
     *                                check also next tokens
     * 
     * @return bool
     */
    public function isBefore($expected, $nextToken = false)
    {
        $token = $this->currentToken ?: $this->getToken();
        if (!$token) {
            return false;
        } elseif (in_array($token->value, $expected)) {
            return true;
        } elseif (!$nextToken) {
            return false;
        }
        if (!$this->getNextToken()) {
            return false;
        }
        foreach ($expected as $val) {
            if (!is_array($val) || $val[0] !== $token->value) {
                continue;
            }
            //If the second value in the array is true check that the current
            //token is not followed by line terminators, otherwise compare its
            //value to the next token
            if (($val[1] === true && $this->noLineTerminators(true)) ||
                ($val[1] !== true && $val[1] === $this->nextToken->value)) {
                return true;
            }
        }
        return false;
    }
    
    /**
     * Returns the next token
     * 
     * @return Token|null
     */
    public function getNextToken()
    {
        if (!$this->nextToken) {
            $token = $this->currentToken ?: $this->getToken();
            $this->currentToken = null;
            $this->nextToken = $this->getToken(true);
            $this->currentToken = $token;
        }
        return $this->nextToken;
    }
    
    /**
     * Returns the current token
     *
     * @param bool $skipEOFChecks  True to skip end of file checks
     *                             even if the end is reached
     * 
     * @return Token|null
     */
    public function getToken($skipEOFChecks = false)
    {
        //The current token is returned until consumed
        if ($this->currentToken) {
            return $this->currentToken;
        }
        
        $comments = $this->skipWhitespacesAndComments();
        
        //Emit the TokenCreated event for all the comments found
        if ($comments) {
            foreach ($comments as $comment) {
                $this->eventsEmitter && $this->eventsEmitter->fire(
                    "TokenCreated", array($comment)
                );
            }
        }

        //When the end of the source is reached
        if ($this->index >= $this->length) {
            //Check if there are open brackets
            if (!$skipEOFChecks) {
                foreach ($this->openBrackets as $bracket => $num) {
                    if ($num) {
                        $this->error("Unclosed $bracket");
                    }
                }

                //Check if there are open templates
                if (count($this->openTemplates)) {
                    $this->error("Unterminated template");
                }
            }
            
            //Register comments and consume them
            if ($this->comments && $comments) {
                $this->commentsForCurrentToken($comments);
            }
            
            //Emit the EndReached event when at the end of the source
            $this->eventsEmitter && $this->eventsEmitter->fire(
                "EndReached"
            );
            
            return null;
        }
        
        $startPosition = $this->getPosition(true);
        $origException = null;
        try {
            
            //Try to match a token
            if (
                ($this->jsx && ($token = $this->scanJSXIdentifier())) ||
                ($token = $this->scanTemplate()) ||
                ($token = $this->scanNumber()) ||
                ($this->jsx && ($token = $this->scanJSXPunctuator())) ||
                ($token = $this->scanPunctuator()) ||
                ($token = $this->scanKeywordOrIdentifier()) ||
                ($this->jsx && ($token = $this->scanJSXString())) ||
                ($token = $this->scanString())
            ) {
                //Set the token start and end positions
                $token->location->start = $startPosition;
                $token->location->end = $this->getPosition(true);
                $this->currentToken = $token;
                                            
                //Register comments if required
                if ($this->comments && $comments) {
                    $this->commentsForCurrentToken($comments);
                }
                
                //Emit the TokenCreated event for the token just created
                $this->eventsEmitter && $this->eventsEmitter->fire(
                    "TokenCreated", array($this->currentToken)
                );
                
                return $this->currentToken;
            }
            
        } catch (Exception $e) {
            $origException = $e;
        }
        
        //If last token was "/" do not throw an error if the token has not be
        //recognized since it can be the first character in a regexp and it will
        //be consumed when the current token will be reconsumed as a regexp
        if ($this->isAfterSlash($startPosition)) {
            $this->setScanPosition($startPosition);
            return null;
        }
        
        //No valid token found. If there was a scan error, throw the same
        //exception again, otherwise throw a new error
        if ($origException) {
            throw $origException;
        }
        $this->error();
    }
    
    /**
     * Executes the operations to handle the end of the source scanning
     * 
     * @return $this
     */
    public function consumeEnd()
    {
        //Consume final comments
        if ($this->comments) {
            $this->consumeCommentsForCurrentToken();
        }
        
        //Emit the EndReached event when at the end of the source
        $this->eventsEmitter && $this->eventsEmitter->fire(
            "EndReached"
        );
        
        return $this;
    }
    
    /**
     * Gets or sets comments for the current token. If the parameter is an
     * array it associates the given comments array to the current node,
     * otherwise comments for the current token are returned
     * 
     * @param array $comments  Comments array
     * 
     * @return array
     */
    protected function commentsForCurrentToken($comments = null)
    {
        $id = $this->currentToken ? spl_object_hash($this->currentToken) : "";
        if ($comments !== null) {
            $this->commentsMap[$id] = $comments;
        } elseif (isset($this->commentsMap[$id])) {
            $comments = $this->commentsMap[$id];
            unset($this->commentsMap[$id]);
        }
        return $comments;
    }
    
    /**
     * Consumes comment tokens associated with the current token
     * 
     * @return $this
     */
    protected function consumeCommentsForCurrentToken()
    {
        $comments = $this->commentsForCurrentToken();
        if ($comments && ($this->registerTokens || $this->eventsEmitter)) {
            foreach ($comments as $comment) {
                //Register the token if required
                if ($this->registerTokens) {
                    $this->tokens[] = $comment;
                }
                //Emit the TokenConsumed event for the comment
                $this->eventsEmitter && $this->eventsEmitter->fire(
                    "TokenConsumed", array($comment)
                );
            }
        }
        return $this;
    }
    
    /**
     * Checks if the consumed or the scanned position follow a slash.
     * 
     * @param Position $position  Additional position to check
     * 
     * @return bool
     */
    protected function isAfterSlash($position = null)
    {
        $consumedIndex = $this->getPosition()->getIndex();
        $checkIndices = array($consumedIndex, $consumedIndex + 1);
        if ($position) {
            $checkIndices[] = $position->getIndex() - 1;
        }
        foreach ($checkIndices as $i) {
            if ($i >= 0 && $this->charAt($i) === "/") {
                return true;
            }
        }
        return false;
    }
    
    /**
     * Tries to reconsume the current token as a regexp if possible
     * 
     * @return Token|null
     */
    public function reconsumeCurrentTokenAsRegexp()
    {
        $token = $this->currentToken ?: $this->getToken();
        $value = $token ? $token->value : null;
        
        //Check if the token starts with "/"
        if (!$value || $value[0] !== "/") {
            return null;
        }
        
        //Reset the scanner position to the token's start position
        $startPosition = $token->location->start;
        $this->setScanPosition($startPosition);
        
        $buffer = "/";
        $this->index++;
        $this->column++;
        $inClass = false;
        while (true) {
            //In a characters class the delimiter "/" is allowed without escape,
            //so the characters class must be closed before closing the regexp
            $stops = $inClass ? array("]") : array("/", "[");
            $tempBuffer = $this->consumeUntil($stops);
            if ($tempBuffer === null) {
                if ($inClass) {
                    $this->error(
                        "Unterminated character class in regexp"
                    );
                } else {
                    $this->error("Unterminated regexp");
                }
            }
            $buffer .= $tempBuffer[0];
            if ($tempBuffer[1] === "/") {
                break;
            } else {
                $inClass = $tempBuffer[1] === "[";
            }
        }
        
        //Flags
        while (($char = $this->charAt()) !== null) {
            $lower = strtolower($char);
            if ($lower >= "a" && $lower <= "z") {
                $buffer .= $char;
                $this->index++;
                $this->column++;
            } else {
                break;
            }
        }
        
        //If next token has already been parsed and it's a bracket exclude it
        //from the count of open brackets
        if ($this->nextToken) {
            $nextVal = $this->nextToken->value;
            if (isset($this->brackets[$nextVal]) &&
                isset($this->openBrackets[$nextVal])
            ) {
                if ($this->brackets[$nextVal]) {
                    $this->openBrackets[$nextVal]++;
                } else {
                    $this->openBrackets[$nextVal]--;
                }
            }
            $this->nextToken = null;
        }
        
        //If comments handling is enabled, get the comments associated with the
        //current token
        $comments = $this->comments ? $this->commentsForCurrentToken() : null;
            
        //Replace the current token with a regexp token
        $token = new Token(Token::TYPE_REGULAR_EXPRESSION, $buffer);
        $token->location->start = $startPosition;
        $token->location->end = $this->getPosition(true);
        $this->currentToken = $token;
                                    
        if ($comments) {
            //Attach the comments to the new current token
            $this->commentsForCurrentToken($comments);
        }
        
        return $this->currentToken;
    }
    
    /**
     * Skips whitespaces and comments from the current scan position. If
     * comments handling is enabled, the array of parsed comments
     * 
     * @return array
     */
    protected function skipWhitespacesAndComments()
    {
        $comments = [];
        $content = "";
        $secStartIdx = $this->index;
        while (($char = $this->charAt()) !== null) {
            //Whitespace
            if (in_array($char, $this->whitespaces)) {
                
                $content .= $char;
                $this->index++;
                
            } elseif ($char === "/" || $char === "#") {

                $nextChar = $this->charAt($this->index + 1);

                //Hashbang comment. This will be parsed only if hashbangs comments are enabled
                //and if it appears at the beginning of the code
                $hashBang = (
                    $char === "#" && $nextChar === "!" &&
                    $this->features->hashbangComments && !$this->index
                );

                //Comment
                if ($nextChar === "/" || $nextChar === "*" || $hashBang) {
                    
                    //If comments must be handled, empty the current content too
                    //and get the comment start position
                    if ($this->comments) {
                        if ($content !== "") {
                            $this->adjustColumnAndLine($content);
                            $content = "";
                        }
                        $start = $this->getPosition(true);
                    }
                    
                    $inline = $nextChar !== "*";
                    $this->index += 2;
                    $content .= $char . $nextChar;
                    
                    while (true) {
                        $char = $this->charAt();
                        
                        if ($char === null) {
                            if (!$inline) {
                                //If the end of the source has been reached and
                                //a multiline comment is still open, it's an
                                //error
                                $this->error("Unterminated comment");
                            }
                            $isEnd = true;
                        } else {
                            $content .= $char;
                            $this->index++;
                            $isEnd = $inline ?
                                     //Inline comment
                                     in_array($char, $this->lineTerminators) :
                                     //Multiline comment
                                     $char === "*" && $this->charAt() === "/";
                        }
                        
                        if ($isEnd) {
                            if (!$inline) {
                                $content .= "/";
                                $this->index++;
                            }
                            if ($this->comments) {
                                //For inline comments the closing line
                                //terminator must be excluded from comment text
                                if ($inline && $char !== null) {
                                    $this->index--;
                                    $content = substr($content, 0, -strlen($char));
                                }
                                $this->adjustColumnAndLine($content);
                                $token = new Token(Token::TYPE_COMMENT, $content);
                                $token->location->start = $start;
                                $token->location->end = $this->getPosition(true);
                                $comments[] = $token;
                                //For inline comments the new content contains
                                //the closing line terminator since the char has
                                //already been processed
                                $content = "";
                                if ($inline && $char !== null) {
                                    $content = $char;
                                    $this->index++;
                                }
                            }
                            break;
                        }
                    }
                    
                } else {
                    break;
                }
                
            } elseif (!$this->isModule && $char === "<" &&
                $this->charAt($this->index + 1) === "!" &&
                $this->charAt($this->index + 2) === "-" &&
                $this->charAt($this->index + 3) === "-"
            ) {
                
                //If comments must be handled, empty the current content too
                //and get the comment start position
                if ($this->comments) {
                    if ($content !== "") {
                        $this->adjustColumnAndLine($content);
                        $content = "";
                    }
                    $start = $this->getPosition(true);
                }
                
                //Open html comment
                $this->index += 4;
                $content .= "<!--";
                while (true) {
                    $char = $this->charAt();
                    if ($char === null) {
                        $isEnd = true;
                    } else {
                        $content .= $char;
                        $this->index++;
                        $isEnd = in_array($char, $this->lineTerminators);
                    }
                    if ($isEnd) {
                        if ($this->comments) {
                            //Remove the closing line terminator from the
                            //comment text
                            if ($char !== null) {
                                $this->index--;
                                $content = substr($content, 0, -strlen($char));
                            }
                            $this->adjustColumnAndLine($content);
                            $token = new Token(Token::TYPE_COMMENT, $content);
                            $token->location->start = $start;
                            $token->location->end = $this->getPosition(true);
                            $comments[] = $token;
                            $content = "";
                            if ($char !== null) {
                                $content = $char;
                                $this->index++;
                            }
                        }
                        break;
                    }
                }
                
            } elseif (!$this->isModule && $char === "-" &&
                $this->charAt($this->index + 1) === "-" &&
                $this->charAt($this->index + 2) === ">"
            ) {
                
                //Close html comment
                //Check if it is on it's own line
                $allow = false;
                if (!$secStartIdx) {
                    $allow = true;
                } else {
                    for ($index = $this->index - 1; $index >= $secStartIdx; $index--) {
                        if (in_array($this->charAt($index), $this->lineTerminators)) {
                            $allow = true;
                            break;
                        }
                    }
                }
                if ($allow) {
                    
                    //If comments must be handled, empty the current content too
                    //and get the comment start position
                    if ($this->comments) {
                        if ($content !== "") {
                            $this->adjustColumnAndLine($content);
                            $content = "";
                        }
                        $start = $this->getPosition(true);
                    }
                    
                    $this->index += 3;
                    $content .= "-->";
                    while (true) {
                        $char = $this->charAt();
                        
                        if ($char === null) {
                            $isEnd = true;
                        } else {
                            $content .= $char;
                            $this->index++;
                            $isEnd = in_array($char, $this->lineTerminators);
                        }
                        
                        if ($isEnd) {
                            if ($this->comments) {
                                //Remove the closing line terminator from the
                                //comment text
                                if ($char !== null) {
                                    $this->index--;
                                    $content = substr($content, 0, -strlen($char));
                                }
                                $this->adjustColumnAndLine($content);
                                $token = new Token(Token::TYPE_COMMENT, $content);
                                $token->location->start = $start;
                                $token->location->end = $this->getPosition(true);
                                $comments[] = $token;
                                $content = "";
                                if ($char !== null) {
                                    $content = $char;
                                    $this->index++;
                                }
                            }
                            break;
                        }
                    }
                } else {
                    break;
                }
                
            } else {
                break;
            }
        }
        
        if ($content !== "") {
            $this->adjustColumnAndLine($content);
        }
        
        return $comments;
    }
    
    /**
     * String scanning method
     * 
     * @param bool $handleEscape True to handle escaping
     * 
     * @return Token|null
     */
    protected function scanString($handleEscape = true)
    {
        $char = $this->charAt();
        if ($char === "'" || $char === '"') {
            $this->index++;
            $this->column++;
            //Add the quote to the LSM and then remove it after consuming
            $this->stringsStopsLSM->add($char);
            $buffer = $this->consumeUntil($this->stringsStopsLSM, $handleEscape);
            $this->stringsStopsLSM->remove($char);
            if ($buffer === null || $buffer[1] !== $char) {
                $this->error("Unterminated string");
            }
            return new Token(Token::TYPE_STRING_LITERAL, $char . $buffer[0]);
        }
        
        return null;
    }
    
    /**
     * Template scanning method
     * 
     * @return Token|null
     */
    protected function scanTemplate()
    {
        $char = $this->charAt();
        
        //Get the current number of open curly brackets
        $openCurly = isset($this->openBrackets["{"]) ?
                     $this->openBrackets["{"] :
                     0;
        
        //If the character is a curly bracket check and the number of open
        //curly brackets matches the last number in the open templates stack,
        //then the bracket closes the open template expression
        $endExpression = false;
        if ($char === "}") {
            $len = count($this->openTemplates);
            if ($len && $this->openTemplates[$len - 1] === $openCurly) {
                $endExpression = true;
                array_pop($this->openTemplates);
            }
        }
        
        if ($char === "`" || $endExpression) {
            $this->index++;
            $this->column++;
            $buffer = $char;
            while (true) {
                $tempBuffer = $this->consumeUntil(array("`", "$"));
                if (!$tempBuffer) {
                    $this->error("Unterminated template");
                }
                $buffer .= $tempBuffer[0];
                if ($tempBuffer[1] !== "$" || $this->charAt() === "{") {
                    //If "${" is found it's a new template expression, register
                    //the current number of open curly brackets in the open
                    //templates stack
                    if ($tempBuffer[1] === "$") {
                        $this->index++;
                        $this->column++;
                        $buffer .= "{";
                        $this->openTemplates[] = $openCurly;
                    }
                    break;
                }
            }
            return new Token(Token::TYPE_TEMPLATE, $buffer);
        }
        
        return null;
    }
    
    /**
     * Number scanning method
     * 
     * @return Token|null
     */
    protected function scanNumber()
    {
        //Numbers can start with a decimal number or with a dot (.5)
        $char = $this->charAt();
        if (!(($char >= "0" && $char <= "9") || $char === ".")) {
            return null;
        }

        $buffer = "";
        $allowedDecimals = true;
        
        //Parse the integer part
        if ($char !== ".") {
            
            //Consume all decimal numbers
            $buffer = $this->consumeNumbers();
            $char = $this->charAt();
            
            if ($this->features->bigInt && $char === "n") {
                $this->index++;
                $this->column++;
                return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . $char);
            }
            
            $lower = $char !== null ? strtolower($char) : null;
            
            //Handle hexadecimal (0x), octal (0o) and binary (0b) forms
            if ($buffer === "0" && $lower !== null &&
                isset($this->{$lower . "numbers"})
            ) {
                
                $this->index++;
                $this->column++;
                $tempBuffer = $this->consumeNumbers($lower);
                if ($tempBuffer === null) {
                    $this->error("Missing numbers after 0$char");
                }
                $buffer .= $char . $tempBuffer;
                
                //Check that there are not numbers left
                if ($this->consumeNumbers() !== null) {
                    $this->error();
                }

                if ($this->features->bigInt && $this->charAt() === "n") {
                    $this->index++;
                    $this->column++;
                    return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . $char);
                }
                
                return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
            }
            
            //Consume exponent part if present
            if ($tempBuffer = $this->consumeExponentPart()) {
                $buffer .= $tempBuffer;
                $allowedDecimals = false;
            }
        }
        
        //Parse the decimal part
        if ($allowedDecimals && $this->charAt() === ".") {
            
            //Consume the dot
            $this->index++;
            $this->column++;
            $buffer .= ".";
            
            //Consume all decimal numbers
            $tempBuffer = $this->consumeNumbers();
            $buffer .= $tempBuffer;
            
            //If the buffer contains only the dot it should be parsed as
            //punctuator
            if ($buffer === ".") {
                $this->index--;
                $this->column--;
                return null;
            }
            
            //Consume exponent part if present
            if (($tempBuffer = $this->consumeExponentPart()) !== null) {
                $buffer .= $tempBuffer;
            }
        }
        
        return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
    }
    
    /**
     * Consumes the maximum number of digits
     * 
     * @param string $type Digits type (decimal, hexadecimal, etc...)
     * @param int    $max  Maximum number of digits to match
     * 
     * @return string|null
     */
    protected function consumeNumbers($type = "", $max = null)
    {
        $buffer = "";
        $char = $this->charAt();
        $count = 0;
        $extra = $this->features->numericLiteralSeparator ? "_" : "";
        while (
            in_array($char, $this->{$type . "numbers"}) ||
            ($count && $char === $extra)
        ) {
            $buffer .= $char;
            $this->index++;
            $this->column++;
            $count ++;
            if ($count === $max) {
                break;
            }
            $char = $this->charAt();
        }
        if ($count && substr($buffer, -1) === "_") {
            $this->error(
                "Numeric separators are not allowed at the end of a number"
            );
        }
        return $count ? $buffer : null;
    }
    
    /**
     * Consumes the exponent part of a number
     * 
     * @return string|null
     */
    protected function consumeExponentPart()
    {
        $buffer = "";
        $char = $this->charAt();
        if ($char !== null && strtolower($char) === "e") {
            $this->index++;
            $this->column++;
            $buffer .= $char;
            $char = $this->charAt();
            if ($char === "+" || $char === "-") {
                $this->index++;
                $this->column++;
                $buffer .= $char;
            }
            $tempBuffer = $this->consumeNumbers();
            if ($tempBuffer === null) {
                $this->error("Missing exponent");
            }
            $buffer .= $tempBuffer;
        }
        return $buffer;
    }
    
    /**
     * Punctuator scanning method
     * 
     * @return Token|null
     */
    protected function scanPunctuator()
    {
        $token = null;
        $char = $this->charAt();
        
        //Check if the next char is a bracket
        if (isset($this->brackets[$char])) {
            //Check if it is a closing bracket
            if ($this->brackets[$char]) {
                $openBracket = $this->brackets[$char];
                //Check if there is a corresponding open bracket
                if (!isset($this->openBrackets[$openBracket]) ||
                    !$this->openBrackets[$openBracket]
                ) {
                    if (!$this->isAfterSlash($this->getPosition(true))) {
                        $this->error();
                    }
                } else {
                    $this->openBrackets[$openBracket]--;
                }
            } else {
                if (!isset($this->openBrackets[$char])) {
                    $this->openBrackets[$char] = 0;
                }
                $this->openBrackets[$char]++;
            }
            $this->index++;
            $this->column++;
            $token = new Token(Token::TYPE_PUNCTUATOR, $char);
        } elseif (
            //Try to match the longest punctuator
            $match = $this->punctuatorsLSM->match($this, $this->index, $char)
        ) {
            //Optional chaining punctuator cannot appear before a number, in this
            //case only the question mark must be consumed
            if ($match[1] === "?." &&
                ($nextChar = $this->charAt($this->index + $match[0])) !== null &&
                $nextChar >= "0" && $nextChar <= "9"
            ) {
                $match = array(1, "?");
            }
            $this->index += $match[0];
            $this->column += $match[0];
            $token = new Token(Token::TYPE_PUNCTUATOR, $match[1]);
        }
        return $token;
    }
    
    /**
     * Keywords and identifiers scanning method
     * 
     * @return Token|null
     */
    protected function scanKeywordOrIdentifier()
    {
        //Check private identifier start character
        if ($private = $this->features->privateMethodsAndFields && $this->charAt() === "#") {
            $this->index++;
            $this->column++;
        }

        //Consume the maximum number of characters that are unicode escape
        //sequences or valid identifier starts (only the first character) or
        //parts
        $buffer = "";
        $start = true;
        while (($char = $this->charAt()) !== null) {
            if (
                ($char >= "a" && $char <= "z") ||
                ($char >= "A" && $char <= "Z") ||
                $char === "_" || $char === "$" ||
                (!$start && $char >= "0" && $char <= "9") ||
                $this->isIdentifierChar($char, $start)
            ) {
                $buffer .= $char;
                $this->index++;
                $this->column++;
            } elseif ($char === "\\" && ($seq = $this->consumeUnicodeEscapeSequence())) {
                //Verify that it's a valid character
                if (!$this->isIdentifierChar($seq[1], $start)) {
                    break;
                }
                $buffer .= $seq[0];
            } else {
                break;
            }
            $start = false;
        }
        
        //Identify token type
        if ($buffer === "") {
            //Unconsume the hash if nothing was found after that
            if ($private) {
                $this->index--;
                $this->column--;
            }
            return null;
        } elseif ($private) {
            $type = Token::TYPE_PRIVATE_IDENTIFIER;
            $buffer = "#" . $buffer;
        } elseif ($buffer === "null") {
            $type = Token::TYPE_NULL_LITERAL;
        } elseif ($buffer === "true" || $buffer === "false") {
            $type = Token::TYPE_BOOLEAN_LITERAL;
        } elseif (in_array($buffer, $this->keywords) ||
            in_array($buffer, $this->strictModeKeywords)
        ) {
            $type = Token::TYPE_KEYWORD;
        } else {
            $type = Token::TYPE_IDENTIFIER;
        }
        
        return new Token($type, $buffer);
    }
    
    /**
     * Consumes an unicode escape sequence
     * 
     * @return array|null
     */
    protected function consumeUnicodeEscapeSequence()
    {
        if ($this->charAt() !== "\\" ||
            $this->charAt($this->index + 1) !== "u"
        ) {
            return null;
        }
        
        $startIndex = $this->index;
        $startColumn = $this->column;
        $this->index += 2;
        $this->column += 2;
        $brackets = false;
        if ($this->charAt() === "{") {
            //\u{FFF}
            $brackets = true;
            $this->index++;
            $this->column++;
            $code = $this->consumeNumbers("x");
            if ($code && $this->charAt() !== "}") {
                $code = null;
            } else {
                $this->index++;
                $this->column++;
            }
        } else {
            //\uFFFF
            $code = $this->consumeNumbers("x", 4);
            if ($code && strlen($code) !== 4) {
                $code = null;
            }
        }
        
        //Unconsume everything if the format is invalid
        if ($code === null) {
            $this->index = $startIndex;
            $this->column = $startColumn;
            return null;
        }
        
        //Return an array where the first element is the matched sequence
        //and the second one is the decoded character
        return array(
            $brackets ? "\\u{" . $code . "}" : "\\u" . $code,
            Utils::unicodeToUtf8(hexdec($code))
        );
    }
    
    /**
     * Checks if the given character is valid for an identifier
     * 
     * @param string $char  Character to check
     * @param bool   $start If true it will check that the character is
     *                      valid to start an identifier
     * 
     * @return bool
     */
    protected function isIdentifierChar($char, $start = true)
    {
        return ($char >= "a" && $char <= "z") ||
               ($char >= "A" && $char <= "Z") ||
               $char === "_" || $char === "$" ||
               (!$start && $char >= "0" && $char <= "9") ||
               preg_match($start ? $this->idStartRegex : $this->idPartRegex, $char);
    }
    
    /**
     * Increases columns and lines count according to the given string
     * 
     * @param string $buffer String to analyze
     * 
     * @return void
     */
    protected function adjustColumnAndLine($buffer)
    {
        $lines = preg_split($this->linesSplitter, $buffer);
        $linesCount = count($lines) - 1;
        $this->line += $linesCount;
        $columns = mb_strlen($lines[$linesCount], "UTF-8");
        if ($linesCount) {
            $this->column = $columns;
        } else {
            $this->column += $columns;
        }
    }
    
    /**
     * Consumes characters until one of the given characters is found
     * 
     * @param array|LSM $stops          Characters to search
     * @param bool      $handleEscape   True to handle escaping
     * @param bool      $collectStop    True to include the stop character
     * 
     * @return array|null
     */
    protected function consumeUntil(
        $stops, $handleEscape = true, $collectStop = true
    ) {
        $isLSM = $stops instanceof LSM;
        $buffer = "";
        $escaped = false;
        while (($char = $this->charAt()) !== null) {
            $incrIndex = 1;
            $isStop = false;
            if ($isLSM) {
                $m = $stops->match($this, $this->index, $char);
                if ($m) {
                    $isStop = true;
                    $incrIndex = $m[0];
                    $char = $m[1];
                }
            } else {
                $isStop = in_array($char, $stops);
            }
            $validStop = $isStop && !$escaped;
            if (!$validStop || $collectStop) {
                $this->index += $incrIndex;
                $buffer .= $char;
            }
            if ($validStop) {
                if (!$collectStop && $buffer === "") {
                    return null;
                }
                $this->adjustColumnAndLine($buffer);
                return array($buffer, $char);
            } elseif (!$escaped && $char === "\\" && $handleEscape) {
                $escaped = true;
            } else {
                $escaped = false;
            }
        }
        return null;
    }
}