153 lines
4.5 KiB
PHP
153 lines
4.5 KiB
PHP
|
<?php
|
||
|
/**
|
||
|
* This file is part of the Peast package
|
||
|
*
|
||
|
* (c) Marco Marchiò <marco.mm89@gmail.com>
|
||
|
*
|
||
|
* For the full copyright and license information refer to the LICENSE file
|
||
|
* distributed with this source code
|
||
|
*/
|
||
|
namespace Peast\Syntax;
|
||
|
|
||
|
/**
|
||
|
* Longest Sequence Matcher. Utility class used by the scanner to consume
|
||
|
* the longest sequence of character given a set of allowed characters sequences.
|
||
|
*
|
||
|
* @author Marco Marchiò <marco.mm89@gmail.com>
|
||
|
*/
|
||
|
class LSM
|
||
|
{
|
||
|
/**
|
||
|
* Internal sequences map
|
||
|
*
|
||
|
* @var array
|
||
|
*/
|
||
|
protected $map = array();
|
||
|
|
||
|
/**
|
||
|
* Encoding handle flag
|
||
|
*
|
||
|
* @var bool
|
||
|
*/
|
||
|
protected $handleEncoding = false;
|
||
|
|
||
|
/**
|
||
|
* Class constructor
|
||
|
*
|
||
|
* @param array $sequences Allowed characters sequences
|
||
|
* @param bool $handleEncoding True to handle encoding when matching
|
||
|
*/
|
||
|
function __construct($sequences, $handleEncoding = false)
|
||
|
{
|
||
|
$this->handleEncoding = $handleEncoding;
|
||
|
foreach ($sequences as $s) {
|
||
|
$this->add($s);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Adds a sequence
|
||
|
*
|
||
|
* @param string $sequence Sequence to add
|
||
|
*
|
||
|
* @return $this
|
||
|
*/
|
||
|
public function add($sequence)
|
||
|
{
|
||
|
if ($this->handleEncoding) {
|
||
|
$s = Utils::stringToUTF8Array($sequence);
|
||
|
$first = $s[0];
|
||
|
$len = count($s);
|
||
|
} else {
|
||
|
$first = $sequence[0];
|
||
|
$len = strlen($sequence);
|
||
|
}
|
||
|
if (!isset($this->map[$first])) {
|
||
|
$this->map[$first] = array(
|
||
|
"maxLen" => $len,
|
||
|
"map" => array($sequence)
|
||
|
);
|
||
|
} else {
|
||
|
$this->map[$first]["map"][] = $sequence;
|
||
|
$this->map[$first]["maxLen"] = max($this->map[$first]["maxLen"], $len);
|
||
|
}
|
||
|
return $this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Removes a sequence
|
||
|
*
|
||
|
* @param string $sequence Sequence to remove
|
||
|
*
|
||
|
* @return $this
|
||
|
*/
|
||
|
public function remove($sequence)
|
||
|
{
|
||
|
if ($this->handleEncoding) {
|
||
|
$s = Utils::stringToUTF8Array($sequence);
|
||
|
$first = $s[0];
|
||
|
} else {
|
||
|
$first = $sequence[0];
|
||
|
}
|
||
|
if (isset($this->map[$first])) {
|
||
|
$len = $this->handleEncoding ? count($s) : strlen($sequence);
|
||
|
$this->map[$first]["map"] = array_diff(
|
||
|
$this->map[$first]["map"], array($sequence)
|
||
|
);
|
||
|
if (!count($this->map[$first]["map"])) {
|
||
|
unset($this->map[$first]);
|
||
|
} elseif ($this->map[$first]["maxLen"] === $len) {
|
||
|
// Recalculate the max length if necessary
|
||
|
foreach ($this->map[$first]["map"] as $m) {
|
||
|
$this->map[$first]["maxLen"] = max(
|
||
|
$this->map[$first]["maxLen"],
|
||
|
strlen($m)
|
||
|
);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return $this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Executes the match. It returns an array where the first element is the
|
||
|
* number of consumed characters and the second element is the match. If
|
||
|
* no match is found it returns null.
|
||
|
*
|
||
|
* @param Scanner $scanner Scanner instance
|
||
|
* @param int $index Current index
|
||
|
* @param string $char Current character
|
||
|
*
|
||
|
* @return array|null
|
||
|
*/
|
||
|
public function match($scanner, $index, $char)
|
||
|
{
|
||
|
$consumed = 1;
|
||
|
$bestMatch = null;
|
||
|
if (isset($this->map[$char])) {
|
||
|
//If the character is present in the map and it has a max length of
|
||
|
//1, match immediately
|
||
|
if ($this->map[$char]["maxLen"] === 1) {
|
||
|
$bestMatch = array($consumed, $char);
|
||
|
} else {
|
||
|
//Otherwise consume a number of characters equal to the max
|
||
|
//length and find the longest match
|
||
|
$buffer = $char;
|
||
|
$map = $this->map[$char]["map"];
|
||
|
$maxLen = $this->map[$char]["maxLen"];
|
||
|
do {
|
||
|
if (in_array($buffer, $map)) {
|
||
|
$bestMatch = array($consumed, $buffer);
|
||
|
}
|
||
|
$nextChar = $scanner->charAt($index + $consumed);
|
||
|
if ($nextChar === null) {
|
||
|
break;
|
||
|
}
|
||
|
$buffer .= $nextChar;
|
||
|
$consumed++;
|
||
|
} while ($consumed <= $maxLen);
|
||
|
}
|
||
|
}
|
||
|
return $bestMatch;
|
||
|
}
|
||
|
}
|