Obvody/vendor/nette/neon/src/Neon/Lexer.php

84 lines
1.9 KiB
PHP

<?php
/**
* This file is part of the Nette Framework (https://nette.org)
* Copyright (c) 2004 David Grudl (https://davidgrudl.com)
*/
declare(strict_types=1);
namespace Nette\Neon;
/**
 * Splits NEON source text into a flat sequence of tokens.
 *
 * @internal
 */
final class Lexer
{
    /**
     * Regular-expression fragments keyed by token type.
     *
     * tokenize() wraps each fragment in its own capturing group and joins them,
     * in this order, into a single alternation. The fragments are written for
     * the x (extended) modifier, so whitespace outside character classes is
     * insignificant.
     */
    public const Patterns = [
        // strings: '''...''' and """...""" multi-line forms, '...' (quote doubled as ''), "..." (backslash escapes)
        Token::String => <<<'XX'
        '''\n (?:(?: [^\n] | \n(?![\t ]*+''') )*+ \n)?[\t ]*+''' |
        """\n (?:(?: [^\n] | \n(?![\t ]*+""") )*+ \n)?[\t ]*+""" |
        ' (?: '' | [^'\n] )*+ ' |
        " (?: \\. | [^"\\\n] )*+ "
        XX,

        // literal / boolean / integer / float
        Token::Literal => <<<'XX'
        (?: [^#"',:=[\]{}()\n\t `-] | (?<!["']) [:-] [^"',=[\]{}()\n\t ] )
        (?:
        [^,:=\]})(\n\t ]++ |
        :(?! [\n\t ,\]})] | $ ) |
        [ \t]++ [^#,:=\]})(\n\t ]
        )*+
        XX,

        // punctuation
        Token::Char => '[,:=[\]{}()-]',

        // comment (from # to the end of the line)
        Token::Comment => '\#.*+',

        // new line (a run of consecutive line feeds is one token)
        Token::Newline => '\n++',

        // whitespace (tabs and spaces)
        Token::Whitespace => '[\t ]++',
    ];


    /**
     * Converts NEON source text into a TokenStream.
     *
     * Carriage returns are removed first, so tokens only ever contain "\n" as
     * a line break. When some part of the input matches none of the patterns,
     * the problem is reported through TokenStream::error() together with the
     * number of tokens recognised so far.
     *
     * @throws Exception when the input is not valid UTF-8 or PCRE fails for another reason
     */
    public function tokenize(string $input): TokenStream
    {
        $input = str_replace("\r", '', $input);

        // A = every match must start exactly where the previous one ended,
        // so matching stops at the first position no pattern accepts
        $pattern = '~(' . implode(')|(', self::Patterns) . ')~Amixu';
        $res = preg_match_all($pattern, $input, $matches, PREG_SET_ORDER);
        if ($res === false) {
            // malformed UTF-8 is only one of several reasons PCRE can fail
            // (backtrack limit, JIT stack limit, ...); do not mislabel the others
            $code = preg_last_error();
            throw new Exception(
                $code === PREG_BAD_UTF8_ERROR
                    ? 'Invalid UTF-8 sequence.'
                    : "Unable to tokenize input (PCRE error code $code)."
            );
        }

        $types = array_keys(self::Patterns);
        $offset = 0;

        $tokens = [];
        foreach ($matches as $match) {
            // trailing unmatched groups are omitted from $match, so its size
            // identifies which alternative (and therefore which type) matched
            $type = $types[count($match) - 2];
            // punctuation tokens use the character itself as their type
            $tokens[] = new Token($match[0], $type === Token::Char ? $match[0] : $type);
            $offset += strlen($match[0]);
        }

        $stream = new TokenStream($tokens);
        if ($offset !== strlen($input)) {
            $excerpt = substr($input, $offset, 40);
            // substr() counts bytes, so the cut may land inside a multibyte
            // character; drop such an incomplete trailing sequence to keep
            // the error message valid UTF-8
            $excerpt = preg_replace(
                '~(?:[\xC0-\xDF]|[\xE0-\xEF][\x80-\xBF]?|[\xF0-\xF7][\x80-\xBF]{0,2})$~D',
                '',
                $excerpt
            ) ?? $excerpt;
            $s = str_replace("\n", '\n', $excerpt);
            $stream->error("Unexpected '$s'", count($tokens));
        }

        return $stream;
    }


    /**
     * Tells whether a string has to be written with delimiters.
     *
     * Returns true when the string contains a control character, starts like
     * a number (optional sign or dot followed by a digit), is one of the words
     * true/false/yes/no/on/off/null (case-insensitive), or does not match the
     * Token::Literal pattern in its entirety.
     */
    public static function requiresDelimiters(string $s): bool
    {
        // the hyphen is escaped so the class is exactly "+", "-" and "."
        // instead of a range from "+" to "." that would also include ","
        return preg_match('~[\x00-\x1F]|^[+\-.]?\d|^(true|false|yes|no|on|off|null)$~Di', $s)
            || !preg_match('~^' . self::Patterns[Token::Literal] . '$~Dx', $s);
    }
}