1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10:
11: 12: 13: 14:
/**
 * Simple lexer driven by a set of regular expressions.
 *
 * All patterns are joined into one start-anchored alternation with a single
 * capturing group per pattern. The input must be covered by consecutive
 * matches from its first to its last byte; otherwise tokenize() throws.
 */
class Tokenizer
{
    /** Indexes of the individual fields inside a token array. */
    const VALUE = 0,
        OFFSET = 1,
        TYPE = 2;

    /** @var string  combined regular expression (one capturing group per pattern, 'A' modifier) */
    private $re;

    /** @var array|false  keys of the pattern array used as token types, or false for a plain 0..n-1 list */
    private $types;


    /**
     * @param  array   $patterns  regular-expression fragments without delimiters; when the keys are
     *                            not exactly 0..n-1 they are used as token type names
     * @param  string  $flags     PCRE modifiers appended after the implicit 'A' (anchored) modifier
     */
    public function __construct(array $patterns, $flags = '')
    {
        // NOTE(review): '~' is the delimiter, so a pattern containing an unescaped '~'
        // would presumably break the combined expression - confirm with callers.
        $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
        $keys = array_keys($patterns);
        $this->types = $keys === range(0, count($patterns) - 1) ? false : $keys;
    }


    /**
     * Splits the input string into tokens.
     *
     * With typed patterns every token is an array indexed by self::VALUE,
     * self::OFFSET and self::TYPE. With untyped patterns every token is the
     * [value, offset] pair produced by preg_split().
     *
     * @param  string  $input
     * @return array   list of tokens
     * @throws TokenizerException  when a part of the input cannot be matched
     */
    public function tokenize($input)
    {
        if ($this->types) {
            $matches = [];
            preg_match_all($this->re, $input, $matches, PREG_SET_ORDER);

            // A new list is built instead of rewriting the matches through a
            // by-reference foreach, so no dangling reference is left behind.
            $tokens = [];
            $len = 0;
            $count = count($this->types);
            foreach ($matches as $match) {
                $type = null;
                for ($i = 1; $i <= $count; $i++) {
                    if (!isset($match[$i])) {
                        break; // trailing groups that did not participate are absent
                    } elseif ($match[$i] !== '') {
                        // the first non-empty group identifies the pattern that matched
                        $type = $this->types[$i - 1];
                        break;
                    }
                }
                $tokens[] = [self::VALUE => $match[0], self::OFFSET => $len, self::TYPE => $type];
                $len += strlen($match[0]);
            }

            // Matches are anchored one after another, so a total length shorter
            // than the input means matching stopped at byte offset $len.
            if ($len !== strlen($input)) {
                $errorOffset = $len;
            }

        } else {
            $tokens = preg_split($this->re, $input, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_DELIM_CAPTURE);
            if ($tokens === false) {
                // preg_split() failed (e.g. malformed UTF-8 with the 'u' flag):
                // nothing was tokenized, so report the very beginning of the input.
                $errorOffset = 0;
            } elseif ($tokens) {
                // An unmatched remainder ends up as the last piece; it is the
                // only piece that does not match the expression itself.
                $last = end($tokens);
                if (!preg_match($this->re, $last[0])) {
                    $errorOffset = $last[1];
                }
            }
        }

        if (isset($errorOffset)) {
            list($line, $col) = static::getCoordinates($input, $errorOffset);
            $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
            throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
        }
        return $tokens;
    }


    /**
     * Converts a byte offset within a text to a line and column position.
     *
     * Both numbers are 1-based; the column is counted in bytes from the last
     * "\n" that precedes the offset.
     *
     * @param  string  $text
     * @param  int     $offset  byte offset into $text
     * @return array   [line, column]
     */
    public static function getCoordinates($text, $offset)
    {
        $text = substr($text, 0, $offset);
        // Prepending "\n" guarantees strrpos() finds a newline even on the first line.
        return [substr_count($text, "\n") + 1, $offset - strrpos("\n" . $text, "\n") + 1];
    }
}
98: