1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15: 16: 17: 18:
/**
 * Simple lexical analyser driven by regular expressions.
 *
 * The patterns passed to the constructor are combined into one anchored PCRE
 * alternation. tokenize() cuts the input into consecutive tokens and throws
 * TokenizerException at the first position where none of the patterns match.
 */
class Tokenizer extends Nette\Object
{
	/** Keys of the array that describes a single token. */
	const VALUE = 0,
		OFFSET = 1,
		TYPE = 2;

	/** @var string  anchored regular expression combining all patterns */
	private $re;

	/** @var array|FALSE  token types (the pattern keys), or FALSE when the patterns form a plain 0..n-1 list */
	private $types;


	/**
	 * Builds the combined regular expression.
	 *
	 * Every pattern is wrapped in its own capturing group and the group index
	 * is later mapped back to the pattern key, so patterns should not contain
	 * capturing groups of their own. Patterns are inserted verbatim between
	 * '~' delimiters, therefore a literal '~' inside a pattern must be escaped.
	 *
	 * @param  array   patterns; non-sequential keys are used as token types
	 * @param  string  extra PCRE modifiers appended after the 'A' (anchored) modifier
	 */
	public function __construct(array $patterns, $flags = '')
	{
		$this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
		$keys = array_keys($patterns);
		// A plain list carries no type names; remember that as FALSE so tokenize() takes the untyped path.
		$this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
	}


	/**
	 * Splits the input string into tokens.
	 *
	 * In typed mode every token is array(VALUE => string, OFFSET => int, TYPE => mixed).
	 * In untyped mode every token is the array(VALUE => string, OFFSET => int) pair
	 * produced by the split with PREG_SPLIT_OFFSET_CAPTURE.
	 *
	 * @param  string
	 * @return array  list of tokens
	 * @throws TokenizerException  when a part of the input matches no pattern
	 */
	public function tokenize($input)
	{
		if ($this->types) {
			$tokens = Strings::matchAll($input, $this->re);
			$len = 0;
			$count = count($this->types);
			foreach ($tokens as & $match) {
				$type = NULL;
				// The first non-empty capturing group tells which pattern produced the match.
				for ($i = 1; $i <= $count; $i++) {
					if (!isset($match[$i])) {
						break;
					} elseif ($match[$i] !== '') {
						$type = $this->types[$i - 1];
						break;
					}
				}
				$match = array(self::VALUE => $match[0], self::OFFSET => $len, self::TYPE => $type);
				$len += strlen($match[self::VALUE]);
			}
			// Drop the reference so the last element of $tokens is not left aliased to $match.
			unset($match);

			// Matching is anchored, so the matches are contiguous; a shorter total means it stopped early.
			if ($len !== strlen($input)) {
				$errorOffset = $len;
			}

		} else {
			$tokens = Strings::split($input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
			$last = end($tokens);
			// If the final piece is not itself a valid token, it is the unmatched remainder of the input.
			if ($tokens && !Strings::match($last[0], $this->re)) {
				$errorOffset = $last[1];
			}
		}

		if (isset($errorOffset)) {
			list($line, $col) = static::getCoordinates($input, $errorOffset);
			$token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
			throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
		}
		return $tokens;
	}


	/**
	 * Converts a byte offset in the text to a line and column position.
	 *
	 * @param  string
	 * @param  int     byte offset from the start of the text
	 * @return array   array(line, column), both counted from 1
	 */
	public static function getCoordinates($text, $offset)
	{
		$text = substr($text, 0, $offset);
		// Prepending "\n" guarantees strrpos() finds a line break even on the first line.
		return array(substr_count($text, "\n") + 1, $offset - strrpos("\n" . $text, "\n") + 1);
	}

}
100:
101:
102: 103: 104:
/**
 * Thrown by the tokenizer when the input contains text that matches none of
 * the configured patterns.
 */
class TokenizerException extends \Exception
{
}
108: