1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15: 16: 17:
/**
 * Simple regular-expression based tokenizer with a cursor for walking the
 * resulting token stream.
 *
 * Patterns are joined into a single anchored alternation. When the patterns
 * are passed with non-sequential (named) keys, each token is an array
 * ('value', 'type', 'line'); when they are passed as a plain list, each token
 * is just the matched string.
 */
class Tokenizer extends Nette\Object
{
	/** @var array tokens produced by the last tokenize() call */
	public $tokens;

	/** @var int index in $tokens of the next token to be scanned */
	public $position = 0;

	/** @var array token types (or raw values for untyped tokens) that scanning may skip over */
	public $ignored = array();

	/** @var string input passed to the last tokenize() call */
	private $input;

	/** @var string regular expression built from the patterns */
	private $re;

	/** @var array|bool pattern names, or FALSE when patterns were given as a plain 0..n-1 list */
	private $types;

	/** @var array|string|NULL token most recently consumed by an advancing scan */
	public $current;


	/**
	 * @param  array   of [(int|string) token type => (string) pattern]
	 * @param  string  additional regular expression flags appended after the 'A' (anchored) modifier
	 */
	public function __construct(array $patterns, $flags = '')
	{
		$this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
		$keys = array_keys($patterns);
		// A plain list (keys 0..n-1) carries no type information.
		$this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
	}


	/**
	 * Splits the input into tokens and stores them in $tokens.
	 * @param  string
	 * @return array  the tokens
	 * @throws TokenizerException when part of the input matches none of the patterns
	 */
	public function tokenize($input)
	{
		$this->input = $input;
		if ($this->types) {
			$this->tokens = Strings::matchAll($input, $this->re);
			$len = 0;
			$count = count($this->types);
			$line = 1;
			foreach ($this->tokens as &$match) {
				$type = NULL;
				// The first non-empty capture group identifies which pattern matched.
				for ($i = 1; $i <= $count; $i++) {
					if (!isset($match[$i])) {
						break;
					} elseif ($match[$i] != NULL) {
						$type = $this->types[$i - 1];
						break;
					}
				}
				$match = self::createToken($match[0], $type, $line);
				$len += strlen($match['value']);
				$line += substr_count($match['value'], "\n");
			}
			unset($match); // drop the reference so later writes cannot clobber the last token

			// Matching is anchored, so it stops at the first unrecognised character.
			if ($len !== strlen($input)) {
				$errorOffset = $len;
			}

		} else {
			$this->tokens = Strings::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
			// An unmatched remainder ends up as the last piece of the split.
			if ($this->tokens && !Strings::match(end($this->tokens), $this->re)) {
				$tmp = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
				list(, $errorOffset) = end($tmp);
			}
		}

		if (isset($errorOffset)) {
			$before = substr($this->input, 0, $errorOffset);
			$line = $errorOffset ? substr_count($this->input, "\n", 0, $errorOffset) + 1 : 1;
			// Prepending "\n" makes strrpos() always succeed and yields a 1-based
			// column on every line (previously lines after the first were off by one).
			$col = $errorOffset - strrpos("\n" . $before, "\n") + 1;
			$token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
			throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
		}
		return $this->tokens;
	}


	/**
	 * Builds a typed token.
	 * @param  string
	 * @param  mixed
	 * @param  int|NULL
	 * @return array  with keys 'value', 'type' and 'line'
	 */
	public static function createToken($value, $type = NULL, $line = NULL)
	{
		return array('value' => $value, 'type' => $type, 'line' => $line);
	}


	/**
	 * Returns the position of the i-th token in the input.
	 *
	 * NOTE(review): the column is 0-based on the first line (it equals the
	 * offset) but 1-based on later lines; left unchanged for backward
	 * compatibility.
	 *
	 * @param  int  token index
	 * @return array  [offset, line, column]; the end of input is used when the token does not exist
	 */
	public function getOffset($i)
	{
		$tokens = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
		$offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
		return array(
			$offset,
			($offset ? substr_count($this->input, "\n", 0, $offset) + 1 : 1),
			$offset - strrpos(substr($this->input, 0, $offset), "\n"),
		);
	}


	/**
	 * Consumes the next token (skipping ignored ones) if it matches any of the
	 * given values or types; with no arguments any token matches.
	 * @param  mixed  ...  wanted values or types
	 * @return string|bool  the token value, or FALSE when nothing matched
	 */
	public function fetch()
	{
		return $this->scan(func_get_args(), TRUE);
	}


	/**
	 * Same as fetch(), but returns the whole consumed token.
	 * @param  mixed  ...  wanted values or types
	 * @return array|string|bool  the token, or FALSE when nothing matched
	 */
	public function fetchToken()
	{
		return $this->scan(func_get_args(), TRUE) === FALSE ? FALSE : $this->current;
	}


	/**
	 * Consumes all consecutive tokens matching the given values or types.
	 * @param  mixed  ...  wanted values or types
	 * @return string|bool  concatenated token values, or FALSE when nothing matched
	 */
	public function fetchAll()
	{
		return $this->scan(func_get_args(), FALSE);
	}


	/**
	 * Consumes all tokens up to (not including) the first one matching the
	 * given values or types.
	 * @param  mixed  ...  terminating values or types
	 * @return string|bool  concatenated token values, or FALSE when nothing was consumed
	 */
	public function fetchUntil($arg)
	{
		return $this->scan(func_get_args(), FALSE, TRUE, TRUE);
	}


	/**
	 * Checks, without moving the cursor, whether the next token matches.
	 * @param  mixed  ...  wanted values or types
	 * @return bool
	 */
	public function isNext($arg)
	{
		// Compare strictly: a matched token "0" is a falsy string and a plain
		// (bool) cast would wrongly report it as "no match".
		return $this->scan(func_get_args(), TRUE, FALSE) !== FALSE;
	}


	/**
	 * Checks, without moving the cursor, whether the token preceding the
	 * current one matches.
	 * @param  mixed  ...  wanted values or types
	 * @return bool
	 */
	public function isPrev($arg)
	{
		// Strict comparison for the same reason as in isNext().
		return $this->scan(func_get_args(), TRUE, FALSE, FALSE, TRUE) !== FALSE;
	}


	/**
	 * Is there a token at the cursor position?
	 * @return bool
	 */
	public function hasNext()
	{
		return isset($this->tokens[$this->position]);
	}


	/**
	 * Is there a token before the current one?
	 * @return bool
	 */
	public function hasPrev()
	{
		// The current token sits at position - 1, so its predecessor needs position >= 2.
		return $this->position > 1;
	}


	/**
	 * Checks whether the current token's value or type is one of the arguments.
	 * @param  mixed  ...  wanted values or types
	 * @return bool
	 */
	public function isCurrent($arg)
	{
		$args = func_get_args();
		if (is_array($this->current)) {
			return in_array($this->current['value'], $args, TRUE)
				|| in_array($this->current['type'], $args, TRUE);
		} else {
			return in_array($this->current, $args, TRUE);
		}
	}


	/**
	 * Moves the cursor back to the start and forgets the current token.
	 * @return void
	 */
	public function reset()
	{
		$this->position = 0;
		$this->current = NULL;
	}


	/**
	 * Walks the token stream looking for wanted tokens.
	 * @param  array  wanted values or types; empty means any token
	 * @param  bool   stop after the first accepted token?
	 * @param  bool   move the cursor and update $current for accepted tokens?
	 * @param  bool   invert the match (accept tokens that are NOT wanted)?
	 * @param  bool   walk backwards, starting at the token before the current one?
	 * @return string|bool  concatenated values of accepted tokens, or FALSE when none was accepted
	 */
	private function scan($wanted, $first, $advance = TRUE, $neg = FALSE, $prev = FALSE)
	{
		$res = FALSE;
		$pos = $this->position + ($prev ? -2 : 0);
		while (isset($this->tokens[$pos])) {
			$token = $this->tokens[$pos];
			$pos += $prev ? -1 : 1;
			// Untyped tokens are plain strings: the value doubles as the type.
			$value = is_array($token) ? $token['value'] : $token;
			$type = is_array($token) ? $token['type'] : $token;
			if (!$wanted || (in_array($value, $wanted, TRUE) || in_array($type, $wanted, TRUE)) ^ $neg) {
				if ($advance) {
					$this->position = $pos;
					$this->current = $token;
				}
				$res .= $value;
				if ($first) {
					break;
				}

			} elseif ($neg || !in_array($type, $this->ignored, TRUE)) {
				// Rejected token that may not be skipped: stop scanning.
				break;
			}
		}
		return $res;
	}

}
267:
268:
269: 270: 271:
/**
 * Thrown by Tokenizer::tokenize() when part of the input matches none of the
 * configured patterns.
 */
class TokenizerException extends \Exception
{
}
275: