1: <?php
2:
3: 4: 5: 6: 7:
8:
9:
10:
11: 12: 13: 14: 15: 16:
/**
 * Regular-expression based tokenizer with a cursor for walking the produced tokens.
 *
 * The patterns given to the constructor are joined into one anchored alternation.
 * tokenize() splits an input string into tokens, and the fetch*()/is*() methods
 * move through or peek at the resulting token list.
 */
class Tokenizer extends Object
{
    /** @var array tokens produced by the last tokenize() call */
    public $tokens;

    /** @var int index in $tokens at which the next forward scan starts */
    public $position = 0;

    /** @var array token types (or values, for untyped tokens) that a scan may skip over */
    public $ignored = array();

    /** @var string input passed to the last tokenize() call */
    private $input;

    /** @var string regular expression assembled from the patterns */
    private $re;

    /** @var array|FALSE pattern keys used as token types, or FALSE when the patterns form a plain 0..n-1 list */
    private $types;

    /** @var array|string|NULL token most recently consumed by an advancing scan */
    public $current;


    /**
     * Builds the matching expression from the given patterns.
     *
     * @param  array   $patterns  regular expression fragments; when the keys are not exactly
     *                            0..n-1 they are used as token types
     * @param  string  $flags     additional PCRE modifiers appended after the 'A' modifier
     */
    public function __construct(array $patterns, $flags = '')
    {
        $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
        $keys = array_keys($patterns);
        $this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
    }


    /**
     * Splits the input into tokens and stores them in $tokens.
     *
     * With typed patterns each token is an array built by createToken(); otherwise each
     * token is a plain string. The cursor ($position, $current) is not touched here.
     *
     * @param  string  $input
     * @return array   the token list
     * @throws TokenizerException when part of the input is not matched by any pattern
     */
    public function tokenize($input)
    {
        $this->input = $input;
        if ($this->types) {
            $this->tokens = Strings::matchAll($input, $this->re);
            $len = 0;
            $count = count($this->types);
            $line = 1;
            foreach ($this->tokens as & $match) {
                // The first non-empty capture group tells which pattern produced the match.
                $type = NULL;
                for ($i = 1; $i <= $count; $i++) {
                    if (!isset($match[$i])) {
                        break;
                    } elseif ($match[$i] != NULL) {
                        $type = $this->types[$i - 1];
                        break;
                    }
                }
                $match = self::createToken($match[0], $type, $line);
                $len += strlen($match['value']);
                $line += substr_count($match['value'], "\n");
            }
            // Drop the reference so later use of $match cannot overwrite the last token.
            unset($match);

            // Matching is anchored, so it stops at the first unrecognised byte.
            if ($len !== strlen($input)) {
                $errorOffset = $len;
            }

        } else {
            $this->tokens = Strings::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
            // A trailing piece that does not itself match is unrecognised input.
            if ($this->tokens && !Strings::match(end($this->tokens), $this->re)) {
                $tmp = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
                list(, $errorOffset) = end($tmp);
            }
        }

        if (isset($errorOffset)) {
            list($line, $col) = self::getCoordinates($input, $errorOffset);
            $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
            throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
        }
        return $this->tokens;
    }


    /**
     * Creates the array representation of a typed token.
     *
     * @param  string  $value
     * @param  mixed   $type
     * @param  int     $line
     * @return array   with keys 'value', 'type' and 'line'
     */
    public static function createToken($value, $type = NULL, $line = NULL)
    {
        return array('value' => $value, 'type' => $type, 'line' => $line);
    }


    /**
     * Returns the position of the $i-th piece of the re-split input.
     *
     * When no such piece exists, the position just past the end of the input is returned.
     * Line and column are both 1-based.
     * NOTE(review): assumes the pieces returned by Strings::split() line up with the
     * indexes of $tokens - confirm against Strings::split().
     *
     * @param  int    $i
     * @return array  [byte offset, line, column]
     */
    public function getOffset($i)
    {
        $tokens = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
        $offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
        list($line, $col) = self::getCoordinates($this->input, $offset);
        return array($offset, $line, $col);
    }


    /**
     * Consumes the next token matching any of the arguments (value or type).
     * Without arguments any token matches.
     *
     * @return string|FALSE  value of the token, or FALSE when nothing matched
     */
    public function fetch()
    {
        return $this->scan(func_get_args(), TRUE);
    }


    /**
     * Consumes the next token matching any of the arguments (value or type).
     *
     * @return array|string|FALSE  the consumed token, or FALSE when nothing matched
     */
    public function fetchToken()
    {
        return $this->scan(func_get_args(), TRUE) === FALSE ? FALSE : $this->current;
    }


    /**
     * Consumes all following tokens that match any of the arguments (value or type).
     *
     * @return string|FALSE  concatenated values, or FALSE when nothing matched
     */
    public function fetchAll()
    {
        return $this->scan(func_get_args(), FALSE);
    }


    /**
     * Consumes all following tokens up to, but not including, the first one that
     * matches any of the arguments (value or type).
     *
     * @param  mixed  $arg  value or type to stop at; further arguments are accepted
     * @return string|FALSE  concatenated values, or FALSE when nothing was consumed
     */
    public function fetchUntil($arg)
    {
        return $this->scan(func_get_args(), FALSE, TRUE, TRUE);
    }


    /**
     * Checks, without moving the cursor, whether the next token matches any of the arguments.
     *
     * @param  mixed  $arg  value or type; further arguments are accepted
     * @return bool
     */
    public function isNext($arg)
    {
        // Compare against FALSE instead of casting: a token with value "0" is falsy.
        return $this->scan(func_get_args(), TRUE, FALSE) !== FALSE;
    }


    /**
     * Checks, without moving the cursor, whether the token before the current one
     * matches any of the arguments.
     *
     * @param  mixed  $arg  value or type; further arguments are accepted
     * @return bool
     */
    public function isPrev($arg)
    {
        // Compare against FALSE instead of casting: a token with value "0" is falsy.
        return $this->scan(func_get_args(), TRUE, FALSE, FALSE, TRUE) !== FALSE;
    }


    /**
     * Tells whether a token exists at the cursor position.
     *
     * @return bool
     */
    public function hasNext()
    {
        return isset($this->tokens[$this->position]);
    }


    /**
     * Tells whether a token exists before the current one. Backward scans start at
     * $position - 2, so at least two tokens must have been passed.
     *
     * @return bool
     */
    public function hasPrev()
    {
        return $this->position > 1;
    }


    /**
     * Checks whether the current token's value or type is identical to any of the arguments.
     *
     * @param  mixed  $arg  value or type; further arguments are accepted
     * @return bool
     */
    public function isCurrent($arg)
    {
        $args = func_get_args();
        if (is_array($this->current)) {
            return in_array($this->current['value'], $args, TRUE)
                || in_array($this->current['type'], $args, TRUE);
        } else {
            return in_array($this->current, $args, TRUE);
        }
    }


    /**
     * Moves the cursor back to the first token and clears the current token.
     *
     * @return void
     */
    public function reset()
    {
        $this->position = 0;
        $this->current = NULL;
    }


    /**
     * Walks the token list from the cursor and collects the values of accepted tokens.
     *
     * A token is accepted when its value or type is in $wanted (inverted when $neg is set),
     * or when $wanted is empty. A rejected token stops the walk unless $neg is off and its
     * type is listed in $ignored, in which case it is skipped.
     *
     * @param  array  $wanted   values or types to look for
     * @param  bool   $first    stop after the first accepted token
     * @param  bool   $advance  update $position and $current for accepted tokens
     * @param  bool   $neg      accept tokens that do NOT match $wanted
     * @param  bool   $prev     walk backwards, starting before the current token
     * @return string|FALSE  concatenated values of accepted tokens, or FALSE when none
     */
    private function scan($wanted, $first, $advance = TRUE, $neg = FALSE, $prev = FALSE)
    {
        $res = FALSE;
        // $position points past the current token, hence -2 to reach the one before it.
        $pos = $this->position + ($prev ? -2 : 0);
        while (isset($this->tokens[$pos])) {
            $token = $this->tokens[$pos];
            $pos += $prev ? -1 : 1;
            // Untyped tokens are plain strings; the string stands in for both value and type.
            $value = is_array($token) ? $token['value'] : $token;
            $type = is_array($token) ? $token['type'] : $token;
            if (!$wanted || (in_array($value, $wanted, TRUE) || in_array($type, $wanted, TRUE)) ^ $neg) {
                if ($advance) {
                    $this->position = $pos;
                    $this->current = $token;
                }
                $res .= $value;
                if ($first) {
                    break;
                }

            } elseif ($neg || !in_array($type, $this->ignored, TRUE)) {
                break;
            }
        }
        return $res;
    }


    /**
     * Converts a byte offset within a text into a 1-based line and column.
     *
     * @param  string  $text
     * @param  int     $offset  byte offset into $text
     * @return array   [line, column]
     */
    private static function getCoordinates($text, $offset)
    {
        $before = substr($text, 0, $offset);
        // Prepending "\n" lets strrpos() always find a line start, so the first line
        // needs no special case and the column is 1-based on every line.
        return array(
            substr_count($before, "\n") + 1,
            $offset - strrpos("\n" . $before, "\n") + 1,
        );
    }

}
266:
267:
268: 269: 270: 271:
/**
 * Exception thrown by Tokenizer::tokenize() when the input contains text
 * that none of the tokenizer's patterns match.
 */
class TokenizerException extends Exception
{
    // Marker type only: all behaviour is inherited from Exception.
}
275: