1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10:
11: 12: 13:
/**
 * Simple lexical analyser: cuts an input string into consecutive tokens
 * using a set of regular expression patterns.
 *
 * All patterns are merged into a single anchored regular expression in which
 * every pattern becomes one capturing group. Token types are derived from the
 * index of the group that matched, so patterns should not contain capturing
 * groups of their own (use non-capturing groups instead).
 */
class Tokenizer
{
    /** Indexes into a single token array returned by tokenize(). */
    const VALUE = 0,
        OFFSET = 1,
        TYPE = 2;

    /** @var string  combined regular expression built from all patterns */
    private $re;

    /** @var array|bool  token type names (keys of the patterns), or FALSE when the patterns form a plain list */
    private $types;


    /**
     * @param  array   regular expressions without delimiters; when the array keys
     *                 are not exactly 0..n-1, the keys are used as token types
     * @param  string  PCRE modifiers appended to the combined expression
     */
    public function __construct(array $patterns, $flags = '')
    {
        // '~' is the delimiter, so it must not appear unescaped inside a pattern;
        // the 'A' modifier anchors every match to the current position.
        $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
        $keys = array_keys($patterns);
        $this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
    }


    /**
     * Tokenizes the whole input string.
     *
     * With named patterns each token is array(VALUE => string, OFFSET => int, TYPE => mixed).
     * With a plain list of patterns each token is array(string, int offset) as produced
     * by preg_split() with PREG_SPLIT_OFFSET_CAPTURE.
     *
     * @param  string
     * @return array   list of tokens
     * @throws TokenizerException  when some part of the input matches no pattern
     */
    public function tokenize($input)
    {
        if ($this->types) {
            preg_match_all($this->re, $input, $tokens, PREG_SET_ORDER);
            $len = 0;
            $count = count($this->types);
            foreach ($tokens as & $match) {
                $type = NULL;
                // The first non-empty capturing group tells which pattern matched.
                for ($i = 1; $i <= $count; $i++) {
                    if (!isset($match[$i])) {
                        break;
                    } elseif ($match[$i] !== '') {
                        $type = $this->types[$i - 1];
                        break;
                    }
                }
                $match = array(self::VALUE => $match[0], self::OFFSET => $len, self::TYPE => $type);
                $len += strlen($match[self::VALUE]);
            }
            unset($match); // break the reference so the last token is not left aliased

            // Matching is anchored, so it stops at the first unrecognized byte.
            if ($len !== strlen($input)) {
                $errorOffset = $len;
            }

        } else {
            $tokens = preg_split($this->re, $input, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_DELIM_CAPTURE);
            $last = end($tokens);
            // The tokens are the captured delimiters; a trailing piece that is not
            // itself a match is the unrecognized rest of the input.
            if ($tokens && !preg_match($this->re, $last[0])) {
                $errorOffset = $last[1];
            }
        }

        if (isset($errorOffset)) {
            list($line, $col) = static::getCoordinates($input, $errorOffset);
            $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
            throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
        }
        return $tokens;
    }


    /**
     * Converts a byte offset within the text to 1-based line and column numbers.
     * Lines are separated by "\n"; the column is counted in bytes. An offset
     * beyond the end of the text is treated as the end of the text.
     *
     * @param  string
     * @param  int
     * @return array   array(line, column)
     */
    public static function getCoordinates($text, $offset)
    {
        $text = (string) substr($text, 0, $offset);
        // The prepended newline guarantees strrpos() always finds a line start.
        $lineStart = strrpos("\n" . $text, "\n");
        return array(substr_count($text, "\n") + 1, strlen($text) - $lineStart + 1);
    }

}
97: