1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Neon;
9:
10:
11: 12: 13: 14:
/**
 * Parses a NEON document string into PHP values (arrays, scalars,
 * Entity objects and \DateTimeImmutable instances).
 *
 * The input is first split into tokens with one combined regular expression
 * (see PATTERNS) and the token list is then walked by a recursive parser.
 */
class Decoder
{
    /**
     * Token sub-patterns. decode() wraps each entry in parentheses and joins
     * them with "|"; entries starting with "?:" therefore become non-capturing
     * groups and their matches are dropped from the token list.
     */
    const PATTERNS = [
        // quoted strings: ''' and """ multi-line forms, then single-line ' and "
        '
            \'\'\'\n (?:(?: [^\n] | \n(?![\t\ ]*+\'\'\') )*+ \n)?[\t\ ]*+\'\'\' |
            """\n (?:(?: [^\n] | \n(?![\t\ ]*+""") )*+ \n)?[\t\ ]*+""" |
            \'[^\'\n]*+\' |
            " (?: \\\\. | [^"\\\\\n] )*+ "
        ',
        // unquoted literal
        '
            (?: [^#"\',:=[\]{}()\x00-\x20!`-] | [:-][^"\',\]})\s] )
            (?:
                [^,:=\]})(\x00-\x20]++ |
                :(?! [\s,\]})] | $ ) |
                [\ \t]++ [^#,:=\]})(\x00-\x20]
            )*+
        ',
        // punctuation
        '
            [,:=[\]{}()-]
        ',
        // comment (non-capturing: not emitted as a token)
        '?:\#.*+',
        // new line followed by its indentation
        '\n[\t\ ]*+',
        // other whitespace (non-capturing: not emitted as a token)
        '?:[\t\ ]++',
    ];

    /** Literals matching this pattern are converted to \DateTimeImmutable. */
    const PATTERN_DATETIME = '#\d\d\d\d-\d\d?-\d\d?(?:(?:[Tt]| ++)\d\d?:\d\d:\d\d(?:\.\d*+)? *+(?:Z|[-+]\d\d?(?::?\d\d)?)?)?\z#A';

    /** Literals matching this pattern are converted with hexdec(). */
    const PATTERN_HEX = '#0x[0-9a-fA-F]++\z#A';

    /** Literals matching this pattern are converted with octdec(). */
    const PATTERN_OCTAL = '#0o[0-7]++\z#A';

    /** Literals matching this pattern are converted with bindec(). */
    const PATTERN_BINARY = '#0b[0-1]++\z#A';

    /** Keyword literal => name of the PHP constant it is resolved to via constant(). */
    const SIMPLE_TYPES = [
        'true' => 'TRUE', 'True' => 'TRUE', 'TRUE' => 'TRUE', 'yes' => 'TRUE', 'Yes' => 'TRUE', 'YES' => 'TRUE', 'on' => 'TRUE', 'On' => 'TRUE', 'ON' => 'TRUE',
        'false' => 'FALSE', 'False' => 'FALSE', 'FALSE' => 'FALSE', 'no' => 'FALSE', 'No' => 'FALSE', 'NO' => 'FALSE', 'off' => 'FALSE', 'Off' => 'FALSE', 'OFF' => 'FALSE',
        'null' => 'NULL', 'Null' => 'NULL', 'NULL' => 'NULL',
    ];

    /** Character following a backslash in a double-quoted string => replacement bytes. */
    const ESCAPE_SEQUENCES = [
        't' => "\t", 'n' => "\n", 'r' => "\r", 'f' => "\x0C", 'b' => "\x08", '"' => '"', '\\' => '\\', '/' => '/', '_' => "\xc2\xa0",
    ];

    /** Opening bracket => matching closing bracket. */
    const BRACKETS = [
        '[' => ']',
        '{' => '}',
        '(' => ')',
    ];

    /**
     * Public copy of PATTERNS. It is not read anywhere in this class
     * (decode() uses self::PATTERNS); it is kept so external code that
     * references it keeps working.
     */
    public static $patterns = self::PATTERNS;

    /** @var string normalized input: "\n" prepended and every "\r" removed */
    private $input;

    /** @var array list of [tokenText, byteOffset] pairs produced by preg_split() */
    private $tokens;

    /** @var int index of the token currently being processed */
    private $pos;


    /**
     * Decodes a NEON string.
     *
     * @param  string  $input
     * @return mixed
     * @throws \InvalidArgumentException when $input is not a string
     * @throws Exception on a syntax error or when the input cannot be tokenized
     */
    public function decode($input)
    {
        if (!is_string($input)) {
            throw new \InvalidArgumentException(sprintf('Argument must be a string, %s given.', gettype($input)));
        }
        if (substr($input, 0, 3) === "\xEF\xBB\xBF") { // strip UTF-8 BOM
            $input = substr($input, 3);
        }
        // The leading "\n" guarantees that the first line is preceded by a
        // newline+indent token, exactly like every other line.
        $this->input = "\n" . str_replace("\r", '', $input);

        $pattern = '~(' . implode(')|(', self::PATTERNS) . ')~Amix';
        $tokens = preg_split($pattern, $this->input, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_DELIM_CAPTURE);
        if ($tokens === false) {
            // preg_split() reports PCRE failures by returning false; without this
            // check the value would be passed to end()/count() as if it were an array.
            throw new Exception('Unable to tokenize input (PCRE error code ' . preg_last_error() . ').');
        }
        $this->tokens = $tokens;

        // The pattern is anchored, so text that matches no token is left over as
        // the final chunk; detect it by checking that the last item is a real token.
        $last = end($this->tokens);
        if ($this->tokens && !preg_match($pattern, $last[0])) {
            $this->pos = count($this->tokens) - 1;
            $this->error();
        }

        $this->pos = 0;
        $res = $this->parse(null);

        // Only newline tokens may remain after the top-level value.
        while (isset($this->tokens[$this->pos])) {
            if ($this->tokens[$this->pos][0][0] === "\n") {
                $this->pos++;
            } else {
                $this->error();
            }
        }
        return $res;
    }


    /**
     * Recursive parser working on $this->tokens starting at $this->pos.
     *
     * @param  string|false|null  $indent  indentation of the current block;
     *         null = not yet known (taken from the first line seen),
     *         false = inline mode (inside brackets)
     * @param  mixed  $result  container the parsed items are added to
     * @param  mixed  $key  pending key, meaningful only when $hasKey is true
     * @param  bool  $hasKey  whether a key (or a "-" bullet, as null key) is pending
     * @return mixed
     * @throws Exception on a syntax error
     */
    private function parse($indent, $result = null, $key = null, $hasKey = false)
    {
        $inlineParser = $indent === false;
        $value = null;
        $hasValue = false;
        $tokens = $this->tokens;
        $n = &$this->pos; // shared cursor: nested calls advance the same position
        $count = count($tokens);
        $mainResult = &$result; // $result may later be re-pointed at a sub-array

        for (; $n < $count; $n++) {
            $t = $tokens[$n][0];

            if ($t === ',') { // item separator, valid only in inline mode
                if ((!$hasKey && !$hasValue) || !$inlineParser) {
                    $this->error();
                }
                $this->addValue($result, $hasKey ? $key : null, $hasValue ? $value : null);
                $hasKey = $hasValue = false;

            } elseif ($t === ':' || $t === '=') { // key-value separator
                if ($hasValue && (is_array($value) || is_object($value))) {
                    $this->error('Unacceptable key');

                } elseif ($hasKey && $key === null && $hasValue && !$inlineParser) {
                    // "- key: value": a mapping that starts on the bullet line.
                    // The "- " marker is two columns wide, so following keys of the
                    // same mapping are indented by the current indent plus two spaces.
                    $n++;
                    $result[] = $this->parse($indent . '  ', [], $value, true);
                    $newIndent = isset($tokens[$n], $tokens[$n + 1]) ? (string) substr($tokens[$n][0], 1) : '';
                    if (strlen($newIndent) > strlen($indent)) {
                        $n++;
                        $this->error('Bad indentation');
                    } elseif (strlen($newIndent) < strlen($indent)) {
                        return $mainResult; // block ended, hand control to the parent
                    }
                    $hasKey = $hasValue = false;

                } elseif ($hasKey || !$hasValue) {
                    $this->error();

                } else {
                    $key = (string) $value;
                    $hasKey = true;
                    $hasValue = false;
                    $result = &$mainResult;
                }

            } elseif ($t === '-') { // bullet of a block sequence
                if ($hasKey || $hasValue || $inlineParser) {
                    $this->error();
                }
                $key = null;
                $hasKey = true;

            } elseif (($tmp = self::BRACKETS) && isset($tmp[$t])) { // opening bracket
                if ($hasValue) {
                    // Only "(" may follow a value: it attaches attributes to an entity.
                    if ($t !== '(') {
                        $this->error();
                    }
                    $n++;
                    if ($value instanceof Entity && $value->value === Neon::CHAIN) {
                        end($value->attributes)->attributes = $this->parse(false, []);
                    } else {
                        $value = new Entity($value, $this->parse(false, []));
                    }
                } else {
                    $n++;
                    $value = $this->parse(false, []);
                }
                $hasValue = true;
                // The nested call stops on a closing bracket; it must be the matching one.
                if (!isset($tokens[$n]) || $tokens[$n][0] !== self::BRACKETS[$t]) {
                    $this->error();
                }

            } elseif ($t === ']' || $t === '}' || $t === ')') { // closing bracket
                if (!$inlineParser) {
                    $this->error();
                }
                break;

            } elseif ($t[0] === "\n") { // new line (token also carries the indentation)
                if ($inlineParser) {
                    if ($hasKey || $hasValue) {
                        $this->addValue($result, $hasKey ? $key : null, $hasValue ? $value : null);
                        $hasKey = $hasValue = false;
                    }

                } else {
                    // Skip blank lines; only the last newline token carries the indent that matters.
                    while (isset($tokens[$n + 1]) && $tokens[$n + 1][0][0] === "\n") {
                        $n++;
                    }
                    if (!isset($tokens[$n + 1])) {
                        break; // trailing newline(s) at the end of input
                    }

                    $newIndent = (string) substr($tokens[$n][0], 1);
                    if ($indent === null) { // first line determines the base indentation
                        $indent = $newIndent;
                    }
                    $minlen = min(strlen($newIndent), strlen($indent));
                    if ($minlen && (string) substr($newIndent, 0, $minlen) !== (string) substr($indent, 0, $minlen)) {
                        $n++;
                        $this->error('Invalid combination of tabs and spaces');
                    }

                    if (strlen($newIndent) > strlen($indent)) { // deeper block begins
                        if ($hasValue || !$hasKey) {
                            $n++;
                            $this->error('Bad indentation');
                        }
                        $this->addValue($result, $key, $this->parse($newIndent));
                        $newIndent = isset($tokens[$n], $tokens[$n + 1]) ? (string) substr($tokens[$n][0], 1) : '';
                        if (strlen($newIndent) > strlen($indent)) {
                            $n++;
                            $this->error('Bad indentation');
                        }
                        $hasKey = false;

                    } else {
                        if ($hasValue && !$hasKey) { // lone scalar: stop, the caller decides
                            break;

                        } elseif ($hasKey) {
                            $this->addValue($result, $key, $hasValue ? $value : null);
                            // "key:" followed by "-" items at the same indent: append
                            // those items under the key instead of the parent container.
                            if ($key !== null && !$hasValue && $newIndent === $indent && isset($tokens[$n + 1]) && $tokens[$n + 1][0] === '-') {
                                $result = &$result[$key];
                            }
                            $hasKey = $hasValue = false;
                        }
                    }

                    if (strlen($newIndent) < strlen($indent)) { // block ended
                        return $mainResult;
                    }
                }

            } else { // value token: quoted string or literal
                if ($t[0] === '"' || $t[0] === "'") {
                    if (preg_match('#^...\n++([\t ]*+)#', $t, $m)) { // triple-quoted multi-line string
                        $converted = substr($t, 3, -3);
                        $converted = str_replace("\n" . $m[1], "\n", $converted); // remove the first line's indent from every line
                        $converted = preg_replace('#^\n|\n[\t ]*+\z#', '', $converted);
                    } else {
                        $converted = substr($t, 1, -1);
                    }
                    if ($t[0] === '"') { // escape sequences are processed in double-quoted strings only
                        $converted = preg_replace_callback('#\\\\(?:ud[89ab][0-9a-f]{2}\\\\ud[c-f][0-9a-f]{2}|u[0-9a-f]{4}|x[0-9a-f]{2}|.)#i', [$this, 'cbString'], $converted);
                    }
                } elseif (($fix56 = self::SIMPLE_TYPES) && isset($fix56[$t]) && (!isset($tokens[$n + 1][0]) || ($tokens[$n + 1][0] !== ':' && $tokens[$n + 1][0] !== '='))) {
                    // Keyword (true/no/null/...) unless it is used as a key.
                    $converted = constant(self::SIMPLE_TYPES[$t]);
                } elseif (is_numeric($t)) {
                    $converted = $t * 1; // numeric string => int or float
                } elseif (preg_match(self::PATTERN_HEX, $t)) {
                    $converted = hexdec($t);
                } elseif (preg_match(self::PATTERN_OCTAL, $t)) {
                    $converted = octdec($t);
                } elseif (preg_match(self::PATTERN_BINARY, $t)) {
                    $converted = bindec($t);
                } elseif (preg_match(self::PATTERN_DATETIME, $t)) {
                    // NOTE(review): the pattern checks only the shape, so an out-of-range
                    // date presumably makes the constructor throw its own exception - confirm.
                    $converted = new \DateTimeImmutable($t);
                } else {
                    $converted = $t;
                }
                if ($hasValue) {
                    // A second value is allowed only directly after an entity: build a chain.
                    if ($value instanceof Entity) {
                        if ($value->value !== Neon::CHAIN) {
                            $value = new Entity(Neon::CHAIN, [$value]);
                        }
                        $value->attributes[] = new Entity($converted);
                    } else {
                        $this->error();
                    }
                } else {
                    $value = $converted;
                    $hasValue = true;
                }
            }
        }

        // Flush whatever is still pending when the loop ends.
        if ($inlineParser) {
            if ($hasKey || $hasValue) {
                $this->addValue($result, $hasKey ? $key : null, $hasValue ? $value : null);
            }
        } else {
            if ($hasValue && !$hasKey) { // block consisting of a single scalar
                if ($result === null) {
                    $result = $value;
                } else {
                    $this->error();
                }
            } elseif ($hasKey) {
                $this->addValue($result, $key, $hasValue ? $value : null);
            }
        }
        return $mainResult;
    }


    /**
     * Stores $value in $result: appended when $key is null, otherwise under $key.
     *
     * @param  mixed  $result  target container, modified in place
     * @param  mixed  $key
     * @param  mixed  $value
     * @return void
     * @throws Exception when $key already exists in $result
     */
    private function addValue(&$result, $key, $value)
    {
        if ($key === null) {
            $result[] = $value;
        } elseif ($result && array_key_exists($key, $result)) {
            $this->error("Duplicated key '$key'");
        } else {
            $result[$key] = $value;
        }
    }


    /**
     * preg_replace_callback() handler converting one escape sequence of a
     * double-quoted string to the bytes it represents.
     *
     * @param  array  $m  regex match; $m[0] is the whole sequence including the backslash
     * @return string
     * @throws Exception on a lone surrogate or an unknown escape sequence
     */
    private function cbString($m)
    {
        $sq = $m[0];
        if (($fix56 = self::ESCAPE_SEQUENCES) && isset($fix56[$sq[1]])) {
            return self::ESCAPE_SEQUENCES[$sq[1]];
        } elseif ($sq[1] === 'u' && strlen($sq) >= 6) {
            $lead = hexdec(substr($sq, 2, 4));
            $tail = hexdec(substr($sq, 8, 4)); // 0 when the match is a single \uXXXX
            // Surrogate pair: 0x10000 + ((lead - 0xD800) << 10) + (tail - 0xDC00);
            // the constant 0x2400 is 0x10000 - 0xDC00.
            $code = $tail ? (0x2400 + (($lead - 0xD800) << 10) + $tail) : $lead;
            if ($code >= 0xD800 && $code <= 0xDFFF) {
                $this->error("Invalid UTF-8 (lone surrogate) $sq");
            }
            return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', $code));
        } elseif ($sq[1] === 'x' && strlen($sq) === 4) {
            return chr(hexdec(substr($sq, 2)));
        } else {
            $this->error("Invalid escaping sequence $sq");
        }
    }


    /**
     * Throws an Exception describing a problem at the current token.
     *
     * "%s" in $message is replaced by the current token (at most 40 bytes,
     * newlines shown as "<new line>") or by "end" when there is no current
     * token; the line and column are appended.
     *
     * @param  string  $message
     * @return void
     * @throws Exception always
     */
    private function error($message = "Unexpected '%s'")
    {
        $last = isset($this->tokens[$this->pos]) ? $this->tokens[$this->pos] : null;
        $offset = $last ? $last[1] : strlen($this->input);
        $text = substr($this->input, 0, $offset);
        // $this->input starts with an artificial "\n", so counting newlines
        // before the token yields a 1-based line number.
        $line = substr_count($text, "\n");
        $col = $offset - strrpos("\n" . $text, "\n") + 1;
        $token = $last ? str_replace("\n", '<new line>', substr($last[0], 0, 40)) : 'end';
        throw new Exception(str_replace('%s', $token, $message) . " on line $line, column $col.");
    }
}
360: