1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Latte;
9:
10:
11: 12: 13:
/**
 * Latte template tokenizer.
 *
 * Splits a template string into a flat list of Token objects (plain text,
 * macro tags, macro comments, HTML tag begin/end and HTML attribute begin/end).
 * The parser is a small state machine: the current state ("context") selects
 * which private context*() handler consumes the next piece of input.
 */
class Parser
{
    use Strict;

    /** Regular expression fragment matching one single- or double-quoted string with backslash escapes. */
    const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*+\'|"(?:\\\\.|[^"\\\\])*+"';

    /** Prefix of special HTML attributes whose quoted value is read as one raw string (see contextHtmlTag()). */
    const N_PREFIX = 'n:';

    /** Content types accepted by setContentType(); aliases of the Engine constants. */
    const
        CONTENT_HTML = Engine::CONTENT_HTML,
        CONTENT_XHTML = Engine::CONTENT_XHTML,
        CONTENT_XML = Engine::CONTENT_XML,
        CONTENT_TEXT = Engine::CONTENT_TEXT;

    /**
     * Parser contexts.
     *
     * parse() calls the method named 'context' . <value>, so every value here
     * must have a matching private context*() handler (PHP resolves method
     * names case-insensitively, hence 'htmlText' -> contextHtmlText()).
     */
    const
        CONTEXT_NONE = 'none',
        CONTEXT_MACRO = 'macro',
        CONTEXT_HTML_TEXT = 'htmlText',
        CONTEXT_HTML_TAG = 'htmlTag',
        CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
        CONTEXT_HTML_COMMENT = 'htmlComment',
        CONTEXT_HTML_CDATA = 'htmlCData';

    /** @var string  name of the syntax (key of $syntaxes) selected at the start of parse() and on every syntax reset */
    public $defaultSyntax = 'latte';

    /** @var mixed  not read or written anywhere in this class. NOTE(review): presumably consumed by other components; confirm before removing */
    public $shortNoEscape;

    /** @var array  syntax name => [regex of the opening delimiter, regex of the closing delimiter] */
    public $syntaxes = [
        'latte' => ['\{(?![\s\'"{}])', '\}'],
        'double' => ['\{\{(?![\s\'"{}])', '\}\}'],
        'off' => ['\{(?=/syntax\})', '\}'],
    ];

    /** @var string[]  currently active [opening, closing] delimiter regexes, see setDelimiters() */
    private $delimiters;

    /** @var string  template source being parsed (BOM removed, CRLF converted to LF) */
    private $input;

    /** @var Token[]  tokens produced so far */
    private $output;

    /** @var int  byte position in $input where the next match starts */
    private $offset;

    /**
     * @var array  [context name, context data]. The data is the closing quote
     *             for HTML attributes, the regex of the comment terminator for
     *             HTML comments, and [previous context, opening delimiter text]
     *             while inside a macro.
     */
    private $context = [self::CONTEXT_HTML_TEXT, null];

    /** @var string|null  lower-cased name of the last seen HTML tag, prefixed with '/' for a closing tag */
    private $lastHtmlTag;

    /** @var string|null  value of $lastHtmlTag at the moment an n:syntax attribute was seen */
    private $syntaxEndTag;

    /** @var int  nesting counter used to find the end tag that terminates an n:syntax scope */
    private $syntaxEndLevel = 0;

    /** @var bool|null  true when the content type is XML; set by setContentType() */
    private $xmlMode;


    /**
     * Tokenizes a template.
     *
     * @param  string  $input  template source in UTF-8
     * @return Token[]
     * @throws \InvalidArgumentException  when the input is not valid UTF-8 or the default syntax is unknown
     * @throws CompileException  when a macro is opened but cannot be matched to its end
     */
    public function parse($input)
    {
        // drop the UTF-8 byte order mark
        if (Helpers::startsWith($input, "\xEF\xBB\xBF")) {
            $input = substr($input, 3);
        }

        $this->input = $input = str_replace("\r\n", "\n", $input);
        $this->offset = 0;
        $this->output = [];

        // an empty pattern with the /u modifier fails exactly when the subject is not valid UTF-8
        if (!preg_match('##u', $input)) {
            // measure the longest well-formed prefix and move the offset just past it,
            // so that getLine() reports the line holding the first invalid byte
            preg_match('#(?:[\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})*+#A', $input, $m);
            $this->offset = strlen($m[0]) + 1;
            throw new \InvalidArgumentException('Template is not valid UTF-8 stream.');
        }

        $this->setSyntax($this->defaultSyntax);
        $this->lastHtmlTag = $this->syntaxEndTag = null;

        $tokenCount = 0;
        while ($this->offset < strlen($input)) {
            // dispatch to the handler of the current context; false means "nothing more to match"
            if ($this->{'context' . $this->context[0]}() === false) {
                break;
            }
            // run filter() once over every token added by the handler
            while ($tokenCount < count($this->output)) {
                $this->filter($this->output[$tokenCount++]);
            }
        }
        if ($this->context[0] === self::CONTEXT_MACRO) {
            throw new CompileException('Malformed macro');
        }

        // whatever is left after the last match is plain text
        if ($this->offset < strlen($input)) {
            $this->addToken(Token::TEXT, substr($this->input, $this->offset));
        }
        return $this->output;
    }


    /**
     * Handles CONTEXT_HTML_TEXT: looks for the start of an HTML tag, of a
     * comment-like construct (<!, <!--, <?) or of a macro.
     *
     * @return bool|null  false when nothing matched, null otherwise
     */
    private function contextHtmlText()
    {
        $matches = $this->match('~
            (?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z][a-z0-9:_.-]*)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
            <(?P<htmlcomment>!(?:--(?!>))?|\?(?!=|php))| ## begin of <!, <!--, <!DOCTYPE, <?, but not <?php and <?=
            (?P<macro>' . $this->delimiters[0] . ')
        ~xsi');

        if (!empty($matches['htmlcomment'])) {
            $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
            // regex fragment that must precede '>' to close the construct:
            // '--' for <!--, '\?' for <? in XML mode, nothing otherwise
            $end = $matches['htmlcomment'] === '!--' ? '--' : ($matches['htmlcomment'] === '?' && $this->xmlMode ? '\?' : '');
            $this->setContext(self::CONTEXT_HTML_COMMENT, $end);

        } elseif (!empty($matches['tag'])) {
            $token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
            $token->name = $matches['tag'];
            $token->closing = (bool) $matches['closing'];
            $this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
            $this->setContext(self::CONTEXT_HTML_TAG);

        } else {
            return $this->processMacro($matches);
        }
    }


    /**
     * Handles CONTEXT_HTML_CDATA (content entered after a <script> or <style>
     * start tag): only the matching end tag or a macro is recognised.
     *
     * @return bool|null  false when nothing matched, null otherwise
     */
    private function contextHtmlCData()
    {
        $matches = $this->match('~
            </(?P<tag>' . $this->lastHtmlTag . ')(?![a-z0-9:])| ## end HTML tag </tag
            (?P<macro>' . $this->delimiters[0] . ')
        ~xsi');

        if (!empty($matches['tag'])) {
            $token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
            $token->name = $this->lastHtmlTag;
            $token->closing = true;
            $this->lastHtmlTag = '/' . $this->lastHtmlTag;
            $this->setContext(self::CONTEXT_HTML_TAG);
        } else {
            return $this->processMacro($matches);
        }
    }


    /**
     * Handles CONTEXT_HTML_TAG: looks for the end of the tag, a macro, or the
     * beginning of an attribute.
     *
     * @return bool|null  false when nothing matched, null otherwise
     */
    private function contextHtmlTag()
    {
        $matches = $this->match('~
            (?P<end>\s?/?>)([ \t]*\n)?| ## end of HTML tag
            (?P<macro>' . $this->delimiters[0] . ')|
            \s*(?P<attr>[^\s"\'>/={]+)(?:\s*=\s*(?P<value>["\']|[^\s"\'=<>`{]+))? ## beginning of HTML attribute
        ~xsi');

        if (!empty($matches['end'])) {
            $this->addToken(Token::HTML_TAG_END, $matches[0]);
            $empty = strpos($matches[0], '/') !== false;
            // outside XML mode the content of a non-empty <script>/<style> is treated as CDATA
            $this->setContext(!$this->xmlMode && !$empty && in_array($this->lastHtmlTag, ['script', 'style'], true) ? self::CONTEXT_HTML_CDATA : self::CONTEXT_HTML_TEXT);

        } elseif (isset($matches['attr']) && $matches['attr'] !== '') {
            $token = $this->addToken(Token::HTML_ATTRIBUTE_BEGIN, $matches[0]);
            $token->name = $matches['attr'];
            $token->value = isset($matches['value']) ? $matches['value'] : '';

            // a lone quote means a quoted value follows
            if ($token->value === '"' || $token->value === "'") {
                if (Helpers::startsWith($token->name, self::N_PREFIX)) {
                    // n:attribute: swallow everything up to the closing quote as the raw value
                    $token->value = '';
                    if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
                        $token->value = $m[1];
                        $token->text .= $m[0];
                    }
                } else {
                    $this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $matches['value']);
                }
            }
        } else {
            return $this->processMacro($matches);
        }
    }


    /**
     * Handles CONTEXT_HTML_ATTRIBUTE: looks for the closing quote (stored in
     * the context data) or a macro inside a quoted attribute value.
     *
     * @return bool|null  false when nothing matched, null otherwise
     */
    private function contextHtmlAttribute()
    {
        $matches = $this->match('~
            (?P<quote>' . $this->context[1] . ')| ## end of HTML attribute
            (?P<macro>' . $this->delimiters[0] . ')
        ~xsi');

        if (!empty($matches['quote'])) {
            $this->addToken(Token::HTML_ATTRIBUTE_END, $matches[0]);
            $this->setContext(self::CONTEXT_HTML_TAG);
        } else {
            return $this->processMacro($matches);
        }
    }


    /**
     * Handles CONTEXT_HTML_COMMENT: looks for the terminator of the construct
     * opened in contextHtmlText() (context data followed by '>') or a macro.
     *
     * @return bool|null  false when nothing matched, null otherwise
     */
    private function contextHtmlComment()
    {
        $matches = $this->match('~
            (?P<htmlcomment>' . $this->context[1] . '>)| ## end of HTML comment
            (?P<macro>' . $this->delimiters[0] . ')
        ~xsi');

        if (!empty($matches['htmlcomment'])) {
            $this->addToken(Token::HTML_TAG_END, $matches[0]);
            $this->setContext(self::CONTEXT_HTML_TEXT);
        } else {
            return $this->processMacro($matches);
        }
    }


    /**
     * Handles CONTEXT_NONE (non-HTML content): only macros are recognised.
     *
     * @return bool|null  false when nothing matched, null otherwise
     */
    private function contextNone()
    {
        $matches = $this->match('~
            (?P<macro>' . $this->delimiters[0] . ')
        ~xsi');
        return $this->processMacro($matches);
    }


    /**
     * Handles CONTEXT_MACRO: reads the rest of a macro tag or macro comment,
     * anchored at the current offset, and then restores the context that was
     * active before the opening delimiter.
     *
     * @return void
     * @throws CompileException  when neither a macro nor a comment can be matched
     */
    private function contextMacro()
    {
        $matches = $this->match('~
            (?P<comment>\\*.*?\\*' . $this->delimiters[1] . '\n{0,2})|
            (?P<macro>(?>
                ' . self::RE_STRING . '|
                \{(?>' . self::RE_STRING . '|[^\'"{}])*+\}|
                [^\'"{}]+
            )++)
            ' . $this->delimiters[1] . '
            (?P<rmargin>[ \t]*(?=\n))?
        ~xsiA');

        if (!empty($matches['macro'])) {
            // context[1] is [previous context, opening delimiter text]; the token text is the whole tag
            $token = $this->addToken(Token::MACRO_TAG, $this->context[1][1] . $matches[0]);
            list($token->name, $token->value, $token->modifiers, $token->empty, $token->closing) = $this->parseMacroTag($matches['macro']);
            $this->context = $this->context[1][0];

        } elseif (!empty($matches['comment'])) {
            $this->addToken(Token::COMMENT, $this->context[1][1] . $matches[0]);
            $this->context = $this->context[1][0];

        } else {
            throw new CompileException('Malformed macro');
        }
    }


    /**
     * Switches to CONTEXT_MACRO when the given match contains an opening
     * delimiter, remembering the current context and the delimiter text.
     *
     * @param  array  $matches  result of match()
     * @return bool|null  false when no macro was matched, null otherwise
     */
    private function processMacro($matches)
    {
        if (!empty($matches['macro'])) {
            $this->setContext(self::CONTEXT_MACRO, [$this->context, $matches['macro']]);
        } else {
            return false;
        }
    }


    /**
     * Matches a regular expression against the input starting at the current
     * offset. Text skipped before the match is emitted as a TEXT token and the
     * offset is moved to the end of the match.
     *
     * @param  string  $re  complete regular expression including delimiters and modifiers
     * @return array  captured groups as plain strings, or an empty array when nothing matched
     * @throws RegexpException  when PCRE reports an error
     */
    private function match($re)
    {
        if (!preg_match($re, $this->input, $matches, PREG_OFFSET_CAPTURE, $this->offset)) {
            if (preg_last_error()) {
                throw new RegexpException(null, preg_last_error());
            }
            return [];
        }

        // everything between the old offset and the start of the match is plain text
        $value = substr($this->input, $this->offset, $matches[0][1] - $this->offset);
        if ($value !== '') {
            $this->addToken(Token::TEXT, $value);
        }
        $this->offset = $matches[0][1] + strlen($matches[0][0]);
        // PREG_OFFSET_CAPTURE yields [text, offset] pairs; callers only need the text
        foreach ($matches as $k => $v) {
            $matches[$k] = $v[0];
        }
        return $matches;
    }


    /**
     * Selects the initial context for the given content type: HTML, XHTML and
     * XML start in CONTEXT_HTML_TEXT (and update the XML-mode flag), anything
     * else starts in CONTEXT_NONE.
     *
     * @param  string  $type  one of the CONTENT_* constants
     * @return static
     */
    public function setContentType($type)
    {
        if (in_array($type, [self::CONTENT_HTML, self::CONTENT_XHTML, self::CONTENT_XML], true)) {
            $this->setContext(self::CONTEXT_HTML_TEXT);
            $this->xmlMode = $type === self::CONTENT_XML;
        } else {
            $this->setContext(self::CONTEXT_NONE);
        }
        return $this;
    }


    /**
     * Sets the current parser context.
     *
     * @param  string  $context  one of the CONTEXT_* constants
     * @param  mixed   $quote    context-specific data stored next to the context name
     * @return static
     */
    public function setContext($context, $quote = null)
    {
        $this->context = [$context, $quote];
        return $this;
    }


    /**
     * Activates the delimiters of a named syntax.
     *
     * @param  string|null  $type  key of $syntaxes; an empty value selects $defaultSyntax
     * @return static
     * @throws \InvalidArgumentException  when the syntax name is not defined in $syntaxes
     */
    public function setSyntax($type)
    {
        $type = $type ?: $this->defaultSyntax;
        if (isset($this->syntaxes[$type])) {
            $this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
        } else {
            throw new \InvalidArgumentException("Unknown syntax '$type'");
        }
        return $this;
    }


    /**
     * Sets the macro delimiters directly.
     *
     * @param  string  $left   regular expression of the opening delimiter
     * @param  string  $right  regular expression of the closing delimiter
     * @return static
     */
    public function setDelimiters($left, $right)
    {
        $this->delimiters = [$left, $right];
        return $this;
    }


    /**
     * Splits the inside of a macro tag (the text between the delimiters) into
     * its parts.
     *
     * @param  string  $tag  macro tag content without delimiters
     * @return array|null  [name, arguments, modifiers, is empty, is closing], or null when the tag does not match
     * @throws RegexpException  when PCRE reports an error
     */
    public function parseMacroTag($tag)
    {
        if (!preg_match('~^
            (?P<closing>/?)
            (
                (?P<name>\?|[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))| ## ?, name, /name, but not function( or class:: or namespace\
                (?P<noescape>!?)(?P<shortname>[=\~#%^&_]?) ## !expression, !=expression, ...
            )(?P<args>(?:' . self::RE_STRING . '|[^\'"])*?)
            (?P<modifiers>(?<!\|)\|[a-z](?P<modArgs>(?:' . self::RE_STRING . '|(?:\((?P>modArgs)\))|[^\'"/()]|/(?=.))*+))?
            (?P<empty>/?\z)
        ()\z~isx', $tag, $match)) {
            if (preg_last_error()) {
                throw new RegexpException(null, preg_last_error());
            }
            return null;
        }
        if ($match['name'] === '') {
            // no explicit name: use the one-character shortcut, or '=' for a bare
            // expression (a bare closing tag keeps an empty name)
            $match['name'] = $match['shortname'] ?: ($match['closing'] ? '' : '=');
            if ($match['noescape']) {
                trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
                $match['modifiers'] .= '|noescape';
            }
        }
        return [$match['name'], trim($match['args']), $match['modifiers'], (bool) $match['empty'], (bool) $match['closing']];
    }


    /**
     * Appends a new token to the output.
     *
     * @param  string  $type  one of the Token type constants
     * @param  string  $text  source text covered by the token
     * @return Token  the token that was added
     */
    private function addToken($type, $text)
    {
        $this->output[] = $token = new Token;
        $token->type = $type;
        $token->text = $text;
        // current line minus the line breaks contained in the token text (leading whitespace ignored)
        $token->line = $this->getLine() - substr_count(ltrim($text), "\n");
        return $token;
    }


    /**
     * Returns the 1-based line number of the current offset in the input.
     *
     * @return int
     */
    public function getLine()
    {
        return $this->offset
            ? substr_count(substr($this->input, 0, $this->offset - 1), "\n") + 1
            : 1;
    }


    /**
     * Post-processes each new token and reacts to the ones that change parser
     * state: {syntax}, n:syntax and {contentType}.
     *
     * @return void
     */
    protected function filter(Token $token)
    {
        // NOTE(review): parseMacroTag() never returns a name containing '/', so this
        // branch looks unreachable for tokens built by parse(); {/syntax} appears to be
        // handled by the next branch through setSyntax('') - confirm before removing.
        if ($token->type === Token::MACRO_TAG && $token->name === '/syntax') {
            $this->setSyntax($this->defaultSyntax);
            $token->type = Token::COMMENT;

        } elseif ($token->type === Token::MACRO_TAG && $token->name === 'syntax') {
            $this->setSyntax($token->value);
            $token->type = Token::COMMENT;

        } elseif ($token->type === Token::HTML_ATTRIBUTE_BEGIN && $token->name === 'n:syntax') {
            // the syntax stays active until the end tag matching the current element
            $this->setSyntax($token->value);
            $this->syntaxEndTag = $this->lastHtmlTag;
            $this->syntaxEndLevel = 1;
            $token->type = Token::COMMENT;

        } elseif ($token->type === Token::HTML_TAG_BEGIN && $this->lastHtmlTag === $this->syntaxEndTag) {
            // nested element with the same name: one more end tag to skip
            $this->syntaxEndLevel++;

        } elseif ($token->type === Token::HTML_TAG_END && $this->lastHtmlTag === ('/' . $this->syntaxEndTag) && --$this->syntaxEndLevel === 0) {
            $this->setSyntax($this->defaultSyntax);

        } elseif ($token->type === Token::MACRO_TAG && $token->name === 'contentType') {
            if (strpos($token->value, 'html') !== false) {
                $this->setContentType(self::CONTENT_HTML);
            } elseif (strpos($token->value, 'xml') !== false) {
                $this->setContentType(self::CONTENT_XML);
            } else {
                $this->setContentType(self::CONTENT_TEXT);
            }
        }
    }
}
475: