1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Latte;
9:
10:
11: 12: 13:
14: class Parser extends Object
15: {
16:
/** Regular expression matching one single-quoted or double-quoted string, honouring backslash escapes. */
const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*+\'|"(?:\\\\.|[^"\\\\])*+"';

/** Prefix of the special HTML attributes whose quoted value is captured in one piece (see contextHtmlTag()). */
const N_PREFIX = 'n:';

/** @var string  key into $syntaxes that parse() starts with and that setSyntax() falls back to */
public $defaultSyntax = 'latte';

/** @var bool  when false, parseMacroTag() raises E_USER_DEPRECATED for the {!...} shortcut */
public $shortNoEscape = false;

/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
public $syntaxes = array(
    'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
    'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
    'asp' => array('<%\s*', '\s*%>'),
    'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
    // The patterns are applied without the "u" modifier, so this left delimiter cannot match any byte.
    'off' => array('[^\x00-\xFF]', ''),
);

/** @var array  array(left, right) delimiter regexps currently in effect, see setDelimiters() */
private $delimiters;

/** @var string  template source being parsed (CRLF already normalised to LF by parse()) */
private $input;

/** @var Token[]  tokens produced so far */
private $output;

/** @var int  byte offset into $input of the first character not yet consumed */
private $offset;

/** @var array  array(context name, auxiliary data) as stored by setContext() */
private $context;

/** @var string|null  lower-cased name of the last HTML tag seen, prefixed with "/" for a closing tag */
private $lastHtmlTag;

/** @var string|null  value of $lastHtmlTag at the moment an n:syntax attribute was seen */
private $syntaxEndTag;

/** @var int  nesting counter used by filter() to find the end of the element carrying n:syntax */
private $syntaxEndLevel = 0;

/** @var bool|null  set by setContentType(); when truthy, <script>/<style> bodies are not treated as CDATA */
private $xmlMode;

/**
 * Context names. parse() dispatches to the method named 'context' . <value>,
 * so each value must correspond to a context*() method of this class.
 */
const CONTEXT_HTML_TEXT = 'htmlText',
    CONTEXT_CDATA = 'cdata',
    CONTEXT_HTML_TAG = 'htmlTag',
    CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
    CONTEXT_RAW = 'raw',
    CONTEXT_HTML_COMMENT = 'htmlComment',
    CONTEXT_MACRO = 'macro';
72:
73:
74: 75: 76: 77: 78:
/**
 * Splits a template source into tokens.
 *
 * A leading UTF-8 byte order mark is dropped and CRLF line ends are converted to LF
 * before tokenizing. Every produced token is passed through filter() exactly once.
 *
 * @param  string  template source
 * @return Token[]  all tokens in source order
 * @throws \InvalidArgumentException  when the source is not valid UTF-8
 * @throws CompileException  when tokenizing ends while still inside a macro
 */
public function parse($input)
{
    if (substr($input, 0, 3) === "\xEF\xBB\xBF") {
        $input = substr($input, 3);
    }

    $input = str_replace("\r\n", "\n", $input);
    $this->input = $input;
    $this->offset = 0;
    $this->output = array();

    if (!preg_match('##u', $input)) {
        // Measure the longest well-formed prefix so that getLine() can report where the bad byte is.
        preg_match('#(?:[\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})*+#A', $input, $validPrefix);
        $this->offset = strlen($validPrefix[0]) + 1;
        throw new \InvalidArgumentException('Template is not valid UTF-8 stream.');
    }

    $this->setSyntax($this->defaultSyntax);
    $this->setContext(self::CONTEXT_HTML_TEXT);
    $this->syntaxEndTag = null;
    $this->lastHtmlTag = null;

    $length = strlen($input);
    $filtered = 0; // number of tokens already handed to filter()
    while ($this->offset < $length) {
        $handler = 'context' . $this->context[0];
        if ($this->$handler() === false) {
            break;
        }
        for (; $filtered < count($this->output); $filtered++) {
            $this->filter($this->output[$filtered]);
        }
    }

    if ($this->context[0] === self::CONTEXT_MACRO) {
        throw new CompileException('Malformed macro');
    }

    // Whatever no context handler consumed becomes one trailing text token.
    if ($this->offset < $length) {
        $this->addToken(Token::TEXT, substr($this->input, $this->offset));
    }

    return $this->output;
}
117:
118:
119: 120: 121:
/**
 * Handles CONTEXT_HTML_TEXT: looks for the start of an HTML tag, an HTML comment or a macro.
 *
 * @return bool|null  false when nothing was found (parse() then stops), otherwise null
 */
private function contextHtmlText()
{
    $matches = $this->match('~
        (?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)|  ##  begin of HTML tag <tag </tag - ignores <!DOCTYPE
        <(?P<htmlcomment>!--(?!>))|     ##  begin of HTML comment <!--, but not <!-->
        (?P<macro>' . $this->delimiters[0] . ')
    ~xsi');

    if (!empty($matches['htmlcomment'])) {
        $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
        $this->setContext(self::CONTEXT_HTML_COMMENT);

    // Compare against '' explicitly: empty() would also reject the tag name "0", and since match()
    // has already consumed the text, the "<0" would then vanish from the output.
    } elseif (isset($matches['tag']) && $matches['tag'] !== '') {
        $token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
        $token->name = $matches['tag'];
        $token->closing = (bool) $matches['closing'];
        $this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
        $this->setContext(self::CONTEXT_HTML_TAG);

    } else {
        return $this->processMacro($matches);
    }
}
145:
146:
147: 148: 149:
/**
 * Handles CONTEXT_CDATA: looks for the closing tag of the element named in $lastHtmlTag, or for a macro.
 *
 * @return bool|null  false when nothing was found, otherwise null
 */
private function contextCData()
{
    $matches = $this->match('~
        </(?P<tag>' . $this->lastHtmlTag . ')(?![a-z0-9:])|  ##  end HTML tag </tag
        (?P<macro>' . $this->delimiters[0] . ')
    ~xsi');

    if (empty($matches['tag'])) {
        return $this->processMacro($matches);
    }

    $token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
    $token->name = $this->lastHtmlTag;
    $token->closing = true;
    // The token keeps the bare name; from here on the remembered tag is the closing one.
    $this->lastHtmlTag = '/' . $this->lastHtmlTag;
    $this->setContext(self::CONTEXT_HTML_TAG);
}
167:
168:
169: 170: 171:
/**
 * Handles CONTEXT_HTML_TAG: looks for the end of the tag, a macro, or an attribute.
 *
 * An attribute whose value starts with a quote either switches to CONTEXT_HTML_ATTRIBUTE
 * or, when its name starts with N_PREFIX, has its whole quoted value read immediately.
 *
 * @return bool|null  false when nothing was found, otherwise null
 */
private function contextHtmlTag()
{
    $matches = $this->match('~
        (?P<end>\ ?/?>)([ \t]*\n)?|  ##  end of HTML tag
        (?P<macro>' . $this->delimiters[0] . ')|
        \s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## beginning of HTML attribute
    ~xsi');

    if (!empty($matches['end'])) {
        $this->addToken(Token::HTML_TAG_END, $matches[0]);
        $entersCData = !$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), true);
        $this->setContext($entersCData ? self::CONTEXT_CDATA : self::CONTEXT_HTML_TEXT);
        return;
    }

    if (!isset($matches['attr']) || $matches['attr'] === '') {
        return $this->processMacro($matches);
    }

    $value = isset($matches['value']) ? $matches['value'] : '';
    $token = $this->addToken(Token::HTML_ATTRIBUTE, $matches[0]);
    $token->name = $matches['attr'];
    $token->value = $value;

    // Only a value that is a lone opening quote needs further work.
    if ($value !== '"' && $value !== "'") {
        return;
    }

    if (strncmp($matches['attr'], self::N_PREFIX, strlen(self::N_PREFIX)) !== 0) {
        $this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $value);
        return;
    }

    // Prefixed attribute: read everything up to the matching quote into this same token.
    $token->value = '';
    $quoted = $this->match('~(.*?)' . $value . '~xsi');
    if ($quoted) {
        $token->value = $quoted[1];
        $token->text .= $quoted[0];
    }
}
204:
205:
206: 207: 208:
/**
 * Handles CONTEXT_HTML_ATTRIBUTE: looks for the closing quote (stored in $context[1]) or a macro.
 *
 * @return bool|null  false when nothing was found, otherwise null
 */
private function contextHtmlAttribute()
{
    $matches = $this->match('~
        (?P<quote>' . $this->context[1] . ')|  ##  end of HTML attribute
        (?P<macro>' . $this->delimiters[0] . ')
    ~xsi');

    if (empty($matches['quote'])) {
        return $this->processMacro($matches);
    }

    // The closing quote is emitted as plain text and the parser returns to the enclosing tag.
    $this->addToken(Token::TEXT, $matches[0]);
    $this->setContext(self::CONTEXT_HTML_TAG);
}
223:
224:
225: 226: 227:
/**
 * Handles CONTEXT_HTML_COMMENT: looks for the end of the HTML comment or a macro.
 *
 * The method name must be 'context' followed by the CONTEXT_HTML_COMMENT value ('htmlComment'),
 * because parse() builds the handler name from the current context.
 *
 * @return bool|null  false when nothing was found, otherwise null
 */
private function contextHtmlComment()
{
    $matches = $this->match('~
        (?P<htmlcomment>-->)|   ##  end of HTML comment
        (?P<macro>' . $this->delimiters[0] . ')
    ~xsi');

    if (!empty($matches['htmlcomment'])) {
        $this->addToken(Token::HTML_TAG_END, $matches[0]);
        $this->setContext(self::CONTEXT_HTML_TEXT);
    } else {
        return $this->processMacro($matches);
    }
}
242:
243:
244: 245: 246:
/**
 * Handles CONTEXT_RAW: only macros are recognised, everything else is plain text.
 *
 * The method name must be 'context' followed by the CONTEXT_RAW value ('raw'),
 * because parse() builds the handler name from the current context.
 *
 * @return bool|null  false when no macro was found, otherwise null
 */
private function contextRaw()
{
    $matches = $this->match('~
        (?P<macro>' . $this->delimiters[0] . ')
    ~xsi');
    return $this->processMacro($matches);
}
254:
255:
256: 257: 258:
/**
 * Handles CONTEXT_MACRO: reads the rest of a macro tag or macro comment, up to the right delimiter.
 *
 * While in this context, $context[1] is array(previous context, text of the left delimiter),
 * as stored by processMacro(); the previous context is restored once the tag is read.
 *
 * @return void
 * @throws CompileException  when neither a macro body nor a comment can be read at the current offset
 */
private function contextMacro()
{
    $matches = $this->match('~
        (?P<comment>\\*.*?\\*' . $this->delimiters[1] . '\n{0,2})|
        (?P<macro>(?>
            ' . self::RE_STRING . '|
            \{(?>' . self::RE_STRING . '|[^\'"{}])*+\}|
            [^\'"{}]
        )+?)
        ' . $this->delimiters[1] . '
        (?P<rmargin>[ \t]*(?=\n))?
    ~xsiA');

    // Compare against '' explicitly: empty() would also reject the macro body "0",
    // turning a tag such as {0} into a "Malformed macro" error.
    if (isset($matches['macro']) && $matches['macro'] !== '') {
        $token = $this->addToken(Token::MACRO_TAG, $this->context[1][1] . $matches[0]);
        list($token->name, $token->value, $token->modifiers, $token->empty) = $this->parseMacroTag($matches['macro']);
        $this->context = $this->context[1][0];

    } elseif (!empty($matches['comment'])) {
        $this->addToken(Token::COMMENT, $this->context[1][1] . $matches[0]);
        $this->context = $this->context[1][0];

    } else {
        throw new CompileException('Malformed macro');
    }
}
285:
286:
/**
 * Switches to CONTEXT_MACRO when the given match contains a left macro delimiter.
 *
 * The current context and the delimiter text are stored as the new context's auxiliary data
 * so that contextMacro() can restore the former and prepend the latter to the token text.
 *
 * @param  array  result of match()
 * @return bool|null  false when no delimiter was matched, otherwise null
 */
private function processMacro($matches)
{
    if (empty($matches['macro'])) {
        return false;
    }

    $resume = array($this->context, $matches['macro']);
    $this->setContext(self::CONTEXT_MACRO, $resume);
}
295:
296:
297: 298: 299: 300: 301:
/**
 * Searches $input for the pattern, starting at the current offset.
 *
 * On success, any text skipped before the match is emitted as a text token and the offset
 * is moved just past the match.
 *
 * @param  string  regular expression
 * @return array  captured strings keyed like the preg_match() result, or an empty array when nothing matched
 * @throws RegexpException  when the PCRE engine reports an error
 */
private function match($re)
{
    $found = preg_match($re, $this->input, $matches, PREG_OFFSET_CAPTURE, $this->offset);
    if (!$found) {
        if (preg_last_error()) {
            throw new RegexpException(null, preg_last_error());
        }
        return array();
    }

    list($matched, $start) = $matches[0];

    $skipped = substr($this->input, $this->offset, $start - $this->offset);
    if ($skipped !== '') {
        $this->addToken(Token::TEXT, $skipped);
    }
    $this->offset = $start + strlen($matched);

    // Drop the offsets, keeping only the captured strings under their original keys.
    return array_map(function ($capture) {
        return $capture[0];
    }, $matches);
}
321:
322:
323: 324: 325:
/**
 * Selects the starting context according to a content type string.
 *
 * A type containing "html" selects HTML text with XML mode off; otherwise a type containing
 * "xml" selects HTML text with XML mode on; any other type selects the raw context and
 * leaves XML mode untouched.
 *
 * @param  string  content type
 * @return self
 */
public function setContentType($type)
{
    $isHtml = strpos($type, 'html') !== false;

    if ($isHtml || strpos($type, 'xml') !== false) {
        $this->xmlMode = !$isHtml;
        $this->setContext(self::CONTEXT_HTML_TEXT);
    } else {
        $this->setContext(self::CONTEXT_RAW);
    }

    return $this;
}
339:
340:
341: 342: 343:
/**
 * Sets the current parsing context.
 *
 * @param  string  one of the CONTEXT_* values
 * @param  mixed   auxiliary data stored with the context (e.g. the attribute quote character)
 * @return self
 */
public function setContext($context, $quote = null)
{
    $state = array($context, $quote);
    $this->context = $state;

    return $this;
}
349:
350:
351: 352: 353: 354: 355:
/**
 * Activates the delimiters of a named syntax.
 *
 * @param  string  key of $syntaxes; a falsy value selects $defaultSyntax
 * @return self
 * @throws \InvalidArgumentException  when the name is not a key of $syntaxes
 */
public function setSyntax($type)
{
    if (!$type) {
        $type = $this->defaultSyntax;
    }

    if (!isset($this->syntaxes[$type])) {
        throw new \InvalidArgumentException("Unknown syntax '$type'");
    }

    $pair = $this->syntaxes[$type];
    $this->setDelimiters($pair[0], $pair[1]);

    return $this;
}
366:
367:
368: 369: 370: 371: 372: 373:
/**
 * Sets the macro tag delimiters.
 *
 * Both values are inserted verbatim into the regular expressions built by the context handlers.
 *
 * @param  string  regular expression for the left delimiter
 * @param  string  regular expression for the right delimiter
 * @return self
 */
public function setDelimiters($left, $right)
{
    $pair = array($left, $right);
    $this->delimiters = $pair;

    return $this;
}
379:
380:
381: 382: 383: 384: 385: 386:
/**
 * Splits the inside of a macro tag into its parts.
 *
 * When the tag has no explicit name, the name is the short prefix (or "=" when there is none),
 * and a leading "!" appends the "|noescape" modifier.
 *
 * @param  string  macro tag content without delimiters
 * @return array|false  array(name, trimmed arguments, modifiers, is-empty flag), or false when the tag does not match
 * @throws RegexpException  when the PCRE engine reports an error
 */
public function parseMacroTag($tag)
{
    $pattern = '~^
        (
            (?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))|   ## ?, name, /name, but not function( or class:: or namespace\
            (?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?)      ## !expression, !=expression, ...
        )(?P<args>(?:' . self::RE_STRING . '|[^\'"])*?)
        (?P<modifiers>(?<!\|)\|[a-z](?:' . self::RE_STRING . '|[^\'"/]|/(?=.))*+)?
        (?P<empty>/?\z)
    ()\z~isx';

    if (!preg_match($pattern, $tag, $match)) {
        if (preg_last_error()) {
            throw new RegexpException(null, preg_last_error());
        }
        return false;
    }

    $name = $match['name'];
    $modifiers = $match['modifiers'];

    if ($name === '') {
        $name = $match['shortname'] ?: '=';

        if ($match['noescape']) {
            if (!$this->shortNoEscape) {
                trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
            }
            $modifiers .= '|noescape';
        }
    }

    return array($name, trim($match['args']), $modifiers, (bool) $match['empty']);
}
413:
414:
/**
 * Creates a token, appends it to the output and returns it.
 *
 * The token's line is whatever getLine() reports for the current offset.
 *
 * @param  string  token type (a Token constant)
 * @param  string  source text of the token
 * @return Token
 */
private function addToken($type, $text)
{
    $token = new Token;
    $token->type = $type;
    $token->text = $text;
    $token->line = $this->getLine();

    $this->output[] = $token;

    return $token;
}
423:
424:
/**
 * Returns the 1-based line number for the current offset.
 *
 * Counts the line feeds in the first (offset - 1) bytes of the input; an offset of zero is line 1.
 *
 * @return int
 */
public function getLine()
{
    if (!$this->offset) {
        return 1;
    }

    $consumed = substr($this->input, 0, $this->offset - 1);

    return substr_count($consumed, "\n") + 1;
}
431:
432:
433: 434: 435:
/**
 * Reacts to tokens that change the parser's own state.
 *
 * - {syntax ...} and {/syntax} switch the delimiters and are turned into comment tokens,
 * - an n:syntax attribute switches the delimiters until the element it sits on is closed,
 *   which is tracked with $syntaxEndTag and $syntaxEndLevel,
 * - {contentType ...} is forwarded to setContentType().
 *
 * @param  Token  token just produced by the parser
 * @return void
 */
protected function filter(Token $token)
{
    $isMacro = $token->type === Token::MACRO_TAG;

    if ($isMacro && ($token->name === '/syntax' || $token->name === 'syntax')) {
        // The closing form goes back to the default syntax, the opening form selects the named one.
        $this->setSyntax($token->name === 'syntax' ? $token->value : $this->defaultSyntax);
        $token->type = Token::COMMENT;

    } elseif ($token->type === Token::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
        $this->setSyntax($token->value);
        $this->syntaxEndTag = $this->lastHtmlTag;
        $this->syntaxEndLevel = 1;
        $token->type = Token::COMMENT;

    } elseif ($token->type === Token::HTML_TAG_BEGIN && $this->lastHtmlTag === $this->syntaxEndTag) {
        // Another element of the same name was opened.
        $this->syntaxEndLevel++;

    } elseif (
        $token->type === Token::HTML_TAG_END
        && $this->lastHtmlTag === ('/' . $this->syntaxEndTag)
        && --$this->syntaxEndLevel === 0
    ) {
        $this->setSyntax($this->defaultSyntax);

    } elseif ($isMacro && $token->name === 'contentType') {
        $this->setContentType($token->value);
    }
}
462:
463: }
464: