1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Latte;
9:
10:
11: 12: 13: 14: 15:
16: class Parser extends Object
17: {
18:
/** Regular expression fragment matching a single- or double-quoted string with backslash escapes. */
const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*+\'|"(?:\\\\.|[^"\\\\])*+"';

/** Prefix of HTML attributes whose quoted value contextHtmlTag() captures in one piece. */
const N_PREFIX = 'n:';

/** @var string  key of $syntaxes applied when parse() starts and whenever the syntax is reset */
public $defaultSyntax = 'latte';

/** @var bool  when FALSE, parseMacroTag() raises E_USER_DEPRECATED for the {!...} shortcut */
public $shortNoEscape = FALSE;

/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
public $syntaxes = array(
    'latte' => array('\\{(?![\\s\'"{}])', '\\}'), // {...}
    'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'), // {{...}}
    'asp' => array('<%\s*', '\s*%>'), // <%...%>
    'python' => array('\\{[{%]\s*', '\s*[%}]\\}'), // {% ... %} or {{ ... }}
    'off' => array('[^\x00-\xFF]', ''), // no byte satisfies this class, so it never matches
);

/** @var string  regexp fragment for comments and macro tags, built by setDelimiters() */
private $macroRe;

/** @var string  template source after BOM removal and \r\n normalisation */
private $input;

/** @var Token[]  tokens produced so far */
private $output;

/** @var int  byte position in $input where the next match starts */
private $offset;

/** @var array  array(context name, quote character or NULL), see setContext() */
private $context;

/** @var string|NULL  lower-cased name of the last HTML tag seen, prefixed with '/' for a closing tag */
private $lastHtmlTag;

/** @var string|NULL  value of $lastHtmlTag at the moment an n:syntax attribute was found */
private $syntaxEndTag;

/** @var int  nesting counter filter() uses to find the end tag that closes n:syntax */
private $syntaxEndLevel = 0;

/** @var bool|NULL  set by setContentType(); when TRUE, <script>/<style> content is not parsed as CDATA */
private $xmlMode;

/**
 * Parser states. parse() appends the value to 'context' to pick the handler
 * method, e.g. 'htmlComment' dispatches to contextHtmlComment().
 */
const CONTEXT_HTML_TEXT = 'htmlText',
    CONTEXT_CDATA = 'cdata',
    CONTEXT_HTML_TAG = 'htmlTag',
    CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
    CONTEXT_RAW = 'raw',
    CONTEXT_HTML_COMMENT = 'htmlComment';
73:
74:
75: 76: 77: 78: 79:
/**
 * Splits a template into tokens: text, HTML tags and attributes, comments and macro tags.
 *
 * A leading UTF-8 BOM is removed and "\r\n" is normalised to "\n" before
 * tokenizing. The syntax is reset to $defaultSyntax and the context to HTML
 * text on every call, so a context chosen earlier via setContentType() or
 * setContext() does not survive into this call.
 *
 * @param  string  template source
 * @return Token[]
 * @throws \InvalidArgumentException  if the input is not valid UTF-8
 */
public function parse($input)
{
    $this->offset = 0;

    if (substr($input, 0, 3) === "\xEF\xBB\xBF") { // UTF-8 byte order mark
        $input = substr($input, 3);
    }
    if (!preg_match('##u', $input)) { // an empty pattern with the u flag only validates the encoding
        throw new \InvalidArgumentException('Template is not valid UTF-8 stream.');
    }
    $input = str_replace("\r\n", "\n", $input);
    $this->input = $input;
    $this->output = array();

    $this->setSyntax($this->defaultSyntax);
    $this->setContext(self::CONTEXT_HTML_TEXT);
    $this->lastHtmlTag = $this->syntaxEndTag = NULL;

    $length = strlen($input);
    while ($this->offset < $length) {
        // dispatch to contextHtmlText(), contextHtmlTag(), ... according to the current state
        $matches = $this->{'context' . $this->context[0]}();

        if (!$matches) {
            break;

        } elseif (isset($matches['comment']) && $matches['comment'] !== '') {
            $this->addToken(Token::COMMENT, $matches[0]);

        } elseif (isset($matches['macro']) && $matches['macro'] !== '') {
            // compared against '' instead of using empty(): the macro body may be
            // the string '0' (as in {0}), which empty() treats as missing and
            // which previously made the whole tag vanish from the output
            $token = $this->addToken(Token::MACRO_TAG, $matches[0]);
            list($token->name, $token->value, $token->modifiers, $token->empty) = $this->parseMacroTag($matches['macro']);
        }

        $this->filter();
    }

    // whatever follows the last match is plain text
    if ($this->offset < $length) {
        $this->addToken(Token::TEXT, substr($this->input, $this->offset));
    }
    return $this->output;
}
120:
121:
122: 123: 124:
/**
 * Handles the HTML text context: looks for the start of an HTML tag, the start
 * of an HTML comment, or a macro tag / macro comment.
 *
 * Indentation in front of a tag that starts a line is made part of the tag token.
 *
 * @return array  captured groups of the match, or an empty array when nothing matched
 */
private function contextHtmlText()
{
    $matches = $this->match('~
        (?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)|  ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
        <(?P<htmlcomment>!--(?!>))|     ## begin of HTML comment <!--, but not <!-->
        '.$this->macroRe.'              ## macro tag
    ~xsi');

    if (!empty($matches['htmlcomment'])) {
        $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
        $this->setContext(self::CONTEXT_HTML_COMMENT);

    } elseif (isset($matches['tag']) && $matches['tag'] !== '') {
        // compared against '' instead of using empty(): the pattern accepts the
        // tag name '0', which empty() treats as missing; "<0" was then consumed
        // without producing any token
        $token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
        $token->name = $matches['tag'];
        $token->closing = (bool) $matches['closing'];
        $this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
        $this->setContext(self::CONTEXT_HTML_TAG);
    }
    return $matches;
}
146:
147:
148: 149: 150:
/**
 * Handles the CDATA context (content of the element named by $lastHtmlTag):
 * only the matching end tag or a macro tag / macro comment is recognised.
 *
 * @return array  captured groups of the match, or an empty array when nothing matched
 */
private function contextCData()
{
    $tagName = $this->lastHtmlTag;
    $found = $this->match('~
        </(?P<tag>'.$tagName.')(?![a-z0-9:])|  ## end HTML tag </tag
        '.$this->macroRe.'           ## macro tag
    ~xsi');

    if (empty($found['tag'])) {
        return $found;
    }

    $endTag = $this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
    $endTag->name = $tagName;
    $endTag->closing = TRUE;
    $this->lastHtmlTag = '/' . $tagName;
    $this->setContext(self::CONTEXT_HTML_TAG);
    return $found;
}
167:
168:
169: 170: 171:
/**
 * Handles the inside of an HTML tag: recognises the end of the tag, a macro
 * tag / macro comment, or the beginning of an attribute.
 *
 * A quoted value of an attribute starting with N_PREFIX is read up to the
 * closing quote and stored in the attribute token. For any other quoted
 * attribute the parser switches to the attribute context.
 *
 * @return array  captured groups of the match, or an empty array when nothing matched
 */
private function contextHtmlTag()
{
    $found = $this->match('~
        (?P<end>\ ?/?>)([ \t]*\n)?|  ## end of HTML tag
        '.$this->macroRe.'|          ## macro tag
        \s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## beginning of HTML attribute
    ~xsi');

    if (!empty($found['end'])) {
        $this->addToken(Token::HTML_TAG_END, $found[0]);
        // outside XML mode the content of <script> and <style> is not parsed as HTML
        $isRawElement = !$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE);
        $this->setContext($isRawElement ? self::CONTEXT_CDATA : self::CONTEXT_HTML_TEXT);
        return $found;
    }

    if (!isset($found['attr']) || $found['attr'] === '') {
        return $found;
    }

    $attribute = $this->addToken(Token::HTML_ATTRIBUTE, $found[0]);
    $attribute->name = $found['attr'];
    $attribute->value = isset($found['value']) ? $found['value'] : '';

    $opensQuote = $attribute->value === '"' || $attribute->value === "'";
    if (!$opensQuote) {
        return $found;
    }

    if (strpos($attribute->name, self::N_PREFIX) !== 0) {
        // ordinary attribute: its content is tokenized by contextHtmlAttribute()
        $this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $found['value']);
        return $found;
    }

    // n: attribute: take everything up to the closing quote as the value
    $attribute->value = '';
    $quoted = $this->match('~(.*?)' . $found['value'] . '~xsi');
    if ($quoted) {
        $attribute->value = $quoted[1];
        $attribute->text .= $quoted[0];
    }
    return $found;
}
203:
204:
205: 206: 207:
/**
 * Handles the content of a quoted HTML attribute: recognises the closing quote
 * (the character stored in $context[1]) or a macro tag / macro comment.
 *
 * @return array  captured groups of the match, or an empty array when nothing matched
 */
private function contextHtmlAttribute()
{
    $quote = $this->context[1];
    $found = $this->match('~
        (?P<quote>'.$quote.')|  ## end of HTML attribute
        '.$this->macroRe.'      ## macro tag
    ~xsi');

    if (empty($found['quote'])) {
        return $found;
    }

    // the closing quote is emitted as text and parsing continues inside the tag
    $this->addToken(Token::TEXT, $found[0]);
    $this->setContext(self::CONTEXT_HTML_TAG);
    return $found;
}
221:
222:
223: 224: 225:
/**
 * Handles the inside of an HTML comment: recognises the terminating --> or a
 * macro tag / macro comment.
 *
 * parse() reaches this method through the CONTEXT_HTML_COMMENT state
 * ('context' . 'htmlComment'), so the method name must stay in sync with
 * that constant.
 *
 * @return array  captured groups of the match, or an empty array when nothing matched
 */
private function contextHtmlComment()
{
    $matches = $this->match('~
        (?P<htmlcomment>-->)|   ## end of HTML comment
        '.$this->macroRe.'      ## macro tag
    ~xsi');

    if (!empty($matches['htmlcomment'])) {
        $this->addToken(Token::HTML_TAG_END, $matches[0]);
        $this->setContext(self::CONTEXT_HTML_TEXT);
    }
    return $matches;
}
239:
240:
241: 242: 243:
/**
 * Handles the raw (non-HTML) context: only macro tags and macro comments are
 * recognised, everything else is passed through as text.
 *
 * parse() reaches this method through the CONTEXT_RAW state
 * ('context' . 'raw'), so the method name must stay in sync with that constant.
 *
 * @return array  captured groups of the match, or an empty array when nothing matched
 */
private function contextRaw()
{
    $matches = $this->match('~
        '.$this->macroRe.'      ## macro tag
    ~xsi');
    return $matches;
}
251:
252:
253: 254: 255: 256: 257:
/**
 * Searches the input for the pattern, starting at the current offset.
 *
 * On success the text skipped before the match (if any) is emitted as a TEXT
 * token and the offset is moved just past the match.
 *
 * @param  string  complete regular expression including delimiters and flags
 * @return array  captured groups (strings keyed by index and by name), or an empty array when nothing matched
 * @throws RegexpException  if the PCRE engine reported an error
 */
private function match($re)
{
    $hit = preg_match($re, $this->input, $captures, PREG_OFFSET_CAPTURE, $this->offset);
    if (!$hit) {
        if (preg_last_error()) {
            throw new RegexpException(NULL, preg_last_error());
        }
        return array();
    }

    list($whole, $start) = $captures[0];

    $skipped = substr($this->input, $this->offset, $start - $this->offset);
    if ($skipped !== '') {
        $this->addToken(Token::TEXT, $skipped);
    }
    $this->offset = $start + strlen($whole);

    // drop the offsets that PREG_OFFSET_CAPTURE attached to every group
    $groups = array();
    foreach ($captures as $key => $capture) {
        $groups[$key] = $capture[0];
    }
    return $groups;
}
277:
278:
279: 280: 281:
/**
 * Selects the parsing mode according to a content type string.
 *
 * A type containing 'html' switches XML mode off, a type containing 'xml'
 * (and not 'html') switches it on; both start in the HTML text context.
 * Any other type selects the raw context and leaves XML mode as it was.
 *
 * @param  string  content type
 * @return self
 */
public function setContentType($type)
{
    $isHtml = strpos($type, 'html') !== FALSE;

    if ($isHtml || strpos($type, 'xml') !== FALSE) {
        $this->xmlMode = !$isHtml;
        $this->setContext(self::CONTEXT_HTML_TEXT);
        return $this;
    }

    $this->setContext(self::CONTEXT_RAW);
    return $this;
}
295:
296:
297: 298: 299:
/**
 * Sets the current parser state.
 *
 * @param  string  one of the CONTEXT_* values
 * @param  string|NULL  quote character delimiting the attribute value, stored alongside the state
 * @return self
 */
public function setContext($context, $quote = NULL)
{
    $state = array($context, $quote);
    $this->context = $state;

    return $this;
}
305:
306:
307: 308: 309: 310: 311:
/**
 * Switches the macro tag delimiters to one of the named syntaxes in $syntaxes.
 *
 * @param  string  syntax name; an empty value selects $defaultSyntax
 * @return self
 * @throws \InvalidArgumentException  if the syntax is not defined
 */
public function setSyntax($type)
{
    $type = $type ?: $this->defaultSyntax;

    if (!isset($this->syntaxes[$type])) {
        throw new \InvalidArgumentException("Unknown syntax '$type'");
    }

    $delimiters = $this->syntaxes[$type];
    $this->setDelimiters($delimiters[0], $delimiters[1]);
    return $this;
}
322:
323:
324: 325: 326: 327: 328: 329:
/**
 * Builds the regular expression fragment that recognises macro comments and
 * macro tags for the given delimiters.
 *
 * The fragment defines three named groups:
 *  - comment: left delimiter, '*', anything, '*', right delimiter and up to two newlines
 *  - macro: tag content made of quoted strings, a {...} group without nested
 *    braces, or characters other than quotes and braces
 *  - rmargin: spaces and tabs between the tag and the end of the line
 *
 * It is embedded into patterns compiled with the x flag, so the whitespace
 * inside it is not significant.
 *
 * @param  string  left delimiter as a regular expression
 * @param  string  right delimiter as a regular expression
 * @return self
 */
public function setDelimiters($left, $right)
{
    $string = self::RE_STRING;

    $fragment = '
        (?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
        ' . $left . '
            (?P<macro>(?:
                ' . $string . '|
                \{(?:' . $string . '|[^\'"{}])*+\}|
                [^\'"{}]
            )+?)
        ' . $right . '
        (?P<rmargin>[ \t]*(?=\n))?
    ';

    $this->macroRe = $fragment;
    return $this;
}
345:
346:
347: 348: 349: 350: 351: 352:
/**
 * Splits the content of a macro tag into name, arguments, modifiers and the
 * "empty" flag (a trailing slash).
 *
 * When the content does not start with a macro name, the optional short name
 * character is used as the name, or '=' if there is none. A leading '!' adds
 * the '|noescape' modifier and, unless $shortNoEscape is set, raises
 * E_USER_DEPRECATED.
 *
 * @param  string  tag content without delimiters
 * @return array|FALSE  array(name, arguments, modifiers, is empty), or FALSE if the content cannot be parsed
 * @throws RegexpException  if the PCRE engine reported an error
 */
public function parseMacroTag($tag)
{
    $parsed = preg_match('~^
        (
            (?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))|   ## ?, name, /name, but not function( or class:: or namespace\
            (?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?)      ## !expression, !=expression, ...
        )(?P<args>.*?)
        (?P<modifiers>\|[a-z](?:' . self::RE_STRING . '|[^\'"/]|/(?=.))*+)?
        (?P<empty>/?\z)
    ()\z~isx', $tag, $parts);

    if (!$parsed) {
        if (preg_last_error()) {
            throw new RegexpException(NULL, preg_last_error());
        }
        return FALSE;
    }

    $name = $parts['name'];
    $modifiers = $parts['modifiers'];

    if ($name === '') {
        $name = $parts['shortname'] ?: '=';
        if ($parts['noescape']) {
            if (!$this->shortNoEscape) {
                trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
            }
            $modifiers .= '|noescape';
        }
    }

    return array($name, trim($parts['args']), $modifiers, (bool) $parts['empty']);
}
379:
380:
/**
 * Creates a token, stamps it with the current line and appends it to the output.
 *
 * @param  string  token type (Token::* constant)
 * @param  string  source text covered by the token
 * @return Token
 */
private function addToken($type, $text)
{
    $token = new Token;
    $token->type = $type;
    $token->text = $text;
    $token->line = $this->getLine();

    $this->output[] = $token;
    return $token;
}
389:
390:
/**
 * Returns the line number for the current offset.
 *
 * Counts the newlines in the input up to, but not including, the byte just
 * before the offset, and adds one.
 *
 * @return int  1-based line number, or 0 while the offset is still 0
 */
private function getLine()
{
    if (!$this->offset) {
        return 0;
    }

    $consumed = substr($this->input, 0, $this->offset - 1);
    return substr_count($consumed, "\n") + 1;
}
397:
398:
399: 400: 401:
/**
 * Post-processes the most recently added token: applies {syntax}, {/syntax},
 * n:syntax and {contentType}, and tracks element nesting so that the syntax
 * chosen by n:syntax is reset at the matching end tag.
 *
 * @return void
 */
protected function filter()
{
    $token = end($this->output);
    $type = $token->type;
    $isMacro = $type === Token::MACRO_TAG;

    if ($isMacro && $token->name === '/syntax') {
        $this->setSyntax($this->defaultSyntax);
        $token->type = Token::COMMENT;
        return;
    }

    if ($isMacro && $token->name === 'syntax') {
        $this->setSyntax($token->value);
        $token->type = Token::COMMENT;
        return;
    }

    if ($type === Token::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
        // remember the element carrying the attribute so its end tag can be found
        $this->setSyntax($token->value);
        $this->syntaxEndTag = $this->lastHtmlTag;
        $this->syntaxEndLevel = 1;
        $token->type = Token::COMMENT;
        return;
    }

    if ($type === Token::HTML_TAG_BEGIN && $this->lastHtmlTag === $this->syntaxEndTag) {
        $this->syntaxEndLevel++;
        return;
    }

    if ($type === Token::HTML_TAG_END && $this->lastHtmlTag === ('/' . $this->syntaxEndTag)) {
        $this->syntaxEndLevel--;
        if ($this->syntaxEndLevel === 0) {
            $this->setSyntax($this->defaultSyntax);
            return;
        }
    }

    if ($isMacro && $token->name === 'contentType') {
        $this->setContentType($token->value);
    }
}
426:
427: }
428: