<?php

/**
 * Tokenizer for Latte templates (class Latte\Parser).
 */

namespace Latte;


 16: class Parser extends Object
 17: {
 18:     
 19:     const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*+\'|"(?:\\\\.|[^"\\\\])*+"';
 20: 
 21:     
 22:     const N_PREFIX = 'n:';
 23: 
 24:     
 25:     public $defaultSyntax = 'latte';
 26: 
 27:     
 28:     public $shortNoEscape = FALSE;
 29: 
 30:     
 31:     public $syntaxes = array(
 32:         'latte' => array('\\{(?![\\s\'"{}])', '\\}'), 
 33:         'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'), 
 34:         'asp' => array('<%\s*', '\s*%>'), 
 35:         'python' => array('\\{[{%]\s*', '\s*[%}]\\}'), 
 36:         'off' => array('[^\x00-\xFF]', ''),
 37:     );
 38: 
 39:     
 40:     private $macroRe;
 41: 
 42:     
 43:     private $input;
 44: 
 45:     
 46:     private $output;
 47: 
 48:     
 49:     private $offset;
 50: 
 51:     
 52:     private $context;
 53: 
 54:     
 55:     private $lastHtmlTag;
 56: 
 57:     
 58:     private $syntaxEndTag;
 59: 
 60:     
 61:     private $syntaxEndLevel = 0;
 62: 
 63:     
 64:     private $xmlMode;
 65: 
 66:     
 67:     const CONTEXT_HTML_TEXT = 'htmlText',
 68:         CONTEXT_CDATA = 'cdata',
 69:         CONTEXT_HTML_TAG = 'htmlTag',
 70:         CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
 71:         CONTEXT_RAW = 'raw',
 72:          = 'htmlComment';
 73: 
 74: 
 75:      76:  77:  78:  79: 
 80:     public function parse($input)
 81:     {
 82:         $this->offset = 0;
 83: 
 84:         if (substr($input, 0, 3) === "\xEF\xBB\xBF") { 
 85:             $input = substr($input, 3);
 86:         }
 87:         if (!preg_match('##u', $input)) {
 88:             throw new \InvalidArgumentException('Template is not valid UTF-8 stream.');
 89:         }
 90:         $input = str_replace("\r\n", "\n", $input);
 91:         $this->input = $input;
 92:         $this->output = array();
 93: 
 94:         $this->setSyntax($this->defaultSyntax);
 95:         $this->setContext(self::CONTEXT_HTML_TEXT);
 96:         $this->lastHtmlTag = $this->syntaxEndTag = NULL;
 97: 
 98:         while ($this->offset < strlen($input)) {
 99:             $matches = $this->{'context' . $this->context[0]}();
100: 
101:             if (!$matches) { 
102:                 break;
103: 
104:             } elseif (!empty($matches['comment'])) { 
105:                 $this->addToken(Token::COMMENT, $matches[0]);
106: 
107:             } elseif (!empty($matches['macro'])) { 
108:                 $token = $this->addToken(Token::MACRO_TAG, $matches[0]);
109:                 list($token->name, $token->value, $token->modifiers, $token->empty) = $this->parseMacroTag($matches['macro']);
110:             }
111: 
112:             $this->filter();
113:         }
114: 
115:         if ($this->offset < strlen($input)) {
116:             $this->addToken(Token::TEXT, substr($this->input, $this->offset));
117:         }
118:         return $this->output;
119:     }
120: 
121: 
122:     123: 124: 
125:     private function contextHtmlText()
126:     {
127:         $matches = $this->match('~
128:             (?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)|  ##  begin of HTML tag <tag </tag - ignores <!DOCTYPE
129:             <(?P<htmlcomment>!--(?!>))|     ##  begin of HTML comment <!--, but not <!-->
130:             '.$this->macroRe.'              ##  macro tag
131:         ~xsi');
132: 
133:         if (!empty($matches['htmlcomment'])) { 
134:             $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
135:             $this->setContext(self::CONTEXT_HTML_COMMENT);
136: 
137:         } elseif (!empty($matches['tag'])) { 
138:             $token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
139:             $token->name = $matches['tag'];
140:             $token->closing = (bool) $matches['closing'];
141:             $this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
142:             $this->setContext(self::CONTEXT_HTML_TAG);
143:         }
144:         return $matches;
145:     }
146: 
147: 
148:     149: 150: 
151:     private function contextCData()
152:     {
153:         $matches = $this->match('~
154:             </(?P<tag>'.$this->lastHtmlTag.')(?![a-z0-9:])| ##  end HTML tag </tag
155:             '.$this->macroRe.'              ##  macro tag
156:         ~xsi');
157: 
158:         if (!empty($matches['tag'])) { 
159:             $token = $this->addToken(Token::HTML_TAG_BEGIN, $matches[0]);
160:             $token->name = $this->lastHtmlTag;
161:             $token->closing = TRUE;
162:             $this->lastHtmlTag = '/' . $this->lastHtmlTag;
163:             $this->setContext(self::CONTEXT_HTML_TAG);
164:         }
165:         return $matches;
166:     }
167: 
168: 
169:     170: 171: 
172:     private function contextHtmlTag()
173:     {
174:         $matches = $this->match('~
175:             (?P<end>\ ?/?>)([ \t]*\n)?|  ##  end of HTML tag
176:             '.$this->macroRe.'|          ##  macro tag
177:             \s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## beginning of HTML attribute
178:         ~xsi');
179: 
180:         if (!empty($matches['end'])) { 
181:             $this->addToken(Token::HTML_TAG_END, $matches[0]);
182:             $this->setContext(!$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE) ? self::CONTEXT_CDATA : self::CONTEXT_HTML_TEXT);
183: 
184:         } elseif (isset($matches['attr']) && $matches['attr'] !== '') { 
185:             $token = $this->addToken(Token::HTML_ATTRIBUTE, $matches[0]);
186:             $token->name = $matches['attr'];
187:             $token->value = isset($matches['value']) ? $matches['value'] : '';
188: 
189:             if ($token->value === '"' || $token->value === "'") { 
190:                 if (strncmp($token->name, self::N_PREFIX, strlen(self::N_PREFIX)) === 0) {
191:                     $token->value = '';
192:                     if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
193:                         $token->value = $m[1];
194:                         $token->text .= $m[0];
195:                     }
196:                 } else {
197:                     $this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $matches['value']);
198:                 }
199:             }
200:         }
201:         return $matches;
202:     }
203: 
204: 
205:     206: 207: 
208:     private function contextHtmlAttribute()
209:     {
210:         $matches = $this->match('~
211:             (?P<quote>'.$this->context[1].')|  ##  end of HTML attribute
212:             '.$this->macroRe.'                 ##  macro tag
213:         ~xsi');
214: 
215:         if (!empty($matches['quote'])) { 
216:             $this->addToken(Token::TEXT, $matches[0]);
217:             $this->setContext(self::CONTEXT_HTML_TAG);
218:         }
219:         return $matches;
220:     }
221: 
222: 
223:     224: 225: 
226:     private function ()
227:     {
228:         $matches = $this->match('~
229:             (?P<htmlcomment>-->)|   ##  end of HTML comment
230:             '.$this->macroRe.'      ##  macro tag
231:         ~xsi');
232: 
233:         if (!empty($matches['htmlcomment'])) { 
234:             $this->addToken(Token::HTML_TAG_END, $matches[0]);
235:             $this->setContext(self::CONTEXT_HTML_TEXT);
236:         }
237:         return $matches;
238:     }
239: 
240: 
241:     242: 243: 
244:     private function ()
245:     {
246:         $matches = $this->match('~
247:             '.$this->macroRe.'     ##  macro tag
248:         ~xsi');
249:         return $matches;
250:     }
251: 
252: 
253:     254: 255: 256: 257: 
258:     private function match($re)
259:     {
260:         if (!preg_match($re, $this->input, $matches, PREG_OFFSET_CAPTURE, $this->offset)) {
261:             if (preg_last_error()) {
262:                 throw new RegexpException(NULL, preg_last_error());
263:             }
264:             return array();
265:         }
266: 
267:         $value = substr($this->input, $this->offset, $matches[0][1] - $this->offset);
268:         if ($value !== '') {
269:             $this->addToken(Token::TEXT, $value);
270:         }
271:         $this->offset = $matches[0][1] + strlen($matches[0][0]);
272:         foreach ($matches as $k => $v) {
273:             $matches[$k] = $v[0];
274:         }
275:         return $matches;
276:     }
277: 
278: 
279:     280: 281: 
282:     public function setContentType($type)
283:     {
284:         if (strpos($type, 'html') !== FALSE) {
285:             $this->xmlMode = FALSE;
286:             $this->setContext(self::CONTEXT_HTML_TEXT);
287:         } elseif (strpos($type, 'xml') !== FALSE) {
288:             $this->xmlMode = TRUE;
289:             $this->setContext(self::CONTEXT_HTML_TEXT);
290:         } else {
291:             $this->setContext(self::CONTEXT_RAW);
292:         }
293:         return $this;
294:     }
295: 
296: 
297:     298: 299: 
300:     public function setContext($context, $quote = NULL)
301:     {
302:         $this->context = array($context, $quote);
303:         return $this;
304:     }
305: 
306: 
307:     308: 309: 310: 311: 
312:     public function setSyntax($type)
313:     {
314:         $type = $type ?: $this->defaultSyntax;
315:         if (isset($this->syntaxes[$type])) {
316:             $this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
317:         } else {
318:             throw new \InvalidArgumentException("Unknown syntax '$type'");
319:         }
320:         return $this;
321:     }
322: 
323: 
324:     325: 326: 327: 328: 329: 
330:     public function setDelimiters($left, $right)
331:     {
332:         $this->macroRe = '
333:             (?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
334:             ' . $left . '
335:                 (?P<macro>(?:
336:                     ' . self::RE_STRING . '|
337:                     \{(?:' . self::RE_STRING . '|[^\'"{}])*+\}|
338:                     [^\'"{}]
339:                 )+?)
340:             ' . $right . '
341:             (?P<rmargin>[ \t]*(?=\n))?
342:         ';
343:         return $this;
344:     }
345: 
346: 
347:     348: 349: 350: 351: 352: 
353:     public function parseMacroTag($tag)
354:     {
355:         if (!preg_match('~^
356:             (
357:                 (?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))|   ## ?, name, /name, but not function( or class:: or namespace\
358:                 (?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?)      ## !expression, !=expression, ...
359:             )(?P<args>.*?)
360:             (?P<modifiers>\|[a-z](?:' . self::RE_STRING . '|[^\'"/]|/(?=.))*+)?
361:             (?P<empty>/?\z)
362:         ()\z~isx', $tag, $match)) {
363:             if (preg_last_error()) {
364:                 throw new RegexpException(NULL, preg_last_error());
365:             }
366:             return FALSE;
367:         }
368:         if ($match['name'] === '') {
369:             $match['name'] = $match['shortname'] ?: '=';
370:             if ($match['noescape']) {
371:                 if (!$this->shortNoEscape) {
372:                     trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
373:                 }
374:                 $match['modifiers'] .= '|noescape';
375:             }
376:         }
377:         return array($match['name'], trim($match['args']), $match['modifiers'], (bool) $match['empty']);
378:     }
379: 
380: 
381:     private function addToken($type, $text)
382:     {
383:         $this->output[] = $token = new Token;
384:         $token->type = $type;
385:         $token->text = $text;
386:         $token->line = $this->getLine();
387:         return $token;
388:     }
389: 
390: 
391:     private function getLine()
392:     {
393:         return $this->offset
394:             ? substr_count(substr($this->input, 0, $this->offset - 1), "\n") + 1
395:             : 0;
396:     }
397: 
398: 
399:     400: 401: 
402:     protected function filter()
403:     {
404:         $token = end($this->output);
405:         if ($token->type === Token::MACRO_TAG && $token->name === '/syntax') {
406:             $this->setSyntax($this->defaultSyntax);
407:             $token->type = Token::COMMENT;
408: 
409:         } elseif ($token->type === Token::MACRO_TAG && $token->name === 'syntax') {
410:             $this->setSyntax($token->value);
411:             $token->type = Token::COMMENT;
412: 
413:         } elseif ($token->type === Token::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
414:             $this->setSyntax($token->value);
415:             $this->syntaxEndTag = $this->lastHtmlTag;
416:             $this->syntaxEndLevel = 1;
417:             $token->type = Token::COMMENT;
418:         } elseif ($token->type === Token::HTML_TAG_BEGIN && $this->lastHtmlTag === $this->syntaxEndTag) {
419:             $this->syntaxEndLevel++;
420:         } elseif ($token->type === Token::HTML_TAG_END && $this->lastHtmlTag === ('/' . $this->syntaxEndTag) && --$this->syntaxEndLevel === 0) {
421:             $this->setSyntax($this->defaultSyntax);
422:         } elseif ($token->type === Token::MACRO_TAG && $token->name === 'contentType') {
423:             $this->setContentType($token->value);
424:         }
425:     }
426: 
427: }
428: