1: <?php
2:
3: 4: 5: 6: 7:
8:
9:
10:
11: 12: 13: 14: 15: 16:
/**
 * Tokenizer for Latte templates.
 *
 * Walks the template source with context-specific regular expressions and
 * produces a flat list of LatteToken objects: plain text, macro tags,
 * comments, HTML tag begin/end markers and HTML attributes.
 */
class Parser extends Object
{
    /** Regexp fragment matching one single- or double-quoted string, honouring backslash escapes. */
    const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*\'|"(?:\\\\.|[^"\\\\])*"';

    /** Prefix of HTML attributes whose quoted value is consumed as a whole in contextTag(). */
    const N_PREFIX = 'n:';

    /** @var string  key of $syntaxes activated at the start of parse() and whenever a syntax block ends */
    public $defaultSyntax = 'latte';

    /** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
    public $syntaxes = array(
        'latte' => array('\\{(?![\\s\'"{}])', '\\}'),
        'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'),
        'asp' => array('<%\s*', '\s*%>'),
        'python' => array('\\{[{%]\s*', '\s*[%}]\\}'),
        'off' => array('[^\x00-\xFF]', ''),
    );

    /** @var string  regexp fragment for macro tags and macro comments, built by setDelimiters() */
    private $macroRe;

    /** @var string  template source being tokenized */
    private $input;

    /** @var array  LatteToken objects produced so far */
    private $output;

    /** @var int  byte offset into $input where the next match starts */
    private $offset;

    /** @var array  array(context name, quote character or NULL), see setContext() */
    private $context;

    /** @var string  lower-cased name of the last HTML tag seen, prefixed with "/" for a closing tag */
    private $lastHtmlTag;

    /** @var string  value of $lastHtmlTag at which a syntax set through n:syntax is reverted */
    private $syntaxEndTag;

    /** @var bool  set by the {contentType} macro; when TRUE, script/style content is not treated as CDATA */
    private $xmlMode;

    /** Names of the parsing contexts; parse() dispatches to the method "context" . <name>. */
    const CONTEXT_TEXT = 'text',
        CONTEXT_CDATA = 'cdata',
        CONTEXT_TAG = 'tag',
        CONTEXT_ATTRIBUTE = 'attribute',
        CONTEXT_NONE = 'none',
        CONTEXT_COMMENT = 'comment';


    /**
     * Tokenizes a template.
     *
     * A leading UTF-8 BOM is removed and CRLF line endings are converted to LF
     * before tokenizing. Syntax, context and HTML tag tracking are reset on
     * every call.
     *
     * @param  string  template source
     * @return array   list of LatteToken
     * @throws InvalidArgumentException  when Strings::checkEncoding() rejects the input
     */
    public function parse($input)
    {
        if (substr($input, 0, 3) === "\xEF\xBB\xBF") { // UTF-8 byte order mark
            $input = substr($input, 3);
        }
        if (!Strings::checkEncoding($input)) {
            throw new InvalidArgumentException('Template is not valid UTF-8 stream.');
        }
        $input = str_replace("\r\n", "\n", $input);
        $this->input = $input;
        $this->output = array();
        $this->offset = 0;

        $this->setSyntax($this->defaultSyntax);
        $this->setContext(self::CONTEXT_TEXT);
        $this->lastHtmlTag = $this->syntaxEndTag = NULL;

        while ($this->offset < strlen($input)) {
            // Dispatch to contextText(), contextTag(), ... (PHP method names are case-insensitive).
            $matches = $this->{"context".$this->context[0]}();

            if (!$matches) {
                // Nothing more to recognise in the current context; the rest becomes text below.
                break;

            } elseif (!empty($matches['comment'])) {
                $this->addToken(LatteToken::COMMENT, $matches[0]);

            } elseif (!empty($matches['macro'])) {
                $token = $this->addToken(LatteToken::MACRO_TAG, $matches[0]);
                // NOTE(review): parseMacroTag() returns FALSE for an unparseable tag; list() then
                // leaves name/value/modifiers as NULL instead of reporting an error.
                list($token->name, $token->value, $token->modifiers) = $this->parseMacroTag($matches['macro']);
            }

            $this->filter();
        }

        if ($this->offset < strlen($input)) {
            // Trailing text after the last recognised token.
            $this->addToken(LatteToken::TEXT, substr($this->input, $this->offset));
        }
        return $this->output;
    }


    /**
     * Handles CONTEXT_TEXT: looks for the start of an HTML tag, the start of
     * an HTML comment or a macro tag.
     *
     * @return array|mixed  result of match()
     */
    private function contextText()
    {
        $matches = $this->match('~
            (?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
            <(?P<htmlcomment>!--(?!>))| ## begin of HTML comment <!--, but not <!-->
            '.$this->macroRe.' ## macro tag
        ~xsi');

        if (!empty($matches['htmlcomment'])) {
            $this->addToken(LatteToken::HTML_TAG_BEGIN, $matches[0]);
            $this->setContext(self::CONTEXT_COMMENT);

        } elseif (!empty($matches['tag'])) {
            $token = $this->addToken(LatteToken::HTML_TAG_BEGIN, $matches[0]);
            $token->name = $matches['tag'];
            $token->closing = (bool) $matches['closing'];
            // Remembered as "tag" or "/tag" so later steps can tell opening from closing.
            $this->lastHtmlTag = $matches['closing'] . strtolower($matches['tag']);
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_CDATA: looks for the closing tag of the element
     * remembered in $lastHtmlTag or for a macro tag.
     *
     * @return array|mixed  result of match()
     */
    private function contextCData()
    {
        $matches = $this->match('~
            </(?P<tag>'.$this->lastHtmlTag.')(?![a-z0-9:])| ## end HTML tag </tag
            '.$this->macroRe.' ## macro tag
        ~xsi');

        if (!empty($matches['tag'])) {
            $token = $this->addToken(LatteToken::HTML_TAG_BEGIN, $matches[0]);
            $token->name = $this->lastHtmlTag;
            $token->closing = TRUE;
            $this->lastHtmlTag = '/' . $this->lastHtmlTag;
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_TAG: looks for the end of the HTML tag, a macro tag or
     * an HTML attribute.
     *
     * @return array|mixed  result of match()
     */
    private function contextTag()
    {
        $matches = $this->match('~
            (?P<end>\ ?/?>)([ \t]*\n)?| ## end of HTML tag
            '.$this->macroRe.'| ## macro tag
            \s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## begin of HTML attribute
        ~xsi');

        if (!empty($matches['end'])) {
            $this->addToken(LatteToken::HTML_TAG_END, $matches[0]);
            // Outside XML mode the content of <script> and <style> is parsed as CDATA.
            $this->setContext(!$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE) ? self::CONTEXT_CDATA : self::CONTEXT_TEXT);

        } elseif (isset($matches['attr']) && $matches['attr'] !== '') {
            $token = $this->addToken(LatteToken::HTML_ATTRIBUTE, $matches[0]);
            $token->name = $matches['attr'];
            $token->value = isset($matches['value']) ? $matches['value'] : '';

            // A value consisting of a lone quote means a quoted attribute value follows.
            if ($token->value === '"' || $token->value === "'") {
                if (Strings::startsWith($token->name, self::N_PREFIX)) {
                    // n: attributes: consume everything up to the matching quote into this token.
                    $token->value = '';
                    if ($m = $this->match('~(.*?)' . $matches['value'] . '~xsi')) {
                        $token->value = $m[1];
                        $token->text .= $m[0];
                    }
                } else {
                    // Ordinary attributes: tokenize the value in attribute context.
                    $this->setContext(self::CONTEXT_ATTRIBUTE, $matches['value']);
                }
            }
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_ATTRIBUTE: looks for the quote that closes the attribute
     * value (stored in $context[1]) or for a macro tag.
     *
     * @return array|mixed  result of match()
     */
    private function contextAttribute()
    {
        $matches = $this->match('~
            (?P<quote>'.$this->context[1].')| ## end of HTML attribute
            '.$this->macroRe.' ## macro tag
        ~xsi');

        if (!empty($matches['quote'])) {
            $this->addToken(LatteToken::TEXT, $matches[0]);
            $this->setContext(self::CONTEXT_TAG);
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_COMMENT: looks for the end of the HTML comment or for a
     * macro tag.
     *
     * @return array|mixed  result of match()
     */
    private function contextComment()
    {
        $matches = $this->match('~
            (?P<htmlcomment>-->)| ## end of HTML comment
            '.$this->macroRe.' ## macro tag
        ~xsi');

        if (!empty($matches['htmlcomment'])) {
            $this->addToken(LatteToken::HTML_TAG_END, $matches[0]);
            $this->setContext(self::CONTEXT_TEXT);
        }
        return $matches;
    }


    /**
     * Handles CONTEXT_NONE: only macro tags are recognised.
     *
     * @return array|mixed  result of match()
     */
    private function contextNone()
    {
        $matches = $this->match('~
            '.$this->macroRe.' ## macro tag
        ~xsi');
        return $matches;
    }


    /**
     * Matches a regular expression against the input starting at the current
     * offset. On success the text skipped before the match is emitted as a
     * TEXT token and the offset is moved past the match.
     *
     * @param  string  regular expression
     * @return array|mixed  captured strings keyed by group (offsets stripped), or the
     *                      falsy value returned by Strings::match() when nothing matched
     */
    private function match($re)
    {
        if ($matches = Strings::match($this->input, $re, PREG_OFFSET_CAPTURE, $this->offset)) {
            // Text between the previous position and the start of this match.
            $value = substr($this->input, $this->offset, $matches[0][1] - $this->offset);
            if ($value !== '') {
                $this->addToken(LatteToken::TEXT, $value);
            }
            $this->offset = $matches[0][1] + strlen($matches[0][0]);
            // Reduce array(string, offset) pairs to plain strings.
            foreach ($matches as $k => $v) {
                $matches[$k] = $v[0];
            }
        }
        return $matches;
    }


    /**
     * Switches the parsing context.
     *
     * @param  string  one of the CONTEXT_* constants
     * @param  string  quote character that ends the attribute (used by CONTEXT_ATTRIBUTE)
     * @return Parser  provides a fluent interface
     */
    public function setContext($context, $quote = NULL)
    {
        $this->context = array($context, $quote);
        return $this;
    }


    /**
     * Activates one of the delimiter pairs registered in $syntaxes.
     *
     * @param  string  syntax name; a falsy value selects $defaultSyntax
     * @return Parser  provides a fluent interface
     * @throws InvalidArgumentException  when the name is not a key of $syntaxes
     */
    public function setSyntax($type)
    {
        $type = $type ? $type : $this->defaultSyntax;
        if (isset($this->syntaxes[$type])) {
            $this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
        } else {
            throw new InvalidArgumentException("Unknown syntax '$type'");
        }
        return $this;
    }


    /**
     * Builds the macro regexp fragment from a pair of delimiters. The fragment
     * is meant to be embedded in patterns using the x (extended) modifier.
     *
     * @param  string  left delimiter as a regular expression
     * @param  string  right delimiter as a regular expression
     * @return Parser  provides a fluent interface
     */
    public function setDelimiters($left, $right)
    {
        $this->macroRe = '
            (?P<comment>' . $left . '\\*.*?\\*' . $right . '\n{0,2})|
            ' . $left . '
                (?P<macro>(?:' . self::RE_STRING . '|\{
                        (?P<inner>' . self::RE_STRING . '|\{(?P>inner)\}|[^\'"{}])*+
                \}|[^\'"{}])+?)
            ' . $right . '
            (?P<rmargin>[ \t]*(?=\n))?
        ';
        return $this;
    }


    /**
     * Splits the content of a macro tag into name, arguments and modifiers.
     *
     * For tags without an explicit name (expression shortcuts) the name is the
     * short prefix or "=", and "|escape" is appended to the modifiers unless
     * the tag starts with "!", carries the |noescape modifier or is a closing
     * shortcut starting with "/".
     *
     * @param  string  tag content without delimiters
     * @return array|FALSE  array(name, trimmed arguments, modifiers), or FALSE when the tag does not match
     */
    public function parseMacroTag($tag)
    {
        $match = Strings::match($tag, '~^
            (
                (?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))| ## ?, name, /name, but not function( or class:: or namespace\
                (?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?) ## !expression, !=expression, ...
            )(?P<args>.*?)
            (?P<modifiers>\|[a-z](?:'.Parser::RE_STRING.'|[^\'"])*)?
            ()\z~isx');

        if (!$match) {
            return FALSE;
        }
        // Strip |noescape from the modifiers; $noescape receives the number of removals.
        $modifiers = preg_replace('#\|noescape\s?(?=\||\z)#i', '', $match['modifiers'], -1, $noescape);
        if ($match['name'] === '') {
            $match['name'] = $match['shortname'] ? $match['shortname'] : '=';
            if (!$noescape && !$match['noescape'] && substr($match['shortname'], 0, 1) !== '/') {
                $modifiers .= '|escape';
            }
        }
        return array($match['name'], trim($match['args']), $modifiers);
    }


    /**
     * Creates a token, appends it to the output and returns it.
     *
     * The line number is one plus the count of "\n" within the first
     * max(1, offset - 1) bytes of the input.
     *
     * @param  string  token type (LatteToken constant)
     * @param  string  source text covered by the token
     * @return LatteToken
     */
    private function addToken($type, $text)
    {
        $this->output[] = $token = new LatteToken;
        $token->type = $type;
        $token->text = $text;
        $token->line = substr_count($this->input, "\n", 0, max(1, $this->offset - 1)) + 1;
        return $token;
    }


    /**
     * Post-processes the most recently added token: handles syntax switching
     * ({syntax}, {/syntax}, n:syntax) and the {contentType} macro.
     *
     * @return void
     */
    protected function filter()
    {
        $token = end($this->output);
        if ($token->type === LatteToken::MACRO_TAG && $token->name === '/syntax') {
            $this->setSyntax($this->defaultSyntax);
            $token->type = LatteToken::COMMENT;

        } elseif ($token->type === LatteToken::MACRO_TAG && $token->name === 'syntax') {
            $this->setSyntax($token->value);
            $token->type = LatteToken::COMMENT;

        } elseif ($token->type === LatteToken::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
            $this->setSyntax($token->value);
            // The syntax is reverted once the closing tag of this element has ended.
            $this->syntaxEndTag = '/' . $this->lastHtmlTag;
            $token->type = LatteToken::COMMENT;

        } elseif ($token->type === LatteToken::HTML_TAG_END && $this->lastHtmlTag === $this->syntaxEndTag) {
            $this->setSyntax($this->defaultSyntax);

        } elseif ($token->type === LatteToken::MACRO_TAG && $token->name === 'contentType') {
            if (preg_match('#html|xml#', $token->value, $m)) {
                $this->xmlMode = $m[0] === 'xml';
                $this->setContext(self::CONTEXT_TEXT);
            } else {
                // Content types other than HTML/XML: only macros are recognised from here on.
                $this->setContext(self::CONTEXT_NONE);
            }
        }
    }

}
386: