1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15: 16: 17:
/**
 * Static helpers for working with strings. Unless stated otherwise the
 * methods treat their input as UTF-8 encoded text.
 */
class Strings
{

    /**
     * Static class - cannot be instantiated.
     * @throws Nette\StaticClassException always
     */
    final public function __construct()
    {
        throw new Nette\StaticClassException;
    }


    /**
     * Checks whether the string is valid UTF-8, i.e. whether fixEncoding()
     * leaves it untouched.
     * @param  string  byte stream to check
     * @return bool
     */
    public static function checkEncoding($s)
    {
        // A second (encoding) argument is still tolerated, but anything other than UTF-8 is deprecated.
        if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
            trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
        }
        return $s === self::fixEncoding($s);
    }


    /**
     * Removes byte sequences that are not valid UTF-8 from the string.
     * @param  string  byte stream to fix
     * @return string
     */
    public static function fixEncoding($s)
    {
        // A second (encoding) argument is still tolerated, but anything other than UTF-8 is deprecated.
        if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
            trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
        }

        if (PHP_VERSION_ID < 50400) {
            // Round trip through UTF-16 with //IGNORE drops the invalid sequences.
            return @iconv('UTF-16', 'UTF-8//IGNORE', iconv('UTF-8', 'UTF-16//IGNORE', $s));
        } else {
            // ENT_IGNORE makes htmlspecialchars() discard invalid sequences; decoding restores the entities.
            return htmlspecialchars_decode(htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8'), ENT_NOQUOTES);
        }
    }


    /**
     * Returns the UTF-8 encoded character for the given code point.
     * @param  int  code point
     * @return string
     */
    public static function chr($code)
    {
        // A second (encoding) argument is still tolerated, but anything other than UTF-8 is deprecated.
        if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
            trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
        }
        // pack('N') yields the 32-bit big-endian form, which is the UTF-32BE code unit.
        return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', $code));
    }


    /**
     * Does $haystack start with $needle? (byte-wise comparison)
     * @param  string
     * @param  string
     * @return bool
     */
    public static function startsWith($haystack, $needle)
    {
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }


    /**
     * Does $haystack end with $needle? (byte-wise comparison)
     * An empty needle always matches.
     * @param  string
     * @param  string
     * @return bool
     */
    public static function endsWith($haystack, $needle)
    {
        return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
    }


    /**
     * Does $haystack contain $needle? (byte-wise search)
     * @param  string
     * @param  string
     * @return bool
     */
    public static function contains($haystack, $needle)
    {
        return strpos($haystack, $needle) !== FALSE;
    }


    /**
     * Returns a part of the UTF-8 string, measured in characters.
     * Uses mbstring when available and falls back to iconv otherwise.
     * @param  string
     * @param  int     start position
     * @param  int     number of characters; NULL means "to the end"
     * @return string
     */
    public static function substring($s, $start, $length = NULL)
    {
        if (function_exists('mb_substr')) {
            if ($length === NULL && PHP_VERSION_ID < 50408) {
                // NOTE(review): presumably mb_substr() did not accept a NULL length before PHP 5.4.8 - confirm
                $length = self::length($s);
            }
            return mb_substr($s, $start, $length, 'UTF-8');
        } elseif ($length === NULL) {
            $length = self::length($s);
        } elseif ($start < 0 && $length < 0) {
            // NOTE(review): looks like a workaround for iconv_substr() with negative start and length - confirm
            $start += self::length($s);
        }
        return iconv_substr($s, $start, $length, 'UTF-8');
    }


    /**
     * Normalizes the string: unifies line endings to "\n", removes control
     * characters, strips trailing spaces/tabs on every line and removes
     * leading and trailing blank lines.
     * @param  string
     * @return string
     */
    public static function normalize($s)
    {
        $s = self::normalizeNewLines($s);

        // remove control characters; tab (\x09) and line feed (\x0A) are kept
        $s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);

        // right trim of each line
        $s = preg_replace('#[\t ]+$#m', '', $s);

        // leading and trailing blank lines
        $s = trim($s, "\n");

        return $s;
    }


    /**
     * Converts "\r\n" and "\r" line endings to "\n".
     * @param  string
     * @return string
     */
    public static function normalizeNewLines($s)
    {
        return str_replace(array("\r\n", "\r"), "\n", $s);
    }


    /**
     * Converts the UTF-8 string to ASCII by transliteration.
     * @param  string  UTF-8 encoding
     * @return string  ASCII
     */
    public static function toAscii($s)
    {
        // drop characters outside the ranges allowed by this class
        $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
        // Hide the characters `'"^~? that are really in the input behind \x01-\x06, so the cleanup
        // after the conversion removes only the ones the conversion itself introduced.
        $s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
        // typographic quotes and the degree sign map to the placeholders of ", ' and ^
        $s = str_replace(
            array("\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0"),
            array("\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04"), $s
        );
        if (ICONV_IMPL === 'glibc') {
            $s = str_replace(
                array("\xC2\xBB", "\xC2\xAB", "\xE2\x80\xA6", "\xE2\x84\xA2", "\xC2\xA9", "\xC2\xAE"),
                array('>>', '<<', '...', 'TM', '(c)', '(R)'), $s
            );
            // glibc branch: go through WINDOWS-1250 and map its non-ASCII bytes to ASCII letters by table
            $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
            $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
                . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
                . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
                . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
                . "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
                'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.');
            // whatever is still non-ASCII is removed
            $s = preg_replace('#[^\x00-\x7F]++#', '', $s);
        } else {
            $s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
        }
        // remove the marks produced by the conversion, then restore the hidden originals
        $s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s);
        return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
    }


    /**
     * Converts the string to a web-safe form: ASCII letters and digits
     * (plus characters from $charlist), everything else collapsed to '-'.
     * @param  string  UTF-8 encoding
     * @param  string  additional allowed characters
     * @param  bool    convert to lower case?
     * @return string
     */
    public static function webalize($s, $charlist = NULL, $lower = TRUE)
    {
        $s = self::toAscii($s);
        if ($lower) {
            $s = strtolower($s);
        }
        // explicit string cast: passing NULL to preg_quote() is deprecated since PHP 8.1
        $s = preg_replace('#[^a-z0-9' . preg_quote((string) $charlist, '#') . ']+#i', '-', $s);
        $s = trim($s, '-');
        return $s;
    }


    /**
     * Truncates the string to at most $maxLen characters (including
     * $append), preferring to cut in front of a whitespace or ASCII
     * punctuation character.
     * @param  string  UTF-8 encoding
     * @param  int     maximal length in characters
     * @param  string  text appended when the string is truncated
     * @return string
     */
    public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
    {
        if (self::length($s) > $maxLen) {
            // reserve room for the appended text
            $maxLen = $maxLen - self::length($append);
            if ($maxLen < 1) {
                return $append;

            } elseif ($matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us')) {
                // longest prefix that is followed by whitespace or ASCII punctuation
                return $matches[0] . $append;

            } else {
                // no suitable boundary found: hard cut
                return self::substring($s, 0, $maxLen) . $append;
            }
        }
        return $s;
    }


    /**
     * Indents every non-empty line of the string from the left.
     * @param  string
     * @param  int     number of repetitions of $chars; values below 1 leave the string unchanged
     * @param  string  indentation unit
     * @return string
     */
    public static function indent($s, $level = 1, $chars = "\t")
    {
        if ($level > 0) {
            // insert the indentation at the start and after each run of line breaks that is followed by content
            $s = self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
        }
        return $s;
    }


    /**
     * Converts the UTF-8 string to lower case (requires mbstring).
     * @param  string
     * @return string
     */
    public static function lower($s)
    {
        return mb_strtolower($s, 'UTF-8');
    }


    /**
     * Converts the UTF-8 string to upper case (requires mbstring).
     * @param  string
     * @return string
     */
    public static function upper($s)
    {
        return mb_strtoupper($s, 'UTF-8');
    }


    /**
     * Converts the first character of the UTF-8 string to upper case.
     * @param  string
     * @return string
     */
    public static function firstUpper($s)
    {
        return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
    }


    /**
     * Converts the UTF-8 string to title case (requires mbstring).
     * @param  string
     * @return string
     */
    public static function capitalize($s)
    {
        return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
    }


    /**
     * Case-insensitive comparison of two UTF-8 strings or of their parts.
     * @param  string
     * @param  string
     * @param  int     NULL compares whole strings, a positive value the first
     *                 $len characters, a negative value the last -$len characters
     * @return bool
     */
    public static function compare($left, $right, $len = NULL)
    {
        if ($len < 0) {
            $left = self::substring($left, $len, -$len);
            $right = self::substring($right, $len, -$len);
        } elseif ($len !== NULL) {
            $left = self::substring($left, 0, $len);
            $right = self::substring($right, 0, $len);
        }
        return self::lower($left) === self::lower($right);
    }


    /**
     * Finds the longest common prefix of the given strings. The result never
     * ends in the middle of a multi-byte UTF-8 character.
     * @param  array|string  array of strings, or the strings as separate arguments
     * @return string
     */
    public static function findPrefix($strings)
    {
        if (!is_array($strings)) {
            $strings = func_get_args();
        }
        $first = array_shift($strings);
        $firstLen = strlen((string) $first); // loop-invariant, computed once
        for ($i = 0; $i < $firstLen; $i++) {
            foreach ($strings as $s) {
                if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
                    // step back over continuation bytes (0x80-0xBF) so a character is not split
                    while ($i && $first[$i-1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
                        $i--;
                    }
                    return substr($first, 0, $i);
                }
            }
        }
        return $first;
    }


    /**
     * Returns the length of the UTF-8 string in characters.
     * @param  string
     * @return int
     */
    public static function length($s)
    {
        return function_exists('mb_strlen') ? mb_strlen($s, 'UTF-8') : strlen(utf8_decode($s));
    }


    /**
     * Strips the given characters from both ends of the UTF-8 string.
     * @param  string
     * @param  string  characters to strip; by default whitespace, NUL and the no-break space
     * @return string
     */
    public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
    {
        $charlist = preg_quote($charlist, '#');
        return self::replace($s, '#^['.$charlist.']+|['.$charlist.']+\z#u', '');
    }


    /**
     * Pads the UTF-8 string on the left to the given length.
     * @param  string
     * @param  int     target length in characters
     * @param  string  padding; an empty padding leaves the string unchanged
     * @return string
     */
    public static function padLeft($s, $length, $pad = ' ')
    {
        $length = max(0, $length - self::length($s));
        $padLen = self::length($pad);
        if ($padLen < 1) {
            // nothing to pad with; also avoids division by zero below
            return $s;
        }
        // explicit int cast: the quotient is a float whenever $padLen does not divide $length
        return str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen) . $s;
    }


    /**
     * Pads the UTF-8 string on the right to the given length.
     * @param  string
     * @param  int     target length in characters
     * @param  string  padding; an empty padding leaves the string unchanged
     * @return string
     */
    public static function padRight($s, $length, $pad = ' ')
    {
        $length = max(0, $length - self::length($s));
        $padLen = self::length($pad);
        if ($padLen < 1) {
            // nothing to pad with; also avoids division by zero below
            return $s;
        }
        // explicit int cast: the quotient is a float whenever $padLen does not divide $length
        return $s . str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen);
    }


    /**
     * Reverses the UTF-8 string character by character.
     * @param  string
     * @return string
     */
    public static function reverse($s)
    {
        // Reversing the bytes of UTF-32BE text gives the reversed text in UTF-32LE.
        return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
    }


    /**
     * Generates a random string; delegates to Random::generate().
     * @param  int
     * @param  string
     * @return string
     */
    public static function random($length = 10, $charlist = '0-9a-z')
    {
        return Random::generate($length, $charlist);
    }


    /**
     * Splits the string by a regular expression. Captured delimiters are
     * always included in the result (PREG_SPLIT_DELIM_CAPTURE).
     * @param  string
     * @param  string  PCRE pattern
     * @param  int     additional preg_split() flags
     * @return array
     * @throws RegexpException
     */
    public static function split($subject, $pattern, $flags = 0)
    {
        return self::pcre('preg_split', array($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE));
    }


    /**
     * Performs a regular expression match.
     * @param  string
     * @param  string  PCRE pattern
     * @param  int     preg_match() flags
     * @param  int     offset in bytes
     * @return array|NULL  matches, or NULL when nothing matched or the offset lies beyond the subject
     * @throws RegexpException
     */
    public static function match($subject, $pattern, $flags = 0, $offset = 0)
    {
        if ($offset > strlen($subject)) {
            return NULL;
        }
        return self::pcre('preg_match', array($pattern, $subject, & $m, $flags, $offset))
            ? $m
            : NULL;
    }


    /**
     * Performs a global regular expression match. Results are grouped per
     * match (PREG_SET_ORDER) unless PREG_PATTERN_ORDER is requested.
     * @param  string
     * @param  string  PCRE pattern
     * @param  int     preg_match_all() flags
     * @param  int     offset in bytes
     * @return array   empty array when the offset lies beyond the subject
     * @throws RegexpException
     */
    public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
    {
        if ($offset > strlen($subject)) {
            return array();
        }
        self::pcre('preg_match_all', array(
            $pattern, $subject, & $m,
            ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
            $offset,
        ));
        return $m;
    }


    /**
     * Performs a regular expression search and replace.
     * @param  string
     * @param  string|array  PCRE pattern, or a pattern => replacement map when $replacement is NULL
     * @param  string|array|object  replacement text; an object or array is used as a callback
     * @param  int           maximal number of replacements, -1 for no limit
     * @return string
     * @throws Nette\InvalidStateException when the callback is not callable
     * @throws RegexpException
     */
    public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
    {
        if (is_object($replacement) || is_array($replacement)) {
            if ($replacement instanceof Nette\Callback) {
                $replacement = $replacement->getNative();
            }
            if (!is_callable($replacement, FALSE, $textual)) {
                throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
            }

            return self::pcre('preg_replace_callback', array($pattern, $replacement, $subject, $limit));

        } elseif ($replacement === NULL && is_array($pattern)) {
            // pattern => replacement map
            $replacement = array_values($pattern);
            $pattern = array_keys($pattern);
        }

        return self::pcre('preg_replace', array($pattern, $replacement, $subject, $limit));
    }


    /**
     * Invokes a preg_* function via Callback::invokeSafe() and turns both
     * the messages passed to the handler and preg_last_error() codes into
     * RegexpException.
     * @param  string  name of the preg_* function
     * @param  array   its arguments; the first one is the pattern (or an array of patterns)
     * @return mixed   result of the invoked function
     * @throws RegexpException
     * @internal
     */
    public static function pcre($func, $args)
    {
        $res = Callback::invokeSafe($func, $args, function ($message) use ($args) {
            // compile-time error in the pattern
            throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
        });

        // run-time error; the replace family counts as failed only when it returned NULL
        if (($code = preg_last_error())
            && ($res === NULL || !in_array($func, array('preg_filter', 'preg_replace_callback', 'preg_replace'), TRUE))
        ) {
            throw new RegexpException(NULL, $code, implode(' or ', (array) $args[0]));
        }
        return $res;
    }

}
528:
529:
530: 531: 532:
/**
 * Exception thrown when a regular expression fails to compile or execute.
 */
class RegexpException extends \Exception
{
    /** @var array  PCRE error code => description, used when no explicit message is given */
    public static $messages = array(
        PREG_INTERNAL_ERROR => 'Internal error',
        PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
        PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
        5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
        6 => 'Failed due to limited JIT stack space', // PREG_JIT_STACKLIMIT_ERROR (PHP 7+)
    );

    /**
     * @param  string  explicit message; when empty, one is derived from $code
     * @param  int     PCRE error code as returned by preg_last_error()
     * @param  string  offending pattern, appended to a derived message
     */
    public function __construct($message, $code = NULL, $pattern = NULL)
    {
        if (!$message) {
            $message = (isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error') . ($pattern ? " (pattern: $pattern)" : '');
        }
        // \Exception expects an integer code; NULL (the default) becomes 0
        parent::__construct($message, (int) $code);
    }

}
552: