1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15:
16: class Strings
17: {
18:
/** Default character list for trim(): space, tab, LF, CR, NUL, vertical tab and the bytes C2 A0 (UTF-8 no-break space). */
const TRIM_CHARACTERS = " \t\n\r\0\x0B\xC2\xA0";
20:
21:
22: 23: 24:
/**
 * Static class - instantiation is always refused.
 * @throws Nette\StaticClassException
 */
final public function __construct()
{
	throw new Nette\StaticClassException();
}
29:
30:
31: 32: 33: 34: 35:
/**
 * Checks whether the string is valid UTF-8, i.e. whether fixEncoding() leaves it untouched.
 * @param  string  byte stream to check
 * @return bool
 */
public static function checkEncoding($s)
{
	$repaired = self::fixEncoding($s);
	return $repaired === $s;
}
40:
41:
42: 43: 44: 45: 46:
/**
 * Removes invalid byte sequences from a UTF-8 string.
 * @param  string  byte stream to fix
 * @return string
 */
public static function fixEncoding($s)
{
	if (PHP_VERSION_ID >= 50400) {
		// round-trip through htmlspecialchars; ENT_IGNORE discards invalid sequences
		$escaped = htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8');
		return htmlspecialchars_decode($escaped, ENT_NOQUOTES);
	}

	// older PHP: round-trip through UTF-16 with //IGNORE, errors silenced on both conversions
	$utf16 = @iconv('UTF-8', 'UTF-16//IGNORE', $s);
	return @iconv('UTF-16', 'UTF-8//IGNORE', $utf16);
}
56:
57:
58: 59: 60: 61: 62: 63:
/**
 * Returns the UTF-8 encoded character for the given code point.
 * @param  int  code point in range 0x0..0xD7FF or 0xE000..0x10FFFF
 * @return string
 * @throws Nette\InvalidArgumentException if the code point is negative, a surrogate or above 0x10FFFF
 */
public static function chr($code)
{
	$isSurrogate = $code >= 0xD800 && $code <= 0xDFFF;
	if ($code < 0 || $isSurrogate || $code > 0x10FFFF) {
		throw new Nette\InvalidArgumentException('Code point must be in range 0x0 to 0xD7FF or 0xE000 to 0x10FFFF.');
	}

	// encode as a single big-endian UTF-32 unit and let iconv convert it
	$utf32 = pack('N', $code);
	return iconv('UTF-32BE', 'UTF-8//IGNORE', $utf32);
}
71:
72:
73: 74: 75: 76: 77: 78:
/**
 * Tells whether $haystack begins with $needle (byte-wise comparison).
 * @param  string
 * @param  string
 * @return bool
 */
public static function startsWith($haystack, $needle)
{
	$needleLen = strlen($needle);
	$diff = strncmp($haystack, $needle, $needleLen);
	return $diff === 0;
}
83:
84:
85: 86: 87: 88: 89: 90:
/**
 * Tells whether $haystack ends with $needle (byte-wise comparison).
 * An empty needle always matches.
 * @param  string
 * @param  string
 * @return bool
 */
public static function endsWith($haystack, $needle)
{
	$needleLen = strlen($needle);
	if ($needleLen === 0) {
		return TRUE;
	}
	$tail = substr($haystack, -$needleLen);
	return $tail === $needle;
}
95:
96:
97: 98: 99: 100: 101: 102:
/**
 * Tells whether $haystack contains $needle (byte-wise search).
 * An empty needle always matches, consistently with startsWith() and endsWith();
 * it is checked up front because strpos() rejects an empty needle with a warning before PHP 8.
 * @param  string
 * @param  string
 * @return bool
 */
public static function contains($haystack, $needle)
{
	return strlen($needle) === 0 || strpos($haystack, $needle) !== FALSE;
}
107:
108:
109: 110: 111: 112: 113: 114: 115:
/**
 * Returns a part of a UTF-8 string, with offsets and lengths counted in characters.
 * Uses mb_substr() when available, otherwise iconv_substr().
 * @param  string
 * @param  int  start offset, may be negative
 * @param  int  length, may be negative; NULL means up to the end
 * @return string
 */
public static function substring($s, $start, $length = NULL)
{
	$hasMb = function_exists('mb_substr');

	if ($length === NULL) {
		// mb_substr() needs an explicit length before PHP 5.4.8; iconv_substr() always gets one
		if (!$hasMb || PHP_VERSION_ID < 50408) {
			$length = self::length($s);
		}
	} elseif (!$hasMb && $start < 0 && $length < 0) {
		// make the negative start absolute before handing it to iconv_substr()
		$start += self::length($s);
	}

	return $hasMb
		? mb_substr($s, $start, $length, 'UTF-8')
		: iconv_substr($s, $start, $length, 'UTF-8');
}
130:
131:
132: 133: 134: 135: 136:
/**
 * Normalizes text: unifies line endings, strips control characters,
 * trims trailing tabs/spaces on every line and removes leading and trailing newlines.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function normalize($s)
{
	$cleanups = array(
		'#[\x00-\x08\x0B-\x1F\x7F-\x9F]+#u', // control characters
		'#[\t ]+$#m', // trailing tabs and spaces on each line
	);

	$s = self::normalizeNewLines($s);
	foreach ($cleanups as $pattern) {
		$s = preg_replace($pattern, '', $s);
	}

	// leading and trailing blank lines
	return trim($s, "\n");
}
152:
153:
154: 155: 156: 157: 158:
/**
 * Converts Windows (CRLF) and old Mac (CR) line endings to LF.
 * @param  string
 * @return string
 */
public static function normalizeNewLines($s)
{
	// CRLF must go first so that it does not turn into two line feeds
	$s = str_replace("\r\n", "\n", $s);
	return str_replace("\r", "\n", $s);
}
163:
164:
165: 166: 167: 168: 169:
/**
 * Converts a UTF-8 string to ASCII.
 * Uses the intl Transliterator when that class is already loaded, then iconv transliteration.
 * @param  string  UTF-8 encoding
 * @return string  ASCII
 */
public static function toAscii($s)
{
	static $transliterator = NULL;
	if ($transliterator === NULL && class_exists('Transliterator', FALSE)) {
		$transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
	}

	// characters that must survive transliteration, and the control bytes they are parked under
	$marks = '`\'"^~?';
	$parked = "\x01\x02\x03\x04\x05\x06";

	// drop everything outside the accepted character ranges
	$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
	$s = strtr($s, $marks, $parked);

	// typographic quotes and the degree sign are mapped onto the parked quote and caret bytes
	$typographic = array("\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0");
	$typographicParked = array("\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04");
	$s = str_replace($typographic, $typographicParked, $s);

	if ($transliterator !== NULL) {
		$s = $transliterator->transliterate($s);
	}

	if (ICONV_IMPL !== 'glibc') {
		$s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);

	} else {
		// glibc: spell out a few symbols, convert to WINDOWS-1250 and map its high bytes to ASCII by hand
		$symbols = array("\xC2\xBB", "\xC2\xAB", "\xE2\x80\xA6", "\xE2\x84\xA2", "\xC2\xA9", "\xC2\xAE");
		$symbolsAscii = array('>>', '<<', '...', 'TM', '(c)', '(R)');
		$s = str_replace($symbols, $symbolsAscii, $s);
		$s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);

		$highBytes = "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
			. "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
			. "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
			. "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
			. "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7";
		$asciiBytes = 'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.';
		$s = strtr($s, $highBytes, $asciiBytes);
		$s = preg_replace('#[^\x00-\x7F]++#', '', $s);
	}

	// marks present at this point were introduced by the conversion; remove them, then restore the parked ones
	$s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s);
	return strtr($s, $parked, $marks);
}
205:
206:
207: 208: 209: 210: 211: 212: 213:
/**
 * Converts a string to a web-safe form: ASCII letters, digits and the characters from $charlist,
 * with every other run of characters collapsed into a single dash.
 * @param  string  UTF-8 encoding
 * @param  string  additional allowed characters
 * @param  bool    convert to lower case?
 * @return string
 */
public static function webalize($s, $charlist = NULL, $lower = TRUE)
{
	$ascii = self::toAscii($s);
	if ($lower) {
		$ascii = strtolower($ascii);
	}

	$disallowed = '#[^a-z0-9' . preg_quote($charlist, '#') . ']+#i';
	$dashed = preg_replace($disallowed, '-', $ascii);
	return trim($dashed, '-');
}
224:
225:
226: 227: 228: 229: 230: 231: 232:
/**
 * Truncates a UTF-8 string to at most $maxLen characters including $append.
 * Prefers to cut right before whitespace or an ASCII punctuation character.
 * @param  string  UTF-8 encoding
 * @param  int     maximal length in characters
 * @param  string  text appended when the string is shortened (defaults to an ellipsis)
 * @return string
 */
public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
{
	if (!(self::length($s) > $maxLen)) {
		return $s; // already short enough
	}

	$maxLen = $maxLen - self::length($append);
	if ($maxLen < 1) {
		return $append; // no room for anything but the suffix
	}

	$matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us');
	if ($matches) {
		return $matches[0] . $append;
	}

	// no suitable boundary found, cut hard
	return self::substring($s, 0, $maxLen) . $append;
}
249:
250:
251: 252: 253: 254: 255: 256: 257:
/**
 * Indents every non-empty line of the text by $level repetitions of $chars.
 * @param  string  UTF-8 encoding
 * @param  int     indentation level; values that are not positive leave the text untouched
 * @param  string  indentation unit
 * @return string
 */
public static function indent($s, $level = 1, $chars = "\t")
{
	return $level > 0
		? self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level))
		: $s;
}
265:
266:
267: 268: 269: 270: 271:
/**
 * Converts a UTF-8 string to lower case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function lower($s)
{
	$lowered = mb_strtolower($s, 'UTF-8');
	return $lowered;
}
276:
277:
278: 279: 280: 281: 282:
/**
 * Converts the first character of a UTF-8 string to lower case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function firstLower($s)
{
	$head = self::substring($s, 0, 1);
	$rest = self::substring($s, 1);
	return self::lower($head) . $rest;
}
287:
288:
289: 290: 291: 292: 293:
/**
 * Converts a UTF-8 string to upper case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function upper($s)
{
	$uppered = mb_strtoupper($s, 'UTF-8');
	return $uppered;
}
298:
299:
300: 301: 302: 303: 304:
/**
 * Converts the first character of a UTF-8 string to upper case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function firstUpper($s)
{
	$head = self::substring($s, 0, 1);
	$rest = self::substring($s, 1);
	return self::upper($head) . $rest;
}
309:
310:
311: 312: 313: 314: 315:
/**
 * Converts a UTF-8 string to title case (first letter of every word upper-cased).
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function capitalize($s)
{
	$titled = mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
	return $titled;
}
320:
321:
322: 323: 324: 325: 326: 327: 328:
/**
 * Case-insensitive comparison of two UTF-8 strings or of their parts.
 * @param  string
 * @param  string
 * @param  int  NULL compares whole strings, a positive value the first $len characters,
 *              a negative value the last |$len| characters
 * @return bool
 */
public static function compare($left, $right, $len = NULL)
{
	if ($len !== NULL) {
		if ($len < 0) {
			// tail of both strings
			$start = $len;
			$count = -$len;
		} else {
			// head of both strings
			$start = 0;
			$count = $len;
		}
		$left = self::substring($left, $start, $count);
		$right = self::substring($right, $start, $count);
	}

	return self::lower($left) === self::lower($right);
}
340:
341:
342: 343: 344: 345: 346:
/**
 * Finds the longest common prefix of the given strings.
 * The result never ends in the middle of a multi-byte UTF-8 character.
 * @param  string|array  an array of strings, or the strings passed as separate arguments
 * @return string
 */
public static function findPrefix($strings)
{
	if (!is_array($strings)) {
		$strings = func_get_args();
	}

	$first = array_shift($strings);
	$firstLen = strlen($first);

	for ($i = 0; $i < $firstLen; $i++) {
		foreach ($strings as $other) {
			if (isset($other[$i]) && $first[$i] === $other[$i]) {
				continue; // still identical at this byte
			}

			// mismatch: step back over UTF-8 continuation bytes to a character boundary
			while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
				$i--;
			}
			return substr($first, 0, $i);
		}
	}

	return $first;
}
365:
366:
367: 368: 369: 370: 371: 372:
/**
 * Returns the number of characters (not bytes) in a UTF-8 string.
 * @param  string  UTF-8 encoding
 * @return int
 */
public static function length($s)
{
	if (function_exists('mb_strlen')) {
		return mb_strlen($s, 'UTF-8');
	}

	// fallback without mbstring: utf8_decode() yields one byte per character
	return strlen(utf8_decode($s));
}
377:
378:
379: 380: 381: 382: 383: 384:
/**
 * Strips the given characters from both ends of a UTF-8 string.
 * @param  string  UTF-8 encoding
 * @param  string  characters to strip
 * @return string
 */
public static function trim($s, $charlist = self::TRIM_CHARACTERS)
{
	$run = '[' . preg_quote($charlist, '#') . ']+';
	$pattern = '#^' . $run . '|' . $run . '\z#u';
	return self::replace($s, $pattern, '');
}
390:
391:
392: 393: 394: 395: 396: 397: 398:
/**
 * Pads a UTF-8 string on the left to the given length (in characters) with $pad.
 * @param  string  UTF-8 encoding
 * @param  int     target length in characters
 * @param  string  padding; the last repetition is cut to fit
 * @return string  the input is returned unpadded when it is already long enough or when $pad is empty
 */
public static function padLeft($s, $length, $pad = ' ')
{
	$padLen = self::length($pad);
	if (!$padLen) {
		// nothing to pad with; also avoids dividing by zero below
		return (string) $s;
	}

	$missing = max(0, $length - self::length($s));
	// str_repeat() expects an integer multiplier, so the quotient is truncated explicitly
	$whole = str_repeat($pad, (int) ($missing / $padLen));
	$partial = self::substring($pad, 0, $missing % $padLen);
	return $whole . $partial . $s;
}
405:
406:
407: 408: 409: 410: 411: 412: 413:
/**
 * Pads a UTF-8 string on the right to the given length (in characters) with $pad.
 * @param  string  UTF-8 encoding
 * @param  int     target length in characters
 * @param  string  padding; the last repetition is cut to fit
 * @return string  the input is returned unpadded when it is already long enough or when $pad is empty
 */
public static function padRight($s, $length, $pad = ' ')
{
	$padLen = self::length($pad);
	if (!$padLen) {
		// nothing to pad with; also avoids dividing by zero below
		return (string) $s;
	}

	$missing = max(0, $length - self::length($s));
	// str_repeat() expects an integer multiplier, so the quotient is truncated explicitly
	$whole = str_repeat($pad, (int) ($missing / $padLen));
	$partial = self::substring($pad, 0, $missing % $padLen);
	return $s . $whole . $partial;
}
420:
421:
422: 423: 424: 425: 426:
/**
 * Reverses a UTF-8 string character by character.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function reverse($s)
{
	// reversing the bytes of big-endian UTF-32 gives the characters in reverse order as little-endian UTF-32
	$utf32be = @iconv('UTF-8', 'UTF-32BE', $s);
	return @iconv('UTF-32LE', 'UTF-8', strrev($utf32be));
}
431:
432:
433: 434: 435: 436:
/**
 * Generates a random string; thin wrapper that forwards both arguments to Random::generate().
 * @param  int
 * @param  string
 * @return string
 */
public static function random($length = 10, $charlist = '0-9a-z')
{
	$generated = Random::generate($length, $charlist);
	return $generated;
}
441:
442:
443: 444: 445: 446: 447: 448: 449:
/**
 * Returns the part of $haystack before the $nth occurrence of $needle.
 * @param  string
 * @param  string
 * @param  int  negative value means searching from the end
 * @return string|FALSE  FALSE if the needle was not found
 */
public static function before($haystack, $needle, $nth = 1)
{
	$pos = self::pos($haystack, $needle, $nth);
	if ($pos === FALSE) {
		return FALSE;
	}
	return substr($haystack, 0, $pos);
}
457:
458:
459: 460: 461: 462: 463: 464: 465:
/**
 * Returns the part of $haystack after the $nth occurrence of $needle.
 * @param  string
 * @param  string
 * @param  int  negative value means searching from the end
 * @return string|FALSE  FALSE if the needle was not found
 */
public static function after($haystack, $needle, $nth = 1)
{
	$pos = self::pos($haystack, $needle, $nth);
	if ($pos === FALSE) {
		return FALSE;
	}
	$tailStart = $pos + strlen($needle);
	return (string) substr($haystack, $tailStart);
}
473:
474:
475: 476: 477: 478:
/**
 * Returns the byte position of the $nth occurrence of $needle in $haystack.
 * A positive $nth counts occurrences from the beginning, a negative one from the end.
 * @param  string
 * @param  string
 * @param  int
 * @return int|FALSE  FALSE when $nth is zero or there are not enough occurrences
 */
private static function pos($haystack, $needle, $nth = 1)
{
	if (!$nth) {
		return FALSE;
	}

	$needleIsEmpty = strlen($needle) === 0;

	if ($nth > 0) {
		if ($needleIsEmpty) {
			return 0;
		}
		$pos = 0;
		while (FALSE !== ($pos = strpos($haystack, $needle, $pos)) && --$nth) {
			$pos++; // continue right behind the start of the previous match
		}
		return $pos;
	}

	$len = strlen($haystack);
	if ($needleIsEmpty) {
		return $len;
	} elseif ($len === 0) {
		// nothing to search; the offset computed below would be out of range for strrpos()
		return FALSE;
	}

	$pos = $len - 1;
	while (FALSE !== ($pos = strrpos($haystack, $needle, $pos - $len)) && ++$nth) {
		if ($pos === 0) {
			// no room left for an earlier occurrence; the next offset would be out of range
			return FALSE;
		}
		$pos--;
	}
	return $pos;
}
503:
504:
505: 506: 507: 508: 509: 510: 511:
/**
 * Splits a string by a regular expression; captured delimiters are always included in the result.
 * @param  string
 * @param  string  regular expression
 * @param  int     PREG_SPLIT_* flags
 * @return array
 */
public static function split($subject, $pattern, $flags = 0)
{
	$args = array($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
	return self::pcre('preg_split', $args);
}
516:
517:
518: 519: 520: 521: 522: 523: 524: 525:
/**
 * Performs a regular expression match.
 * @param  string
 * @param  string  regular expression
 * @param  int     preg_match() flags
 * @param  int     offset in bytes
 * @return array|NULL  matches, or NULL when nothing matched or the offset lies behind the subject
 */
public static function match($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return NULL;
	}

	$found = self::pcre('preg_match', array($pattern, $subject, & $m, $flags, $offset));
	if ($found) {
		return $m;
	}
	return NULL;
}
535:
536:
537: 538: 539: 540: 541: 542: 543: 544:
/**
 * Performs a global regular expression match.
 * Results are grouped per match (PREG_SET_ORDER) unless PREG_PATTERN_ORDER is passed in $flags.
 * @param  string
 * @param  string  regular expression
 * @param  int     preg_match_all() flags
 * @param  int     offset in bytes
 * @return array   empty array when the offset lies behind the subject
 */
public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return array();
	}

	$orderedFlags = ($flags & PREG_PATTERN_ORDER)
		? $flags
		: ($flags | PREG_SET_ORDER);

	self::pcre('preg_match_all', array($pattern, $subject, & $m, $orderedFlags, $offset));
	return $m;
}
557:
558:
559: 560: 561: 562: 563: 564: 565: 566:
/**
 * Performs a regular expression search and replace.
 * @param  string
 * @param  string|array  regular expression, or a pattern => replacement map when $replacement is NULL
 * @param  string|callable|NULL  replacement text, or a callback (object/array) receiving the matches
 * @param  int  maximal number of replacements, -1 for unlimited
 * @return string
 * @throws Nette\InvalidStateException if an object/array replacement is not callable
 */
public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
{
	$func = 'preg_replace';

	if (is_object($replacement) || is_array($replacement)) {
		// objects and arrays are treated as callbacks
		if ($replacement instanceof Nette\Callback) {
			$replacement = $replacement->getNative();
		}
		if (!is_callable($replacement, FALSE, $textual)) {
			throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
		}
		$func = 'preg_replace_callback';

	} elseif ($replacement === NULL && is_array($pattern)) {
		// pattern => replacement map
		$replacement = array_values($pattern);
		$pattern = array_keys($pattern);
	}

	return self::pcre($func, array($pattern, $replacement, $subject, $limit));
}
586:
587:
588:
/**
 * Calls a preg_* function and turns its failures into RegexpException.
 * Messages reported during the call are handed to the handler passed to Callback::invokeSafe();
 * afterwards preg_last_error() is checked.
 * @param  string  name of the preg_* function
 * @param  array   its arguments; the first one is the pattern (or an array of patterns)
 * @return mixed   result of the called function
 * @throws RegexpException
 * @internal
 */
public static function pcre($func, $args)
{
	static $messages = array(
		PREG_INTERNAL_ERROR => 'Internal error',
		PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
		PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
		PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
		PREG_BAD_UTF8_OFFSET_ERROR => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point',
		6 => 'Failed due to limited JIT stack space',
	);

	$onError = function ($message) use ($args) {
		throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
	};
	$res = Callback::invokeSafe($func, $args, $onError);

	$code = preg_last_error();
	if (!$code) {
		return $res;
	}

	// replace-style functions are only treated as failed when they returned NULL
	if ($res !== NULL && in_array($func, array('preg_filter', 'preg_replace_callback', 'preg_replace'))) {
		return $res;
	}

	$text = isset($messages[$code]) ? $messages[$code] : 'Unknown error';
	throw new RegexpException($text . ' (pattern: ' . implode(' or ', (array) $args[0]) . ')', $code);
}
612:
613: }
614: