1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette,
11: Nette\Diagnostics\Debugger;
12:
13:
14: 15: 16: 17: 18:
/**
 * String tools library.
 *
 * Static-only collection of helpers for string manipulation (mostly UTF-8 aware)
 * and thin wrappers around the preg_* functions that report failures by
 * throwing RegexpException instead of emitting PHP warnings.
 */
class Strings
{

    /**
     * Static class - cannot be instantiated.
     *
     * @throws Nette\StaticClassException always
     */
    final public function __construct()
    {
        throw new Nette\StaticClassException;
    }


    /**
     * Checks whether the string is valid in the given encoding.
     *
     * The string is valid when fixEncoding() returns it unchanged; consequently
     * a UTF-8 string that contains a BOM sequence is reported as invalid.
     *
     * @param  string $s        byte stream to check
     * @param  string $encoding expected encoding
     * @return bool
     */
    public static function checkEncoding($s, $encoding = 'UTF-8')
    {
        return $s === self::fixEncoding($s, $encoding);
    }


    /**
     * Returns the string with byte sequences invalid in the given encoding removed.
     *
     * For UTF-8 every BOM sequence (EF BB BF) is stripped as well.
     *
     * @param  string $s        byte stream to fix
     * @param  string $encoding encoding of $s
     * @return string
     */
    public static function fixEncoding($s, $encoding = 'UTF-8')
    {
        if (strcasecmp($encoding, 'UTF-8') === 0) {
            $s = str_replace("\xEF\xBB\xBF", '', $s); // strip UTF-8 BOM
        }
        if (PHP_VERSION_ID >= 50400) {
            // Drop invalid sequences instead of replacing them, but only for the
            // duration of this call: the previous substitute character is restored
            // so that the caller's mbstring configuration is not changed.
            $previous = mb_substitute_character();
            mb_substitute_character('none');
            try {
                $fixed = mb_convert_encoding($s, $encoding, $encoding);
            } catch (\Exception $e) {
                mb_substitute_character($previous);
                throw $e;
            } catch (\Throwable $e) { // PHP 7+ errors, e.g. an unknown encoding name
                mb_substitute_character($previous);
                throw $e;
            }
            mb_substitute_character($previous);
            return $fixed;
        }
        // legacy path: round-trip through UTF-16 and let iconv ignore invalid input (intentionally @)
        return @iconv('UTF-16', $encoding . '//IGNORE', iconv($encoding, 'UTF-16//IGNORE', $s));
    }


    /**
     * Returns the character with the given code point, encoded in $encoding.
     *
     * @param  int    $code     code point, packed as a big-endian 32-bit integer (UTF-32BE)
     * @param  string $encoding target encoding
     * @return string
     */
    public static function chr($code, $encoding = 'UTF-8')
    {
        return iconv('UTF-32BE', $encoding . '//IGNORE', pack('N', $code));
    }


    /**
     * Does $haystack start with $needle? (byte-wise comparison)
     *
     * @param  string $haystack
     * @param  string $needle
     * @return bool   TRUE also for an empty needle
     */
    public static function startsWith($haystack, $needle)
    {
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }


    /**
     * Does $haystack end with $needle? (byte-wise comparison)
     *
     * @param  string $haystack
     * @param  string $needle
     * @return bool   TRUE also for an empty needle
     */
    public static function endsWith($haystack, $needle)
    {
        return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
    }


    /**
     * Does $haystack contain $needle? (byte-wise search using strpos)
     *
     * @param  string $haystack
     * @param  string $needle
     * @return bool
     */
    public static function contains($haystack, $needle)
    {
        return strpos($haystack, $needle) !== FALSE;
    }


    /**
     * Returns a part of a UTF-8 string, measured in characters.
     *
     * @param  string   $s
     * @param  int      $start
     * @param  int|NULL $length NULL means "up to the end of the string"
     * @return string
     */
    public static function substring($s, $start, $length = NULL)
    {
        if ($length === NULL) {
            // the full character count is always enough to reach the end
            $length = self::length($s);
        }
        return function_exists('mb_substr')
            ? mb_substr($s, $start, $length, 'UTF-8')
            : iconv_substr($s, $start, $length, 'UTF-8');
    }


    /**
     * Normalizes line endings and strips control characters and trailing whitespace.
     *
     * @param  string $s UTF-8 text
     * @return string
     */
    public static function normalize($s)
    {
        // unify line endings to "\n"
        $s = str_replace("\r\n", "\n", $s);
        $s = strtr($s, "\r", "\n");

        // remove control characters; tab (\x09) and newline (\x0A) are kept
        $s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);

        // remove tabs and spaces at the end of every line
        $s = preg_replace('#[\t ]+$#m', '', $s);

        // remove leading and trailing blank lines
        $s = trim($s, "\n");

        return $s;
    }


    /**
     * Converts a UTF-8 string to ASCII using iconv transliteration.
     *
     * @param  string $s UTF-8 text
     * @return string
     */
    public static function toAscii($s)
    {
        // drop everything outside the whitelisted code point ranges
        $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
        // hide the quote-like characters of the input behind placeholders, so that
        // the same characters emitted by the transliteration can be removed below
        $s = strtr($s, '`\'"^~', "\x01\x02\x03\x04\x05");
        if (ICONV_IMPL === 'glibc') {
            $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s); // intentionally @
            // map the remaining WINDOWS-1250 bytes to their ASCII base letters
            $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
                . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
                . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
                . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96",
                "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-");
        } else {
            $s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s); // intentionally @
        }
        // remove quote-like characters produced by the transliteration ...
        $s = str_replace(array('`', "'", '"', '^', '~'), '', $s);
        // ... and bring back the ones that were part of the input
        return strtr($s, "\x01\x02\x03\x04\x05", '`\'"^~');
    }


    /**
     * Converts a string to a web-safe form: ASCII letters, digits and the
     * characters from $charlist; every other run of characters becomes one '-'.
     *
     * @param  string      $s        UTF-8 text
     * @param  string|NULL $charlist additional allowed characters
     * @param  bool        $lower    convert to lower case?
     * @return string
     */
    public static function webalize($s, $charlist = NULL, $lower = TRUE)
    {
        $s = self::toAscii($s);
        if ($lower) {
            $s = strtolower($s);
        }
        // explicit cast: preg_quote() expects a string, the default charlist is NULL
        $allowed = preg_quote((string) $charlist, '#');
        $s = preg_replace('#[^a-z0-9' . $allowed . ']+#i', '-', $s);
        return trim($s, '-');
    }


    /**
     * Truncates a UTF-8 string to at most $maxLen characters including $append.
     *
     * The cut is preferably made in front of whitespace or an ASCII
     * non-alphanumeric character; when no such position exists within the
     * limit, the string is cut exactly at the limit.
     *
     * @param  string $s      UTF-8 text
     * @param  int    $maxLen maximal length of the result in characters
     * @param  string $append text appended to a truncated string (default: ellipsis)
     * @return string
     */
    public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
    {
        if (self::length($s) <= $maxLen) {
            return $s;
        }

        $maxLen = $maxLen - self::length($append);
        if ($maxLen < 1) {
            return $append;
        }

        $matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us');
        if ($matches) {
            return $matches[0] . $append;
        }
        return self::substring($s, 0, $maxLen) . $append;
    }


    /**
     * Indents every non-empty line of the text.
     *
     * @param  string $s     text to indent
     * @param  int    $level how many times $chars is repeated; values below 1 leave $s untouched
     * @param  string $chars indentation unit
     * @return string
     */
    public static function indent($s, $level = 1, $chars = "\t")
    {
        if ($level < 1) {
            return $s;
        }
        // matches the start of the text or a run of line breaks, when followed by a non-break character
        return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
    }


    /**
     * Converts a UTF-8 string to lower case.
     *
     * @param  string $s
     * @return string
     */
    public static function lower($s)
    {
        return mb_strtolower($s, 'UTF-8');
    }


    /**
     * Converts a UTF-8 string to upper case.
     *
     * @param  string $s
     * @return string
     */
    public static function upper($s)
    {
        return mb_strtoupper($s, 'UTF-8');
    }


    /**
     * Converts the first character of a UTF-8 string to upper case.
     *
     * @param  string $s
     * @return string
     */
    public static function firstUpper($s)
    {
        return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
    }


    /**
     * Converts a UTF-8 string to title case (mb_convert_case with MB_CASE_TITLE).
     *
     * @param  string $s
     * @return string
     */
    public static function capitalize($s)
    {
        return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
    }


    /**
     * Case-insensitive comparison of two UTF-8 strings or of their parts.
     *
     * @param  string   $left
     * @param  string   $right
     * @param  int|NULL $len NULL compares whole strings, a positive value the first
     *                       $len characters, a negative value the last |$len| characters
     * @return bool
     */
    public static function compare($left, $right, $len = NULL)
    {
        if ($len < 0) {
            $left = self::substring($left, $len, -$len);
            $right = self::substring($right, $len, -$len);
        } elseif ($len !== NULL) {
            $left = self::substring($left, 0, $len);
            $right = self::substring($right, 0, $len);
        }
        return self::lower($left) === self::lower($right);
    }


    /**
     * Returns the length of a UTF-8 string in characters.
     *
     * @param  string $s
     * @return int
     */
    public static function length($s)
    {
        // mb_strlen is preferred; utf8_decode() is kept only as a fallback for
        // installations without mbstring (it is deprecated in recent PHP versions)
        return function_exists('mb_strlen') ? mb_strlen($s, 'UTF-8') : strlen(utf8_decode($s));
    }


    /**
     * Strips the given characters from the beginning and end of a UTF-8 string.
     *
     * @param  string $s
     * @param  string $charlist characters to strip; the default is ASCII whitespace,
     *                          NUL and the no-break space (C2 A0)
     * @return string
     * @throws RegexpException when the subject or the character list is rejected by PCRE
     */
    public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
    {
        $charlist = preg_quote($charlist, '#');
        return self::replace($s, '#^['.$charlist.']+|['.$charlist.']+\z#u', '');
    }


    /**
     * Pads a UTF-8 string on the left to $length characters.
     *
     * @param  string $s
     * @param  int    $length desired length in characters
     * @param  string $pad    padding text, repeated and cut as needed
     * @return string $s unchanged when it is already long enough or $pad is empty
     */
    public static function padLeft($s, $length, $pad = ' ')
    {
        $missing = max(0, $length - self::length($s));
        $padLen = self::length($pad);
        if ($padLen === 0) {
            return $s; // nothing to pad with; also avoids division by zero
        }
        return str_repeat($pad, (int) ($missing / $padLen)) . self::substring($pad, 0, $missing % $padLen) . $s;
    }


    /**
     * Pads a UTF-8 string on the right to $length characters.
     *
     * @param  string $s
     * @param  int    $length desired length in characters
     * @param  string $pad    padding text, repeated and cut as needed
     * @return string $s unchanged when it is already long enough or $pad is empty
     */
    public static function padRight($s, $length, $pad = ' ')
    {
        $missing = max(0, $length - self::length($s));
        $padLen = self::length($pad);
        if ($padLen === 0) {
            return $s; // nothing to pad with; also avoids division by zero
        }
        return $s . str_repeat($pad, (int) ($missing / $padLen)) . self::substring($pad, 0, $missing % $padLen);
    }


    /**
     * Reverses a UTF-8 string character by character.
     *
     * @param  string $s
     * @return string
     */
    public static function reverse($s)
    {
        // reversing the bytes of a UTF-32BE string yields the reversed text in UTF-32LE
        return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
    }


    /**
     * Generates a random string.
     *
     * @param  int    $length   number of characters to generate; values below 1 give ''
     * @param  string $charlist allowed characters; "X-Y" is expanded to the range X..Y
     * @return string
     */
    public static function random($length = 10, $charlist = '0-9a-z')
    {
        if ($length < 1) {
            return ''; // the byte sources below must not be asked for zero bytes
        }

        // expand "a-z" style ranges and shuffle the resulting pool
        $charlist = str_shuffle(preg_replace_callback('#.-.#', function($m) {
            return implode('', range($m[0][0], $m[0][2]));
        }, $charlist));
        $chLen = strlen($charlist);

        // pick the first available source of random bytes
        if (function_exists('openssl_random_pseudo_bytes')
            && (PHP_VERSION_ID >= 50400 || !defined('PHP_WINDOWS_VERSION_BUILD'))
        ) {
            $rand3 = openssl_random_pseudo_bytes($length);
        }
        if (empty($rand3) && function_exists('mcrypt_create_iv') && (PHP_VERSION_ID >= 50307 || !defined('PHP_WINDOWS_VERSION_BUILD'))) {
            $rand3 = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);
        }
        if (empty($rand3) && @is_readable('/dev/urandom')) {
            // offset 0 = read from the current position; a negative offset would
            // request a seek from the end of the stream on PHP 7.1+
            $rand3 = file_get_contents('/dev/urandom', FALSE, NULL, 0, $length);
        }
        if (empty($rand3)) {
            static $cache;
            $rand3 = $cache ?: $cache = md5(serialize($_SERVER), TRUE);
        }

        $s = '';
        for ($i = 0; $i < $length; $i++) {
            if ($i % 5 === 0) {
                // refresh the time-based part every five characters
                list($rand, $rand2) = explode(' ', microtime());
                $rand += lcg_value();
            }
            $rand *= $chLen;
            // explicit integer cast: the modulo operator works on integers only
            $s .= $charlist[(int) ($rand + $rand2 + ord($rand3[$i % strlen($rand3)])) % $chLen];
            $rand -= (int) $rand;
        }
        return $s;
    }


    /**
     * Splits a string by a regular expression; captured delimiters are included
     * in the result (PREG_SPLIT_DELIM_CAPTURE is always set).
     *
     * @param  string $subject
     * @param  string $pattern
     * @param  int    $flags   additional PREG_SPLIT_* flags
     * @return array
     * @throws RegexpException on a PCRE warning or error
     */
    public static function split($subject, $pattern, $flags = 0)
    {
        self::catchPcreWarnings($pattern);
        $res = preg_split($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
        restore_error_handler();
        self::checkPcreError($pattern);
        return $res;
    }


    /**
     * Performs a regular expression match.
     *
     * @param  string $subject
     * @param  string $pattern
     * @param  int    $flags   flags passed to preg_match()
     * @param  int    $offset  offset in bytes
     * @return array|NULL the matches, or NULL when nothing matched or the
     *                    offset lies behind the end of $subject
     * @throws RegexpException on a PCRE warning or error
     */
    public static function match($subject, $pattern, $flags = 0, $offset = 0)
    {
        if ($offset > strlen($subject)) {
            return NULL;
        }
        self::catchPcreWarnings($pattern);
        $res = preg_match($pattern, $subject, $m, $flags, $offset);
        restore_error_handler();
        self::checkPcreError($pattern);
        return $res ? $m : NULL;
    }


    /**
     * Performs a global regular expression match.
     *
     * Results are in PREG_SET_ORDER unless PREG_PATTERN_ORDER is requested in $flags.
     *
     * @param  string $subject
     * @param  string $pattern
     * @param  int    $flags   flags passed to preg_match_all()
     * @param  int    $offset  offset in bytes
     * @return array  empty array when the offset lies behind the end of $subject
     * @throws RegexpException on a PCRE warning or error
     */
    public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
    {
        if ($offset > strlen($subject)) {
            return array();
        }
        self::catchPcreWarnings($pattern);
        preg_match_all(
            $pattern, $subject, $m,
            ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
            $offset
        );
        restore_error_handler();
        self::checkPcreError($pattern);
        return $m;
    }


    /**
     * Performs a regular expression search and replace.
     *
     * @param  string       $subject
     * @param  string|array $pattern     pattern(s); when $replacement is NULL, an array
     *                                   in the form pattern => replacement
     * @param  mixed        $replacement replacement string(s), or a callback given as an
     *                                   object or array (closure, Nette\Callback, array callable)
     * @param  int          $limit       maximal number of replacements, -1 for no limit
     * @return string
     * @throws RegexpException on a PCRE warning or error
     * @throws Nette\InvalidStateException when the callback is not callable
     */
    public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
    {
        if (is_object($replacement) || is_array($replacement)) {
            if ($replacement instanceof Nette\Callback) {
                $replacement = $replacement->getNative();
            }
            if (!is_callable($replacement, FALSE, $textual)) {
                throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
            }

            // Compile every pattern against an empty subject first, so that a broken
            // pattern is reported before the user callback runs and warnings raised
            // inside the callback are not mistaken for PCRE errors.
            foreach ((array) $pattern as $tmp) {
                self::catchPcreWarnings($tmp);
                preg_match($tmp, '');
                restore_error_handler();
            }

            $res = preg_replace_callback($pattern, $replacement, $subject, $limit);
            if ($res === NULL && preg_last_error()) {
                // $pattern may be an array, so it is joined the same way as in the non-callback branch
                throw new RegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
            }
            return $res;

        } elseif ($replacement === NULL && is_array($pattern)) {
            // array in the form pattern => replacement
            $replacement = array_values($pattern);
            $pattern = array_keys($pattern);
        }

        $patternText = implode(' or ', (array) $pattern);
        self::catchPcreWarnings($patternText);
        $res = preg_replace($pattern, $replacement, $subject, $limit);
        restore_error_handler();
        self::checkPcreError($patternText);
        return $res;
    }


    /**
     * Installs a temporary error handler that turns the next PHP error into a
     * RegexpException. The handler removes itself before throwing; when nothing
     * is raised the caller has to call restore_error_handler().
     *
     * @param  string $pattern pattern text mentioned in the exception message
     * @return void
     */
    private static function catchPcreWarnings($pattern)
    {
        set_error_handler(function($severity, $message) use ($pattern) {
            restore_error_handler();
            throw new RegexpException("$message in pattern: $pattern");
        });
    }


    /**
     * Throws RegexpException when the last PCRE call reported an error.
     *
     * @param  string $pattern pattern text mentioned in the exception message
     * @return void
     * @throws RegexpException
     */
    private static function checkPcreError($pattern)
    {
        $code = preg_last_error();
        if ($code) {
            throw new RegexpException(NULL, $code, $pattern);
        }
    }

}
535:
536:
537: 538: 539:
/**
 * The exception that indicates a failure of a PCRE (preg_*) function call.
 */
class RegexpException extends \Exception
{
    /** @var array  PCRE error code => human-readable message */
    public static $messages = array(
        PREG_INTERNAL_ERROR => 'Internal error',
        PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
        PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
        5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
        6 => 'JIT stack limit was exhausted', // PREG_JIT_STACKLIMIT_ERROR (constant exists since PHP 7 only)
    );

    /**
     * @param  string|NULL       $message explicit message; when empty, one is built from $code and $pattern
     * @param  int|NULL          $code    PCRE error code as returned by preg_last_error()
     * @param  string|array|NULL $pattern offending pattern(s), appended to a generated message
     */
    public function __construct($message, $code = NULL, $pattern = NULL)
    {
        if (!$message) {
            $message = isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error';
            if ($pattern) {
                // a list of patterns is joined instead of being printed as "Array"
                $patternText = is_array($pattern) ? implode(' or ', $pattern) : $pattern;
                $message .= " (pattern: $patternText)";
            }
        }
        // Exception expects an integer code; the default NULL becomes 0
        parent::__construct($message, (int) $code);
    }

}
559: