1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15: 16: 17:
18: class Strings
19: {
20:
21: 22: 23:
/**
 * Static class - cannot be instantiated.
 * @throws Nette\StaticClassException  on every call
 */
final public function __construct()
{
    throw new Nette\StaticClassException();
}
28:
29:
30: 31: 32: 33: 34: 35:
/**
 * Checks whether the string is valid for the given encoding.
 * The string passes when self::fixEncoding() hands it back unchanged.
 * @param  string  byte stream to check
 * @param  string  expected encoding
 * @return bool
 */
public static function checkEncoding($s, $encoding = 'UTF-8')
{
    $repaired = self::fixEncoding($s, $encoding);
    return $repaired === $s;
}
40:
41:
42: 43: 44: 45: 46: 47:
/**
 * Returns the string with byte sequences that are invalid in the given encoding dropped.
 * @param  string  byte stream to fix
 * @param  string  encoding the stream is supposed to be in
 * @return string
 */
public static function fixEncoding($s, $encoding = 'UTF-8')
{
    // UTF-8 on PHP 5.4+: an escape/unescape round trip with ENT_IGNORE discards invalid sequences.
    if (PHP_VERSION_ID >= 50400 && !strcasecmp($encoding, 'UTF-8')) {
        $escaped = htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8');
        return htmlspecialchars_decode($escaped, ENT_NOQUOTES);
    }

    // Everything else: convert to UTF-16 and back, asking iconv to ignore what it cannot convert.
    $utf16 = iconv($encoding, 'UTF-16//IGNORE', $s);
    return @iconv('UTF-16', $encoding . '//IGNORE', $utf16);
}
57:
58:
59: 60: 61: 62: 63: 64:
/**
 * Returns the character with the given code point, encoded in the requested encoding.
 * @param  int     code point
 * @param  string  target encoding
 * @return string
 */
public static function chr($code, $encoding = 'UTF-8')
{
    // The code point packed as a 32-bit big-endian integer is its UTF-32BE form.
    $utf32 = pack('N', $code);
    $target = $encoding . '//IGNORE';
    return iconv('UTF-32BE', $target, $utf32);
}
69:
70:
71: 72: 73: 74: 75: 76:
/**
 * Tells whether $haystack begins with $needle (byte-wise, case-sensitive).
 * @param  string
 * @param  string
 * @return bool
 */
public static function startsWith($haystack, $needle)
{
    $needleBytes = strlen($needle);
    return 0 === strncmp($haystack, $needle, $needleBytes);
}
81:
82:
83: 84: 85: 86: 87: 88:
/**
 * Tells whether $haystack ends with $needle (byte-wise, case-sensitive).
 * @param  string
 * @param  string
 * @return bool
 */
public static function endsWith($haystack, $needle)
{
    $needleBytes = strlen($needle);
    if ($needleBytes === 0) {
        // an empty needle matches the end of any string
        return TRUE;
    }
    return substr($haystack, -$needleBytes) === $needle;
}
93:
94:
95: 96: 97: 98: 99: 100:
/**
 * Tells whether $haystack contains $needle (byte-wise, case-sensitive).
 * An empty needle is considered to be contained in every string, which
 * matches how startsWith() and endsWith() treat it.
 * @param  string
 * @param  string
 * @return bool
 */
public static function contains($haystack, $needle)
{
    // Answer the empty-needle case here: strpos() warns about an empty needle
    // on PHP < 8 and reports "not found".
    if (strlen($needle) === 0) {
        return TRUE;
    }
    return strpos($haystack, $needle) !== FALSE;
}
105:
106:
107: 108: 109: 110: 111: 112: 113:
/**
 * Returns a part of a UTF-8 string.
 * Uses mb_substr() when the mbstring extension is loaded, iconv_substr() otherwise.
 * @param  string
 * @param  int     start offset in characters
 * @param  int     length in characters; NULL means "to the end of the string"
 * @return string
 */
public static function substring($s, $start, $length = NULL)
{
    $noLength = $length === NULL;

    if (function_exists('mb_substr')) {
        // before PHP 5.4.8 a NULL length is replaced by the full string length
        if ($noLength && PHP_VERSION_ID < 50408) {
            $length = self::length($s);
        }
        return mb_substr($s, $start, $length, 'UTF-8');
    }

    if ($noLength) {
        $length = self::length($s);
    } elseif ($start < 0 && $length < 0) {
        // both negative: turn the start into an offset from the beginning for iconv_substr()
        $start += self::length($s);
    }
    return iconv_substr($s, $start, $length, 'UTF-8');
}
128:
129:
130: 131: 132: 133: 134:
/**
 * Normalizes a string: unifies line endings, removes control characters,
 * strips trailing spaces/tabs from every line and trims blank lines at both ends.
 * @param  string
 * @return string
 */
public static function normalize($s)
{
    $unified = self::normalizeNewLines($s);

    // drop control characters (tab and line feed are kept)
    $withoutControls = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $unified);

    // right-trim every line
    $rightTrimmed = preg_replace('#[\t ]+$#m', '', $withoutControls);

    // remove leading and trailing empty lines
    return trim($rightTrimmed, "\n");
}
150:
151:
152: 153: 154: 155: 156:
/**
 * Converts "\r\n" and lone "\r" line endings to "\n".
 * @param  string
 * @return string
 */
public static function normalizeNewLines($s)
{
    // "\r\n" is listed first so a CRLF pair becomes a single "\n"
    $foreignBreaks = array("\r\n", "\r");
    return str_replace($foreignBreaks, "\n", $s);
}
161:
162:
163: 164: 165: 166: 167:
/**
 * Converts a UTF-8 string to ASCII by transliteration through iconv.
 * @param  string  UTF-8 encoding
 * @return string  ASCII
 */
public static function toAscii($s)
{
    // keep only tab, LF, CR, printable ASCII and the code point ranges listed in the class
    $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);

    // park the ASCII characters ` ' " ^ ~ ? on control bytes so they survive the clean-up after iconv
    $parked = "\x01\x02\x03\x04\x05\x06";
    $s = strtr($s, '`\'"^~?', $parked);

    // typographic quotes and guillemets get explicit ASCII stand-ins
    $typographic = array(
        "\xE2\x80\x9E" => "\x03",
        "\xE2\x80\x9C" => "\x03",
        "\xE2\x80\x9D" => "\x03",
        "\xE2\x80\x9A" => "\x02",
        "\xE2\x80\x98" => "\x02",
        "\xE2\x80\x99" => "\x02",
        "\xC2\xBB" => ">>",
        "\xC2\xAB" => "<<",
    );
    $s = str_replace(array_keys($typographic), array_values($typographic), $s);

    if (ICONV_IMPL === 'glibc') {
        // glibc: go through WINDOWS-1250 and map its accented bytes to base letters by hand
        $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
        $accented = "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
            . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
            . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
            . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96";
        $plain = "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-";
        $s = strtr($s, $accented, $plain);
        $s = preg_replace('#[^\x00-\x7F]++#', '', $s);
    } else {
        $s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
    }

    // any of these characters present now were produced by iconv, not by the input: remove them
    $s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s);

    // bring the parked originals back
    return strtr($s, $parked, '`\'"^~?');
}
189:
190:
191: 192: 193: 194: 195: 196: 197:
/**
 * Converts a string to a web-safe form: ASCII letters and digits (plus the
 * explicitly allowed characters), with every other run replaced by a single dash.
 * @param  string  UTF-8 encoding
 * @param  string  additional allowed characters
 * @param  bool    convert to lower case?
 * @return string
 */
public static function webalize($s, $charlist = NULL, $lower = TRUE)
{
    $ascii = self::toAscii($s);
    if ($lower) {
        $ascii = strtolower($ascii);
    }
    $disallowed = '#[^a-z0-9' . preg_quote($charlist, '#') . ']+#i';
    return trim(preg_replace($disallowed, '-', $ascii), '-');
}
208:
209:
210: 211: 212: 213: 214: 215: 216:
/**
 * Truncates a UTF-8 string to at most $maxLen characters, trying to cut at a
 * word boundary, and appends $append (an ellipsis by default) when it cut anything.
 * @param  string  UTF-8 encoding
 * @param  int     maximal length including the appendix
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
{
    if (!(self::length($s) > $maxLen)) {
        return $s;
    }

    // room left for the text itself once the appendix is accounted for
    $budget = $maxLen - self::length($append);
    if ($budget < 1) {
        return $append;
    }

    // longest prefix within the budget that is followed by whitespace or ASCII punctuation
    $wordBoundary = '#^.{1,' . $budget . '}(?=[\s\x00-/:-@\[-`{-~])#us';
    $matches = self::match($s, $wordBoundary);
    if ($matches) {
        return $matches[0] . $append;
    }

    // no boundary found: hard cut
    return self::substring($s, 0, $budget) . $append;
}
233:
234:
235: 236: 237: 238: 239: 240: 241:
/**
 * Indents every non-empty line of a string.
 * @param  string
 * @param  int     how many times $chars is repeated in front of each line
 * @param  string  indentation unit
 * @return string
 */
public static function indent($s, $level = 1, $chars = "\t")
{
    if (!($level > 0)) {
        return $s;
    }
    $prefix = str_repeat($chars, $level);
    // match the start of the text or a run of line breaks, when followed by a non-break character
    return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . $prefix);
}
249:
250:
251: 252: 253: 254: 255:
/**
 * Converts a UTF-8 string to lower case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function lower($s)
{
    $lowered = mb_strtolower($s, 'UTF-8');
    return $lowered;
}
260:
261:
262: 263: 264: 265: 266:
/**
 * Converts a UTF-8 string to upper case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function upper($s)
{
    $raised = mb_strtoupper($s, 'UTF-8');
    return $raised;
}
271:
272:
273: 274: 275: 276: 277:
/**
 * Converts the first character of a UTF-8 string to upper case.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function firstUpper($s)
{
    $head = self::substring($s, 0, 1);
    $tail = self::substring($s, 1);
    return self::upper($head) . $tail;
}
282:
283:
284: 285: 286: 287: 288:
/**
 * Converts a UTF-8 string to title case using mb_convert_case().
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function capitalize($s)
{
    $titled = mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
    return $titled;
}
293:
294:
295: 296: 297: 298: 299: 300: 301:
/**
 * Case-insensitive comparison of two UTF-8 strings or of their parts.
 * A positive $len compares the first $len characters, a negative $len the
 * last -$len characters, NULL the whole strings.
 * @param  string
 * @param  string
 * @param  int
 * @return bool
 */
public static function compare($left, $right, $len = NULL)
{
    // decide which slice (start, length) of both strings takes part in the comparison
    $slice = NULL;
    if ($len < 0) {
        $slice = array($len, -$len);
    } elseif ($len !== NULL) {
        $slice = array(0, $len);
    }

    if ($slice !== NULL) {
        list($from, $count) = $slice;
        $left = self::substring($left, $from, $count);
        $right = self::substring($right, $from, $count);
    }
    return self::lower($left) === self::lower($right);
}
313:
314:
315: 316: 317: 318: 319:
/**
 * Finds the longest common prefix of the given strings.
 * Accepts either one array of strings or the strings as separate arguments.
 * An empty list yields an empty string.
 * @param  string|array
 * @return string
 */
public static function findPrefix($strings)
{
    if (!is_array($strings)) {
        $strings = func_get_args();
    }

    // array_shift() gives NULL for an empty list; the cast turns that into ''
    $first = (string) array_shift($strings);
    $firstLen = strlen($first); // loop-invariant, computed once

    for ($i = 0; $i < $firstLen; $i++) {
        foreach ($strings as $s) {
            if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
                // the mismatch may sit inside a multi-byte UTF-8 character:
                // step back over continuation bytes so the prefix ends on a character boundary
                while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
                    $i--;
                }
                return substr($first, 0, $i);
            }
        }
    }
    return $first;
}
338:
339:
340: 341: 342: 343: 344:
/**
 * Returns the number of characters in a UTF-8 string.
 * Uses mb_strlen() when available and falls back to strlen(utf8_decode()) otherwise.
 * @param  string
 * @return int
 */
public static function length($s)
{
    if (function_exists('mb_strlen')) {
        return mb_strlen($s, 'UTF-8');
    }
    return strlen(utf8_decode($s));
}
349:
350:
351: 352: 353: 354: 355: 356:
/**
 * Strips the listed characters from both ends of a UTF-8 string.
 * The default list is the ASCII whitespace set plus the no-break space.
 * @param  string  UTF-8 encoding
 * @param  string  characters to strip
 * @return string
 */
public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
{
    // one-or-more of the listed characters, reused for the leading and the trailing run
    $run = '[' . preg_quote($charlist, '#') . ']+';
    return self::replace($s, '#^' . $run . '|' . $run . '\z#u', '');
}
362:
363:
364: 365: 366: 367: 368: 369: 370:
/**
 * Pads a UTF-8 string on the left to the given length with another string.
 * @param  string  UTF-8 encoding
 * @param  int     desired length in characters
 * @param  string  padding, UTF-8 encoding; its length is used as a divisor, so it must not be empty
 * @return string
 */
public static function padLeft($s, $length, $pad = ' ')
{
    $missing = max(0, $length - self::length($s));
    $padLen = self::length($pad);

    // str_repeat() expects an integer count; "/" yields a float, so truncate explicitly
    $wholeRepeats = (int) ($missing / $padLen);

    // whole repetitions of the pad, then the partial piece that fills the remainder
    return str_repeat($pad, $wholeRepeats) . self::substring($pad, 0, $missing % $padLen) . $s;
}
377:
378:
379: 380: 381: 382: 383: 384: 385:
/**
 * Pads a UTF-8 string on the right to the given length with another string.
 * @param  string  UTF-8 encoding
 * @param  int     desired length in characters
 * @param  string  padding, UTF-8 encoding; its length is used as a divisor, so it must not be empty
 * @return string
 */
public static function padRight($s, $length, $pad = ' ')
{
    $missing = max(0, $length - self::length($s));
    $padLen = self::length($pad);

    // str_repeat() expects an integer count; "/" yields a float, so truncate explicitly
    $wholeRepeats = (int) ($missing / $padLen);

    // whole repetitions of the pad, then the partial piece that fills the remainder
    return $s . str_repeat($pad, $wholeRepeats) . self::substring($pad, 0, $missing % $padLen);
}
392:
393:
394: 395: 396: 397: 398:
/**
 * Reverses a UTF-8 string character by character.
 * @param  string  UTF-8 encoding
 * @return string
 */
public static function reverse($s)
{
    // Each character becomes 4 big-endian bytes; reversing all bytes reverses the
    // character order and turns every unit into its little-endian form.
    $bigEndian = @iconv('UTF-8', 'UTF-32BE', $s);
    $flipped = strrev($bigEndian);
    return @iconv('UTF-32LE', 'UTF-8', $flipped);
}
403:
404:
405: 406: 407: 408: 409: 410:
/**
 * Generates a random string of the given length from the characters in $charlist.
 * Ranges written as "a-z" or "0-9" are expanded to all characters in between.
 * @param  int     number of characters to generate
 * @param  string  allowed characters, ranges permitted
 * @return string
 */
public static function random($length = 10, $charlist = '0-9a-z')
{
    // Nothing to generate: return before any byte source is asked for zero bytes.
    if ($length < 1) {
        return '';
    }

    // expand "x-y" ranges and shuffle the resulting alphabet
    $charlist = str_shuffle(preg_replace_callback('#.-.#', function ($m) {
        return implode('', range($m[0][0], $m[0][2]));
    }, $charlist));
    $chLen = strlen($charlist);

    // Collect extra entropy bytes from the first source that delivers any.
    // A source "failed" only when it produced no bytes at all; empty() would
    // also reject the perfectly valid single byte "0".
    $rand3 = '';
    if (function_exists('openssl_random_pseudo_bytes')
        && (PHP_VERSION_ID >= 50400 || !defined('PHP_WINDOWS_VERSION_BUILD'))
    ) {
        $rand3 = openssl_random_pseudo_bytes($length);
    }
    if ((string) $rand3 === '' && function_exists('mcrypt_create_iv') && (PHP_VERSION_ID >= 50307 || !defined('PHP_WINDOWS_VERSION_BUILD'))) {
        $rand3 = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);
    }
    if ((string) $rand3 === '' && !defined('PHP_WINDOWS_VERSION_BUILD') && @is_readable('/dev/urandom')) {
        // offset 0, not -1: a negative offset asks for a seek from the end of the
        // stream on PHP 7.1+, which a character device cannot do
        $rand3 = file_get_contents('/dev/urandom', FALSE, NULL, 0, $length);
    }
    if ((string) $rand3 === '') {
        // last resort: a per-process hash of the request environment
        static $cache;
        $rand3 = $cache ?: $cache = md5(serialize($_SERVER), TRUE);
    }
    $rand3Len = strlen($rand3);

    $s = '';
    for ($i = 0; $i < $length; $i++) {
        if ($i % 5 === 0) {
            // refresh the time-based components every five characters
            list($rand, $rand2) = explode(' ', microtime());
            $rand += lcg_value();
        }
        $rand *= $chLen;
        // the sum is a float; truncate explicitly before taking the modulo
        $s .= $charlist[(int) ($rand + $rand2 + ord($rand3[$i % $rand3Len])) % $chLen];
        $rand -= (int) $rand;
    }
    return $s;
}
446:
447:
448: 449: 450: 451: 452: 453: 454:
/**
 * Splits a string by a regular expression; captured delimiters are always
 * included in the result (PREG_SPLIT_DELIM_CAPTURE is forced on).
 * @param  string
 * @param  string  regular expression
 * @param  int     preg_split() flags
 * @return array
 */
public static function split($subject, $pattern, $flags = 0)
{
    $arguments = array($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
    return self::pcre('preg_split', $arguments);
}
459:
460:
461: 462: 463: 464: 465: 466: 467: 468:
/**
 * Performs a regular expression match.
 * @param  string
 * @param  string  regular expression
 * @param  int     preg_match() flags
 * @param  int     offset in bytes
 * @return array|NULL  captured groups, or NULL when nothing matched or the offset lies past the subject
 */
public static function match($subject, $pattern, $flags = 0, $offset = 0)
{
    if ($offset > strlen($subject)) {
        return NULL;
    }
    // $captures is handed over by reference so preg_match() can fill it
    $found = self::pcre('preg_match', array($pattern, $subject, & $captures, $flags, $offset));
    if ($found) {
        return $captures;
    }
    return NULL;
}
478:
479:
480: 481: 482: 483: 484: 485: 486: 487:
/**
 * Performs a global regular expression match.
 * Results are grouped per match (PREG_SET_ORDER) unless PREG_PATTERN_ORDER is requested.
 * @param  string
 * @param  string  regular expression
 * @param  int     preg_match_all() flags
 * @param  int     offset in bytes
 * @return array   empty when the offset lies past the subject
 */
public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
{
    if ($offset > strlen($subject)) {
        return array();
    }
    $order = ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER);
    // $sets is handed over by reference so preg_match_all() can fill it
    self::pcre('preg_match_all', array($pattern, $subject, & $sets, $order, $offset));
    return $sets;
}
500:
501:
502: 503: 504: 505: 506: 507: 508: 509:
/**
 * Performs a regular expression search and replace.
 * The replacement may be a string, a callback (object or array form), or omitted
 * when $pattern is an array of "pattern => replacement" pairs.
 * @param  string
 * @param  string|array
 * @param  string|callable
 * @param  int
 * @return string
 * @throws Nette\InvalidStateException  when an object/array replacement is not callable
 */
public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
{
    $callbackLike = is_object($replacement) || is_array($replacement);

    if (!$callbackLike) {
        if ($replacement === NULL && is_array($pattern)) {
            // associative form: keys are the patterns, values their replacements
            $replacement = array_values($pattern);
            $pattern = array_keys($pattern);
        }
        return self::pcre('preg_replace', array($pattern, $replacement, $subject, $limit));
    }

    if ($replacement instanceof Nette\Callback) {
        $replacement = $replacement->getNative();
    }
    if (!is_callable($replacement, FALSE, $textual)) {
        throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
    }
    return self::pcre('preg_replace_callback', array($pattern, $replacement, $subject, $limit));
}
529:
530:
531:
/**
 * Runs a preg_* function and converts PCRE failures into RegexpException.
 * NOTE(review): presumably Callback::invokeSafe() calls $func with $args and passes
 * PHP warnings to the handler - confirm against that method.
 * @param  string  name of the preg_* function
 * @param  array   its arguments; the first one is the pattern(s)
 * @return mixed   whatever the preg_* function returned
 * @throws RegexpException
 * @internal
 */
public static function pcre($func, $args)
{
    $onWarning = function ($message) use ($args) {
        // compile-time error: report it together with the offending pattern(s)
        throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
    };
    $res = Callback::invokeSafe($func, $args, $onWarning);

    // run-time error: for the replacing functions it counts only when the result is NULL
    $code = preg_last_error();
    $replacing = in_array($func, array('preg_filter', 'preg_replace_callback', 'preg_replace'));
    if ($code && ($res === NULL || !$replacing)) {
        throw new RegexpException(NULL, $code, implode(' or ', (array) $args[0]));
    }
    return $res;
}
546:
547: }
548:
549:
550: 551: 552:
/**
 * Exception describing a failed regular expression operation.
 */
class RegexpException extends \Exception
{
    /** @var array  messages indexed by preg_last_error() code */
    public static $messages = array(
        PREG_INTERNAL_ERROR => 'Internal error',
        PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
        PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
        5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
        6 => 'Failed due to limited JIT stack space', // PREG_JIT_STACKLIMIT_ERROR, literal because the constant needs PHP 7
    );

    /**
     * @param  string  explicit message; when empty, one is derived from $code
     * @param  int     preg_last_error() code
     * @param  string  pattern(s) that failed, appended to a derived message
     */
    public function __construct($message, $code = NULL, $pattern = NULL)
    {
        if (!$message) {
            $known = isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error';
            $message = $known . ($pattern ? " (pattern: $pattern)" : '');
        }
        // \Exception expects an integer code; a missing code becomes 0
        parent::__construct($message, (int) $code);
    }

}
572: