1: <?php
2:
3: 4: 5: 6: 7:
8:
9:
10:
11: 12: 13: 14: 15: 16:
/**
 * Static helpers for working with (mostly UTF-8) strings and PCRE.
 *
 * All methods are static; the constructor always throws.
 */
class Strings
{

	/** @var string|array  pattern(s) handed to the PCRE call currently guarded by handlePcreError() */
	private static $guardedPattern;


	/**
	 * Static class - instantiation always throws.
	 * @throws StaticClassException
	 */
	final public function __construct()
	{
		throw new StaticClassException;
	}


	/**
	 * Tells whether the string is unchanged by fixEncoding() for the given encoding.
	 * @param  string  byte stream to check
	 * @param  string  expected encoding
	 * @return bool
	 */
	public static function checkEncoding($s, $encoding = 'UTF-8')
	{
		return $s === self::fixEncoding($s, $encoding);
	}


	/**
	 * Returns the string re-encoded from $encoding to $encoding so that
	 * sequences the converter rejects are dropped.
	 *
	 * For UTF-8 every occurrence of the BOM byte sequence is removed first.
	 * Side effect on PHP >= 5.4: the ini setting mbstring.substitute_character
	 * is switched to 'none' and is not restored afterwards.
	 *
	 * @param  string  byte stream to fix
	 * @param  string  encoding
	 * @return string
	 */
	public static function fixEncoding($s, $encoding = 'UTF-8')
	{
		// removes the UTF-8 BOM wherever it occurs
		if (strcasecmp($encoding, 'UTF-8') === 0) {
			$s = str_replace("\xEF\xBB\xBF", '', $s);
		}
		if (PHP_VERSION_ID >= 50400) {
			ini_set('mbstring.substitute_character', 'none');
			return mb_convert_encoding($s, $encoding, $encoding);
		}
		// older PHP: round-trip through UTF-16, ignoring unconvertible input
		return @iconv('UTF-16', $encoding . '//IGNORE', iconv($encoding, 'UTF-16//IGNORE', $s));
	}


	/**
	 * Returns the character with the given code point, converted to $encoding.
	 * @param  int     code point (packed as a 32-bit big-endian value)
	 * @param  string  target encoding
	 * @return string
	 */
	public static function chr($code, $encoding = 'UTF-8')
	{
		return iconv('UTF-32BE', $encoding . '//IGNORE', pack('N', $code));
	}


	/**
	 * Does $haystack start with $needle? (byte-wise comparison)
	 * @param  string
	 * @param  string
	 * @return bool
	 */
	public static function startsWith($haystack, $needle)
	{
		return strncmp($haystack, $needle, strlen($needle)) === 0;
	}


	/**
	 * Does $haystack end with $needle? An empty needle always matches.
	 * @param  string
	 * @param  string
	 * @return bool
	 */
	public static function endsWith($haystack, $needle)
	{
		return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
	}


	/**
	 * Does $haystack contain $needle? (byte-wise search via strpos)
	 * @param  string
	 * @param  string
	 * @return bool
	 */
	public static function contains($haystack, $needle)
	{
		return strpos($haystack, $needle) !== FALSE;
	}


	/**
	 * Returns a part of a UTF-8 string, measured in characters.
	 * Uses mb_substr when available, iconv_substr otherwise.
	 * @param  string
	 * @param  int     start offset
	 * @param  int     length; NULL means "up to the length of the whole string"
	 * @return string
	 */
	public static function substring($s, $start, $length = NULL)
	{
		if ($length === NULL) {
			$length = self::length($s);
		}
		return function_exists('mb_substr') ? mb_substr($s, $start, $length, 'UTF-8') : iconv_substr($s, $start, $length, 'UTF-8');
	}


	/**
	 * Normalizes line endings to "\n", strips control characters and
	 * trailing blanks on each line, and trims leading/trailing newlines.
	 * @param  string
	 * @return string
	 */
	public static function normalize($s)
	{
		// unify line endings: CRLF and lone CR both become LF
		$s = str_replace("\r\n", "\n", $s);
		$s = strtr($s, "\r", "\n");

		// remove control characters (tab and LF are kept)
		$s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);

		// remove tabs and spaces at the end of every line
		$s = preg_replace('#[\t ]+$#m', '', $s);

		// remove leading and trailing blank lines
		$s = trim($s, "\n");

		return $s;
	}


	/**
	 * Transliterates a UTF-8 string to ASCII using iconv //TRANSLIT.
	 *
	 * The characters ` ' " ^ ~ present in the input are temporarily swapped for
	 * placeholder bytes so that the same characters produced by transliteration
	 * can be removed without touching the original ones.
	 *
	 * @param  string  UTF-8 encoded string
	 * @return string
	 */
	public static function toAscii($s)
	{
		// drop everything outside the listed code point ranges
		$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
		// protect the original ` ' " ^ ~ with placeholders
		$s = strtr($s, '`\'"^~', "\x01\x02\x03\x04\x05");
		if (ICONV_IMPL === 'glibc') {
			// glibc: go through WINDOWS-1250 and map its non-ASCII bytes by table
			$s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s);
			$s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
				. "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
				. "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
				. "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96",
				"ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-");
		} else {
			$s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s);
		}
		// remove ` ' " ^ ~ introduced by the transliteration itself
		$s = str_replace(array('`', "'", '"', '^', '~'), '', $s);
		// restore the protected originals
		return strtr($s, "\x01\x02\x03\x04\x05", '`\'"^~');
	}


	/**
	 * Converts a string to a web-safe form: ASCII letters, digits and the
	 * characters from $charlist; every other run of characters becomes '-'.
	 * @param  string  UTF-8 encoded string
	 * @param  string  additional allowed characters
	 * @param  bool    lower-case the result?
	 * @return string
	 */
	public static function webalize($s, $charlist = NULL, $lower = TRUE)
	{
		$s = self::toAscii($s);
		if ($lower) {
			$s = strtolower($s);
		}
		// cast: $charlist defaults to NULL, preg_quote() expects a string
		$s = preg_replace('#[^a-z0-9' . preg_quote((string) $charlist, '#') . ']+#i', '-', $s);
		$s = trim($s, '-');
		return $s;
	}


	/**
	 * Shortens a UTF-8 string to at most $maxLen characters including $append.
	 *
	 * Prefers to cut in front of whitespace or ASCII punctuation; when no such
	 * position exists within the limit, the string is cut hard.
	 *
	 * @param  string  UTF-8 encoded string
	 * @param  int     maximum length in characters
	 * @param  string  text appended to a shortened string (default: ellipsis)
	 * @return string
	 */
	public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
	{
		if (self::length($s) > $maxLen) {
			$maxLen = $maxLen - self::length($append);
			if ($maxLen < 1) {
				return $append;

			} elseif ($matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us')) {
				return $matches[0] . $append;

			} else {
				return self::substring($s, 0, $maxLen) . $append;
			}
		}
		return $s;
	}


	/**
	 * Prefixes every non-empty line with $chars repeated $level times.
	 * @param  string
	 * @param  int     indentation level; values below 1 return $s unchanged
	 * @param  string  indentation unit
	 * @return string
	 */
	public static function indent($s, $level = 1, $chars = "\t")
	{
		return $level < 1 ? $s : self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
	}


	/**
	 * Converts a UTF-8 string to lower case.
	 * @param  string
	 * @return string
	 */
	public static function lower($s)
	{
		return mb_strtolower($s, 'UTF-8');
	}


	/**
	 * Converts a UTF-8 string to upper case.
	 * @param  string
	 * @return string
	 */
	public static function upper($s)
	{
		return mb_strtoupper($s, 'UTF-8');
	}


	/**
	 * Upper-cases the first character of a UTF-8 string.
	 * @param  string
	 * @return string
	 */
	public static function firstUpper($s)
	{
		return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
	}


	/**
	 * Converts a UTF-8 string to title case (MB_CASE_TITLE).
	 * @param  string
	 * @return string
	 */
	public static function capitalize($s)
	{
		return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
	}


	/**
	 * Case-insensitive comparison of two UTF-8 strings.
	 * @param  string
	 * @param  string
	 * @param  int     NULL compares whole strings, a positive value only the
	 *                 first $len characters, a negative value only the last -$len
	 * @return bool
	 */
	public static function compare($left, $right, $len = NULL)
	{
		if ($len < 0) {
			$left = self::substring($left, $len, -$len);
			$right = self::substring($right, $len, -$len);
		} elseif ($len !== NULL) {
			$left = self::substring($left, 0, $len);
			$right = self::substring($right, 0, $len);
		}
		return self::lower($left) === self::lower($right);
	}


	/**
	 * Returns the length of a UTF-8 string in characters.
	 * Uses mb_strlen when available; falls back to utf8_decode otherwise.
	 * @param  string
	 * @return int
	 */
	public static function length($s)
	{
		return function_exists('mb_strlen') ? mb_strlen($s, 'UTF-8') : strlen(utf8_decode($s));
	}


	/**
	 * Strips the given characters from both ends of a UTF-8 string.
	 * @param  string  UTF-8 encoded string
	 * @param  string  characters to strip (default: whitespace incl. NBSP)
	 * @return string
	 * @throws RegexpException
	 */
	public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
	{
		$charlist = preg_quote($charlist, '#');
		return self::replace($s, '#^['.$charlist.']+|['.$charlist.']+\z#u', '');
	}


	/**
	 * Pads a UTF-8 string on the left to $length characters.
	 * @param  string  UTF-8 encoded string
	 * @param  int     target length in characters
	 * @param  string  padding; an empty string leaves $s unchanged
	 * @return string
	 */
	public static function padLeft($s, $length, $pad = ' ')
	{
		$missing = max(0, $length - self::length($s));
		$padLen = self::length($pad);
		if ($padLen === 0) { // nothing to pad with; also avoids division by zero
			return $s;
		}
		return str_repeat($pad, (int) ($missing / $padLen)) . self::substring($pad, 0, $missing % $padLen) . $s;
	}


	/**
	 * Pads a UTF-8 string on the right to $length characters.
	 * @param  string  UTF-8 encoded string
	 * @param  int     target length in characters
	 * @param  string  padding; an empty string leaves $s unchanged
	 * @return string
	 */
	public static function padRight($s, $length, $pad = ' ')
	{
		$missing = max(0, $length - self::length($s));
		$padLen = self::length($pad);
		if ($padLen === 0) { // nothing to pad with; also avoids division by zero
			return $s;
		}
		return $s . str_repeat($pad, (int) ($missing / $padLen)) . self::substring($pad, 0, $missing % $padLen);
	}


	/**
	 * Reverses a UTF-8 string character by character.
	 *
	 * Converts to UTF-32BE, reverses the bytes (which yields the characters in
	 * reverse order as UTF-32LE) and converts back.
	 *
	 * @param  string  UTF-8 encoded string
	 * @return string
	 */
	public static function reverse($s)
	{
		return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
	}


	/**
	 * Generates a random string of $length characters taken from $charlist.
	 *
	 * @param  int     number of characters; values below 1 yield ''
	 * @param  string  allowed characters; "x-y" is expanded via range()
	 * @return string
	 * @throws InvalidArgumentException  when the expanded character list is empty
	 */
	public static function random($length = 10, $charlist = '0-9a-z')
	{
		if ($length < 1) {
			return '';
		}

		// expand "x-y" ranges left to right; any other byte is taken literally
		preg_match_all('#.-.|[\s\S]#', $charlist, $tokens);
		$expanded = '';
		foreach ($tokens[0] as $token) {
			$expanded .= strlen($token) === 3 ? implode('', range($token[0], $token[2])) : $token;
		}
		$charlist = str_shuffle($expanded);
		$chLen = strlen($charlist);
		if ($chLen === 0) {
			throw new InvalidArgumentException('Character list must not be empty.');
		}

		// collect random bytes from the best source available
		$bytes = '';
		if (function_exists('random_bytes')) {
			try {
				$bytes = random_bytes($length);
			} catch (Exception $e) {
				$bytes = ''; // no usable source here; try the next one
			}
		}
		// NOTE(review): the Windows/version conditions below are kept from the original;
		// presumably they skip builds where these functions misbehave - confirm.
		if ((string) $bytes === '' && function_exists('openssl_random_pseudo_bytes')
			&& (PHP_VERSION_ID >= 50400 || !defined('PHP_WINDOWS_VERSION_BUILD'))
		) {
			$bytes = openssl_random_pseudo_bytes($length);
		}
		if ((string) $bytes === '' && function_exists('mcrypt_create_iv')
			&& (PHP_VERSION_ID >= 50307 || !defined('PHP_WINDOWS_VERSION_BUILD'))
		) {
			$bytes = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);
		}
		if ((string) $bytes === '' && @is_readable('/dev/urandom')) {
			// offset 0 = no seek; a negative offset means "from the end" on PHP 7.1+ and fails here
			$bytes = file_get_contents('/dev/urandom', FALSE, NULL, 0, $length);
		}
		if ((string) $bytes === '') {
			// last resort: hash of the request environment, computed once per process
			static $cache;
			if (!$cache) {
				$cache = md5(serialize($_SERVER), TRUE);
			}
			$bytes = $cache;
		}
		$bytesLen = strlen($bytes);

		$s = '';
		for ($i = 0; $i < $length; $i++) {
			if ($i % 5 === 0) { // refresh the time-based component every five characters
				list($rand, $rand2) = explode(' ', microtime());
				$rand += lcg_value();
			}
			$rand *= $chLen;
			$s .= $charlist[(int) ($rand + $rand2 + ord($bytes[$i % $bytesLen])) % $chLen];
			$rand -= (int) $rand; // keep only the fractional part for the next round
		}
		return $s;
	}


	/**
	 * Error handler installed around PCRE calls by split(), match(), matchAll()
	 * and replace(). preg_last_error() does not report compile errors, so the
	 * PHP warning is converted to an exception naming the offending pattern.
	 * @param  int     severity
	 * @param  string  message
	 * @return void
	 * @throws RegexpException
	 * @internal
	 */
	public static function handlePcreError($severity, $message)
	{
		restore_error_handler();
		throw new RegexpException("$message in pattern: " . implode(' or ', (array) self::$guardedPattern));
	}


	/**
	 * Splits a string by a regular expression (PREG_SPLIT_DELIM_CAPTURE is always on).
	 * @param  string
	 * @param  string  pattern
	 * @param  int     additional preg_split flags
	 * @return array
	 * @throws RegexpException
	 */
	public static function split($subject, $pattern, $flags = 0)
	{
		self::$guardedPattern = $pattern;
		set_error_handler(array(__CLASS__, 'handlePcreError'));
		$res = preg_split($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new RegexpException(NULL, preg_last_error(), $pattern);
		}
		return $res;
	}


	/**
	 * Performs a regular expression match.
	 * @param  string
	 * @param  string  pattern
	 * @param  int     preg_match flags
	 * @param  int     offset in bytes
	 * @return array|NULL  matches, or NULL when nothing matched or the offset lies past the subject
	 * @throws RegexpException
	 */
	public static function match($subject, $pattern, $flags = 0, $offset = 0)
	{
		if ($offset > strlen($subject)) {
			return NULL;
		}
		self::$guardedPattern = $pattern;
		set_error_handler(array(__CLASS__, 'handlePcreError'));
		$res = preg_match($pattern, $subject, $m, $flags, $offset);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new RegexpException(NULL, preg_last_error(), $pattern);
		}
		if ($res) {
			return $m;
		}
		return NULL;
	}


	/**
	 * Performs a global regular expression match.
	 * PREG_SET_ORDER is used unless PREG_PATTERN_ORDER is passed in $flags.
	 * @param  string
	 * @param  string  pattern
	 * @param  int     preg_match_all flags
	 * @param  int     offset in bytes
	 * @return array   empty when the offset lies past the subject
	 * @throws RegexpException
	 */
	public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
	{
		if ($offset > strlen($subject)) {
			return array();
		}
		self::$guardedPattern = $pattern;
		set_error_handler(array(__CLASS__, 'handlePcreError'));
		preg_match_all(
			$pattern, $subject, $m,
			($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
			$offset
		);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new RegexpException(NULL, preg_last_error(), $pattern);
		}
		return $m;
	}


	/**
	 * Performs a regular expression search and replace.
	 *
	 * $replacement may be a string, or a callback (object, array, Callback
	 * instance or create_function() lambda name). When $replacement is NULL
	 * and $pattern is an array, its keys are the patterns and its values the
	 * replacements.
	 *
	 * @param  string
	 * @param  string|array  pattern(s)
	 * @param  string|callable|NULL  replacement
	 * @param  int   maximum number of replacements, -1 for no limit
	 * @return string
	 * @throws RegexpException
	 * @throws InvalidStateException  when the callback is not callable
	 */
	public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
	{
		$isLambdaName = is_string($replacement) && preg_match('#^\x00lambda_\d+\z#', $replacement);
		if (is_object($replacement) || is_array($replacement) || $isLambdaName) {
			if ($replacement instanceof Callback) {
				$replacement = $replacement->getNative();
			}
			if (!is_callable($replacement, FALSE, $textual)) {
				throw new InvalidStateException("Callback '$textual' is not callable.");
			}

			// compile every pattern up front so that compile errors are reported
			// with the handler installed, while the user callback runs without it
			set_error_handler(array(__CLASS__, 'handlePcreError'));
			foreach ((array) $pattern as $tmp) {
				self::$guardedPattern = $tmp;
				preg_match($tmp, '');
			}
			restore_error_handler();

			$res = preg_replace_callback($pattern, $replacement, $subject, $limit);
			if ($res === NULL && preg_last_error()) { // run-time error
				throw new RegexpException(NULL, preg_last_error(), $pattern);
			}
			return $res;

		} elseif ($replacement === NULL && is_array($pattern)) {
			$replacement = array_values($pattern);
			$pattern = array_keys($pattern);
		}

		self::$guardedPattern = $pattern;
		set_error_handler(array(__CLASS__, 'handlePcreError'));
		$res = preg_replace($pattern, $replacement, $subject, $limit);
		restore_error_handler();
		if (preg_last_error()) { // run-time error
			throw new RegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
		}
		return $res;
	}

}
533:
534:
535: 536: 537: 538:
/**
 * Exception thrown when a regular expression fails to compile or execute.
 */
class RegexpException extends Exception
{
	/** @var array  preg_last_error() code => human-readable message */
	static public $messages = array(
		PREG_INTERNAL_ERROR => 'Internal error',
		PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
		PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
		PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
		5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
		6 => 'Failed due to limited JIT stack space', // PREG_JIT_STACKLIMIT_ERROR
	);

	/**
	 * @param  string  message; when empty, one is derived from $code and $pattern
	 * @param  int     preg_last_error() code
	 * @param  string  pattern that failed, appended to a derived message
	 */
	public function __construct($message, $code = NULL, $pattern = NULL)
	{
		if (!$message) {
			$message = (isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error') . ($pattern ? " (pattern: $pattern)" : '');
		}
		// Exception::__construct() expects an integer code; $code defaults to NULL
		parent::__construct($message, (int) $code);
	}

}
558: