File Utils/Strings.php | Nette 2.2 API

  1: <?php
  2: 
  3: /**
  4:  * This file is part of the Nette Framework (https://nette.org)
  5:  * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
  6:  */
  7: 
  8: namespace Nette\Utils;
  9: 
 10: use Nette;
 11: 
 12: 
 13: /**
 14:  * String tools library.
 15:  *
 16:  * @author     David Grudl
 17:  */
 18: class Strings
 19: {
 20: 
 21:     /**
 22:      * Static class - cannot be instantiated.
 23:      */
 24:     final public function __construct()
 25:     {
 26:         throw new Nette\StaticClassException;
 27:     }
 28: 
 29: 
 30:     /**
 31:      * Checks if the string is valid for UTF-8 encoding.
 32:      * @param  string  byte stream to check
 33:      * @return bool
 34:      */
 35:     public static function checkEncoding($s)
 36:     {
 37:         if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
 38:             trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
 39:         }
 40:         return $s === self::fixEncoding($s);
 41:     }
 42: 
 43: 
 44:     /**
 45:      * Removes invalid code unit sequences from UTF-8 string.
 46:      * @param  string  byte stream to fix
 47:      * @return string
 48:      */
 49:     public static function fixEncoding($s)
 50:     {
 51:         if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
 52:             trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
 53:         }
 54:         // removes xD800-xDFFF, x110000 and higher
 55:         if (PHP_VERSION_ID < 50400) {
 56:             return @iconv('UTF-16', 'UTF-8//IGNORE', iconv('UTF-8', 'UTF-16//IGNORE', $s)); // intentionally @
 57:         } else {
 58:             return htmlspecialchars_decode(htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8'), ENT_NOQUOTES);
 59:         }
 60:     }
 61: 
 62: 
 63:     /**
 64:      * Returns a specific character in UTF-8.
 65:      * @param  int     codepoint
 66:      * @return string
 67:      */
 68:     public static function chr($code)
 69:     {
 70:         if (func_num_args() > 1 && strcasecmp(func_get_arg(1), 'UTF-8')) {
 71:             trigger_error(__METHOD__ . ' supports only UTF-8 encoding.', E_USER_DEPRECATED);
 72:         }
 73:         return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', $code));
 74:     }
 75: 
 76: 
 77:     /**
 78:      * Starts the $haystack string with the prefix $needle?
 79:      * @param  string
 80:      * @param  string
 81:      * @return bool
 82:      */
 83:     public static function startsWith($haystack, $needle)
 84:     {
 85:         return strncmp($haystack, $needle, strlen($needle)) === 0;
 86:     }
 87: 
 88: 
 89:     /**
 90:      * Ends the $haystack string with the suffix $needle?
 91:      * @param  string
 92:      * @param  string
 93:      * @return bool
 94:      */
 95:     public static function endsWith($haystack, $needle)
 96:     {
 97:         return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
 98:     }
 99: 
100: 
101:     /**
102:      * Does $haystack contain $needle?
103:      * @param  string
104:      * @param  string
105:      * @return bool
106:      */
107:     public static function contains($haystack, $needle)
108:     {
109:         return strpos($haystack, $needle) !== FALSE;
110:     }
111: 
112: 
113:     /**
114:      * Returns a part of UTF-8 string.
115:      * @param  string
116:      * @param  int in characters (code points)
117:      * @param  int in characters (code points)
118:      * @return string
119:      */
120:     public static function substring($s, $start, $length = NULL)
121:     {
122:         if (function_exists('mb_substr')) {
123:             if ($length === NULL && PHP_VERSION_ID < 50408) {
124:                 $length = self::length($s);
125:             }
126:             return mb_substr($s, $start, $length, 'UTF-8'); // MB is much faster
127:         } elseif ($length === NULL) {
128:             $length = self::length($s);
129:         } elseif ($start < 0 && $length < 0) {
130:             $start += self::length($s); // unifies iconv_substr behavior with mb_substr
131:         }
132:         return iconv_substr($s, $start, $length, 'UTF-8');
133:     }
134: 
135: 
136:     /**
137:      * Removes special controls characters and normalizes line endings and spaces.
138:      * @param  string  UTF-8 encoding or 8-bit
139:      * @return string
140:      */
141:     public static function normalize($s)
142:     {
143:         $s = self::normalizeNewLines($s);
144: 
145:         // remove control characters; leave \t + \n
146:         $s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);
147: 
148:         // right trim
149:         $s = preg_replace('#[\t ]+$#m', '', $s);
150: 
151:         // leading and trailing blank lines
152:         $s = trim($s, "\n");
153: 
154:         return $s;
155:     }
156: 
157: 
158:     /**
159:      * Standardize line endings to unix-like.
160:      * @param  string  UTF-8 encoding or 8-bit
161:      * @return string
162:      */
163:     public static function normalizeNewLines($s)
164:     {
165:         return str_replace(array("\r\n", "\r"), "\n", $s);
166:     }
167: 
168: 
169:     /**
170:      * Converts to ASCII.
171:      * @param  string  UTF-8 encoding
172:      * @return string  ASCII
173:      */
174:     public static function toAscii($s)
175:     {
176:         $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
177:         $s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
178:         $s = str_replace(
179:             array("\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0"),
180:             array("\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04"), $s
181:         );
182:         if (ICONV_IMPL === 'glibc') {
183:             $s = str_replace(
184:                 array("\xC2\xBB", "\xC2\xAB", "\xE2\x80\xA6", "\xE2\x84\xA2", "\xC2\xA9", "\xC2\xAE"),
185:                 array('>>', '<<', '...', 'TM', '(c)', '(R)'), $s
186:             );
187:             $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s); // intentionally @
188:             $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
189:                 . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
190:                 . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
191:                 . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
192:                 . "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
193:                 'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.');
194:             $s = preg_replace('#[^\x00-\x7F]++#', '', $s);
195:         } else {
196:             $s = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s); // intentionally @
197:         }
198:         $s = str_replace(array('`', "'", '"', '^', '~', '?'), '', $s);
199:         return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
200:     }
201: 
202: 
203:     /**
204:      * Converts to web safe characters [a-z0-9-] text.
205:      * @param  string  UTF-8 encoding
206:      * @param  string  allowed characters
207:      * @param  bool
208:      * @return string
209:      */
210:     public static function webalize($s, $charlist = NULL, $lower = TRUE)
211:     {
212:         $s = self::toAscii($s);
213:         if ($lower) {
214:             $s = strtolower($s);
215:         }
216:         $s = preg_replace('#[^a-z0-9' . preg_quote($charlist, '#') . ']+#i', '-', $s);
217:         $s = trim($s, '-');
218:         return $s;
219:     }
220: 
221: 
222:     /**
223:      * Truncates string to maximal length.
224:      * @param  string  UTF-8 encoding
225:      * @param  int
226:      * @param  string  UTF-8 encoding
227:      * @return string
228:      */
229:     public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
230:     {
231:         if (self::length($s) > $maxLen) {
232:             $maxLen = $maxLen - self::length($append);
233:             if ($maxLen < 1) {
234:                 return $append;
235: 
236:             } elseif ($matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us')) {
237:                 return $matches[0] . $append;
238: 
239:             } else {
240:                 return self::substring($s, 0, $maxLen) . $append;
241:             }
242:         }
243:         return $s;
244:     }
245: 
246: 
247:     /**
248:      * Indents the content from the left.
249:      * @param  string  UTF-8 encoding or 8-bit
250:      * @param  int
251:      * @param  string
252:      * @return string
253:      */
254:     public static function indent($s, $level = 1, $chars = "\t")
255:     {
256:         if ($level > 0) {
257:             $s = self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
258:         }
259:         return $s;
260:     }
261: 
262: 
263:     /**
264:      * Convert to lower case.
265:      * @param  string  UTF-8 encoding
266:      * @return string
267:      */
268:     public static function lower($s)
269:     {
270:         return mb_strtolower($s, 'UTF-8');
271:     }
272: 
273: 
274:     /**
275:      * Convert to upper case.
276:      * @param  string  UTF-8 encoding
277:      * @return string
278:      */
279:     public static function upper($s)
280:     {
281:         return mb_strtoupper($s, 'UTF-8');
282:     }
283: 
284: 
285:     /**
286:      * Convert first character to upper case.
287:      * @param  string  UTF-8 encoding
288:      * @return string
289:      */
290:     public static function firstUpper($s)
291:     {
292:         return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
293:     }
294: 
295: 
296:     /**
297:      * Capitalize string.
298:      * @param  string  UTF-8 encoding
299:      * @return string
300:      */
301:     public static function capitalize($s)
302:     {
303:         return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
304:     }
305: 
306: 
307:     /**
308:      * Case-insensitive compares UTF-8 strings.
309:      * @param  string
310:      * @param  string
311:      * @param  int
312:      * @return bool
313:      */
314:     public static function compare($left, $right, $len = NULL)
315:     {
316:         if ($len < 0) {
317:             $left = self::substring($left, $len, -$len);
318:             $right = self::substring($right, $len, -$len);
319:         } elseif ($len !== NULL) {
320:             $left = self::substring($left, 0, $len);
321:             $right = self::substring($right, 0, $len);
322:         }
323:         return self::lower($left) === self::lower($right);
324:     }
325: 
326: 
327:     /**
328:      * Finds the length of common prefix of strings.
329:      * @param  string|array
330:      * @return string
331:      */
332:     public static function findPrefix($strings)
333:     {
334:         if (!is_array($strings)) {
335:             $strings = func_get_args();
336:         }
337:         $first = array_shift($strings);
338:         for ($i = 0; $i < strlen($first); $i++) {
339:             foreach ($strings as $s) {
340:                 if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
341:                     while ($i && $first[$i-1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
342:                         $i--;
343:                     }
344:                     return substr($first, 0, $i);
345:                 }
346:             }
347:         }
348:         return $first;
349:     }
350: 
351: 
352:     /**
353:      * Returns number of characters (not bytes) in UTF-8 string.
354:      * That is the number of Unicode code points which may differ from the number of graphemes.
355:      * @param  string
356:      * @return int
357:      */
358:     public static function length($s)
359:     {
360:         return function_exists('mb_strlen') ? mb_strlen($s, 'UTF-8') : strlen(utf8_decode($s));
361:     }
362: 
363: 
364:     /**
365:      * Strips whitespace.
366:      * @param  string  UTF-8 encoding
367:      * @param  string
368:      * @return string
369:      */
370:     public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
371:     {
372:         $charlist = preg_quote($charlist, '#');
373:         return self::replace($s, '#^['.$charlist.']+|['.$charlist.']+\z#u', '');
374:     }
375: 
376: 
377:     /**
378:      * Pad a string to a certain length with another string.
379:      * @param  string  UTF-8 encoding
380:      * @param  int
381:      * @param  string
382:      * @return string
383:      */
384:     public static function padLeft($s, $length, $pad = ' ')
385:     {
386:         $length = max(0, $length - self::length($s));
387:         $padLen = self::length($pad);
388:         return str_repeat($pad, $length / $padLen) . self::substring($pad, 0, $length % $padLen) . $s;
389:     }
390: 
391: 
392:     /**
393:      * Pad a string to a certain length with another string.
394:      * @param  string  UTF-8 encoding
395:      * @param  int
396:      * @param  string
397:      * @return string
398:      */
399:     public static function padRight($s, $length, $pad = ' ')
400:     {
401:         $length = max(0, $length - self::length($s));
402:         $padLen = self::length($pad);
403:         return $s . str_repeat($pad, $length / $padLen) . self::substring($pad, 0, $length % $padLen);
404:     }
405: 
406: 
407:     /**
408:      * Reverse string.
409:      * @param  string  UTF-8 encoding
410:      * @return string
411:      */
412:     public static function reverse($s)
413:     {
414:         return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
415:     }
416: 
417: 
418:     /**
419:      * Use Nette\Utils\Random::generate
420:      * @deprecated
421:      */
422:     public static function random($length = 10, $charlist = '0-9a-z')
423:     {
424:         return Random::generate($length, $charlist);
425:     }
426: 
427: 
428:     /**
429:      * Splits string by a regular expression.
430:      * @param  string
431:      * @param  string
432:      * @param  int
433:      * @return array
434:      */
435:     public static function split($subject, $pattern, $flags = 0)
436:     {
437:         return self::pcre('preg_split', array($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE));
438:     }
439: 
440: 
441:     /**
442:      * Performs a regular expression match.
443:      * @param  string
444:      * @param  string
445:      * @param  int  can be PREG_OFFSET_CAPTURE (returned in bytes)
446:      * @param  int  offset in bytes
447:      * @return mixed
448:      */
449:     public static function match($subject, $pattern, $flags = 0, $offset = 0)
450:     {
451:         if ($offset > strlen($subject)) {
452:             return NULL;
453:         }
454:         return self::pcre('preg_match', array($pattern, $subject, & $m, $flags, $offset))
455:             ? $m
456:             : NULL;
457:     }
458: 
459: 
460:     /**
461:      * Performs a global regular expression match.
462:      * @param  string
463:      * @param  string
464:      * @param  int  can be PREG_OFFSET_CAPTURE (returned in bytes); PREG_SET_ORDER is default
465:      * @param  int  offset in bytes
466:      * @return array
467:      */
468:     public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
469:     {
470:         if ($offset > strlen($subject)) {
471:             return array();
472:         }
473:         self::pcre('preg_match_all', array(
474:             $pattern, $subject, & $m,
475:             ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
476:             $offset,
477:         ));
478:         return $m;
479:     }
480: 
481: 
482:     /**
483:      * Perform a regular expression search and replace.
484:      * @param  string
485:      * @param  string|array
486:      * @param  string|callable
487:      * @param  int
488:      * @return string
489:      */
490:     public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
491:     {
492:         if (is_object($replacement) || is_array($replacement)) {
493:             if ($replacement instanceof Nette\Callback) {
494:                 $replacement = $replacement->getNative();
495:             }
496:             if (!is_callable($replacement, FALSE, $textual)) {
497:                 throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
498:             }
499: 
500:             return self::pcre('preg_replace_callback', array($pattern, $replacement, $subject, $limit));
501: 
502:         } elseif ($replacement === NULL && is_array($pattern)) {
503:             $replacement = array_values($pattern);
504:             $pattern = array_keys($pattern);
505:         }
506: 
507:         return self::pcre('preg_replace', array($pattern, $replacement, $subject, $limit));
508:     }
509: 
510: 
511:     /** @internal */
512:     public static function pcre($func, $args)
513:     {
514:         $res = Callback::invokeSafe($func, $args, function ($message) use ($args) {
515:             // compile-time error, not detectable by preg_last_error
516:             throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
517:         });
518: 
519:         if (($code = preg_last_error()) // run-time error, but preg_last_error & return code are liars
520:             && ($res === NULL || !in_array($func, array('preg_filter', 'preg_replace_callback', 'preg_replace')))
521:         ) {
522:             throw new RegexpException(NULL, $code, implode(' or ', (array) $args[0]));
523:         }
524:         return $res;
525:     }
526: 
527: }
528: 
529: 
/**
 * The exception that indicates error of the last Regexp execution.
 *
 * When no message is given, one is derived from the PCRE error code and the
 * pattern is appended to it.
 */
class RegexpException extends \Exception
{
    /** @var array  PCRE error code => human readable message */
    public static $messages = array(
        PREG_INTERNAL_ERROR => 'Internal error',
        PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
        PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
        5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
    );

    /**
     * @param  string  explicit message; when empty it is built from $code and $pattern
     * @param  int     PCRE error code as returned by preg_last_error()
     * @param  string  pattern mentioned in the generated message
     */
    public function __construct($message, $code = NULL, $pattern = NULL)
    {
        if (!$message) {
            $message = (isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error') . ($pattern ? " (pattern: $pattern)" : '');
        }
        // the parent expects an integer code; NULL (the default) becomes 0
        parent::__construct($message, (int) $code);
    }

}
552:
Namespaces

Classes

Interfaces

Exceptions