File Utils/Strings.php | Nette 2.4-20180918 API

  1: <?php
  2: 
  3: /**
  4:  * This file is part of the Nette Framework (https://nette.org)
  5:  * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
  6:  */
  7: 
  8: namespace Nette\Utils;
  9: 
 10: use Nette;
 11: 
 12: 
 13: /**
 14:  * String tools library.
 15:  */
 16: class Strings
 17: {
 18:     use Nette\StaticClass;
 19: 
 20:     const TRIM_CHARACTERS = " \t\n\r\0\x0B\xC2\xA0";
 21: 
 22: 
 23:     /**
 24:      * Checks if the string is valid for UTF-8 encoding.
 25:      * @param  string  byte stream to check
 26:      * @return bool
 27:      */
 28:     public static function checkEncoding($s)
 29:     {
 30:         return $s === self::fixEncoding($s);
 31:     }
 32: 
 33: 
 34:     /**
 35:      * Removes invalid code unit sequences from UTF-8 string.
 36:      * @param  string  byte stream to fix
 37:      * @return string
 38:      */
 39:     public static function fixEncoding($s)
 40:     {
 41:         // removes xD800-xDFFF, x110000 and higher
 42:         return htmlspecialchars_decode(htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8'), ENT_NOQUOTES);
 43:     }
 44: 
 45: 
 46:     /**
 47:      * Returns a specific character in UTF-8.
 48:      * @param  int     code point (0x0 to 0xD7FF or 0xE000 to 0x10FFFF)
 49:      * @return string
 50:      * @throws Nette\InvalidArgumentException if code point is not in valid range
 51:      */
 52:     public static function chr($code)
 53:     {
 54:         if ($code < 0 || ($code >= 0xD800 && $code <= 0xDFFF) || $code > 0x10FFFF) {
 55:             throw new Nette\InvalidArgumentException('Code point must be in range 0x0 to 0xD7FF or 0xE000 to 0x10FFFF.');
 56:         }
 57:         return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', $code));
 58:     }
 59: 
 60: 
 61:     /**
 62:      * Starts the $haystack string with the prefix $needle?
 63:      * @param  string
 64:      * @param  string
 65:      * @return bool
 66:      */
 67:     public static function startsWith($haystack, $needle)
 68:     {
 69:         return strncmp($haystack, $needle, strlen($needle)) === 0;
 70:     }
 71: 
 72: 
 73:     /**
 74:      * Ends the $haystack string with the suffix $needle?
 75:      * @param  string
 76:      * @param  string
 77:      * @return bool
 78:      */
 79:     public static function endsWith($haystack, $needle)
 80:     {
 81:         return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
 82:     }
 83: 
 84: 
 85:     /**
 86:      * Does $haystack contain $needle?
 87:      * @param  string
 88:      * @param  string
 89:      * @return bool
 90:      */
 91:     public static function contains($haystack, $needle)
 92:     {
 93:         return strpos($haystack, $needle) !== false;
 94:     }
 95: 
 96: 
 97:     /**
 98:      * Returns a part of UTF-8 string.
 99:      * @param  string
100:      * @param  int in characters (code points)
101:      * @param  int in characters (code points)
102:      * @return string
103:      */
104:     public static function substring($s, $start, $length = null)
105:     {
106:         if (function_exists('mb_substr')) {
107:             return mb_substr($s, $start, $length, 'UTF-8'); // MB is much faster
108:         } elseif ($length === null) {
109:             $length = self::length($s);
110:         } elseif ($start < 0 && $length < 0) {
111:             $start += self::length($s); // unifies iconv_substr behavior with mb_substr
112:         }
113:         return iconv_substr($s, $start, $length, 'UTF-8');
114:     }
115: 
116: 
117:     /**
118:      * Removes special controls characters and normalizes line endings and spaces.
119:      * @param  string  UTF-8 encoding
120:      * @return string
121:      */
122:     public static function normalize($s)
123:     {
124:         $s = self::normalizeNewLines($s);
125: 
126:         // remove control characters; leave \t + \n
127:         $s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F-\x9F]+#u', '', $s);
128: 
129:         // right trim
130:         $s = preg_replace('#[\t ]+$#m', '', $s);
131: 
132:         // leading and trailing blank lines
133:         $s = trim($s, "\n");
134: 
135:         return $s;
136:     }
137: 
138: 
139:     /**
140:      * Standardize line endings to unix-like.
141:      * @param  string  UTF-8 encoding or 8-bit
142:      * @return string
143:      */
144:     public static function normalizeNewLines($s)
145:     {
146:         return str_replace(["\r\n", "\r"], "\n", $s);
147:     }
148: 
149: 
    /**
     * Converts a UTF-8 string to ASCII by transliteration.
     * Uses the intl Transliterator when that class is already loaded, then iconv;
     * the iconv step differs between glibc and other iconv implementations.
     * @param  string  UTF-8 encoding
     * @return string  ASCII
     */
    public static function toAscii($s)
    {
        // created once and reused across calls; class_exists(..., false) does not trigger autoloading
        static $transliterator = null;
        if ($transliterator === null && class_exists('Transliterator', false)) {
            $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
        }

        // keep only \t, \n, \r, printable ASCII and code points U+00A0-U+02FF and U+0370-U+10FFFF;
        // this drops other control characters, U+007F-U+009F and the range U+0300-U+036F
        $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);

        // hide the characters ` ' " ^ ~ ? behind placeholders \x01-\x06: the literal ones present
        // after transliteration are removed at the end, while the placeholders are restored
        $s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");

        // typographic double quotes (U+201E, U+201C, U+201D) map to the " placeholder,
        // single quotes (U+201A, U+2018, U+2019) to the ' placeholder, degree sign (U+00B0) to the ^ placeholder
        $s = str_replace(
            ["\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0"],
            ["\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04"], $s
        );
        if ($transliterator !== null) {
            $s = $transliterator->transliterate($s);
        }
        if (ICONV_IMPL === 'glibc') {
            // explicit ASCII spellings for U+00BB, U+00AB, U+2026, U+2122, U+00A9, U+00AE
            $s = str_replace(
                ["\xC2\xBB", "\xC2\xAB", "\xE2\x80\xA6", "\xE2\x84\xA2", "\xC2\xA9", "\xC2\xAE"],
                ['>>', '<<', '...', 'TM', '(c)', '(R)'], $s
            );
            // convert to the single-byte WINDOWS-1250 code page, then map its high bytes
            // to ASCII letters and punctuation one-to-one with the table below
            $s = iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
            $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
                . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
                . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
                . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
                . "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
                'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.');
            // whatever is still outside 7-bit ASCII is dropped
            $s = preg_replace('#[^\x00-\x7F]++#', '', $s);
        } else {
            $s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
        }
        // remove ` ' " ^ ~ ? that appeared during conversion (presumably emitted by iconv's
        // TRANSLIT for accents and unknown characters - TODO confirm), then restore the originals
        $s = str_replace(['`', "'", '"', '^', '~', '?'], '', $s);
        return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
    }
190: 
191: 
192:     /**
193:      * Converts to web safe characters [a-z0-9-] text.
194:      * @param  string  UTF-8 encoding
195:      * @param  string  allowed characters
196:      * @param  bool
197:      * @return string
198:      */
199:     public static function webalize($s, $charlist = null, $lower = true)
200:     {
201:         $s = self::toAscii($s);
202:         if ($lower) {
203:             $s = strtolower($s);
204:         }
205:         $s = preg_replace('#[^a-z0-9' . ($charlist !== null ? preg_quote($charlist, '#') : '') . ']+#i', '-', $s);
206:         $s = trim($s, '-');
207:         return $s;
208:     }
209: 
210: 
211:     /**
212:      * Truncates string to maximal length.
213:      * @param  string  UTF-8 encoding
214:      * @param  int
215:      * @param  string  UTF-8 encoding
216:      * @return string
217:      */
218:     public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
219:     {
220:         if (self::length($s) > $maxLen) {
221:             $maxLen = $maxLen - self::length($append);
222:             if ($maxLen < 1) {
223:                 return $append;
224: 
225:             } elseif ($matches = self::match($s, '#^.{1,' . $maxLen . '}(?=[\s\x00-/:-@\[-`{-~])#us')) {
226:                 return $matches[0] . $append;
227: 
228:             } else {
229:                 return self::substring($s, 0, $maxLen) . $append;
230:             }
231:         }
232:         return $s;
233:     }
234: 
235: 
236:     /**
237:      * Indents the content from the left.
238:      * @param  string  UTF-8 encoding or 8-bit
239:      * @param  int
240:      * @param  string
241:      * @return string
242:      */
243:     public static function indent($s, $level = 1, $chars = "\t")
244:     {
245:         if ($level > 0) {
246:             $s = self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
247:         }
248:         return $s;
249:     }
250: 
251: 
252:     /**
253:      * Convert to lower case.
254:      * @param  string  UTF-8 encoding
255:      * @return string
256:      */
257:     public static function lower($s)
258:     {
259:         return mb_strtolower($s, 'UTF-8');
260:     }
261: 
262: 
263:     /**
264:      * Convert first character to lower case.
265:      * @param  string  UTF-8 encoding
266:      * @return string
267:      */
268:     public static function firstLower($s)
269:     {
270:         return self::lower(self::substring($s, 0, 1)) . self::substring($s, 1);
271:     }
272: 
273: 
274:     /**
275:      * Convert to upper case.
276:      * @param  string  UTF-8 encoding
277:      * @return string
278:      */
279:     public static function upper($s)
280:     {
281:         return mb_strtoupper($s, 'UTF-8');
282:     }
283: 
284: 
285:     /**
286:      * Convert first character to upper case.
287:      * @param  string  UTF-8 encoding
288:      * @return string
289:      */
290:     public static function firstUpper($s)
291:     {
292:         return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
293:     }
294: 
295: 
296:     /**
297:      * Capitalize string.
298:      * @param  string  UTF-8 encoding
299:      * @return string
300:      */
301:     public static function capitalize($s)
302:     {
303:         return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
304:     }
305: 
306: 
307:     /**
308:      * Case-insensitive compares UTF-8 strings.
309:      * @param  string
310:      * @param  string
311:      * @param  int
312:      * @return bool
313:      */
314:     public static function compare($left, $right, $len = null)
315:     {
316:         if ($len < 0) {
317:             $left = self::substring($left, $len, -$len);
318:             $right = self::substring($right, $len, -$len);
319:         } elseif ($len !== null) {
320:             $left = self::substring($left, 0, $len);
321:             $right = self::substring($right, 0, $len);
322:         }
323:         return self::lower($left) === self::lower($right);
324:     }
325: 
326: 
327:     /**
328:      * Finds the length of common prefix of strings.
329:      * @param  string|array
330:      * @return string
331:      */
332:     public static function findPrefix(...$strings)
333:     {
334:         if (is_array($strings[0])) {
335:             $strings = $strings[0];
336:         }
337:         $first = array_shift($strings);
338:         for ($i = 0; $i < strlen($first); $i++) {
339:             foreach ($strings as $s) {
340:                 if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
341:                     while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
342:                         $i--;
343:                     }
344:                     return substr($first, 0, $i);
345:                 }
346:             }
347:         }
348:         return $first;
349:     }
350: 
351: 
352:     /**
353:      * Returns number of characters (not bytes) in UTF-8 string.
354:      * That is the number of Unicode code points which may differ from the number of graphemes.
355:      * @param  string
356:      * @return int
357:      */
358:     public static function length($s)
359:     {
360:         return function_exists('mb_strlen') ? mb_strlen($s, 'UTF-8') : strlen(utf8_decode($s));
361:     }
362: 
363: 
364:     /**
365:      * Strips whitespace.
366:      * @param  string  UTF-8 encoding
367:      * @param  string
368:      * @return string
369:      */
370:     public static function trim($s, $charlist = self::TRIM_CHARACTERS)
371:     {
372:         $charlist = preg_quote($charlist, '#');
373:         return self::replace($s, '#^[' . $charlist . ']+|[' . $charlist . ']+\z#u', '');
374:     }
375: 
376: 
377:     /**
378:      * Pad a string to a certain length with another string.
379:      * @param  string  UTF-8 encoding
380:      * @param  int
381:      * @param  string
382:      * @return string
383:      */
384:     public static function padLeft($s, $length, $pad = ' ')
385:     {
386:         $length = max(0, $length - self::length($s));
387:         $padLen = self::length($pad);
388:         return str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen) . $s;
389:     }
390: 
391: 
392:     /**
393:      * Pad a string to a certain length with another string.
394:      * @param  string  UTF-8 encoding
395:      * @param  int
396:      * @param  string
397:      * @return string
398:      */
399:     public static function padRight($s, $length, $pad = ' ')
400:     {
401:         $length = max(0, $length - self::length($s));
402:         $padLen = self::length($pad);
403:         return $s . str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen);
404:     }
405: 
406: 
407:     /**
408:      * Reverse string.
409:      * @param  string  UTF-8 encoding
410:      * @return string
411:      */
412:     public static function reverse($s)
413:     {
414:         return iconv('UTF-32LE', 'UTF-8', strrev(iconv('UTF-8', 'UTF-32BE', $s)));
415:     }
416: 
417: 
418:     /**
419:      * Returns part of $haystack before $nth occurence of $needle.
420:      * @param  string
421:      * @param  string
422:      * @param  int  negative value means searching from the end
423:      * @return string|false  returns false if the needle was not found
424:      */
425:     public static function before($haystack, $needle, $nth = 1)
426:     {
427:         $pos = self::pos($haystack, $needle, $nth);
428:         return $pos === false
429:             ? false
430:             : substr($haystack, 0, $pos);
431:     }
432: 
433: 
434:     /**
435:      * Returns part of $haystack after $nth occurence of $needle.
436:      * @param  string
437:      * @param  string
438:      * @param  int  negative value means searching from the end
439:      * @return string|false  returns false if the needle was not found
440:      */
441:     public static function after($haystack, $needle, $nth = 1)
442:     {
443:         $pos = self::pos($haystack, $needle, $nth);
444:         return $pos === false
445:             ? false
446:             : (string) substr($haystack, $pos + strlen($needle));
447:     }
448: 
449: 
450:     /**
451:      * Returns position of $nth occurence of $needle in $haystack.
452:      * @param  string
453:      * @param  string
454:      * @param  int  negative value means searching from the end
455:      * @return int|false  offset in characters or false if the needle was not found
456:      */
457:     public static function indexOf($haystack, $needle, $nth = 1)
458:     {
459:         $pos = self::pos($haystack, $needle, $nth);
460:         return $pos === false
461:             ? false
462:             : self::length(substr($haystack, 0, $pos));
463:     }
464: 
465: 
466:     /**
467:      * Returns position of $nth occurence of $needle in $haystack.
468:      * @return int|false  offset in bytes or false if the needle was not found
469:      */
470:     private static function pos($haystack, $needle, $nth = 1)
471:     {
472:         if (!$nth) {
473:             return false;
474:         } elseif ($nth > 0) {
475:             if (strlen($needle) === 0) {
476:                 return 0;
477:             }
478:             $pos = 0;
479:             while (($pos = strpos($haystack, $needle, $pos)) !== false && --$nth) {
480:                 $pos++;
481:             }
482:         } else {
483:             $len = strlen($haystack);
484:             if (strlen($needle) === 0) {
485:                 return $len;
486:             }
487:             $pos = $len - 1;
488:             while (($pos = strrpos($haystack, $needle, $pos - $len)) !== false && ++$nth) {
489:                 $pos--;
490:             }
491:         }
492:         return $pos;
493:     }
494: 
495: 
496:     /**
497:      * Splits string by a regular expression.
498:      * @param  string
499:      * @param  string
500:      * @param  int
501:      * @return array
502:      */
503:     public static function split($subject, $pattern, $flags = 0)
504:     {
505:         return self::pcre('preg_split', [$pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE]);
506:     }
507: 
508: 
509:     /**
510:      * Performs a regular expression match.
511:      * @param  string
512:      * @param  string
513:      * @param  int  can be PREG_OFFSET_CAPTURE (returned in bytes)
514:      * @param  int  offset in bytes
515:      * @return mixed
516:      */
517:     public static function match($subject, $pattern, $flags = 0, $offset = 0)
518:     {
519:         if ($offset > strlen($subject)) {
520:             return null;
521:         }
522:         return self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset])
523:             ? $m
524:             : null;
525:     }
526: 
527: 
528:     /**
529:      * Performs a global regular expression match.
530:      * @param  string
531:      * @param  string
532:      * @param  int  can be PREG_OFFSET_CAPTURE (returned in bytes); PREG_SET_ORDER is default
533:      * @param  int  offset in bytes
534:      * @return array
535:      */
536:     public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
537:     {
538:         if ($offset > strlen($subject)) {
539:             return [];
540:         }
541:         self::pcre('preg_match_all', [
542:             $pattern, $subject, &$m,
543:             ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
544:             $offset,
545:         ]);
546:         return $m;
547:     }
548: 
549: 
550:     /**
551:      * Perform a regular expression search and replace.
552:      * @param  string
553:      * @param  string|array
554:      * @param  string|callable
555:      * @param  int
556:      * @return string
557:      */
558:     public static function replace($subject, $pattern, $replacement = null, $limit = -1)
559:     {
560:         if (is_object($replacement) || is_array($replacement)) {
561:             if (!is_callable($replacement, false, $textual)) {
562:                 throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
563:             }
564:             return self::pcre('preg_replace_callback', [$pattern, $replacement, $subject, $limit]);
565: 
566:         } elseif ($replacement === null && is_array($pattern)) {
567:             $replacement = array_values($pattern);
568:             $pattern = array_keys($pattern);
569:         }
570: 
571:         return self::pcre('preg_replace', [$pattern, $replacement, $subject, $limit]);
572:     }
573: 
574: 
575:     /** @internal */
576:     public static function pcre($func, $args)
577:     {
578:         static $messages = [
579:             PREG_INTERNAL_ERROR => 'Internal error',
580:             PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
581:             PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
582:             PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
583:             PREG_BAD_UTF8_OFFSET_ERROR => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point',
584:             6 => 'Failed due to limited JIT stack space', // PREG_JIT_STACKLIMIT_ERROR
585:         ];
586:         $res = Callback::invokeSafe($func, $args, function ($message) use ($args) {
587:             // compile-time error, not detectable by preg_last_error
588:             throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
589:         });
590: 
591:         if (($code = preg_last_error()) // run-time error, but preg_last_error & return code are liars
592:             && ($res === null || !in_array($func, ['preg_filter', 'preg_replace_callback', 'preg_replace'], true))
593:         ) {
594:             throw new RegexpException((isset($messages[$code]) ? $messages[$code] : 'Unknown error')
595:                 . ' (pattern: ' . implode(' or ', (array) $args[0]) . ')', $code);
596:         }
597:         return $res;
598:     }
599: }
600:
Namespaces

Classes

Interfaces

Exceptions