2 declare(strict_types
=1);
4 use wcf\system\application\ApplicationHandler
;
5 use wcf\system\request\RouteHandler
;
9 * Contains string-related functions.
11 * @author Oliver Kliebisch, Marcel Werk
12 * @copyright 2001-2018 WoltLab GmbH
13 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
14 * @package WoltLabSuite\Core\Util
16 final class StringUtil
{
17 const HTML_PATTERN
= '~</?[a-z]+[1-6]?
18 (?:\s*[a-z\-]+\s*(=\s*(?:
19 "[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'|[^\s>]
21 const HTML_COMMENT_PATTERN
= '~<!--(.*?)-->~';
24 * utf8 bytes of the HORIZONTAL ELLIPSIS (U+2026)
27 const HELLIP
= "\u{2026}";
30 * utf8 bytes of the MINUS SIGN (U+2212)
33 const MINUS
= "\u{2212}";
36 * Alias to php sha1() function.
38 * @param string $value
/**
 * Returns the SHA-1 digest of the given value (alias of PHP's sha1()).
 *
 * @param   string  $value  value to hash
 * @return  string  40-character lowercase hexadecimal digest
 */
public static function getHash($value) {
    // Explicit cast: with strict_types enabled sha1() rejects non-string scalars.
    return sha1((string) $value);
}
46 * Creates a random hash.
/**
 * Creates a random identifier.
 *
 * The identifier is drawn from the operating system's CSPRNG instead of
 * being derived from the clock and mt_rand(), both of which are predictable.
 * The result keeps the shape of a SHA-1 digest.
 *
 * @return  string  40-character lowercase hexadecimal string
 */
public static function getRandomID() {
    // 20 random bytes => 40 hex characters, same length as a SHA-1 digest
    return bin2hex(random_bytes(20));
}
/**
 * Creates a random (version 4) UUID.
 *
 * The 16 bytes come from the CSPRNG; the version nibble is forced to 4 and
 * the variant bits to binary 10, matching the value ranges of the previous
 * mt_rand() based implementation.
 *
 * @return  string  UUID in the form xxxxxxxx-xxxx-4xxx-[89ab]xxx-xxxxxxxxxxxx
 */
public static function getUUID() {
    $bytes = random_bytes(16);

    // version 4: high nibble of byte 6 is 0100
    $bytes[6] = chr((ord($bytes[6]) & 0x0f) | 0x40);
    // variant: two most significant bits of byte 8 are 10
    $bytes[8] = chr((ord($bytes[8]) & 0x3f) | 0x80);

    $hex = bin2hex($bytes);

    return sprintf(
        '%s-%s-%s-%s-%s',
        substr($hex, 0, 8),
        substr($hex, 8, 4),
        substr($hex, 12, 4),
        substr($hex, 16, 4),
        substr($hex, 20, 12)
    );
}
64 * Converts DOS (\r\n) and lone carriage-return (\r) newlines to unix newlines (\n).
66 * @param string $string
/**
 * Normalises line endings: every "\r\n" pair and every lone "\r" becomes "\n".
 *
 * @param   string  $string
 * @return  string
 */
public static function unifyNewlines($string) {
    $foreignNewlines = "%(\r\n)|(\r)%";
    $unixNewline = "\n";

    return preg_replace($foreignNewlines, $unixNewline, $string);
}
74 * Removes Unicode whitespace characters from the beginning
75 * and ending of the given string.
/**
 * Removes Unicode whitespace characters from the beginning and the end
 * of the given string.
 *
 * @param   string  $text
 * @return  string  trimmed text; the input is returned unchanged if it is
 *                  not valid UTF-8 and the expressions cannot be applied
 */
public static function trim($text) {
    // These regular expressions use character properties to find
    // characters defined as space in Unicode.
    // Do not merge the expressions, they are separated for
    // performance reasons.
    $withoutLeading = preg_replace('/^[\p{Zs}\s]+/u', '', $text);
    if ($withoutLeading === null) {
        // preg_replace() yields null on malformed UTF-8 when /u is used
        return $text;
    }

    $trimmed = preg_replace('/[\p{Zs}\s]+$/u', '', $withoutLeading);

    return $trimmed ?? $withoutLeading;
}
93 * Converts html special characters.
95 * @param string $string
/**
 * Escapes HTML special characters (&, <, > and double quotes) in the
 * given value; single quotes are left untouched (ENT_COMPAT).
 *
 * @param   string  $string
 * @return  string
 */
public static function encodeHTML($string) {
    return @htmlspecialchars(
        (string) $string,
        ENT_COMPAT,
        'UTF-8'
    );
}
103 * Converts javascript special characters.
105 * @param string $string
/**
 * Escapes characters that are special inside a single-quoted JavaScript
 * string literal.
 *
 * @param   string  $string
 * @return  string  escaped string
 */
public static function encodeJS($string) {
    // unify newlines
    $string = self::unifyNewlines($string);

    // escape backslash (must run first so later escapes are not doubled)
    $string = str_replace("\\", "\\\\", $string);

    // escape single quote
    $string = str_replace("'", "\'", $string);

    // escape new lines
    $string = str_replace("\n", '\n', $string);

    // escape slashes
    $string = str_replace("/", '\/', $string);

    return $string;
}
128 * Encodes JSON strings. This is not the same as PHP's json_encode()!
130 * @param string $string
/**
 * Encodes a string for use in JSON embedded in HTML. This is not the same
 * as PHP's json_encode()!
 *
 * @param   string  $string
 * @return  string  encoded string
 */
public static function encodeJSON($string) {
    $string = self::encodeJS($string);

    $string = self::encodeHTML($string);

    // single quotes must be encoded as HTML entity: encodeJS() turned each
    // "'" into "\'" and encodeHTML() (ENT_COMPAT) left it alone
    $string = str_replace("\\'", "&#39;", $string);

    return $string;
}
145 * Decodes html entities.
147 * @param string $string
/**
 * Decodes HTML entities.
 *
 * @param   string  $string
 * @return  string  decoded string
 */
public static function decodeHTML($string) {
    // convert non-breaking space entities to ascii 32; html_entity_decode()
    // would otherwise turn them into U+00A0 (ascii 160)
    $string = str_ireplace('&nbsp;', ' ', $string);

    return @html_entity_decode($string, ENT_COMPAT, 'UTF-8');
}
158 * @param number $numeric
/**
 * Formats a numeric value, dispatching to the integer or the floating
 * point formatter.
 *
 * @param   number  $numeric
 * @return  string
 */
public static function formatNumeric($numeric) {
    if (is_int($numeric)) {
        return self::formatInteger($numeric);
    }

    if (is_float($numeric)) {
        return self::formatDouble($numeric);
    }

    // neither int nor float (e.g. a numeric string): a non-zero difference
    // between the float and the integer value means there is a fraction
    $fraction = floatval($numeric) - (float) intval($numeric);

    return $fraction
        ? self::formatDouble($numeric)
        : self::formatInteger(intval($numeric));
}
179 * Formats an integer.
181 * @param integer $integer
/**
 * Formats an integer: adds thousands separators and replaces the
 * hyphen-minus with the typographic minus sign.
 *
 * @param   integer $integer
 * @return  string
 */
public static function formatInteger($integer) {
    $integer = self::addThousandsSeparator($integer);

    // format minus
    $integer = self::formatNegative($integer);

    return $integer;
}
196 * @param double $double
197 * @param integer $maxDecimals
/**
 * Formats a floating point number.
 *
 * @param   double  $double
 * @param   integer $maxDecimals    maximum number of decimal places; 0 rounds
 *                                  to two places and formats whole numbers as
 *                                  integers
 * @return  string
 */
public static function formatDouble($double, $maxDecimals = 0) {
    // round; cast first because formatNumeric() may hand over a numeric
    // string, which round() rejects under strict_types
    $double = (string) round((float) $double, ($maxDecimals > 0 ? $maxDecimals : 2));

    // consider as integer, if no decimal places found
    if (!$maxDecimals && preg_match('~^(-?\d+)(?:\.(?:0*|00[0-4]\d*))?$~', $double, $match)) {
        return self::formatInteger($match[1]);
    }

    // remove last 0, but only behind a decimal point: without that check
    // a whole number such as "10" would be cut down to "1"
    if ($maxDecimals < 2 && strpos($double, '.') !== false && substr($double, -1) == '0') {
        $double = substr($double, 0, -1);
    }

    // replace decimal point
    $double = str_replace('.', WCF::getLanguage()->get('wcf.global.decimalPoint'), $double);

    // add thousands separator
    $double = self::addThousandsSeparator($double);

    // format minus
    $double = self::formatNegative($double);

    return $double;
}
225 * Adds thousands separators to a given number.
227 * @param mixed $number
/**
 * Adds thousands separators to a given number.
 *
 * @param   mixed   $number
 * @return  mixed   the number with separators inserted as a string, or the
 *                  unchanged input when its absolute value is below 1000
 */
public static function addThousandsSeparator($number) {
    if ($number >= 1000 || $number <= -1000) {
        // Insert the separator in front of every group of three digits
        // counted from the right. The subject is cast because preg_replace()
        // does not accept an integer under strict_types.
        $number = preg_replace(
            '~(?<=\d)(?=(\d{3})+(?!\d))~',
            WCF::getLanguage()->get('wcf.global.thousandsSeparator'),
            (string) $number
        );
    }

    return $number;
}
239 * Replaces the HYPHEN-MINUS with the MINUS SIGN.
241 * @param mixed $number
/**
 * Replaces the HYPHEN-MINUS (U+002D) with the MINUS SIGN (U+2212).
 *
 * @param   mixed   $number
 * @return  string
 */
public static function formatNegative($number) {
    // Callers may pass a plain integer (e.g. values below 1000 coming from
    // addThousandsSeparator()); str_replace() requires a string or array
    // subject under strict_types.
    return str_replace('-', self::MINUS, (string) $number);
}
249 * Alias to php ucfirst() function with multibyte support.
251 * @param string $string
/**
 * Multibyte-aware counterpart of ucfirst(): upper-cases the first
 * character and leaves the rest untouched.
 *
 * @param   string  $string
 * @return  string
 */
public static function firstCharToUpperCase($string) {
    $head = mb_substr($string, 0, 1);
    $tail = mb_substr($string, 1);

    return mb_strtoupper($head) . $tail;
}
259 * Alias to php lcfirst() function with multibyte support.
261 * @param string $string
/**
 * Multibyte-aware counterpart of lcfirst(): lower-cases the first
 * character and leaves the rest untouched.
 *
 * @param   string  $string
 * @return  string
 */
public static function firstCharToLowerCase($string) {
    $head = mb_substr($string, 0, 1);
    $tail = mb_substr($string, 1);

    return mb_strtolower($head) . $tail;
}
269 * Alias to php mb_convert_case() function.
271 * @param string $string
/**
 * Converts the given string to title case via mb_convert_case().
 *
 * @param   string  $string
 * @return  string
 */
public static function wordsToUpperCase($string) {
    $mode = MB_CASE_TITLE;

    return mb_convert_case($string, $mode);
}
279 * Alias to php str_ireplace() function with UTF-8 support.
281 * This function is considered to be slow, if $search contains
282 * only ASCII characters, please use str_ireplace() instead.
284 * @param string $search
285 * @param string $replace
286 * @param string $subject
287 * @param integer $count
/**
 * Case-insensitive, multibyte-aware counterpart of str_ireplace().
 *
 * This function is considered to be slow; if $search contains only ASCII
 * characters, please use str_ireplace() instead.
 *
 * @param   string  $search
 * @param   string  $replace
 * @param   string  $subject
 * @param   integer $count      incremented once per replacement (not reset)
 * @return  string
 */
public static function replaceIgnoreCase($search, $replace, $subject, &$count = 0) {
    $searchLength = mb_strlen($search);
    if ($searchLength === 0) {
        // an empty needle matches at offset 0 forever; nothing to replace
        return $subject;
    }

    $needle = mb_strtolower($search);
    $result = '';

    // consume the subject from the left, one match at a time
    while (($startPos = mb_strpos(mb_strtolower($subject), $needle)) !== false) {
        $result .= mb_substr($subject, 0, $startPos) . $replace;
        $subject = mb_substr($subject, $startPos + $searchLength);
        $count++;
    }

    return $result . $subject;
}
301 * Alias to php str_split() function with multibyte support.
303 * @param string $string
304 * @param integer $length
/**
 * Multibyte-aware counterpart of str_split().
 *
 * @param   string      $string
 * @param   integer     $length     chunk size in characters; values below 1
 *                                  are treated as 1
 * @return  string[]    chunks in order; an empty array for an empty string
 */
public static function split($string, $length = 1) {
    // a step of 0 or less would never terminate the loop
    $length = max(1, (int) $length);

    $result = [];
    for ($i = 0, $max = mb_strlen($string); $i < $max; $i += $length) {
        $result[] = mb_substr($string, $i, $length);
    }

    return $result;
}
316 * Checks whether $haystack starts with $needle, or not.
318 * @param string $haystack The string to be checked for starting with $needle
319 * @param string $needle The string to be found at the start of $haystack
320 * @param boolean $ci Case insensitive or not. Default = false.
322 * @return boolean True, if $haystack starts with $needle, false otherwise.
/**
 * Checks whether $haystack starts with $needle, or not.
 *
 * @param   string  $haystack   The string to be checked for starting with $needle
 * @param   string  $needle     The string to be found at the start of $haystack
 * @param   boolean $ci         Case insensitive or not. Default = false.
 * @return  boolean             True, if $haystack starts with $needle, false otherwise.
 */
public static function startsWith($haystack, $needle, $ci = false) {
    // only fold case when the caller asked for a case-insensitive check
    if ($ci) {
        $haystack = mb_strtolower($haystack);
        $needle = mb_strtolower($needle);
    }

    // using mb_substr and === is MUCH faster for long strings than searching for the needle.
    return mb_substr($haystack, 0, mb_strlen($needle)) === $needle;
}
334 * Returns true if $haystack ends with $needle or if the length of $needle is 0.
336 * @param string $haystack
337 * @param string $needle
338 * @param boolean $ci case insensitive
/**
 * Returns true if $haystack ends with $needle or if the length of $needle is 0.
 *
 * @param   string  $haystack
 * @param   string  $needle
 * @param   boolean $ci     case insensitive
 * @return  boolean
 */
public static function endsWith($haystack, $needle, $ci = false) {
    // only fold case when the caller asked for a case-insensitive check
    if ($ci) {
        $haystack = mb_strtolower($haystack);
        $needle = mb_strtolower($needle);
    }

    $length = mb_strlen($needle);
    // mb_substr($haystack, 0) would return the whole string, so the empty
    // needle is answered up front
    if ($length === 0) return true;

    return (mb_substr($haystack, $length * -1) === $needle);
}
352 * Alias to php str_pad function with multibyte support.
354 * @param string $input
355 * @param integer $padLength
356 * @param string $padString
357 * @param integer $padType
/**
 * Counterpart of str_pad() that measures $input in characters rather
 * than bytes.
 *
 * @param   string  $input
 * @param   integer $padLength
 * @param   string  $padString
 * @param   integer $padType
 * @return  string
 */
public static function pad($input, $padLength, $padString = ' ', $padType = STR_PAD_RIGHT) {
    // str_pad() counts bytes; widen the target by the number of extra
    // bytes the multibyte characters of $input occupy
    $byteLength = strlen($input);
    $charLength = mb_strlen($input);
    $targetLength = $padLength + ($byteLength - $charLength);

    return str_pad($input, $targetLength, $padString, $padType);
}
366 * Unescapes escaped characters in a string.
368 * @param string $string
369 * @param string $chars
/**
 * Unescapes escaped characters in a string: for every character listed in
 * $chars, a preceding backslash is removed.
 *
 * @param   string  $string
 * @param   string  $chars  characters whose backslash escape is removed
 * @return  string
 */
public static function unescape($string, $chars = '"') {
    for ($i = 0, $j = strlen($chars); $i < $j; $i++) {
        $string = str_replace('\\'.$chars[$i], $chars[$i], $string);
    }

    return $string;
}
381 * Takes a numeric HTML entity value and returns the appropriate UTF-8 bytes.
383 * @param integer $dec html entity value
384 * @return string utf-8 bytes
386 public static function getCharacter($dec) {
390 else if ($dec < 2048) {
391 $utf = chr(192 +
(($dec - ($dec %
64)) / 64));
392 $utf .= chr(128 +
($dec %
64));
395 $utf = chr(224 +
(($dec - ($dec %
4096)) / 4096));
396 $utf .= chr(128 +
((($dec %
4096) - ($dec %
64)) / 64));
397 $utf .= chr(128 +
($dec %
64));
403 * Converts UTF-8 to Unicode
404 * @see http://www1.tip.nl/~t876506/utf8tbl.html
409 public static function getCharValue($c) {
411 if (ord($c{0}) >= 0 && ord($c{0}) <= 127)
413 if (ord($c{0}) >= 192 && ord($c{0}) <= 223)
414 $ud = (ord($c{0}) - 192) * 64 +
(ord($c{1}) - 128);
415 if (ord($c{0}) >= 224 && ord($c{0}) <= 239)
416 $ud = (ord($c{0}) - 224) * 4096 +
(ord($c{1}) - 128) * 64 +
(ord($c{2}) - 128);
417 if (ord($c{0}) >= 240 && ord($c{0}) <= 247)
418 $ud = (ord($c{0}) - 240) * 262144 +
(ord($c{1}) - 128) * 4096 +
(ord($c{2}) - 128) * 64 +
(ord($c{3}) - 128);
419 if (ord($c{0}) >= 248 && ord($c{0}) <= 251)
420 $ud = (ord($c{0}) - 248) * 16777216 +
(ord($c{1}) - 128) * 262144 +
(ord($c{2}) - 128) * 4096 +
(ord($c{3}) - 128) * 64 +
(ord($c{4}) - 128);
421 if (ord($c{0}) >= 252 && ord($c{0}) <= 253)
422 $ud = (ord($c{0}) - 252) * 1073741824 +
(ord($c{1}) - 128) * 16777216 +
(ord($c{2}) - 128) * 262144 +
(ord($c{3}) - 128) * 4096 +
(ord($c{4}) - 128) * 64 +
(ord($c{5}) - 128);
423 if (ord($c{0}) >= 254 && ord($c{0}) <= 255)
424 $ud = false; // error
429 * Returns html entities of all characters in the given string.
431 * @param string $string
/**
 * Returns html entities of all characters in the given string.
 *
 * Every character is emitted as a decimal numeric entity ("&#NNN;") built
 * from the value getCharValue() reports for it.
 *
 * @param   string  $string
 * @return  string
 */
public static function encodeAllChars($string) {
    $result = '';
    for ($i = 0, $j = mb_strlen($string); $i < $j; $i++) {
        $char = mb_substr($string, $i, 1);
        $result .= '&#'.self::getCharValue($char).';';
    }

    return $result;
}
445 * Returns true if the given string contains only ASCII characters.
447 * @param string $string
/**
 * Tells whether the given string consists solely of 7-bit ASCII bytes.
 *
 * @param   string  $string
 * @return  integer|false   result of preg_match(): 1 if only ASCII (or empty), 0 otherwise
 */
public static function isASCII($string) {
    $asciiOnly = '/^[\x00-\x7F]*$/';

    return preg_match($asciiOnly, $string);
}
455 * Returns true if the given string is utf-8 encoded.
456 * @see http://www.w3.org/International/questions/qa-forms-utf-8
458 * @param string $string
/**
 * Returns true if the given string is utf-8 encoded.
 * @see     http://www.w3.org/International/questions/qa-forms-utf-8
 *
 * @param   string  $string
 * @return  integer|false   result of preg_match(): 1 if every byte sequence
 *                          is one of the well-formed forms below, 0 otherwise
 */
public static function isUTF8($string) {
    // Extended (x) pattern: whitespace and "#" comments inside it are ignored.
    // The whole string must be a repetition of the listed byte sequences.
    return preg_match('/^(
            [\x09\x0A\x0D\x20-\x7E]*            # ASCII
            | [\xC2-\xDF][\x80-\xBF]            # non-overlong 2-byte
            | \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
            | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
            | \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
            | \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
            | [\xF1-\xF3][\x80-\xBF]{3}         # planes 4-15
            | \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
        )*$/x', $string);
}
475 * Escapes the closing cdata tag.
477 * @param string $string
/**
 * Makes a string safe for embedding in a CDATA section by splitting every
 * closing marker across two adjacent CDATA sections.
 *
 * @param   string  $string
 * @return  string
 */
public static function escapeCDATA($string) {
    $closingMarker = ']]>';
    $splitMarker = ']]]]><![CDATA[>';

    return str_replace($closingMarker, $splitMarker, $string);
}
485 * Converts a string to requested character encoding.
486 * @see mb_convert_encoding()
488 * @param string $inCharset
489 * @param string $outCharset
490 * @param string $string
491 * @return string converted string
/**
 * Converts a string to requested character encoding.
 * @see     mb_convert_encoding()
 *
 * @param   string  $inCharset      encoding of $string
 * @param   string  $outCharset     encoding to convert to
 * @param   string  $string
 * @return  string  converted string
 */
public static function convertEncoding($inCharset, $outCharset, $string) {
    // utf8_encode()/utf8_decode() are deprecated as of PHP 8.2;
    // mb_convert_encoding() handles the ISO-8859-1 <-> UTF-8 pairs as well,
    // so no special cases are needed. Note the argument order: target first.
    return mb_convert_encoding($string, $outCharset, $inCharset);
}
501 * Strips HTML tags from a string.
503 * @param string $string
/**
 * Removes HTML comments first and HTML tags second from the given string.
 *
 * @param   string  $string
 * @return  string
 */
public static function stripHTML($string) {
    $withoutComments = preg_replace(self::HTML_COMMENT_PATTERN, '', $string);

    return preg_replace(self::HTML_PATTERN, '', $withoutComments);
}
511 * Returns false if the given word is forbidden by given word filter.
513 * @param string $word
514 * @param string $filter
517 public static function executeWordFilter($word, $filter) {
518 $filter = self
::trim($filter);
519 $word = mb_strtolower($word);
522 $forbiddenNames = explode("\n", mb_strtolower(self
::unifyNewlines($filter)));
523 foreach ($forbiddenNames as $forbiddenName) {
524 // ignore empty lines in between actual values
525 $forbiddenName = self
::trim($forbiddenName);
526 if (empty($forbiddenName)) continue;
528 if (mb_strpos($forbiddenName, '*') !== false) {
529 $forbiddenName = str_replace('\*', '.*', preg_quote($forbiddenName, '/'));
530 if (preg_match('/^'.$forbiddenName.'$/s', $word)) {
535 if ($word == $forbiddenName) {
546 * Truncates the given string to a certain number of characters.
548 * @param string $string string which shall be truncated
549 * @param integer $length string length after truncating
550 * @param string $etc string to append when $string is truncated
551 * @param boolean $breakWords should words be broken in the middle
552 * @return string truncated string
554 public static function truncate($string, $length = 80, $etc = self
::HELLIP
, $breakWords = false) {
559 if (mb_strlen($string) > $length) {
560 $length -= mb_strlen($etc);
563 $string = preg_replace('/\\s+?(\\S+)?$/', '', mb_substr($string, 0, $length +
1));
566 return mb_substr($string, 0, $length).$etc;
574 * Truncates a string containing HTML code and keeps the HTML syntax intact.
576 * @param string $string string which shall be truncated
577 * @param integer $length string length after truncating
578 * @param string $etc ending string which will be appended after truncating
579 * @param boolean $breakWords if false words will not be split and the return string might be shorter than $length
580 * @return string truncated string
582 public static function truncateHTML($string, $length = 500, $etc = self
::HELLIP
, $breakWords = false) {
583 if (mb_strlen(self
::stripHTML($string)) <= $length) {
587 $truncatedString = '';
589 // initialize length counter with the ending length
590 $totalLength = mb_strlen($etc);
592 preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $string, $tags, PREG_SET_ORDER
);
594 foreach ($tags as $tag) {
595 // ignore void elements
596 if (!preg_match('/^(area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)$/s', $tag[2])) {
597 // look for opening tags
598 if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
599 array_unshift($openTags, $tag[2]);
602 * look for closing tags and check if this tag has a corresponding opening tag
603 * and omit the opening tag if it has been closed already
605 else if (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
606 $position = array_search($closeTag[1], $openTags);
607 if ($position !== false) {
608 array_splice($openTags, $position, 1);
613 $truncatedString .= $tag[1];
615 // get length of the content without entities. If the content is too long, keep entities intact
616 $decodedContent = self
::decodeHTML($tag[3]);
617 $contentLength = mb_strlen($decodedContent);
618 if ($contentLength +
$totalLength > $length) {
620 if (preg_match('/^(.{1,'.($length - $totalLength).'}) /s', $decodedContent, $match)) {
621 $truncatedString .= self
::encodeHTML($match[1]);
627 $left = $length - $totalLength;
629 if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $tag[3], $entities, PREG_OFFSET_CAPTURE
)) {
630 foreach ($entities[0] as $entity) {
631 if ($entity[1] +
1 - $entitiesLength <= $left) {
633 $entitiesLength +
= mb_strlen($entity[0]);
640 $truncatedString .= mb_substr($tag[3], 0, $left +
$entitiesLength);
644 $truncatedString .= $tag[3];
645 $totalLength +
= $contentLength;
647 if ($totalLength >= $length) {
652 // close all open tags
653 foreach ($openTags as $tag) {
654 $truncatedString .= '</'.$tag.'>';
658 $truncatedString .= $etc;
660 return $truncatedString;
664 * Generates an anchor tag from given URL.
667 * @param string $title
668 * @param boolean $encodeTitle
669 * @return string anchor tag
671 public static function getAnchorTag($url, $title = '', $encodeTitle = true) {
672 $url = self
::trim($url);
675 if (ApplicationHandler
::getInstance()->isInternalURL($url)) {
677 $url = preg_replace('~^https?://~', RouteHandler
::getProtocol(), $url);
682 // use URL and remove protocol and www subdomain
683 $title = preg_replace('~^(?:https?|ftps?)://(?:www\.)?~i', '', $url);
685 if (mb_strlen($title) > 60) {
686 $title = mb_substr($title, 0, 30) . self
::HELLIP
. mb_substr($title, -25);
689 if (!$encodeTitle) $title = self
::encodeHTML($title);
692 return '<a href="'.self
::encodeHTML($url).'"'.($external ?
(' class="externalURL"'.((EXTERNAL_LINK_REL_NOFOLLOW || EXTERNAL_LINK_TARGET_BLANK
) ?
(' rel="'.(EXTERNAL_LINK_REL_NOFOLLOW ?
'nofollow' : '').((EXTERNAL_LINK_REL_NOFOLLOW
&& EXTERNAL_LINK_TARGET_BLANK
) ?
' ' : '').(EXTERNAL_LINK_TARGET_BLANK ?
'noopener noreferrer' : '').'"') : '').(EXTERNAL_LINK_TARGET_BLANK ?
' target="_blank"' : '')) : '').'>'.($encodeTitle ? self
::encodeHTML($title) : $title).'</a>';
696 * Splits given string into smaller chunks.
698 * @param string $string
699 * @param integer $length
700 * @param string $break
/**
 * Inserts $break after every run of exactly $length characters.
 *
 * @param   string  $string
 * @param   integer $length
 * @param   string  $break
 * @return  string
 */
public static function splitIntoChunks($string, $length = 75, $break = "\r\n") {
    $chunkPattern = '.{'.$length.'}';
    // "\0" in the replacement refers to the whole matched chunk
    $chunkWithBreak = "\\0".$break;

    return mb_ereg_replace($chunkPattern, $chunkWithBreak, $string);
}
708 * Simple multi-byte safe wordwrap() function.
710 * @param string $string
711 * @param integer $width
712 * @param string $break
/**
 * Simple multi-byte safe wordwrap() function.
 *
 * The string is split at $break; every piece longer than $width characters
 * is cut into slices of at most $width characters, and all pieces are
 * joined with $break again.
 *
 * @param   string  $string
 * @param   integer $width  maximum piece length in characters; for values
 *                          below 1 the string is returned unchanged
 * @param   string  $break
 * @return  string
 */
public static function wordwrap($string, $width = 50, $break = ' ') {
    $width = (int) $width;
    if ($width < 1) {
        // nothing sensible to slice by; also avoids a division by zero
        return $string;
    }

    // Pieces are collected and joined at the end. Deciding on the separator
    // via empty($result) would drop it after a piece such as "0" or "".
    $pieces = [];
    foreach (explode($break, $string) as $substring) {
        $length = mb_strlen($substring);
        if ($length <= $width) {
            $pieces[] = $substring;
            continue;
        }

        for ($offset = 0; $offset < $length; $offset += $width) {
            $pieces[] = mb_substr($substring, $offset, $width);
        }
    }

    return implode($break, $pieces);
}
740 * Shortens numbers larger than 1000 by using unit prefixes.
742 * @param integer $number
745 public static function getShortUnit($number) {
748 if ($number >= 1000000) {
751 $number = floor($number);
754 $number = round($number, 1);
758 else if ($number >= 1000) {
761 $number = floor($number);
764 $number = round($number, 1);
769 return self
::formatNumeric($number) . $unitPrefix;
773 * Normalizes a string representing comma-separated values by making sure
774 * that the separator is just a comma, not a combination of whitespace and
777 * @param string $string
/**
 * Normalizes a comma-separated list: the string is split at commas, the
 * parts are passed through ArrayUtil::trim() and joined with a plain comma.
 *
 * @param   string  $string
 * @return  string
 */
public static function normalizeCsv($string) {
    $values = explode(',', $string);
    $trimmedValues = ArrayUtil::trim($values);

    return implode(',', $trimmedValues);
}
786 * Forbid creation of StringUtil objects.
788 private function __construct() {