Merge branch '3.0'
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / util / StringUtil.class.php
CommitLineData
06ccd650
AE
1<?php
2namespace wcf\util;
593c4fad 3use wcf\system\application\ApplicationHandler;
dccac621 4use wcf\system\request\RouteHandler;
06ccd650
AE
5use wcf\system\WCF;
6
/**
 * Contains string-related functions.
 *
 * All methods are static; the class cannot be instantiated.
 *
 * @author	Oliver Kliebisch, Marcel Werk
 * @copyright	2001-2018 WoltLab GmbH
 * @license	GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package	WoltLabSuite\Core\Util
 */
final class StringUtil {
	/**
	 * regular expression (extended mode, case-insensitive) matching opening,
	 * closing and self-closing HTML tags including their attributes
	 * @var	string
	 */
	const HTML_PATTERN = '~</?[a-z]+[1-6]?
		(?:\s*[a-z\-]+\s*(=\s*(?:
		"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'|[^\s>]
		))?)*\s*/?>~ix';
	
	/**
	 * regular expression matching HTML comments; the pattern has no `s` modifier,
	 * thus comments spanning multiple lines are not matched
	 * @var	string
	 */
	const HTML_COMMENT_PATTERN = '~<!--(.*?)-->~';
	
	/**
	 * utf8 bytes of the HORIZONTAL ELLIPSIS (U+2026)
	 * @var	string
	 */
	const HELLIP = "\xE2\x80\xA6";
	
	/**
	 * utf8 bytes of the MINUS SIGN (U+2212)
	 * @var	string
	 */
	const MINUS = "\xE2\x88\x92";
	
	/**
	 * Alias to php sha1() function.
	 *
	 * @param	string		$value
	 * @return	string		40 character hexadecimal digest
	 */
	public static function getHash($value) {
		return sha1($value);
	}
	
	/**
	 * Creates a random hash (40 hexadecimal characters).
	 *
	 * Uses random_bytes() if the function is available, otherwise (or if it
	 * fails) the value is derived from microtime(), uniqid() and mt_rand().
	 *
	 * @return	string
	 */
	public static function getRandomID() {
		if (function_exists('random_bytes')) {
			try {
				return bin2hex(random_bytes(20));
			}
			catch (\Exception $e) {
				// no randomness source available, use the legacy generator below
			}
		}
		
		return self::getHash(microtime() . uniqid(mt_rand(), true));
	}
	
	/**
	 * Creates an UUID.
	 *
	 * The third group always starts with `4` and the fourth group always
	 * starts with `8`, `9`, `a` or `b`.
	 *
	 * @return	string
	 */
	public static function getUUID() {
		return sprintf(
			'%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
			self::getRandomInteger(0, 65535),
			self::getRandomInteger(0, 65535),
			self::getRandomInteger(0, 65535),
			// 0x4000 - 0x4fff
			self::getRandomInteger(16384, 20479),
			// 0x8000 - 0xbfff
			self::getRandomInteger(32768, 49151),
			self::getRandomInteger(0, 65535),
			self::getRandomInteger(0, 65535),
			self::getRandomInteger(0, 65535)
		);
	}
	
	/**
	 * Returns a random integer between $min and $max (both inclusive).
	 *
	 * Uses random_int() if the function is available, otherwise (or if it
	 * fails) mt_rand() is used.
	 *
	 * @param	integer		$min
	 * @param	integer		$max
	 * @return	integer
	 */
	private static function getRandomInteger($min, $max) {
		if (function_exists('random_int')) {
			try {
				return random_int($min, $max);
			}
			catch (\Exception $e) {
				// no randomness source available, use mt_rand() below
			}
		}
		
		return mt_rand($min, $max);
	}
	
	/**
	 * Converts dos and old mac newlines ("\r\n" and "\r") to unix newlines ("\n").
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function unifyNewlines($string) {
		return preg_replace("%(\r\n)|(\r)%", "\n", $string);
	}
	
	/**
	 * Removes Unicode whitespace characters from the beginning
	 * and ending of the given string.
	 *
	 * @param	string		$text
	 * @return	string
	 */
	public static function trim($text) {
		// These regular expressions use character properties
		// to find characters defined as space in the unicode
		// specification.
		// Do not merge the expressions, they are separated for
		// performance reasons.
		$text = preg_replace('/^[\p{Zs}\s]+/u', '', $text);
		$text = preg_replace('/[\p{Zs}\s]+$/u', '', $text);
		
		return $text;
	}
	
	/**
	 * Converts html special characters.
	 *
	 * Uses ENT_COMPAT, thus double quotes are converted while single
	 * quotes are left untouched.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function encodeHTML($string) {
		return @htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
	}
	
	/**
	 * Converts javascript special characters.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function encodeJS($string) {
		// unify newlines
		$string = self::unifyNewlines($string);
		
		// escape backslash (must happen first, the following steps add backslashes)
		$string = str_replace("\\", "\\\\", $string);
		
		// escape single quote
		$string = str_replace("'", "\'", $string);
		
		// escape new lines
		$string = str_replace("\n", '\n', $string);
		
		// escape slashes
		$string = str_replace("/", '\/', $string);
		
		return $string;
	}
	
	/**
	 * Encodes JSON strings. This is not the same as PHP's json_encode()!
	 *
	 * The string is passed through encodeJS() and encodeHTML(), afterwards
	 * escaped single quotes are replaced with their HTML entity.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function encodeJSON($string) {
		$string = self::encodeJS($string);
		
		$string = self::encodeHTML($string);
		
		// single quotes must be encoded as HTML entity
		$string = str_replace("\'", "&#39;", $string);
		
		return $string;
	}
	
	/**
	 * Decodes html entities.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function decodeHTML($string) {
		$string = str_ireplace('&nbsp;', ' ', $string); // convert non-breaking spaces to ascii 32; not ascii 160
		return @html_entity_decode($string, ENT_COMPAT, 'UTF-8');
	}
	
	/**
	 * Formats a numeric.
	 *
	 * Integers are passed to formatInteger(), floats to formatDouble(). Any
	 * other value (e.g. a numeric string) is formatted as double if it has a
	 * fractional part and as integer otherwise.
	 *
	 * @param	number		$numeric
	 * @return	string
	 */
	public static function formatNumeric($numeric) {
		if (is_int($numeric)) {
			return self::formatInteger($numeric);
		}
		else if (is_float($numeric)) {
			return self::formatDouble($numeric);
		}
		else {
			if (floatval($numeric) - (float) intval($numeric)) {
				return self::formatDouble($numeric);
			}
			else {
				return self::formatInteger(intval($numeric));
			}
		}
	}
	
	/**
	 * Formats an integer.
	 *
	 * @param	integer		$integer
	 * @return	string
	 */
	public static function formatInteger($integer) {
		$integer = self::addThousandsSeparator($integer);
		
		// format minus
		$integer = self::formatNegative($integer);
		
		return $integer;
	}
	
	/**
	 * Formats a double.
	 *
	 * The value is rounded to $maxDecimals decimals, or to two decimals if
	 * $maxDecimals is 0. With $maxDecimals being 0, values without relevant
	 * decimals are formatted as integers.
	 *
	 * @param	double		$double
	 * @param	integer		$maxDecimals
	 * @return	string
	 */
	public static function formatDouble($double, $maxDecimals = 0) {
		// round
		$double = (string) round($double, ($maxDecimals > 0 ? $maxDecimals : 2));
		
		// consider as integer, if no decimal places found
		if (!$maxDecimals && preg_match('~^(-?\d+)(?:\.(?:0*|00[0-4]\d*))?$~', $double, $match)) {
			return self::formatInteger($match[1]);
		}
		
		// remove last 0; the check for the decimal point makes sure that only
		// fractional digits are removed, otherwise whole numbers such as "10"
		// would be turned into "1"
		if ($maxDecimals < 2 && strpos($double, '.') !== false && substr($double, -1) === '0') {
			$double = substr($double, 0, -1);
		}
		
		// add thousands separator to the integer digits only, otherwise
		// fractional digits would be grouped too (e.g. "1234.5678")
		$parts = explode('.', $double, 2);
		$parts[0] = self::addThousandsSeparator($parts[0]);
		
		// replace decimal point
		$double = implode(WCF::getLanguage()->get('wcf.global.decimalPoint'), $parts);
		
		// format minus
		$double = self::formatNegative($double);
		
		return $double;
	}
	
	/**
	 * Adds thousands separators to a given number.
	 *
	 * Numbers between -1000 and 1000 (exclusive) are returned unchanged.
	 *
	 * @param	mixed		$number
	 * @return	string
	 */
	public static function addThousandsSeparator($number) {
		if ($number >= 1000 || $number <= -1000) {
			$number = preg_replace('~(?<=\d)(?=(\d{3})+(?!\d))~', WCF::getLanguage()->get('wcf.global.thousandsSeparator'), $number);
		}
		
		return $number;
	}
	
	/**
	 * Replaces the HYPHEN-MINUS with the MINUS SIGN.
	 *
	 * @param	mixed		$number
	 * @return	string
	 */
	public static function formatNegative($number) {
		return str_replace('-', self::MINUS, $number);
	}
	
	/**
	 * Alias to php ucfirst() function with multibyte support.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function firstCharToUpperCase($string) {
		return mb_strtoupper(mb_substr($string, 0, 1)).mb_substr($string, 1);
	}
	
	/**
	 * Alias to php lcfirst() function with multibyte support.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function firstCharToLowerCase($string) {
		return mb_strtolower(mb_substr($string, 0, 1)).mb_substr($string, 1);
	}
	
	/**
	 * Alias to php mb_convert_case() function using MB_CASE_TITLE.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function wordsToUpperCase($string) {
		return mb_convert_case($string, MB_CASE_TITLE);
	}
	
	/**
	 * Alias to php str_ireplace() function with UTF-8 support.
	 *
	 * This function is considered to be slow, if $search contains
	 * only ASCII characters, please use str_ireplace() instead.
	 *
	 * @param	string		$search
	 * @param	string		$replace
	 * @param	string		$subject
	 * @param	integer		$count		is incremented by the number of replacements, it is not reset
	 * @return	string
	 */
	public static function replaceIgnoreCase($search, $replace, $subject, &$count = 0) {
		// an empty search string cannot be replaced; without this check the
		// loop below would never advance
		if ($search === null || $search === '') return $subject;
		
		// lowercase both strings once instead of once per match
		$lowerSubject = mb_strtolower($subject);
		$lowerSearch = mb_strtolower($search);
		$subjectLength = mb_strlen($lowerSubject);
		$searchLength = mb_strlen($search);
		
		$result = '';
		$offset = 0;
		while ($offset < $subjectLength) {
			$startPos = mb_strpos($lowerSubject, $lowerSearch, $offset);
			if ($startPos === false) break;
			
			// keep the text in front of the match in its original case
			$result .= mb_substr($subject, $offset, $startPos - $offset) . $replace;
			$offset = $startPos + $searchLength;
			$count++;
		}
		
		return $result . mb_substr($subject, $offset);
	}
	
	/**
	 * Alias to php str_split() function with multibyte support.
	 *
	 * @param	string		$string
	 * @param	integer		$length		maximum number of characters per chunk, must be at least 1
	 * @return	string[]
	 * @throws	\InvalidArgumentException	if $length is less than 1
	 */
	public static function split($string, $length = 1) {
		// a non-positive length would never advance the loop below
		if ($length < 1) {
			throw new \InvalidArgumentException("The length of each chunk must be at least 1.");
		}
		
		$result = [];
		for ($i = 0, $max = mb_strlen($string); $i < $max; $i += $length) {
			$result[] = mb_substr($string, $i, $length);
		}
		return $result;
	}
	
	/**
	 * Checks whether $haystack starts with $needle, or not.
	 *
	 * @param	string		$haystack	The string to be checked for starting with $needle
	 * @param	string		$needle		The string to be found at the start of $haystack
	 * @param	boolean		$ci		Case insensitive or not. Default = false.
	 *
	 * @return	boolean				True, if $haystack starts with $needle, false otherwise.
	 */
	public static function startsWith($haystack, $needle, $ci = false) {
		if ($ci) {
			$haystack = mb_strtolower($haystack);
			$needle = mb_strtolower($needle);
		}
		// using mb_substr and === is MUCH faster for long strings than searching for the needle.
		return mb_substr($haystack, 0, mb_strlen($needle)) === $needle;
	}
	
	/**
	 * Returns true if $haystack ends with $needle or if the length of $needle is 0.
	 *
	 * @param	string		$haystack
	 * @param	string		$needle
	 * @param	boolean		$ci		case insensitive
	 * @return	boolean
	 */
	public static function endsWith($haystack, $needle, $ci = false) {
		if ($ci) {
			$haystack = mb_strtolower($haystack);
			$needle = mb_strtolower($needle);
		}
		$length = mb_strlen($needle);
		if ($length === 0) return true;
		return (mb_substr($haystack, $length * -1) === $needle);
	}
	
	/**
	 * Alias to php str_pad function with multibyte support.
	 *
	 * Only multibyte characters within $input are taken into account,
	 * $padString is passed to str_pad() as is.
	 *
	 * @param	string		$input
	 * @param	integer		$padLength
	 * @param	string		$padString
	 * @param	integer		$padType
	 * @return	string
	 */
	public static function pad($input, $padLength, $padString = ' ', $padType = STR_PAD_RIGHT) {
		// str_pad() counts bytes, add the difference between byte and character count
		$additionalPadding = strlen($input) - mb_strlen($input);
		return str_pad($input, $padLength + $additionalPadding, $padString, $padType);
	}
	
	/**
	 * Unescapes escaped characters in a string.
	 *
	 * @param	string		$string
	 * @param	string		$chars		each character of this string is unescaped if preceded by a backslash
	 * @return	string
	 */
	public static function unescape($string, $chars = '"') {
		for ($i = 0, $j = strlen($chars); $i < $j; $i++) {
			$string = str_replace('\\'.$chars[$i], $chars[$i], $string);
		}
		
		return $string;
	}
	
	/**
	 * Takes a numeric HTML entity value and returns the appropriate UTF-8 bytes.
	 *
	 * @param	integer		$dec		html entity value
	 * @return	string				utf-8 bytes
	 */
	public static function getCharacter($dec) {
		if ($dec < 128) {
			// 1 byte
			$utf = chr($dec);
		}
		else if ($dec < 2048) {
			// 2 bytes
			$utf = chr(192 + (($dec - ($dec % 64)) / 64));
			$utf .= chr(128 + ($dec % 64));
		}
		else if ($dec < 65536) {
			// 3 bytes
			$utf = chr(224 + (($dec - ($dec % 4096)) / 4096));
			$utf .= chr(128 + ((($dec % 4096) - ($dec % 64)) / 64));
			$utf .= chr(128 + ($dec % 64));
		}
		else {
			// 4 bytes, required for code points above U+FFFF
			$utf = chr(240 + (($dec - ($dec % 262144)) / 262144));
			$utf .= chr(128 + ((($dec % 262144) - ($dec % 4096)) / 4096));
			$utf .= chr(128 + ((($dec % 4096) - ($dec % 64)) / 64));
			$utf .= chr(128 + ($dec % 64));
		}
		return $utf;
	}
	
	/**
	 * Converts UTF-8 to Unicode
	 * @see		http://www1.tip.nl/~t876506/utf8tbl.html
	 *
	 * Only the first character of $c is evaluated. Returns 0 if $c is empty
	 * or starts with a continuation byte (128-191).
	 *
	 * @param	string		$c
	 * @return	integer|false			false if the leading byte is 254 or 255
	 */
	public static function getCharValue($c) {
		// empty input has no leading byte
		if (!isset($c[0])) return 0;
		
		// the leading byte determines the length of the sequence
		$lead = ord($c[0]);
		
		if ($lead <= 127) {
			return $lead;
		}
		if ($lead >= 192 && $lead <= 223) {
			return ($lead - 192) * 64 + (ord($c[1]) - 128);
		}
		if ($lead >= 224 && $lead <= 239) {
			return ($lead - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
		}
		if ($lead >= 240 && $lead <= 247) {
			return ($lead - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
		}
		if ($lead >= 248 && $lead <= 251) {
			return ($lead - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
		}
		if ($lead >= 252 && $lead <= 253) {
			return ($lead - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
		}
		if ($lead >= 254) {
			return false; // error
		}
		
		// continuation byte without a leading byte
		return 0;
	}
	
	/**
	 * Returns html entities of all characters in the given string.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function encodeAllChars($string) {
		$result = '';
		for ($i = 0, $j = mb_strlen($string); $i < $j; $i++) {
			$char = mb_substr($string, $i, 1);
			$result .= '&#'.self::getCharValue($char).';';
		}
		
		return $result;
	}
	
	/**
	 * Returns true if the given string contains only ASCII characters.
	 *
	 * @param	string		$string
	 * @return	boolean
	 */
	public static function isASCII($string) {
		return preg_match('/^[\x00-\x7F]*$/', $string) === 1;
	}
	
	/**
	 * Returns true if the given string is utf-8 encoded.
	 * @see		http://www.w3.org/International/questions/qa-forms-utf-8
	 *
	 * Returns false as well if the regular expression could not be evaluated.
	 *
	 * @param	string		$string
	 * @return	boolean
	 */
	public static function isUTF8($string) {
		return preg_match('/^(
				[\x09\x0A\x0D\x20-\x7E]*		# ASCII
			|	[\xC2-\xDF][\x80-\xBF]			# non-overlong 2-byte
			|	\xE0[\xA0-\xBF][\x80-\xBF]		# excluding overlongs
			|	[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}	# straight 3-byte
			|	\xED[\x80-\x9F][\x80-\xBF]		# excluding surrogates
			|	\xF0[\x90-\xBF][\x80-\xBF]{2}		# planes 1-3
			|	[\xF1-\xF3][\x80-\xBF]{3}		# planes 4-15
			|	\xF4[\x80-\x8F][\x80-\xBF]{2}		# plane 16
			)*$/x', $string) === 1;
	}
	
	/**
	 * Escapes the closing cdata tag.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function escapeCDATA($string) {
		return str_replace(']]>', ']]]]><![CDATA[>', $string);
	}
	
	/**
	 * Converts a string to requested character encoding.
	 * @see		mb_convert_encoding()
	 *
	 * @param	string		$inCharset
	 * @param	string		$outCharset
	 * @param	string		$string
	 * @return	string		converted string
	 */
	public static function convertEncoding($inCharset, $outCharset, $string) {
		// utf8_encode() and utf8_decode() are deprecated as of PHP 8.2,
		// mb_convert_encoding() handles ISO-8859-1 <-> UTF-8 as well
		return mb_convert_encoding($string, $outCharset, $inCharset);
	}
	
	/**
	 * Strips HTML tags from a string.
	 *
	 * HTML comments are removed first, afterwards everything matching
	 * HTML_PATTERN is removed.
	 *
	 * @param	string		$string
	 * @return	string
	 */
	public static function stripHTML($string) {
		return preg_replace(self::HTML_PATTERN, '', preg_replace(self::HTML_COMMENT_PATTERN, '', $string));
	}
	
	/**
	 * Returns false if the given word is forbidden by given word filter.
	 *
	 * The filter contains one forbidden word per line, `*` serves as wildcard
	 * for any number of characters. The comparison is case-insensitive.
	 *
	 * @param	string		$word
	 * @param	string		$filter
	 * @return	boolean
	 */
	public static function executeWordFilter($word, $filter) {
		$filter = self::trim($filter);
		$word = mb_strtolower($word);
		
		if ($filter === null || $filter === '') {
			return true;
		}
		
		$forbiddenNames = explode("\n", mb_strtolower(self::unifyNewlines($filter)));
		foreach ($forbiddenNames as $forbiddenName) {
			// ignore empty lines in between actual values; empty() must not
			// be used here, because it would skip the value "0" too
			$forbiddenName = self::trim($forbiddenName);
			if ($forbiddenName === null || $forbiddenName === '') continue;
			
			if (mb_strpos($forbiddenName, '*') !== false) {
				$forbiddenName = str_replace('\*', '.*', preg_quote($forbiddenName, '/'));
				if (preg_match('/^'.$forbiddenName.'$/s', $word)) {
					return false;
				}
			}
			else {
				// strict comparison, otherwise numeric strings such as
				// "1e3" and "1000" are considered to be equal
				if ($word === $forbiddenName) {
					return false;
				}
			}
		}
		
		return true;
	}
	
	/**
	 * Truncates the given string to a certain number of characters.
	 *
	 * @param	string		$string		string which shall be truncated
	 * @param	integer		$length		string length after truncating
	 * @param	string		$etc		string to append when $string is truncated
	 * @param	boolean		$breakWords	should words be broken in the middle
	 * @return	string				truncated string
	 */
	public static function truncate($string, $length = 80, $etc = self::HELLIP, $breakWords = false) {
		if ($length == 0) {
			return '';
		}
		
		if (mb_strlen($string) > $length) {
			// reserve space for the appended string
			$length -= mb_strlen($etc);
			
			if (!$breakWords) {
				// remove the trailing (possibly incomplete) word
				$string = preg_replace('/\\s+?(\\S+)?$/', '', mb_substr($string, 0, $length + 1));
			}
			
			return mb_substr($string, 0, $length).$etc;
		}
		else {
			return $string;
		}
	}
	
	/**
	 * Truncates a string containing HTML code and keeps the HTML syntax intact.
	 *
	 * @param	string		$string		string which shall be truncated
	 * @param	integer		$length		string length after truncating
	 * @param	string		$etc		ending string which will be appended after truncating
	 * @param	boolean		$breakWords	if false words will not be split and the return string might be shorter than $length
	 * @return	string				truncated string
	 */
	public static function truncateHTML($string, $length = 500, $etc = self::HELLIP, $breakWords = false) {
		if (mb_strlen(self::stripHTML($string)) <= $length) {
			return $string;
		}
		$openTags = [];
		$truncatedString = '';
		
		// initialize length counter with the ending length
		$totalLength = mb_strlen($etc);
		
		// each match consists of an optional tag (1), its name (2) and the following text (3)
		preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $string, $tags, PREG_SET_ORDER);
		
		foreach ($tags as $tag) {
			// ignore void elements
			if (!preg_match('/^(area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)$/s', $tag[2])) {
				// look for opening tags
				if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
					array_unshift($openTags, $tag[2]);
				}
				/**
				 * look for closing tags and check if this tag has a corresponding opening tag
				 * and omit the opening tag if it has been closed already
				 */
				else if (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
					$position = array_search($closeTag[1], $openTags, true);
					if ($position !== false) {
						array_splice($openTags, $position, 1);
					}
				}
			}
			// append tag
			$truncatedString .= $tag[1];
			
			// get length of the content without entities. If the content is too long, keep entities intact
			$decodedContent = self::decodeHTML($tag[3]);
			$contentLength = mb_strlen($decodedContent);
			if ($contentLength + $totalLength > $length) {
				if (!$breakWords) {
					// without remaining characters the quantifier `{1,0}` would be invalid
					$remaining = $length - $totalLength;
					if ($remaining > 0 && preg_match('/^(.{1,'.$remaining.'}) /s', $decodedContent, $match)) {
						$truncatedString .= self::encodeHTML($match[1]);
					}
					
					break;
				}
				
				$left = $length - $totalLength;
				$entitiesLength = 0;
				if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $tag[3], $entities, PREG_OFFSET_CAPTURE)) {
					foreach ($entities[0] as $entity) {
						// an entity counts as a single character
						if ($entity[1] + 1 - $entitiesLength <= $left) {
							$left--;
							$entitiesLength += mb_strlen($entity[0]);
						}
						else {
							break;
						}
					}
				}
				$truncatedString .= mb_substr($tag[3], 0, $left + $entitiesLength);
				break;
			}
			else {
				$truncatedString .= $tag[3];
				$totalLength += $contentLength;
			}
			if ($totalLength >= $length) {
				break;
			}
		}
		
		// close all open tags
		foreach ($openTags as $tag) {
			$truncatedString .= '</'.$tag.'>';
		}
		
		// add etc
		$truncatedString .= $etc;
		
		return $truncatedString;
	}
	
	/**
	 * Generates an anchor tag from given URL.
	 *
	 * If $title is empty, the title is generated from the URL and is always
	 * encoded. URLs not reported as internal by the application handler
	 * receive the class `externalURL` and, depending on the options
	 * EXTERNAL_LINK_REL_NOFOLLOW and EXTERNAL_LINK_TARGET_BLANK, the
	 * attributes `rel` and `target`.
	 *
	 * @param	string		$url
	 * @param	string		$title
	 * @param	boolean		$encodeTitle	false if $title is already HTML-encoded
	 * @return	string		anchor tag
	 */
	public static function getAnchorTag($url, $title = '', $encodeTitle = true) {
		$url = self::trim($url);
		
		$external = true;
		if (ApplicationHandler::getInstance()->isInternalURL($url)) {
			$external = false;
			$url = preg_replace('~^https?://~', RouteHandler::getProtocol(), $url);
		}
		
		// cut visible url
		if (empty($title)) {
			// use URL and remove protocol and www subdomain
			$title = preg_replace('~^(?:https?|ftps?)://(?:www\.)?~i', '', $url);
			
			if (mb_strlen($title) > 60) {
				$title = mb_substr($title, 0, 30) . self::HELLIP . mb_substr($title, -25);
			}
			
			// the generated title is not encoded yet, but it will not be encoded below either
			if (!$encodeTitle) $title = self::encodeHTML($title);
		}
		
		// attributes for external links
		$attributes = '';
		if ($external) {
			$attributes .= ' class="externalURL"';
			
			$rel = [];
			if (EXTERNAL_LINK_REL_NOFOLLOW) $rel[] = 'nofollow';
			if (EXTERNAL_LINK_TARGET_BLANK) $rel[] = 'noopener noreferrer';
			if (!empty($rel)) $attributes .= ' rel="'.implode(' ', $rel).'"';
			
			if (EXTERNAL_LINK_TARGET_BLANK) $attributes .= ' target="_blank"';
		}
		
		return '<a href="'.self::encodeHTML($url).'"'.$attributes.'>'.($encodeTitle ? self::encodeHTML($title) : $title).'</a>';
	}
	
	/**
	 * Splits given string into smaller chunks.
	 *
	 * $break is inserted after every $length characters.
	 *
	 * @param	string		$string
	 * @param	integer		$length
	 * @param	string		$break
	 * @return	string
	 */
	public static function splitIntoChunks($string, $length = 75, $break = "\r\n") {
		return mb_ereg_replace('.{'.$length.'}', "\\0".$break, $string);
	}
	
	/**
	 * Simple multi-byte safe wordwrap() function.
	 *
	 * The string is split at $break, parts longer than $width characters are
	 * split into pieces of at most $width characters and all pieces are
	 * joined with $break again. Empty parts at the beginning are dropped.
	 *
	 * @param	string		$string
	 * @param	integer		$width
	 * @param	string		$break
	 * @return	string
	 */
	public static function wordwrap($string, $width = 50, $break = ' ') {
		$result = '';
		$substrings = explode($break, $string);
		
		foreach ($substrings as $substring) {
			$length = mb_strlen($substring);
			if ($length > $width) {
				$j = ceil($length / $width);
				
				for ($i = 0; $i < $j; $i++) {
					// empty() must not be used here, because it considers "0" to be empty
					if ($result !== '') $result .= $break;
					if ($width * ($i + 1) > $length) $result .= mb_substr($substring, $width * $i);
					else $result .= mb_substr($substring, $width * $i, $width);
				}
			}
			else {
				if ($result !== '') $result .= $break;
				$result .= $substring;
			}
		}
		
		return $result;
	}
	
	/**
	 * Shortens numbers larger than 1000 by using unit prefixes.
	 *
	 * Shortened values above 10 are rounded down to the next integer, other
	 * shortened values are rounded to one decimal.
	 *
	 * @param	integer		$number
	 * @return	string
	 */
	public static function getShortUnit($number) {
		$unitPrefix = '';
		
		if ($number >= 1000000) {
			$number /= 1000000;
			$unitPrefix = 'M';
		}
		else if ($number >= 1000) {
			$number /= 1000;
			$unitPrefix = 'k';
		}
		
		if ($unitPrefix !== '') {
			if ($number > 10) {
				$number = floor($number);
			}
			else {
				$number = round($number, 1);
			}
		}
		
		return self::formatNumeric($number) . $unitPrefix;
	}
	
	/**
	 * Normalizes a string representing comma-separated values by making sure
	 * that the separator is just a comma, not a combination of whitespace and
	 * a comma.
	 *
	 * @param	string		$string
	 * @return	string
	 * @since	3.1
	 */
	public static function normalizeCsv($string) {
		return implode(',', ArrayUtil::trim(explode(',', $string)));
	}
	
	/**
	 * Forbid creation of StringUtil objects.
	 */
	private function __construct() {
		// does nothing
	}
}