Commit | Line | Data |
---|---|---|
06ccd650 AE |
1 | <?php |
2 | namespace wcf\util; | |
593c4fad | 3 | use wcf\system\application\ApplicationHandler; |
dccac621 | 4 | use wcf\system\request\RouteHandler; |
06ccd650 AE |
5 | use wcf\system\WCF; |
6 | ||
7 | /** | |
8 | * Contains string-related functions. | |
9 | * | |
e3369fd2 | 10 | * @author Oliver Kliebisch, Marcel Werk |
c839bd49 | 11 | * @copyright 2001-2018 WoltLab GmbH |
06ccd650 | 12 | * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php> |
e71525e4 | 13 | * @package WoltLabSuite\Core\Util |
06ccd650 | 14 | */ |
18284789 | 15 | final class StringUtil { |
/**
 * Regular expression matching a single opening, closing or self-closing
 * HTML tag together with its attributes (double-quoted, single-quoted or
 * unquoted values). Uses the `i` (case-insensitive) and `x` (extended,
 * whitespace inside the pattern is ignored) modifiers. Used by stripHTML().
 * @var	string
 */
const HTML_PATTERN = '~</?[a-z]+[1-6]?
(?:\s*[a-z\-]+\s*(=\s*(?:
"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'|[^\s>]
))?)*\s*/?>~ix';

/**
 * Regular expression matching an HTML comment (non-greedy). No `s`
 * modifier is set, so `.` does not match newlines. Used by stripHTML().
 * @var	string
 */
const HTML_COMMENT_PATTERN = '~<!--(.*?)-->~';

/**
 * utf8 bytes of the HORIZONTAL ELLIPSIS (U+2026)
 * @var	string
 */
const HELLIP = "\xE2\x80\xA6";

/**
 * utf8 bytes of the MINUS SIGN (U+2212)
 * @var	string
 */
const MINUS = "\xE2\x88\x92";
33 | ||
/**
 * Returns the SHA-1 digest of the given value as a hexadecimal string.
 * Thin wrapper around PHP's sha1().
 * 
 * @param	string		$value		value to be hashed
 * @return	string				40 character hexadecimal digest
 */
public static function getHash($value) {
	$digest = sha1($value);
	
	return $digest;
}
9f959ced | 43 | |
06ccd650 AE |
/**
 * Creates a random hash by hashing the current microtime concatenated
 * with a uniqid() seeded by mt_rand().
 * 
 * @return	string		hexadecimal hash as produced by getHash()
 */
public static function getRandomID() {
	$timestamp = microtime();
	$unique = uniqid(mt_rand(), true);
	
	return self::getHash($timestamp . $unique);
}
9f959ced | 52 | |
18dea1d3 TD |
/**
 * Creates an UUID in the 8-4-4-4-12 hexadecimal notation. All groups are
 * filled using mt_rand(); the third group is drawn from 0x4000-0x4fff and
 * the fourth group from 0x8000-0xbfff.
 * 
 * @return	string
 */
public static function getUUID() {
	$groups = [
		sprintf('%04x%04x', mt_rand(0, 65535), mt_rand(0, 65535)),
		sprintf('%04x', mt_rand(0, 65535)),
		// 16384..20479 => first hex digit is always 4
		sprintf('%04x', mt_rand(16384, 20479)),
		// 32768..49151 => first hex digit is 8, 9, a or b
		sprintf('%04x', mt_rand(32768, 49151)),
		sprintf('%04x%04x%04x', mt_rand(0, 65535), mt_rand(0, 65535), mt_rand(0, 65535))
	];
	
	return implode('-', $groups);
}
61 | ||
06ccd650 AE |
/**
 * Converts dos (\r\n) and old mac (\r) newlines to unix newlines (\n).
 * 
 * @param	string		$string
 * @return	string
 */
public static function unifyNewlines($string) {
	// "\r\n" has to be handled first, otherwise it would yield two newlines
	return str_replace(["\r\n", "\r"], "\n", $string);
}
9f959ced | 71 | |
/**
 * Strips whitespace, including characters of the unicode category
 * "space separator" (Zs), from both ends of the given string.
 * 
 * @param	string		$text
 * @return	string
 */
public static function trim($text) {
	// The character property \p{Zs} covers everything the unicode
	// specification defines as a space. Leading and trailing whitespace
	// are deliberately removed by two separate expressions (performance),
	// do not merge them.
	$withoutLeading = preg_replace('/^[\p{Zs}\s]+/u', '', $text);
	
	return preg_replace('/[\p{Zs}\s]+$/u', '', $withoutLeading);
}
9f959ced | 90 | |
06ccd650 AE |
/**
 * Converts html special characters into their entities. Uses ENT_COMPAT,
 * i.e. double quotes are converted while single quotes are left untouched.
 * 
 * @param	string		$string
 * @return	string
 */
public static function encodeHTML($string) {
	$encoded = @htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
	
	return $encoded;
}
100 | ||
859e8566 MW |
/**
 * Escapes characters with a special meaning inside JavaScript string
 * literals: backslashes, single quotes, newlines and slashes.
 * 
 * @param	string		$string
 * @return	string
 */
public static function encodeJS($string) {
	// unify newlines first, afterwards only "\n" has to be escaped
	$normalized = self::unifyNewlines($string);
	
	// The pairs are applied one after another; the backslash has to be
	// handled first, otherwise the backslashes added by the following
	// replacements would be escaped again.
	$search = ["\\", "'", "\n", "/"];
	$replace = ["\\\\", "\\'", '\n', '\/'];
	
	return str_replace($search, $replace, $normalized);
}
125 | ||
5d502a64 AE |
/**
 * Encodes JSON strings. This is not the same as PHP's json_encode()!
 * 
 * The string is escaped for JavaScript (encodeJS()) and afterwards for
 * HTML (encodeHTML()). Single quotes are finally written as the numeric
 * HTML entity &#39; instead of the backslash-escaped form.
 * 
 * @param	string		$string
 * @return	string
 */
public static function encodeJSON($string) {
	$string = self::encodeJS($string);
	
	$string = self::encodeHTML($string);
	
	// single quotes must be encoded as HTML entity: encodeJS() turned them
	// into \' and encodeHTML() (ENT_COMPAT) left them untouched
	$string = str_replace("\\'", '&#39;', $string);
	
	return $string;
}
142 | ||
06ccd650 AE |
/**
 * Decodes html entities.
 * 
 * Non-breaking space entities are converted to regular spaces (ascii 32)
 * before decoding, because html_entity_decode() would turn them into the
 * NO-BREAK SPACE character (160) otherwise.
 * 
 * @param	string		$string
 * @return	string
 */
public static function decodeHTML($string) {
	// convert non-breaking spaces to ascii 32; not ascii 160
	$string = str_ireplace('&nbsp;', ' ', $string);
	
	return @html_entity_decode($string, ENT_COMPAT, 'UTF-8');
}
9f959ced | 153 | |
06ccd650 AE |
/**
 * Formats a numeric value by delegating to formatInteger() or
 * formatDouble(), depending on the type and value of the argument.
 * 
 * @param	number		$numeric
 * @return	string
 */
public static function formatNumeric($numeric) {
	if (is_int($numeric)) {
		return self::formatInteger($numeric);
	}
	
	if (is_float($numeric)) {
		return self::formatDouble($numeric);
	}
	
	// neither int nor float (e.g. a numeric string): treat the value as
	// double only if it carries a fractional part
	$fraction = floatval($numeric) - (float) intval($numeric);
	if ($fraction) {
		return self::formatDouble($numeric);
	}
	
	return self::formatInteger(intval($numeric));
}
176 | ||
/**
 * Formats an integer: adds thousands separators and replaces a leading
 * hyphen with the MINUS SIGN.
 * 
 * @param	integer		$integer
 * @return	string
 */
public static function formatInteger($integer) {
	$separated = self::addThousandsSeparator($integer);
	
	// format minus
	return self::formatNegative($separated);
}
191 | ||
/**
 * Formats a double.
 * 
 * The value is rounded to $maxDecimals decimals (2 if $maxDecimals is 0).
 * Without an explicit $maxDecimals, values without a significant decimal
 * part are formatted as integer. The decimal point and the thousands
 * separator are taken from the active language.
 * 
 * @param	double		$double
 * @param	integer		$maxDecimals
 * @return	string
 */
public static function formatDouble($double, $maxDecimals = 0) {
	// round
	$double = (string) round($double, ($maxDecimals > 0 ? $maxDecimals : 2));
	
	// consider as integer, if no decimal places found
	if (!$maxDecimals && preg_match('~^(-?\d+)(?:\.(?:0*|00[0-4]\d*))?$~', $double, $match)) {
		return self::formatInteger($match[1]);
	}
	
	// split into integer and decimal part, both are handled separately
	$parts = explode('.', $double, 2);
	$integerPart = $parts[0];
	$decimalPart = isset($parts[1]) ? $parts[1] : '';
	
	// remove last 0; restricted to the decimal part, otherwise a value
	// such as "10" would lose its last digit
	if ($maxDecimals < 2 && substr($decimalPart, -1) == '0') {
		$decimalPart = substr($decimalPart, 0, -1);
	}
	
	// add thousands separator; applied to the integer part only, decimal
	// places must never be grouped
	$double = self::addThousandsSeparator($integerPart);
	
	// append decimal places using the localized decimal point
	if ($decimalPart !== '') {
		$double .= WCF::getLanguage()->get('wcf.global.decimalPoint') . $decimalPart;
	}
	
	// format minus
	$double = self::formatNegative($double);
	
	return $double;
}
222 | ||
/**
 * Adds thousands separators to a given number. Numbers between -1000 and
 * 1000 (exclusive) are returned unchanged.
 * 
 * @param	mixed		$number
 * @return	string
 */
public static function addThousandsSeparator($number) {
	$needsSeparator = ($number >= 1000 || $number <= -1000);
	if (!$needsSeparator) {
		return $number;
	}
	
	// insert the separator at every position that is preceded by a digit
	// and followed by complete groups of three digits
	return preg_replace('~(?<=\d)(?=(\d{3})+(?!\d))~', WCF::getLanguage()->get('wcf.global.thousandsSeparator'), $number);
}
236 | ||
/**
 * Replaces every HYPHEN-MINUS (-) in the given number with the
 * typographic MINUS SIGN (U+2212).
 * 
 * @param	mixed		$number
 * @return	string
 */
public static function formatNegative($number) {
	$formatted = str_replace('-', self::MINUS, $number);
	
	return $formatted;
}
246 | ||
/**
 * Multibyte-aware counterpart of php's ucfirst(): converts the first
 * character of the given string to upper case.
 * 
 * @param	string		$string
 * @return	string
 */
public static function firstCharToUpperCase($string) {
	$firstChar = mb_substr($string, 0, 1);
	$remainder = mb_substr($string, 1);
	
	return mb_strtoupper($firstChar) . $remainder;
}
256 | ||
/**
 * Multibyte-aware counterpart of php's lcfirst(): converts the first
 * character of the given string to lower case.
 * 
 * @param	string		$string
 * @return	string
 */
public static function firstCharToLowerCase($string) {
	$firstChar = mb_substr($string, 0, 1);
	$remainder = mb_substr($string, 1);
	
	return mb_strtolower($firstChar) . $remainder;
}
266 | ||
/**
 * Converts the given string to title case using php's mb_convert_case()
 * with MB_CASE_TITLE.
 * 
 * @param	string		$string
 * @return	string
 */
public static function wordsToUpperCase($string) {
	$titleCased = mb_convert_case($string, MB_CASE_TITLE);
	
	return $titleCased;
}
276 | ||
/**
 * Alias to php str_ireplace() function with UTF-8 support.
 * 
 * This function is considered to be slow, if $search contains
 * only ASCII characters, please use str_ireplace() instead.
 * 
 * An empty $search never matches, the subject is returned unchanged.
 * 
 * @param	string		$search
 * @param	string		$replace
 * @param	string		$subject
 * @param	integer		$count		is incremented by the number of replacements
 * @return	string
 */
public static function replaceIgnoreCase($search, $replace, $subject, &$count = 0) {
	$searchLength = mb_strlen($search);
	if ($searchLength === 0) {
		// mb_strpos() reports position 0 for an empty needle on PHP 8,
		// which would never consume any character of the subject
		return $subject;
	}
	
	// lowercase once, matches are located in the lowercased copy and cut
	// out of the original subject
	$lowerSubject = mb_strtolower($subject);
	$lowerSearch = mb_strtolower($search);
	$subjectLength = mb_strlen($lowerSubject);
	
	$result = '';
	$offset = 0;
	while ($offset <= $subjectLength && ($position = mb_strpos($lowerSubject, $lowerSearch, $offset)) !== false) {
		$result .= mb_substr($subject, $offset, $position - $offset) . $replace;
		$offset = $position + $searchLength;
		$count++;
	}
	
	// append everything behind the last match
	return $result . mb_substr($subject, $offset);
}
298 | ||
d2ae8f18 TD |
/**
 * Alias to php str_split() function with multibyte support.
 * 
 * @param	string		$string
 * @param	integer		$length		number of characters per chunk, must be at least 1
 * @return	string[]
 * @throws	\InvalidArgumentException	if $length is smaller than 1
 */
public static function split($string, $length = 1) {
	if ($length < 1) {
		// the loop below would never advance
		throw new \InvalidArgumentException("The chunk length must be greater than 0.");
	}
	
	$result = [];
	for ($i = 0, $max = mb_strlen($string); $i < $max; $i += $length) {
		$result[] = mb_substr($string, $i, $length);
	}
	
	return $result;
}
313 | ||
/**
 * Checks whether $haystack starts with $needle, or not.
 * 
 * @param	string		$haystack	The string to be checked for starting with $needle
 * @param	string		$needle		The string to be found at the start of $haystack
 * @param	boolean		$ci		Case insensitive or not. Default = false.
 * 
 * @return	boolean				True, if $haystack starts with $needle, false otherwise.
 */
public static function startsWith($haystack, $needle, $ci = false) {
	if ($ci) {
		$needle = mb_strtolower($needle);
		$haystack = mb_strtolower($haystack);
	}
	
	// cutting off the prefix and comparing it with === is MUCH faster
	// for long strings than searching for the needle
	$prefix = mb_substr($haystack, 0, mb_strlen($needle));
	
	return $prefix === $needle;
}
331 | ||
/**
 * Returns true if $haystack ends with $needle or if the length of $needle is 0.
 * 
 * @param	string		$haystack
 * @param	string		$needle
 * @param	boolean		$ci		case insensitive
 * @return	boolean
 */
public static function endsWith($haystack, $needle, $ci = false) {
	if ($ci) {
		$needle = mb_strtolower($needle);
		$haystack = mb_strtolower($haystack);
	}
	
	$needleLength = mb_strlen($needle);
	if ($needleLength === 0) {
		return true;
	}
	
	// a negative start counts from the end of the string
	$suffix = mb_substr($haystack, -$needleLength);
	
	return $suffix === $needle;
}
349 | ||
f5368a65 TD |
/**
 * Alias to php str_pad function with multibyte support for $input.
 * 
 * @param	string		$input
 * @param	integer		$padLength
 * @param	string		$padString
 * @param	integer		$padType
 * @return	string
 */
public static function pad($input, $padLength, $padString = ' ', $padType = STR_PAD_RIGHT) {
	// str_pad() counts bytes: enlarge the target length by the number of
	// bytes the input occupies beyond its character count
	$byteSurplus = strlen($input) - mb_strlen($input);
	$targetLength = $padLength + $byteSurplus;
	
	return str_pad($input, $targetLength, $padString, $padType);
}
363 | ||
06ccd650 AE |
/**
 * Unescapes escaped characters in a string: for every character listed
 * in $chars the preceding backslash is removed.
 * 
 * @param	string		$string
 * @param	string		$chars		characters to unescape
 * @return	string
 */
public static function unescape($string, $chars = '"') {
	$charCount = strlen($chars);
	$index = 0;
	while ($index < $charCount) {
		$char = $chars[$index];
		$string = str_replace('\\' . $char, $char, $string);
		$index++;
	}
	
	return $string;
}
378 | ||
/**
 * Takes a numeric HTML entity value and returns the appropriate UTF-8 bytes.
 * Supports the complete range of one to four byte sequences.
 * 
 * @param	integer		$dec		html entity value
 * @return	string				utf-8 bytes
 */
public static function getCharacter($dec) {
	$dec = (int) $dec;
	
	if ($dec < 0x80) {
		// 1 byte: 0xxxxxxx
		return chr($dec);
	}
	
	if ($dec < 0x800) {
		// 2 bytes: 110xxxxx 10xxxxxx
		return chr(0xC0 | ($dec >> 6))
			. chr(0x80 | ($dec & 0x3F));
	}
	
	if ($dec < 0x10000) {
		// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		return chr(0xE0 | ($dec >> 12))
			. chr(0x80 | (($dec >> 6) & 0x3F))
			. chr(0x80 | ($dec & 0x3F));
	}
	
	// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	return chr(0xF0 | ($dec >> 18))
		. chr(0x80 | (($dec >> 12) & 0x3F))
		. chr(0x80 | (($dec >> 6) & 0x3F))
		. chr(0x80 | ($dec & 0x3F));
}
400 | ||
/**
 * Converts UTF-8 to Unicode: returns the code point encoded by the first
 * byte sequence of the given string.
 * @see		http://www1.tip.nl/~t876506/utf8tbl.html
 * 
 * @param	string		$c
 * @return	integer		code point; 0 for an empty string or a leading
 * 				continuation byte, false for the invalid lead
 * 				bytes 0xFE and 0xFF
 */
public static function getCharValue($c) {
	if (!isset($c[0])) {
		return 0;
	}
	
	$lead = ord($c[0]);
	if ($lead <= 127) {
		// plain ASCII
		return $lead;
	}
	if ($lead >= 254) {
		return false; // error
	}
	if ($lead < 192) {
		// continuation byte without a lead byte
		return 0;
	}
	
	// determine the number of continuation bytes and the payload bits
	// carried by the lead byte
	if ($lead <= 223) {
		$trailingBytes = 1;
		$value = $lead - 192;
	}
	else if ($lead <= 239) {
		$trailingBytes = 2;
		$value = $lead - 224;
	}
	else if ($lead <= 247) {
		$trailingBytes = 3;
		$value = $lead - 240;
	}
	else if ($lead <= 251) {
		$trailingBytes = 4;
		$value = $lead - 248;
	}
	else {
		$trailingBytes = 5;
		$value = $lead - 252;
	}
	
	// every continuation byte contributes 6 bits; a missing byte is
	// treated like a zero byte
	for ($i = 1; $i <= $trailingBytes; $i++) {
		$byte = isset($c[$i]) ? ord($c[$i]) : 0;
		$value = $value * 64 + ($byte - 128);
	}
	
	return $value;
}
426 | ||
/**
 * Returns the given string with every character replaced by its numeric
 * html entity.
 * 
 * @param	string		$string
 * @return	string
 */
public static function encodeAllChars($string) {
	$entities = [];
	
	$charCount = mb_strlen($string);
	for ($position = 0; $position < $charCount; $position++) {
		$entities[] = '&#' . self::getCharValue(mb_substr($string, $position, 1)) . ';';
	}
	
	return implode('', $entities);
}
442 | ||
/**
 * Checks whether the given string contains only ASCII characters
 * (bytes 0x00 to 0x7F). The result of preg_match() is returned as-is.
 * 
 * @param	string		$string
 * @return	boolean
 */
public static function isASCII($string) {
	$asciiOnly = '/^[\x00-\x7F]*$/';
	
	return preg_match($asciiOnly, $string);
}
452 | ||
/**
 * Checks whether the given string consists of well-formed UTF-8 byte
 * sequences only. The result of preg_match() is returned as-is.
 * @see		http://www.w3.org/International/questions/qa-forms-utf-8
 * 
 * @param	string		$string
 * @return	boolean
 */
public static function isUTF8($string) {
	// extended (x) pattern: whitespace and #-comments are ignored
	$wellFormed = '/^(
[\x09\x0A\x0D\x20-\x7E]* # ASCII
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
)*$/x';
	
	return preg_match($wellFormed, $string);
}
472 | ||
06ccd650 AE |
/**
 * Escapes the closing cdata tag by splitting it across two adjacent
 * CDATA sections.
 * 
 * @param	string		$string
 * @return	string
 */
public static function escapeCDATA($string) {
	$closingTag = ']]>';
	$escapedClosingTag = ']]]]><![CDATA[>';
	
	return str_replace($closingTag, $escapedClosingTag, $string);
}
482 | ||
/**
 * Converts a string to requested character encoding.
 * @see		mb_convert_encoding()
 * 
 * All conversions, including ISO-8859-1 <-> UTF-8, are handled by
 * mb_convert_encoding(); utf8_encode() and utf8_decode() are deprecated
 * as of PHP 8.2 and are no longer used.
 * 
 * @param	string		$inCharset
 * @param	string		$outCharset
 * @param	string		$string
 * @return	string		converted string
 */
public static function convertEncoding($inCharset, $outCharset, $string) {
	// note the argument order: target encoding first, source encoding second
	return mb_convert_encoding($string, $outCharset, $inCharset);
}
498 | ||
/**
 * Strips HTML comments and HTML tags from a string.
 * 
 * @param	string		$string
 * @return	string
 */
public static function stripHTML($string) {
	// comments are removed first, tags afterwards
	$withoutComments = preg_replace(self::HTML_COMMENT_PATTERN, '', $string);
	
	return preg_replace(self::HTML_PATTERN, '', $withoutComments);
}
508 | ||
/**
 * Returns false if the given word is forbidden by given word filter.
 * 
 * The filter contains one forbidden value per line, the comparison is
 * case-insensitive. A value may contain the wildcard "*", which matches
 * any number of characters.
 * 
 * @param	string		$word
 * @param	string		$filter
 * @return	boolean
 */
public static function executeWordFilter($word, $filter) {
	$filter = self::trim($filter);
	if ($filter == '') {
		return true;
	}
	
	$word = mb_strtolower($word);
	
	$forbiddenNames = explode("\n", mb_strtolower(self::unifyNewlines($filter)));
	foreach ($forbiddenNames as $forbiddenName) {
		// ignore empty lines in between actual values; compared against
		// the empty string, because empty() would skip the value "0" too
		$forbiddenName = self::trim($forbiddenName);
		if ($forbiddenName === null || $forbiddenName === '') continue;
		
		if (mb_strpos($forbiddenName, '*') !== false) {
			// quote everything, afterwards turn the quoted wildcard into ".*"
			$pattern = str_replace('\*', '.*', preg_quote($forbiddenName, '/'));
			if (preg_match('/^'.$pattern.'$/s', $word)) {
				return false;
			}
		}
		else if ($word === $forbiddenName) {
			// strict comparison: numeric-looking strings such as "1e3"
			// and "1000" must not be considered equal
			return false;
		}
	}
	
	return true;
}
543 | ||
4ff37a29 TD |
/**
 * Truncates the given string to a certain number of characters.
 * 
 * @param	string		$string		string which shall be truncated
 * @param	integer		$length		string length after truncating
 * @param	string		$etc		string to append when $string is truncated
 * @param	boolean		$breakWords	should words be broken in the middle
 * @return	string				truncated string
 */
public static function truncate($string, $length = 80, $etc = self::HELLIP, $breakWords = false) {
	if ($length == 0) {
		return '';
	}
	
	// short enough, nothing to do
	if (mb_strlen($string) <= $length) {
		return $string;
	}
	
	// reserve room for the appended string
	$length -= mb_strlen($etc);
	
	if (!$breakWords) {
		// take one character more than allowed and drop the trailing
		// whitespace together with the (possibly incomplete) last word
		$candidate = mb_substr($string, 0, $length + 1);
		$string = preg_replace('/\\s+?(\\S+)?$/', '', $candidate);
	}
	
	return mb_substr($string, 0, $length) . $etc;
}
571 | ||
df27a012 MW |
/**
 * Truncates a string containing HTML code and keeps the HTML syntax intact.
 * 
 * The string is returned unchanged if its text without HTML (see
 * stripHTML()) is not longer than $length. Otherwise the markup is walked
 * tag by tag, text is appended until the limit is reached, all elements
 * that are still open are closed and $etc is appended at the very end.
 * The length of $etc counts towards $length.
 * 
 * @param	string		$string		string which shall be truncated
 * @param	integer		$length		string length after truncating
 * @param	string		$etc		ending string which will be appended after truncating
 * @param	boolean		$breakWords	if false words will not be split and the return string might be shorter than $length
 * @return	string				truncated string
 */
public static function truncateHTML($string, $length = 500, $etc = self::HELLIP, $breakWords = false) {
	if (mb_strlen(self::stripHTML($string)) <= $length) {
		return $string;
	}
	// stack of currently open elements, the most recently opened one comes first
	$openTags = [];
	$truncatedString = '';
	
	// initialize length counter with the ending length
	$totalLength = mb_strlen($etc);
	
	// every match consists of an optional tag ([1], tag name in [2])
	// followed by the text up to the next tag ([3])
	preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $string, $tags, PREG_SET_ORDER);
	
	foreach ($tags as $tag) {
		// ignore void elements, they are never closed
		if (!preg_match('/^(area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)$/s', $tag[2])) {
			// look for opening tags
			if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
				array_unshift($openTags, $tag[2]);
			}
			// look for closing tags and check if this tag has a corresponding opening tag
			// and omit the opening tag if it has been closed already
			else if (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
				$position = array_search($closeTag[1], $openTags);
				if ($position !== false) {
					array_splice($openTags, $position, 1);
				}
			}
		}
		// append tag
		$truncatedString .= $tag[1];
		
		// get length of the content without entities. If the content is too long, keep entities intact
		$decodedContent = self::decodeHTML($tag[3]);
		$contentLength = mb_strlen($decodedContent);
		if ($contentLength + $totalLength > $length) {
			if (!$breakWords) {
				// keep as many leading characters as fit, cut in front of a space;
				// nothing of this text node is kept if no such space exists
				// NOTE(review): the pattern has no "u" modifier, so the quantifier
				// presumably counts bytes rather than characters - confirm
				if (preg_match('/^(.{1,'.($length - $totalLength).'}) /s', $decodedContent, $match)) {
					$truncatedString .= self::encodeHTML($match[1]);
				}
				
				break;
			}
			
			// number of visible characters which may still be appended
			$left = $length - $totalLength;
			// accumulated source length of the entities located within the kept part
			$entitiesLength = 0;
			if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $tag[3], $entities, PREG_OFFSET_CAPTURE)) {
				foreach ($entities[0] as $entity) {
					// an entity starting within the remaining space is kept as a
					// whole and counts as one visible character
					if ($entity[1] + 1 - $entitiesLength <= $left) {
						$left--;
						$entitiesLength += mb_strlen($entity[0]);
					}
					else {
						break;
					}
				}
			}
			$truncatedString .= mb_substr($tag[3], 0, $left + $entitiesLength);
			break;
		}
		else {
			// the whole text node fits
			$truncatedString .= $tag[3];
			$totalLength += $contentLength;
		}
		if ($totalLength >= $length) {
			break;
		}
	}
	
	// close all open tags
	foreach ($openTags as $tag) {
		$truncatedString .= '</'.$tag.'>';
	}
	
	// add etc
	$truncatedString .= $etc;
	
	return $truncatedString;
}
661 | ||
593c4fad MW |
/**
 * Generates an anchor tag from given URL.
 * 
 * Internal URLs get their protocol replaced by the protocol of the
 * current request. External URLs receive the class "externalURL" and,
 * depending on the EXTERNAL_LINK_REL_NOFOLLOW and
 * EXTERNAL_LINK_TARGET_BLANK options, the rel and target attributes.
 * Without a title a shortened version of the URL is displayed.
 * 
 * @param	string		$url
 * @param	string		$title
 * @param	boolean		$encodeTitle
 * @return	string		anchor tag
 */
public static function getAnchorTag($url, $title = '', $encodeTitle = true) {
	$url = self::trim($url);
	
	$external = !ApplicationHandler::getInstance()->isInternalURL($url);
	if (!$external) {
		$url = preg_replace('~^https?://~', RouteHandler::getProtocol(), $url);
	}
	
	// cut visible url
	if (empty($title)) {
		// use URL and remove protocol and www subdomain
		$title = preg_replace('~^(?:https?|ftps?)://(?:www\.)?~i', '', $url);
		
		if (mb_strlen($title) > 60) {
			$title = mb_substr($title, 0, 30) . self::HELLIP . mb_substr($title, -25);
		}
		
		// a title derived from the URL is never trusted: encode it here,
		// if it is not going to be encoded below
		if (!$encodeTitle) $title = self::encodeHTML($title);
	}
	
	// attributes for external links
	$attributes = '';
	if ($external) {
		$attributes .= ' class="externalURL"';
		
		$relations = [];
		if (EXTERNAL_LINK_REL_NOFOLLOW) $relations[] = 'nofollow';
		if (EXTERNAL_LINK_TARGET_BLANK) $relations[] = 'noopener noreferrer';
		if (!empty($relations)) {
			$attributes .= ' rel="' . implode(' ', $relations) . '"';
		}
		
		if (EXTERNAL_LINK_TARGET_BLANK) {
			$attributes .= ' target="_blank"';
		}
	}
	
	$label = $encodeTitle ? self::encodeHTML($title) : $title;
	
	return '<a href="' . self::encodeHTML($url) . '"' . $attributes . '>' . $label . '</a>';
}
693 | ||
06ccd650 AE |
/**
 * Splits given string into smaller chunks by appending $break to every
 * complete run of $length characters.
 * 
 * @param	string		$string
 * @param	integer		$length
 * @param	string		$break
 * @return	string
 */
public static function splitIntoChunks($string, $length = 75, $break = "\r\n") {
	$chunkPattern = '.{' . $length . '}';
	// \0 refers to the matched chunk
	$replacement = "\\0" . $break;
	
	return mb_ereg_replace($chunkPattern, $replacement, $string);
}
705 | ||
5d3505bf MW |
/**
 * Simple multi-byte safe wordwrap() function.
 * 
 * The string is split at $break; every part longer than $width characters
 * is cut into pieces of at most $width characters, which are joined with
 * $break again.
 * 
 * @param	string		$string
 * @param	integer		$width
 * @param	string		$break
 * @return	string
 */
public static function wordwrap($string, $width = 50, $break = ' ') {
	$result = '';
	
	foreach (explode($break, $string) as $substring) {
		$length = mb_strlen($substring);
		if ($length > $width) {
			$pieces = ceil($length / $width);
			
			for ($i = 0; $i < $pieces; $i++) {
				// compared against the empty string, because empty()
				// would treat a result of "0" as empty too
				if ($result !== '') $result .= $break;
				
				// the last piece is automatically shorter
				$result .= mb_substr($substring, $width * $i, $width);
			}
		}
		else {
			if ($result !== '') $result .= $break;
			$result .= $substring;
		}
	}
	
	return $result;
}
737 | ||
a15ec9be MW |
/**
 * Shortens numbers larger than 1000 by using unit prefixes ("k" for
 * thousands, "M" for millions). Shortened values above 10 are rounded
 * down to an integer, other values are rounded to one decimal place.
 * 
 * @param	integer		$number
 * @return	string
 */
public static function getShortUnit($number) {
	$unitPrefix = '';
	
	// largest unit first
	$units = [
		1000000 => 'M',
		1000 => 'k'
	];
	foreach ($units as $threshold => $prefix) {
		if ($number >= $threshold) {
			$number /= $threshold;
			$number = ($number > 10) ? floor($number) : round($number, 1);
			$unitPrefix = $prefix;
			break;
		}
	}
	
	return self::formatNumeric($number) . $unitPrefix;
}
770 | ||
4f2d3f58 MS |
/**
 * Normalizes a string representing comma-separated values by making sure
 * that the separator is just a comma, not a combination of whitespace and
 * a comma.
 * 
 * @param	string		$string
 * @return	string
 * @since	3.1
 */
public static function normalizeCsv($string) {
	$values = explode(',', $string);
	$trimmedValues = ArrayUtil::trim($values);
	
	return implode(',', $trimmedValues);
}
783 | ||
1d5f9363 MS |
/**
 * Forbid creation of StringUtil objects: the class offers static
 * functions only.
 */
private function __construct() {
	// intentionally left empty
}
dcb3a44c | 790 | } |