From: Tim Düsterhus Date: Thu, 17 Nov 2016 16:46:14 +0000 (+0100) Subject: Improve StringUtil::trim() X-Git-Tag: 3.0.0_Beta_5~36 X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=307eb577b6a02ae48727aa8a602cfb3f8505437f;p=GitHub%2FWoltLab%2FWCF.git Improve StringUtil::trim() The previous regular expression for trim at the end (introduced in 73d8b58f6a2ed1c9f49684e867d2b42a8cd82920) showed bad performance for huge strings containing small amounts of white space. The version before that one blew the C stack for huge strings containing huge amounts of contiguous white space. Matching for the Unicode property of being a whitespace is both faster (less backtracking for alternations) *and* more correct (because it matches many more types of white space). win-win --- diff --git a/wcfsetup/install/files/lib/util/StringUtil.class.php b/wcfsetup/install/files/lib/util/StringUtil.class.php index dbf98564c6..904b7bf8a4 100644 --- a/wcfsetup/install/files/lib/util/StringUtil.class.php +++ b/wcfsetup/install/files/lib/util/StringUtil.class.php @@ -70,16 +70,21 @@ final class StringUtil { } /** - * Swallowes whitespace from beginning and end of the string. + * Removes Unicode whitespace characters from the beginning + * and ending of the given string. * * @param string $text * @return string */ public static function trim($text) { - // Whitespace + (narrow) non breaking spaces. - // No one can triforce now. - $text = preg_replace('/^(\s|'.chr(226).chr(128).chr(175).'|'.chr(194).chr(160).')+/', '', $text); - $text = preg_replace('/([^\s'.chr(226).chr(194).']++)(?:\s|'.chr(226).chr(128).chr(175).'|'.chr(194).chr(160).')++$/', '\\1', $text); + // These regular expressions use character properties + // to find characters defined as space in the unicode + // specification. + // Do not merge the expressions, they are seperated for + // performance reasons. + + $text = preg_replace('/^\p{Zs}+/u', '', $text); + $text = preg_replace('/\p{Zs}+$/u', '', $text); return $text; }