From a92ec792d48b2a9c7290ef517c1054932f9e47dd Mon Sep 17 00:00:00 2001 From: Marcel Werk Date: Wed, 9 Jul 2014 12:36:32 +0200 Subject: [PATCH] Improved detection of utf-8 encoded strings --- wcfsetup/install/files/lib/system/WCF.class.php | 2 +- .../lib/system/bbcode/KeywordHighlighter.class.php | 2 +- wcfsetup/install/files/lib/util/StringUtil.class.php | 12 +----------- wcfsetup/install/files/lib/util/UserUtil.class.php | 4 ++-- 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/wcfsetup/install/files/lib/system/WCF.class.php b/wcfsetup/install/files/lib/system/WCF.class.php index e40feb8647..1afab98fce 100644 --- a/wcfsetup/install/files/lib/system/WCF.class.php +++ b/wcfsetup/install/files/lib/system/WCF.class.php @@ -746,7 +746,7 @@ class WCF { } $path = str_replace('/index.php', '', str_replace($scriptName, '', $_SERVER['REQUEST_URI'])); - if (!StringUtil::isASCII($path) && !StringUtil::isUTF8($path)) { + if (!StringUtil::isUTF8($path)) { $path = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $path); } $path = FileUtil::removeLeadingSlash($path); diff --git a/wcfsetup/install/files/lib/system/bbcode/KeywordHighlighter.class.php b/wcfsetup/install/files/lib/system/bbcode/KeywordHighlighter.class.php index 7b62166727..691e65d6d0 100644 --- a/wcfsetup/install/files/lib/system/bbcode/KeywordHighlighter.class.php +++ b/wcfsetup/install/files/lib/system/bbcode/KeywordHighlighter.class.php @@ -95,7 +95,7 @@ class KeywordHighlighter extends SingletonFactory { */ protected function parseKeywords($keywordString) { // convert encoding if necessary - if (!StringUtil::isASCII($keywordString) && !StringUtil::isUTF8($keywordString)) { + if (!StringUtil::isUTF8($keywordString)) { $keywordString = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $keywordString); } diff --git a/wcfsetup/install/files/lib/util/StringUtil.class.php b/wcfsetup/install/files/lib/util/StringUtil.class.php index a51a41fa88..f1f4a6061c 100644 --- a/wcfsetup/install/files/lib/util/StringUtil.class.php +++ b/wcfsetup/install/files/lib/util/StringUtil.class.php @@ -524,7 +524,7 @@ final class StringUtil { * @return boolean */ public static function isUTF8($string) { - /*return preg_match('/^( + return preg_match('/^( [\x09\x0A\x0D\x20-\x7E]* # ASCII | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs @@ -534,16 +534,6 @@ final class StringUtil { | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 )*$/x', $string); - */ - return preg_match('/( - [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte - | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs - | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte - | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates - | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 - | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 - | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 - )/x', $string); } /** diff --git a/wcfsetup/install/files/lib/util/UserUtil.class.php b/wcfsetup/install/files/lib/util/UserUtil.class.php index 6553ea887d..b4aaa225a8 100644 --- a/wcfsetup/install/files/lib/util/UserUtil.class.php +++ b/wcfsetup/install/files/lib/util/UserUtil.class.php @@ -107,7 +107,7 @@ final class UserUtil { public static function getUserAgent() { if (isset($_SERVER['HTTP_USER_AGENT'])) { $userAgent = $_SERVER['HTTP_USER_AGENT']; - if (!StringUtil::isASCII($userAgent) && !StringUtil::isUTF8($userAgent)) { + if (!StringUtil::isUTF8($userAgent)) { $userAgent = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $userAgent); } @@ -263,7 +263,7 @@ final class UserUtil { } // fix encoding - if (!StringUtil::isASCII($REQUEST_URI) && !StringUtil::isUTF8($REQUEST_URI)) { + if (!StringUtil::isUTF8($REQUEST_URI)) { $REQUEST_URI = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $REQUEST_URI); } -- 2.20.1