Improved detection of utf-8 encoded strings
author Marcel Werk <burntime@woltlab.com>
Wed, 9 Jul 2014 10:36:32 +0000 (12:36 +0200)
committer Marcel Werk <burntime@woltlab.com>
Wed, 9 Jul 2014 10:36:32 +0000 (12:36 +0200)
wcfsetup/install/files/lib/system/WCF.class.php
wcfsetup/install/files/lib/system/bbcode/KeywordHighlighter.class.php
wcfsetup/install/files/lib/util/StringUtil.class.php
wcfsetup/install/files/lib/util/UserUtil.class.php

index e40feb86476bee572947a32a9d3895b67d7f3019..1afab98fce4497f253fd578e0deb507cdd000acb 100644 (file)
@@ -746,7 +746,7 @@ class WCF {
                }
                
                $path = str_replace('/index.php', '', str_replace($scriptName, '', $_SERVER['REQUEST_URI']));
-               if (!StringUtil::isASCII($path) && !StringUtil::isUTF8($path)) {
+               if (!StringUtil::isUTF8($path)) {
                        $path = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $path);
                }
                $path = FileUtil::removeLeadingSlash($path);
index 7b621667278d498f78a866ad82514f555e7805ec..691e65d6d0f214f722fafd37feee7c1e032fa04d 100644 (file)
@@ -95,7 +95,7 @@ class KeywordHighlighter extends SingletonFactory {
         */
        protected function parseKeywords($keywordString) {
                // convert encoding if necessary
-               if (!StringUtil::isASCII($keywordString) && !StringUtil::isUTF8($keywordString)) {
+               if (!StringUtil::isUTF8($keywordString)) {
                        $keywordString = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $keywordString);
                }
                
index a51a41fa88a8b927a39d8629053eece52ae8b300..f1f4a6061cdeecdc1ca3f200cffdf44ff3b0234b 100644 (file)
@@ -524,7 +524,7 @@ final class StringUtil {
         * @return      boolean
         */
        public static function isUTF8($string) {
-               /*return preg_match('/^(
+               return preg_match('/^(
                                [\x09\x0A\x0D\x20-\x7E]*                # ASCII
                        |       [\xC2-\xDF][\x80-\xBF]                  # non-overlong 2-byte
                        |       \xE0[\xA0-\xBF][\x80-\xBF]              # excluding overlongs
@@ -534,16 +534,6 @@ final class StringUtil {
                        |       [\xF1-\xF3][\x80-\xBF]{3}               # planes 4-15
                        |       \xF4[\x80-\x8F][\x80-\xBF]{2}           # plane 16
                        )*$/x', $string);
-               */
-               return preg_match('/(
-                               [\xC2-\xDF][\x80-\xBF]                  # non-overlong 2-byte
-                       |       \xE0[\xA0-\xBF][\x80-\xBF]              # excluding overlongs
-                       |       [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}       # straight 3-byte
-                       |       \xED[\x80-\x9F][\x80-\xBF]              # excluding surrogates
-                       |       \xF0[\x90-\xBF][\x80-\xBF]{2}           # planes 1-3
-                       |       [\xF1-\xF3][\x80-\xBF]{3}               # planes 4-15
-                       |       \xF4[\x80-\x8F][\x80-\xBF]{2}           # plane 16
-                       )/x', $string);
        }
        
        /**
index 6553ea887de5bfe237cf53e527e7d8971d3dc36d..b4aaa225a894e2572f858224c0c66ff8313bcabb 100644 (file)
@@ -107,7 +107,7 @@ final class UserUtil {
        public static function getUserAgent() {
                if (isset($_SERVER['HTTP_USER_AGENT'])) {
                        $userAgent = $_SERVER['HTTP_USER_AGENT'];
-                       if (!StringUtil::isASCII($userAgent) && !StringUtil::isUTF8($userAgent)) {
+                       if (!StringUtil::isUTF8($userAgent)) {
                                $userAgent = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $userAgent);
                        }
                        
@@ -263,7 +263,7 @@ final class UserUtil {
                }
                
                // fix encoding
-               if (!StringUtil::isASCII($REQUEST_URI) && !StringUtil::isUTF8($REQUEST_URI)) {
+               if (!StringUtil::isUTF8($REQUEST_URI)) {
                        $REQUEST_URI = StringUtil::convertEncoding('ISO-8859-1', 'UTF-8', $REQUEST_URI);
                }