Do not add the `+` prefix to search terms matching an InnoDB stop word
authorTim Düsterhus <duesterhus@woltlab.com>
Fri, 8 Apr 2022 10:00:53 +0000 (12:00 +0200)
committerTim Düsterhus <duesterhus@woltlab.com>
Fri, 8 Apr 2022 10:00:53 +0000 (12:00 +0200)
This issue is effectively identical to the one fixed in commit
247d9cc51af9cd78395e2e7600bacbc2ffdf918b.

wcfsetup/install/files/lib/system/search/mysql/MysqlSearchEngine.class.php

index 5d9ab9ad28640541edecb0cd012a345162429176..ad31ea80b0883aeea99eb380f9699cf4f9f80889 100644 (file)
@@ -187,9 +187,16 @@ class MysqlSearchEngine extends AbstractSearchEngine
 
             if (!$prefix) {
                 // Add a '+' prefix if no prefix is given, and
+                // the word is not a stopword, and
                 // - the word is longer than the min token size, or
                 // - the word is quoted.
-                if ($word[0] === '"' || \strlen($word) >= $this->getMinTokenSize()) {
+                if (
+                    !\in_array($word, $this->getStopWords())
+                    && (
+                        $word[0] === '"'
+                        || \strlen($word) >= $this->getMinTokenSize()
+                    )
+                 ) {
                     $prefix = '+';
                 }
             }
@@ -557,4 +564,49 @@ class MysqlSearchEngine extends AbstractSearchEngine
 
         return $this->minTokenSize;
     }
+
+    /**
+     * Returns MySQL's default InnoDB full-text stop words (deduplicated); these must not receive the '+' prefix.
+     * @return string[]
+     */
+    private function getStopWords(): array
+    {
+        return [
+            'a',
+            'about',
+            'an',
+            'are',
+            'as',
+            'at',
+            'be',
+            'by',
+            'com',
+            'de',
+            'en',
+            'for',
+            'from',
+            'how',
+            'i',
+            'in',
+            'is',
+            'it',
+            'la',
+            'of',
+            'on',
+            'or',
+            'that',
+            'the',
+            'this',
+            'to',
+            'was',
+            'what',
+            'when',
+            'where',
+            'who',
+            'will',
+            'with',
+            'und',
+            'www',
+        ];
+    }
 }