From: Tim Düsterhus Date: Wed, 6 Apr 2022 11:02:02 +0000 (+0200) Subject: Ignore prefixes that are not separated with a space in MysqlSearchEngine::splitIntoTe... X-Git-Tag: 5.4.16_dev_1~10 X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=da8232c4427a5abb8587b0266a36a51f625af55b;p=GitHub%2FWoltLab%2FWCF.git Ignore prefixes that are not separated with a space in MysqlSearchEngine::splitIntoTerms() --- diff --git a/wcfsetup/install/files/lib/system/search/mysql/MysqlSearchEngine.class.php b/wcfsetup/install/files/lib/system/search/mysql/MysqlSearchEngine.class.php index 44f4989626..5d9ab9ad28 100644 --- a/wcfsetup/install/files/lib/system/search/mysql/MysqlSearchEngine.class.php +++ b/wcfsetup/install/files/lib/system/search/mysql/MysqlSearchEngine.class.php @@ -230,6 +230,18 @@ class MysqlSearchEngine extends AbstractSearchEngine * Word: |Apfel| * Word: -|Banane| * + * Query: `Apfel-Banane` + * Word: |Apfel| + * Word: |Banane| + * + * Query: `Apfel-+-Banane` + * Word: |Apfel| + * Word: |Banane| + * + * Query: `B*-tree` + * Word: |B|* + * Word: |tree| + * * Query: ` Apfel ` * Word: |Apfel| * @@ -437,15 +449,38 @@ class MysqlSearchEngine extends AbstractSearchEngine $i++; continue; } else { - $state = 'finish'; - // No increment, we must yield the word and then continue parsing at - // the current position to prevent skipping characters. + $state = 'prefixWithoutSpace'; + // No increment, we must check whether the current character is a prefix + // that needs to be dropped. continue; } } elseif ($state === 'atSign') { if (\preg_match('/[0-9]/', $char)) { $i++; continue; + } else { + $state = 'prefixWithoutSpace'; + // No increment, we must check whether the current character is a prefix + // that needs to be dropped. 
+ continue; + } + } elseif ($state === 'prefixWithoutSpace') { + if ( + \in_array($char, [ + '-', + '+', + '~', + '<', + '>', + ]) + ) { + // Ignore valid prefixes after a word is fully parsed: The word + // parsing was aborted, because the prefix character was encountered. + // Thus an input such as `compound-word` would see the hyphen as an + // exclusion prefix of `word` instead of a hyphen. By ignoring such + // prefixes (unless they are clearly delimited with a space) the parsed + // result will better match user expectations. + $i++; } else { $state = 'finish'; // No increment, we must yield the word and then continue parsing at