Commit | Line | Data |
---|---|---|
b52f751b AE |
1 | <?php |
2 | namespace wcf\system\search\mysql; | |
3 | use wcf\system\database\util\PreparedStatementConditionBuilder; | |
4 | use wcf\system\database\DatabaseException; | |
5 | use wcf\system\exception\SystemException; | |
6 | use wcf\system\search\AbstractSearchEngine; | |
0cd425be | 7 | use wcf\system\search\SearchEngine; |
b52f751b AE |
8 | use wcf\system\search\SearchIndexManager; |
9 | use wcf\system\WCF; | |
7d059540 | 10 | use wcf\util\StringUtil; |
b52f751b AE |
11 | |
12 | /** | |
13 | * Search engine using MySQL's FULLTEXT index. | |
14 | * | |
15 | * @author Alexander Ebert | |
7b7b9764 | 16 | * @copyright 2001-2019 WoltLab GmbH |
b52f751b | 17 | * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php> |
e71525e4 | 18 | * @package WoltLabSuite\Core\System\Search |
b52f751b AE |
19 | */ |
20 | class MysqlSearchEngine extends AbstractSearchEngine { | |
21 | /** | |
22 | * MySQL's minimum word length for fulltext indices | |
23 | * @var integer | |
24 | */ | |
0cd425be | 25 | protected $ftMinWordLen = null; |
b52f751b | 26 | |
11767746 | 27 | /** |
0fcfe5f6 | 28 | * @inheritDoc |
11767746 | 29 | */ |
058cbd6a | 30 | protected $specialCharacters = ['(', ')', '@', '+', '-', '"', '<', '>', '~', '*']; |
11767746 | 31 | |
b52f751b | 32 | /** |
0fcfe5f6 | 33 | * @inheritDoc |
b52f751b | 34 | */ |
058cbd6a | 35 | public function search($q, array $objectTypes, $subjectOnly = false, PreparedStatementConditionBuilder $searchIndexCondition = null, array $additionalConditions = [], $orderBy = 'time DESC', $limit = 1000) { |
b52f751b AE |
36 | // build search query |
37 | $sql = ''; | |
058cbd6a | 38 | $parameters = []; |
b52f751b AE |
39 | foreach ($objectTypes as $objectTypeName) { |
40 | $objectType = SearchEngine::getInstance()->getObjectType($objectTypeName); | |
0cd425be AE |
41 | |
42 | if (!empty($sql)) $sql .= "\nUNION ALL\n"; | |
b52f751b | 43 | $additionalConditionsConditionBuilder = (isset($additionalConditions[$objectTypeName]) ? $additionalConditions[$objectTypeName] : null); |
0cd425be AE |
44 | |
45 | $query = $objectType->getOuterSQLQuery($q, $searchIndexCondition, $additionalConditionsConditionBuilder); | |
46 | if (empty($query)) { | |
47 | $query = "SELECT ".$objectType->getIDFieldName()." AS objectID, | |
48 | ".$objectType->getSubjectFieldName()." AS subject, | |
49 | ".$objectType->getTimeFieldName()." AS time, | |
50 | ".$objectType->getUsernameFieldName()." AS username, | |
51 | '".$objectTypeName."' AS objectType | |
52 | ".($orderBy == 'relevance ASC' || $orderBy == 'relevance DESC' ? ',search_index.relevance' : '')." | |
53 | FROM ".$objectType->getTableName()." | |
54 | INNER JOIN ( | |
55 | {WCF_SEARCH_INNER_JOIN} | |
56 | ) search_index | |
57 | ON (".$objectType->getIDFieldName()." = search_index.objectID) | |
58 | ".$objectType->getJoins()." | |
59 | ".(isset($additionalConditions[$objectTypeName]) ? $additionalConditions[$objectTypeName] : ''); | |
b52f751b | 60 | } |
0cd425be AE |
61 | |
62 | if (mb_strpos($query, '{WCF_SEARCH_INNER_JOIN}')) { | |
63 | $innerJoin = $this->getInnerJoin($objectTypeName, $q, $subjectOnly, $searchIndexCondition, $orderBy, $limit); | |
64 | ||
65 | $query = str_replace('{WCF_SEARCH_INNER_JOIN}', $innerJoin['sql'], $query); | |
66 | if ($innerJoin['fulltextCondition'] !== null) $parameters = array_merge($parameters, $innerJoin['fulltextCondition']->getParameters()); | |
b52f751b AE |
67 | } |
68 | ||
b52f751b AE |
69 | if ($searchIndexCondition !== null) $parameters = array_merge($parameters, $searchIndexCondition->getParameters()); |
70 | if (isset($additionalConditions[$objectTypeName])) $parameters = array_merge($parameters, $additionalConditions[$objectTypeName]->getParameters()); | |
0cd425be AE |
71 | |
72 | $sql .= $query; | |
b52f751b AE |
73 | } |
74 | if (empty($sql)) { | |
75 | throw new SystemException('no object types given'); | |
76 | } | |
77 | ||
78 | if (!empty($orderBy)) { | |
79 | $sql .= " ORDER BY " . $orderBy; | |
80 | } | |
81 | ||
82 | // send search query | |
058cbd6a | 83 | $messages = []; |
b52f751b AE |
84 | $statement = WCF::getDB()->prepareStatement($sql, $limit); |
85 | $statement->execute($parameters); | |
86 | while ($row = $statement->fetchArray()) { | |
058cbd6a | 87 | $messages[] = [ |
b52f751b AE |
88 | 'objectID' => $row['objectID'], |
89 | 'objectType' => $row['objectType'] | |
058cbd6a | 90 | ]; |
b52f751b AE |
91 | } |
92 | ||
93 | return $messages; | |
94 | } | |
95 | ||
96 | /** | |
0fcfe5f6 | 97 | * @inheritDoc |
b52f751b | 98 | */ |
0cd425be AE |
99 | public function getInnerJoin($objectTypeName, $q, $subjectOnly = false, PreparedStatementConditionBuilder $searchIndexCondition = null, $orderBy = 'time DESC', $limit = 1000) { |
100 | $fulltextCondition = null; | |
101 | $relevanceCalc = ''; | |
102 | if (!empty($q)) { | |
103 | $q = $this->parseSearchQuery($q); | |
b52f751b | 104 | |
0cd425be | 105 | $fulltextCondition = new PreparedStatementConditionBuilder(false); |
058cbd6a | 106 | $fulltextCondition->add("MATCH (subject".(!$subjectOnly ? ', message, metaData' : '').") AGAINST (? IN BOOLEAN MODE)", [$q]); |
b52f751b | 107 | |
0cd425be AE |
108 | if ($orderBy == 'relevance ASC' || $orderBy == 'relevance DESC') { |
109 | $relevanceCalc = "MATCH (subject".(!$subjectOnly ? ', message, metaData' : '').") AGAINST ('".escapeString($q)."') + (5 / (1 + POW(LN(1 + (".TIME_NOW." - time) / 2592000), 2))) AS relevance"; | |
b52f751b | 110 | } |
b52f751b AE |
111 | } |
112 | ||
0cd425be | 113 | $sql = "SELECT objectID |
957dea34 | 114 | ".($relevanceCalc ? ','.$relevanceCalc : ", '0' AS relevance")." |
0cd425be AE |
115 | FROM ".SearchIndexManager::getTableName($objectTypeName)." |
116 | WHERE ".($fulltextCondition !== null ? $fulltextCondition : '')." | |
117 | ".(($searchIndexCondition !== null && $searchIndexCondition->__toString()) ? ($fulltextCondition !== null ? "AND " : '').$searchIndexCondition : '')." | |
118 | ".(!empty($orderBy) && $fulltextCondition === null ? 'ORDER BY '.$orderBy : '')." | |
4e798400 | 119 | LIMIT ".($limit == 1000 ? SearchEngine::INNER_SEARCH_LIMIT : $limit); |
b52f751b | 120 | |
058cbd6a | 121 | return [ |
0cd425be | 122 | 'fulltextCondition' => $fulltextCondition, |
0496fe5c | 123 | 'searchIndexCondition' => $searchIndexCondition, |
0cd425be | 124 | 'sql' => $sql |
058cbd6a | 125 | ]; |
b52f751b AE |
126 | } |
127 | ||
7d059540 TD |
128 | /** |
129 | * Manipulates the search term (< and > used as quotation marks): | |
130 | * | |
131 | * - <test foo> becomes <+test* +foo*> | |
132 | * - <test -foo bar> becomes <+test* -foo* +bar*> | |
133 | * - <test "foo bar"> becomes <+test* +"foo bar"> | |
134 | * | |
135 | * @see http://dev.mysql.com/doc/refman/5.5/en/fulltext-boolean.html | |
136 | * | |
137 | * @param string $query | |
138 | * @return string | |
139 | */ | |
140 | protected function parseSearchQuery($query) { | |
141 | $query = StringUtil::trim($query); | |
142 | ||
143 | // expand search terms with a * unless they're encapsulated with quotes | |
144 | $inQuotes = false; | |
145 | $previousChar = $tmp = ''; | |
146 | $controlCharacterOrSpace = false; | |
147 | $chars = ['+', '-', '*']; | |
148 | $ftMinWordLen = $this->getFulltextMinimumWordLength(); | |
149 | for ($i = 0, $length = mb_strlen($query); $i < $length; $i++) { | |
150 | $char = mb_substr($query, $i, 1); | |
151 | ||
152 | if ($inQuotes) { | |
153 | if ($char == '"') { | |
154 | $inQuotes = false; | |
155 | } | |
156 | } | |
157 | else { | |
158 | if ($char == '"') { | |
159 | $inQuotes = true; | |
160 | } | |
161 | else { | |
162 | if ($char == ' ' && !$controlCharacterOrSpace) { | |
163 | $controlCharacterOrSpace = true; | |
164 | $tmp .= '*'; | |
165 | } | |
166 | else if (in_array($char, $chars)) { | |
167 | $controlCharacterOrSpace = true; | |
168 | } | |
169 | else { | |
170 | $controlCharacterOrSpace = false; | |
171 | } | |
172 | } | |
173 | } | |
174 | ||
175 | /* | |
176 | * prepend a plus sign (logical AND) if ALL these conditions are given: | |
177 | * | |
178 | * 1) previous character: | |
179 | * - is empty (start of string) | |
180 | * - is a space (MySQL uses spaces to separate words) | |
181 | * | |
182 | * 2) not within quotation marks | |
183 | * | |
184 | * 3) current char: | |
185 | * - is NOT +, - or * | |
186 | */ | |
187 | if (($previousChar == '' || $previousChar == ' ') && !$inQuotes && !in_array($char, $chars)) { | |
188 | // check if the term is shorter than the minimum fulltext word length | |
189 | if ($i + $ftMinWordLen <= $length) { | |
190 | $term = '';// $char; | |
191 | for ($j = $i, $innerLength = $ftMinWordLen + $i; $j < $innerLength; $j++) { | |
192 | $currentChar = mb_substr($query, $j, 1); | |
193 | if ($currentChar == '"' || $currentChar == ' ' || in_array($currentChar, $chars)) { | |
194 | break; | |
195 | } | |
196 | ||
197 | $term .= $currentChar; | |
198 | } | |
199 | ||
200 | if (mb_strlen($term) == $ftMinWordLen) { | |
201 | $tmp .= '+'; | |
202 | } | |
203 | } | |
204 | } | |
205 | ||
206 | $tmp .= $char; | |
207 | $previousChar = $char; | |
208 | } | |
209 | ||
210 | // handle last char | |
211 | if (!$inQuotes && !$controlCharacterOrSpace) { | |
212 | $tmp .= '*'; | |
213 | } | |
214 | ||
215 | return $tmp; | |
216 | } | |
217 | ||
b52f751b | 218 | /** |
0fcfe5f6 | 219 | * @inheritDoc |
b52f751b | 220 | */ |
0cd425be AE |
221 | protected function getFulltextMinimumWordLength() { |
222 | if ($this->ftMinWordLen === null) { | |
ece62940 | 223 | $sql = "SHOW VARIABLES LIKE 'ft_min_word_len'"; |
b52f751b AE |
224 | |
225 | try { | |
ece62940 AE |
226 | $statement = WCF::getDB()->prepareStatement($sql); |
227 | $statement->execute(); | |
b52f751b AE |
228 | $row = $statement->fetchArray(); |
229 | } | |
230 | catch (DatabaseException $e) { | |
231 | // fallback if user is disallowed to issue 'SHOW VARIABLES' | |
058cbd6a | 232 | $row = ['Value' => 4]; |
b52f751b AE |
233 | } |
234 | ||
0cd425be | 235 | $this->ftMinWordLen = $row['Value']; |
b52f751b AE |
236 | } |
237 | ||
0cd425be | 238 | return $this->ftMinWordLen; |
b52f751b AE |
239 | } |
240 | } |