Remove AbstractSearchEngine::parseSearchQuery()
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / search / mysql / MysqlSearchEngine.class.php
1 <?php
2 namespace wcf\system\search\mysql;
3 use wcf\system\database\util\PreparedStatementConditionBuilder;
4 use wcf\system\database\DatabaseException;
5 use wcf\system\exception\SystemException;
6 use wcf\system\search\AbstractSearchEngine;
7 use wcf\system\search\SearchEngine;
8 use wcf\system\search\SearchIndexManager;
9 use wcf\system\WCF;
10 use wcf\util\StringUtil;
11
12 /**
13 * Search engine using MySQL's FULLTEXT index.
14 *
15 * @author Alexander Ebert
16 * @copyright 2001-2019 WoltLab GmbH
17 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
18 * @package WoltLabSuite\Core\System\Search
19 */
20 class MysqlSearchEngine extends AbstractSearchEngine {
21 /**
22 * MySQL's minimum word length for fulltext indices
23 * @var integer
24 */
25 protected $ftMinWordLen = null;
26
27 /**
28 * @inheritDoc
29 */
30 protected $specialCharacters = ['(', ')', '@', '+', '-', '"', '<', '>', '~', '*'];
31
32 /**
33 * @inheritDoc
34 */
35 public function search($q, array $objectTypes, $subjectOnly = false, PreparedStatementConditionBuilder $searchIndexCondition = null, array $additionalConditions = [], $orderBy = 'time DESC', $limit = 1000) {
36 // build search query
37 $sql = '';
38 $parameters = [];
39 foreach ($objectTypes as $objectTypeName) {
40 $objectType = SearchEngine::getInstance()->getObjectType($objectTypeName);
41
42 if (!empty($sql)) $sql .= "\nUNION ALL\n";
43 $additionalConditionsConditionBuilder = (isset($additionalConditions[$objectTypeName]) ? $additionalConditions[$objectTypeName] : null);
44
45 $query = $objectType->getOuterSQLQuery($q, $searchIndexCondition, $additionalConditionsConditionBuilder);
46 if (empty($query)) {
47 $query = "SELECT ".$objectType->getIDFieldName()." AS objectID,
48 ".$objectType->getSubjectFieldName()." AS subject,
49 ".$objectType->getTimeFieldName()." AS time,
50 ".$objectType->getUsernameFieldName()." AS username,
51 '".$objectTypeName."' AS objectType
52 ".($orderBy == 'relevance ASC' || $orderBy == 'relevance DESC' ? ',search_index.relevance' : '')."
53 FROM ".$objectType->getTableName()."
54 INNER JOIN (
55 {WCF_SEARCH_INNER_JOIN}
56 ) search_index
57 ON (".$objectType->getIDFieldName()." = search_index.objectID)
58 ".$objectType->getJoins()."
59 ".(isset($additionalConditions[$objectTypeName]) ? $additionalConditions[$objectTypeName] : '');
60 }
61
62 if (mb_strpos($query, '{WCF_SEARCH_INNER_JOIN}')) {
63 $innerJoin = $this->getInnerJoin($objectTypeName, $q, $subjectOnly, $searchIndexCondition, $orderBy, $limit);
64
65 $query = str_replace('{WCF_SEARCH_INNER_JOIN}', $innerJoin['sql'], $query);
66 if ($innerJoin['fulltextCondition'] !== null) $parameters = array_merge($parameters, $innerJoin['fulltextCondition']->getParameters());
67 }
68
69 if ($searchIndexCondition !== null) $parameters = array_merge($parameters, $searchIndexCondition->getParameters());
70 if (isset($additionalConditions[$objectTypeName])) $parameters = array_merge($parameters, $additionalConditions[$objectTypeName]->getParameters());
71
72 $sql .= $query;
73 }
74 if (empty($sql)) {
75 throw new SystemException('no object types given');
76 }
77
78 if (!empty($orderBy)) {
79 $sql .= " ORDER BY " . $orderBy;
80 }
81
82 // send search query
83 $messages = [];
84 $statement = WCF::getDB()->prepareStatement($sql, $limit);
85 $statement->execute($parameters);
86 while ($row = $statement->fetchArray()) {
87 $messages[] = [
88 'objectID' => $row['objectID'],
89 'objectType' => $row['objectType']
90 ];
91 }
92
93 return $messages;
94 }
95
96 /**
97 * @inheritDoc
98 */
99 public function getInnerJoin($objectTypeName, $q, $subjectOnly = false, PreparedStatementConditionBuilder $searchIndexCondition = null, $orderBy = 'time DESC', $limit = 1000) {
100 $fulltextCondition = null;
101 $relevanceCalc = '';
102 if (!empty($q)) {
103 $q = $this->parseSearchQuery($q);
104
105 $fulltextCondition = new PreparedStatementConditionBuilder(false);
106 $fulltextCondition->add("MATCH (subject".(!$subjectOnly ? ', message, metaData' : '').") AGAINST (? IN BOOLEAN MODE)", [$q]);
107
108 if ($orderBy == 'relevance ASC' || $orderBy == 'relevance DESC') {
109 $relevanceCalc = "MATCH (subject".(!$subjectOnly ? ', message, metaData' : '').") AGAINST ('".escapeString($q)."') + (5 / (1 + POW(LN(1 + (".TIME_NOW." - time) / 2592000), 2))) AS relevance";
110 }
111 }
112
113 $sql = "SELECT objectID
114 ".($relevanceCalc ? ','.$relevanceCalc : ", '0' AS relevance")."
115 FROM ".SearchIndexManager::getTableName($objectTypeName)."
116 WHERE ".($fulltextCondition !== null ? $fulltextCondition : '')."
117 ".(($searchIndexCondition !== null && $searchIndexCondition->__toString()) ? ($fulltextCondition !== null ? "AND " : '').$searchIndexCondition : '')."
118 ".(!empty($orderBy) && $fulltextCondition === null ? 'ORDER BY '.$orderBy : '')."
119 LIMIT ".($limit == 1000 ? SearchEngine::INNER_SEARCH_LIMIT : $limit);
120
121 return [
122 'fulltextCondition' => $fulltextCondition,
123 'searchIndexCondition' => $searchIndexCondition,
124 'sql' => $sql
125 ];
126 }
127
128 /**
129 * Manipulates the search term (< and > used as quotation marks):
130 *
131 * - <test foo> becomes <+test* +foo*>
132 * - <test -foo bar> becomes <+test* -foo* +bar*>
133 * - <test "foo bar"> becomes <+test* +"foo bar">
134 *
135 * @see http://dev.mysql.com/doc/refman/5.5/en/fulltext-boolean.html
136 *
137 * @param string $query
138 * @return string
139 */
140 protected function parseSearchQuery($query) {
141 $query = StringUtil::trim($query);
142
143 // expand search terms with a * unless they're encapsulated with quotes
144 $inQuotes = false;
145 $previousChar = $tmp = '';
146 $controlCharacterOrSpace = false;
147 $chars = ['+', '-', '*'];
148 $ftMinWordLen = $this->getFulltextMinimumWordLength();
149 for ($i = 0, $length = mb_strlen($query); $i < $length; $i++) {
150 $char = mb_substr($query, $i, 1);
151
152 if ($inQuotes) {
153 if ($char == '"') {
154 $inQuotes = false;
155 }
156 }
157 else {
158 if ($char == '"') {
159 $inQuotes = true;
160 }
161 else {
162 if ($char == ' ' && !$controlCharacterOrSpace) {
163 $controlCharacterOrSpace = true;
164 $tmp .= '*';
165 }
166 else if (in_array($char, $chars)) {
167 $controlCharacterOrSpace = true;
168 }
169 else {
170 $controlCharacterOrSpace = false;
171 }
172 }
173 }
174
175 /*
176 * prepend a plus sign (logical AND) if ALL these conditions are given:
177 *
178 * 1) previous character:
179 * - is empty (start of string)
180 * - is a space (MySQL uses spaces to separate words)
181 *
182 * 2) not within quotation marks
183 *
184 * 3) current char:
185 * - is NOT +, - or *
186 */
187 if (($previousChar == '' || $previousChar == ' ') && !$inQuotes && !in_array($char, $chars)) {
188 // check if the term is shorter than the minimum fulltext word length
189 if ($i + $ftMinWordLen <= $length) {
190 $term = '';// $char;
191 for ($j = $i, $innerLength = $ftMinWordLen + $i; $j < $innerLength; $j++) {
192 $currentChar = mb_substr($query, $j, 1);
193 if ($currentChar == '"' || $currentChar == ' ' || in_array($currentChar, $chars)) {
194 break;
195 }
196
197 $term .= $currentChar;
198 }
199
200 if (mb_strlen($term) == $ftMinWordLen) {
201 $tmp .= '+';
202 }
203 }
204 }
205
206 $tmp .= $char;
207 $previousChar = $char;
208 }
209
210 // handle last char
211 if (!$inQuotes && !$controlCharacterOrSpace) {
212 $tmp .= '*';
213 }
214
215 return $tmp;
216 }
217
218 /**
219 * @inheritDoc
220 */
221 protected function getFulltextMinimumWordLength() {
222 if ($this->ftMinWordLen === null) {
223 $sql = "SHOW VARIABLES LIKE 'ft_min_word_len'";
224
225 try {
226 $statement = WCF::getDB()->prepareStatement($sql);
227 $statement->execute();
228 $row = $statement->fetchArray();
229 }
230 catch (DatabaseException $e) {
231 // fallback if user is disallowed to issue 'SHOW VARIABLES'
232 $row = ['Value' => 4];
233 }
234
235 $this->ftMinWordLen = $row['Value'];
236 }
237
238 return $this->ftMinWordLen;
239 }
240 }