Remove AbstractSearchEngine::parseSearchQuery()
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / search / mysql / MysqlSearchEngine.class.php
CommitLineData
b52f751b
AE
1<?php
2namespace wcf\system\search\mysql;
3use wcf\system\database\util\PreparedStatementConditionBuilder;
4use wcf\system\database\DatabaseException;
5use wcf\system\exception\SystemException;
6use wcf\system\search\AbstractSearchEngine;
0cd425be 7use wcf\system\search\SearchEngine;
b52f751b
AE
8use wcf\system\search\SearchIndexManager;
9use wcf\system\WCF;
7d059540 10use wcf\util\StringUtil;
b52f751b
AE
11
12/**
13 * Search engine using MySQL's FULLTEXT index.
14 *
15 * @author Alexander Ebert
7b7b9764 16 * @copyright 2001-2019 WoltLab GmbH
b52f751b 17 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
e71525e4 18 * @package WoltLabSuite\Core\System\Search
b52f751b
AE
19 */
20class MysqlSearchEngine extends AbstractSearchEngine {
21 /**
22 * MySQL's minimum word length for fulltext indices
23 * @var integer
24 */
0cd425be 25 protected $ftMinWordLen = null;
b52f751b 26
11767746 27 /**
0fcfe5f6 28 * @inheritDoc
11767746 29 */
058cbd6a 30 protected $specialCharacters = ['(', ')', '@', '+', '-', '"', '<', '>', '~', '*'];
11767746 31
b52f751b 32 /**
0fcfe5f6 33 * @inheritDoc
b52f751b 34 */
058cbd6a 35 public function search($q, array $objectTypes, $subjectOnly = false, PreparedStatementConditionBuilder $searchIndexCondition = null, array $additionalConditions = [], $orderBy = 'time DESC', $limit = 1000) {
b52f751b
AE
36 // build search query
37 $sql = '';
058cbd6a 38 $parameters = [];
b52f751b
AE
39 foreach ($objectTypes as $objectTypeName) {
40 $objectType = SearchEngine::getInstance()->getObjectType($objectTypeName);
0cd425be
AE
41
42 if (!empty($sql)) $sql .= "\nUNION ALL\n";
b52f751b 43 $additionalConditionsConditionBuilder = (isset($additionalConditions[$objectTypeName]) ? $additionalConditions[$objectTypeName] : null);
0cd425be
AE
44
45 $query = $objectType->getOuterSQLQuery($q, $searchIndexCondition, $additionalConditionsConditionBuilder);
46 if (empty($query)) {
47 $query = "SELECT ".$objectType->getIDFieldName()." AS objectID,
48 ".$objectType->getSubjectFieldName()." AS subject,
49 ".$objectType->getTimeFieldName()." AS time,
50 ".$objectType->getUsernameFieldName()." AS username,
51 '".$objectTypeName."' AS objectType
52 ".($orderBy == 'relevance ASC' || $orderBy == 'relevance DESC' ? ',search_index.relevance' : '')."
53 FROM ".$objectType->getTableName()."
54 INNER JOIN (
55 {WCF_SEARCH_INNER_JOIN}
56 ) search_index
57 ON (".$objectType->getIDFieldName()." = search_index.objectID)
58 ".$objectType->getJoins()."
59 ".(isset($additionalConditions[$objectTypeName]) ? $additionalConditions[$objectTypeName] : '');
b52f751b 60 }
0cd425be
AE
61
62 if (mb_strpos($query, '{WCF_SEARCH_INNER_JOIN}')) {
63 $innerJoin = $this->getInnerJoin($objectTypeName, $q, $subjectOnly, $searchIndexCondition, $orderBy, $limit);
64
65 $query = str_replace('{WCF_SEARCH_INNER_JOIN}', $innerJoin['sql'], $query);
66 if ($innerJoin['fulltextCondition'] !== null) $parameters = array_merge($parameters, $innerJoin['fulltextCondition']->getParameters());
b52f751b
AE
67 }
68
b52f751b
AE
69 if ($searchIndexCondition !== null) $parameters = array_merge($parameters, $searchIndexCondition->getParameters());
70 if (isset($additionalConditions[$objectTypeName])) $parameters = array_merge($parameters, $additionalConditions[$objectTypeName]->getParameters());
0cd425be
AE
71
72 $sql .= $query;
b52f751b
AE
73 }
74 if (empty($sql)) {
75 throw new SystemException('no object types given');
76 }
77
78 if (!empty($orderBy)) {
79 $sql .= " ORDER BY " . $orderBy;
80 }
81
82 // send search query
058cbd6a 83 $messages = [];
b52f751b
AE
84 $statement = WCF::getDB()->prepareStatement($sql, $limit);
85 $statement->execute($parameters);
86 while ($row = $statement->fetchArray()) {
058cbd6a 87 $messages[] = [
b52f751b
AE
88 'objectID' => $row['objectID'],
89 'objectType' => $row['objectType']
058cbd6a 90 ];
b52f751b
AE
91 }
92
93 return $messages;
94 }
95
96 /**
0fcfe5f6 97 * @inheritDoc
b52f751b 98 */
0cd425be
AE
99 public function getInnerJoin($objectTypeName, $q, $subjectOnly = false, PreparedStatementConditionBuilder $searchIndexCondition = null, $orderBy = 'time DESC', $limit = 1000) {
100 $fulltextCondition = null;
101 $relevanceCalc = '';
102 if (!empty($q)) {
103 $q = $this->parseSearchQuery($q);
b52f751b 104
0cd425be 105 $fulltextCondition = new PreparedStatementConditionBuilder(false);
058cbd6a 106 $fulltextCondition->add("MATCH (subject".(!$subjectOnly ? ', message, metaData' : '').") AGAINST (? IN BOOLEAN MODE)", [$q]);
b52f751b 107
0cd425be
AE
108 if ($orderBy == 'relevance ASC' || $orderBy == 'relevance DESC') {
109 $relevanceCalc = "MATCH (subject".(!$subjectOnly ? ', message, metaData' : '').") AGAINST ('".escapeString($q)."') + (5 / (1 + POW(LN(1 + (".TIME_NOW." - time) / 2592000), 2))) AS relevance";
b52f751b 110 }
b52f751b
AE
111 }
112
0cd425be 113 $sql = "SELECT objectID
957dea34 114 ".($relevanceCalc ? ','.$relevanceCalc : ", '0' AS relevance")."
0cd425be
AE
115 FROM ".SearchIndexManager::getTableName($objectTypeName)."
116 WHERE ".($fulltextCondition !== null ? $fulltextCondition : '')."
117 ".(($searchIndexCondition !== null && $searchIndexCondition->__toString()) ? ($fulltextCondition !== null ? "AND " : '').$searchIndexCondition : '')."
118 ".(!empty($orderBy) && $fulltextCondition === null ? 'ORDER BY '.$orderBy : '')."
4e798400 119 LIMIT ".($limit == 1000 ? SearchEngine::INNER_SEARCH_LIMIT : $limit);
b52f751b 120
058cbd6a 121 return [
0cd425be 122 'fulltextCondition' => $fulltextCondition,
0496fe5c 123 'searchIndexCondition' => $searchIndexCondition,
0cd425be 124 'sql' => $sql
058cbd6a 125 ];
b52f751b
AE
126 }
127
7d059540
TD
128 /**
129 * Manipulates the search term (< and > used as quotation marks):
130 *
131 * - <test foo> becomes <+test* +foo*>
132 * - <test -foo bar> becomes <+test* -foo* +bar*>
133 * - <test "foo bar"> becomes <+test* +"foo bar">
134 *
135 * @see http://dev.mysql.com/doc/refman/5.5/en/fulltext-boolean.html
136 *
137 * @param string $query
138 * @return string
139 */
140 protected function parseSearchQuery($query) {
141 $query = StringUtil::trim($query);
142
143 // expand search terms with a * unless they're encapsulated with quotes
144 $inQuotes = false;
145 $previousChar = $tmp = '';
146 $controlCharacterOrSpace = false;
147 $chars = ['+', '-', '*'];
148 $ftMinWordLen = $this->getFulltextMinimumWordLength();
149 for ($i = 0, $length = mb_strlen($query); $i < $length; $i++) {
150 $char = mb_substr($query, $i, 1);
151
152 if ($inQuotes) {
153 if ($char == '"') {
154 $inQuotes = false;
155 }
156 }
157 else {
158 if ($char == '"') {
159 $inQuotes = true;
160 }
161 else {
162 if ($char == ' ' && !$controlCharacterOrSpace) {
163 $controlCharacterOrSpace = true;
164 $tmp .= '*';
165 }
166 else if (in_array($char, $chars)) {
167 $controlCharacterOrSpace = true;
168 }
169 else {
170 $controlCharacterOrSpace = false;
171 }
172 }
173 }
174
175 /*
176 * prepend a plus sign (logical AND) if ALL these conditions are given:
177 *
178 * 1) previous character:
179 * - is empty (start of string)
180 * - is a space (MySQL uses spaces to separate words)
181 *
182 * 2) not within quotation marks
183 *
184 * 3) current char:
185 * - is NOT +, - or *
186 */
187 if (($previousChar == '' || $previousChar == ' ') && !$inQuotes && !in_array($char, $chars)) {
188 // check if the term is shorter than the minimum fulltext word length
189 if ($i + $ftMinWordLen <= $length) {
190 $term = '';// $char;
191 for ($j = $i, $innerLength = $ftMinWordLen + $i; $j < $innerLength; $j++) {
192 $currentChar = mb_substr($query, $j, 1);
193 if ($currentChar == '"' || $currentChar == ' ' || in_array($currentChar, $chars)) {
194 break;
195 }
196
197 $term .= $currentChar;
198 }
199
200 if (mb_strlen($term) == $ftMinWordLen) {
201 $tmp .= '+';
202 }
203 }
204 }
205
206 $tmp .= $char;
207 $previousChar = $char;
208 }
209
210 // handle last char
211 if (!$inQuotes && !$controlCharacterOrSpace) {
212 $tmp .= '*';
213 }
214
215 return $tmp;
216 }
217
b52f751b 218 /**
0fcfe5f6 219 * @inheritDoc
b52f751b 220 */
0cd425be
AE
221 protected function getFulltextMinimumWordLength() {
222 if ($this->ftMinWordLen === null) {
ece62940 223 $sql = "SHOW VARIABLES LIKE 'ft_min_word_len'";
b52f751b
AE
224
225 try {
ece62940
AE
226 $statement = WCF::getDB()->prepareStatement($sql);
227 $statement->execute();
b52f751b
AE
228 $row = $statement->fetchArray();
229 }
230 catch (DatabaseException $e) {
231 // fallback if user is disallowed to issue 'SHOW VARIABLES'
058cbd6a 232 $row = ['Value' => 4];
b52f751b
AE
233 }
234
0cd425be 235 $this->ftMinWordLen = $row['Value'];
b52f751b
AE
236 }
237
0cd425be 238 return $this->ftMinWordLen;
b52f751b
AE
239 }
240}