4283eb8f99232d1dbc35755c930dca897f446360
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / search / AbstractSearchEngine.class.php
1 <?php
2 namespace wcf\system\search;
3 use wcf\system\database\util\PreparedStatementConditionBuilder;
4 use wcf\system\SingletonFactory;
5 use wcf\util\StringUtil;
6
/**
 * Default implementation for search engines. This class should be extended by
 * all search engines to preserve compatibility in case of interface changes.
 * 
 * @author	Alexander Ebert
 * @copyright	2001-2019 WoltLab GmbH
 * @license	GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package	WoltLabSuite\Core\System\Search
 */
abstract class AbstractSearchEngine extends SingletonFactory implements ISearchEngine {
	/**
	 * class name for preferred condition builder
	 * @var	string
	 */
	protected $conditionBuilderClassName = PreparedStatementConditionBuilder::class;
	
	/**
	 * list of engine-specific special characters, these are stripped
	 * by `removeSpecialCharacters()`
	 * @var	string[]
	 */
	protected $specialCharacters = [];
	
	/**
	 * @inheritDoc
	 */
	public function getConditionBuilderClassName() {
		return $this->conditionBuilderClassName;
	}
	
	/**
	 * Rewrites the search term into boolean fulltext syntax. The examples below
	 * use < and > as quotation marks and assume that every word is at least as
	 * long as the minimum fulltext word length:
	 * 
	 *  - <test foo> becomes <+test* +foo*>
	 *  - <test -foo bar> becomes <+test* -foo* +bar*>
	 *  - <test "foo bar"> becomes <+test* "foo bar">
	 *  - <"foo bar" baz> becomes <"foo bar" +baz*>
	 * 
	 * Rules applied while walking the query character by character:
	 * 
	 *  - every unquoted term is terminated with a `*` (prefix search)
	 *  - an unquoted term that starts the query or follows a space is prefixed
	 *    with `+`, unless it already starts with `+`, `-` or `*` or is shorter
	 *    than the minimum fulltext word length
	 *  - quoted phrases are copied verbatim, they receive neither `+` nor `*`
	 * 
	 * NOTE(review): earlier revisions of this comment promised a `+` in front of
	 * quoted phrases, but the implementation never emitted one. The behavior is
	 * kept as is, because turning phrases into mandatory terms would change the
	 * result set; confirm the intended semantics before changing it.
	 * 
	 * @see	http://dev.mysql.com/doc/refman/5.5/en/fulltext-boolean.html
	 * 
	 * @param	string		$query
	 * @return	string
	 */
	protected function parseSearchQuery($query) {
		$query = StringUtil::trim($query);
		
		// expand search terms with a * unless they're encapsulated with quotes
		$inQuotes = false;
		$previousChar = $tmp = '';
		
		// `true` if the most recently processed character already terminated the
		// current term (space, operator or closing quote), i.e. no `*` is pending
		$controlCharacterOrSpace = false;
		$chars = ['+', '-', '*'];
		$ftMinWordLen = $this->getFulltextMinimumWordLength();
		for ($i = 0, $length = mb_strlen($query); $i < $length; $i++) {
			$char = mb_substr($query, $i, 1);
			
			if ($inQuotes) {
				if ($char === '"') {
					$inQuotes = false;
					
					// a phrase is complete once the closing quote is reached and
					// must not be followed by the wildcard operator
					$controlCharacterOrSpace = true;
				}
			}
			else if ($char === '"') {
				$inQuotes = true;
			}
			else if ($char === ' ') {
				// only the first space after a term terminates it, consecutive
				// spaces must neither emit another `*` nor reset the flag
				if (!$controlCharacterOrSpace) {
					$tmp .= '*';
				}
				
				$controlCharacterOrSpace = true;
			}
			else {
				$controlCharacterOrSpace = in_array($char, $chars, true);
			}
			
			/*
			 * prepend a plus sign (logical AND) if ALL these conditions are given:
			 *
			 * 1) previous character:
			 *   - is empty (start of string)
			 *   - is a space (MySQL uses spaces to separate words)
			 *
			 * 2) not within quotation marks (this includes the opening quote itself)
			 *
			 * 3) current char:
			 *   - is NOT +, - or *
			 */
			if (($previousChar === '' || $previousChar === ' ') && !$inQuotes && !in_array($char, $chars, true)) {
				// the term is only made mandatory if it reaches the minimum fulltext
				// word length, shorter terms are left without a plus sign
				if ($i + $ftMinWordLen <= $length) {
					$term = '';
					for ($j = $i, $innerLength = $ftMinWordLen + $i; $j < $innerLength; $j++) {
						$currentChar = mb_substr($query, $j, 1);
						if ($currentChar === '"' || $currentChar === ' ' || in_array($currentChar, $chars, true)) {
							// the term ended before reaching the minimum length
							break;
						}
						
						$term .= $currentChar;
					}
					
					if (mb_strlen($term) == $ftMinWordLen) {
						$tmp .= '+';
					}
				}
			}
			
			$tmp .= $char;
			$previousChar = $char;
		}
		
		// handle last char: terminate a still pending unquoted term
		if (!$inQuotes && !$controlCharacterOrSpace) {
			$tmp .= '*';
		}
		
		return $tmp;
	}
	
	/**
	 * Returns minimum word length for fulltext indices.
	 * 
	 * @return	integer
	 */
	abstract protected function getFulltextMinimumWordLength();
	
	/**
	 * @inheritDoc
	 */
	public function removeSpecialCharacters($string) {
		// nothing to strip if the engine does not declare any special characters
		if (empty($this->specialCharacters)) {
			return $string;
		}
		
		return str_replace($this->specialCharacters, '', $string);
	}
}