Commit | Line | Data |
---|---|---|
ac9b0f6e | 1 | <?php |
a9229942 | 2 | |
ac9b0f6e | 3 | namespace wcf\system; |
a9229942 | 4 | |
a17de04e | 5 | use wcf\system\exception\SystemException; |
ac9b0f6e TD |
6 | |
/**
 * Represents a regular expression.
 *
 * The constructor wraps a delimiter-less pattern in the internal delimiter and
 * appends the PCRE modifiers selected by the modifier bitmask. `match()`,
 * `replace()` and `split()` delegate to the corresponding `preg_*` function and
 * pass the result through `checkResult()`, which converts PCRE runtime errors
 * into a SystemException.
 *
 * @author      Tim Duesterhus
 * @copyright   2001-2019 WoltLab GmbH
 * @license     GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package     WoltLabSuite\Core\System
 */
final class Regex
{
    /**
     * delimiter used internally
     * @var string
     */
    const REGEX_DELIMITER = '/';

    /**
     * indicates that no modifier is applied
     * @var int
     */
    const MODIFIER_NONE = 0;

    /**
     * indicates case insensitive matching
     * @var int
     */
    const CASE_INSENSITIVE = 1;

    /**
     * indicates ungreedy matching
     * @var int
     */
    const UNGREEDY = 2;

    /**
     * indicates that no extra time is spent on analysing
     * @var int
     */
    const NO_ANALYSE = 8;

    /**
     * indicates that whitespaces are ignored in regex
     * @var int
     */
    const IGNORE_WHITESPACE = 16;

    /**
     * indicates that a dot matches every char
     * @var int
     */
    const DOT_ALL = 32;

    /**
     * indicates that ^/$ match start and end of a line instead of the whole string
     * @var int
     */
    const MULTILINE = 64;

    /**
     * indicates that pattern string is treated as UTF-8.
     * @var int
     */
    const UTF_8 = 128;

    /**
     * indicates that no flags are set
     * @var int
     */
    const FLAGS_NONE = 0;

    /**
     * indicates that default flags are set
     * @var int
     */
    const FLAGS_DEFAULT = 1;

    /**
     * captures the offset of a match (all excluding replace)
     * @var int
     */
    const CAPTURE_OFFSET = 2;

    /**
     * indicates default pattern ordering (match all only)
     * @var int
     */
    const ORDER_MATCH_BY_PATTERN = 4;

    /**
     * indicates alternative set ordering (match all only)
     * @var int
     */
    const ORDER_MATCH_BY_SET = 8;

    /**
     * indicates that only non-empty pieces are returned (split only)
     * @var int
     */
    const SPLIT_NON_EMPTY_ONLY = 16;

    /**
     * indicates that the split delimiter is returned as well (split only)
     * @var int
     */
    const CAPTURE_SPLIT_DELIMITER = 32;

    /**
     * compiled regex (delimiters and modifiers included)
     * @var string
     */
    private $regex = '';

    /**
     * matches written by the last call to match()
     * @var array
     */
    private $matches = [];

    /**
     * Creates a regex.
     *
     * The given pattern must not carry delimiters or modifiers; both are added
     * here. Delimiter characters inside the pattern are escaped, unless they
     * are already escaped.
     *
     * @param   string  $regex      pattern without delimiters
     * @param   int     $modifier   bitmask of the modifier constants of this class
     */
    public function __construct($regex, $modifier = self::MODIFIER_NONE)
    {
        // escape delimiter and wrap the pattern
        $this->regex = self::REGEX_DELIMITER . self::escapeDelimiter($regex) . self::REGEX_DELIMITER;

        // add modifiers; the order is kept stable so that getRegex() stays predictable
        if ($modifier & self::CASE_INSENSITIVE) {
            $this->regex .= 'i';
        }
        if ($modifier & self::UNGREEDY) {
            $this->regex .= 'U';
        }
        // inverted on purpose: the 'S' modifier is added unless it was opted out of
        if (!($modifier & self::NO_ANALYSE)) {
            $this->regex .= 'S';
        }
        if ($modifier & self::IGNORE_WHITESPACE) {
            $this->regex .= 'x';
        }
        if ($modifier & self::DOT_ALL) {
            $this->regex .= 's';
        }
        if ($modifier & self::MULTILINE) {
            $this->regex .= 'm';
        }
        if ($modifier & self::UTF_8) {
            $this->regex .= 'u';
        }
    }

    /**
     * Escapes every bare delimiter inside the given pattern.
     *
     * A plain string replacement would turn an already escaped delimiter
     * (`\/`) into `\\/`, i.e. an escaped backslash followed by a bare
     * delimiter, which ends the pattern prematurely. The pattern is therefore
     * consumed token-wise: a backslash together with the byte following it is
     * copied verbatim, only delimiters outside of such a pair are escaped.
     *
     * @param   string  $regex
     * @return  string
     */
    private static function escapeDelimiter($regex)
    {
        $delimiter = self::REGEX_DELIMITER;

        $escaped = \preg_replace_callback(
            '~\\\\.|' . \preg_quote($delimiter, '~') . '~s',
            static function (array $token) use ($delimiter) {
                return $token[0] === $delimiter ? '\\' . $delimiter : $token[0];
            },
            $regex
        );

        // null signals a PCRE failure; fall back to the plain replacement in that case
        if ($escaped === null) {
            $escaped = \str_replace($delimiter, '\\' . $delimiter, $regex);
        }

        return $escaped;
    }

    /**
     * Creates a regex; static counterpart of the constructor.
     *
     * @param   string  $regex      pattern without delimiters
     * @param   int     $modifier   bitmask of the modifier constants of this class
     * @return  Regex
     */
    public static function compile($regex, $modifier = self::MODIFIER_NONE)
    {
        return new self($regex, $modifier);
    }

    /**
     * Shortcut for match() with default arguments.
     *
     * @param   string  $string     string to match
     * @return  int                 return value of match()
     */
    public function __invoke($string)
    {
        return $this->match($string);
    }

    /**
     * Checks whether the regex is syntactically correct.
     *
     * The regex is considered valid if matching it against an empty string
     * does not raise a SystemException.
     *
     * @return  bool
     */
    public function isValid()
    {
        try {
            $this->match(''); // we don't care about the result, we only care about the exception

            return true;
        } catch (SystemException $e) {
            // we have a syntax error now
            return false;
        }
    }

    // @codingStandardsIgnoreStart

    /**
     * Checks whether the regex matches the given string.
     *
     * The matches are stored and can be read via getMatches(). When all matches
     * are collected, they are ordered by pattern unless ORDER_MATCH_BY_SET is
     * requested without ORDER_MATCH_BY_PATTERN.
     *
     * @param   string  $string     string to match
     * @param   bool    $all        indicates if all matches are collected
     * @param   int     $flags      match flags
     * @return  int|false           return value of preg_match(_all)
     * @throws  SystemException     if PCRE reports an error
     */
    public function match($string, $all = false, $flags = self::FLAGS_DEFAULT)
    {
        $matchFlags = 0;
        if ($flags & self::CAPTURE_OFFSET) {
            $matchFlags |= \PREG_OFFSET_CAPTURE;
        }

        if (!$all) {
            return $this->checkResult(\preg_match($this->regex, $string, $this->matches, $matchFlags), 'match');
        }

        // Exactly one ordering must be handed to preg_match_all(): combining the default
        // flags with ORDER_MATCH_BY_SET used to set both orderings, which is invalid.
        // An explicit request for set ordering therefore takes precedence over the default.
        $orderBySet = ($flags & self::ORDER_MATCH_BY_SET) && !($flags & self::ORDER_MATCH_BY_PATTERN);
        $matchFlags |= $orderBySet ? \PREG_SET_ORDER : \PREG_PATTERN_ORDER;

        return $this->checkResult(\preg_match_all($this->regex, $string, $this->matches, $matchFlags), 'match');
    }

    /**
     * Replaces part of the string with the regex.
     *
     * Callback and Closure instances are executed for each match, every other
     * replacement is handed to preg_replace() as is.
     *
     * @param   string  $string
     * @param   mixed   $replacement    replacement-string or closure
     * @return  string
     * @throws  SystemException         if PCRE reports an error
     */
    public function replace($string, $replacement)
    {
        if ($replacement instanceof Callback || $replacement instanceof \Closure) {
            return $this->checkResult(\preg_replace_callback($this->regex, $replacement, $string), 'replace');
        }

        return $this->checkResult(\preg_replace($this->regex, $replacement, $string), 'replace');
    }

    /**
     * Splits the string with the regex.
     *
     * @param   string  $string
     * @param   int     $flags      split flags
     * @return  string[]
     * @throws  SystemException     if PCRE reports an error
     */
    public function split($string, $flags = self::FLAGS_DEFAULT)
    {
        $splitFlags = 0;
        if ($flags & self::CAPTURE_OFFSET) {
            $splitFlags |= \PREG_SPLIT_OFFSET_CAPTURE;
        }
        if ($flags & self::SPLIT_NON_EMPTY_ONLY) {
            $splitFlags |= \PREG_SPLIT_NO_EMPTY;
        }
        if ($flags & self::CAPTURE_SPLIT_DELIMITER) {
            $splitFlags |= \PREG_SPLIT_DELIM_CAPTURE;
        }

        return $this->checkResult(\preg_split($this->regex, $string, -1, $splitFlags), 'split');
    }

    // @codingStandardsIgnoreEnd

    /**
     * Checks whether there was success.
     *
     * Results other than `false` and `null` are returned untouched. Otherwise
     * the last PCRE error decides: without a recorded error the result is
     * returned as is, any other error code raises an exception.
     *
     * @param   mixed   $result     return value of a preg_* function
     * @param   string  $method     name of the calling operation, used in the error message
     * @return  mixed
     * @throws  SystemException
     */
    private function checkResult($result, $method = '')
    {
        if ($result !== false && $result !== null) {
            return $result;
        }

        $errorCode = \preg_last_error();
        switch ($errorCode) {
            case \PREG_NO_ERROR:
                return $result;
            case \PREG_INTERNAL_ERROR:
                $error = 'Internal error';
                break;
            case \PREG_BACKTRACK_LIMIT_ERROR:
                $error = 'Backtrack limit was exhausted';
                break;
            case \PREG_RECURSION_LIMIT_ERROR:
                $error = 'Recursion limit was exhausted';
                break;
            case \PREG_BAD_UTF8_ERROR:
                $error = 'Bad UTF8';
                break;
            case \PREG_BAD_UTF8_OFFSET_ERROR:
                $error = 'Bad UTF8 offset';
                break;
            default:
                // PREG_JIT_STACKLIMIT_ERROR is not available on every PHP version,
                // thus it is only read after its existence was confirmed
                if (\defined('PREG_JIT_STACKLIMIT_ERROR') && $errorCode === \PREG_JIT_STACKLIMIT_ERROR) {
                    $error = 'JIT stack limit was exhausted';
                } else {
                    $error = 'Unknown error';
                }
                break;
        }

        throw new SystemException(
            'Could not execute ' . ($method ? $method . ' on ' : '') . $this->regex . ': ' . $error
        );
    }

    /**
     * Returns the matches of the last string.
     *
     * @return  array
     */
    public function getMatches()
    {
        return $this->matches;
    }

    /**
     * Returns the compiled regex.
     *
     * @return  string
     */
    public function getRegex()
    {
        return $this->regex;
    }
}