Merge branch '5.3'
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / Regex.class.php
CommitLineData
ac9b0f6e 1<?php
a9229942 2
ac9b0f6e 3namespace wcf\system;
a9229942 4
a17de04e 5use wcf\system\exception\SystemException;
ac9b0f6e
TD
6
/**
 * Represents a regular expression.
 *
 * Thin object wrapper around the `preg_*` functions: the constructor builds the
 * delimited pattern string once, the instance methods run it and convert PCRE
 * failures into a `SystemException`.
 *
 * @author Tim Duesterhus
 * @copyright 2001-2019 WoltLab GmbH
 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package WoltLabSuite\Core\System
 */
final class Regex
{
    /**
     * delimiter used internally
     * @var string
     */
    const REGEX_DELIMITER = '/';

    /**
     * indicates that no modifier is applied
     * @var int
     */
    const MODIFIER_NONE = 0;

    /**
     * indicates case insensitive matching
     * @var int
     */
    const CASE_INSENSITIVE = 1;

    /**
     * indicates ungreedy matching
     * @var int
     */
    const UNGREEDY = 2;

    /**
     * indicates that no extra time is spent on analysing
     * @var int
     */
    const NO_ANALYSE = 8;

    /**
     * indicates that whitespaces are ignored in regex
     * @var int
     */
    const IGNORE_WHITESPACE = 16;

    /**
     * indicates that a dot matches every char
     * @var int
     */
    const DOT_ALL = 32;

    /**
     * indicates that ^/$ match start and end of a line instead of the whole string
     * @var int
     */
    const MULTILINE = 64;

    /**
     * indicates that pattern string is treated as UTF-8.
     * @var int
     */
    const UTF_8 = 128;

    /**
     * indicates that no flags are set
     * @var int
     */
    const FLAGS_NONE = 0;

    /**
     * indicates that default flags are set
     * @var int
     */
    const FLAGS_DEFAULT = 1;

    /**
     * captures the offset of a match (all operations except replace)
     * @var int
     */
    const CAPTURE_OFFSET = 2;

    /**
     * indicates default pattern ordering (match all only)
     * @var int
     */
    const ORDER_MATCH_BY_PATTERN = 4;

    /**
     * indicates alternative set ordering (match all only)
     * @var int
     */
    const ORDER_MATCH_BY_SET = 8;

    /**
     * indicates that only non-empty pieces are returned (split only)
     * @var int
     */
    const SPLIT_NON_EMPTY_ONLY = 16;

    /**
     * indicates that the split delimiter is returned as well (split only)
     * @var int
     */
    const CAPTURE_SPLIT_DELIMITER = 32;

    /**
     * compiled regex (delimiters and modifier letters included)
     * @var string
     */
    private $regex = '';

    /**
     * matches collected by the most recent call to match()
     * @var array
     */
    private $matches = [];

    /**
     * Creates a regex.
     *
     * The pattern is passed without delimiters; they are added here and any
     * unescaped occurrence of the delimiter inside the pattern is escaped.
     *
     * @param string $regex    pattern without delimiters
     * @param int    $modifier bitmask of the modifier constants of this class
     */
    public function __construct($regex, $modifier = self::MODIFIER_NONE)
    {
        $this->regex = self::REGEX_DELIMITER . self::escapeDelimiter($regex) . self::REGEX_DELIMITER;

        // modifier letters are appended in a fixed order so that the compiled
        // string is stable for a given pattern/bitmask pair
        if ($modifier & self::CASE_INSENSITIVE) {
            $this->regex .= 'i';
        }
        if ($modifier & self::UNGREEDY) {
            $this->regex .= 'U';
        }
        // inverted flag: the 'S' modifier is added unless NO_ANALYSE is requested
        if (!($modifier & self::NO_ANALYSE)) {
            $this->regex .= 'S';
        }
        if ($modifier & self::IGNORE_WHITESPACE) {
            $this->regex .= 'x';
        }
        if ($modifier & self::DOT_ALL) {
            $this->regex .= 's';
        }
        if ($modifier & self::MULTILINE) {
            $this->regex .= 'm';
        }
        if ($modifier & self::UTF_8) {
            $this->regex .= 'u';
        }
    }

    /**
     * Escapes every not yet escaped delimiter inside the given pattern.
     *
     * Existing escape sequences are copied verbatim. Blindly prefixing each
     * delimiter would turn an already escaped `\/` into `\\/`, i.e. an escaped
     * backslash followed by a bare delimiter that terminates the pattern early.
     *
     * @param string $regex pattern without delimiters
     * @return string
     */
    private static function escapeDelimiter($regex)
    {
        $regex = (string)$regex;
        $length = \strlen($regex);
        $escaped = '';

        for ($i = 0; $i < $length; $i++) {
            $char = $regex[$i];

            if ($char === '\\' && $i + 1 < $length) {
                // backslash plus the following byte form one unit, keep both as they are
                $escaped .= $char . $regex[++$i];
                continue;
            }

            if ($char === self::REGEX_DELIMITER) {
                $escaped .= '\\';
            }
            $escaped .= $char;
        }

        return $escaped;
    }

    /**
     * Static shortcut for the constructor, useful for chaining.
     *
     * @param string $regex    pattern without delimiters
     * @param int    $modifier bitmask of the modifier constants of this class
     * @return self
     */
    public static function compile($regex, $modifier = self::MODIFIER_NONE)
    {
        return new self($regex, $modifier);
    }

    /**
     * Allows using the object as a callable; forwards to match() with its defaults.
     *
     * @param string $string string to match
     * @return int return value of preg_match
     */
    public function __invoke($string)
    {
        return $this->match($string);
    }

    /**
     * Checks whether the regex is syntactically correct.
     *
     * Runs the regex against an empty string and reports `false` when that
     * attempt raises a SystemException.
     *
     * @return bool
     */
    public function isValid()
    {
        try {
            $this->match(''); // we don't care about the result, we only care about the exception

            return true;
        } catch (SystemException $e) {
            // we have a syntax error now
            return false;
        }
    }

    // @codingStandardsIgnoreStart

    /**
     * Checks whether the regex matches the given string.
     *
     * The captured groups are stored and can be fetched via getMatches().
     *
     * @param string $string string to match
     * @param bool   $all    indicates if all matches are collected
     * @param int    $flags  match flags
     * @return int return value of preg_match(_all)
     * @throws SystemException if PCRE reports an error
     */
    public function match($string, $all = false, $flags = self::FLAGS_DEFAULT)
    {
        $matchFlags = 0;
        if ($flags & self::CAPTURE_OFFSET) {
            $matchFlags |= \PREG_OFFSET_CAPTURE;
        }

        if (!$all) {
            return $this->checkResult(\preg_match($this->regex, $string, $this->matches, $matchFlags), 'match');
        }

        $bySet = (bool)($flags & self::ORDER_MATCH_BY_SET);
        $byPattern = (bool)($flags & self::ORDER_MATCH_BY_PATTERN);

        // The two orderings are mutually exclusive for preg_match_all, thus at most
        // one of them may be passed on. An explicit set ordering wins over the
        // default; requesting both orderings falls back to pattern ordering.
        if ($bySet && !$byPattern) {
            $matchFlags |= \PREG_SET_ORDER;
        } elseif ($byPattern && !$bySet) {
            $matchFlags |= \PREG_PATTERN_ORDER;
        } elseif ($flags & self::FLAGS_DEFAULT) {
            $matchFlags |= \PREG_PATTERN_ORDER;
        }

        return $this->checkResult(\preg_match_all($this->regex, $string, $this->matches, $matchFlags), 'match');
    }

    /**
     * Replaces part of the string with the regex.
     *
     * Callback and Closure instances are invoked per match, any other value is
     * handed to preg_replace as the replacement.
     *
     * @param string $string
     * @param mixed  $replacement replacement-string or closure
     * @return string
     * @throws SystemException if PCRE reports an error
     */
    public function replace($string, $replacement)
    {
        if ($replacement instanceof Callback || $replacement instanceof \Closure) {
            return $this->checkResult(\preg_replace_callback($this->regex, $replacement, $string), 'replace');
        }

        return $this->checkResult(\preg_replace($this->regex, $replacement, $string), 'replace');
    }

    /**
     * Splits the string with the regex.
     *
     * @param string $string
     * @param int    $flags bitmask of CAPTURE_OFFSET, SPLIT_NON_EMPTY_ONLY and CAPTURE_SPLIT_DELIMITER
     * @return string[]
     * @throws SystemException if PCRE reports an error
     */
    public function split($string, $flags = self::FLAGS_DEFAULT)
    {
        $splitFlags = 0;
        if ($flags & self::CAPTURE_OFFSET) {
            $splitFlags |= \PREG_SPLIT_OFFSET_CAPTURE;
        }
        if ($flags & self::SPLIT_NON_EMPTY_ONLY) {
            $splitFlags |= \PREG_SPLIT_NO_EMPTY;
        }
        if ($flags & self::CAPTURE_SPLIT_DELIMITER) {
            $splitFlags |= \PREG_SPLIT_DELIM_CAPTURE;
        }

        return $this->checkResult(\preg_split($this->regex, $string, -1, $splitFlags), 'split');
    }

    // @codingStandardsIgnoreEnd

    /**
     * Checks whether there was success.
     *
     * A result other than `false`/`null` is returned untouched. Otherwise the
     * last PCRE error decides: without a recorded error the result is returned
     * as well, any recorded error is turned into an exception.
     *
     * @param mixed  $result return value of the preg_* call
     * @param string $method name of the operation, used in the exception message
     * @return mixed
     * @throws SystemException
     */
    private function checkResult($result, $method = '')
    {
        if ($result !== false && $result !== null) {
            return $result;
        }

        switch (\preg_last_error()) {
            case \PREG_NO_ERROR:
                // nothing was recorded by PCRE, hand the raw result back
                return $result;
            case \PREG_INTERNAL_ERROR:
                $error = 'Internal error';
                break;
            case \PREG_BACKTRACK_LIMIT_ERROR:
                $error = 'Backtrack limit was exhausted';
                break;
            case \PREG_RECURSION_LIMIT_ERROR:
                $error = 'Recursion limit was exhausted';
                break;
            case \PREG_BAD_UTF8_ERROR:
                $error = 'Bad UTF8';
                break;
            case \PREG_BAD_UTF8_OFFSET_ERROR:
                $error = 'Bad UTF8 offset';
                break;
            case \PREG_JIT_STACKLIMIT_ERROR:
                $error = 'JIT stack limit was exhausted';
                break;
            default:
                $error = 'Unknown error';
                break;
        }

        throw new SystemException(
            'Could not execute ' . ($method ? $method . ' on ' : '') . $this->regex . ': ' . $error
        );
    }

    /**
     * Returns the matches of the last string.
     *
     * @return array
     */
    public function getMatches()
    {
        return $this->matches;
    }

    /**
     * Returns the compiled regex.
     *
     * @return string
     */
    public function getRegex()
    {
        return $this->regex;
    }
}