Merge branch '3.0'
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / Regex.class.php
1 <?php
2 namespace wcf\system;
3 use wcf\system\exception\SystemException;
4
/**
 * Represents a regular expression.
 *
 * Wraps the preg_* functions: the pattern is given without delimiters, modifiers are
 * passed as a bitmask of class constants and PCRE failures are reported as exceptions.
 *
 * @author	Tim Duesterhus
 * @copyright	2001-2018 WoltLab GmbH
 * @license	GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package	WoltLabSuite\Core\System
 */
final class Regex {
	/**
	 * delimiter used internally
	 * @var	string
	 */
	const REGEX_DELIMITER = '/';

	/**
	 * indicates that no modifier is applied
	 * @var	integer
	 */
	const MODIFIER_NONE = 0;

	/**
	 * indicates case insensitive matching (appends the `i` modifier)
	 * @var	integer
	 */
	const CASE_INSENSITIVE = 1;

	/**
	 * indicates ungreedy matching (appends the `U` modifier)
	 * @var	integer
	 */
	const UNGREEDY = 2;

	/**
	 * indicates that no extra time is spent on analysing (the `S` modifier, which is
	 * appended by default, is omitted)
	 * @var	integer
	 */
	const NO_ANALYSE = 8;

	/**
	 * indicates that whitespace is ignored in the regex (appends the `x` modifier)
	 * @var	integer
	 */
	const IGNORE_WHITESPACE = 16;

	/**
	 * indicates that a dot matches every char (appends the `s` modifier)
	 * @var	integer
	 */
	const DOT_ALL = 32;

	/**
	 * indicates that ^/$ match start and end of a line instead of the whole string
	 * (appends the `m` modifier)
	 * @var	integer
	 */
	const MULTILINE = 64;

	/**
	 * indicates that the pattern string is treated as UTF-8 (appends the `u` modifier)
	 * @var	integer
	 */
	const UTF_8 = 128;

	/**
	 * indicates that no flags are set
	 * @var	integer
	 */
	const FLAGS_NONE = 0;

	/**
	 * indicates that default flags are set
	 * @var	integer
	 */
	const FLAGS_DEFAULT = 1;

	/**
	 * captures the offset of a match (all operations excluding replace)
	 * @var	integer
	 */
	const CAPTURE_OFFSET = 2;

	/**
	 * indicates default pattern ordering (match all only)
	 * @var	integer
	 */
	const ORDER_MATCH_BY_PATTERN = 4;

	/**
	 * indicates alternative set ordering (match all only)
	 * @var	integer
	 */
	const ORDER_MATCH_BY_SET = 8;

	/**
	 * indicates that only non-empty pieces are returned (split only)
	 * @var	integer
	 */
	const SPLIT_NON_EMPTY_ONLY = 16;

	/**
	 * indicates that the split delimiter is returned as well (split only)
	 * @var	integer
	 */
	const CAPTURE_SPLIT_DELIMITER = 32;

	/**
	 * compiled regex (delimiters and modifiers included)
	 * @var	string
	 */
	private $regex = '';

	/**
	 * matches of the last call to match()
	 * @var	array
	 */
	private $matches = [];

	/**
	 * Creates a regex.
	 *
	 * The pattern is wrapped in REGEX_DELIMITER; delimiters inside the pattern are escaped
	 * automatically, delimiters that are already escaped are left untouched.
	 *
	 * @param	string		$regex		pattern without delimiters and modifiers
	 * @param	integer		$modifier	bitmask of the modifier constants of this class
	 */
	public function __construct($regex, $modifier = self::MODIFIER_NONE) {
		// add delimiter around the delimiter-safe pattern
		$this->regex = self::REGEX_DELIMITER.self::escapeDelimiter($regex).self::REGEX_DELIMITER;

		// add modifiers
		if ($modifier & self::CASE_INSENSITIVE) $this->regex .= 'i';
		if ($modifier & self::UNGREEDY) $this->regex .= 'U';
		if (!($modifier & self::NO_ANALYSE)) $this->regex .= 'S';
		if ($modifier & self::IGNORE_WHITESPACE) $this->regex .= 'x';
		if ($modifier & self::DOT_ALL) $this->regex .= 's';
		if ($modifier & self::MULTILINE) $this->regex .= 'm';
		if ($modifier & self::UTF_8) $this->regex .= 'u';
	}

	/**
	 * Escapes every unescaped occurrence of the delimiter within the given pattern.
	 *
	 * A backslash and the character following it are copied verbatim. Blindly prefixing
	 * each delimiter with a backslash would turn an already escaped delimiter (`\/`) into
	 * an escaped backslash followed by a bare delimiter (`\\/`), which terminates the
	 * pattern prematurely.
	 *
	 * @param	string		$regex
	 * @return	string
	 */
	private static function escapeDelimiter($regex) {
		$regex = (string) $regex;
		$length = strlen($regex);
		$escaped = '';

		// byte-wise scanning is safe: neither the backslash nor the delimiter
		// can be part of a UTF-8 multi-byte sequence
		for ($i = 0; $i < $length; $i++) {
			$char = $regex[$i];

			if ($char === '\\' && $i + 1 < $length) {
				// existing escape sequence, keep both bytes as they are
				$escaped .= $char.$regex[++$i];
			}
			else if ($char === self::REGEX_DELIMITER) {
				$escaped .= '\\'.$char;
			}
			else {
				$escaped .= $char;
			}
		}

		return $escaped;
	}

	/**
	 * Creates a regex, static shortcut for the constructor.
	 *
	 * @param	string		$regex		pattern without delimiters and modifiers
	 * @param	integer		$modifier	bitmask of the modifier constants of this class
	 * @return	Regex
	 */
	public static function compile($regex, $modifier = self::MODIFIER_NONE) {
		return new self($regex, $modifier);
	}

	/**
	 * Allows the object to be called like a function, forwards to match() with
	 * its default arguments.
	 *
	 * @param	string		$string		string to match
	 * @return	integer		return value of preg_match
	 */
	public function __invoke($string) {
		return $this->match($string);
	}

	/**
	 * Checks whether the regex is syntactically correct.
	 *
	 * The check is performed by matching against an empty string; the matches of a
	 * previous match() call are preserved.
	 *
	 * @return	boolean
	 */
	public function isValid() {
		$previousMatches = $this->matches;

		try {
			$this->match(''); // we don't care about the result, we only care about the exception
			$isValid = true;
		}
		catch (SystemException $e) {
			// we have a syntax error now
			$isValid = false;
		}

		// the probe above must not leak into getMatches()
		$this->matches = $previousMatches;

		return $isValid;
	}

	// @codingStandardsIgnoreStart
	/**
	 * Checks whether the regex matches the given string. The matches are stored
	 * and can be retrieved with getMatches().
	 *
	 * @param	string		$string		string to match
	 * @param	boolean		$all		indicates if all matches are collected
	 * @param	integer		$flags		match flags
	 * @return	integer		return value of preg_match(_all)
	 * @throws	SystemException
	 */
	public function match($string, $all = false, $flags = self::FLAGS_DEFAULT) {
		$matchFlags = 0;
		if ($flags & self::CAPTURE_OFFSET) $matchFlags |= PREG_OFFSET_CAPTURE;

		if ($all) {
			$byPattern = ($flags & self::ORDER_MATCH_BY_PATTERN) !== 0;
			$bySet = ($flags & self::ORDER_MATCH_BY_SET) !== 0;

			// Pattern order is the default of preg_match_all(). Set order is used only if it
			// is requested unambiguously; it then takes precedence over FLAGS_DEFAULT, because
			// PREG_PATTERN_ORDER and PREG_SET_ORDER must never be passed together.
			if ($bySet && !$byPattern) {
				$matchFlags |= PREG_SET_ORDER;
			}
			else {
				$matchFlags |= PREG_PATTERN_ORDER;
			}

			return $this->checkResult(preg_match_all($this->regex, $string, $this->matches, $matchFlags), 'match');
		}

		return $this->checkResult(preg_match($this->regex, $string, $this->matches, $matchFlags), 'match');
	}

	/**
	 * Replaces the parts of the string that are matched by the regex.
	 *
	 * @param	string		$string
	 * @param	mixed		$replacement	replacement string, or a Callback / Closure that receives the matches
	 * @return	string
	 * @throws	SystemException
	 */
	public function replace($string, $replacement) {
		if ($replacement instanceof Callback || $replacement instanceof \Closure) {
			return $this->checkResult(preg_replace_callback($this->regex, $replacement, $string), 'replace');
		}

		return $this->checkResult(preg_replace($this->regex, $replacement, $string), 'replace');
	}

	/**
	 * Splits the string with the regex.
	 *
	 * @param	string		$string
	 * @param	integer		$flags		split flags
	 * @return	string[]
	 * @throws	SystemException
	 */
	public function split($string, $flags = self::FLAGS_DEFAULT) {
		$splitFlags = 0;
		if ($flags & self::CAPTURE_OFFSET) $splitFlags |= PREG_SPLIT_OFFSET_CAPTURE;
		if ($flags & self::SPLIT_NON_EMPTY_ONLY) $splitFlags |= PREG_SPLIT_NO_EMPTY;
		if ($flags & self::CAPTURE_SPLIT_DELIMITER) $splitFlags |= PREG_SPLIT_DELIM_CAPTURE;

		// -1 means "no limit"; passing null for the integer limit is deprecated as of PHP 8.1
		return $this->checkResult(preg_split($this->regex, $string, -1, $splitFlags), 'split');
	}
	// @codingStandardsIgnoreEnd

	/**
	 * Checks whether a preg_* call was successful and returns its result.
	 *
	 * A result of `false` or `null` is treated as a failure only if preg_last_error()
	 * reports an error; otherwise the result is handed back unchanged.
	 *
	 * @param	mixed		$result		return value of the preg_* function
	 * @param	string		$method		name of the operation, used for the exception message
	 * @return	mixed
	 * @throws	SystemException
	 */
	private function checkResult($result, $method = '') {
		if ($result !== false && $result !== null) {
			return $result;
		}

		$errorCode = preg_last_error();
		switch ($errorCode) {
			case PREG_NO_ERROR:
				return $result;

			case PREG_INTERNAL_ERROR:
				$error = 'Internal error';
				break;

			case PREG_BACKTRACK_LIMIT_ERROR:
				$error = 'Backtrack limit was exhausted';
				break;

			case PREG_RECURSION_LIMIT_ERROR:
				$error = 'Recursion limit was exhausted';
				break;

			case PREG_BAD_UTF8_ERROR:
				$error = 'Bad UTF8';
				break;

			case PREG_BAD_UTF8_OFFSET_ERROR:
				$error = 'Bad UTF8 offset';
				break;

			default:
				// PREG_JIT_STACKLIMIT_ERROR does not exist prior to PHP 7.0, thus look it up dynamically
				if (defined('PREG_JIT_STACKLIMIT_ERROR') && $errorCode === constant('PREG_JIT_STACKLIMIT_ERROR')) {
					$error = 'JIT stack limit was exhausted';
				}
				else {
					$error = 'Unknown error';
				}
				break;
		}

		throw new SystemException('Could not execute '.($method ? $method.' on ' : '').$this->regex.': '.$error);
	}

	/**
	 * Returns the matches of the last call to match().
	 *
	 * @return	array
	 */
	public function getMatches() {
		return $this->matches;
	}

	/**
	 * Returns the compiled regex, including delimiters and modifiers.
	 *
	 * @return	string
	 */
	public function getRegex() {
		return $this->regex;
	}
}