Commit | Line | Data |
---|---|---|
ac9b0f6e TD |
1 | <?php |
2 | namespace wcf\system; | |
a17de04e | 3 | use wcf\system\exception\SystemException; |
ac9b0f6e TD |
4 | |
/**
 * Represents a regular expression.
 * 
 * The pattern is passed without delimiters; the constructor wraps it in
 * `REGEX_DELIMITER` and appends the PCRE modifiers selected via the modifier
 * bitmask. The matches of the most recent `match()` call are stored on the
 * instance and can be read with `getMatches()`.
 * 
 * @author	Tim Duesterhus
 * @copyright	2001-2018 WoltLab GmbH
 * @license	GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package	WoltLabSuite\Core\System
 */
final class Regex {
	/**
	 * delimiter used internally
	 * @var	string
	 */
	const REGEX_DELIMITER = '/';
	
	/**
	 * indicates that no modifier is applied
	 * @var	integer
	 */
	const MODIFIER_NONE = 0;
	
	/**
	 * indicates case insensitive matching
	 * @var	integer
	 */
	const CASE_INSENSITIVE = 1;
	
	/**
	 * indicates ungreedy matching
	 * @var	integer
	 */
	const UNGREEDY = 2;
	
	/**
	 * indicates that no extra time is spent on analysing
	 * @var	integer
	 */
	const NO_ANALYSE = 8;
	
	/**
	 * indicates that whitespaces are ignored in regex
	 * @var	integer
	 */
	const IGNORE_WHITESPACE = 16;
	
	/**
	 * indicates that a dot matches every char
	 * @var	integer
	 */
	const DOT_ALL = 32;
	
	/**
	 * indicates that ^/$ match start and end of a line instead of the whole string
	 * @var	integer
	 */
	const MULTILINE = 64;
	
	/**
	 * indicates that pattern string is treated as UTF-8.
	 * @var	integer
	 */
	const UTF_8 = 128;
	
	/**
	 * indicates that no flags are set
	 * @var	integer
	 */
	const FLAGS_NONE = 0;
	
	/**
	 * indicates that default flags are set
	 * @var	integer
	 */
	const FLAGS_DEFAULT = 1;
	
	/**
	 * captures the offset of a match (all excluding replace)
	 * @var	integer
	 */
	const CAPTURE_OFFSET = 2;
	
	/**
	 * indicates default pattern ordering (match all only)
	 * @var	integer
	 */
	const ORDER_MATCH_BY_PATTERN = 4;
	
	/**
	 * indicates alternative set ordering (match all only)
	 * @var	integer
	 */
	const ORDER_MATCH_BY_SET = 8;
	
	/**
	 * indicates that only non-empty pieces will be split (split only)
	 * @var	integer
	 */
	const SPLIT_NON_EMPTY_ONLY = 16;
	
	/**
	 * indicates that the split delimiter is returned as well (split only)
	 * @var	integer
	 */
	const CAPTURE_SPLIT_DELIMITER = 32;
	
	/**
	 * compiled regex (delimiters and modifiers included)
	 * @var	string
	 */
	private $regex = '';
	
	/**
	 * matches of the last `match()` call
	 * @var	array
	 */
	private $matches = [];
	
	/**
	 * Creates a regex.
	 * 
	 * Every occurrence of the delimiter inside `$regex` is escaped
	 * unconditionally, so the pattern must contain plain `/` characters;
	 * an already escaped `\/` would be escaped a second time.
	 * 
	 * @param	string		$regex		pattern without delimiters and modifiers
	 * @param	integer		$modifier	bitmask of the modifier constants of this class
	 */
	public function __construct($regex, $modifier = self::MODIFIER_NONE) {
		// escape the delimiter and wrap the pattern in it
		$escaped = str_replace(self::REGEX_DELIMITER, '\\'.self::REGEX_DELIMITER, $regex);
		$this->regex = self::REGEX_DELIMITER.$escaped.self::REGEX_DELIMITER;
		
		// append modifiers; the order is kept stable because the compiled
		// string is exposed through getRegex()
		if ($modifier & self::CASE_INSENSITIVE) $this->regex .= 'i';
		if ($modifier & self::UNGREEDY) $this->regex .= 'U';
		// 'S' is added by default and only left out on explicit request
		if (!($modifier & self::NO_ANALYSE)) $this->regex .= 'S';
		if ($modifier & self::IGNORE_WHITESPACE) $this->regex .= 'x';
		if ($modifier & self::DOT_ALL) $this->regex .= 's';
		if ($modifier & self::MULTILINE) $this->regex .= 'm';
		if ($modifier & self::UTF_8) $this->regex .= 'u';
	}
	
	/**
	 * Creates a regex, static equivalent of the constructor that allows
	 * chaining, e.g. `Regex::compile('foo')->match($string)`.
	 * 
	 * @param	string		$regex		pattern without delimiters and modifiers
	 * @param	integer		$modifier	bitmask of the modifier constants of this class
	 * @return	Regex
	 */
	public static function compile($regex, $modifier = self::MODIFIER_NONE) {
		return new self($regex, $modifier);
	}
	
	/**
	 * Allows using the object like a function, forwards to `match()` with
	 * its default arguments.
	 * 
	 * @param	string		$string		string to match
	 * @return	integer		return value of preg_match
	 */
	public function __invoke($string) {
		return $this->match($string);
	}
	
	/**
	 * Checks whether the regex is syntactically correct.
	 * 
	 * The check runs the regex against an empty string, thus the matches
	 * stored by a previous `match()` call are overwritten.
	 * 
	 * @return	boolean
	 */
	public function isValid() {
		try {
			// the result is irrelevant, only the exception matters
			$this->match('');
		}
		catch (SystemException $e) {
			// executing the regex failed
			return false;
		}
		
		return true;
	}
	
	// @codingStandardsIgnoreStart
	/**
	 * Checks whether the regex matches the given string.
	 * 
	 * With `$all` set the ordering of the collected matches is pattern order
	 * unless `ORDER_MATCH_BY_SET` is passed without `ORDER_MATCH_BY_PATTERN`.
	 * 
	 * @param	string		$string		string to match
	 * @param	boolean		$all		indicates if all matches are collected
	 * @param	integer		$flags		match flags
	 * @return	integer				return value of preg_match(_all)
	 * @throws	SystemException
	 */
	public function match($string, $all = false, $flags = self::FLAGS_DEFAULT) {
		$matchFlags = 0;
		if ($flags & self::CAPTURE_OFFSET) $matchFlags |= PREG_OFFSET_CAPTURE;
		
		if (!$all) {
			return $this->checkResult(preg_match($this->regex, $string, $this->matches, $matchFlags), 'match');
		}
		
		// PREG_PATTERN_ORDER and PREG_SET_ORDER are mutually exclusive, so exactly
		// one of them is chosen: an unambiguous request for set ordering wins,
		// everything else (including FLAGS_DEFAULT alone) uses pattern ordering
		$orderBySet = ($flags & self::ORDER_MATCH_BY_SET) && !($flags & self::ORDER_MATCH_BY_PATTERN);
		$matchFlags |= $orderBySet ? PREG_SET_ORDER : PREG_PATTERN_ORDER;
		
		return $this->checkResult(preg_match_all($this->regex, $string, $this->matches, $matchFlags), 'match');
	}
	
	/**
	 * Replaces part of the string with the regex.
	 * 
	 * @param	string		$string
	 * @param	mixed		$replacement	replacement-string or closure
	 * @return	string
	 * @throws	SystemException
	 */
	public function replace($string, $replacement) {
		// callbacks and closures compute the replacement per match
		if ($replacement instanceof Callback || $replacement instanceof \Closure) {
			return $this->checkResult(preg_replace_callback($this->regex, $replacement, $string), 'replace');
		}
		
		return $this->checkResult(preg_replace($this->regex, $replacement, $string), 'replace');
	}
	
	/**
	 * Splits the string with the regex.
	 * 
	 * @param	string		$string
	 * @param	integer		$flags
	 * @return	string[]
	 * @throws	SystemException
	 */
	public function split($string, $flags = self::FLAGS_DEFAULT) {
		$splitFlags = 0;
		if ($flags & self::CAPTURE_OFFSET) $splitFlags |= PREG_SPLIT_OFFSET_CAPTURE;
		if ($flags & self::SPLIT_NON_EMPTY_ONLY) $splitFlags |= PREG_SPLIT_NO_EMPTY;
		if ($flags & self::CAPTURE_SPLIT_DELIMITER) $splitFlags |= PREG_SPLIT_DELIM_CAPTURE;
		
		// -1 means "no limit"; passing null for the integer limit is deprecated in PHP 8.1+
		return $this->checkResult(preg_split($this->regex, $string, -1, $splitFlags), 'split');
	}
	// @codingStandardsIgnoreEnd
	
	/**
	 * Checks whether there was success.
	 * 
	 * Returns `$result` unchanged unless it is `false` or `null` and
	 * `preg_last_error()` reports an error; in that case the error is
	 * converted into an exception.
	 * 
	 * @param	mixed		$result		return value of a preg_* function
	 * @param	string		$method		name of the executed operation, used in the error message
	 * @return	mixed
	 * @throws	SystemException
	 */
	private function checkResult($result, $method = '') {
		// only false and null are treated as a possible failure
		if ($result !== false && $result !== null) {
			return $result;
		}
		
		$errorCode = preg_last_error();
		switch ($errorCode) {
			case PREG_NO_ERROR:
				// no error was recorded, hand the value back untouched
				return $result;
			case PREG_INTERNAL_ERROR:
				$error = 'Internal error';
				break;
			case PREG_BACKTRACK_LIMIT_ERROR:
				$error = 'Backtrack limit was exhausted';
				break;
			case PREG_RECURSION_LIMIT_ERROR:
				$error = 'Recursion limit was exhausted';
				break;
			case PREG_BAD_UTF8_ERROR:
				$error = 'Bad UTF8';
				break;
			case PREG_BAD_UTF8_OFFSET_ERROR:
				$error = 'Bad UTF8 offset';
				break;
			default:
				// PREG_JIT_STACKLIMIT_ERROR does not exist before PHP 7.0, thus it
				// must not be referenced without checking for its existence
				if (defined('PREG_JIT_STACKLIMIT_ERROR') && $errorCode === PREG_JIT_STACKLIMIT_ERROR) {
					$error = 'JIT stack limit was exhausted';
				}
				else {
					$error = 'Unknown error';
				}
				break;
		}
		
		throw new SystemException('Could not execute '.($method ? $method.' on ' : '').$this->regex.': '.$error);
	}
	
	/**
	 * Returns the matches of the last string.
	 * 
	 * @return	array
	 */
	public function getMatches() {
		return $this->matches;
	}
	
	/**
	 * Returns the compiled regex.
	 * 
	 * @return	string
	 */
	public function getRegex() {
		return $this->regex;
	}
}