Merge pull request #5953 from WoltLab/get-form-field
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / bbcode / SimpleMessageParser.class.php
1 <?php
2
3 namespace wcf\system\bbcode;
4
5 use wcf\data\smiley\Smiley;
6 use wcf\data\smiley\SmileyCache;
7 use wcf\system\event\EventHandler;
8 use wcf\system\SingletonFactory;
9 use wcf\util\StringUtil;
10
/**
 * Parses urls and smilies in simple messages.
 *
 * The parser turns plain text into HTML: URLs and e-mail addresses are
 * replaced by anchors, the remaining text is HTML-encoded, newlines become
 * `<br>` and smiley codes are replaced by the HTML of the matching smiley.
 *
 * @author Marcel Werk
 * @copyright 2001-2019 WoltLab GmbH
 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 */
class SimpleMessageParser extends SingletonFactory
{
    /**
     * Regex fragment matching a run of characters that may appear inside a
     * host name label. The negated class excludes ASCII control characters
     * and all ASCII punctuation except the hyphen; digits, ASCII letters and
     * bytes >= 0x80 are accepted.
     * @var string
     */
    protected static $illegalChars = '[^\x0-\x2C\x2E\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]+';

    /**
     * HTML of the enabled smilies (as returned by `Smiley::getHtml()`),
     * indexed by smiley code and sorted by code in descending order
     * @var string[]
     */
    protected $smilies = [];

    /**
     * URLs found in the current message, indexed by their placeholder
     * @var string[]
     */
    protected $cachedURLs = [];

    /**
     * e-mail addresses found in the current message, indexed by their placeholder
     * @var string[]
     */
    protected $cachedEmails = [];

    /**
     * currently parsed message; public so that listeners of the
     * `beforeParsing` / `afterParsing` events can read and modify it
     * @var string
     */
    public $message = '';

    /**
     * @inheritDoc
     */
    protected function init()
    {
        parent::init();

        if (MODULE_SMILEY != 1) {
            return;
        }

        $smilies = SmileyCache::getInstance()->getSmilies();
        $categories = SmileyCache::getInstance()->getCategories();
        foreach ($smilies as $categoryID => $categorySmilies) {
            // Smilies without a category are grouped under a falsy id and are
            // looked up with the `null` key. A missing category is treated
            // like an enabled one instead of dereferencing `null`.
            $category = $categories[$categoryID ?: null] ?? null;
            if ($category !== null && $category->isDisabled) {
                continue;
            }

            /** @var Smiley $smiley */
            foreach ($categorySmilies as $smiley) {
                $html = $smiley->getHtml();
                foreach ($smiley->smileyCodes as $smileyCode) {
                    $this->smilies[$smileyCode] = $html;
                }
            }
        }

        // Descending key order puts a longer code before any shorter code
        // that is a prefix of it.
        \krsort($this->smilies);
    }

    /**
     * Parses the given message and returns the parsed message.
     *
     * Processing order: `beforeParsing` event, URL/e-mail extraction,
     * HTML encoding, newline conversion, re-insertion of the extracted
     * URLs/e-mails as anchors, smiley replacement, neutralisation of
     * `javascript:` / `about:` / `vbscript:` and finally the `afterParsing`
     * event.
     *
     * @param string $message
     * @param bool $parseURLs
     * @param bool $parseSmilies
     * @return string
     */
    public function parse($message, $parseURLs = true, $parseSmilies = true)
    {
        $this->message = $message;
        $this->cachedURLs = $this->cachedEmails = [];

        EventHandler::getInstance()->fireAction($this, 'beforeParsing');

        // Swap URLs and e-mail addresses for placeholders so that they are
        // not affected by the HTML encoding below.
        if ($parseURLs) {
            $this->message = $this->parseURLs($this->message);
        }

        $this->message = StringUtil::encodeHTML($this->message);

        // `false` yields `<br>` instead of `<br />`.
        $this->message = \nl2br($this->message, false);

        // Replace the placeholders with the final anchor tags.
        if ($parseURLs) {
            $this->message = $this->insertCachedURLs($this->message);
        }

        if ($parseSmilies) {
            $this->message = $this->parseSmilies($this->message);
        }

        // Break up script-like URI schemes anywhere in the message by
        // inserting an empty `<b>` element in front of the colon.
        $badSearch = ['/(javascript):/i', '/(about):/i', '/(vbscript):/i'];
        $badReplace = ['$1<b></b>:', '$1<b></b>:', '$1<b></b>:'];
        $this->message = \preg_replace($badSearch, $badReplace, $this->message);

        EventHandler::getInstance()->fireAction($this, 'afterParsing');

        return $this->message;
    }

    /**
     * Replaces URLs and e-mail addresses in the given text with placeholders
     * and remembers the original values for `insertCachedURLs()`.
     *
     * @param string $text
     * @return string text
     */
    public function parseURLs($text)
    {
        // Both patterns use the `x` modifier: whitespace and `#` comments
        // inside the pattern are ignored.
        $urlPattern = '~(?<!\B|"|\'|=|/|\]|,|\?)
            (?: # hostname
                (?:ftp|https?)://' . static::$illegalChars . '(?:\.' . static::$illegalChars . ')*
                |
                www\.(?:' . static::$illegalChars . '\.)+
                (?:[a-z]{2,63}(?=\b)) # tld
            )

            (?::\d+)? # port

            (?:
                /
                [^!.,?;"\'<>()\[\]{}\s]*
                (?:
                    [!.,?;(){}]+ [^!.,?;"\'<>()\[\]{}\s]+
                )*
            )?
            ~ix';
        $emailPattern = '~(?<!\B|"|\'|=|/|\]|,|:)
            (?:)
            \w+(?:[\.\-]\w+)*
            @
            (?:' . static::$illegalChars . '\.)+ # hostname
            (?:[a-z]{2,63}(?=\b)) # tld, same length limit as in the url pattern
            (?!"|\'|\[|\-)
            ~ix';

        // `preg_replace_callback()` returns null on a PCRE error; keep the
        // text unchanged in that case instead of discarding the message.
        $text = \preg_replace_callback($urlPattern, [$this, 'cacheURLsCallback'], $text) ?? $text;

        if (\str_contains($text, '@')) {
            $text = \preg_replace_callback($emailPattern, [$this, 'cacheEmailsCallback'], $text) ?? $text;
        }

        return $text;
    }

    /**
     * Stores a matched URL and returns the placeholder that takes its place
     * in the message.
     *
     * @param array $matches
     * @return string
     */
    protected function cacheURLsCallback($matches)
    {
        $hash = '@@' . StringUtil::getRandomID() . '@@';
        $this->cachedURLs[$hash] = $matches[0];

        return $hash;
    }

    /**
     * Stores a matched e-mail address and returns the placeholder that takes
     * its place in the message.
     *
     * @param array $matches
     * @return string
     */
    protected function cacheEmailsCallback($matches)
    {
        $hash = '@@' . StringUtil::getRandomID() . '@@';
        $this->cachedEmails[$hash] = $matches[0];

        return $hash;
    }

    /**
     * Reinserts cached URLs and e-mails as anchor tags.
     *
     * @param string $text
     * @return string
     */
    protected function insertCachedURLs($text)
    {
        $replacements = [];

        foreach ($this->cachedURLs as $hash => $url) {
            // Only a scheme at the very beginning counts. A `scheme://`
            // further inside the URL (e.g. in a query parameter of a
            // `www.` link) must not suppress the default protocol.
            if (!\preg_match('~^[a-z][a-z0-9+.\-]*://~i', $url)) {
                $url = 'http://' . $url;
            }

            $replacements[$hash] = StringUtil::getAnchorTag($url, '', true, true);
        }

        foreach ($this->cachedEmails as $hash => $email) {
            $email = StringUtil::encodeHTML($email);

            $replacements[$hash] = '<a href="mailto:' . $email . '">' . $email . '</a>';
        }

        if ($replacements === []) {
            return $text;
        }

        // Single pass over the text instead of one scan per placeholder.
        return \strtr($text, $replacements);
    }

    /**
     * Replaces smiley codes in the given, already HTML-encoded text with the
     * HTML of the smiley. At most 50 smilies are replaced per call; further
     * codes are left untouched.
     *
     * @param string $text
     * @return string text
     */
    public function parseSmilies($text)
    {
        $smileyCount = 0;
        foreach ($this->smilies as $code => $html) {
            // Nothing can change once the limit is reached.
            if ($smileyCount >= 50) {
                break;
            }

            // The text is HTML-encoded, so the encoded form of the code is
            // matched. A code only counts when delimited by whitespace, a
            // line break tag or the start/end of the text.
            $pattern = '~(?<=^|\s)'
                . \preg_quote(StringUtil::encodeHTML($code), '~')
                . '(?=$|\s|<br />|<br>)~';

            $text = \preg_replace_callback(
                $pattern,
                static function (array $matches) use ($html, &$smileyCount) {
                    if ($smileyCount >= 50) {
                        // Keep the matched (encoded) text as is; returning
                        // the raw code would undo the HTML encoding.
                        return $matches[0];
                    }

                    $smileyCount++;

                    return $html;
                },
                $text
            ) ?? $text;
        }

        return $text;
    }
}