Revert "Removed obsolete trailing slashes from void elements"
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / bbcode / SimpleMessageParser.class.php
1 <?php
2 namespace wcf\system\bbcode;
3 use wcf\data\smiley\Smiley;
4 use wcf\data\smiley\SmileyCache;
5 use wcf\system\event\EventHandler;
6 use wcf\system\SingletonFactory;
7 use wcf\util\StringUtil;
8
/**
 * Parses urls and smilies in simple messages.
 * 
 * The message is HTML-encoded as a whole. URLs and e-mail addresses are replaced by
 * placeholder hashes before the encoding takes place and are turned into links
 * afterwards, so that the generated markup is not encoded itself.
 * 
 * @author	Marcel Werk
 * @copyright	2001-2016 WoltLab GmbH
 * @license	GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package	com.woltlab.wcf
 * @subpackage	system.bbcode
 * @category	Community Framework
 */
class SimpleMessageParser extends SingletonFactory {
	/**
	 * regular expression fragment matching a run of characters that are
	 * allowed within a single hostname label
	 * @var	string
	 */
	protected static $illegalChars = '[^\x0-\x2C\x2E\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]+';
	
	/**
	 * list of smilies, maps the smiley code to the html code of its image
	 * @var	string[]
	 */
	protected $smilies = [];
	
	/**
	 * cached URLs, maps the placeholder hash to the matched URL
	 * @var	string[]
	 */
	protected $cachedURLs = [];
	
	/**
	 * cached e-mails, maps the placeholder hash to the matched e-mail address
	 * @var	string[]
	 */
	protected $cachedEmails = [];
	
	/**
	 * currently parsed message
	 * @var	string
	 */
	public $message = '';
	
	/**
	 * @inheritDoc
	 */
	protected function init() {
		parent::init();
		
		if (MODULE_SMILEY == 1) {
			// get smilies
			$smilies = SmileyCache::getInstance()->getSmilies();
			$categories = SmileyCache::getInstance()->getCategories();
			foreach ($smilies as $categoryID => $categorySmilies) {
				// falsy category ids are looked up using the `null` key
				$categoryKey = $categoryID ?: null;
				
				// skip smilies of disabled categories; a category that is missing in the
				// category list is treated as enabled instead of reading a property of `null`
				if (isset($categories[$categoryKey]) && $categories[$categoryKey]->isDisabled) continue;
				
				/** @var Smiley $smiley */
				foreach ($categorySmilies as $smiley) {
					// every code of a smiley yields the same image, the alt text
					// always uses `$smiley->smileyCode`
					foreach ($smiley->smileyCodes as $smileyCode) {
						$this->smilies[$smileyCode] = '<img src="'.$smiley->getURL().'" alt="'.StringUtil::encodeHTML($smiley->smileyCode).'" />';
					}
				}
			}
			
			// smilies are replaced in reverse key order
			krsort($this->smilies);
		}
	}
	
	/**
	 * Parses the given message and returns the parsed message.
	 * 
	 * Fires the `beforeParsing` event before and the `afterParsing` event after
	 * the message has been processed. The intermediate result is available via
	 * the public `$message` property.
	 * 
	 * @param	string		$message	plain text message
	 * @param	boolean		$parseURLs	turns urls and e-mail addresses into links
	 * @param	boolean		$parseSmilies	replaces smiley codes with images
	 * @return	string		html code of the parsed message
	 */
	public function parse($message, $parseURLs = true, $parseSmilies = true) {
		$this->message = $message;
		$this->cachedURLs = $this->cachedEmails = [];
		
		// call event
		EventHandler::getInstance()->fireAction($this, 'beforeParsing');
		
		// replace urls and e-mails with placeholders that are not affected by the html encoding
		if ($parseURLs) {
			$this->message = $this->parseURLs($this->message);
		}
		
		// encode html
		$this->message = StringUtil::encodeHTML($this->message);
		
		// converts newlines to <br />'s
		$this->message = nl2br($this->message);
		
		// replace the placeholders with the actual links
		if ($parseURLs) {
			$this->message = $this->insertCachedURLs($this->message);
		}
		
		// parse smilies
		if ($parseSmilies) {
			$this->message = $this->parseSmilies($this->message);
		}
		
		// break up script pseudo protocols by inserting an empty element before the colon
		$this->message = preg_replace('/(javascript|about|vbscript):/i', '$1<b></b>:', $this->message);
		
		// call event
		EventHandler::getInstance()->fireAction($this, 'afterParsing');
		
		return $this->message;
	}
	
	/**
	 * Replaces urls and e-mail addresses in the given text with placeholder
	 * hashes and remembers the matched values for `insertCachedURLs()`.
	 * 
	 * @param	string		$text
	 * @return	string		text containing the placeholder hashes
	 */
	public function parseURLs($text) {
		// define pattern
		$urlPattern = '~(?<!\B|"|\'|=|/|\]|,|\?)
			(?:						# hostname
				(?:ftp|https?)://'.static::$illegalChars.'(?:\.'.static::$illegalChars.')*
				|
				www\.(?:'.static::$illegalChars.'\.)+
				(?:[a-z]{2,63}(?=\b))			# tld
			)
			
			(?::\d+)?					# port
			
			(?:
				/
				[^!.,?;"\'<>()\[\]{}\s]*
				(?:
					[!.,?;(){}]+ [^!.,?;"\'<>()\[\]{}\s]+
				)*
			)?
			~ix';
		
		// the tld length matches the url pattern above, otherwise addresses using
		// a tld with more than 4 characters would not be recognized
		$emailPattern = '~(?<!\B|"|\'|=|/|\]|,|:)
			(?:)
			\w+(?:[\.\-]\w+)*
			@
			(?:'.static::$illegalChars.'\.)+		# hostname
			(?:[a-z]{2,63}(?=\b))				# tld
			(?!"|\'|\[|\-)
			~ix';
		
		// parse urls
		$text = preg_replace_callback($urlPattern, [$this, 'cacheURLsCallback'], $text);
		
		// parse emails, the pattern is applied only if the text contains an `@` at all
		if (mb_strpos($text, '@') !== false) {
			$text = preg_replace_callback($emailPattern, [$this, 'cacheEmailsCallback'], $text);
		}
		
		return $text;
	}
	
	/**
	 * Stores a matched URL and returns the placeholder hash that takes its
	 * place in the message.
	 * 
	 * @param	array		$matches	matches of the url pattern
	 * @return	string		placeholder hash
	 */
	protected function cacheURLsCallback($matches) {
		$hash = '@@'.StringUtil::getHash(uniqid(microtime()).$matches[0]).'@@';
		$this->cachedURLs[$hash] = $matches[0];
		
		return $hash;
	}
	
	/**
	 * Stores a matched e-mail address and returns the placeholder hash that
	 * takes its place in the message.
	 * 
	 * @param	array		$matches	matches of the e-mail pattern
	 * @return	string		placeholder hash
	 */
	protected function cacheEmailsCallback($matches) {
		$hash = '@@'.StringUtil::getHash(uniqid(microtime()).$matches[0]).'@@';
		$this->cachedEmails[$hash] = $matches[0];
		
		return $hash;
	}
	
	/**
	 * Reinserts cached URLs and e-mails as links.
	 * 
	 * @param	string		$text		text containing the placeholder hashes
	 * @return	string		text containing the links
	 */
	protected function insertCachedURLs($text) {
		foreach ($this->cachedURLs as $hash => $url) {
			// add protocol if necessary; the check is anchored at the start of the url,
			// a protocol that appears later on (e.g. `www.example.com/?u=http://...`)
			// must not prevent the prefix from being added
			if (!preg_match('~^[a-z][a-z0-9+.\-]*://~i', $url)) {
				$url = 'http://'.$url;
			}
			
			$text = str_replace($hash, StringUtil::getAnchorTag($url), $text);
		}
		
		foreach ($this->cachedEmails as $hash => $email) {
			// the address was cached before the message was encoded
			$email = StringUtil::encodeHTML($email);
			
			$text = str_replace($hash, '<a href="mailto:'.$email.'">'.$email.'</a>', $text);
		}
		
		return $text;
	}
	
	/**
	 * Replaces smiley codes with the html code of their images.
	 * 
	 * A code is replaced only if it is preceded by the start of the text or
	 * whitespace and followed by the end of the text, whitespace or `<br />`.
	 * 
	 * @param	string		$text		html-encoded text
	 * @return	string		text containing the smiley images
	 */
	public function parseSmilies($text) {
		foreach ($this->smilies as $code => $html) {
			$pattern = '~(?<=^|\s)'.preg_quote(StringUtil::encodeHTML($code), '~').'(?=$|\s|<br />)~';
			
			// a callback inserts the html code literally, a plain replacement string
			// would interpret `$n` and `\n` sequences in the html code as backreferences
			$text = preg_replace_callback($pattern, function () use ($html) {
				return $html;
			}, $text);
		}
		
		return $text;
	}
}