<?php
namespace wcf\system\bbcode;
use wcf\data\smiley\Smiley;
use wcf\data\smiley\SmileyCache;
use wcf\system\event\EventHandler;
use wcf\system\SingletonFactory;
use wcf\util\StringUtil;

/**
 * Parses urls and smilies in simple messages.
 * 
 * @author	Marcel Werk
 * @copyright	2001-2016 WoltLab GmbH
 * @license	GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package	WoltLabSuite\Core\System\Bbcode
 */
class SimpleMessageParser extends SingletonFactory {
	/**
	 * Regex fragment used for hostname labels and e-mail domains.
	 * 
	 * Despite the name, this is a negated character class: it matches a run
	 * of characters that are NOT in the listed (forbidden) ASCII ranges.
	 * @var	string
	 */
	protected static $illegalChars = '[^\x0-\x2C\x2E\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]+';
	
	/**
	 * smiley HTML indexed by smiley code, sorted by code in reverse order
	 * @var	string[]
	 */
	protected $smilies = [];
	
	/**
	 * URLs found in the current message, indexed by their placeholder
	 * @var	string[]
	 */
	protected $cachedURLs = [];
	
	/**
	 * e-mail addresses found in the current message, indexed by their placeholder
	 * @var	string[]
	 */
	protected $cachedEmails = [];
	
	/**
	 * currently parsed message
	 * @var	string
	 */
	public $message = '';
	
	/**
	 * @inheritDoc
	 */
	protected function init() {
		parent::init();
		
		if (MODULE_SMILEY == 1) {
			// collect the HTML of every smiley code that belongs to an enabled category
			$smilies = SmileyCache::getInstance()->getSmilies();
			$categories = SmileyCache::getInstance()->getCategories();
			foreach ($smilies as $categoryID => $categorySmilies) {
				// a falsy category id is looked up under the `null` key
				// (presumably the default category -- confirm against SmileyCache)
				if ($categories[$categoryID ?: null]->isDisabled) continue;
				
				/** @var Smiley $smiley */
				foreach ($categorySmilies as $smiley) {
					foreach ($smiley->smileyCodes as $smileyCode) {
						$this->smilies[$smileyCode] = $smiley->getHtml();
					}
				}
			}
			
			// reverse key order: a code is processed before any code that is a prefix of it
			krsort($this->smilies);
		}
	}
	
	/**
	 * Parses the given message and returns the parsed message.
	 * 
	 * URLs and e-mail addresses are swapped for placeholders before the message
	 * is HTML-encoded and are re-inserted as links afterwards. The events
	 * `beforeParsing` and `afterParsing` are fired with this object, whose public
	 * `$message` property holds the current state of the message.
	 * 
	 * @param	string		$message
	 * @param	boolean		$parseURLs	turn URLs and e-mail addresses into links
	 * @param	boolean		$parseSmilies	replace smiley codes with their HTML
	 * @return	string
	 */
	public function parse($message, $parseURLs = true, $parseSmilies = true) {
		$this->message = $message;
		$this->cachedURLs = $this->cachedEmails = [];
		
		// call event
		EventHandler::getInstance()->fireAction($this, 'beforeParsing');
		
		// replace urls and e-mails with placeholders
		if ($parseURLs) {
			$this->message = $this->parseURLs($this->message);
		}
		
		// encode html
		$this->message = StringUtil::encodeHTML($this->message);
		
		// converts newlines to <br>'s
		$this->message = nl2br($this->message, false);
		
		// replace placeholders with the actual links
		if ($parseURLs) {
			$this->message = $this->insertCachedURLs($this->message);
		}
		
		// parse smilies
		if ($parseSmilies) {
			$this->message = $this->parseSmilies($this->message);
		}
		
		// break up dangerous pseudo protocols by inserting an empty tag in front of the colon
		$this->message = preg_replace('/(javascript|about|vbscript):/i', '$1<b></b>:', $this->message);
		
		// call event
		EventHandler::getInstance()->fireAction($this, 'afterParsing');
		
		return $this->message;
	}
	
	/**
	 * Replaces URLs and e-mail addresses in the given text with placeholders
	 * and stores the original values in `$cachedURLs` and `$cachedEmails`.
	 * 
	 * @param	string		$text
	 * @return	string		text with placeholders
	 */
	public function parseURLs($text) {
		// define pattern
		$urlPattern = '~(?<!\B|"|\'|=|/|\]|,|\?)
			(?:						# hostname
				(?:ftp|https?)://'.static::$illegalChars.'(?:\.'.static::$illegalChars.')*
				|
				www\.(?:'.static::$illegalChars.'\.)+
				(?:[a-z]{2,63}(?=\b))			# tld
			)
			
			(?::\d+)?					# port
			
			(?:
				/
				[^!.,?;"\'<>()\[\]{}\s]*
				(?:
					[!.,?;(){}]+ [^!.,?;"\'<>()\[\]{}\s]+
				)*
			)?
			~ix';
		// the tld length matches the url pattern above so that long tlds are linked, too
		$emailPattern = '~(?<!\B|"|\'|=|/|\]|,|:)
			\w+(?:[\.\-]\w+)*
			@
			(?:'.static::$illegalChars.'\.)+		# hostname
			(?:[a-z]{2,63}(?=\b))				# tld
			(?!"|\'|\[|\-)
			~ix';
		
		// parse urls
		$text = preg_replace_callback($urlPattern, [$this, 'cacheURLsCallback'], $text);
		
		// parse emails (skip the regex entirely if there is no @ in the text)
		if (mb_strpos($text, '@') !== false) {
			$text = preg_replace_callback($emailPattern, [$this, 'cacheEmailsCallback'], $text);
		}
		
		return $text;
	}
	
	/**
	 * Returns a new placeholder of the form `@@<hash>@@` for the given value.
	 * 
	 * The hash is salted with `uniqid(microtime())`, so two occurrences of the
	 * same value get different placeholders.
	 * 
	 * @param	string		$value
	 * @return	string
	 */
	protected function createPlaceholder($value) {
		return '@@'.StringUtil::getHash(uniqid(microtime()).$value).'@@';
	}
	
	/**
	 * Caches a matched URL and returns the placeholder that replaces it in the message.
	 * 
	 * @param	array		$matches
	 * @return	string
	 */
	protected function cacheURLsCallback($matches) {
		$hash = $this->createPlaceholder($matches[0]);
		$this->cachedURLs[$hash] = $matches[0];
		
		return $hash;
	}
	
	/**
	 * Caches a matched e-mail address and returns the placeholder that replaces it in the message.
	 * 
	 * @param	array		$matches
	 * @return	string
	 */
	protected function cacheEmailsCallback($matches) {
		$hash = $this->createPlaceholder($matches[0]);
		$this->cachedEmails[$hash] = $matches[0];
		
		return $hash;
	}
	
	/**
	 * Replaces the placeholders of cached URLs and e-mails with links.
	 * 
	 * @param	string		$text
	 * @return	string
	 */
	protected function insertCachedURLs($text) {
		foreach ($this->cachedURLs as $hash => $url) {
			// add protocol if necessary; the check is anchored to the start of the
			// url, a scheme inside the path or query string (`www.example.com/?u=http://...`)
			// must not count as the protocol of the url itself
			if (!preg_match('~^[a-z][a-z0-9+.\-]*://~i', $url)) {
				$url = 'http://'.$url;
			}
			
			$text = str_replace($hash, StringUtil::getAnchorTag($url), $text);
		}
		
		foreach ($this->cachedEmails as $hash => $email) {
			$email = StringUtil::encodeHTML($email);
			
			$text = str_replace($hash, '<a href="mailto:'.$email.'">'.$email.'</a>', $text);
		}
		
		return $text;
	}
	
	/**
	 * Replaces smiley codes in the given (already HTML-encoded) text with their HTML.
	 * 
	 * A code is only replaced if it is preceded by the start of the text or
	 * whitespace and followed by the end of the text, whitespace or a line break tag.
	 * 
	 * @param	string		$text
	 * @return	string		text
	 */
	public function parseSmilies($text) {
		foreach ($this->smilies as $code => $html) {
			$pattern = '~(?<=^|\s)'.preg_quote(StringUtil::encodeHTML($code), '~').'(?=$|\s|<br />|<br>)~';
			
			// a callback inserts the html verbatim; as a plain replacement string
			// any `$n` or `\n` inside the html would be treated as a backreference
			$text = preg_replace_callback($pattern, static function () use ($html) {
				return $html;
			}, $text);
		}
		
		return $text;
	}
}