<?php

namespace wcf\system\html\input;

use wcf\system\bbcode\HtmlBBCodeParser;
use wcf\system\html\AbstractHtmlProcessor;
use wcf\system\html\input\filter\IHtmlInputFilter;
use wcf\system\html\input\filter\MessageHtmlInputFilter;
use wcf\system\html\input\node\HtmlInputNodeProcessor;
use wcf\util\DOMUtil;
use wcf\util\StringUtil;

/**
 * Reads an HTML string, applies filters and parses all nodes including bbcodes.
 *
 * @author      Alexander Ebert
 * @copyright   2001-2019 WoltLab GmbH
 * @license     GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package     WoltLabSuite\Core\System\Html\Input
 * @since       3.0
 */
class HtmlInputProcessor extends AbstractHtmlProcessor
{
    /**
     * list of embedded content grouped by type
     * @var array
     */
    protected $embeddedContent = [];

    /**
     * lazily created filter that is applied to the incoming HTML
     * @var IHtmlInputFilter
     */
    protected $htmlInputFilter;

    /**
     * lazily created node processor holding the currently loaded document
     * @var HtmlInputNodeProcessor
     */
    protected $htmlInputNodeProcessor;

    /**
     * skip the HTML filter during message reprocessing
     * @var bool
     */
    protected $skipFilter = false;

    /**
     * Processes the input html string.
     *
     * The steps run in a fixed order: internal state is reset, the context is
     * stored, newlines are unified and the string is trimmed, plain bbcode input
     * is optionally converted into HTML, bbcodes are handed to the bbcode parser,
     * the HTML filter is applied (unless `$skipFilter` is set) and finally the
     * node processor loads and processes the result.
     *
     * @param string $html              html string
     * @param string $objectType        object type identifier
     * @param int    $objectID          object id
     * @param bool   $convertFromBBCode interpret input as bbcode
     */
    public function process($html, $objectType, $objectID = 0, $convertFromBBCode = false)
    {
        $this->reset();

        $this->setContext($objectType, $objectID);

        // enforce consistent newlines
        $html = StringUtil::trim(StringUtil::unifyNewlines($html));

        // the caller declared the input to be a plain bbcode string rather than
        // HTML, thus it must be turned into paragraphs first
        if ($convertFromBBCode) {
            $html = $this->convertToHtml($html);
        }

        // transform bbcodes into metacode markers
        $html = HtmlBBCodeParser::getInstance()->parse($html);

        // filter HTML
        if (!$this->skipFilter) {
            $html = $this->getHtmlInputFilter()->apply($html);
        }

        // pre-parse HTML
        $nodeProcessor = $this->getHtmlInputNodeProcessor();
        $nodeProcessor->load($this, $html);
        $nodeProcessor->process();
        $this->embeddedContent = $nodeProcessor->getEmbeddedContent();
    }

    /**
     * Processes a HTML string to provide the general DOM API. This method
     * does not perform any filtering or validation. You SHOULD NOT use this
     * to deal with HTML that has not been filtered previously.
     *
     * @param string $html html string
     */
    public function processIntermediate($html)
    {
        $this->getHtmlInputNodeProcessor()->load($this, $html);
    }

    /**
     * Reprocesses a message by transforming the message into an editor-like
     * state using plain bbcodes instead of metacode elements.
     *
     * Every `<woltlab-metacode>` element is replaced by a textual
     * `[name=attributes]…[/name]` pair wrapped around its former children, then
     * the resulting HTML is passed through `process()` with the HTML filter
     * disabled.
     *
     * @param string $html       html string
     * @param string $objectType object type identifier
     * @param int    $objectID   object id
     * @since 3.1
     */
    public function reprocess($html, $objectType, $objectID)
    {
        $this->processIntermediate($html);

        $nodeProcessor = $this->getHtmlInputNodeProcessor();

        // revert embedded bbcodes for re-evaluation
        $metacodes = DOMUtil::getElements($nodeProcessor->getDocument(), 'woltlab-metacode');
        foreach ($metacodes as $metacode) {
            $name = $metacode->getAttribute('data-name');
            $attributes = $nodeProcessor->parseAttributes($metacode->getAttribute('data-attributes'));

            // serialize each attribute into its bbcode notation
            $attributeParts = [];
            foreach ($attributes as $attribute) {
                if ($attribute === true) {
                    $attributeParts[] = 'true';
                } elseif ($attribute === false) {
                    $attributeParts[] = 'false';
                } elseif (\is_string($attribute) || \is_numeric($attribute)) {
                    // only single quotes are escaped, they delimit the value
                    $attributeParts[] = "'" . \addcslashes((string)$attribute, "'") . "'";
                } else {
                    // discard anything that is not string-like
                    $attributeParts[] = "''";
                }
            }

            $openingTag = '[' . $name;
            if ($attributeParts !== []) {
                $openingTag .= '=' . \implode(',', $attributeParts);
            }
            $openingTag .= ']';

            // wrap the existing children with the textual opening and closing tag
            $metacode->insertBefore(
                $metacode->ownerDocument->createTextNode($openingTag),
                $metacode->firstChild
            );
            $metacode->appendChild(
                $metacode->ownerDocument->createTextNode('[/' . $name . ']')
            );

            // NOTE(review): the second argument presumably keeps the child nodes
            // in place of the removed element; confirm against DOMUtil::removeNode()
            DOMUtil::removeNode($metacode, true);
        }

        // the filter is skipped for this run only, afterwards the flag is put
        // back to whatever it was instead of being forced to `false`
        $previousSkipFilter = $this->skipFilter;
        $this->skipFilter = true;
        try {
            $this->process($this->getHtml(), $objectType, $objectID, false);
        } finally {
            $this->skipFilter = $previousSkipFilter;
        }
    }

    /**
     * Processes only embedded content. This method should only be called when rebuilding
     * data where only embedded content is relevant, but no actual parsing is required.
     *
     * @param string $html       html string
     * @param string $objectType object type identifier
     * @param int    $objectID   object id
     * @throws \UnexpectedValueException if the object id is empty or zero
     */
    public function processEmbeddedContent($html, $objectType, $objectID)
    {
        if (!$objectID) {
            throw new \UnexpectedValueException("Object id parameter must be non-zero.");
        }

        $this->setContext($objectType, $objectID);

        $nodeProcessor = $this->getHtmlInputNodeProcessor();
        $nodeProcessor->load($this, $html);
        $nodeProcessor->processEmbeddedContent();
        $this->embeddedContent = $nodeProcessor->getEmbeddedContent();
    }

    /**
     * Checks the input html for disallowed bbcodes and returns any matches.
     *
     * @return string[] list of matched disallowed bbcodes
     */
    public function validate()
    {
        return $this->getHtmlInputNodeProcessor()->validate();
    }

    /**
     * Enforces the maximum depth of nested quotes.
     *
     * @param int $depth
     */
    public function enforceQuoteDepth($depth)
    {
        $this->getHtmlInputNodeProcessor()->enforceQuoteDepth($depth);
    }

    /**
     * Returns the parsed HTML ready to store.
     *
     * @return string parsed html
     */
    public function getHtml()
    {
        return $this->getHtmlInputNodeProcessor()->getHtml();
    }

    /**
     * Returns the raw text content of current document.
     *
     * @return string raw text content
     */
    public function getTextContent()
    {
        return $this->getHtmlInputNodeProcessor()->getTextContent();
    }

    /**
     * Returns true if the message appears to be empty.
     *
     * @return bool true if message appears to be empty
     */
    public function appearsToBeEmpty()
    {
        return $this->getHtmlInputNodeProcessor()->appearsToBeEmpty();
    }

    /**
     * Returns all embedded content data collected by the last call to
     * `process()` or `processEmbeddedContent()`.
     *
     * @return array
     */
    public function getEmbeddedContent()
    {
        return $this->embeddedContent;
    }

    /**
     * Returns the node processor, creating it on first access.
     *
     * @return HtmlInputNodeProcessor
     */
    public function getHtmlInputNodeProcessor()
    {
        if ($this->htmlInputNodeProcessor === null) {
            $this->htmlInputNodeProcessor = new HtmlInputNodeProcessor();
        }

        return $this->htmlInputNodeProcessor;
    }

    /**
     * Sets the new object id.
     *
     * @param int $objectID object id
     */
    public function setObjectID($objectID)
    {
        $this->context['objectID'] = $objectID;
    }

    /**
     * Resets internal states and discards references to objects.
     */
    protected function reset()
    {
        $this->embeddedContent = [];
        $this->htmlInputNodeProcessor = null;
    }

    /**
     * Returns the HTML input filter, creating the default message filter on
     * first access.
     *
     * @return IHtmlInputFilter
     */
    protected function getHtmlInputFilter()
    {
        if ($this->htmlInputFilter === null) {
            $this->htmlInputFilter = new MessageHtmlInputFilter();
        }

        return $this->htmlInputFilter;
    }

    /**
     * Converts bbcodes using newlines into valid HTML.
     *
     * The input is HTML-encoded, bare `[attach=<id>]` tags receive their default
     * arguments and the text is then split at runs of newlines: a single newline
     * becomes `<br>`, a run of n > 1 newlines closes the current paragraph and
     * emits n - 1 empty paragraphs, everything else is paragraph content.
     *
     * @param string $html html string
     * @return string parsed html string
     */
    protected function convertToHtml($html)
    {
        $html = StringUtil::encodeHTML($html);
        $html = \preg_replace('/\[attach=(\d+)\]/', "[attach=\\1,'none','2']", $html);
        $parts = \preg_split('~(\n+)~', $html, -1, \PREG_SPLIT_DELIM_CAPTURE);

        $openParagraph = false;
        $html = '';
        foreach ($parts as $part) {
            // regular text, including the empty string between or around delimiters
            if (\strpos($part, "\n") === false) {
                if (!$openParagraph) {
                    $html .= '<p>';
                    $openParagraph = true;
                }

                $html .= $part;

                continue;
            }

            $newlines = \substr_count($part, "\n");
            if ($newlines === 1) {
                $html .= '<br>';

                continue;
            }

            if ($openParagraph) {
                $html .= '</p>';
                $openParagraph = false;
            }

            // ignore one newline because a new paragraph with bbcodes is created
            // using two subsequent newlines; at least two newlines are present
            // at this point, thus there is always one empty paragraph or more
            $html .= \str_repeat('<p><br></p>', $newlines - 1);
        }

        // the split result always ends with a text part, which leaves a
        // paragraph open that needs to be closed
        return $html . '</p>';
    }
}