Added support for disallowed bbcodes
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / html / input / node / HtmlInputNodeWoltlabMetacodeMarker.class.php
1 <?php
2 namespace wcf\system\html\input\node;
3 use wcf\system\bbcode\HtmlBBCodeParser;
4 use wcf\system\html\node\AbstractHtmlNodeProcessor;
5 use wcf\util\DOMUtil;
6
7 /**
8 * Transforms bbcode markers into the custom HTML element `<woltlab-metacode>`. This process
9 * outputs well-formed markup with proper element nesting.
10 *
11 * @author Alexander Ebert
12 * @copyright 2001-2016 WoltLab GmbH
13 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
14 * @package WoltLabSuite\Core\System\Html\Input\Node
15 * @since 3.0
16 */
17 class HtmlInputNodeWoltlabMetacodeMarker extends AbstractHtmlInputNode {
18 /**
19 * list of bbcodes that represent block elements
20 * @var string[]
21 */
22 public $blockElements = [];
23
24 /**
25 * list of bbcodes that represent source code elements
26 * @var string[]
27 */
28 public $sourceElements = [];
29
30 /**
31 * @inheritDoc
32 */
33 protected $tagName = 'woltlab-metacode-marker';
34
/**
 * Creates the node handler and stores the bbcode parser's lists of block-level
 * and source code bbcodes in `$blockElements` and `$sourceElements`.
 */
public function __construct() {
	$parser = HtmlBBCodeParser::getInstance();
	
	$this->blockElements = $parser->getBlockBBCodes();
	$this->sourceElements = $parser->getSourceBBCodes();
}
42
/**
 * @inheritDoc
 */
public function isAllowed(AbstractHtmlNodeProcessor $htmlNodeProcessor) {
	// the marker elements are not present at the time of validation,
	// hence an empty list is returned unconditionally
	$result = [];
	
	return $result;
}
50
/**
 * @inheritDoc
 */
public function process(array $elements, AbstractHtmlNodeProcessor $htmlNodeProcessor) {
	// pair up the markers by uuid and drop every pair that is missing
	// either its opening or its closing marker
	$pairs = $this->validatePairs($this->buildPairs($elements));
	
	// bucket the complete pairs by their bbcode name
	$groups = $this->groupPairsByName($pairs);
	
	// source bbcodes are converted before anything else, this ensures that
	// bbcodes placed inside source blocks are not evaluated
	$groups = $this->convertSourceGroups($groups);
	
	// markers that are located inside code are turned back into plain text
	$groups = $this->revertMarkerInsideCodeBlocks($groups, $htmlNodeProcessor);
	
	// whatever is left is converted into metacode elements
	$this->convertGroups($groups);
}
73
/**
 * Reverts bbcode marker pairs that are located inside code into their plain bbcode
 * representation. A pair is reverted if its opening or its closing marker has a
 * `<code>`, `<kbd>` or `<pre>` ancestor, or a `<woltlab-metacode>` ancestor whose
 * `data-name` is `code` or `tt`.
 *
 * @param	array				$groups			grouped list of bbcode marker pairs
 * @param	AbstractHtmlNodeProcessor	$htmlNodeProcessor	node processor instance
 * @return	array				groups without the reverted pairs, groups that lost all pairs are removed
 */
protected function revertMarkerInsideCodeBlocks(array $groups, AbstractHtmlNodeProcessor $htmlNodeProcessor) {
	$isInsideCode = function(\DOMElement $element) {
		// walk up the ancestor chain until the top of the tree is reached
		for ($ancestor = $element->parentNode; $ancestor; $ancestor = $ancestor->parentNode) {
			$nodeName = $ancestor->nodeName;
			
			if (in_array($nodeName, ['code', 'kbd', 'pre'], true)) {
				return true;
			}
			
			if ($nodeName === 'woltlab-metacode') {
				$bbcodeName = $ancestor->getAttribute('data-name');
				if ($bbcodeName === 'code' || $bbcodeName === 'tt') {
					return true;
				}
			}
		}
		
		return false;
	};
	
	foreach ($groups as $name => $pairs) {
		// pairs that are not touched, collected with consecutive indices
		$remaining = [];
		
		foreach ($pairs as $pair) {
			if (!$isInsideCode($pair['open']) && !$isInsideCode($pair['close'])) {
				$remaining[] = $pair;
				continue;
			}
			
			// the attribute string must be parsed before the bbcode tag can be rebuilt
			$pair['attributes'] = $htmlNodeProcessor->parseAttributes($pair['attributes']);
			$this->convertToBBCode($name, $pair);
		}
		
		if (count($remaining) === count($pairs)) {
			// nothing was reverted, leave the group as it is
			continue;
		}
		
		if (empty($remaining)) {
			unset($groups[$name]);
		}
		else {
			$groups[$name] = $remaining;
		}
	}
	
	return $groups;
}
126
/**
 * Builds the list of paired bbcode markers, indexed by the `data-uuid` shared by
 * the opening and the closing marker.
 *
 * A marker with a non-empty `data-name` attribute is treated as the opening marker,
 * it also provides the name and the attribute string of the pair. Any other marker
 * is treated as the closing marker. Every entry exposes the same set of keys:
 * markers that were not found are `null` and their source defaults to an empty string.
 *
 * @param	\DOMElement[]	$elements	list of marker elements
 * @return	array		list of paired bbcode markers
 */
protected function buildPairs(array $elements) {
	$pairs = [];
	/** @var \DOMElement $element */
	foreach ($elements as $element) {
		$attributes = $element->getAttribute('data-attributes');
		$name = $element->getAttribute('data-name');
		$uuid = $element->getAttribute('data-uuid');
		
		// base64_decode() reports failures through its return value,
		// suppressing errors is not required
		$source = base64_decode($element->getAttribute('data-source'));
		
		if (!isset($pairs[$uuid])) {
			$pairs[$uuid] = [
				'attributes' => [],
				'close' => null,
				'closeSource' => '',
				'name' => '',
				'open' => null,
				'openSource' => ''
			];
		}
		
		// `getAttribute()` yields an empty string for a missing attribute, comparing
		// against it avoids mistaking a bbcode named "0" for a closing marker
		if ($name !== '') {
			$pairs[$uuid]['attributes'] = $attributes;
			$pairs[$uuid]['name'] = $name;
			$pairs[$uuid]['open'] = $element;
			$pairs[$uuid]['openSource'] = $source;
		}
		else {
			$pairs[$uuid]['close'] = $element;
			$pairs[$uuid]['closeSource'] = $source;
		}
	}
	
	return $pairs;
}
165
/**
 * Removes all bbcode marker pairs that do not consist of both an opening and a
 * closing marker. The orphaned marker of such a pair is removed from the document.
 *
 * @param	array		$pairs		list of paired bbcode markers
 * @return	array		list of complete bbcode marker pairs, original keys are preserved
 */
protected function validatePairs(array $pairs) {
	foreach ($pairs as $uuid => $pair) {
		if ($pair['open'] !== null && $pair['close'] !== null) {
			// complete pair, keep it
			continue;
		}
		
		// a missing closing marker takes precedence, same as checking it first
		$orphan = ($pair['close'] === null) ? $pair['open'] : $pair['close'];
		DOMUtil::removeNode($orphan);
		
		unset($pairs[$uuid]);
	}
	
	return $pairs;
}
189
/**
 * Groups bbcode marker pairs by their bbcode name. The uuid keys are dropped, each
 * group is a consecutively indexed list, and the name is not repeated inside the
 * pair data because it is the key of the group.
 *
 * @param	array		$pairs		list of paired bbcode markers
 * @return	array		grouped list of bbcode marker pairs
 */
protected function groupPairsByName(array $pairs) {
	$groups = [];
	foreach ($pairs as $pair) {
		// appending to a missing key implicitly creates the group
		$groups[$pair['name']][] = [
			'attributes' => $pair['attributes'],
			'close' => $pair['close'],
			'closeSource' => $pair['closeSource'],
			'open' => $pair['open'],
			'openSource' => $pair['openSource']
		];
	}
	
	return $groups;
}
216
/**
 * Converts the groups of all source bbcodes and removes them from the list. Source
 * bbcodes that are also listed as block elements are converted as block elements,
 * all others are converted as inline elements.
 *
 * @param	array		$groups		grouped list of bbcode marker pairs
 * @return	array		filtered groups without source bbcodes
 */
protected function convertSourceGroups(array $groups) {
	foreach ($this->sourceElements as $name) {
		if (!isset($groups[$name])) {
			continue;
		}
		
		$isBlock = in_array($name, $this->blockElements);
		foreach ($groups[$name] as $pair) {
			if ($isBlock) {
				$this->convertBlockElement($name, $pair['open'], $pair['close'], $pair['attributes']);
			}
			else {
				$this->convertInlineElement($name, $pair['open'], $pair['close'], $pair['attributes']);
			}
		}
		
		unset($groups[$name]);
	}
	
	return $groups;
}
249
/**
 * Converts bbcode marker pairs into metacode elements. Groups whose name is listed
 * in `$blockElements` are converted first as block elements, every remaining group
 * is converted as inline elements.
 *
 * @param	array		$groups		grouped list of bbcode marker pairs
 */
protected function convertGroups(array $groups) {
	foreach ($this->blockElements as $name) {
		if (!isset($groups[$name])) {
			continue;
		}
		
		foreach ($groups[$name] as $pair) {
			$this->convertBlockElement($name, $pair['open'], $pair['close'], $pair['attributes']);
		}
		
		// prevent the group from being processed again by the inline pass
		unset($groups[$name]);
	}
	
	// anything that is still present is treated as an inline element
	foreach ($groups as $name => $pairs) {
		foreach ($pairs as $pair) {
			$this->convertInlineElement($name, $pair['open'], $pair['close'], $pair['attributes']);
		}
	}
}
275
/**
 * Converts a block-level bbcode marker pair into a `<woltlab-metacode>` element
 * that takes the place of the start marker. Both markers are removed afterwards.
 *
 * @param	string		$name		bbcode identifier
 * @param	\DOMElement	$start		start node
 * @param	\DOMElement	$end		end node
 * @param	string		$attributes	encoded attribute string
 */
protected function convertBlockElement($name, $start, $end, $attributes) {
	// Proper nesting is required: a block element must not be placed inside a
	// paragraph, but it may be the direct child of another block element. Find
	// the closest ancestor that is a block element other than `<p>`.
	$boundary = $start->parentNode;
	while ($boundary->nodeName === 'p' || !$this->isBlockElement($boundary)) {
		$boundary = $boundary->parentNode;
	}
	
	// NOTE(review): presumably splits the ancestors of the start marker up to the
	// boundary, the marker is then placed in front of the returned node - confirm
	// against DOMUtil
	$splitNode = DOMUtil::splitParentsUntil($start, $boundary);
	DOMUtil::insertBefore($start, $splitNode);
	
	$ancestor = DOMUtil::getCommonAncestor($start, $end);
	$lastNode = DOMUtil::splitParentsUntil($end, $ancestor, false);
	
	$metacode = $start->ownerDocument->createElement('woltlab-metacode');
	$metacode->setAttribute('data-name', $name);
	$metacode->setAttribute('data-attributes', $attributes);
	
	// the metacode element replaces the start marker
	DOMUtil::insertAfter($metacode, $start);
	DOMUtil::removeNode($start);
	
	DOMUtil::moveNodesInto($metacode, $lastNode, $ancestor);
	
	DOMUtil::removeNode($end);
}
311
/**
 * Converts an inline bbcode marker pair into one or more `<woltlab-metacode>`
 * elements and removes both markers.
 *
 * If both markers share the same parent, the nodes from the start marker up to the
 * end marker are wrapped once. Otherwise the content is wrapped piecewise: first
 * everything following the start marker inside its parent, then the element
 * siblings up to the branch containing the end marker, and finally everything
 * preceding the end marker inside its parent.
 *
 * @param	string		$name		bbcode identifier
 * @param	\DOMElement	$start		start node
 * @param	\DOMElement	$end		end node
 * @param	string		$attributes	encoded attribute string
 */
protected function convertInlineElement($name, $start, $end, $attributes) {
	if ($start->parentNode === $end->parentNode) {
		$this->wrapContent($name, $attributes, $start, $end);
		
		DOMUtil::removeNode($start);
		DOMUtil::removeNode($end);
	}
	else {
		$commonAncestor = DOMUtil::getCommonAncestor($start, $end);
		// NOTE(review): presumably the ancestor of `$end` that is a direct child of
		// the common ancestor - confirm against DOMUtil
		$endAncestor = DOMUtil::getParentBefore($end, $commonAncestor);
		
		// wrap the start marker and everything that follows it inside its parent
		$element = $this->wrapContent($name, $attributes, $start, null);
		DOMUtil::removeNode($start);
		
		$element = DOMUtil::getParentBefore($element, $commonAncestor);
		while ($element = $element->nextSibling) {
			if ($element->nodeType === XML_TEXT_NODE) {
				// ignore text nodes between tags
				continue;
			}
			
			if ($element !== $endAncestor) {
				if (!$this->isBlockElement($element)) {
					$this->wrapContent($name, $attributes, $element, null);
				}
				else if ($element->firstChild !== null) {
					// the content of a block element is wrapped rather than the
					// block element itself
					$this->wrapContent($name, $attributes, $element->firstChild, null);
				}
				
				// An empty block element offers nothing to wrap and is skipped,
				// passing its missing first child to `wrapContent()` would raise
				// an exception because both nodes would be `null`.
			}
			else {
				// reached the branch that holds the end marker, wrap everything
				// in front of it and stop
				$this->wrapContent($name, $attributes, null, $end);
				
				DOMUtil::removeNode($end);
				break;
			}
		}
	}
}
358
/**
 * Wraps a sequence of sibling nodes using a newly created `<woltlab-metacode>`
 * element.
 *
 * If `$startNode` is given, the start node and its following siblings are moved
 * into the element, stopping after `$endNode` or at the last sibling if `$endNode`
 * is `null` or not among them. If `$startNode` is `null`, the end node and all of
 * its previous siblings are moved into the element.
 *
 * @param	string		$name		bbcode identifier, stored as `data-name`
 * @param	string		$attributes	encoded attribute string, stored as `data-attributes`
 * @param	\DOMNode|null	$startNode	first node to wrap
 * @param	\DOMNode|null	$endNode	last node to wrap
 * @return	\DOMElement	newly created element
 * @throws	\InvalidArgumentException	if both `$startNode` and `$endNode` are `null`
 */
protected function wrapContent($name, $attributes, $startNode, $endNode) {
	if ($startNode === null && $endNode === null) {
		throw new \InvalidArgumentException("Must provide an existing element for start node or end node, both cannot be null.");
	}
	
	// either node can provide the document used to create the wrapper
	$referenceNode = $startNode ?: $endNode;
	$element = $referenceNode->ownerDocument->createElement('woltlab-metacode');
	$element->setAttribute('data-name', $name);
	$element->setAttribute('data-attributes', $attributes);
	
	if ($startNode) {
		DOMUtil::insertBefore($element, $startNode);
		
		// the wrapper sits in front of the start node, pull in the following
		// siblings one by one
		while ($sibling = $element->nextSibling) {
			$element->appendChild($sibling);
			
			if ($sibling === $endNode) {
				break;
			}
		}
	}
	else {
		DOMUtil::insertAfter($element, $endNode);
		
		// There is no start node to stop at in this branch, therefore every
		// previous sibling is moved into the wrapper.
		while ($sibling = $element->previousSibling) {
			DOMUtil::prepend($sibling, $element);
		}
	}
	
	return $element;
}
404
/**
 * Returns true if the provided node is considered to be a block element. This
 * applies to `<blockquote>`, `<body>`, `<code>`, `<div>` and `<p>`, as well as
 * to `<woltlab-metacode>` elements whose `data-name` is listed in `$blockElements`.
 *
 * @param	\DOMNode	$node		node
 * @return	boolean		true for certain block elements
 */
protected function isBlockElement(\DOMNode $node) {
	$nodeName = $node->nodeName;
	
	if (in_array($nodeName, ['blockquote', 'body', 'code', 'div', 'p'], true)) {
		return true;
	}
	
	if ($nodeName === 'woltlab-metacode') {
		/** @var \DOMElement $node */
		return in_array($node->getAttribute('data-name'), $this->blockElements);
	}
	
	return false;
}
431
/**
 * Replaces both markers of a bbcode marker pair with text nodes containing their
 * plain bbcode representation. This method is used to convert markers inside
 * source code elements.
 *
 * The recorded source of a marker is used if it is not empty. Otherwise the
 * opening tag is rebuilt from the name and the attributes, and the closing tag
 * falls back to `[/name]`.
 *
 * @param	string		$name		bbcode name
 * @param	array		$pair		bbcode marker pair
 */
protected function convertToBBCode($name, array $pair) {
	// swaps a marker for a text node holding the given text
	$replaceMarker = function($marker, $text) {
		$textNode = $marker->ownerDocument->createTextNode($text);
		DOMUtil::insertBefore($textNode, $marker);
		DOMUtil::removeNode($marker);
	};
	
	$openingTag = $pair['openSource'];
	if (!$openingTag) {
		$attributes = (isset($pair['attributes'])) ? $pair['attributes'] : '';
		$openingTag = HtmlBBCodeParser::getInstance()->buildBBCodeTag($name, $attributes, true);
	}
	$replaceMarker($pair['open'], $openingTag);
	
	$closingTag = $pair['closeSource'];
	if (!$closingTag) {
		$closingTag = '[/' . $name . ']';
	}
	$replaceMarker($pair['close'], $closingTag);
}
454 }