Commit | Line | Data |
---|---|---|
60a35505 | 1 | <?php |
a9229942 | 2 | |
60a35505 | 3 | namespace wcf\system\html\input\node; |
a9229942 | 4 | |
4f7b97e3 | 5 | use wcf\system\bbcode\BBCodeHandler; |
1e8edfde | 6 | use wcf\system\bbcode\HtmlBBCodeParser; |
01ba60c7 | 7 | use wcf\system\event\EventHandler; |
4ccf5975 | 8 | use wcf\system\html\node\AbstractHtmlNodeProcessor; |
60a35505 | 9 | use wcf\util\DOMUtil; |
60a35505 AE |
10 | |
/**
 * Transforms bbcode markers into the custom HTML element `<woltlab-metacode>`. This process
 * outputs well-formed markup with proper element nesting.
 *
 * @author  Alexander Ebert
 * @copyright   2001-2019 WoltLab GmbH
 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package WoltLabSuite\Core\System\Html\Input\Node
 * @since   3.0
 */
class HtmlInputNodeWoltlabMetacodeMarker extends AbstractHtmlInputNode
{
    /**
     * list of tag names that should be considered as block level elements
     * @var string[]
     */
    public static $customBlockElementTagNames = [];

    /**
     * list of bbcodes that represent block elements
     * @var string[]
     */
    public $blockElements = [];

    /**
     * list of bbcodes that represent source code elements
     * @var string[]
     */
    public $sourceElements = [];

    /**
     * @inheritDoc
     */
    protected $tagName = 'woltlab-metacode-marker';

    /**
     * Reads the lists of block and source bbcodes from the bbcode parser.
     */
    public function __construct()
    {
        $this->blockElements = HtmlBBCodeParser::getInstance()->getBlockBBCodes();
        $this->sourceElements = HtmlBBCodeParser::getInstance()->getSourceBBCodes();
    }

    /**
     * @inheritDoc
     */
    public function isAllowed(AbstractHtmlNodeProcessor $htmlNodeProcessor)
    {
        // metacode-marker isn't present at time of validation
        return [];
    }

    /**
     * @inheritDoc
     */
    public function process(array $elements, AbstractHtmlNodeProcessor $htmlNodeProcessor)
    {
        // collect pairs
        $pairs = $this->buildPairs($elements);

        // validate pairs and remove items that lack an opening/closing element
        $pairs = $this->validatePairs($pairs);

        // group pairs by tag name
        $groups = $this->groupPairsByName($pairs);

        // revert markers of bbcodes that are not permitted in this context
        $groups = $this->filterGroups($groups, $htmlNodeProcessor);
        if (empty($groups)) {
            return;
        }

        // convert source bbcode groups first to ensure no bbcodes inside
        // source blocks will be evaluated
        $groups = $this->convertSourceGroups($groups);

        // anything that is still located inside a code element is turned back into plain bbcode
        $groups = $this->revertMarkerInsideCodeBlocks($groups, $htmlNodeProcessor);

        // convert pairs into HTML or metacode
        $this->convertGroups($groups);
    }

    /**
     * Filters groups by reverting metacode markers for invalid bbcodes.
     *
     * A bbcode is reverted when an event listener removed it from the `bbcodes` list
     * or when the bbcode handler reports it as unavailable.
     *
     * @param       array                           $groups                 grouped list of bbcode marker pairs
     * @param       AbstractHtmlNodeProcessor       $htmlNodeProcessor      node processor instance
     * @return      array           filtered groups
     */
    protected function filterGroups(array $groups, AbstractHtmlNodeProcessor $htmlNodeProcessor)
    {
        /** @noinspection PhpUndefinedMethodInspection */
        $data = [
            'context' => $htmlNodeProcessor->getHtmlProcessor()->getContext(),
            'bbcodes' => \array_keys($groups),
        ];

        // `$data` is handed to the listeners; the check below relies on them
        // being able to alter the `bbcodes` list
        EventHandler::getInstance()->fireAction($this, 'filterGroups', $data);

        foreach ($groups as $name => $pairs) {
            if (!\in_array($name, $data['bbcodes']) || !BBCodeHandler::getInstance()->isAvailableBBCode($name)) {
                foreach ($pairs as $pair) {
                    // `convertToBBCode()` works on the parsed attributes rather than the encoded string
                    $pair['attributes'] = $htmlNodeProcessor->parseAttributes($pair['attributes']);
                    $this->convertToBBCode($name, $pair);
                }

                unset($groups[$name]);
            }
        }

        return $groups;
    }

    /**
     * Transforms bbcode markers inside source code elements into their plain bbcode representation.
     *
     * @param       array                           $groups                 grouped list of bbcode marker pairs
     * @param       AbstractHtmlNodeProcessor       $htmlNodeProcessor      node processor instance
     * @return      array           groups without the pairs located inside code elements, each group is
     *                              a gap-free list and groups without remaining pairs are dropped
     */
    protected function revertMarkerInsideCodeBlocks(array $groups, AbstractHtmlNodeProcessor $htmlNodeProcessor)
    {
        foreach ($groups as $name => $pairs) {
            $remaining = [];
            foreach ($pairs as $pair) {
                if (!$this->isInsideCode($pair['open']) && !$this->isInsideCode($pair['close'])) {
                    $remaining[] = $pair;
                    continue;
                }

                // either marker sits inside a code element, restore the plain bbcode
                $pair['attributes'] = $htmlNodeProcessor->parseAttributes($pair['attributes']);
                $this->convertToBBCode($name, $pair);
            }

            if ($remaining === []) {
                unset($groups[$name]);
            } else {
                $groups[$name] = $remaining;
            }
        }

        return $groups;
    }

    /**
     * Returns `true` if the given element is inside a code element.
     *
     * Code elements are `<code>`, `<kbd>`, `<pre>` and `<woltlab-metacode>` elements
     * whose `data-name` is `code` or `tt`.
     *
     * @param       \DOMElement     $element
     * @return      bool
     */
    protected function isInsideCode(\DOMElement $element)
    {
        $parent = $element;
        while ($parent = $parent->parentNode) {
            $nodeName = $parent->nodeName;

            if ($nodeName === 'code' || $nodeName === 'kbd' || $nodeName === 'pre') {
                return true;
            }

            if ($nodeName === 'woltlab-metacode') {
                $name = $parent->getAttribute('data-name');
                if ($name === 'code' || $name === 'tt') {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Builds the list of paired bbcode markers.
     *
     * Markers are matched by their `data-uuid` attribute. A marker with a bbcode name is
     * treated as the opening marker, any other marker as the closing one.
     *
     * @param       \DOMElement[]   $elements       list of marker elements
     * @return      array           list of paired bbcode markers
     */
    protected function buildPairs(array $elements)
    {
        $pairs = [];
        /** @var \DOMElement $element */
        foreach ($elements as $element) {
            $attributes = $element->getAttribute('data-attributes');
            $name = $element->getAttribute('data-name');
            $uuid = $element->getAttribute('data-uuid');
            // `getAttribute()` always yields a string, there is nothing to suppress here
            $source = \base64_decode($element->getAttribute('data-source'));

            if (!isset($pairs[$uuid])) {
                // initialize every key so that the shape does not depend on which marker was seen
                $pairs[$uuid] = [
                    'attributes' => [],
                    'close' => null,
                    'closeSource' => '',
                    'name' => '',
                    'open' => null,
                    'openSource' => '',
                    'useText' => false,
                ];
            }

            if ($name) {
                $pairs[$uuid]['attributes'] = $attributes;
                $pairs[$uuid]['name'] = $name;
                $pairs[$uuid]['open'] = $element;
                $pairs[$uuid]['openSource'] = $source;
                $pairs[$uuid]['useText'] = ($element->hasAttribute('data-use-text')) ? $element->getAttribute('data-use-text') : false;
            } else {
                $pairs[$uuid]['close'] = $element;
                $pairs[$uuid]['closeSource'] = $source;
            }
        }

        return $pairs;
    }

    /**
     * Validates bbcode marker pairs to include both an opening and closing element.
     * The remaining marker of an incomplete pair is removed from the document.
     *
     * @param       array           $pairs          list of paired bbcode markers
     * @return      array           filtered list of paired bbcode markers
     */
    protected function validatePairs(array $pairs)
    {
        foreach ($pairs as $uuid => $data) {
            if ($data['close'] === null) {
                DOMUtil::removeNode($data['open']);
            } elseif ($data['open'] === null) {
                DOMUtil::removeNode($data['close']);
            } else {
                // complete pair, keep it
                continue;
            }

            unset($pairs[$uuid]);
        }

        return $pairs;
    }

    /**
     * Groups bbcode marker pairs by their common bbcode identifier.
     *
     * @param       array           $pairs          list of paired bbcode markers
     * @return      array           grouped list of bbcode marker pairs
     */
    protected function groupPairsByName(array $pairs)
    {
        $groups = [];
        foreach ($pairs as $data) {
            // appending creates the list for a bbcode on first use
            $groups[$data['name']][] = [
                'attributes' => $data['attributes'],
                'close' => $data['close'],
                'closeSource' => $data['closeSource'],
                'open' => $data['open'],
                'openSource' => $data['openSource'],
                'useText' => $data['useText'],
            ];
        }

        return $groups;
    }

    /**
     * Converts source bbcode groups.
     *
     * Pairs with a marker inside a code element are skipped and remain part of the returned groups.
     *
     * @param       array           $groups         grouped list of bbcode marker pairs
     * @return      array           filtered groups without the converted source bbcodes
     */
    protected function convertSourceGroups(array $groups)
    {
        foreach ($this->sourceElements as $name) {
            if (!isset($groups[$name])) {
                continue;
            }

            $isBlock = \in_array($name, $this->blockElements, true);

            $remaining = [];
            foreach ($groups[$name] as $data) {
                if ($this->isInsideCode($data['open']) || $this->isInsideCode($data['close'])) {
                    $remaining[] = $data;
                    continue;
                }

                if ($isBlock) {
                    $this->convertBlockElement($name, $data['open'], $data['close'], $data['attributes']);
                } else {
                    $this->convertInlineElement($name, $data['open'], $data['close'], $data['attributes']);
                }
            }

            if ($remaining === []) {
                unset($groups[$name]);
            } else {
                $groups[$name] = $remaining;
            }
        }

        return $groups;
    }

    /**
     * Converts bbcode marker pairs into block- or inline-elements.
     *
     * @param       array           $groups         grouped list of bbcode marker pairs
     */
    protected function convertGroups(array $groups)
    {
        // block bbcodes are converted first, in the order given by `$blockElements`
        foreach ($this->blockElements as $name) {
            if (!isset($groups[$name])) {
                continue;
            }

            foreach ($groups[$name] as $data) {
                $this->convertBlockElement($name, $data['open'], $data['close'], $data['attributes']);
            }

            unset($groups[$name]);
        }

        // treat remaining elements as inline elements
        foreach ($groups as $name => $pairs) {
            foreach ($pairs as $data) {
                $this->convertInlineElement($name, $data['open'], $data['close'], $data['attributes']);
            }
        }
    }

    /**
     * Converts a block-level bbcode marker pair.
     *
     * @param       string          $name           bbcode identifier
     * @param       \DOMElement     $start          start node
     * @param       \DOMElement     $end            end node
     * @param       string          $attributes     encoded attribute string
     */
    protected function convertBlockElement($name, $start, $end, $attributes)
    {
        // we need to ensure proper nesting, block elements are not allowed to
        // be placed inside paragraphs, but being a direct child of another block
        // element is completely fine
        $parent = $start;
        $foundLi = false;
        do {
            $parent = $parent->parentNode;
            // only the first <li> ancestor of the start marker is examined
            if (!$foundLi && $parent->nodeName === 'li') {
                // allow <li> if both the start and end have the same <li> as parent
                $parentEnd = $end;
                do {
                    $parentEnd = $parentEnd->parentNode;
                    if ($parentEnd === null) {
                        break;
                    }

                    if ($parentEnd->nodeName === 'li') {
                        if ($parent === $parentEnd) {
                            // same ancestor, exit both loops
                            break 2;
                        }

                        // mismatch
                        break;
                    }
                } while ($parentEnd);

                $foundLi = true;
            }
        } while ($parent->nodeName === 'p' || !$this->isBlockElement($parent));

        // block elements can sometimes contain a line break after the end tag
        // which needs to be removed to avoid it being split into a separate p
        if ($node = $end->nextSibling) {
            if ($node->nodeType === \XML_TEXT_NODE && ($node->textContent === "\n" || $node->textContent === "\r\n")) {
                DOMUtil::removeNode($node);
            }
        }

        // NOTE(review): presumably splits the ancestors of `$start` up to `$parent` and yields the
        // node that the start marker has to be placed in front of; confirm against `DOMUtil`
        $element = DOMUtil::splitParentsUntil($start, $parent);
        if ($start !== $element) {
            DOMUtil::insertBefore($start, $element);
        }

        $commonAncestor = DOMUtil::getCommonAncestor($start, $end);
        $lastElement = DOMUtil::splitParentsUntil($end, $commonAncestor, false);

        $container = $start->ownerDocument->createElement('woltlab-metacode');
        $container->setAttribute('data-name', $name);
        $container->setAttribute('data-attributes', $attributes);

        // the container takes the place of the start marker
        DOMUtil::insertAfter($container, $start);
        DOMUtil::removeNode($start);

        DOMUtil::moveNodesInto($container, $lastElement, $commonAncestor);

        DOMUtil::removeNode($end);
    }

    /**
     * Converts an inline bbcode marker pair.
     *
     * @param       string          $name           bbcode identifier
     * @param       \DOMElement     $start          start node
     * @param       \DOMElement     $end            end node
     * @param       string          $attributes     encoded attribute string
     */
    protected function convertInlineElement($name, $start, $end, $attributes)
    {
        if ($start->parentNode === $end->parentNode) {
            // both markers are siblings, a single wrapper is sufficient
            $this->wrapContent($name, $attributes, $start, $end);

            DOMUtil::removeNode($start);
            DOMUtil::removeNode($end);
        } else {
            $commonAncestor = DOMUtil::getCommonAncestor($start, $end);
            $endAncestor = DOMUtil::getParentBefore($end, $commonAncestor);

            // wrap the start marker and everything following it inside its parent
            $element = $this->wrapContent($name, $attributes, $start, null);
            DOMUtil::removeNode($start);

            $element = DOMUtil::getParentBefore($element, $commonAncestor);
            if ($element === null) {
                $element = $commonAncestor;
            }

            // walk the following siblings until the branch holding the end marker is reached
            while ($element = $element->nextSibling) {
                if ($element->nodeType === \XML_TEXT_NODE) {
                    // ignore text nodes between tags
                    continue;
                }

                if ($element !== $endAncestor) {
                    if ($this->isBlockElement($element)) {
                        // `wrapContent()` requires an existing node, empty block elements
                        // receive an empty text node for this purpose
                        if ($element->childNodes->length === 0) {
                            $element->appendChild($element->ownerDocument->createTextNode(''));
                        }

                        $this->wrapContent($name, $attributes, $element->childNodes->item(0), null);
                    } else {
                        $this->wrapContent($name, $attributes, $element, null);
                    }
                } else {
                    // wrap the end marker together with everything preceding it inside its parent
                    $this->wrapContent($name, $attributes, null, $end);

                    DOMUtil::removeNode($end);
                    break;
                }
            }
        }
    }

    /**
     * Wraps a sequence of nodes using a newly created element. If `$startNode` is `null` the end
     * node and all previous siblings will be added to the element. The reverse takes place if
     * `$endNode` is `null`.
     *
     * @param       string                  $name           bbcode identifier, stored as `data-name`
     * @param       string                  $attributes     encoded attribute string
     * @param       \DOMElement|null        $startNode      first node to wrap
     * @param       \DOMElement|null        $endNode        last node to wrap
     * @return      \DOMElement             newly created element
     * @throws      \InvalidArgumentException       if both `$startNode` and `$endNode` are `null`
     */
    protected function wrapContent($name, $attributes, $startNode, $endNode)
    {
        if ($startNode === null && $endNode === null) {
            throw new \InvalidArgumentException(
                "Must provide an existing element for start node or end node, both cannot be null."
            );
        }

        if ($startNode) {
            $element = $startNode->ownerDocument->createElement('woltlab-metacode');
        } else {
            $element = $endNode->ownerDocument->createElement('woltlab-metacode');
        }

        $element->setAttribute('data-name', $name);
        $element->setAttribute('data-attributes', $attributes);

        if ($startNode) {
            DOMUtil::insertBefore($element, $startNode);

            // move the following siblings into the wrapper, up to and including the end node
            while ($sibling = $element->nextSibling) {
                $element->appendChild($sibling);

                if ($sibling === $endNode) {
                    break;
                }
            }
        } else {
            DOMUtil::insertAfter($element, $endNode);

            // `$startNode` is `null` in this branch, every previous sibling is moved into the wrapper
            while ($sibling = $element->previousSibling) {
                DOMUtil::prepend($sibling, $element);

                if ($sibling === $startNode) {
                    break;
                }
            }
        }

        return $element;
    }

    /**
     * Returns true if provided node is a block element.
     *
     * @param       \DOMNode        $node           node
     * @return      bool            true for certain block elements
     */
    protected function isBlockElement(\DOMNode $node)
    {
        switch ($node->nodeName) {
            case 'blockquote':
            case 'body':
            case 'code':
            case 'div':
            case 'p':
            case 'td':
            case 'woltlab-quote':
            case 'woltlab-spoiler':
                return true;

            case 'woltlab-metacode':
                // a metacode element is a block element if its bbcode is a block bbcode
                /** @var \DOMElement $node */
                return \in_array($node->getAttribute('data-name'), $this->blockElements, true);

            default:
                return \in_array($node->nodeName, self::$customBlockElementTagNames, true);
        }
    }

    /**
     * Converts a bbcode marker pair into their plain bbcode representation. This method is used
     * to convert markers inside source code elements.
     *
     * @param       string          $name           bbcode name
     * @param       array           $pair           bbcode marker pair, `attributes` must hold the parsed attributes
     */
    protected function convertToBBCode($name, array $pair)
    {
        /** @var \DOMElement $start */
        $start = $pair['open'];
        /** @var \DOMElement $end */
        $end = $pair['close'];

        $attributes = $pair['attributes'] ?? [];
        $content = '';
        if (isset($pair['useText']) && $pair['useText'] !== false && isset($attributes[$pair['useText']])) {
            // `array_splice()` removes every attribute from that offset onwards, the first
            // removed value is appended as text behind the opening tag
            $content = \array_splice($attributes, $pair['useText'])[0];
        }

        // the original source of the tag takes precedence, otherwise the tag is rebuilt
        $textNode = $start->ownerDocument->createTextNode(($pair['openSource'] ?: HtmlBBCodeParser::getInstance()->buildBBCodeTag(
            $name,
            $attributes,
            true
        )) . $content);
        DOMUtil::insertBefore($textNode, $start);
        DOMUtil::removeNode($start);

        $textNode = $end->ownerDocument->createTextNode($pair['closeSource'] ?: '[/' . $name . ']');
        DOMUtil::insertBefore($textNode, $end);
        DOMUtil::removeNode($end);
    }
}