From d429281cb740d2f1b07cc3d0068cace63c2fc440 Mon Sep 17 00:00:00 2001 From: Alexander Ebert Date: Tue, 17 May 2016 12:48:01 +0200 Subject: [PATCH] Enforced proper paragraphs and block nesting --- .../node/HtmlInputNodeProcessor.class.php | 51 +++++++++++++++++++ .../HtmlInputNodeWoltlabMetacode.class.php | 6 +-- ...mlInputNodeWoltlabMetacodeMarker.class.php | 22 ++++++++ .../html/node/HtmlNodeProcessor.class.php | 3 +- .../install/files/lib/util/DOMUtil.class.php | 51 +++++++++++++++++-- 5 files changed, 123 insertions(+), 10 deletions(-) diff --git a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php index a00d92a41b..c4c6f9f464 100644 --- a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php +++ b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php @@ -1,17 +1,68 @@ invokeHtmlNode(new HtmlInputNodeWoltlabMetacodeMarker()); // handle static converters $this->invokeHtmlNode(new HtmlInputNodeWoltlabMetacode()); + + // remove empty elements and join identical siblings if appropriate + $this->cleanup(); + } + + protected function cleanup() { + // remove empty elements + foreach ($this->emptyTags as $emptyTag) { + $elements = []; + foreach ($this->getDocument()->getElementsByTagName($emptyTag) as $element) { + $elements[] = $element; + } + + /** @var \DOMElement $element */ + foreach ($elements as $element) { + if (DOMUtil::isEmpty($element)) { + DOMUtil::removeNode($element); + } + } + } + + // find identical siblings + foreach ($this->mergeTags as $mergeTag) { + $elements = []; + foreach ($this->getDocument()->getElementsByTagName($mergeTag) as $element) { + $elements[] = $element; + } + + /** @var \DOMElement $element */ + foreach ($elements as $element) { + $sibling = $element->nextSibling; + if ($sibling === null) { + continue; + } + + if ($sibling->nodeName === $mergeTag) { + while 
($sibling->hasChildNodes()) { + $element->appendChild($sibling->childNodes[0]); + } + + DOMUtil::removeNode($sibling); + } + } + } } } diff --git a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacode.class.php b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacode.class.php index 7d699d0438..eaf8401fc9 100644 --- a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacode.class.php +++ b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacode.class.php @@ -48,9 +48,7 @@ class HtmlInputNodeWoltlabMetacode extends AbstractHtmlNode { continue; } - $attributes = $element->getAttribute('data-attributes'); - if (!empty($attributes)) $attributes = @json_decode(base64_decode($attributes), true); - if (!is_array($attributes)) $attributes = []; + $attributes = $htmlNodeProcessor->parseAttributes($element->getAttribute('data-attributes')); // check for converters $converter = (isset($converters[$name])) ? 
$converters[$name] : null; @@ -72,7 +70,7 @@ class HtmlInputNodeWoltlabMetacode extends AbstractHtmlNode { if ($converter->validateAttributes($attributes)) { $newElement = $converter->convert(DOMUtil::childNodesToFragment($element), $attributes); if (!($newElement instanceof \DOMElement)) { - throw new SystemException("Expected a valid DOMElement as return value."); + throw new \UnexpectedValueException("Expected a valid DOMElement as return value."); } DOMUtil::replaceElement($element, $newElement); diff --git a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacodeMarker.class.php b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacodeMarker.class.php index 4948e6fcbf..9e2762194c 100644 --- a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacodeMarker.class.php +++ b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacodeMarker.class.php @@ -4,6 +4,7 @@ use wcf\system\bbcode\HtmlBBCodeParser; use wcf\system\html\node\AbstractHtmlNode; use wcf\system\html\node\HtmlNodeProcessor; use wcf\util\DOMUtil; +use wcf\util\StringUtil; /** * Transforms bbcode markers into the custom HTML element `<woltlab-metacode>`. 
This process @@ -244,6 +245,18 @@ class HtmlInputNodeWoltlabMetacodeMarker extends AbstractHtmlNode { * @param string $attributes encoded attribute string */ protected function convertBlockElement($name, $start, $end, $attributes) { + // we need to ensure proper nesting, block elements are not allowed to + // be placed inside paragraphs, but being a direct child of another block + // element is completely fine + $parent = $start; + do { + $parent = $parent->parentNode; + } + while ($parent->nodeName === 'p' || !$this->isBlockElement($parent)); + + $element = DOMUtil::splitParentsUntil($start, $parent); + DOMUtil::insertBefore($start, $element); + $commonAncestor = DOMUtil::getCommonAncestor($start, $end); $lastElement = DOMUtil::splitParentsUntil($end, $commonAncestor, false); @@ -361,10 +374,19 @@ class HtmlInputNodeWoltlabMetacodeMarker extends AbstractHtmlNode { protected function isBlockElement(\DOMNode $node) { switch ($node->nodeName) { case 'blockquote': + case 'body': case 'code': case 'div': case 'p': return true; + break; + + case 'woltlab-metacode': + /** @var \DOMElement $node */ + if (in_array($node->getAttribute('data-name'), $this->blockElements)) { + return true; + } + break; } return false; diff --git a/wcfsetup/install/files/lib/system/html/node/HtmlNodeProcessor.class.php b/wcfsetup/install/files/lib/system/html/node/HtmlNodeProcessor.class.php index 3a0b1b137f..72a34d8634 100644 --- a/wcfsetup/install/files/lib/system/html/node/HtmlNodeProcessor.class.php +++ b/wcfsetup/install/files/lib/system/html/node/HtmlNodeProcessor.class.php @@ -96,6 +96,7 @@ class HtmlNodeProcessor { $parsedAttributes = JSON::decode($parsedAttributes); } catch (SystemException $e) { + /* parse errors can occur if user provided malicious content - ignore them */ $parsedAttributes = []; } } @@ -106,7 +107,7 @@ class HtmlNodeProcessor { protected function invokeHtmlNode(IHtmlNode $htmlNode) { $tagName = $htmlNode->getTagName(); if (empty($tagName)) { - throw new 
SystemException("Missing tag name for " . get_class($htmlNode)); + throw new \UnexpectedValueException("Missing tag name for " . get_class($htmlNode)); } $elements = []; diff --git a/wcfsetup/install/files/lib/util/DOMUtil.class.php b/wcfsetup/install/files/lib/util/DOMUtil.class.php index f30db6de15..8d90ed795a 100644 --- a/wcfsetup/install/files/lib/util/DOMUtil.class.php +++ b/wcfsetup/install/files/lib/util/DOMUtil.class.php @@ -176,7 +176,7 @@ final class DOMUtil { } } - throw new SystemException("Unable to determine relative node position."); + throw new \RuntimeException("Unable to determine relative node position."); } /** @@ -204,6 +204,35 @@ final class DOMUtil { self::getParentNode($refNode)->insertBefore($node, $refNode); } + /** + * Returns true if this node is empty. + * + * @param \DOMNode $node node + * @return boolean true if node is empty + */ + public static function isEmpty(\DOMNode $node) { + if ($node->nodeType === XML_TEXT_NODE) { + return (StringUtil::trim($node->nodeValue) === ''); + } + else if ($node->nodeType === XML_ELEMENT_NODE) { + /** @var \DOMElement $node */ + if (self::isVoidElement($node)) { + return false; + } + else if ($node->hasChildNodes()) { + for ($i = 0, $length = $node->childNodes->length; $i < $length; $i++) { + if (!self::isEmpty($node->childNodes[$i])) { + return false; + } + } + } + + return true; + } + + return true; + } + /** * Returns true if given node is the first node of its given ancestor. 
* @@ -213,10 +242,7 @@ final class DOMUtil { */ public static function isFirstNode(\DOMNode $node, \DOMElement $ancestor) { if ($node->previousSibling === null) { - if ($node->previousSibling === null) { - throw new \InvalidArgumentException("Provided node is a not a descendant of ancestor element."); - } - else if ($node->parentNode === $ancestor || $node->parentNode->nodeName === 'body') { + if ($node->parentNode === $ancestor || $node->parentNode->nodeName === 'body') { return true; } else { @@ -256,6 +282,21 @@ final class DOMUtil { return false; } + /** + * Returns true if provided element is a void element. Void elements are elements + * that neither contain content nor have a closing tag, such as `<br>`. + * + * @param \DOMElement $element element + * @return boolean true if provided element is a void element + */ + public static function isVoidElement(\DOMElement $element) { + if (preg_match('~^(area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)$~', $element->nodeName)) { + return true; + } + + return false; + } + /** * Moves all nodes into `$container` until it reaches `$lastElement`. The direction * in which nodes will be considered for moving is determined by the logical position -- 2.20.1