Enforced proper paragraphs and block nesting
author Alexander Ebert <ebert@woltlab.com>
Tue, 17 May 2016 10:48:01 +0000 (12:48 +0200)
committer Alexander Ebert <ebert@woltlab.com>
Tue, 17 May 2016 10:48:07 +0000 (12:48 +0200)
wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php
wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacode.class.php
wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeWoltlabMetacodeMarker.class.php
wcfsetup/install/files/lib/system/html/node/HtmlNodeProcessor.class.php
wcfsetup/install/files/lib/util/DOMUtil.class.php

index a00d92a41b511d5b6582ac7a6b5dda0a8b233e73..c4c6f9f464266ea9046d51e891b7561b7844c7fa 100644 (file)
@@ -1,17 +1,68 @@
 <?php
 namespace wcf\system\html\input\node;
 use wcf\system\html\node\HtmlNodeProcessor;
+use wcf\util\DOMUtil;
 
 /**
  * TOOD documentation
  * @since      2.2
  */
 class HtmlInputNodeProcessor extends HtmlNodeProcessor {
+       // TODO: this should include other tags
+       protected $emptyTags = ['em', 'strong', 'u'];
+       
+       // TODO: this should include other tags
+       protected $mergeTags = ['em', 'strong', 'u'];
+       
        public function process() {
                // process metacode markers first
                $this->invokeHtmlNode(new HtmlInputNodeWoltlabMetacodeMarker());
                
                // handle static converters
                $this->invokeHtmlNode(new HtmlInputNodeWoltlabMetacode());
+               
+               // remove empty elements and join identical siblings if appropriate
+               $this->cleanup();
+       }
+       
+       protected function cleanup() {
+               // remove emtpy elements
+               foreach ($this->emptyTags as $emptyTag) {
+                       $elements = [];
+                       foreach ($this->getDocument()->getElementsByTagName($emptyTag) as $element) {
+                               $elements[] = $element;
+                       }
+                       
+                       /** @var \DOMElement $element */
+                       foreach ($elements as $element) {
+                               if (DOMUtil::isEmpty($element)) {
+                                       DOMUtil::removeNode($element);
+                               }
+                       }
+               }
+               
+               // find identical siblings
+               foreach ($this->mergeTags as $mergeTag) {
+                       $elements = [];
+                       foreach ($this->getDocument()->getElementsByTagName($mergeTag) as $element) {
+                               $elements[] = $element;
+                       }
+                       
+                       /** @var \DOMElement $element */
+                       foreach ($elements as $element) {
+                               $sibling = $element->nextSibling;
+                               if ($sibling === null) {
+                                       continue;
+                               }
+                               
+                               if ($sibling->nodeName === $mergeTag) {
+                                       while ($sibling->hasChildNodes()) {
+                                               $element->appendChild($sibling->childNodes[0]);
+                                       }
+                                       
+                                       DOMUtil::removeNode($sibling);
+                               }
+                       }
+               }
        }
 }
index 7d699d04381b660bc7b8f89f233422858083e360..eaf8401fc9dc9864963c041df0cc0d0c9e609a4f 100644 (file)
@@ -48,9 +48,7 @@ class HtmlInputNodeWoltlabMetacode extends AbstractHtmlNode {
                                continue;
                        }
                        
-                       $attributes = $element->getAttribute('data-attributes');
-                       if (!empty($attributes)) $attributes = @json_decode(base64_decode($attributes), true);
-                       if (!is_array($attributes)) $attributes = [];
+                       $attributes = $htmlNodeProcessor->parseAttributes($element->getAttribute('data-attributes'));
                        
                        // check for converters
                        $converter = (isset($converters[$name])) ? $converters[$name] : null;
@@ -72,7 +70,7 @@ class HtmlInputNodeWoltlabMetacode extends AbstractHtmlNode {
                        if ($converter->validateAttributes($attributes)) {
                                $newElement = $converter->convert(DOMUtil::childNodesToFragment($element), $attributes);
                                if (!($newElement instanceof \DOMElement)) {
-                                       throw new SystemException("Expected a valid DOMElement as return value.");
+                                       throw new \UnexpectedValueException("Expected a valid DOMElement as return value.");
                                }
                                
                                DOMUtil::replaceElement($element, $newElement);
index 4948e6fcbf53f5912cc06dc7b3f14dec785335b5..9e2762194c380004539e8127fc195bf0ae7bd63d 100644 (file)
@@ -4,6 +4,7 @@ use wcf\system\bbcode\HtmlBBCodeParser;
 use wcf\system\html\node\AbstractHtmlNode;
 use wcf\system\html\node\HtmlNodeProcessor;
 use wcf\util\DOMUtil;
+use wcf\util\StringUtil;
 
 /**
  * Transforms bbcode markers into the custom HTML element `<woltlab-metacode>`. This process
@@ -244,6 +245,18 @@ class HtmlInputNodeWoltlabMetacodeMarker extends AbstractHtmlNode {
         * @param       string          $attributes     encoded attribute string
         */
        protected function convertBlockElement($name, $start, $end, $attributes) {
+               // we need to ensure proper nesting, block elements are not allowed to
+               // be placed inside paragraphs, but being a direct child of another block
+               // element is completely fine
+               $parent = $start;
+               do {
+                       $parent = $parent->parentNode;
+               }
+               while ($parent->nodeName === 'p' || !$this->isBlockElement($parent));
+               
+               $element = DOMUtil::splitParentsUntil($start, $parent);
+               DOMUtil::insertBefore($start, $element);
+               
                $commonAncestor = DOMUtil::getCommonAncestor($start, $end);
                $lastElement = DOMUtil::splitParentsUntil($end, $commonAncestor, false);
                
@@ -361,10 +374,19 @@ class HtmlInputNodeWoltlabMetacodeMarker extends AbstractHtmlNode {
        protected function isBlockElement(\DOMNode $node) {
                switch ($node->nodeName) {
                        case 'blockquote':
+                       case 'body':
                        case 'code':
                        case 'div':
                        case 'p':
                                return true;
+                               break;
+                       
+                       case 'woltlab-metacode':
+                               /** @var \DOMElement $node */
+                               if (in_array($node->getAttribute('data-name'), $this->blockElements)) {
+                                       return true;
+                               }
+                               break;
                }
                
                return false;
index 3a0b1b137f7f7d2cb8b844d99954e084ea3f6329..72a34d8634ebc97dbefd8c4c19a12e53b8c82f47 100644 (file)
@@ -96,6 +96,7 @@ class HtmlNodeProcessor {
                                $parsedAttributes = JSON::decode($parsedAttributes);
                        }
                        catch (SystemException $e) {
+                               /* parse errors can occur if user provided malicious content - ignore them */
                                $parsedAttributes = [];
                        }
                }
@@ -106,7 +107,7 @@ class HtmlNodeProcessor {
        protected function invokeHtmlNode(IHtmlNode $htmlNode) {
                $tagName = $htmlNode->getTagName();
                if (empty($tagName)) {
-                       throw new SystemException("Missing tag name for " . get_class($htmlNode));
+                       throw new \UnexpectedValueException("Missing tag name for " . get_class($htmlNode));
                }
                
                $elements = [];
index f30db6de15b29a0282f25eb46dab88f309ffb7d3..8d90ed795a0ca917ac98db194a1f97a1e52b9fb8 100644 (file)
@@ -176,7 +176,7 @@ final class DOMUtil {
                        }
                }
                
-               throw new SystemException("Unable to determine relative node position.");
+               throw new \RuntimeException("Unable to determine relative node position.");
        }
        
        /**
@@ -204,6 +204,35 @@ final class DOMUtil {
                self::getParentNode($refNode)->insertBefore($node, $refNode);
        }
        
+       /**
+        * Returns true if this node is empty: text nodes that are blank after `StringUtil::trim()`,
+        * elements that are not void and contain only empty nodes, and any other node type.
+        * 
+        * @param       \DOMNode        $node           node
+        * @return      boolean         true if node is empty
+        */
+       public static function isEmpty(\DOMNode $node) {
+               if ($node->nodeType === XML_TEXT_NODE) {
+                       return (StringUtil::trim($node->nodeValue) === '');
+               }
+               else if ($node->nodeType === XML_ELEMENT_NODE) {
+                       /** @var \DOMElement $node */
+                       if (self::isVoidElement($node)) {
+                               // void elements such as `<br>` have no children, but are never treated as empty
+                               return false;
+                       }
+                       // `foreach` instead of `childNodes[$i]`: array access on `DOMNodeList`
+                       // requires PHP 5.6.3+, the list is not modified while iterating
+                       foreach ($node->childNodes as $childNode) {
+                               if (!self::isEmpty($childNode)) {
+                                       return false;
+                               }
+                       }
+               }
+               
+               return true;
+       }
+       
        /**
         * Returns true if given node is the first node of its given ancestor.
         * 
@@ -213,10 +242,7 @@ final class DOMUtil {
         */
        public static function isFirstNode(\DOMNode $node, \DOMElement $ancestor) {
                if ($node->previousSibling === null) {
-                       if ($node->previousSibling === null) {
-                               throw new \InvalidArgumentException("Provided node is a not a descendant of ancestor element.");
-                       }
-                       else if ($node->parentNode === $ancestor || $node->parentNode->nodeName === 'body') {
+                       if ($node->parentNode === $ancestor || $node->parentNode->nodeName === 'body') {
                                return true;
                        }
                        else {
@@ -256,6 +282,21 @@ final class DOMUtil {
                return false;
        }
        
+       /**
+        * Checks whether the provided element is a void element, that is an element
+        * which neither holds content nor has a closing tag, for instance `<br>`.
+        * The node name is compared against a fixed list of tag names, the match
+        * is case-sensitive.
+        * 
+        * @param       \DOMElement     $element        element
+        * @return      boolean         true if provided element is a void element
+        */
+       public static function isVoidElement(\DOMElement $element) {
+               $isVoid = preg_match('~^(area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)$~', $element->nodeName);
+               
+               return ($isVoid === 1);
+       }
+       
        /**
         * Moves all nodes into `$container` until it reaches `$lastElement`. The direction
         * in which nodes will be considered for moving is determined by the logical position