Cleanup DOM before saving
authorAlexander Ebert <ebert@woltlab.com>
Thu, 1 Sep 2016 15:56:45 +0000 (17:56 +0200)
committerAlexander Ebert <ebert@woltlab.com>
Thu, 1 Sep 2016 15:56:55 +0000 (17:56 +0200)
wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php

index 2875f3a2590cd93bc65681b73b9c1ba6c7f33490..e129c536507eaa5ecd150cfcf555f52d548fb5d4 100644 (file)
@@ -4,6 +4,7 @@ use wcf\system\bbcode\BBCodeHandler;
 use wcf\system\event\EventHandler;
 use wcf\system\html\node\AbstractHtmlNodeProcessor;
 use wcf\system\html\node\IHtmlNode;
+use wcf\util\DOMUtil;
 use wcf\util\StringUtil;
 
 /**
@@ -57,6 +58,8 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor {
                $this->processEmbeddedContent();
                
                EventHandler::getInstance()->fireAction($this, 'afterProcess');
+               
+               $this->cleanup();
        }
        
        /**
@@ -242,6 +245,48 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor {
                EventHandler::getInstance()->fireAction($this, 'parseEmbeddedContent');
        }
        
+       /**
+        * Removes garbage left in the DOM.
+        */
+       protected function cleanup() {
+               // remove empty <p> tags
+               $elements = [];
+               foreach ($this->getDocument()->getElementsByTagName('p') as $element) {
+                       $elements[] = $element;
+               }
+               
+               /** @var \DOMElement $element */
+               foreach ($elements as $element) {
+                       if ($element->hasChildNodes()) {
+                               if ($element->childNodes->length === 1) {
+                                       $textContent = StringUtil::trim($element->childNodes[0]->textContent);
+                                       if (empty($textContent)) {
+                                               DOMUtil::removeNode($element);
+                                       }
+                               }
+                       }
+                       else {
+                               DOMUtil::removeNode($element);
+                       }
+               }
+               
+               // remove <br> at the end of block elements
+               // without a succeeding non-empty paragraph
+               $elements = [];
+               foreach ($this->getDocument()->getElementsByTagName('br') as $element) {
+                       $elements[] = $element;
+               }
+               
+               $blocks = ['h1', 'h2', 'h3', 'p'];
+               foreach ($elements as $element) {
+                       if (in_array($element->parentNode->nodeName, $blocks)) {
+                               if ($element->previousSibling && !$element->nextSibling) {
+                                       DOMUtil::removeNode($element);
+                               }
+                       }
+               }
+       }
+       
        /**
         * Creates a new `<woltlab-metacode>` element contained in the same document
         * as the provided `$node`.