Fix for poorly nested HTML
author Alexander Ebert <ebert@woltlab.com>
Tue, 23 Aug 2016 19:59:39 +0000 (21:59 +0200)
committer Alexander Ebert <ebert@woltlab.com>
Wed, 24 Aug 2016 09:10:34 +0000 (11:10 +0200)
wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php

index 9fb441d79ce90e2ad23c2b9c906e91273bab3e7b..2875f3a2590cd93bc65681b73b9c1ba6c7f33490 100644 (file)
@@ -33,6 +33,9 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor {
        public function process() {
                EventHandler::getInstance()->fireAction($this, 'beforeProcess');
                
+               // fix invalid html such as metacode markers outside of block elements
+               $this->fixDom();
+               
                // process metacode markers first
                $this->invokeHtmlNode(new HtmlInputNodeWoltlabMetacodeMarker());
                
@@ -56,6 +59,39 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor {
                EventHandler::getInstance()->fireAction($this, 'afterProcess');
        }
        
+       /**
+        * Moves `woltlab-metacode-marker` elements and text nodes that are direct children
+        * of `<body>` into the preceding `<p>`, creating a new `<p>` when there is none.
+        */
+       protected function fixDom() {
+               $appendToPreviousParagraph = function ($node) {
+                       /** @var \DOMElement $paragraph */
+                       $paragraph = $node->previousSibling;
+                       // no preceding <p> sibling: insert an empty one right before the node
+                       if (!$paragraph || $paragraph->nodeName !== 'p') {
+                               $paragraph = $node->ownerDocument->createElement('p');
+                               $node->parentNode->insertBefore($paragraph, $node);
+                       }
+                       // appendChild() moves the node, making it the paragraph's last child
+                       $paragraph->appendChild($node);
+                       // hand back the paragraph so the caller can resume from its position
+                       return $paragraph;
+               };
+               // only direct children of the first <body> are visited; nested nodes are untouched
+               /** @var \DOMNode $node */
+               $node = $this->getDocument()->getElementsByTagName('body')->item(0)->firstChild;
+               while ($node) {
+                       if ($node->nodeType === XML_ELEMENT_NODE && $node->nodeName === 'woltlab-metacode-marker') {
+                               $node = $appendToPreviousParagraph($node);
+                       }
+                       else if ($node->nodeType === XML_TEXT_NODE) { // any text node, including whitespace-only ones
+                               $node = $appendToPreviousParagraph($node);
+                       }
+                       // after a move $node is the paragraph, so iteration resumes behind it
+                       $node = $node->nextSibling;
+               }
+       }
+       
        /**
         * Trims leading and trailing whitespace. It will only remove text nodes containing
         * just whitespaces and <p><br></p> (including any whitespace-only text nodes).