Trimming HTML messages
author Alexander Ebert <ebert@woltlab.com>
Sun, 24 Jul 2016 11:31:00 +0000 (13:31 +0200)
committer Alexander Ebert <ebert@woltlab.com>
Sun, 24 Jul 2016 11:31:00 +0000 (13:31 +0200)
wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php

index f031c9159835a6a513849ae49708fd9ea9768d1b..9fb441d79ce90e2ad23c2b9c906e91273bab3e7b 100644 (file)
@@ -43,6 +43,9 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor {
                // dynamic node handlers
                $this->invokeNodeHandlers('wcf\system\html\input\node\HtmlInputNode', ['img', 'woltlab-metacode']);
                
+               // remove whitespace at the start/end of the message
+               $this->trim();
+               
                // detect mentions, urls, emails and smileys
                $textParser = new HtmlInputNodeTextParser($this);
                $textParser->parse();
@@ -53,6 +56,55 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor {
                EventHandler::getInstance()->fireAction($this, 'afterProcess');
        }
        
+       /**
+        * Trims the message: removes leading and trailing children of <body> that are whitespace-only text
+        * nodes or <p> elements holding nothing but <br> and whitespace-only text (e.g. <p><br></p> or <p></p>).
+        * 
+        * Scanning from either end stops at the first child that is anything else. It is still possible to
+        * work around this by inserting useless text formats such as bold; the point of this method is to remove
+        * unintentional and/or potentially unwanted whitespace, not guarding against people being jerks.
+        */
+       protected function trim() {
+               $body = $this->getDocument()->getElementsByTagName('body')->item(0); // first <body> element of the document
+               
+               foreach (['firstChild', 'lastChild'] as $property) { // first pass trims the start, second pass trims the end
+                       while ($node = $body->$property) { // re-read the outermost child after each removal; ends when <body> is empty
+                               if ($node->nodeType === XML_TEXT_NODE) {
+                                       if (StringUtil::trim($node->textContent) === '') {
+                                               $body->removeChild($node); // text node that is empty after trimming
+                                       }
+                                       else {
+                                               break; // text with actual content: this side is done
+                                       }
+                               }
+                               else {
+                                       /** @var \DOMElement $node */
+                                       if ($node->nodeName === 'p') {
+                                               for ($i = 0, $length = $node->childNodes->length; $i < $length; $i++) { // inspect each direct child of the paragraph
+                                                       /** @var \DOMNode $child */
+                                                       $child = $node->childNodes[$i];
+                                                       if ($child->nodeType === XML_TEXT_NODE) {
+                                                               if (StringUtil::trim($child->textContent) !== '') {
+                                                                       // paragraph has real text: keep it and terminate for() and while()
+                                                                       break 2;
+                                                               }
+                                                       }
+                                                       else if ($child->nodeName !== 'br') {
+                                                               // paragraph has a non-<br> child node: keep it and terminate for() and while()
+                                                               break 2;
+                                                       }
+                                               }
+                                               
+                                               $body->removeChild($node); // only reached when the for() loop found nothing worth keeping
+                                       }
+                                       else {
+                                               break; // any other non-text node ends trimming on this side
+                                       }
+                               }
+                       }
+               }
+       }
+       
        /**
         * Checks the input html for disallowed bbcodes and returns any matches.
         *