From 0f23b77da155ac0746b502db4b100ca35289598c Mon Sep 17 00:00:00 2001 From: Alexander Ebert Date: Sun, 24 Jul 2016 13:31:00 +0200 Subject: [PATCH] Triming html messages --- .../node/HtmlInputNodeProcessor.class.php | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php index f031c91598..9fb441d79c 100644 --- a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php +++ b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php @@ -43,6 +43,9 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { // dynamic node handlers $this->invokeNodeHandlers('wcf\system\html\input\node\HtmlInputNode', ['img', 'woltlab-metacode']); + // remove whitespace at the start/end of the message + $this->trim(); + // detect mentions, urls, emails and smileys $textParser = new HtmlInputNodeTextParser($this); $textParser->parse(); @@ -53,6 +56,55 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { EventHandler::getInstance()->fireAction($this, 'afterProcess'); } + /** + * Trims leading and trailing whitespace. It will only remove text nodes containing + * just whitespaces and


<p><br></p> (including any whitespace-only text nodes).
+	 * 
+	 * It is still possible to work around this by inserting useless text formats such
+	 * as bold to circumvent this check. The point of this method is to remove unintentional
+	 * and/or potentially unwanted whitespace, not guarding against people being jerks.
+	 */
+	protected function trim() {
+		$body = $this->getDocument()->getElementsByTagName('body')->item(0);
+		
+		foreach (['firstChild', 'lastChild'] as $property) { // first pass trims the start, second pass the end
+			while ($node = $body->$property) { // re-read the edge child after every removal
+				if ($node->nodeType === XML_TEXT_NODE) {
+					if (StringUtil::trim($node->textContent) === '') { // whitespace-only text node
+						$body->removeChild($node);
+					}
+					else {
+						break; // text with content reached, stop trimming this side
+					}
+				}
+				else {
+					/** @var \DOMElement $node */
+					if ($node->nodeName === 'p') {
+						for ($i = 0, $length = $node->childNodes->length; $i < $length; $i++) {
+							/** @var \DOMNode $child */
+							$child = $node->childNodes->item($i); // item() works on every PHP version, array-style indexing of a DOMNodeList does not
+							if ($child->nodeType === XML_TEXT_NODE) {
+								if (StringUtil::trim($child->textContent) !== '') {
+									// terminate for() and while(): the paragraph holds real text
+									break 2;
+								}
+							}
+							else if ($child->nodeName !== 'br') {
+								// terminate for() and while(): the paragraph holds an element other than <br>
+								break 2;
+							}
+						}
+						
+						$body->removeChild($node); // paragraph held nothing but whitespace and <br>
+					}
+					else {
+						break; // any other node at the edge counts as content
+					}
+				}
+			}
+		}
+	}
+	
 	/**
 	 * Checks the input html for disallowed bbcodes and returns any matches.
 	 * 
-- 
2.20.1