<?php
namespace wcf\system\html\input\node;
use wcf\system\html\node\HtmlNodeProcessor;
+use wcf\util\DOMUtil;
/**
* TODO documentation
* @since 2.2
*/
class HtmlInputNodeProcessor extends HtmlNodeProcessor {
+	/**
+	 * Tag names of elements that are removed from the document when they are empty.
+	 * TODO: this should include other tags
+	 * @var	string[]
+	 */
+	protected $emptyTags = ['em', 'strong', 'u'];
+
+	/**
+	 * Tag names of elements that are joined with directly following siblings of the same tag name.
+	 * TODO: this should include other tags
+	 * @var	string[]
+	 */
+	protected $mergeTags = ['em', 'strong', 'u'];
+
public function process() {
// process metacode markers first
$this->invokeHtmlNode(new HtmlInputNodeWoltlabMetacodeMarker());
// handle static converters
$this->invokeHtmlNode(new HtmlInputNodeWoltlabMetacode());
+
+	// remove empty elements and join identical siblings if appropriate
+	$this->cleanup();
+	}
+
+	/**
+	 * Cleans up the document in two passes: first every element listed in `$emptyTags`
+	 * that `DOMUtil::isEmpty()` reports as empty is removed, afterwards every element
+	 * listed in `$mergeTags` absorbs the child nodes of all directly following siblings
+	 * that share its tag name. Only the tag name is compared, attributes are not considered.
+	 */
+	protected function cleanup() {
+		// remove empty elements
+		foreach ($this->emptyTags as $emptyTag) {
+			// take a snapshot of the matches first, nodes are removed from the document below
+			$elements = [];
+			foreach ($this->getDocument()->getElementsByTagName($emptyTag) as $element) {
+				$elements[] = $element;
+			}
+
+			/** @var \DOMElement $element */
+			foreach ($elements as $element) {
+				if (DOMUtil::isEmpty($element)) {
+					DOMUtil::removeNode($element);
+				}
+			}
+		}
+
+		// join identical siblings
+		foreach ($this->mergeTags as $mergeTag) {
+			// take a snapshot of the matches first, nodes are removed from the document below
+			$elements = [];
+			foreach ($this->getDocument()->getElementsByTagName($mergeTag) as $element) {
+				$elements[] = $element;
+			}
+
+			/** @var \DOMElement $element */
+			foreach ($elements as $element) {
+				// keep absorbing: once a sibling has been merged and removed, the element
+				// that followed it becomes the new next sibling and may share the tag name
+				// as well, e.g. `<em>a</em><em>b</em><em>c</em>`; siblings that were already
+				// merged are detached, have no next sibling and are skipped right away
+				while (($sibling = $element->nextSibling) !== null && $sibling->nodeName === $mergeTag) {
+					while ($sibling->hasChildNodes()) {
+						$element->appendChild($sibling->firstChild);
+					}
+
+					DOMUtil::removeNode($sibling);
+
+					// guard against an endless loop in case the sibling was not detached
+					if ($element->nextSibling === $sibling) {
+						break;
+					}
+				}
+			}
+		}
}
}
continue;
}
- $attributes = $element->getAttribute('data-attributes');
- if (!empty($attributes)) $attributes = @json_decode(base64_decode($attributes), true);
- if (!is_array($attributes)) $attributes = [];
+ $attributes = $htmlNodeProcessor->parseAttributes($element->getAttribute('data-attributes'));
// check for converters
$converter = (isset($converters[$name])) ? $converters[$name] : null;
if ($converter->validateAttributes($attributes)) {
$newElement = $converter->convert(DOMUtil::childNodesToFragment($element), $attributes);
if (!($newElement instanceof \DOMElement)) {
- throw new SystemException("Expected a valid DOMElement as return value.");
+ throw new \UnexpectedValueException("Expected a valid DOMElement as return value.");
}
DOMUtil::replaceElement($element, $newElement);
use wcf\system\html\node\AbstractHtmlNode;
use wcf\system\html\node\HtmlNodeProcessor;
use wcf\util\DOMUtil;
+use wcf\util\StringUtil;
/**
* Transforms bbcode markers into the custom HTML element `<woltlab-metacode>`. This process
* @param string $attributes encoded attribute string
*/
protected function convertBlockElement($name, $start, $end, $attributes) {
+ // we need to ensure proper nesting, block elements are not allowed to
+ // be placed inside paragraphs, but being a direct child of another block
+ // element is completely fine
+ $parent = $start;
+ do {
+ $parent = $parent->parentNode;
+ }
+ while ($parent->nodeName === 'p' || !$this->isBlockElement($parent));
+
+ $element = DOMUtil::splitParentsUntil($start, $parent);
+ DOMUtil::insertBefore($start, $element);
+
$commonAncestor = DOMUtil::getCommonAncestor($start, $end);
$lastElement = DOMUtil::splitParentsUntil($end, $commonAncestor, false);
protected function isBlockElement(\DOMNode $node) {
switch ($node->nodeName) {
case 'blockquote':
+ case 'body':
case 'code':
case 'div':
case 'p':
return true;
+ break;
+
+ case 'woltlab-metacode':
+ /** @var \DOMElement $node */
+ if (in_array($node->getAttribute('data-name'), $this->blockElements)) {
+ return true;
+ }
+ break;
}
return false;
$parsedAttributes = JSON::decode($parsedAttributes);
}
catch (SystemException $e) {
+ /* parse errors can occur if user provided malicious content - ignore them */
$parsedAttributes = [];
}
}
protected function invokeHtmlNode(IHtmlNode $htmlNode) {
$tagName = $htmlNode->getTagName();
if (empty($tagName)) {
- throw new SystemException("Missing tag name for " . get_class($htmlNode));
+ throw new \UnexpectedValueException("Missing tag name for " . get_class($htmlNode));
}
$elements = [];
}
}
- throw new SystemException("Unable to determine relative node position.");
+ throw new \RuntimeException("Unable to determine relative node position.");
}
/**
self::getParentNode($refNode)->insertBefore($node, $refNode);
}
+	/**
+	 * Checks whether the given node carries no content. Text nodes are empty if nothing
+	 * but whitespace remains after trimming, elements are empty if they are not void
+	 * elements and all of their child nodes are empty themselves. Every other node type
+	 * is reported as empty.
+	 *
+	 * @param	\DOMNode	$node		node to inspect
+	 * @return	boolean		true if node is empty
+	 */
+	public static function isEmpty(\DOMNode $node) {
+		if ($node->nodeType === XML_TEXT_NODE) {
+			return (StringUtil::trim($node->nodeValue) === '');
+		}
+
+		// anything that is neither text nor an element is treated as empty
+		if ($node->nodeType !== XML_ELEMENT_NODE) {
+			return true;
+		}
+
+		/** @var \DOMElement $node */
+		if (self::isVoidElement($node)) {
+			// void elements such as `<br>` never have children, but are not considered empty
+			return false;
+		}
+
+		// a single non-empty descendant is enough to make the element non-empty
+		foreach ($node->childNodes as $childNode) {
+			if (!self::isEmpty($childNode)) {
+				return false;
+			}
+		}
+
+		return true;
+	}
+
/**
* Returns true if given node is the first node of its given ancestor.
*
*/
public static function isFirstNode(\DOMNode $node, \DOMElement $ancestor) {
if ($node->previousSibling === null) {
- if ($node->previousSibling === null) {
- throw new \InvalidArgumentException("Provided node is a not a descendant of ancestor element.");
- }
- else if ($node->parentNode === $ancestor || $node->parentNode->nodeName === 'body') {
+ if ($node->parentNode === $ancestor || $node->parentNode->nodeName === 'body') {
return true;
}
else {
return false;
}
+	/**
+	 * Checks whether the provided element is a void element, i.e. an element that
+	 * neither contains content nor has a closing tag, such as `<br>`.
+	 *
+	 * @param	\DOMElement	$element	element to inspect
+	 * @return	boolean		true if the element's node name is a known void element name
+	 */
+	public static function isVoidElement(\DOMElement $element) {
+		$voidElementPattern = '~^(area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta|param|source|track|wbr)$~';
+
+		// preg_match() yields 1 only if the node name is one of the listed names
+		return (preg_match($voidElementPattern, $element->nodeName) === 1);
+	}
+
/**
* Moves all nodes into `$container` until it reaches `$lastElement`. The direction
* in which nodes will be considered for moving is determined by the logical position