From 0e37e5103ff889ea4986fe85b76972a8a9c716c9 Mon Sep 17 00:00:00 2001 From: Alexander Ebert Date: Thu, 28 Mar 2019 17:08:49 +0100 Subject: [PATCH] Detect plain and standalone links --- ...ctHtmlInputNodeProcessorListener.class.php | 85 +++++++-------- .../node/HtmlInputNodeProcessor.class.php | 95 +++++++++++++++- .../html/node/HtmlNodePlainLink.class.php | 103 ++++++++++++++++++ 3 files changed, 231 insertions(+), 52 deletions(-) create mode 100644 wcfsetup/install/files/lib/system/html/node/HtmlNodePlainLink.class.php diff --git a/wcfsetup/install/files/lib/system/event/listener/AbstractHtmlInputNodeProcessorListener.class.php b/wcfsetup/install/files/lib/system/event/listener/AbstractHtmlInputNodeProcessorListener.class.php index e62847f730..fd63fe0c2e 100644 --- a/wcfsetup/install/files/lib/system/event/listener/AbstractHtmlInputNodeProcessorListener.class.php +++ b/wcfsetup/install/files/lib/system/event/listener/AbstractHtmlInputNodeProcessorListener.class.php @@ -1,12 +1,12 @@ getDocument()->getElementsByTagName('a') as $element) { - /** @var \DOMElement $element */ - if ($element->getAttribute('href') === $element->textContent) { - if ($regex->match($element->getAttribute('href'), true)) { - $objectIDs[] = $regex->getMatches()[2][0]; - } + + foreach ($processor->plainLinks as $link) { + $objectID = $link->detectObjectID($regex); + if ($objectID) { + $objectIDs[] = $objectID; } } @@ -60,32 +59,10 @@ abstract class AbstractHtmlInputNodeProcessorListener implements IParameterizedE * @param Regex $regex * @param ITitledObject[] $objects * @param string $bbcodeName + * @deprecated 5.2 Use `replaceLinks()` instead. */ protected function replaceLinksWithBBCode(HtmlInputNodeProcessor $processor, Regex $regex, array $objects, $bbcodeName) { - $elements = []; - foreach ($processor->getDocument()->getElementsByTagName('a') as $element) { - /** @var \DOMElement $element */ - if ($element->getAttribute('href') === $element->textContent) { - if ($regex->match($element->getAttribute('href'), true)) { - $objectID = $regex->getMatches()[2][0]; - - if (isset($objects[$objectID])) { - $elements[] = [ - 'element' => $element, - 'objectID' => $objectID - ]; - } - } - } - } - - foreach ($elements as $elementData) { - $metacodeElement = $processor->getDocument()->createElement('woltlab-metacode'); - $metacodeElement->setAttribute('data-name', $bbcodeName); - $metacodeElement->setAttribute('data-attributes', base64_encode(JSON::encode([$elementData['objectID']]))); - - DOMUtil::replaceElement($elementData['element'], $metacodeElement, false); - } + $this->replaceLinks($processor, $objects, $bbcodeName); } /** @@ -96,23 +73,39 @@ abstract class AbstractHtmlInputNodeProcessorListener implements IParameterizedE * @param Regex $regex * @param ITitledObject[] $objects * @throws ImplementationException + * @deprecated 5.2 Use `replaceLinks()` instead. */ protected function setObjectTitles(HtmlInputNodeProcessor $processor, Regex $regex, array $objects) { - foreach ($processor->getDocument()->getElementsByTagName('a') as $element) { - /** @var \DOMElement $element */ - if ($element->getAttribute('href') === $element->textContent) { - if ($regex->match($element->getAttribute('href'), true)) { - $objectID = $regex->getMatches()[2][0]; - - if (isset($objects[$objectID])) { - $object = $objects[$objectID]; - if (!($object instanceof ITitledObject) && !($object instanceof DatabaseObjectDecorator) && !($object->getDecoratedObject() instanceof ITitledObject)) { - throw new ImplementationException(get_class($object), ITitledObject::class); - } - - $element->nodeValue = ''; - $element->appendChild($element->ownerDocument->createTextNode($object->getTitle())); + $this->replaceLinks($processor, $objects); + } + + /** + * @param HtmlInputNodeProcessor $processor + * @param ITitledObject[] $objects + * @param string $bbcodeName + */ + protected function replaceLinks(HtmlInputNodeProcessor $processor, array $objects, $bbcodeName = '') { + $bbcode = null; + if ($bbcodeName) { + $bbcode = BBCodeCache::getInstance()->getBBCodeByTag($bbcodeName); + } + + foreach ($processor->plainLinks as $link) { + if (!$link->isPristine()) { + continue; + } + + if (isset($objects[$link->getObjectID()])) { + if ($bbcode === null || !$link->isStandalone()) { + $object = $objects[$link->getObjectID()]; + if ($object instanceof DatabaseObjectDecorator) { + $object = $object->getDecoratedObject(); } + + $link->setTitle($object); + } + else if ($bbcode !== null) { + $link->replaceWithBBCode($bbcode); } } } diff --git a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php index adc2ce91e5..fa89b6a9f3 100644 --- a/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php +++ b/wcfsetup/install/files/lib/system/html/input/node/HtmlInputNodeProcessor.class.php @@ -3,6 +3,7 @@ namespace wcf\system\html\input\node; use wcf\system\bbcode\BBCodeHandler; use wcf\system\event\EventHandler; use wcf\system\html\node\AbstractHtmlNodeProcessor; +use wcf\system\html\node\HtmlNodePlainLink; use wcf\system\html\node\IHtmlNode; use wcf\util\DOMUtil; use wcf\util\StringUtil; @@ -30,12 +31,12 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { 'messageFloatObjectLeft', 'messageFloatObjectRight', // built-in - 'smiley', 'woltlabAttachment', 'woltlabSuiteMedia' + 'smiley', 'woltlabAttachment', 'woltlabSuiteMedia', ], 'li' => ['text-center', 'text-justify', 'text-right'], 'p' => ['text-center', 'text-justify', 'text-right'], 'pre' => ['woltlabHtml'], - 'td' => ['text-center', 'text-justify', 'text-right'] + 'td' => ['text-center', 'text-justify', 'text-right'], ]; /** @@ -48,7 +49,7 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { * @var string[] */ public static $allowedStyleElements = [ - 'span' + 'span', ]; /** @@ -71,9 +72,14 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { 'ul', 'ol', 'li', // other - 'a', 'kbd', 'woltlab-quote', 'woltlab-spoiler', 'pre', 'sub', 'sup' + 'a', 'kbd', 'woltlab-quote', 'woltlab-spoiler', 'pre', 'sub', 'sup', ]; + /** + * @var HtmlNodePlainLink[] + */ + public $plainLinks = []; + /** * list of embedded content grouped by type * @var array @@ -89,6 +95,8 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { * @inheritDoc */ public function process() { + $this->plainLinks = []; + EventHandler::getInstance()->fireAction($this, 'beforeProcess'); // fix invalid html such as metacode markers outside of block elements @@ -157,6 +165,8 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { // extract embedded content $this->processEmbeddedContent(); + $this->convertPlainLinks(); + EventHandler::getInstance()->fireAction($this, 'afterProcess'); } @@ -415,7 +425,7 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { 'font' => 'woltlab-size', 'size' => 'woltlab-size', 'spoiler' => 'woltlab-spoiler', - 'url' => 'a' + 'url' => 'a', ]; foreach ($customTags as $bbcode => $tagName) { @@ -504,7 +514,7 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { } /** - * Parses embedded content containedin metacode elements. + * Parses embedded content contained in metacode elements. */ protected function parseEmbeddedContent() { // handle `woltlab-metacode` @@ -541,4 +551,77 @@ class HtmlInputNodeProcessor extends AbstractHtmlNodeProcessor { return $element; } + + /** + * Detects links that contain nothing but their link target. Additionally, standalone links, i. e. + * those that are the only content in their line, are offered separately. + * + * @since 5.2 + */ + protected function convertPlainLinks() { + /** @var HtmlNodePlainLink[] $links */ + $links = []; + + /** @var \DOMElement $link */ + foreach ($this->getDocument()->getElementsByTagName('a') as $link) { + $href = $link->getAttribute('href'); + if ($href !== $link->textContent) { + continue; + } + + $plainLink = new HtmlNodePlainLink($link, $href); + + // Check if the line appears to only contain the link text. + $parent = $link; + while ($parent->parentNode->nodeName !== 'body') { + $parent = $parent->parentNode; + } + + if ($parent->nodeName === 'p' && $parent->textContent === $link->textContent) { + // The line may contain nothing but the link, exceptions include basic formatting + // and up to a single `
` element. + $mayContainOtherContent = false; + $linebreaks = 0; + /** @var \DOMElement $element */ + foreach ($parent->getElementsByTagName('*') as $element) { + switch ($element->nodeName) { + case 'br': + $linebreaks++; + break; + + case 'span': + if ($element->getAttribute('class')) { + $mayContainOtherContent = true; + break 2; + } + + // `` is used to hold text formatting. + break; + + case 'a': + case 'b': + case 'em': + case 'i': + case 'strong': + case 'u': + // These elements are perfectly fine. + break; + + default: + $mayContainOtherContent = true; + break 2; + } + } + + if (!$mayContainOtherContent || $linebreaks > 1) { + $this->plainLinks[] = $plainLink->setIsStandalone($parent); + continue; + } + } + + $this->plainLinks[] = $plainLink->setIsInline(); + } + + EventHandler::getInstance()->fireAction($this, 'convertPlainLinks'); + } } diff --git a/wcfsetup/install/files/lib/system/html/node/HtmlNodePlainLink.class.php b/wcfsetup/install/files/lib/system/html/node/HtmlNodePlainLink.class.php new file mode 100644 index 0000000000..78717d25da --- /dev/null +++ b/wcfsetup/install/files/lib/system/html/node/HtmlNodePlainLink.class.php @@ -0,0 +1,103 @@ +link = $link; + $this->href = $href; + } + + public function setIsInline() { + $this->standalone = false; + $this->topLevelParent = null; + + return $this; + } + + public function setIsStandalone(\DOMElement $topLevelParent) { + $this->standalone = true; + $this->topLevelParent = $topLevelParent; + + return $this; + } + + public function isPristine() { + return $this->pristine; + } + + public function isStandalone() { + return $this->standalone; + } + + public function detectObjectID(Regex $regex) { + if ($regex->match($this->href, true)) { + $this->objectID = $regex->getMatches()[2][0]; + } + + return $this->objectID; + } + + public function getObjectID() { + return $this->objectID; + } + + public function setTitle(ITitledObject $object) { + $this->markAsTainted(); + + $this->link->nodeValue = ''; + $this->link->appendChild($this->link->ownerDocument->createTextNode($object->getTitle())); + } + + public function replaceWithBBCode(BBCode $bbcode) { + $this->markAsTainted(); + + if ($this->objectID === 0) { + throw new \UnexpectedValueException('The objectID must not be null.'); + } + + $metacodeElement = $this->link->ownerDocument->createElement('woltlab-metacode'); + $metacodeElement->setAttribute('data-name', $bbcode->bbcodeTag); + $metacodeElement->setAttribute('data-attributes', base64_encode(JSON::encode([$this->objectID]))); + + if ($bbcode->isBlockElement) { + if (!$this->isStandalone()) { + throw new \LogicException('Cannot inject a block bbcode in an inline context.'); + } + + // Replace the top level parent with the link itself, which will be replaced with the bbcode afterwards. + $this->topLevelParent->insertBefore($this->link, $this->topLevelParent); + DOMUtil::removeNode($this->topLevelParent); + } + + DOMUtil::replaceElement($this->link, $metacodeElement, false); + } + + protected function markAsTainted() { + if (!$this->pristine) { + throw new \RuntimeException('This link has already been modified.'); + } + + $this->pristine = false; + } +} -- 2.20.1