+++ /dev/null
-<?php
-
-namespace wcf\system\html\output\node;
-
-use DOMElement;
-use wcf\system\html\node\AbstractHtmlNodeProcessor;
-use wcf\util\DOMUtil;
-use wcf\util\StringUtil;
-
-/**
- * Unwraps <br> and strips trailing <br>.
- *
- * @author Alexander Ebert
- * @copyright 2001-2023 WoltLab GmbH
- * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
- * @since 6.0
- */
-final class HtmlOutputNodeBr extends AbstractHtmlOutputNode
-{
- /**
- * @inheritDoc
- */
- protected $tagName = 'br';
-
- /**
- * @inheritDoc
- */
- public function process(array $elements, AbstractHtmlNodeProcessor $htmlNodeProcessor)
- {
- /** @var \DOMElement $element */
- foreach ($elements as $element) {
- // Hoist the `<br>` out of formatting wrappers first, afterwards check
- // whether it is the trailing node of a paragraph.
- $this->unwrap($element);
- $this->removeTrailingBr($element);
- }
- }
-
- /**
- * Moves a `<br>` that is the only child node of an inline formatting
- * element (`b`, `del`, `em`, `i`, `strong`, `sub`, `sup`, `span`, `u`) in
- * front of that element and removes the element. This is repeated for each
- * enclosing formatting element for as long as the `<br>` has no siblings.
- */
- private function unwrap(DOMElement $br): void
- {
- // Nothing to unwrap when the `<br>` shares its parent with other nodes.
- if ($br->previousSibling || $br->nextSibling) {
- return;
- }
-
- $parent = $br;
- while (($parent = $parent->parentNode) !== null) {
- switch ($parent->nodeName) {
- case "b":
- case "del":
- case "em":
- case "i":
- case "strong":
- case "sub":
- case "sup":
- case "span":
- case "u":
- // After a previous iteration moved the `<br>` it may have gained
- // siblings, in which case the unwrapping stops.
- if ($br->previousSibling || $br->nextSibling) {
- return;
- }
-
- // Place the `<br>` in front of the wrapper and drop the wrapper.
- $parent->parentNode->insertBefore($br, $parent);
- $parent->parentNode->removeChild($parent);
- // Continue with the new parent of the moved `<br>`.
- $parent = $br;
-
- break;
-
- default:
- // Any other parent element ends the unwrapping.
- return;
- }
- }
- }
-
- /**
- * Removes a `<br>` for which `DOMUtil::isLastNode()` returns true relative
- * to the `<p>` found through `DOMUtil::closest()`. If the `<br>` is the only
- * child node of that paragraph, the paragraph itself is removed instead.
- *
- * A `<br>` with `data-cke-filler="true"` and a `<br>` for which no `<p>` is
- * found are left untouched.
- */
- private function removeTrailingBr(DOMElement $br): void
- {
- if ($br->getAttribute("data-cke-filler") === "true") {
- return;
- }
-
- $paragraph = DOMUtil::closest($br, "p");
- if ($paragraph === null) {
- return;
- }
-
- if (!DOMUtil::isLastNode($br, $paragraph)) {
- return;
- }
-
- // A paragraph holding nothing but the `<br>` is removed as a whole.
- if ($paragraph->childNodes->length === 1 && $paragraph->childNodes->item(0) === $br) {
- $paragraph->parentNode->removeChild($paragraph);
- } else {
- $br->remove();
- }
- }
-}
--- /dev/null
+<?php
+
+namespace wcf\system\html\output\node;
+
+use wcf\util\DOMUtil;
+
+/**
+ * Normalizes HTML generated by earlier versions of WoltLab Suite.
+ *
+ * @author Alexander Ebert
+ * @copyright 2001-2023 WoltLab GmbH
+ * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
+ * @since 6.0
+ */
+final class HtmlOutputNodeNormalizer
+{
+    /**
+     * Inline formatting elements that are dropped when a `<br>` is their only
+     * child node.
+     *
+     * @var list<string>
+     */
+    private const UNWRAPPABLE_ELEMENTS = ['b', 'del', 'em', 'i', 'strong', 'sub', 'sup', 'span', 'u'];
+
+    /**
+     * @param \DOMXPath $xpath XPath instance used to query the `<p>` and `<br>` elements that are normalized
+     */
+    public function __construct(private readonly \DOMXPath $xpath)
+    {
+    }
+
+    /**
+     * Normalizes all `<br>` elements first and reduces the paragraphs that
+     * consist of a single `<br>` afterwards.
+     *
+     * The spacer paragraphs are collected after the `<br>` normalization has
+     * run, thus paragraphs such as `<p><b><br></b></p>` are unwrapped before
+     * they are inspected.
+     */
+    public function normalize(): void
+    {
+        $this->normalizeBr();
+
+        $this->reduceSpacerParagraphs($this->getPossibleSpacerParagraphs());
+    }
+
+    /**
+     * Collects every `<p>` whose only child node is a `<br>` that is not
+     * flagged with `data-cke-filler="true"`.
+     *
+     * @return list<\DOMElement>
+     */
+    private function getPossibleSpacerParagraphs(): array
+    {
+        $paragraphs = [];
+
+        foreach ($this->xpath->query('//p') as $p) {
+            \assert($p instanceof \DOMElement);
+
+            if ($p->childNodes->length !== 1) {
+                continue;
+            }
+
+            $child = $p->childNodes->item(0);
+            if (!($child instanceof \DOMElement) || $child->nodeName !== 'br') {
+                continue;
+            }
+
+            // Paragraphs flagged with the filler attribute are not treated as
+            // spacer paragraphs.
+            if ($child->getAttribute('data-cke-filler') === 'true') {
+                continue;
+            }
+
+            $paragraphs[] = $p;
+        }
+
+        return $paragraphs;
+    }
+
+    /**
+     * Removes the leading `ceil(n / 2)` paragraphs of every run of `n`
+     * candidates that directly follow each other as element siblings. A
+     * candidate without an adjacent candidate forms a run of one and is
+     * removed.
+     *
+     * @param list<\DOMElement> $paragraphs candidates in the order returned by the XPath query
+     */
+    private function reduceSpacerParagraphs(array $paragraphs): void
+    {
+        $length = \count($paragraphs);
+
+        $i = 0;
+        while ($i < $length) {
+            // Determine the length of the run that starts at `$i`. Each
+            // candidate is compared against its direct successor in the list,
+            // otherwise a run could never grow beyond two paragraphs.
+            $runLength = 1;
+            while (
+                $i + $runLength < $length
+                && $paragraphs[$i + $runLength - 1]->nextElementSibling === $paragraphs[$i + $runLength]
+            ) {
+                $runLength++;
+            }
+
+            // Remove half of the run, rounded up:
+            // 1 -> 0, 2 -> 1, 3 -> 1, 4 -> 2, 5 -> 2 remaining paragraphs.
+            $numberOfParagraphsToRemove = \intdiv($runLength + 1, 2);
+
+            foreach (\array_slice($paragraphs, $i, $numberOfParagraphsToRemove) as $paragraph) {
+                $paragraph->remove();
+            }
+
+            // Continue with the first candidate after the run.
+            $i += $runLength;
+        }
+    }
+
+    /**
+     * Unwraps every `<br>` and strips it if it is a trailing `<br>`.
+     */
+    private function normalizeBr(): void
+    {
+        foreach ($this->xpath->query('//br') as $br) {
+            \assert($br instanceof \DOMElement);
+
+            $this->unwrapBr($br);
+            $this->removeTrailingBr($br);
+        }
+    }
+
+    /**
+     * Moves a `<br>` that is the only child node of an inline formatting
+     * element in front of that element and removes the element. This is
+     * repeated for as long as the `<br>` has no siblings and its parent is one
+     * of the unwrappable elements.
+     */
+    private function unwrapBr(\DOMElement $br): void
+    {
+        while ($br->previousSibling === null && $br->nextSibling === null) {
+            $wrapper = $br->parentNode;
+            if ($wrapper === null || !\in_array($wrapper->nodeName, self::UNWRAPPABLE_ELEMENTS, true)) {
+                return;
+            }
+
+            $container = $wrapper->parentNode;
+            if ($container === null) {
+                // A detached wrapper offers no place to move the `<br>` to.
+                return;
+            }
+
+            $container->insertBefore($br, $wrapper);
+            $container->removeChild($wrapper);
+        }
+    }
+
+    /**
+     * Removes a `<br>` for which `DOMUtil::isLastNode()` returns true relative
+     * to the `<p>` found through `DOMUtil::closest()`, unless that paragraph
+     * has only a single child node.
+     */
+    private function removeTrailingBr(\DOMElement $br): void
+    {
+        $paragraph = DOMUtil::closest($br, 'p');
+        if ($paragraph === null || !DOMUtil::isLastNode($br, $paragraph)) {
+            return;
+        }
+
+        // Paragraphs with a single child node are left alone here, those that
+        // contain nothing but a `<br>` are inspected as possible spacer
+        // paragraphs once all `<br>` have been processed.
+        // NOTE(review): presumably this also keeps a trailing `<br>` nested in
+        // the only child of a paragraph, e.g. `<p><u>text<br></u></p>` — confirm
+        // that this is intended.
+        if ($paragraph->childNodes->length > 1) {
+            $br->remove();
+        }
+    }
+}
+++ /dev/null
-<?php
-
-namespace wcf\system\html\output\node;
-
-use wcf\system\html\node\AbstractHtmlNodeProcessor;
-use wcf\util\StringUtil;
-
-/**
- * Removes empty paragraphs that were used to emulate paragraphs in earlier versions.
- *
- * @author Alexander Ebert
- * @copyright 2001-2023 WoltLab GmbH
- * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
- * @since 6.0
- */
-final class HtmlOutputNodeP extends AbstractHtmlOutputNode
-{
- /**
- * @inheritDoc
- */
- protected $tagName = 'p';
-
- /**
- * @inheritDoc
- */
- public function process(array $elements, AbstractHtmlNodeProcessor $htmlNodeProcessor)
- {
- /** @var \DOMElement $element */
- foreach ($elements as $element) {
- // Only paragraphs with exactly one child element are inspected.
- if ($element->childElementCount === 1 && $element->firstElementChild) {
- $child = $element->firstElementChild;
- if ($child->tagName === 'br') {
- if ($child->getAttribute('data-cke-filler') === 'true') {
- // This is an internal marker used to identify paragraphs
- // that are intentionally left blank. The marker is stripped
- // and the paragraph is kept.
- $child->removeAttribute('data-cke-filler');
-
- continue;
- }
-
- // This is most likely a legacy paragraph that was inserted
- // in earlier versions and is no longer required. We need
- // to verify that there is no other text inside the node
- // before removing it.
- if (StringUtil::trim($element->textContent) === '') {
- $element->remove();
- }
- }
- }
- }
- }
-}