From a29ca761d7baff3ed84657a0cb2ac89d64b01549 Mon Sep 17 00:00:00 2001 From: Alexander Ebert Date: Fri, 15 Mar 2024 17:22:24 +0100 Subject: [PATCH] =?utf8?q?Encode=20UTF-8=20characters=20in=20an=20URI?= =?utf8?q?=E2=80=99s=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit See https://www.woltlab.com/community/thread/304901-keine-linkvorschau-bei-links-mit-umlauten/ --- .../html/node/HtmlNodeUnfurlLink.class.php | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/wcfsetup/install/files/lib/system/html/node/HtmlNodeUnfurlLink.class.php b/wcfsetup/install/files/lib/system/html/node/HtmlNodeUnfurlLink.class.php index 74add3acd5..558dc5eb90 100644 --- a/wcfsetup/install/files/lib/system/html/node/HtmlNodeUnfurlLink.class.php +++ b/wcfsetup/install/files/lib/system/html/node/HtmlNodeUnfurlLink.class.php @@ -2,6 +2,8 @@ namespace wcf\system\html\node; +use Laminas\Diactoros\Exception\InvalidArgumentException; +use Laminas\Diactoros\Uri; use wcf\data\unfurl\url\UnfurlUrlAction; use wcf\util\DOMUtil; use wcf\util\Url; @@ -27,19 +29,38 @@ class HtmlNodeUnfurlLink extends HtmlNodePlainLink return; } - if (!Url::is($link->href)) { + try { + $uri = new Uri($link->href); + } catch (InvalidArgumentException) { return; } - $parsedUrl = Url::parse($link->href); + $path = $uri->getPath(); + if ($path !== '') { + // This is a simplified transformation that will only replace + // characters that are known to be always invalid in URIs and must + // be encoded at all times according to RFC 1738. + $path = \preg_replace_callback( + '~[^0-9a-zA-Z$-_.+!*\'(),;/?:@=&]~', + static fn (array $matches) => \rawurlencode($matches[0]), + $path + ); + $uri = $uri->withPath($path); + + // The above replacement excludes certain characters from the + // replacement that are conditionally unsafe. + if (!Url::is($uri->__toString())) { + return; + } + } // Ignore non-standard ports. - if ($parsedUrl['port']) { + if ($uri->getPort() !== null) { return; } // Ignore non-HTTP schemes. - if (!\in_array($parsedUrl['scheme'], ['http', 'https'])) { + if (!\in_array($uri->getScheme(), ['http', 'https'])) { return; } @@ -47,7 +68,7 @@ class HtmlNodeUnfurlLink extends HtmlNodePlainLink $object = new UnfurlUrlAction([], 'findOrCreate', [ 'data' => [ - 'url' => $link->href, + 'url' => $uri->__toString(), ], ]); $returnValues = $object->executeAction(); -- 2.20.1