Replace UnfurlUrlUtil with UnfurlResponse
author joshuaruesweg <ruesweg@woltlab.com>
Thu, 4 Mar 2021 13:26:57 +0000 (14:26 +0100)
committer joshuaruesweg <ruesweg@woltlab.com>
Tue, 16 Mar 2021 15:19:12 +0000 (16:19 +0100)
wcfsetup/install/files/lib/system/background/job/UnfurlUrlBackgroundJob.class.php
wcfsetup/install/files/lib/system/message/unfurl/DownloadFailed.class.php [new file with mode: 0644]
wcfsetup/install/files/lib/system/message/unfurl/ParsingFailed.class.php [new file with mode: 0644]
wcfsetup/install/files/lib/system/message/unfurl/UnfurlResponse.class.php [new file with mode: 0644]
wcfsetup/install/files/lib/system/message/unfurl/UrlInaccessible.class.php [new file with mode: 0644]
wcfsetup/install/files/lib/util/UnfurlUrlUtil.class.php [deleted file]

index 954f6d4f05e75c66904bcde39ce0253a3c715263..c747e160d1bbf4698e36fe87e173f5a740db8ecd 100644 (file)
@@ -2,11 +2,16 @@
 
 namespace wcf\system\background\job;
 
+use BadMethodCallException;
+use GuzzleHttp\Psr7\Response;
 use wcf\data\unfurl\url\UnfurlUrl;
 use wcf\data\unfurl\url\UnfurlUrlAction;
+use wcf\system\message\unfurl\DownloadFailed;
+use wcf\system\message\unfurl\ParsingFailed;
+use wcf\system\message\unfurl\UnfurlResponse;
+use wcf\system\message\unfurl\UrlInaccessible;
 use wcf\util\FileUtil;
 use wcf\util\StringUtil;
-use wcf\util\UnfurlUrlUtil;
 
 /**
  * Represents a background job to get information for an url.
@@ -17,12 +22,12 @@ use wcf\util\UnfurlUrlUtil;
  * @package     WoltLabSuite\Core\System\Background\Job
  * @since       5.4
  */
-class UnfurlUrlBackgroundJob extends AbstractBackgroundJob
+final class UnfurlUrlBackgroundJob extends AbstractBackgroundJob
 {
     /**
-     * @var UnfurlUrl
+     * @var int
      */
-    private $url;
+    private $urlID;
 
     /**
      * UnfurlURLJob constructor.
@@ -31,7 +36,7 @@ class UnfurlUrlBackgroundJob extends AbstractBackgroundJob
      */
     public function __construct(UnfurlUrl $url)
     {
-        $this->url = $url;
+        $this->urlID = $url->urlID;
     }
 
     /**
@@ -57,104 +62,177 @@ class UnfurlUrlBackgroundJob extends AbstractBackgroundJob
      */
     public function perform()
     {
+        $unfurlUrl = new UnfurlUrl($this->urlID);
+
         try {
-            $url = new UnfurlUrlUtil($this->url->url);
-
-            if (empty(StringUtil::trim($url->getTitle()))) {
-                $urlAction = new UnfurlUrlAction([$this->url], 'update', [
-                    'data' => [
-                        'title' => '',
-                        'description' => '',
-                        'status' => UnfurlUrl::STATUS_REJECTED,
-                    ],
-                ]);
-                $urlAction->executeAction();
+            $unfurlResponse = UnfurlResponse::fetch($unfurlUrl->url);
+
+            if (empty(StringUtil::trim($unfurlResponse->getTitle()))) {
+                $this->save(UnfurlUrl::STATUS_REJECTED);
             } else {
-                $title = StringUtil::truncate($url->getTitle(), 255);
-                $description = $url->getDescription();
-                $data = [
-                    'title' => $title,
-                    'description' => $description !== null ? StringUtil::truncate($description, 500) : '',
-                    'status' => UnfurlUrl::STATUS_SUCCESSFUL,
-                ];
-
-                if ($url->getImageUrl()) {
-                    $image = UnfurlUrlUtil::downloadImageFromUrl($url->getImageUrl());
-
-                    if ($image !== null) {
-                        $imageData = @\getimagesizefromstring($image);
-
-                        // filter images which are too large or too small
-                        $isSquared = $imageData[0] === $imageData[1];
-                        if (
-                            (!$isSquared && ($imageData[0] < 300 && $imageData[1] < 150))
-                            || \min($imageData[0], $imageData[1]) < 50
-                        ) {
-                            $data['imageType'] = UnfurlUrl::IMAGE_NO_IMAGE;
-                        } else {
-                            if ($imageData[0] === $imageData[1]) {
-                                $data['imageUrl'] = $url->getImageUrl();
-                                $data['imageType'] = UnfurlUrl::IMAGE_SQUARED;
-                            } else {
-                                $data['imageUrl'] = $url->getImageUrl();
-                                $data['imageType'] = UnfurlUrl::IMAGE_COVER;
-                            }
+                $title = StringUtil::truncate($unfurlResponse->getTitle(), 255);
+                if ($unfurlResponse->getDescription() !== null) {
+                    $description = StringUtil::truncate($unfurlResponse->getDescription(), 500);
+                } else {
+                    $description = "";
+                }
 
-                            // Download image, if there is no image proxy or external source images allowed.
+                if ($unfurlResponse->getImageUrl()) {
+                    try {
+                        $image = $this->downloadImage($unfurlResponse->getImage());
+                        $imageData = \getimagesizefromstring($image);
+                        if ($imageData !== false) {
+                            $imageType = $this->validateImage($imageData);
                             if (!(MODULE_IMAGE_PROXY || IMAGE_ALLOW_EXTERNAL_SOURCE)) {
-                                if (isset($data['imageType'])) {
-                                    switch ($imageData[2]) {
-                                        case \IMAGETYPE_PNG:
-                                            $extension = 'png';
-                                            break;
-                                        case \IMAGETYPE_GIF:
-                                            $extension = 'gif';
-                                            break;
-                                        case \IMAGETYPE_JPEG:
-                                            $extension = 'jpg';
-                                            break;
-                                        default:
-                                            throw new \RuntimeException();
-                                    }
-
-                                    $data['imageHash'] = \sha1($image) . '.' . $extension;
-
-                                    $path = WCF_DIR . 'images/unfurlUrl/' . \substr($data['imageHash'], 0, 2);
-                                    FileUtil::makePath($path);
-
-                                    $fileLocation = $path . '/' . $data['imageHash'];
-
-                                    \file_put_contents($fileLocation, $image);
-
-                                    @\touch($fileLocation);
-                                }
+                                $imageHash = $this->saveImage($imageData, $image);
+                            } else {
+                                $imageHash = "";
                             }
+                        } else {
+                            $imageType = UnfurlUrl::IMAGE_NO_IMAGE;
+                        }
+
+                        if ($imageType === UnfurlUrl::IMAGE_NO_IMAGE) {
+                            $imageUrl = $imageHash = "";
+                        } else {
+                            $imageUrl = $unfurlResponse->getImageUrl();
                         }
+                    } catch (UrlInaccessible | DownloadFailed $e) {
+                        $imageType = UnfurlUrl::IMAGE_NO_IMAGE;
+                        $imageUrl = $imageHash = "";
                     }
+                } else {
+                    $imageType = UnfurlUrl::IMAGE_NO_IMAGE;
+                    $imageUrl = $imageHash = "";
                 }
 
-                $urlAction = new UnfurlUrlAction([$this->url], 'update', [
-                    'data' => $data,
-                ]);
-                $urlAction->executeAction();
+                $this->save(
+                    UnfurlUrl::STATUS_SUCCESSFUL,
+                    $title,
+                    $description,
+                    $imageType,
+                    $imageUrl,
+                    $imageHash
+                );
+            }
+        } catch (UrlInaccessible | ParsingFailed $e) {
+            if (\ENABLE_DEBUG_MODE) {
+                \wcf\functions\exception\logThrowable($e);
+            }
+
+            $this->save(UnfurlUrl::STATUS_REJECTED);
+        }
+    }
+
+    private function downloadImage(Response $imageResponse): string
+    {
+        $image = "";
+        while (!$imageResponse->getBody()->eof()) {
+            $image .= $imageResponse->getBody()->read(8192);
+
+            if ($imageResponse->getBody()->tell() >= UnfurlResponse::MAX_IMAGE_SIZE) {
+                break;
             }
-        } catch (\InvalidArgumentException $e) {
-            \wcf\functions\exception\logThrowable($e);
         }
+        $imageResponse->getBody()->close();
+
+        return $image;
     }
 
-    /**
-     * @inheritDoc
-     */
-    public function onFinalFailure()
+    private function validateImage(array $imageData): string
     {
-        $urlAction = new UnfurlUrlAction([$this->url], 'update', [
+        $isSquared = $imageData[0] === $imageData[1];
+        if (
+            (!$isSquared && ($imageData[0] < 300 && $imageData[1] < 150))
+            || \min($imageData[0], $imageData[1]) < 50
+        ) {
+            return UnfurlUrl::IMAGE_NO_IMAGE;
+        } else {
+            if ($isSquared) {
+                return UnfurlUrl::IMAGE_SQUARED;
+            } else {
+                return UnfurlUrl::IMAGE_COVER;
+            }
+        }
+    }
+
+    private function saveImage(array $imageData, string $image): string
+    {
+        switch ($imageData[2]) {
+            case \IMAGETYPE_PNG:
+                $extension = 'png';
+                break;
+            case \IMAGETYPE_GIF:
+                $extension = 'gif';
+                break;
+            case \IMAGETYPE_JPEG:
+                $extension = 'jpg';
+                break;
+
+            default:
+                throw new DownloadFailed();
+        }
+
+        $imageHash = sha1($image);
+
+        $path = WCF_DIR . 'images/unfurlUrl/' . \substr($imageHash, 0, 2);
+        FileUtil::makePath($path);
+
+        $fileLocation = $path . '/' . $imageHash . '.' . $extension;
+
+        \file_put_contents($fileLocation, $image);
+
+        @\touch($fileLocation);
+
+        return $imageHash . '.' . $extension;
+    }
+
+    private function save(
+        string $status,
+        string $title = "",
+        string $description = "",
+        string $imageType = UnfurlUrl::IMAGE_NO_IMAGE,
+        string $imageUrl = "",
+        string $imageHash = ""
+    ): void {
+        switch ($status) {
+            case UnfurlUrl::STATUS_PENDING:
+            case UnfurlUrl::STATUS_REJECTED:
+            case UnfurlUrl::STATUS_SUCCESSFUL:
+                break;
+
+            default:
+                throw new BadMethodCallException("Invalid status '{$status}' given.");
+        }
+
+        switch ($imageType) {
+            case UnfurlUrl::IMAGE_COVER:
+            case UnfurlUrl::IMAGE_NO_IMAGE:
+            case UnfurlUrl::IMAGE_SQUARED:
+                break;
+
+            default:
+                throw new BadMethodCallException("Invalid imageType '{$imageType}' given.");
+        }
+
+        $urlAction = new UnfurlUrlAction([$this->urlID], 'update', [
             'data' => [
-                'title' => '',
-                'description' => '',
-                'status' => 'REJECTED',
+                'status' => $status,
+                'title' => $title,
+                'description' => $description,
+                'imageType' => $imageType,
+                'imageUrl' => $imageUrl,
+                'imageHash' => $imageHash,
             ],
         ]);
         $urlAction->executeAction();
     }
+
+    /**
+     * @inheritDoc
+     */
+    public function onFinalFailure()
+    {
+        $this->save(UnfurlUrl::STATUS_REJECTED);
+    }
 }
diff --git a/wcfsetup/install/files/lib/system/message/unfurl/DownloadFailed.class.php b/wcfsetup/install/files/lib/system/message/unfurl/DownloadFailed.class.php
new file mode 100644 (file)
index 0000000..c9e776b
--- /dev/null
@@ -0,0 +1,18 @@
+<?php
+
+namespace wcf\system\message\unfurl;
+
+use Exception;
+
+/**
+ * Denotes a (temporary) download failure. The download can be retried later.
+ *
+ * @author      Joshua Ruesweg
+ * @copyright   2001-2021 WoltLab GmbH
+ * @license     GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
+ * @package     WoltLabSuite\Core\System\Message\Unfurl
+ * @since       5.4
+ */
+class DownloadFailed extends Exception
+{
+}
diff --git a/wcfsetup/install/files/lib/system/message/unfurl/ParsingFailed.class.php b/wcfsetup/install/files/lib/system/message/unfurl/ParsingFailed.class.php
new file mode 100644 (file)
index 0000000..595288c
--- /dev/null
@@ -0,0 +1,18 @@
+<?php
+
+namespace wcf\system\message\unfurl;
+
+use Exception;
+
+/**
+ * Denotes a permanent failure to parse the response body. The request should not be retried later.
+ *
+ * @author      Joshua Ruesweg
+ * @copyright   2001-2021 WoltLab GmbH
+ * @license     GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
+ * @package     WoltLabSuite\Core\System\Message\Unfurl
+ * @since       5.4
+ */
+class ParsingFailed extends Exception
+{
+}
diff --git a/wcfsetup/install/files/lib/system/message/unfurl/UnfurlResponse.class.php b/wcfsetup/install/files/lib/system/message/unfurl/UnfurlResponse.class.php
new file mode 100644 (file)
index 0000000..bd7983f
--- /dev/null
@@ -0,0 +1,294 @@
+<?php
+
+namespace wcf\system\message\unfurl;
+
+use BadMethodCallException;
+use Exception;
+use GuzzleHttp\ClientInterface;
+use GuzzleHttp\Exception\BadResponseException;
+use GuzzleHttp\Exception\TransferException;
+use GuzzleHttp\Psr7\Request;
+use GuzzleHttp\Psr7\Response;
+use GuzzleHttp\RequestOptions;
+use Psr\Http\Client\ClientExceptionInterface;
+use wcf\system\io\HttpFactory;
+use wcf\util\StringUtil;
+use wcf\util\Url;
+
+/**
+ * Helper class to unfurl specific urls.
+ *
+ * @author      Joshua Ruesweg
+ * @copyright   2001-2021 WoltLab GmbH
+ * @license     GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
+ * @package     WoltLabSuite\Core\System\Message\Unfurl
+ * @since       5.4
+ */
+final class UnfurlResponse
+{
+    /**
+     * 10 Mebibyte
+     */
+    private const MAX_SIZE = (10 * (1 << 20));
+
+    /**
+     * 3 Mebibyte
+     */
+    public const MAX_IMAGE_SIZE = (3 * (1 << 20));
+
+    /**
+     * @var ClientInterface
+     */
+    private static $httpClient;
+
+    /**
+     * @var string
+     */
+    private $url;
+
+    /**
+     * @var Response
+     */
+    private $response;
+
+    /**
+     * @var \DOMDocument
+     */
+    private $domDocument;
+
+    /**
+     * Fetches a given Url and returns an UnfurlResponse instance.
+     *
+     * @throws ParsingFailed If the body cannot be parsed (e.g. the url is an image).
+     * @throws DownloadFailed If the url cannot be downloaded. This can be a temporary error.
+     * @throws UrlInaccessible If the url is inaccessible (e.g. sends status code 403).
+     */
+    public static function fetch(string $url): self
+    {
+        if (!Url::is($url)) {
+            throw new \InvalidArgumentException('Given URL "' . $url . '" is not a valid URL.');
+        }
+
+        try {
+            $request = new Request('GET', $url, [
+                'range' => \sprintf('bytes=%d-%d', 0, self::MAX_SIZE - 1),
+            ]);
+            $response = self::getHttpClient()->send($request);
+
+            return new self($url, $response);
+        } catch (BadResponseException $e) {
+            $response = $e->getResponse();
+
+            switch ($response->getStatusCode()) {
+                case 400: // Bad Request
+                case 401: // Unauthorized
+                case 402: // Payment Required
+                case 403: // Forbidden
+                case 404: // Not Found
+                    $message = "Request failed with status code {$response->getStatusCode()}.";
+
+                    throw new UrlInaccessible($message, $response->getStatusCode(), $e);
+                    break;
+
+                default:
+                    throw new DownloadFailed("Could not download content.", $response->getStatusCode(), $e);
+            }
+        } catch (ClientExceptionInterface $e) {
+            throw new DownloadFailed("Could not download content.", 0, $e);
+        }
+    }
+
+    /**
+     * @throws ParsingFailed If the body cannot be parsed (e.g. the url is an image).
+     * @throws DownloadFailed If the url cannot be downloaded. This can be a temporary error.
+     */
+    private function __construct(string $url, Response $response)
+    {
+        $this->url = $url;
+        $this->response = $response;
+
+        $this->readBody();
+        $this->readDomDocument();
+    }
+
+    /**
+     * Reads the response body (stopping once MAX_SIZE bytes have been read) and converts it to UTF-8 if necessary.
+     */
+    private function readBody(): void
+    {
+        $this->body = "";
+        while (!$this->response->getBody()->eof()) {
+            $this->body .= $this->response->getBody()->read(8192);
+
+            if ($this->response->getBody()->tell() >= self::MAX_SIZE) {
+                break;
+            }
+        }
+        $this->response->getBody()->close();
+
+        if (\mb_detect_encoding($this->body) !== 'UTF-8') {
+            try {
+                $this->body = StringUtil::convertEncoding(\mb_detect_encoding($this->body), 'UTF-8', $this->body);
+            } catch (Exception $e) {
+                throw new ParsingFailed(
+                    "Could not parse body, due an invalid charset. The Url could be an image.",
+                    0,
+                    $e
+                );
+            }
+        }
+    }
+
+    /**
+     * Creates the DomDocument.
+     *
+     * @throws ParsingFailed If the body cannot be parsed (e.g. the url is a JSON file).
+     */
+    private function readDomDocument(): void
+    {
+        \libxml_use_internal_errors(true);
+        $this->domDocument = new \DOMDocument();
+        if (!$this->domDocument->loadHTML('<?xml version="1.0" encoding="UTF-8"?>' . $this->body)) {
+            throw new ParsingFailed("Could not parse body.");
+        }
+    }
+
+    /**
+     * Determines the title of the website.
+     */
+    public function getTitle(): ?string
+    {
+        if (!empty($this->body)) {
+            $metaTags = $this->domDocument->getElementsByTagName('meta');
+
+            // og
+            foreach ($metaTags as $metaTag) {
+                foreach ($metaTag->attributes as $attr) {
+                    if ($attr->nodeName == 'property' && $attr->value == 'og:title') {
+                        foreach ($attr->parentNode->attributes as $attr) {
+                            if ($attr->nodeName == 'content') {
+                                return $attr->value;
+                            }
+                        }
+                    }
+                }
+            }
+
+            // title tag
+            $title = $this->domDocument->getElementsByTagName('title');
+            if ($title->length) {
+                return $title->item(0)->nodeValue;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Determines the description of the website.
+     */
+    public function getDescription(): ?string
+    {
+        if (!empty($this->body)) {
+            $metaTags = $this->domDocument->getElementsByTagName('meta');
+
+            // og:description
+            foreach ($metaTags as $metaTag) {
+                foreach ($metaTag->attributes as $attr) {
+                    if ($attr->nodeName == 'property' && $attr->value == 'og:description') {
+                        foreach ($attr->parentNode->attributes as $attr) {
+                            if ($attr->nodeName == 'content') {
+                                return $attr->value;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Returns the image url for the current url.
+     */
+    public function getImageUrl(): ?string
+    {
+        if (!empty($this->body)) {
+            $metaTags = $this->domDocument->getElementsByTagName('meta');
+
+            // og:image
+            foreach ($metaTags as $metaTag) {
+                foreach ($metaTag->attributes as $attr) {
+                    if ($attr->nodeName == 'property' && $attr->value == 'og:image') {
+                        foreach ($attr->parentNode->attributes as $attr) {
+                            if ($attr->nodeName == 'content') {
+                                return $attr->value;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Requests the image referenced by getImageUrl() and returns the HTTP response.
+     *
+     * @throws BadMethodCallException If the url does not have an image.
+     * @throws DownloadFailed If the url cannot be downloaded. This can be a temporary error.
+     * @throws UrlInaccessible If the url is inaccessible (e.g. sends status code 403).
+     */
+    public function getImage(): Response
+    {
+        if (!$this->getImageUrl()) {
+            throw new BadMethodCallException("This url does not have an image.");
+        }
+
+        try {
+            $request = new Request('GET', $this->getImageUrl(), [
+                'accept' => 'image/*',
+                'range' => 'bytes=0-' . (self::MAX_IMAGE_SIZE - 1),
+            ]);
+
+            return self::getHttpClient()->send($request);
+        } catch (BadResponseException $e) {
+            $response = $e->getResponse();
+
+            switch ($response->getStatusCode()) {
+                case 400: // Bad Request
+                case 401: // Unauthorized
+                case 402: // Payment Required
+                case 403: // Forbidden
+                case 404: // Not Found
+                    $message = "Request failed with status code {$response->getStatusCode()}.";
+
+                    throw new UrlInaccessible($message, $response->getStatusCode(), $e);
+                    break;
+
+                default:
+                    throw new DownloadFailed("Could not download content.", $response->getStatusCode(), $e);
+            }
+        } catch (ClientExceptionInterface $e) {
+            throw new DownloadFailed("Could not download content.", 0, $e);
+        }
+    }
+
+    /**
+     * Returns a "static" instance of the HTTP client to use to allow
+     * for TCP connection reuse.
+     */
+    private static function getHttpClient(): ClientInterface
+    {
+        if (!self::$httpClient) {
+            self::$httpClient = HttpFactory::makeClient([
+                RequestOptions::TIMEOUT => 10,
+                RequestOptions::STREAM => true,
+            ]);
+        }
+
+        return self::$httpClient;
+    }
+}
diff --git a/wcfsetup/install/files/lib/system/message/unfurl/UrlInaccessible.class.php b/wcfsetup/install/files/lib/system/message/unfurl/UrlInaccessible.class.php
new file mode 100644 (file)
index 0000000..2db18a4
--- /dev/null
@@ -0,0 +1,18 @@
+<?php
+
+namespace wcf\system\message\unfurl;
+
+use Exception;
+
+/**
+ * Denotes a permanently failing URL, because the URL is inaccessible.
+ *
+ * @author      Joshua Ruesweg
+ * @copyright   2001-2021 WoltLab GmbH
+ * @license     GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
+ * @package     WoltLabSuite\Core\System\Message\Unfurl
+ * @since       5.4
+ */
+class UrlInaccessible extends Exception
+{
+}
diff --git a/wcfsetup/install/files/lib/util/UnfurlUrlUtil.class.php b/wcfsetup/install/files/lib/util/UnfurlUrlUtil.class.php
deleted file mode 100644 (file)
index faf446d..0000000
+++ /dev/null
@@ -1,254 +0,0 @@
-<?php
-
-namespace wcf\util;
-
-use GuzzleHttp\Exception\BadResponseException;
-use GuzzleHttp\Exception\TooManyRedirectsException;
-use GuzzleHttp\Exception\TransferException;
-use GuzzleHttp\Psr7\Request;
-use GuzzleHttp\RequestOptions;
-use wcf\system\io\HttpFactory;
-
-/**
- * Helper class to unfurl specific urls.
- *
- * @author      Joshua Ruesweg
- * @copyright   2001-2021 WoltLab GmbH
- * @license     GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
- * @package     WoltLabSuite\Core\Util
- * @since       5.4
- */
-final class UnfurlUrlUtil
-{
-    /**
-     * 10 Mebibyte
-     */
-    private const MAX_SIZE = (10 * (1 << 20));
-
-    /**
-     * 3 Mebibyte
-     */
-    private const MAX_IMAGE_SIZE = (3 * (1 << 20));
-
-    /**
-     * @var string
-     */
-    private $url;
-
-    /**
-     * @var string
-     */
-    private $body;
-
-    /**
-     * @var \DOMDocument
-     */
-    private $domDocument;
-
-    public function __construct(string $url)
-    {
-        if (!Url::is($url)) {
-            throw new \InvalidArgumentException('Given URL "' . $url . '" is not a valid URL.');
-        }
-
-        $this->url = $url;
-
-        $this->fetchUrl();
-    }
-
-    /**
-     * Fetches the body of the given url and converts the body to utf-8.
-     */
-    private function fetchUrl(): void
-    {
-        try {
-            $client = HttpFactory::makeClient([
-                RequestOptions::TIMEOUT => 10,
-                RequestOptions::STREAM => true,
-            ]);
-            $request = new Request('GET', $this->url, [
-                'range' => \sprintf('bytes=%d-%d', 0, self::MAX_SIZE - 1),
-            ]);
-            $response = $client->send($request);
-
-            $this->body = "";
-            while (!$response->getBody()->eof()) {
-                $this->body .= $response->getBody()->read(8192);
-
-                if ($response->getBody()->tell() >= self::MAX_SIZE) {
-                    break;
-                }
-            }
-            $response->getBody()->close();
-
-            if (\mb_detect_encoding($this->body) !== 'UTF-8') {
-                $this->body = StringUtil::convertEncoding(\mb_detect_encoding($this->body), 'UTF-8', $this->body);
-            }
-        } catch (TooManyRedirectsException | BadResponseException | TransferException $e) {
-            // Ignore these exceptions.
-        }
-    }
-
-    /**
-     * Returns the dom document of the website.
-     */
-    private function getDomDocument(): \DOMDocument
-    {
-        if ($this->domDocument === null) {
-            \libxml_use_internal_errors(true);
-            $this->domDocument = new \DOMDocument();
-            $this->domDocument->loadHTML('<?xml version="1.0" encoding="UTF-8"?>' . $this->body);
-        }
-
-        return $this->domDocument;
-    }
-
-    /**
-     * Determines the title of the website.
-     */
-    public function getTitle(): ?string
-    {
-        if (!empty($this->body)) {
-            $metaTags = $this->getDomDocument()->getElementsByTagName('meta');
-
-            // og
-            foreach ($metaTags as $metaTag) {
-                foreach ($metaTag->attributes as $attr) {
-                    if ($attr->nodeName == 'property' && $attr->value == 'og:title') {
-                        foreach ($attr->parentNode->attributes as $attr) {
-                            if ($attr->nodeName == 'content') {
-                                return $attr->value;
-                            }
-                        }
-                    }
-                }
-            }
-
-            // title tag
-            $title = $this->getDomDocument()->getElementsByTagName('title');
-            if ($title->length) {
-                return $title->item(0)->nodeValue;
-            }
-        }
-
-        return null;
-    }
-
-    /**
-     * Determines the description of the website.
-     */
-    public function getDescription(): ?string
-    {
-        if (!empty($this->body)) {
-            $metaTags = $this->getDomDocument()->getElementsByTagName('meta');
-
-            // og:description
-            foreach ($metaTags as $metaTag) {
-                foreach ($metaTag->attributes as $attr) {
-                    if ($attr->nodeName == 'property' && $attr->value == 'og:description') {
-                        foreach ($attr->parentNode->attributes as $attr) {
-                            if ($attr->nodeName == 'content') {
-                                return $attr->value;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        return null;
-    }
-
-    /**
-     * Returns the image url for the current url.
-     */
-    public function getImageUrl(): ?string
-    {
-        if (!empty($this->body)) {
-            $metaTags = $this->getDomDocument()->getElementsByTagName('meta');
-
-            // og:image
-            foreach ($metaTags as $metaTag) {
-                foreach ($metaTag->attributes as $attr) {
-                    if ($attr->nodeName == 'property' && $attr->value == 'og:image') {
-                        foreach ($attr->parentNode->attributes as $attr) {
-                            if ($attr->nodeName == 'content') {
-                                return $attr->value;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        return null;
-    }
-
-    /**
-     * Downloads the image from a url and returns the image body.
-     */
-    public static function downloadImageFromUrl(string $url): ?string
-    {
-        try {
-            // Rewrite schemaless URLs to https.
-            $scheme = \parse_url($url, \PHP_URL_SCHEME);
-            if (!$scheme) {
-                if (StringUtil::startsWith($url, '//')) {
-                    $url = 'https:' . $url;
-                } else {
-                    throw new \DomainException();
-                }
-            }
-
-            // download image
-            try {
-                $client = HttpFactory::makeClient([
-                    RequestOptions::TIMEOUT => 10,
-                    RequestOptions::STREAM => true,
-                ]);
-                $request = new Request('GET', $url, [
-                    'via' => '1.1 wsc',
-                    'accept' => 'image/*',
-                    'range' => 'bytes=0-' . (self::MAX_IMAGE_SIZE - 1),
-                ]);
-                $response = $client->send($request);
-
-                $image = "";
-                while (!$response->getBody()->eof()) {
-                    $image .= $response->getBody()->read(8192);
-
-                    if ($response->getBody()->tell() >= self::MAX_IMAGE_SIZE) {
-                        break;
-                    }
-                }
-                $response->getBody()->close();
-            } catch (TransferException $e) {
-                throw new \DomainException('Failed to request', 0, $e);
-            }
-
-            // check file type
-            $imageData = @\getimagesizefromstring($image);
-            if (!$imageData) {
-                throw new \DomainException();
-            }
-
-            switch ($imageData[2]) {
-                case \IMAGETYPE_PNG:
-                    $extension = 'png';
-                    break;
-                case \IMAGETYPE_GIF:
-                    $extension = 'gif';
-                    break;
-                case \IMAGETYPE_JPEG:
-                    $extension = 'jpg';
-                    break;
-                default:
-                    throw new \DomainException();
-            }
-
-            return $image;
-        } catch (\DomainException $e) {
-            return null;
-        }
-    }
-}