Use buffers to write uploaded files
author: Alexander Ebert <ebert@woltlab.com>
Tue, 26 Dec 2023 18:20:15 +0000 (19:20 +0100)
committer: Alexander Ebert <ebert@woltlab.com>
Sat, 8 Jun 2024 10:19:37 +0000 (12:19 +0200)
ts/WoltLabSuite/Core/Component/File/Upload.ts
wcfsetup/install/files/js/WoltLabSuite/Core/Component/File/Upload.js
wcfsetup/install/files/lib/action/FileUploadAction.class.php

index 261ab4d9ab145cd70dfcb60fcaca1e75ee307059..5e0236741a98ab37b22584d583917e36c42690d3 100644 (file)
@@ -17,13 +17,30 @@ async function upload(element: WoltlabCoreFileUploadElement, file: File): Promis
   const chunkSize = 2_000_000;
   const chunks = Math.ceil(file.size / chunkSize);
 
+  const arrayBufferToHex = (buffer: ArrayBuffer): string => {
+    return Array.from(new Uint8Array(buffer))
+      .map((b) => b.toString(16).padStart(2, "0"))
+      .join("");
+  };
+
+  const hash = await window.crypto.subtle.digest("SHA-256", await file.arrayBuffer());
+  console.log("checksum for the entire file is:", arrayBufferToHex(hash));
+
+  const data: Blob[] = [];
   for (let i = 0; i < chunks; i++) {
     const start = i * chunkSize;
     const end = start + chunkSize;
     const chunk = file.slice(start, end);
+    data.push(chunk);
+
+    console.log("Uploading", start, "to", end, " (total: " + chunk.size + " of " + file.size + ")");
 
     await prepareRequest(endpoints[i]).post(chunk).fetchAsResponse();
   }
+
+  const uploadedChunks = new Blob(data);
+  const uploadedHash = await window.crypto.subtle.digest("SHA-256", await uploadedChunks.arrayBuffer());
+  console.log("checksum for the entire file is:", arrayBufferToHex(uploadedHash));
 }
 
 export function setup(): void {
@@ -31,5 +48,8 @@ export function setup(): void {
     element.addEventListener("upload", (event: CustomEvent<File>) => {
       void upload(element, event.detail);
     });
+
+    const file = new File(["a".repeat(4_000_001)], "test.txt");
+    void upload(element, file);
   });
 }
index 4fdde035d673f430d42c64a1be5debf48f874431..2c6a0e6dd4021e4effda5c7c725a120a9d138417 100644 (file)
@@ -12,18 +12,33 @@ define(["require", "exports", "WoltLabSuite/Core/Ajax/Backend", "WoltLabSuite/Co
         const { endpoints } = response;
         const chunkSize = 2000000;
         const chunks = Math.ceil(file.size / chunkSize);
+        const arrayBufferToHex = (buffer) => {
+            return Array.from(new Uint8Array(buffer))
+                .map((b) => b.toString(16).padStart(2, "0"))
+                .join("");
+        };
+        const hash = await window.crypto.subtle.digest("SHA-256", await file.arrayBuffer());
+        console.log("checksum for the entire file is:", arrayBufferToHex(hash));
+        const data = [];
         for (let i = 0; i < chunks; i++) {
             const start = i * chunkSize;
             const end = start + chunkSize;
             const chunk = file.slice(start, end);
+            data.push(chunk);
+            console.log("Uploading", start, "to", end, " (total: " + chunk.size + " of " + file.size + ")");
             await (0, Backend_1.prepareRequest)(endpoints[i]).post(chunk).fetchAsResponse();
         }
+        const uploadedChunks = new Blob(data);
+        const uploadedHash = await window.crypto.subtle.digest("SHA-256", await uploadedChunks.arrayBuffer());
+        console.log("checksum for the entire file is:", arrayBufferToHex(uploadedHash));
     }
     function setup() {
         (0, Selector_1.wheneverFirstSeen)("woltlab-core-file-upload", (element) => {
             element.addEventListener("upload", (event) => {
                 void upload(element, event.detail);
             });
+            const file = new File(["a".repeat(4000001)], "test.txt");
+            void upload(element, file);
         });
     }
     exports.setup = setup;
index 4c396ce744ed8022b7829fe9460a44dac592c177..4e14983b4e34b8bac99e71574edbf2d5fd83fd72 100644 (file)
@@ -8,6 +8,7 @@ use Psr\Http\Message\ServerRequestInterface;
 use Psr\Http\Server\RequestHandlerInterface;
 use wcf\http\Helper;
 use wcf\system\exception\IllegalLinkException;
+use wcf\system\io\AtomicWriter;
 use wcf\system\WCF;
 
 final class FileUploadAction implements RequestHandlerInterface
@@ -44,21 +45,6 @@ final class FileUploadAction implements RequestHandlerInterface
             throw new IllegalLinkException();
         }
 
-        // Check if the actual size matches the expectations.
-        if ($parameters['sequenceNo'] === $chunks - 1) {
-            // The last chunk is most likely smaller than our chunk size.
-            $expectedSize = $row['filesize'] - $chunkSize * ($chunks - 1);
-        } else {
-            $expectedSize = $chunkSize;
-        }
-
-        $chunk = \file_get_contents('php://input');
-        $actualSize = \strlen($chunk);
-
-        if ($actualSize !== $expectedSize) {
-            throw new IllegalLinkException();
-        }
-
         $folderA = \substr($row['identifier'], 0, 2);
         $folderB = \substr($row['identifier'], 2, 2);
 
@@ -77,7 +63,61 @@ final class FileUploadAction implements RequestHandlerInterface
             $parameters['sequenceNo'],
         );
 
-        \file_put_contents($tmpPath . $filename, $chunk);
+        // Write the chunk using a buffer to avoid blowing up the memory limit.
+        // See https://stackoverflow.com/a/61997147
+        $file = new AtomicWriter($tmpPath . $filename);
+        $bufferSize = 1 * 1024 * 1024;
+
+        $fh = \fopen('php://input', 'rb');
+        while (!\feof($fh)) {
+            $file->write(\fread($fh, $bufferSize));
+        }
+        \fclose($fh);
+
+        $file->flush();
+
+        // Check if we have all chunks.
+        $data = [];
+        for ($i = 0; $i < $chunks; $i++) {
+            $filename = \sprintf(
+                '%s-%d.bin',
+                $row['identifier'],
+                $i,
+            );
+
+            if (\file_exists($tmpPath . $filename)) {
+                $data[] = $tmpPath . $filename;
+            }
+        }
+
+        if (\count($data) === $chunks) {
+            // Concatenate the files by reading only a limited buffer at a time
+            // to avoid blowing up the memory limit.
+            // See https://stackoverflow.com/a/61997147
+            $bufferSize = 1 * 1024 * 1024;
+
+            $newFilename = \sprintf('%s-final.bin', $row['identifier']);
+            $file = new AtomicWriter($tmpPath . $newFilename);
+            foreach ($data as $fileChunk) {
+                $fh = \fopen($fileChunk, 'rb');
+                while (!\feof($fh)) {
+                    $file->write(\fread($fh, $bufferSize));
+                }
+                \fclose($fh);
+            }
+
+            $file->flush();
+
+            \wcfDebug(
+                \memory_get_peak_usage(true),
+                \hash_file(
+                    'sha256',
+                    $tmpPath . $newFilename,
+                )
+            );
+        }
+
+        \wcfDebug(\memory_get_peak_usage(true));
 
         // TODO: Dummy response to simulate a successful upload of a chunk.
         return new EmptyResponse();