How Google Drive handles HUGE File Uploads 🔥🔥

How Google Drive handles HUGE File Uploads 🔥🔥

How to handle large file uploads in the browser under challenging conditions such as browser timeouts or unstable network connections

·

5 min read

Uploading large files in a browser can be challenging due to various limitations like browser timeout, server upload limit, or unstable network conditions.

One common approach is to split the file into smaller chunks and upload these chunks sequentially or in parallel. This method, known as "chunked upload", also provides better error handling, pause/resume capabilities, and progress tracking.

Here's a simple example of how you can implement it:

Client Side Code

  1. Split the File into Chunks - You can use the Blob.slice() method to split the file into manageable chunks.

  2. Implement Chunk Upload Function - For each chunk, you'll need to send an HTTP request to the server. You can use the XMLHttpRequest object or the fetch API for this purpose.

  3. Implement the Finalise Function - Signals the server that the file upload is complete so it can reassemble the chunks.


/**
 * Tell the server that every chunk has been sent so it can reassemble the file.
 *
 * POSTs `{ fileName, totalChunks }` as JSON to the finalize endpoint and logs
 * the outcome; a non-OK response is logged together with the response text.
 *
 * @param {string} fileName - Name of the file that was uploaded in chunks.
 * @param {number} totalChunks - Number of chunks that were uploaded.
 * @returns {Promise<void>} Resolves once the outcome has been logged.
 */
async function finalizeUpload(fileName, totalChunks) {
  const requestInit = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ fileName, totalChunks }),
  };
  const reply = await fetch('YOUR_FINALIZE_ENDPOINT', requestInit);

  if (!reply.ok) {
    const reason = await reply.text();
    console.error('Failed to finalize the upload:', reason);
    return;
  }

  console.log('File reassembled successfully on server.');
}


/**
 * Split a file into consecutive chunks of at most `chunkSize` bytes.
 *
 * @param {Blob} file - Object exposing a numeric `size` and a
 *   `slice(start, end)` method (a File or Blob in the browser).
 * @param {number} chunkSize - Maximum size of each chunk in bytes; must be a
 *   finite number greater than zero.
 * @returns {Array} The chunks in file order; empty when `file.size` is 0.
 * @throws {RangeError} If `chunkSize` is not a finite positive number.
 */
function sliceFile(file, chunkSize) {
  // A zero or negative step never moves `start` past file.size, so the loop
  // below would never terminate; reject such sizes up front.
  if (!Number.isFinite(chunkSize) || chunkSize <= 0) {
    throw new RangeError('chunkSize must be a positive number, got ' + String(chunkSize));
  }

  const chunks = [];
  for (let start = 0; start < file.size; start += chunkSize) {
    // The last chunk is clamped so it never reads past the end of the file.
    const end = Math.min(start + chunkSize, file.size);
    chunks.push(file.slice(start, end));
  }
  return chunks;
}

// Function to upload a single chunk
/**
 * POST one chunk to the upload endpoint as multipart/form-data.
 *
 * The form carries `fileChunk` (the bytes), `chunkIndex` and, when given,
 * `fileName` - the field the server's /upload handler uses to name the
 * stored chunk.
 *
 * @param {Blob} chunk - The slice of the file to send.
 * @param {number} index - Zero-based position of the chunk within the file.
 * @param {string} [fileName] - Name of the file the chunk belongs to. Any
 *   non-string value is ignored, so existing callers that pass something else
 *   in this position keep working.
 * @returns {Promise<boolean>} `true` when the server answered with a 2xx
 *   status, `false` for any other status.
 * @throws Rethrows whatever `fetch` rejects with (e.g. a network failure).
 */
async function uploadChunk(chunk, index, fileName) {
  const formData = new FormData();
  formData.append('fileChunk', chunk);
  formData.append('chunkIndex', index);
  if (typeof fileName === 'string') {
    formData.append('fileName', fileName);
  }

  // Update this URL to your upload endpoint
  const uploadUrl = 'YOUR_UPLOAD_ENDPOINT';

  try {
    const response = await fetch(uploadUrl, {
      method: 'POST',
      body: formData,
    });
    return response.ok;
  } catch (error) {
    console.error('Upload failed for chunk ' + index, error);
    throw error; // Rethrow so we can catch it later
  }
}

// Main function to handle the file upload
/**
 * Upload a file chunk by chunk, in order, then ask the server to reassemble it.
 *
 * The upload stops at the first chunk that fails - either because the request
 * itself failed or because the server answered with a non-2xx status - and in
 * that case the finalize call is skipped.
 *
 * @param {File} file - The file to upload; its `name` is sent with every chunk.
 * @returns {Promise<void>} Resolves when the upload finished or was abandoned.
 */
async function uploadFile(file) {
  const CHUNK_SIZE = 5 * 1024 * 1024; // 5MB chunk size
  const chunks = sliceFile(file, CHUNK_SIZE);
  for (let index = 0; index < chunks.length; index++) {
    try {
      const accepted = await uploadChunk(chunks[index], index, file.name);
      // uploadChunk reports HTTP-level failures through its return value, not
      // by throwing, so the result has to be checked explicitly.
      if (!accepted) {
        throw new Error(`Server rejected chunk ${index + 1}`);
      }
      console.log(`Chunk ${index + 1} of ${chunks.length} uploaded successfully`);
    } catch (error) {
      console.error(`Error uploading chunk ${index + 1}:`, error);
      return; // Exit the upload process on error
    }
  }

  console.log('All chunks uploaded successfully');
  // Call finalizeUpload after all chunks are uploaded
  await finalizeUpload(file.name, chunks.length);
}

Server Side Code

  1. /upload - For uploading each file chunk. It temporarily stores each chunk with a unique name combining the original file name and the chunk index.

  2. /finalize - For finalizing the upload. It reassembles the file by sequentially reading and merging each chunk, then deletes the chunks to free up space.

const express = require('express');
const multer  = require('multer');
const fs = require('fs');
const path = require('path');

const app = express();
// The /finalize route receives a JSON body and reads req.body; without a
// body-parsing middleware Express leaves req.body undefined for such requests.
app.use(express.json());
const upload = multer({ dest: 'uploads/' }); // Temporary upload directory
const PORT = process.env.PORT || 3000; // Falls back to 3000 when PORT is unset

// Endpoint to upload chunks
//
// Expects a multipart/form-data POST with three fields: `fileChunk` (the chunk
// bytes), `fileName` and `chunkIndex`. The file multer stored under a
// temporary name is renamed to uploads/<fileName>.<chunkIndex>. Requests with
// a missing chunk or an unusable fileName/chunkIndex get a 400; a failed
// rename gets a 500.
app.post('/upload', upload.single('fileChunk'), (req, res) => {
  // req.body / req.file are only populated when the request carried them.
  const { chunkIndex, fileName } = req.body || {};
  const tempPath = req.file ? req.file.path : null;

  // Answer 400 and remove the temp file that was already written, if any, so
  // rejected requests do not pile up in the upload directory.
  const rejectRequest = (message) => {
    if (tempPath) {
      fs.unlink(tempPath, (err) => {
        if (err) {
          console.error('Could not remove rejected chunk', err);
        }
      });
    }
    return res.status(400).send(message);
  };

  if (!tempPath) {
    return rejectRequest('Missing fileChunk');
  }

  // fileName is client-controlled and becomes part of a filesystem path, so
  // only a plain name is accepted: no separators, no NUL, not "." or "..".
  const isSafeName =
    typeof fileName === 'string' &&
    fileName !== '' &&
    fileName !== '.' &&
    fileName !== '..' &&
    !/[\\\/\0]/.test(fileName);
  if (!isSafeName) {
    return rejectRequest('Invalid fileName');
  }

  // chunkIndex must be a plain non-negative integer for the same reason.
  if (typeof chunkIndex !== 'string' || !/^\d{1,9}$/.test(chunkIndex)) {
    return rejectRequest('Invalid chunkIndex');
  }

  // Number() drops leading zeros so the stored name is always <fileName>.<n>.
  const targetPath = path.join(__dirname, 'uploads', fileName + '.' + Number(chunkIndex));

  // Move and rename the chunk for easier reassembly
  fs.rename(tempPath, targetPath, err => {
    if (err) {
      return res.status(500).send(err.message);
    }
    res.send('Chunk uploaded successfully');
  });
});

// Endpoint to finalize the upload and reassemble the file
//
// Expects `{ fileName, totalChunks }` in req.body. Chunks are read from
// uploads/<fileName>.<i> for i = 0..totalChunks-1 and appended, in order, to
// uploads/<fileName>. Responds 400 for unusable input, 500 if any chunk cannot
// be read or the output cannot be written, and 200 once the output has been
// flushed and the chunks removed.
app.post('/finalize', (req, res) => {
  const { fileName, totalChunks } = req.body || {};

  // fileName is client-controlled and becomes part of a filesystem path, so
  // only a plain name is accepted: no separators, no NUL, not "." or "..".
  const isSafeName =
    typeof fileName === 'string' &&
    fileName !== '' &&
    fileName !== '.' &&
    fileName !== '..' &&
    !/[\\\/\0]/.test(fileName);
  if (!isSafeName) {
    return res.status(400).send('Invalid fileName');
  }

  // Accept a non-negative integer given either as a number or a digit string.
  if (!/^\d{1,9}$/.test(String(totalChunks))) {
    return res.status(400).send('Invalid totalChunks');
  }
  const chunkCount = Number(totalChunks);

  const uploadsDir = path.join(__dirname, 'uploads');
  const targetPath = path.join(uploadsDir, fileName);
  const chunkPaths = Array.from({ length: chunkCount }, (_, i) =>
    path.join(uploadsDir, fileName + '.' + i));

  // Create a write stream to assemble the file
  const fileWriteStream = fs.createWriteStream(targetPath);

  // Rejects on the first write-side error. Keeping a listener attached for the
  // whole request also means an 'error' event can never go unhandled.
  const writeFailed = new Promise((_, reject) => {
    fileWriteStream.on('error', reject);
  });
  // Observed through Promise.race below; this keeps a late failure from being
  // reported as an unhandled rejection.
  writeFailed.catch(() => {});

  // Append one chunk to the output. Settles when the chunk has been read to
  // the end, or as soon as either stream fails.
  const appendChunk = (chunkPath) => {
    const readStream = fs.createReadStream(chunkPath);
    const fullyRead = new Promise((resolve, reject) => {
      readStream.once('error', reject);
      readStream.once('end', resolve);
    });
    // end: false keeps the output open for the next chunk.
    readStream.pipe(fileWriteStream, { end: false });
    return Promise.race([fullyRead, writeFailed]).catch((err) => {
      readStream.destroy(); // release the chunk's file descriptor
      throw err;
    });
  };

  (async () => {
    for (const chunkPath of chunkPaths) {
      await appendChunk(chunkPath);
    }

    // Wait until everything is flushed before reporting success.
    const flushed = new Promise((resolve) => {
      fileWriteStream.once('finish', resolve);
    });
    fileWriteStream.end();
    await Promise.race([flushed, writeFailed]);

    // Chunks are removed only after the whole file was assembled, so a
    // failure part-way through leaves them in place for another attempt.
    await Promise.all(chunkPaths.map((chunkPath) => fs.promises.unlink(chunkPath)));
  })().then(() => {
    res.send('File reassembled successfully');
  }).catch(err => {
    fileWriteStream.destroy(); // do not leak the output descriptor on failure
    res.status(500).send(err.message);
  });
});

// Start the HTTP server on PORT; the callback logs once it is listening.
function announceServerReady() {
  console.log(`Server running on port ${PORT}`);
}

app.listen(PORT, announceServerReady);

Bonus: Progress, Pause Functionality

  1. Pause / Resume - The global isPaused flag controls the upload flow. The upload process checks this flag before uploading each chunk. If paused, it waits until the flag is cleared to resume uploading.

  2. Progress Tracking - The uploadChunk function uses XMLHttpRequest to monitor the progress of each chunk upload. The onProgress callback is called with the chunk's progress, which can be used to calculate and display the overall progress.

let isPaused = false; // Global flag to control pause/resume

// Function to upload a single chunk with progress callback
/**
 * POST one chunk with XMLHttpRequest so upload progress can be observed.
 *
 * The form carries `fileChunk` (the bytes), `chunkIndex` and, when given,
 * `fileName` - the field the server's /upload handler uses to name the
 * stored chunk.
 *
 * @param {Blob} chunk - The slice of the file to send.
 * @param {number} index - Zero-based position of the chunk within the file.
 * @param {Function} [onProgress] - Called as `onProgress(index, percent)` for
 *   every progress event whose length is computable; `percent` is 0-100 for
 *   this chunk only.
 * @param {string} [fileName] - Name of the file the chunk belongs to.
 * @returns {Promise<boolean>} Resolves `true` on any 2xx status; rejects with
 *   an Error for other statuses and for network-level failures.
 */
function uploadChunk(chunk, index, onProgress, fileName) {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open('POST', 'YOUR_UPLOAD_ENDPOINT', true);

    xhr.upload.onprogress = function(event) {
      if (event.lengthComputable && typeof onProgress === 'function') {
        const progress = (event.loaded / event.total) * 100;
        onProgress(index, progress);
      }
    };

    xhr.onload = function() {
      // Any 2xx answer means the server accepted the chunk, not only 200.
      if (xhr.status >= 200 && xhr.status < 300) {
        resolve(true);
      } else {
        reject(new Error('Upload failed: ' + xhr.statusText));
      }
    };

    xhr.onerror = function() {
      reject(new Error('XMLHttpRequest error: ' + xhr.statusText));
    };

    const formData = new FormData();
    formData.append('fileChunk', chunk);
    formData.append('chunkIndex', index);
    if (typeof fileName === 'string') {
      formData.append('fileName', fileName);
    }
    xhr.send(formData);
  });
}

// Main function to handle the file upload with pause/resume and progress tracking
/**
 * Upload a file chunk by chunk, honouring the `isPaused` flag between chunks
 * and reporting overall progress.
 *
 * A pause takes effect before the next chunk starts; a chunk that is already
 * being sent is not interrupted. The upload stops at the first failed chunk.
 *
 * @param {File} file - The file to upload; its `name` is sent with every chunk.
 * @param {Function} [onOverallProgress] - Called with the overall progress of
 *   the whole file as a percentage (0-100).
 * @param {Function} [onPauseResume] - Called with `true` when the upload
 *   starts waiting because it is paused and with `false` when it continues.
 * @returns {Promise<void>} Resolves when the upload finished or was abandoned.
 */
async function uploadFile(file, onOverallProgress, onPauseResume) {
  const CHUNK_SIZE = 5 * 1024 * 1024; // 5MB chunk size
  const chunks = sliceFile(file, CHUNK_SIZE);
  let completedBytes = 0; // Bytes of chunks that have finished uploading

  // Function to update overall progress: bytes of finished chunks plus the
  // already-sent share of the chunk currently in flight, relative to the file.
  const updateProgress = (chunkIndex, chunkPercent) => {
    if (typeof onOverallProgress !== 'function') {
      return;
    }
    const inFlightBytes = chunks[chunkIndex].size * (chunkPercent / 100);
    const overallProgress = ((completedBytes + inFlightBytes) / file.size) * 100;
    onOverallProgress(overallProgress);
  };

  const notifyPauseState = (paused) => {
    if (typeof onPauseResume === 'function') {
      onPauseResume(paused);
    }
  };

  for (let index = 0; index < chunks.length; index++) {
    if (isPaused) {
      notifyPauseState(true);
      await new Promise(resolve => {
        const interval = setInterval(() => {
          if (!isPaused) {
            clearInterval(interval);
            resolve();
          }
        }, 100); // Check every 100ms
      });
      notifyPauseState(false);
    }

    try {
      await uploadChunk(chunks[index], index, updateProgress, file.name);
      completedBytes += chunks[index].size;
      console.log(`Chunk ${index + 1} of ${chunks.length} uploaded successfully`);
    } catch (error) {
      console.error(`Error uploading chunk ${index + 1}:`, error);
      return; // Exit the upload process on error
    }
  }

  console.log('All chunks uploaded successfully');
  // Notify the server all chunks are uploaded, or handle the finalization process
}

// Example usage
// uploadFile(file, (progress) => console.log(`Overall Progress: ${progress}%`),
//             (paused) => console.log(paused ? 'Upload paused' : 'Upload resumed'));

// Functions to control pause/resume
//
// Both controls only flip the shared isPaused flag; they return nothing.
function setUploadPaused(paused) {
  isPaused = paused;
}

/** Set the pause flag so the upload stops picking up new chunks. */
function pauseUpload() {
  setUploadPaused(true);
}

/** Clear the pause flag so a waiting upload can continue. */
function resumeUpload() {
  setUploadPaused(false);
}

Google Drive is a lot more complex, as it needs to handle performance optimisations, security considerations and much more, but this is a good start for the rest of us.

ย