search
Python

Download Files and Images from URL with Python

Learn to download files and images in Python using requests. Includes progress bars, batch downloads, and error handling examples.

person By Gautam Sharma
calendar_today December 31, 2024
schedule 7 min read
Python File Download Web Scraping Automation

Download files and images from URLs with Python. Simple scripts for single files, bulk downloads, and progress tracking.

Basic File Download

Simple download using requests library.

pip install requests
import requests

def download_file(url, filename):
    """Download *url* and save the response body to *filename*.

    Loads the whole file into memory, so this suits small files;
    use a streaming download for large ones.
    """
    response = requests.get(url)
    # Fail loudly on 4xx/5xx instead of silently saving an error page.
    response.raise_for_status()

    with open(filename, 'wb') as f:
        f.write(response.content)

    print(f"Downloaded {filename}")

# Download file
download_file(
    'https://example.com/file.pdf',
    'document.pdf'
)

Download with Progress Bar

Show download progress in terminal.

pip install requests tqdm
import requests
from tqdm import tqdm

def download_with_progress(url, filename):
    """Stream *url* to *filename* while showing a tqdm progress bar.

    The bar total comes from the Content-Length header; servers that
    omit it yield a total of 0 (indeterminate bar).
    """
    response = requests.get(url, stream=True)
    # Abort on HTTP errors before writing anything to disk.
    response.raise_for_status()
    total_size = int(response.headers.get('content-length', 0))

    with open(filename, 'wb') as f:
        with tqdm(
            total=total_size,
            unit='B',
            unit_scale=True,
            desc=filename
        ) as pbar:
            # 8 KiB chunks keep memory flat regardless of file size.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                pbar.update(len(chunk))

    print(f"\nDownload complete: {filename}")

# Download with progress
download_with_progress(
    'https://example.com/large-file.zip',
    'archive.zip'
)

Download Image

Download and save images.

import requests
from pathlib import Path

def download_image(url, save_dir='images'):
    """Download the image at *url* into *save_dir* and return its path.

    The filename is the last path segment of the URL with any
    ``?query=...`` suffix stripped. Raises requests.HTTPError on a
    non-2xx response.
    """
    # Create directory if not exists
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    # Extract filename from URL, ignoring any query string so
    # 'photo.jpg?size=large' saves as 'photo.jpg'.
    filename = url.split('/')[-1].split('?')[0]
    filepath = Path(save_dir) / filename

    # Download (bounded wait so a dead server can't hang us forever).
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    with open(filepath, 'wb') as f:
        f.write(response.content)

    print(f"Image saved: {filepath}")
    return filepath

# Download image
download_image('https://example.com/photo.jpg')

Download Multiple Files

Batch download from list of URLs.

import requests
from pathlib import Path

def download_multiple(urls, save_dir='downloads'):
    """Download every URL in *urls* into *save_dir*.

    Each file is named after the last path segment of its URL.
    Failures are printed and skipped so one bad URL does not abort
    the whole batch.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    total = len(urls)  # hoisted: constant across the loop

    for idx, url in enumerate(urls, 1):
        try:
            filename = url.split('/')[-1]
            filepath = Path(save_dir) / filename

            response = requests.get(url, timeout=30)
            response.raise_for_status()

            with open(filepath, 'wb') as f:
                f.write(response.content)

            print(f"[{idx}/{total}] Downloaded: {filename}")

        except Exception as e:
            print(f"Failed to download {url}: {e}")

# Download multiple files
urls = [
    'https://example.com/file1.pdf',
    'https://example.com/file2.pdf',
    'https://example.com/image.jpg'
]

download_multiple(urls)

Download with Custom Filename

Control output filename.

import requests
from pathlib import Path

def download_as(url, output_path):
    """Fetch *url* and store the body at exactly *output_path*.

    Any missing parent directories of the target are created first.
    Raises requests.HTTPError on a non-2xx response.
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    resp = requests.get(url)
    resp.raise_for_status()

    with open(output_path, 'wb') as out:
        out.write(resp.content)

    print(f"Saved as: {output_path}")

# Download with custom name
download_as(
    'https://example.com/document.pdf',
    'reports/2024/annual-report.pdf'
)

Resume Interrupted Downloads

Continue partial downloads.

import requests
from pathlib import Path

def download_resumable(url, filename):
    """Download *url* to *filename*, resuming a partial file if present.

    Sends an HTTP Range request to continue from the current file
    size. Handles two server responses specially:
    - 416 (Range Not Satisfiable): the file is already complete.
    - 200 instead of 206: the server ignored the Range header and is
      sending the whole file, so we rewrite from the beginning
      rather than appending a duplicate copy.
    """
    # Check if partial file exists
    if Path(filename).exists():
        resume_byte_pos = Path(filename).stat().st_size
        headers = {'Range': f'bytes={resume_byte_pos}-'}
        mode = 'ab'
    else:
        resume_byte_pos = 0
        headers = {}
        mode = 'wb'

    response = requests.get(url, headers=headers, stream=True)

    if response.status_code == 416:
        print("File already fully downloaded")
        return

    # Server ignored the Range header: start over instead of
    # appending the full body after the partial data.
    if resume_byte_pos and response.status_code == 200:
        mode = 'wb'

    # Any other error status (404, 500, ...) should abort, not be
    # written to disk as file content.
    response.raise_for_status()

    with open(filename, mode) as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

    print(f"Download complete: {filename}")

# Resume download
download_resumable('https://example.com/large-file.zip', 'file.zip')

Download from URLs in File

Read URLs from text file and download.

import requests
from pathlib import Path

def download_from_file(url_file, save_dir='downloads'):
    """Download every URL listed (one per line) in *url_file*.

    Blank lines are ignored. Each filename is the last URL path
    segment with any query string stripped. Failures are printed
    and skipped.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    with open(url_file, 'r') as f:
        urls = [line.strip() for line in f if line.strip()]

    for idx, url in enumerate(urls, 1):
        try:
            # Drop '?query=...' so the saved name is a clean filename.
            filename = url.split('/')[-1].split('?')[0]
            filepath = Path(save_dir) / filename

            print(f"[{idx}/{len(urls)}] Downloading {url}...")

            response = requests.get(url, timeout=30)
            response.raise_for_status()

            with open(filepath, 'wb') as f:
                f.write(response.content)

            print(f"  ✓ Saved to {filepath}")

        except Exception as e:
            print(f"  ✗ Failed: {e}")

# URLs file format (urls.txt):
# https://example.com/file1.pdf
# https://example.com/image.jpg
# https://example.com/video.mp4

download_from_file('urls.txt')

Download with Retry Logic

Automatic retry on failure.

import requests
import time

def download_with_retry(url, filename, max_retries=3):
    """Download *url* to *filename*, retrying on network errors.

    Waits with exponential backoff (1s, 2s, 4s, ...) between
    attempts. Returns True on success, False once *max_retries*
    attempts have failed.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            with open(filename, 'wb') as f:
                f.write(response.content)

            print(f"Downloaded: {filename}")
            return True

        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt + 1} failed: {e}")

            if attempt < max_retries - 1:
                wait_time = 2 ** attempt  # 1, 2, 4, ... seconds
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                print("Max retries reached. Download failed.")
                return False

# Download with retry
download_with_retry('https://example.com/file.pdf', 'document.pdf')

Async Downloads

Download multiple files concurrently.

pip install aiohttp aiofiles
import asyncio
import aiohttp
import aiofiles
from pathlib import Path

async def download_async(session, url, filepath):
    """Fetch one URL through *session* and write it to *filepath*.

    Returns True on success; on any error prints a message and
    returns False instead of raising.
    """
    try:
        async with session.get(url) as response:
            response.raise_for_status()
            body = await response.read()
            async with aiofiles.open(filepath, 'wb') as f:
                await f.write(body)
    except Exception as e:
        print(f"Failed {filepath.name}: {e}")
        return False

    print(f"Downloaded: {filepath.name}")
    return True

async def download_all(urls, save_dir='downloads'):
    """Download every URL in *urls* concurrently into *save_dir*.

    Files are named after the last path segment of each URL; a
    summary count of successes is printed at the end.
    """
    target = Path(save_dir)
    target.mkdir(parents=True, exist_ok=True)

    async with aiohttp.ClientSession() as session:
        tasks = [
            download_async(session, url, target / url.split('/')[-1])
            for url in urls
        ]

        results = await asyncio.gather(*tasks)

        succeeded = sum(results)
        print(f"\nCompleted: {succeeded}/{len(urls)} downloads")

# Run async downloads
urls = [
    'https://example.com/file1.pdf',
    'https://example.com/file2.pdf',
    'https://example.com/file3.pdf'
]

asyncio.run(download_all(urls))

Download with Authentication

Handle authenticated downloads.

import requests

def download_authenticated(url, filename, username, password):
    """Download *url* to *filename* using HTTP Basic authentication.

    Raises requests.HTTPError on a non-2xx response (e.g. 401 when
    the credentials are rejected).
    """
    response = requests.get(
        url,
        auth=(username, password)
    )
    response.raise_for_status()

    with open(filename, 'wb') as f:
        f.write(response.content)

    print(f"Downloaded: {filename}")

# With basic auth
download_authenticated(
    'https://secure.example.com/file.pdf',
    'document.pdf',
    'user@example.com',
    'password123'
)

# With API token
def download_with_token(url, filename, token):
    """Download *url* to *filename* using a Bearer token header."""
    headers = {'Authorization': f'Bearer {token}'}

    response = requests.get(url, headers=headers)
    response.raise_for_status()

    with open(filename, 'wb') as f:
        f.write(response.content)

    print(f"Downloaded: {filename}")

download_with_token(
    'https://api.example.com/files/123',
    'file.pdf',
    'your-api-token'
)

Download Images from Web Page

Extract and download all images from URL.

pip install beautifulsoup4 requests
import requests
from bs4 import BeautifulSoup
from pathlib import Path
from urllib.parse import urljoin, urlparse

def download_page_images(page_url, save_dir='images'):
    """Scrape *page_url* and download every <img> it references.

    Relative image URLs are resolved against the page URL. Images
    whose URL path carries no filename are saved as image_<n>.jpg.
    Individual image failures are printed and skipped.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    # Get page content; abort early if the page itself is unavailable.
    response = requests.get(page_url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all images
    images = soup.find_all('img')
    print(f"Found {len(images)} images")

    for idx, img in enumerate(images, 1):
        try:
            # Skip <img> tags without a src attribute.
            img_url = img.get('src')
            if not img_url:
                continue

            # Resolve relative URLs against the page URL.
            img_url = urljoin(page_url, img_url)

            # Derive a filename from the URL path, with a fallback
            # for paths like '/' that have no final segment.
            filename = Path(urlparse(img_url).path).name
            if not filename:
                filename = f"image_{idx}.jpg"

            filepath = Path(save_dir) / filename

            # Download image
            img_response = requests.get(img_url, timeout=10)
            img_response.raise_for_status()

            with open(filepath, 'wb') as f:
                f.write(img_response.content)

            print(f"[{idx}/{len(images)}] Downloaded: {filename}")

        except Exception as e:
            print(f"[{idx}/{len(images)}] Failed: {e}")

# Download all images from page
download_page_images('https://example.com/gallery')

Check File Size Before Download

Preview download size.

import requests

def get_file_size(url):
    """Return the Content-Length of *url* in bytes (0 if unreported).

    Follows redirects so the size reflects the final resource.
    """
    head = requests.head(url, allow_redirects=True)
    return int(head.headers.get('content-length', 0))

def format_size(bytes):
    """Return a human-readable size string, e.g. 1536 -> '1.50 KB'.

    Walks up the units B -> KB -> MB -> GB, falling through to TB
    for anything larger.
    """
    # Copy into a local so we stop reusing the shadowed builtin name
    # `bytes` (kept in the signature for backward compatibility).
    size = float(bytes)
    for unit in ('B', 'KB', 'MB', 'GB'):
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    return f"{size:.2f} TB"

def download_with_size_check(url, filename, max_size_mb=100):
    """Download *url* to *filename* unless it exceeds *max_size_mb*.

    Checks Content-Length via a HEAD request first. Returns True
    when downloaded, False when skipped for size.
    """
    size = get_file_size(url)
    size_mb = size / (1024 * 1024)

    print(f"File size: {format_size(size)}")

    if size_mb > max_size_mb:
        print(f"File too large (>{max_size_mb}MB). Skipping.")
        return False

    response = requests.get(url, timeout=30)
    # Don't save an error page as the file.
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)

    print(f"Downloaded: {filename}")
    return True

# Download with size check
download_with_size_check(
    'https://example.com/large-file.zip',
    'file.zip',
    max_size_mb=50
)

Download Manager CLI

Complete command-line tool.

import argparse
import requests
from pathlib import Path
from tqdm import tqdm

def download_cli():
    """Command-line entry point: download one URL with a progress bar.

    Usage: downloader.py URL [-o NAME] [-d DIR]. When -o is omitted
    the filename is taken from the last URL path segment.
    """
    parser = argparse.ArgumentParser(
        description='Download files from URLs'
    )
    parser.add_argument('url', help='URL to download')
    parser.add_argument('-o', '--output', help='Output filename')
    parser.add_argument('-d', '--dir', default='.', help='Output directory')

    args = parser.parse_args()

    # Pick an explicit name, or fall back to the URL's last segment.
    filename = args.output if args.output else args.url.split('/')[-1]

    out_dir = Path(args.dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    filepath = out_dir / filename

    # Download with progress
    print(f"Downloading: {args.url}")
    print(f"Saving to: {filepath}")

    response = requests.get(args.url, stream=True)
    total_size = int(response.headers.get('content-length', 0))

    with open(filepath, 'wb') as f, tqdm(
        total=total_size,
        unit='B',
        unit_scale=True
    ) as pbar:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
            pbar.update(len(chunk))

    print(f"\nDownload complete!")

if __name__ == '__main__':
    download_cli()

Save as downloader.py and use:

python downloader.py https://example.com/file.pdf
python downloader.py https://example.com/file.pdf -o document.pdf
python downloader.py https://example.com/file.pdf -d downloads/

Using urllib (No Dependencies)

Download without external libraries.

import urllib.request
from pathlib import Path

def download_urllib(url, filename):
    """Download *url* to *filename* using only the standard library.

    Thin wrapper over urllib.request.urlretrieve; no third-party
    dependencies required.
    """
    urllib.request.urlretrieve(url, filename)
    print(f"Downloaded: {filename}")

# Download with urllib
download_urllib(
    'https://example.com/file.pdf',
    'document.pdf'
)

# With progress callback
def show_progress(block_num, block_size, total_size):
    """urlretrieve reporthook: print a one-line percentage update.

    urlretrieve passes total_size <= 0 when the server reports no
    Content-Length; guard that case (the unguarded division raised
    ZeroDivisionError) and fall back to a raw byte count.
    """
    downloaded = block_num * block_size
    if total_size > 0:
        percent = min(100, downloaded * 100 / total_size)
        print(f"\rProgress: {percent:.1f}%", end='')
    else:
        print(f"\rDownloaded: {downloaded} bytes", end='')

def download_with_progress_urllib(url, filename):
    """Download *url* to *filename*, reporting progress via show_progress.

    show_progress is invoked by urlretrieve after each block read.
    """
    urllib.request.urlretrieve(url, filename, show_progress)
    print(f"\nDownload complete: {filename}")

download_with_progress_urllib(
    'https://example.com/file.pdf',
    'document.pdf'
)

Error Handling

Handle common download errors.

import requests
from requests.exceptions import (
    RequestException,
    Timeout,
    ConnectionError,
    HTTPError
)

def safe_download(url, filename, timeout=30):
    """Download *url* to *filename*, handling common network errors.

    Returns True on success, False after printing a diagnostic.
    The specific handlers (Timeout, ConnectionError, HTTPError) must
    precede RequestException, which is their common base class.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        with open(filename, 'wb') as f:
            f.write(response.content)

        print(f"Downloaded: {filename}")
        return True

    except Timeout:
        print(f"Request timed out after {timeout} seconds")
    except ConnectionError:
        print("Failed to connect to server")
    except HTTPError as e:
        print(f"HTTP error: {e.response.status_code}")
    except RequestException as e:
        print(f"Download failed: {e}")

    return False

# Safe download
safe_download('https://example.com/file.pdf', 'document.pdf')

Quick Reference

Install Libraries:

pip install requests tqdm
pip install aiohttp aiofiles  # for async
pip install beautifulsoup4    # for web scraping

Basic Pattern:

response = requests.get(url)
with open(filename, 'wb') as f:
    f.write(response.content)

Common Options:

  • stream=True - Download in chunks
  • timeout=30 - Request timeout
  • headers={'User-Agent': '...'} - Custom headers
  • auth=(user, pass) - Authentication
  • verify=False - Skip SSL verification (not recommended)

File Modes:

  • 'wb' - Write binary (new file)
  • 'ab' - Append binary (resume download)

Best Practices:

  • Always use wb mode for binary files
  • Handle exceptions properly
  • Set reasonable timeouts
  • Show progress for large files
  • Validate file after download
  • Use async for multiple downloads

Conclusion

Python makes downloading files simple. Use requests for most tasks, add tqdm for progress bars, and aiohttp for concurrent downloads. Always handle errors and show progress for better user experience.

Gautam Sharma

About Gautam Sharma

Full-stack developer and tech blogger sharing coding tutorials and best practices

Related Articles

Python

Generate Excel Files from Raw Data with Python

Quick guide to creating Excel files from raw data using Python. Learn to use openpyxl, xlsxwriter, and pandas for Excel generation.

December 31, 2024
Python

Python FFMPEG Integration: Edit Videos in Terminal

Master video editing from the command line using Python and FFmpeg. Learn to trim, merge, compress, and manipulate videos programmatically.

December 31, 2024
Python

Read and Write CSV Files with Python

Simple guide to reading and writing CSV files in Python using csv module and pandas. Quick examples for data processing.

December 31, 2024