No articles found
Try different keywords or browse our categories
Download Files and Images from URL with Python
Learn to download files and images in Python using requests. Includes progress bars, batch downloads, and error handling examples.
Download files and images from URLs with Python. Simple scripts for single files, bulk downloads, and progress tracking.
Basic File Download
Simple download using requests library.
pip install requests
import requests
def download_file(url, filename):
    """Download the resource at *url* and save it to *filename*.

    Raises requests.HTTPError if the server returns a 4xx/5xx status.
    """
    response = requests.get(url)
    # Fail loudly on HTTP errors instead of silently saving an error page.
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded {filename}")


# Download file
download_file(
    'https://example.com/file.pdf',
    'document.pdf'
)
Download with Progress Bar
Show download progress in terminal.
pip install requests tqdm
import requests
from tqdm import tqdm
def download_with_progress(url, filename):
    """Stream *url* to *filename*, showing a tqdm progress bar."""
    response = requests.get(url, stream=True)
    # Abort on HTTP errors before writing anything to disk.
    response.raise_for_status()
    # Content-Length may be absent; total=0 makes tqdm show a plain counter.
    total_size = int(response.headers.get('content-length', 0))
    with open(filename, 'wb') as f:
        with tqdm(
            total=total_size,
            unit='B',
            unit_scale=True,
            desc=filename
        ) as pbar:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                pbar.update(len(chunk))
    print(f"\nDownload complete: {filename}")


# Download with progress
download_with_progress(
    'https://example.com/large-file.zip',
    'archive.zip'
)
Download Image
Download and save images.
import requests
from pathlib import Path
def download_image(url, save_dir='images'):
    """Download the image at *url* into *save_dir* and return the saved path."""
    # Create directory if not exists
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    # Extract filename from URL, dropping any query string (e.g. photo.jpg?w=200),
    # with a fallback for URLs that end in '/'.
    filename = url.split('/')[-1].split('?')[0] or 'image'
    filepath = Path(save_dir) / filename
    # Download
    response = requests.get(url)
    response.raise_for_status()
    with open(filepath, 'wb') as f:
        f.write(response.content)
    print(f"Image saved: {filepath}")
    return filepath


# Download image
download_image('https://example.com/photo.jpg')
Download Multiple Files
Batch download from list of URLs.
import requests
from pathlib import Path
def download_multiple(urls, save_dir='downloads'):
    """Download every URL in *urls* into *save_dir*, reporting per-file status."""
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    for idx, url in enumerate(urls, 1):
        try:
            filename = url.split('/')[-1]
            filepath = Path(save_dir) / filename
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(filepath, 'wb') as f:
                f.write(response.content)
            print(f"[{idx}/{len(urls)}] Downloaded: {filename}")
        except Exception as e:
            # Keep going so one bad URL doesn't abort the whole batch.
            print(f"Failed to download {url}: {e}")


# Download multiple files
urls = [
    'https://example.com/file1.pdf',
    'https://example.com/file2.pdf',
    'https://example.com/image.jpg'
]
download_multiple(urls)
Download with Custom Filename
Control output filename.
import requests
from pathlib import Path
def download_as(url, output_path):
    """Fetch *url* and store the response body at exactly *output_path*."""
    # Make sure the whole destination directory chain exists first.
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    response = requests.get(url)
    response.raise_for_status()
    with open(output_path, 'wb') as f:
        f.write(response.content)
    print(f"Saved as: {output_path}")


# Download with custom name
download_as(
    'https://example.com/document.pdf',
    'reports/2024/annual-report.pdf'
)
Resume Interrupted Downloads
Continue partial downloads.
import requests
from pathlib import Path
def download_resumable(url, filename):
    """Download *url* to *filename*, resuming a partial file if one exists.

    Sends an HTTP Range request to continue from the current file size.
    """
    # Check if partial file exists
    if Path(filename).exists():
        resume_byte_pos = Path(filename).stat().st_size
        headers = {'Range': f'bytes={resume_byte_pos}-'}
        mode = 'ab'
    else:
        resume_byte_pos = 0
        headers = {}
        mode = 'wb'
    response = requests.get(url, headers=headers, stream=True)
    if response.status_code == 416:
        # Range not satisfiable: the local file already covers the whole body.
        print("File already fully downloaded")
        return
    response.raise_for_status()
    # If we asked for a range but the server replied 200 (full body), it
    # ignored the Range header -- overwrite instead of appending, otherwise
    # the saved file would be corrupted.
    if resume_byte_pos and response.status_code != 206:
        mode = 'wb'
    with open(filename, mode) as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
    print(f"Download complete: {filename}")


# Resume download
download_resumable('https://example.com/large-file.zip', 'file.zip')
Download from URLs in File
Read URLs from text file and download.
import requests
from pathlib import Path
def download_from_file(url_file, save_dir='downloads'):
    """Read URLs (one per line) from *url_file* and download each into *save_dir*."""
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    with open(url_file, 'r') as f:
        # Skip blank lines; strip trailing newlines.
        urls = [line.strip() for line in f if line.strip()]
    for idx, url in enumerate(urls, 1):
        try:
            # Drop any query string so the name is filesystem-friendly.
            filename = url.split('/')[-1].split('?')[0]
            filepath = Path(save_dir) / filename
            print(f"[{idx}/{len(urls)}] Downloading {filename}...")
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(filepath, 'wb') as f:
                f.write(response.content)
            print(f" ✓ Saved to {filepath}")
        except Exception as e:
            print(f" ✗ Failed: {e}")


# URLs file format (urls.txt):
# https://example.com/file1.pdf
# https://example.com/image.jpg
# https://example.com/video.mp4
download_from_file('urls.txt')
Download with Retry Logic
Automatic retry on failure.
import requests
import time
def download_with_retry(url, filename, max_retries=3):
    """Download *url* to *filename*, retrying with exponential backoff.

    Returns True on success, False after *max_retries* failed attempts.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(filename, 'wb') as f:
                f.write(response.content)
            print(f"Downloaded: {filename}")
            return True
        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                # Exponential backoff: wait 1s, 2s, 4s, ... between attempts.
                wait_time = 2 ** attempt
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                print("Max retries reached. Download failed.")
                return False


# Download with retry
download_with_retry('https://example.com/file.pdf', 'document.pdf')
Async Downloads
Download multiple files concurrently.
pip install aiohttp aiofiles
import asyncio
import aiohttp
import aiofiles
from pathlib import Path
async def download_async(session, url, filepath):
    """Fetch a single *url* through *session* and write it to *filepath*.

    Returns True on success, False on any failure (which is printed).
    """
    try:
        async with session.get(url) as response:
            response.raise_for_status()
            async with aiofiles.open(filepath, 'wb') as f:
                await f.write(await response.read())
            print(f"Downloaded: {filepath.name}")
            return True
    except Exception as e:
        print(f"Failed {filepath.name}: {e}")
        return False


async def download_all(urls, save_dir='downloads'):
    """Download every URL in *urls* concurrently into *save_dir*."""
    target = Path(save_dir)
    target.mkdir(parents=True, exist_ok=True)
    async with aiohttp.ClientSession() as session:
        tasks = [
            download_async(session, url, target / url.split('/')[-1])
            for url in urls
        ]
        results = await asyncio.gather(*tasks)
    success = sum(results)
    print(f"\nCompleted: {success}/{len(urls)} downloads")


# Run async downloads
urls = [
    'https://example.com/file1.pdf',
    'https://example.com/file2.pdf',
    'https://example.com/file3.pdf'
]
asyncio.run(download_all(urls))
Download with Authentication
Handle authenticated downloads.
import requests
def download_authenticated(url, filename, username, password):
    """Download *url* using HTTP Basic auth and save it to *filename*."""
    response = requests.get(
        url,
        auth=(username, password)
    )
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded: {filename}")


# With basic auth
download_authenticated(
    'https://secure.example.com/file.pdf',
    'document.pdf',
    'user@example.com',
    'password123'
)


# With API token
def download_with_token(url, filename, token):
    """Download *url* using a Bearer token and save it to *filename*."""
    headers = {'Authorization': f'Bearer {token}'}
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded: {filename}")


download_with_token(
    'https://api.example.com/files/123',
    'file.pdf',
    'your-api-token'
)
Download Images from Web Page
Extract and download all images from URL.
pip install beautifulsoup4 requests
import requests
from bs4 import BeautifulSoup
from pathlib import Path
from urllib.parse import urljoin, urlparse
def download_page_images(page_url, save_dir='images'):
    """Scrape *page_url* and download every <img> it references into *save_dir*."""
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    # Get page content; fail fast if the page itself is unreachable.
    response = requests.get(page_url)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find all images
    images = soup.find_all('img')
    print(f"Found {len(images)} images")
    for idx, img in enumerate(images, 1):
        try:
            # Get image URL; skip <img> tags without a src attribute.
            img_url = img.get('src')
            if not img_url:
                continue
            # Make absolute URL (src is often relative to the page).
            img_url = urljoin(page_url, img_url)
            # Extract filename from the URL path, with a numbered fallback.
            filename = Path(urlparse(img_url).path).name
            if not filename:
                filename = f"image_{idx}.jpg"
            filepath = Path(save_dir) / filename
            # Download image
            img_response = requests.get(img_url, timeout=10)
            img_response.raise_for_status()
            with open(filepath, 'wb') as f:
                f.write(img_response.content)
            print(f"[{idx}/{len(images)}] Downloaded: {filename}")
        except Exception as e:
            # One broken image shouldn't stop the rest of the page.
            print(f"[{idx}/{len(images)}] Failed: {e}")


# Download all images from page
download_page_images('https://example.com/gallery')
Check File Size Before Download
Preview download size.
import requests
def get_file_size(url):
    """Return the Content-Length of *url* in bytes (0 if not reported)."""
    # HEAD avoids downloading the body just to read the headers.
    response = requests.head(url, allow_redirects=True)
    size = int(response.headers.get('content-length', 0))
    return size


def format_size(bytes):
    """Format a byte count as a human-readable string (B/KB/MB/GB/TB)."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if bytes < 1024:
            return f"{bytes:.2f} {unit}"
        bytes /= 1024
    return f"{bytes:.2f} TB"


def download_with_size_check(url, filename, max_size_mb=100):
    """Download *url* to *filename* only if it is at most *max_size_mb* MB.

    Returns True if the file was downloaded, False if it was skipped.
    """
    size = get_file_size(url)
    size_mb = size / (1024 * 1024)
    print(f"File size: {format_size(size)}")
    if size_mb > max_size_mb:
        print(f"File too large (>{max_size_mb}MB). Skipping.")
        return False
    response = requests.get(url)
    # Fail loudly on HTTP errors instead of saving an error page.
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded: {filename}")
    return True
# Check the reported size first; download only when under the 50 MB limit.
download_with_size_check(
    'https://example.com/large-file.zip',
    'file.zip',
    max_size_mb=50
)
Download Manager CLI
Complete command-line tool.
import argparse
import requests
from pathlib import Path
from tqdm import tqdm
def download_cli():
    """Command-line entry point: download one URL with a progress bar."""
    parser = argparse.ArgumentParser(
        description='Download files from URLs'
    )
    parser.add_argument('url', help='URL to download')
    parser.add_argument('-o', '--output', help='Output filename')
    parser.add_argument('-d', '--dir', default='.', help='Output directory')
    args = parser.parse_args()
    # Determine filename: explicit -o wins, else the last URL path segment.
    if args.output:
        filename = args.output
    else:
        filename = args.url.split('/')[-1]
    filepath = Path(args.dir) / filename
    Path(args.dir).mkdir(parents=True, exist_ok=True)
    # Download with progress
    print(f"Downloading: {args.url}")
    print(f"Saving to: {filepath}")
    response = requests.get(args.url, stream=True)
    # Abort with a clear error on 4xx/5xx instead of saving an error page.
    response.raise_for_status()
    total_size = int(response.headers.get('content-length', 0))
    with open(filepath, 'wb') as f:
        with tqdm(
            total=total_size,
            unit='B',
            unit_scale=True
        ) as pbar:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                pbar.update(len(chunk))
    print(f"\nDownload complete!")


if __name__ == '__main__':
    download_cli()
Save as downloader.py and use:
python downloader.py https://example.com/file.pdf
python downloader.py https://example.com/file.pdf -o document.pdf
python downloader.py https://example.com/file.pdf -d downloads/
Using urllib (No Dependencies)
Download without external libraries.
import urllib.request
from pathlib import Path
def download_urllib(url, filename):
    """Download *url* to *filename* using only the standard library."""
    urllib.request.urlretrieve(url, filename)
    print(f"Downloaded: {filename}")


# Download with urllib
download_urllib(
    'https://example.com/file.pdf',
    'document.pdf'
)


# With progress callback
def show_progress(block_num, block_size, total_size):
    """urlretrieve reporthook: print a single updating progress line."""
    downloaded = block_num * block_size
    # urlretrieve passes total_size as -1 (or 0) when the server sends no
    # Content-Length; guard against dividing by it.
    if total_size > 0:
        percent = min(100, downloaded * 100 / total_size)
        print(f"\rProgress: {percent:.1f}%", end='')
    else:
        print(f"\rDownloaded: {downloaded} bytes", end='')


def download_with_progress_urllib(url, filename):
    """Download *url* to *filename*, reporting progress via show_progress."""
    urllib.request.urlretrieve(url, filename, show_progress)
    print(f"\nDownload complete: {filename}")


download_with_progress_urllib(
    'https://example.com/file.pdf',
    'document.pdf'
)
Error Handling
Handle common download errors.
import requests
from requests.exceptions import (
RequestException,
Timeout,
ConnectionError,
HTTPError
)
def safe_download(url, filename, timeout=30):
    """Download *url* to *filename*, handling the common request failures.

    Returns True on success, False on any handled error (which is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded: {filename}")
        return True
    # Most-specific exceptions first; RequestException is the catch-all base.
    except Timeout:
        print(f"Request timed out after {timeout} seconds")
    except ConnectionError:
        print("Failed to connect to server")
    except HTTPError as e:
        print(f"HTTP error: {e.response.status_code}")
    except RequestException as e:
        print(f"Download failed: {e}")
    return False


# Safe download
safe_download('https://example.com/file.pdf', 'document.pdf')
Quick Reference
Install Libraries:
pip install requests tqdm
pip install aiohttp aiofiles # for async
pip install beautifulsoup4 # for web scraping
Basic Pattern:
response = requests.get(url)
with open(filename, 'wb') as f:
f.write(response.content)
Common Options:
stream=True - Download in chunks
timeout=30 - Request timeout
headers={'User-Agent': '...'} - Custom headers
auth=(user, pass) - Authentication
verify=False - Skip SSL verification (not recommended)
File Modes:
'wb' - Write binary (new file)
'ab' - Append binary (resume download)
Best Practices:
- Always use 'wb' mode for binary files
- Handle exceptions properly
- Set reasonable timeouts
- Show progress for large files
- Validate file after download
- Use async for multiple downloads
Conclusion
Python makes downloading files simple. Use requests for most tasks, add tqdm for progress bars, and aiohttp for concurrent downloads. Always handle errors and show progress for better user experience.
Related Articles
Generate Excel Files from Raw Data with Python
Quick guide to creating Excel files from raw data using Python. Learn to use openpyxl, xlsxwriter, and pandas for Excel generation.
Python FFMPEG Integration: Edit Videos in Terminal
Master video editing from the command line using Python and FFmpeg. Learn to trim, merge, compress, and manipulate videos programmatically.
Read and Write CSV Files with Python
Simple guide to reading and writing CSV files in Python using csv module and pandas. Quick examples for data processing.