How to Split PDFs into Individual Pages using PyMuPDF
Jamie Lemon·June 26, 2025

PDF manipulation is a common task in document processing workflows, and one of the most frequent operations is splitting a multi-page PDF into individual page files. PyMuPDF is a powerful Python library that makes this task straightforward and efficient.
What is PyMuPDF?
PyMuPDF is a Python binding for MuPDF, a lightweight PDF toolkit. It provides comprehensive PDF processing capabilities including reading, writing, and manipulating PDF documents. The library is known for its speed and reliability, making it an excellent choice for PDF operations.
Installation
Before we begin, you'll need to install PyMuPDF. You can install it using pip:
pip install PyMuPDF
Basic PDF Splitting
Here's a simple function to split a PDF into individual pages:
import pymupdf
def split_pdf_to_pages(input_path, output_folder):
    """
    Split a PDF file into individual single-page PDF files.

    Each page is written to ``output_folder`` as ``page_NNN.pdf``, where NNN
    is the 1-based page number zero-padded to three digits.

    Args:
        input_path (str): Path to the input PDF file
        output_folder (str): Directory to save individual pages; created
            (including parents) if it does not exist
    """
    # Function-scope import: this snippet only imports pymupdf at module level.
    import os

    # Context managers close both documents even if inserting or saving fails.
    with pymupdf.open(input_path) as pdf_document:
        os.makedirs(output_folder, exist_ok=True)

        # Read the page count while the document is still open; calling
        # len() on a closed document raises ValueError.
        total_pages = len(pdf_document)

        for page_num in range(total_pages):
            # A fresh, empty PDF receives exactly one page from the source.
            with pymupdf.open() as new_pdf:
                new_pdf.insert_pdf(pdf_document, from_page=page_num, to_page=page_num)
                output_path = os.path.join(output_folder, f"page_{page_num + 1:03d}.pdf")
                new_pdf.save(output_path)

    print(f"Successfully split PDF into {total_pages} pages")
# Usage example: writes output_pages/page_001.pdf, page_002.pdf, ... (one file per page)
split_pdf_to_pages("input_document.pdf", "output_pages")
Advanced Splitting with Custom Naming
Sometimes you might want more control over the output file names or need to handle specific pages. Here's an enhanced version:
import pymupdf
import os
from pathlib import Path
def _bookmarks_for_single_page(bookmarks, source_page):
    """Return the TOC entries targeting ``source_page``, rewritten for a one-page PDF."""
    rebased = []
    previous_level = 0
    for level, title, page, *_ in bookmarks:
        if page != source_page:
            continue
        # set_toc() expects the first entry at level 1 and each following
        # entry at most one level deeper than its predecessor, so clamp the
        # levels that survive the page filter.
        level = min(level, previous_level + 1)
        # The extracted page is page 1 of the new document.
        rebased.append([level, title, 1])
        previous_level = level
    return rebased


def split_pdf_advanced(input_path, output_folder,
                       name_prefix="page",
                       page_range=None,
                       preserve_bookmarks=False):
    """
    Advanced PDF splitting with customizable options.

    Each selected page is saved as ``<name_prefix>_NNN.pdf`` in
    ``output_folder``, where NNN is the page's 1-based number in the source
    document, zero-padded to three digits.

    Args:
        input_path (str): Path to the input PDF file
        output_folder (str): Directory to save individual pages; created
            (including parents) if it does not exist
        name_prefix (str): Prefix for output filenames
        page_range (tuple): Optional (start, end) page range, 1-indexed and
            inclusive. Values outside the document are clamped; a range that
            selects nothing produces no files.
        preserve_bookmarks (bool): Whether to copy the bookmarks that point
            at a page into that page's output file

    Returns:
        int: Number of pages written
    """
    # Context managers close the documents even if a page fails to save.
    with pymupdf.open(input_path) as pdf_document:
        Path(output_folder).mkdir(parents=True, exist_ok=True)

        # Translate the 1-indexed inclusive range into a 0-indexed half-open one.
        total_pages = len(pdf_document)
        if page_range:
            start_page, end_page = page_range
            start_page = max(1, start_page) - 1
            end_page = min(total_pages, end_page)
        else:
            start_page, end_page = 0, total_pages

        # The source TOC does not change while splitting, so read it once.
        bookmarks = pdf_document.get_toc() if preserve_bookmarks else []

        pages_split = 0
        for page_num in range(start_page, end_page):
            with pymupdf.open() as new_pdf:
                new_pdf.insert_pdf(pdf_document, from_page=page_num, to_page=page_num)

                # Source bookmarks use source page numbers and levels; rebase
                # them so they are valid inside the single-page output.
                page_bookmarks = _bookmarks_for_single_page(bookmarks, page_num + 1)
                if page_bookmarks:
                    new_pdf.set_toc(page_bookmarks)

                output_filename = f"{name_prefix}_{page_num + 1:03d}.pdf"
                output_path = os.path.join(output_folder, output_filename)
                new_pdf.save(output_path)
            pages_split += 1

    print(f"Successfully split {pages_split} pages from {input_path}")
    return pages_split
# Usage examples
# Split every page; output files are named chapter_001.pdf, chapter_002.pdf, ...
split_pdf_advanced("document.pdf", "output", name_prefix="chapter")
# Split only pages 5 through 10 (1-indexed, inclusive): page_005.pdf ... page_010.pdf
split_pdf_advanced("document.pdf", "output", page_range=(5, 10))
Batch Processing Multiple PDFs
For processing multiple PDF files at once, here's a batch processing function:
import pymupdf
import os
from pathlib import Path
import glob
def batch_split_pdfs(input_folder, output_base_folder):
    """
    Run split_pdf_to_pages on every ``*.pdf`` file directly inside a folder.

    Every input PDF gets its own sub-folder under ``output_base_folder``,
    named after the file without its extension. A failure on one file is
    printed and does not stop the remaining files from being processed.

    Args:
        input_folder (str): Folder containing PDF files
        output_base_folder (str): Base folder for output
    """
    pattern = os.path.join(input_folder, "*.pdf")
    sources = glob.glob(pattern)

    if len(sources) == 0:
        print("No PDF files found in the input folder")
        return

    for source in sources:
        # e.g. "input/report.pdf" -> "<output_base_folder>/report"
        stem = Path(source).stem
        destination = os.path.join(output_base_folder, stem)
        try:
            split_pdf_to_pages(source, destination)
            print(f"Processed: {source}")
        except Exception as exc:
            # Report and move on so one bad file does not abort the batch.
            print(f"Error processing {source}: {exc}")
# Usage: each PDF in input_pdfs/ is split into split_output/<pdf file name without extension>/
batch_split_pdfs("input_pdfs", "split_output")
Error Handling and Validation
Always include proper error handling when working with files:
import pymupdf
import os
from pathlib import Path
def split_pdf_safe(input_path, output_folder):
    """
    Split a PDF into single-page files, reporting failures instead of raising.

    Pages are written to ``output_folder`` as ``page_NNN.pdf`` (1-based,
    zero-padded to three digits).

    Args:
        input_path (str): Path to the input PDF file; must exist and end
            with ".pdf" (case-insensitive)
        output_folder (str): Directory to save individual pages; created
            only after the input has been validated

    Returns:
        tuple: ``(True, "Successfully split N pages")`` on success, or
        ``(False, "Error: <reason>")`` if validation or splitting fails.
    """
    try:
        # Validate the input before touching the PDF library or the disk.
        if not os.path.exists(input_path):
            raise FileNotFoundError(f"Input file not found: {input_path}")
        # os.fspath() lets pathlib.Path inputs pass the extension check too.
        if not os.fspath(input_path).lower().endswith('.pdf'):
            raise ValueError("Input file must be a PDF")

        # The context manager closes the document on every exit path,
        # including the validation errors raised below.
        with pymupdf.open(input_path) as pdf_document:
            if pdf_document.is_encrypted:
                raise ValueError("Encrypted PDFs are not supported")

            # Capture the count while the document is open; len() on a
            # closed document raises ValueError.
            total_pages = len(pdf_document)
            if total_pages == 0:
                raise ValueError("PDF contains no pages")

            Path(output_folder).mkdir(parents=True, exist_ok=True)

            for page_num in range(total_pages):
                with pymupdf.open() as new_pdf:
                    new_pdf.insert_pdf(pdf_document, from_page=page_num, to_page=page_num)
                    output_path = os.path.join(output_folder, f"page_{page_num + 1:03d}.pdf")
                    new_pdf.save(output_path)

        return True, f"Successfully split {total_pages} pages"
    except Exception as e:
        # Deliberately broad: this function's contract is to never raise.
        return False, f"Error: {e}"
# Usage with error handling: split_pdf_safe returns a (success, message) tuple instead of raising
success, message = split_pdf_safe("input.pdf", "output")
print(message)
Performance Considerations
When working with large PDFs or many files, consider these optimization tips:
- Memory Management: Close PDF documents when done to free memory
- Batch Operations: Process multiple pages in batches for very large files
- File I/O: Use SSDs for better performance with many small files
- Parallelism: For batch processing, consider multiprocessing (for example, one PDF per worker process) rather than threading, because PyMuPDF does not support multithreaded use
Alternative: Splitting to Images
Sometimes you might want to convert pages to images instead of keeping them as PDFs:
import pymupdf
def split_pdf_to_images(input_path, output_folder, image_format="PNG", dpi=150):
    """
    Convert PDF pages to image files.

    Each page is rendered and saved to ``output_folder`` as
    ``page_NNN.<ext>``, where NNN is the 1-based page number zero-padded to
    three digits and ``<ext>`` is ``image_format`` in lower case.

    Args:
        input_path (str): Path to input PDF
        output_folder (str): Output directory; created (including parents)
            if it does not exist
        image_format (str): Image format (PNG, JPEG, etc.); used as the
            output file extension
        dpi (int): Resolution for images
    """
    # Function-scope import: this snippet only imports pymupdf at module level.
    import os

    # The context manager closes the document even if rendering or saving fails.
    with pymupdf.open(input_path) as pdf_document:
        os.makedirs(output_folder, exist_ok=True)

        # Capture the count while the document is open; len() on a closed
        # document raises ValueError.
        total_pages = len(pdf_document)

        # The scaling matrix and extension are the same for every page, so
        # build them once. A zoom of dpi/72 scales the default 72-dpi
        # rendering to the requested resolution.
        zoom = dpi / 72
        mat = pymupdf.Matrix(zoom, zoom)
        extension = image_format.lower()

        for page_num in range(total_pages):
            page = pdf_document[page_num]
            pix = page.get_pixmap(matrix=mat)
            output_path = os.path.join(output_folder, f"page_{page_num + 1:03d}.{extension}")
            pix.save(output_path)

    print(f"Converted {total_pages} pages to {image_format} images")
Conclusion
PyMuPDF provides a robust and efficient way to split PDF files into individual pages. Whether you need basic splitting functionality or advanced features like custom naming and batch processing, PyMuPDF offers the tools you need. The library's speed and reliability make it an excellent choice for both small scripts and large-scale document processing applications.
Remember to always handle errors appropriately and close PDF documents when finished to ensure optimal performance and resource management. With these examples and techniques, you'll be able to implement PDF splitting functionality that meets your specific requirements.