Python Excel Automation with openpyxl and pandas
Python is the ideal tool for Excel automation — generating monthly reports, transforming data between systems, and building dashboards that would take hours manually. openpyxl gives you full programmatic control over .xlsx files: formatting, charts, formulas, and multiple sheets. pandas makes bulk data operations trivial and exports to Excel with one line. This guide covers both tools and shows how to serve Excel files from FastAPI.
Table of Contents
Reading Excel Files
pip install openpyxl pandas xlsxwriter
import openpyxl
from openpyxl import load_workbook
# Load existing workbook
wb = load_workbook("sales_report.xlsx", data_only=True) # data_only=True reads values not formulas
ws = wb.active # active sheet
ws2 = wb["Q2 Data"] # sheet by name
print(wb.sheetnames) # ['Sheet1', 'Q2 Data', 'Summary']
# Read cells
val = ws["A1"].value # single cell
val = ws.cell(row=1, column=1).value # equivalent
# Iterate rows
for row in ws.iter_rows(min_row=2, values_only=True):
print(row) # tuple of cell values
# Iterate specific range
for row in ws["A1:D10"]:
for cell in row:
print(cell.value)
# Read into list of dicts (with header row)
def sheet_to_dicts(ws) -> list[dict]:
rows = list(ws.iter_rows(values_only=True))
headers = rows[0]
return [dict(zip(headers, row)) for row in rows[1:] if any(row)]
# pandas is easier for bulk reads
import pandas as pd
df = pd.read_excel("sales_report.xlsx", sheet_name="Q2 Data", header=0)
df_all = pd.read_excel("sales_report.xlsx", sheet_name=None) # dict of all sheets
Writing with openpyxl
from openpyxl import Workbook
from openpyxl.utils import get_column_letter, column_index_from_string
# Create new workbook
wb = Workbook()
ws = wb.active
ws.title = "Sales Data"
# Add a second sheet
ws2 = wb.create_sheet("Summary")
# Write cells
ws["A1"] = "Product"
ws["B1"] = "Units Sold"
ws["C1"] = "Revenue"
# Write rows of data
data = [
("Widget A", 1250, 62500),
("Widget B", 890, 44500),
("Service X", 340, 102000),
]
for row_num, row_data in enumerate(data, start=2):
ws.append(row_data)
# Or write one cell at a time
ws.cell(row=6, column=1, value="Total")
ws.cell(row=6, column=2, value=f"=SUM(B2:B{len(data)+1})")
ws.cell(row=6, column=3, value=f"=SUM(C2:C{len(data)+1})")
# Column widths
ws.column_dimensions["A"].width = 20
ws.column_dimensions["B"].width = 15
ws.column_dimensions["C"].width = 15
# Row height
ws.row_dimensions[1].height = 25
# Freeze panes (keep header visible when scrolling)
ws.freeze_panes = "A2"
wb.save("output.xlsx")
Cell Formatting and Styles
from openpyxl.styles import (
Font, Fill, PatternFill, Border, Side, Alignment,
numbers, GradientFill
)
from openpyxl.utils import get_column_letter
def style_header_row(ws, row: int = 1):
header_fill = PatternFill(start_color="2C3E50", end_color="2C3E50", fill_type="solid")
header_font = Font(name="Calibri", bold=True, color="FFFFFF", size=11)
header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
for cell in ws[row]:
cell.fill = header_fill
cell.font = header_font
cell.alignment = header_alignment
def apply_table_style(ws, data_rows: int):
thin_border = Border(
left=Side(style="thin"),
right=Side(style="thin"),
top=Side(style="thin"),
bottom=Side(style="thin"),
)
alt_fill = PatternFill(start_color="F5F5F5", end_color="F5F5F5", fill_type="solid")
for row_num in range(2, data_rows + 2):
for cell in ws[row_num]:
cell.border = thin_border
if row_num % 2 == 0:
cell.fill = alt_fill
def format_currency(ws, col: str, start_row: int, end_row: int):
for row in range(start_row, end_row + 1):
ws[f"{col}{row}"].number_format = "$#,##0.00"
def format_percentage(ws, col: str, start_row: int, end_row: int):
for row in range(start_row, end_row + 1):
ws[f"{col}{row}"].number_format = "0.00%"
# Conditional formatting
from openpyxl.formatting.rule import ColorScaleRule, DataBarRule, CellIsRule
# Color scale: green (high) to red (low)
ws.conditional_formatting.add(
"C2:C100",
ColorScaleRule(
start_type="min", start_color="FF0000",
end_type="max", end_color="00FF00",
),
)
# Highlight cells above threshold
ws.conditional_formatting.add(
"B2:B100",
CellIsRule(operator="greaterThan", formula=["1000"], fill=PatternFill(
start_color="C6EFCE", end_color="C6EFCE", fill_type="solid"
)),
)
Charts and Visualizations
from openpyxl.chart import BarChart, LineChart, PieChart, Reference
def add_bar_chart(ws, title: str, data_range: str, categories_range: str,
anchor: str = "E2"):
chart = BarChart()
chart.type = "col"
chart.grouping = "clustered"
chart.title = title
chart.style = 10
chart.y_axis.title = "Revenue ($)"
chart.x_axis.title = "Product"
chart.width = 15
chart.height = 10
data = Reference(ws, range_string=data_range)
categories = Reference(ws, range_string=categories_range)
chart.add_data(data, titles_from_data=True)
chart.set_categories(categories)
ws.add_chart(chart, anchor)
def add_line_chart(ws, anchor: str = "E18"):
chart = LineChart()
chart.title = "Monthly Trend"
chart.style = 13
chart.smooth = True
data = Reference(ws, min_col=3, min_row=1, max_row=13)
chart.add_data(data, titles_from_data=True)
ws.add_chart(chart, anchor)
# Usage
wb = Workbook()
ws = wb.active
ws.append(["Product", "Units", "Revenue"])
for name, units, rev in [("A", 100, 5000), ("B", 80, 4000), ("C", 120, 6000)]:
ws.append([name, units, rev])
add_bar_chart(ws, "Revenue by Product",
f"Sheet!$C$1:$C$4", f"Sheet!$A$2:$A$4")
wb.save("chart_report.xlsx")
Formulas and Named Ranges
from openpyxl.workbook.defined_name import DefinedName
from openpyxl.utils import quote_sheetname, absolute_coordinate
ws["A10"] = "=SUM(A2:A9)"
ws["B10"] = "=AVERAGE(B2:B9)"
ws["C10"] = "=COUNTIF(C2:C9,\">1000\")"
ws["D10"] = "=VLOOKUP(A2,Sheet2!A:B,2,FALSE)"
ws["E10"] = "=IF(B10>1000,\"High\",\"Low\")"
# Named range — define once, use in formulas
from openpyxl.utils import quote_sheetname
attr_text = f"{quote_sheetname(ws.title)}!$B$2:$B$100"
defn = DefinedName("SalesData", attr_text=attr_text)
wb.defined_names.add(defn)
# Table (ListObject) — auto-expands, sortable in Excel
from openpyxl.worksheet.table import Table, TableStyleInfo
table = Table(displayName="SalesTable", ref="A1:C10")
table.tableStyleInfo = TableStyleInfo(
name="TableStyleMedium9", showFirstColumn=False,
showLastColumn=False, showRowStripes=True,
)
ws.add_table(table)
pandas to Excel
import pandas as pd
from io import BytesIO
df = pd.DataFrame({
"Product": ["Widget A", "Widget B", "Service X"],
"Units": [1250, 890, 340],
"Revenue": [62500, 44500, 102000],
"Growth": [0.15, 0.08, 0.32],
})
# Simple export
df.to_excel("report.xlsx", index=False, sheet_name="Sales")
# Multi-sheet export with formatting using ExcelWriter
buffer = BytesIO()
with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
df.to_excel(writer, sheet_name="Sales", index=False)
# Access workbook for further styling
wb = writer.book
ws = writer.sheets["Sales"]
# Format header
from openpyxl.styles import Font, PatternFill
for cell in ws[1]:
cell.font = Font(bold=True, color="FFFFFF")
cell.fill = PatternFill(start_color="2C3E50", fill_type="solid")
# Format percentage column
for row in range(2, len(df) + 2):
ws[f"D{row}"].number_format = "0.0%"
# Auto column widths
for col in ws.columns:
max_len = max(len(str(cell.value or "")) for cell in col)
ws.column_dimensions[col[0].column_letter].width = min(max_len + 2, 40)
# Add summary sheet
summary = {"Total Units": df["Units"].sum(), "Total Revenue": df["Revenue"].sum()}
pd.DataFrame([summary]).to_excel(writer, sheet_name="Summary", index=False)
excel_bytes = buffer.getvalue()
FastAPI Excel Download
from fastapi import FastAPI
from fastapi.responses import Response
from io import BytesIO
import pandas as pd
app = FastAPI()
@app.get("/reports/excel")
async def download_excel_report():
data = await get_report_data()
df = pd.DataFrame(data)
buffer = BytesIO()
with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
df.to_excel(writer, index=False, sheet_name="Report")
ws = writer.sheets["Report"]
for col in ws.columns:
ws.column_dimensions[col[0].column_letter].width = 18
buffer.seek(0)
return Response(
content=buffer.read(),
media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
headers={"Content-Disposition": "attachment; filename=report.xlsx"},
)
Frequently Asked Questions
- openpyxl vs xlsxwriter — which is better?
- xlsxwriter is faster for write-only workloads and has better chart support, but cannot read existing files. openpyxl can both read and modify existing files. Use xlsxwriter via pandas'
engine="xlsxwriter"when you're generating new files from scratch; use openpyxl when you need to modify existing templates. - How do I handle large Excel files without running out of memory?
- Use openpyxl's read-only mode:
load_workbook(path, read_only=True)and iterate rows withws.iter_rows(). For writing large files, use xlsxwriter's constant memory mode. For truly large datasets (millions of rows), export to CSV — Excel has a 1,048,576 row limit anyway. - Can I generate Excel files with macros (VBA)?
- openpyxl can preserve macros in .xlsm files:
load_workbook("file.xlsm", keep_vba=True). Save as .xlsm to keep them. You cannot create new VBA code with openpyxl — Python can only preserve existing macros.