This guide covers essential MongoDB pipeline operations and aggregation concepts commonly asked in technical interviews. Each question includes detailed answers and practical examples.
Pipeline stages are the building blocks of the MongoDB aggregation framework:
Pipeline optimization involves several strategies:
// Optimized pipeline order: filter -> sort -> group -> project.
// Each stage is placed so the fewest documents flow to the next stage.
db.orders.aggregate([
// Early filtering reduces documents: $match first means only "completed"
// orders reach later stages, and a leading $match can be served by an
// index on { status: 1 }.
{ $match: { status: "completed" } },
// Index usage for sorting: $sort immediately after the $match can use a
// compound index (e.g. { status: 1, orderDate: -1 }) instead of an
// in-memory sort.
{ $sort: { orderDate: -1 } },
// Group after filtering: aggregation runs over the reduced document set.
// orderCount-style $sum: 1 counts orders per customer.
{ $group: {
_id: "$customerId",
totalOrders: { $sum: 1 },
totalAmount: { $sum: "$total" }
}},
// Final projection: rename _id back to customerId and drop _id so the
// output shape matches the domain model.
{ $project: {
customerId: "$_id",
totalOrders: 1,
totalAmount: 1,
_id: 0
}}
])
// Using allowDiskUse for large datasets.
// $push: "$$ROOT" accumulates the *entire* order document per customer.
// NOTE(review): each grouped result must stay under the 16 MB BSON
// document limit — confirm per-customer order volume before pushing $$ROOT.
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $group: {
_id: "$customerId",
orders: { $push: "$$ROOT" }
}},
{ $sort: { "_id": 1 } }
// allowDiskUse lets blocking stages ($group/$sort) spill to temporary
// files instead of failing when they exceed the in-memory stage limit.
], { allowDiskUse: true })
// Using $facet for parallel processing.
// Fix: the original duplicated the same $match inside both facet branches.
// Stages inside $facet sub-pipelines cannot use indexes, so the shared
// $match is hoisted in front of $facet — it runs once (index-backed)
// instead of once per branch, and the output is identical.
db.orders.aggregate([
  // Shared filter, applied once before fan-out.
  { $match: { status: "completed" } },
  { $facet: {
    // Branch 1: total number of completed orders.
    "totalOrders": [
      { $count: "count" }
    ],
    // Branch 2: top 10 customers by completed-order revenue.
    "topCustomers": [
      { $group: {
        _id: "$customerId",
        total: { $sum: "$total" }
      }},
      { $sort: { total: -1 } },
      { $limit: 10 }
    ]
  }}
])
Advanced pipeline operations include complex data transformations:
// Advanced grouping with multiple accumulators.
// Buckets completed orders by calendar year/month and computes
// sum, average, min, max, and count of order totals per bucket.
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $group: {
// Compound group key: $year/$month extract date parts from orderDate.
_id: {
year: { $year: "$orderDate" },
month: { $month: "$orderDate" }
},
totalSales: { $sum: "$total" },
avgOrderValue: { $avg: "$total" },
minOrder: { $min: "$total" },
maxOrder: { $max: "$total" },
// $sum: 1 counts the documents in each bucket.
orderCount: { $sum: 1 }
}},
// Most recent month first ("_id.year" then "_id.month", both descending).
{ $sort: { "_id.year": -1, "_id.month": -1 } }
])
// Complex data transformation: join each completed order with its customer
// record, then reshape line items, computing a subtotal per item.
db.orders.aggregate([
  { $match: { status: "completed" } },
  // Left-outer join against the customers collection on customerId -> _id;
  // matches land in the "cust" array field.
  { $lookup: {
      from: "customers",
      localField: "customerId",
      foreignField: "_id",
      as: "cust"
  }},
  // One customer per order: flatten the single-element join array.
  { $unwind: "$cust" },
  { $project: {
      orderId: 1,
      total: 1,
      // Surface joined customer fields under flat names.
      customerName: "$cust.name",
      customerEmail: "$cust.email",
      orderDate: 1,
      // Rebuild each line item with a computed subtotal = quantity * price.
      items: {
        $map: {
          input: "$items",
          as: "it",
          in: {
            name: "$$it.name",
            quantity: "$$it.quantity",
            price: "$$it.price",
            subtotal: { $multiply: ["$$it.quantity", "$$it.price"] }
          }
        }
      }
  }}
])
Handling large datasets requires specific strategies:
// Pagination with large datasets: page 11 of 100-per-page, newest first.
// NOTE(review): $skip still scans and discards the skipped documents, so
// cost grows with page depth — for very deep pagination a range-based
// cursor (e.g. $match on the last seen orderDate) scales better; confirm
// expected page depth before shipping this pattern.
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $sort: { orderDate: -1 } },
{ $skip: 1000 },
{ $limit: 100 }
], { allowDiskUse: true })
// Parallel processing with $facet: daily revenue series plus top customers.
// Fix: the original repeated the identical $match inside each branch.
// $facet sub-pipelines cannot use indexes, so the shared $match is hoisted
// before $facet — it executes once, can use an index on { status: 1 },
// and the output documents are unchanged.
db.orders.aggregate([
  // Shared filter, applied once before both branches.
  { $match: { status: "completed" } },
  { $facet: {
    // Branch 1: per-day revenue and order count, keyed by YYYY-MM-DD.
    "dailyStats": [
      { $group: {
        _id: { $dateToString: { format: "%Y-%m-%d", date: "$orderDate" } },
        total: { $sum: "$total" },
        count: { $sum: 1 }
      }}
    ],
    // Branch 2: top 10 customers by revenue with their order counts.
    "customerStats": [
      { $group: {
        _id: "$customerId",
        total: { $sum: "$total" },
        orders: { $sum: 1 }
      }},
      { $sort: { total: -1 } },
      { $limit: 10 }
    ]
  }}
// allowDiskUse lets the $group/$sort stages spill to disk on large inputs.
], { allowDiskUse: true })
Follow these best practices for efficient pipeline operations:
// Optimize pipeline with proper indexes: compound index matching the
// $match field (status) followed by the sort/range field (orderDate desc).
db.orders.createIndex({ status: 1, orderDate: -1 })
// Use covered queries when possible: when $match and $project reference
// only indexed fields and _id is explicitly excluded, the pipeline can be
// answered from the index alone without fetching documents.
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $project: {
status: 1,
orderDate: 1,
// _id: 0 is required for coverage — _id is returned by default but is
// not part of the { status, orderDate } index.
_id: 0
}}
])
// Implement proper error handling: guard against missing/null fields so
// one bad document does not poison the aggregate.
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $group: {
_id: "$customerId",
// $ifNull substitutes 0 when "total" is absent or null, so the per-
// customer sum stays numeric instead of being skewed by bad documents.
total: { $sum: { $ifNull: ["$total", 0] } }
}}
])
// Modular pipeline design: declare each stage as a named constant so
// stages can be reused, unit-tested, and composed into pipelines.
const matchStage = { $match: { status: "completed" } };
const groupStage = {
  $group: { _id: "$customerId", total: { $sum: "$total" } },
};
const sortStage = { $sort: { total: -1 } };

// Assemble the pipeline from the reusable stage objects:
// filter completed orders, total per customer, highest totals first.
db.orders.aggregate([matchStage, groupStage, sortStage]);
// Using $expr for complex conditions: $expr allows aggregation expressions
// ($eq, $gte, $and, field-to-field comparisons) inside $match.
// NOTE(review): $expr predicates may use indexes less efficiently than
// plain query operators — for this particular filter, the equivalent
// { status: "completed", total: { $gte: 100 } } is usually preferable;
// reserve $expr for conditions plain operators cannot express.
db.orders.aggregate([
{ $match: {
$expr: {
$and: [
{ $eq: ["$status", "completed"] },
{ $gte: ["$total", 100] }
]
}
}}
])
Continue your MongoDB interview preparation with: