This guide covers essential MongoDB aggregation framework concepts that are commonly asked about in technical interviews. Each question includes a detailed answer and practical examples.
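The MongoDB Aggregation Framework is a powerful tool for data processing and transformation: documents flow through a pipeline of stages, and each stage transforms the stream before passing it to the next.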
MongoDB provides several pipeline stages for data processing:
// Basic pipeline stages, assembled as a named array and then executed.
// Produces one document per customer with an order count and a summed total,
// ordered from the largest summed total to the smallest.
const basicPipelineStages = [
  // $match: keep only documents whose status is "completed"
  { $match: { status: "completed" } },

  // $project: keep orderId, total, customer and date (plus the default _id)
  { $project: { orderId: 1, total: 1, customer: 1, date: 1 } },

  // $group: collapse the stream to one document per distinct customer value
  {
    $group: {
      _id: "$customer",
      totalOrders: { $sum: 1 }, // adds 1 per grouped document
      totalAmount: { $sum: "$total" },
    },
  },

  // $sort: descending by the summed amount
  { $sort: { totalAmount: -1 } },
];

db.orders.aggregate(basicPipelineStages);
// Advanced pipeline stages
db.orders.aggregate([
  // Lookup stage: join each order with the matching documents from
  // "customers" (orders.customerId == customers._id); matches land in the
  // customerInfo array.
  { $lookup: {
    from: "customers",
    localField: "customerId",
    foreignField: "_id",
    as: "customerInfo"
  }},
  // Facet stage: run several independent sub-pipelines over the same input
  // documents and return their results side by side in one document.
  { $facet: {
    // Input here is still one document per order, so each order's total is
    // added exactly once (assumes `total` is the order-level amount).
    "totalSales": [
      { $group: { _id: null, total: { $sum: "$total" } } }
    ],
    "topProducts": [
      // Unwind stage: deconstruct the items array into one document per item.
      // It lives inside this facet on purpose: unwinding before $facet would
      // repeat every order once per item and inflate totalSales.
      { $unwind: "$items" },
      { $group: { _id: "$items.product", count: { $sum: 1 } } },
      { $sort: { count: -1 } },
      { $limit: 5 }
    ]
  }}
])
Optimizing aggregation pipelines involves several strategies:
// Optimized pipeline
db.orders.aggregate([
  // 1. Place $match early to reduce documents
  { $match: {
    status: "completed",
    date: { $gte: new Date("2024-01-01") }
  }},
  // 2. Use $project to limit fields
  { $project: {
    orderId: 1,
    total: 1,
    customerId: 1
  }},
  // 3. Use $limit early if possible: cap the stream before the join so at
  //    most 1000 documents are looked up. $lookup (without a following
  //    $unwind) emits exactly one document per input document, so limiting
  //    first yields the same result set as limiting afterwards.
  { $limit: 1000 },
  // 4. Use indexes effectively: the join targets customers._id, which is
  //    always indexed.
  { $lookup: {
    from: "customers",
    localField: "customerId",
    foreignField: "_id",
    as: "customer"
  }}
], {
  // 5. Use explain to analyze performance (returns the query plan instead of
  //    the pipeline results)
  explain: true
})
// Memory-efficient pipeline
db.orders.aggregate([
  // 1. Use $match to filter early
  { $match: { status: "completed" } },
  // 2. Use $project to reduce memory usage: keep only the fields the later
  //    stages read. customerId must survive this stage, otherwise the $group
  //    below would key on a missing field and fold everything into a single
  //    _id: null group.
  { $project: {
    _id: 0,
    customerId: 1,
    total: 1
  }},
  // 3. Use $group with $sum instead of $push (a running number per group
  //    rather than an ever-growing array)
  { $group: {
    _id: "$customerId",
    totalAmount: { $sum: "$total" }
  }},
  // 4. Use allowDiskUse for large datasets (see the options object below)
  { $sort: { totalAmount: -1 } }
], {
  allowDiskUse: true
})
MongoDB provides various aggregation operators:
// 1. Arithmetic Operators
// The numeric operands are named up front; the projection itself is unchanged.
const orderTaxMultiplier = 0.1;
const orderDiscountDivisor = 10;

db.orders.aggregate([
  {
    $project: {
      total: 1,
      // tax = total * 0.1
      tax: { $multiply: ["$total", orderTaxMultiplier] },
      // discount = total / 10
      discount: { $divide: ["$total", orderDiscountDivisor] },
    },
  },
]);
// 2. Array Operators
db.products.aggregate([
  { $project: {
    name: 1,
    // First element of the categories array
    firstCategory: { $arrayElemAt: ["$categories", 0] },
    // $size raises an error when its argument is missing or not an array,
    // which would abort the whole aggregation for one malformed document.
    // Guard it with $isArray and report 0 for such documents.
    categoryCount: {
      $cond: {
        if: { $isArray: "$categories" },
        then: { $size: "$categories" },
        else: 0
      }
    }
  }}
])
// 3. String Operators
db.users.aggregate([
  { $project: {
    name: 1,
    email: 1,
    // domain = everything after the first "@" in email.
    // The "+ 1" has to be an aggregation $add evaluated by the server.
    // A JavaScript `+` applied to the operand array is evaluated in the
    // shell and coerces the array to a string, so the server never sees a
    // valid $indexOfBytes expression.
    // A length of -1 tells $substr to return the rest of the string.
    // When there is no "@", $indexOfBytes returns -1, the start index
    // becomes 0, and the whole email is returned.
    domain: {
      $substr: [
        "$email",
        { $add: [{ $indexOfBytes: ["$email", "@"] }, 1] },
        -1
      ]
    }
  }}
])
// 4. Date Operators
// Every derived field reads the same source field, so it is named once.
const orderDateField = "$date";

db.orders.aggregate([
  {
    $project: {
      orderId: 1,
      year: { $year: orderDateField },
      month: { $month: orderDateField }, // 1 (January) through 12 (December)
      dayOfWeek: { $dayOfWeek: orderDateField }, // 1 (Sunday) through 7 (Saturday)
    },
  },
]);
Complex data transformations require careful pipeline design:
// Complex data transformation, written as named stages that are combined
// into the pipeline at the end.

// Stage 1: keep only completed orders.
const lineItemsMatchCompleted = { $match: { status: "completed" } };

// Stage 2: emit one document per element of the items array.
const lineItemsUnwindItems = { $unwind: "$items" };

// Stage 3: attach matching products (items.productId == products._id)
// as the productInfo array.
const lineItemsLookupProduct = {
  $lookup: {
    from: "products",
    localField: "items.productId",
    foreignField: "_id",
    as: "productInfo",
  },
};

// Stage 4: flatten each line item into top-level fields.
const lineItemsReshape = {
  $project: {
    orderId: 1,
    customerId: 1,
    // name of the first joined product
    itemName: { $arrayElemAt: ["$productInfo.name", 0] },
    quantity: "$items.quantity",
    price: "$items.price",
    // line total = quantity * price
    total: { $multiply: ["$items.quantity", "$items.price"] },
  },
};

// Stage 5: roll line items up per customer and collect the item details.
const lineItemsGroupByCustomer = {
  $group: {
    _id: "$customerId",
    totalItems: { $sum: "$quantity" },
    totalAmount: { $sum: "$total" },
    items: {
      $push: { name: "$itemName", quantity: "$quantity", price: "$price" },
    },
  },
};

// Stage 6: customers with the largest totalAmount first.
const lineItemsSortByAmount = { $sort: { totalAmount: -1 } };

db.orders.aggregate([
  lineItemsMatchCompleted,
  lineItemsUnwindItems,
  lineItemsLookupProduct,
  lineItemsReshape,
  lineItemsGroupByCustomer,
  lineItemsSortByAmount,
]);
// Conditional processing in pipeline

// Builds the expression "total < limit" used by the $switch branches.
const orderTotalBelow = (limit) => ({ $lt: ["$total", limit] });

db.orders.aggregate([
  {
    $project: {
      orderId: 1,
      total: 1,
      status: 1,

      // Conditional field calculation, using the array shorthand
      // [ condition, value-if-true, value-if-false ]:
      // 10% of total when total >= 1000, otherwise 0.
      discount: {
        $cond: [{ $gte: ["$total", 1000] }, { $multiply: ["$total", 0.1] }, 0],
      },

      // Multiple conditions: branches are tested in order and the first
      // match wins; "large" is used when no branch matches.
      category: {
        $switch: {
          branches: [
            { case: orderTotalBelow(100), then: "small" },
            { case: orderTotalBelow(500), then: "medium" },
          ],
          default: "large",
        },
      },
    },
  },
]);
Continue your MongoDB interview preparation with: