This guide covers essential MongoDB aggregation framework concepts commonly asked in technical interviews. Each question includes detailed answers and practical examples.
The MongoDB Aggregation Framework is a powerful tool for data processing and transformation:
MongoDB provides several pipeline stages for data processing:
// Basic pipeline stages: $match -> $project -> $group -> $sort
const customerOrderTotalsPipeline = [
  // $match: only documents whose status equals "completed" continue.
  { $match: { status: "completed" } },

  // $project: pass through just the listed fields.
  { $project: { orderId: 1, total: 1, customer: 1, date: 1 } },

  // $group: one result per distinct `customer` value, carrying a document
  // count (totalOrders) and the sum of `total` (totalAmount).
  {
    $group: {
      _id: "$customer",
      totalOrders: { $sum: 1 },
      totalAmount: { $sum: "$total" },
    },
  },

  // $sort: descending by totalAmount.
  { $sort: { totalAmount: -1 } },
];

db.orders.aggregate(customerOrderTotalsPipeline);
// Advanced pipeline stages
db.orders.aggregate([
    // Lookup stage: Join with other collections
    // (matches orders.customerId against customers._id and stores the
    // matching documents in the customerInfo array)
    { $lookup: {
        from: "customers",
        localField: "customerId",
        foreignField: "_id",
        as: "customerInfo"
    }},
    
    // Facet stage: Multiple aggregation pipelines over the same input documents
    { $facet: {
        // Sums the order-level `total` exactly once per order. The $unwind
        // must NOT run before this sub-pipeline: unwinding first would emit
        // one copy of the order per item and count `total` once per item.
        "totalSales": [
            { $group: { _id: null, total: { $sum: "$total" } } }
        ],
        // Five most frequent products across all order items.
        "topProducts": [
            // Unwind stage: Deconstruct arrays (one document per items element);
            // kept inside this facet because only this facet needs item-level rows
            { $unwind: "$items" },
            { $group: { _id: "$items.product", count: { $sum: 1 } } },
            { $sort: { count: -1 } },
            { $limit: 5 }
        ]
    }}
])
Optimizing aggregation pipelines involves several strategies:
// Optimized pipeline
db.orders.aggregate([
    // 1. Place $match early to reduce documents
    { $match: { 
        status: "completed",
        date: { $gte: new Date("2024-01-01") }
    }},
    
    // 2. Use $project to limit fields
    { $project: {
        orderId: 1,
        total: 1,
        customerId: 1
    }},
    
    // 3. Use $limit early if possible: cap the stream BEFORE the join so the
    //    $lookup below runs for at most 1000 documents. This $lookup emits
    //    one output document per input document, so limiting first yields
    //    the same number of results. There is no $sort in this pipeline, so
    //    which 1000 documents are kept is not defined by the pipeline itself.
    { $limit: 1000 },
    
    // 4. Use indexes effectively: the join matches on customers._id
    { $lookup: {
        from: "customers",
        localField: "customerId",
        foreignField: "_id",
        as: "customer"
    }}
], {
    // 5. Use explain to analyze performance
    //    (with explain enabled the call reports how the pipeline is
    //    processed instead of returning the result documents)
    explain: true
})
// Memory-efficient pipeline
db.orders.aggregate([
    // 1. Use $match to filter early
    { $match: { status: "completed" } },
    
    // 2. Use $project to reduce memory usage: keep only the fields the
    //    $group stage below reads. customerId MUST be kept here; if it is
    //    projected away, "$customerId" resolves to a missing field and every
    //    order collapses into a single group with _id: null.
    { $project: {
        _id: 0,
        customerId: 1,
        total: 1
    }},
    
    // 3. Use $group with $sum instead of $push
    //    ($sum keeps one running number per customer rather than an array)
    { $group: {
        _id: "$customerId",
        totalAmount: { $sum: "$total" }
    }},
    
    // 4. Sort the per-customer totals, largest first
    { $sort: { totalAmount: -1 } }
], {
    // Use allowDiskUse for large datasets
    allowDiskUse: true
})
MongoDB provides various aggregation operators:
// 1. Arithmetic Operators
// tax is total multiplied by 0.1; discount is total divided by 10.
const orderChargesProjection = {
  total: 1,
  tax: { $multiply: ["$total", 0.1] },
  discount: { $divide: ["$total", 10] },
};

db.orders.aggregate([{ $project: orderChargesProjection }]);
// 2. Array Operators
db.products.aggregate([
    { $project: {
        name: 1,
        // Element at index 0 of the categories array
        firstCategory: { $arrayElemAt: ["$categories", 0] },
        // $size raises an error (aborting the whole aggregation) when its
        // argument is not an array, e.g. when categories is missing.
        // Guard with $isArray and report 0 for such documents.
        categoryCount: {
            $cond: [
                { $isArray: "$categories" },
                { $size: "$categories" },
                0
            ]
        }
    }}
])
// 3. String Operators
db.users.aggregate([
    { $project: {
        name: 1,
        email: 1,
        // Domain = everything after the first "@".
        // The "+ 1" has to be done server-side with $add: writing
        // `["$email", "@"] + 1` in JavaScript concatenates the array with 1
        // and produces the string "$email,@1" before the query is ever sent.
        // The byte count is the full string length, so the substring runs
        // to the end of the email.
        // When there is no "@", $indexOfBytes yields -1, the start becomes 0
        // and the whole email is returned.
        // NOTE(review): assumes every document has a string `email` - confirm,
        // or filter/guard such documents before this stage.
        domain: {
            $substrBytes: [
                "$email",
                { $add: [{ $indexOfBytes: ["$email", "@"] }, 1] },
                { $strLenBytes: "$email" }
            ]
        }
    }}
])
// 4. Date Operators
// Break each order's `date` field into year, month and day-of-week parts.
const orderDatePartsProjection = {
  orderId: 1,
  year: { $year: "$date" },
  month: { $month: "$date" },
  dayOfWeek: { $dayOfWeek: "$date" },
};

db.orders.aggregate([{ $project: orderDatePartsProjection }]);
Complex data transformations require careful pipeline design:
// Complex data transformation
// Flow: completed orders -> one row per item -> attach product -> flatten ->
// per-customer summary -> sort by amount.
const customerItemSummaryPipeline = [
  // Step 1: only orders with status "completed".
  { $match: { status: "completed" } },

  // Step 2: emit one document per element of the `items` array.
  { $unwind: "$items" },

  // Step 3: attach products whose _id equals items.productId as `productInfo`.
  {
    $lookup: {
      from: "products",
      localField: "items.productId",
      foreignField: "_id",
      as: "productInfo",
    },
  },

  // Step 4: flatten to the fields the summary needs. `total` here is the
  // line total (quantity * price) computed from the item.
  {
    $project: {
      orderId: 1,
      customerId: 1,
      itemName: { $arrayElemAt: ["$productInfo.name", 0] },
      quantity: "$items.quantity",
      price: "$items.price",
      total: { $multiply: ["$items.quantity", "$items.price"] },
    },
  },

  // Step 5: roll the item rows up per customer, keeping each item's
  // name/quantity/price in an `items` array.
  {
    $group: {
      _id: "$customerId",
      totalItems: { $sum: "$quantity" },
      totalAmount: { $sum: "$total" },
      items: {
        $push: { name: "$itemName", quantity: "$quantity", price: "$price" },
      },
    },
  },

  // Step 6: customers with the largest totalAmount first.
  { $sort: { totalAmount: -1 } },
];

db.orders.aggregate(customerItemSummaryPipeline);
// Conditional processing in pipeline
// Single condition: totals of 1000 or more get total * 0.1, otherwise 0.
const largeOrderDiscountExpr = {
  $cond: {
    if: { $gte: ["$total", 1000] },
    then: { $multiply: ["$total", 0.1] },
    else: 0,
  },
};

// Multiple conditions: thresholds listed in ascending order, with "large"
// as the fallback when no branch matches.
const orderSizeCategoryExpr = {
  $switch: {
    branches: [
      { case: { $lt: ["$total", 100] }, then: "small" },
      { case: { $lt: ["$total", 500] }, then: "medium" },
    ],
    default: "large",
  },
};

db.orders.aggregate([
  {
    $project: {
      orderId: 1,
      total: 1,
      status: 1,
      discount: largeOrderDiscountExpr,
      category: orderSizeCategoryExpr,
    },
  },
]);
Continue your MongoDB interview preparation with: