Saturday, April 05, 2014

MongoDB Aggregation for Justin John Mathews - Part 2

Recently on LinkedIn (and as a follow up to a previous post, Justin John Mathews asked a question about being able to return data in a certain format. The input and output records are below. Input
db.users.insert({ courseId: 1, stDt: new Date(2014, 01, 01), endDt: new Date(2014, 01, 20), active: false });
db.users.insert({ courseId: 1, stDt: new Date(2014, 01, 25), endDt: new Date(2014, 02, 10), active: false });
db.users.insert({ courseId: 1, stDt: new Date(2014, 02, 25), endDt: new Date(2014, 03, 10), active: true });
db.users.insert({ courseId: 1, stDt: new Date(2014, 02, 28), endDt: new Date(2014, 06, 10), active: true });
db.users.insert({ courseId: 1, stDt: new Date(2014, 02, 25), endDt: new Date(2014, 02, 30), active: false });
db.users.insert({ courseId: 1, stDt: new Date(2014, 05, 25), endDt: new Date(2014, 10, 30), active: false });
db.users.insert({ courseId: 1, stDt: new Date(2013, 10, 01), endDt: new Date(2014, 08, 10), active: true });
db.users.insert({ courseId: 1, stDt: new Date(2014, 09, 01), endDt: new Date(2014, 11, 30), active: false });
Output (this is my interpretation as the format he gave was not a valid document)
{
    "result" : {
        "past" : [
            { "endDt" : ISODate("2014-02-20T07:00:00Z"), "active" : false },
            { "endDt" : ISODate("2014-03-10T06:00:00Z"), "active" : false },
            { "endDt" : ISODate("2014-03-30T06:00:00Z"), "active" : false }
        ],
        "current" : [
            { "endDt" : ISODate("2014-04-10T06:00:00Z"), "active" : true },
            { "endDt" : ISODate("2014-07-10T06:00:00Z"), "active" : true },
            { "endDt" : ISODate("2014-09-10T06:00:00Z"), "active" : true }
        ],
        "upcoming": [
            { "endDt" : ISODate("2014-11-30T07:00:00Z"), "active" : false },
            { "endDt" : ISODate("2014-12-30T07:00:00Z"), "active" : false }
        ]
    }
}

As you can see the dates are broken up into three different categories of past, current and upcoming. After talking with Justin, he would like to see the stDt value as well and he doesn't need the active field any longer. Each category is sorted by the endDt. A course is past if the endDt is less than the current date. It is active if the stDt is less than the current date and the endDt is greater than the current date. It is upcoming if the stDt is greater than the current date.

This is the code that I came up with. It does most of what he's looking for, but no matter what I did, I couldn't get the final grouping to display in the sorted order he was looking for in all categories. I'm sure I'm missing something simple, but when printing out the sorted list before the final grouping everything looks fine, but for some reason it's not getting added to the array in the order that I would imagine it should have been.

db.users.aggregate([
    {
        "$project": {
            "range": {
                "stDt": "$stDt",
                "endDt": "$endDt"
            },
            "_id": 0
        }
    },
    {
        "$group": {
            "_id": null,
            "past": {
                "$addToSet": {
                    "$cond": [
                        {"$lt": ["$range.endDt", new Date()]},
                        "$range",
                        null
                    ]
                }
            },
            "current": {
                "$addToSet": {
                    "$cond": [
                        {
                            "$and": [
                                {"$gt": ["$range.endDt", new Date()]},
                                {"$lt": ["$range.stDt", new Date()]},
                            ]
                        },
                        "$range",
                        null
                    ]
                }
            },
            "upcoming": {
                "$addToSet": {
                    "$cond": [
                        {"$gt": ["$range.stDt", new Date()]},
                        "$range",
                        null
                    ]
                }
            }
        }
    },
    {"$unwind": "$past"},
    {"$unwind": "$current"},
    {"$unwind": "$upcoming"},
    {
        "$match": {
            "past": {"$ne": null},
            "current": {"$ne": null},
            "upcoming": {"$ne": null}
        }
    },
    {
        "$sort": {
            "past.endDt": -1,
            "current.endDt": -1,
            "upcoming.endDt": -1
        }
    },
    {
        "$group": {
            "_id": null,
            "past": {"$addToSet": "$past"},
            "current": {"$addToSet": "$current"},
            "upcoming": {"$addToSet": "$upcoming"}
        }
    },
    {
        "$project": {
            "past": 1,
            "current": 1,
            "upcoming": 1,
            "_id": 0
        }
    },
])
And here is the output you get in MongoDB 2.2.x or 2.4.x.
{
    "result" : [
        {
            "past" : [
                {
                    "stDt" : ISODate("2014-02-25T07:00:00Z"),
                    "endDt" : ISODate("2014-03-10T06:00:00Z")
                },
                {
                    "stDt" : ISODate("2014-02-01T07:00:00Z"),
                    "endDt" : ISODate("2014-02-20T07:00:00Z")
                },
                {
                    "stDt" : ISODate("2014-03-25T06:00:00Z"),
                    "endDt" : ISODate("2014-03-30T06:00:00Z")
                }
            ],
            "current" : [
                {
                    "stDt" : ISODate("2014-03-25T06:00:00Z"),
                    "endDt" : ISODate("2014-04-10T06:00:00Z")
                },
                {
                    "stDt" : ISODate("2014-03-28T06:00:00Z"),
                    "endDt" : ISODate("2014-07-10T06:00:00Z")
                },
                {
                    "stDt" : ISODate("2013-11-01T06:00:00Z"),
                    "endDt" : ISODate("2014-09-10T06:00:00Z")
                }
            ],
            "upcoming" : [
                {
                    "stDt" : ISODate("2014-06-25T06:00:00Z"),
                    "endDt" : ISODate("2014-11-30T07:00:00Z")
                },
                {
                    "stDt" : ISODate("2014-10-01T06:00:00Z"),
                    "endDt" : ISODate("2014-12-30T07:00:00Z")
                }
            ]
        }
    ],
    "ok" : 1
}

This works well for the small test set you gave, but it might not be that great for larger datasets. You'll need to test that before putting this code into production. If nothing else this gives you an idea of how you can do things with the aggregation framework.

No comments:

Post a Comment