SaltyCrane Blog — Notes on JavaScript and web development

An example using Python's groupby and defaultdict to do the same task

Here is some data that I want to group by model:

    # Sample records to be grouped by their 'model' value; each dict carries
    # the keys 'model', 'some_value' and 'trim_name'.
    SOME_DATA = [
        {'model': u'Yaris', 'some_value': 11202, 'trim_name': u'3-Door L Manual'},
        {'model': u'Yaris', 'some_value': 19269, 'trim_name': u'3-Door LE Automatic'},
        {'model': u'Corolla', 'some_value': 27119, 'trim_name': u'L Automatic'},
        {'model': u'Corolla', 'some_value': 32262, 'trim_name': u'LE'},
        {'model': u'Corolla', 'some_value': 37976, 'trim_name': u'S Premium'},
        {'model': u'Camry', 'some_value': 39730, 'trim_name': u'LE 4-Cyl'},
        {'model': u'Camry', 'some_value': 45761, 'trim_name': u'XSE 4-Cyl'},
        {'model': u'Yaris', 'some_value': 48412, 'trim_name': u'3-Door L Automatic'},
        {'model': u'Camry', 'some_value': 55423, 'trim_name': u'XLE 4-Cyl'},
        {'model': u'Corolla', 'some_value': 57055, 'trim_name': u'ECO Premium'},
        {'model': u'Corolla', 'some_value': 61296, 'trim_name': u'ECO Plus'},
        {'model': u'Camry', 'some_value': 63660, 'trim_name': u'XSE V6'},
        {'model': u'Yaris', 'some_value': 65570, 'trim_name': u'5-Door LE Automatic'},
        {'model': u'Camry', 'some_value': 67461, 'trim_name': u'XLE V6'},
        {'model': u'Corolla', 'some_value': 73602, 'trim_name': u'S'},
        {'model': u'Yaris', 'some_value': 74158, 'trim_name': u'5-Door SE Manual'},
        {'model': u'Corolla', 'some_value': 74249, 'trim_name': u'LE Plus'},
        {'model': u'Corolla', 'some_value': 78386, 'trim_name': u'ECO'},
        {'model': u'Camry', 'some_value': 82747, 'trim_name': u'SE 4-Cyl'},
        {'model': u'Corolla', 'some_value': 83162, 'trim_name': u'LE Premium'},
        {'model': u'Corolla', 'some_value': 84863, 'trim_name': u'S Plus Manual'},
        {'model': u'Yaris', 'some_value': 90313, 'trim_name': u'5-Door L Automatic'},
        {'model': u'Corolla', 'some_value': 90452, 'trim_name': u'L Manual'},
        {'model': u'Yaris', 'some_value': 93152, 'trim_name': u'5-Door SE Automatic'},
        {'model': u'Corolla', 'some_value': 94973, 'trim_name': u'S Plus CVT'},
    ]

This can be done using defaultdict from the collections module.

import collections
from pprint import pprint

# Bucket every record under its model name. defaultdict(list) creates the
# empty list the first time a model is seen, so no key check is needed.
grouped = collections.defaultdict(list)
for item in SOME_DATA:
    grouped[item['model']].append(item)

# Print each model name followed by the records collected for it.
for model, group in grouped.items():
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(model)
    pprint(group, width=150)

Here are the results:

Yaris
[{'model': u'Yaris', 'some_value': 27065, 'trim_name': u'5-Door L Automatic'},
 {'model': u'Yaris', 'some_value': 32757, 'trim_name': u'5-Door SE Automatic'},
 {'model': u'Yaris', 'some_value': 57344, 'trim_name': u'3-Door L Manual'},
 {'model': u'Yaris', 'some_value': 64002, 'trim_name': u'5-Door SE Manual'},
 {'model': u'Yaris', 'some_value': 77974, 'trim_name': u'3-Door L Automatic'},
 {'model': u'Yaris', 'some_value': 92658, 'trim_name': u'3-Door LE Automatic'},
 {'model': u'Yaris', 'some_value': 98769, 'trim_name': u'5-Door LE Automatic'}]

Camry
[{'model': u'Camry', 'some_value': 30247, 'trim_name': u'XSE 4-Cyl'},
 {'model': u'Camry', 'some_value': 33809, 'trim_name': u'XSE V6'},
 {'model': u'Camry', 'some_value': 65637, 'trim_name': u'LE 4-Cyl'},
 {'model': u'Camry', 'some_value': 67329, 'trim_name': u'SE 4-Cyl'},
 {'model': u'Camry', 'some_value': 76269, 'trim_name': u'XLE 4-Cyl'},
 {'model': u'Camry', 'some_value': 87438, 'trim_name': u'XLE V6'}]

Corolla
[{'model': u'Corolla', 'some_value': 11239, 'trim_name': u'S'},
 {'model': u'Corolla', 'some_value': 27356, 'trim_name': u'S Plus Manual'},
 {'model': u'Corolla', 'some_value': 44792, 'trim_name': u'L Manual'},
 {'model': u'Corolla', 'some_value': 56252, 'trim_name': u'ECO Premium'},
 {'model': u'Corolla', 'some_value': 78570, 'trim_name': u'S Plus CVT'},
 {'model': u'Corolla', 'some_value': 78964, 'trim_name': u'LE Premium'},
 {'model': u'Corolla', 'some_value': 82116, 'trim_name': u'ECO'},
 {'model': u'Corolla', 'some_value': 85467, 'trim_name': u'S Premium'},
 {'model': u'Corolla', 'some_value': 87099, 'trim_name': u'L Automatic'},
 {'model': u'Corolla', 'some_value': 91974, 'trim_name': u'LE Plus'},
 {'model': u'Corolla', 'some_value': 94862, 'trim_name': u'LE'},
 {'model': u'Corolla', 'some_value': 97625, 'trim_name': u'ECO Plus'}]

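This can also be done using itertools.groupby. Note that groupby only groups consecutive items that share a key, so the data must first be sorted with the same key function. This method is probably better when working with large datasets because groupby returns the group as an iterator. (This is the reason I convert it to a list before printing.)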

import itertools

def keyfunc(x):
    """Return the value stored under ``'model'`` in record *x*.

    Used as the key function for both sorting and grouping.
    """
    model = x['model']
    return model

# itertools.groupby only merges *consecutive* items with equal keys, so the
# records are first sorted with the same key function used for grouping.
SOME_DATA = sorted(SOME_DATA, key=keyfunc)
for model, group in itertools.groupby(SOME_DATA, keyfunc):
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(model)
    # group is a one-shot iterator; build a list so pprint can display it.
    pprint(list(group), width=150)

Here are the results:

Camry
[{'model': u'Camry', 'some_value': 36776, 'trim_name': u'SE 4-Cyl'},
 {'model': u'Camry', 'some_value': 56569, 'trim_name': u'LE 4-Cyl'},
 {'model': u'Camry', 'some_value': 57052, 'trim_name': u'XSE 4-Cyl'},
 {'model': u'Camry', 'some_value': 92360, 'trim_name': u'XLE V6'},
 {'model': u'Camry', 'some_value': 92756, 'trim_name': u'XSE V6'},
 {'model': u'Camry', 'some_value': 94413, 'trim_name': u'XLE 4-Cyl'}]

Corolla
[{'model': u'Corolla', 'some_value': 13307, 'trim_name': u'L Automatic'},
 {'model': u'Corolla', 'some_value': 15726, 'trim_name': u'ECO Plus'},
 {'model': u'Corolla', 'some_value': 25579, 'trim_name': u'S'},
 {'model': u'Corolla', 'some_value': 31920, 'trim_name': u'ECO Premium'},
 {'model': u'Corolla', 'some_value': 34480, 'trim_name': u'LE'},
 {'model': u'Corolla', 'some_value': 44958, 'trim_name': u'S Plus Manual'},
 {'model': u'Corolla', 'some_value': 49606, 'trim_name': u'LE Premium'},
 {'model': u'Corolla', 'some_value': 59629, 'trim_name': u'LE Plus'},
 {'model': u'Corolla', 'some_value': 74226, 'trim_name': u'S Plus CVT'},
 {'model': u'Corolla', 'some_value': 75725, 'trim_name': u'L Manual'},
 {'model': u'Corolla', 'some_value': 82382, 'trim_name': u'ECO'},
 {'model': u'Corolla', 'some_value': 95633, 'trim_name': u'S Premium'}]

Yaris
[{'model': u'Yaris', 'some_value': 16789, 'trim_name': u'3-Door L Manual'},
 {'model': u'Yaris', 'some_value': 20349, 'trim_name': u'5-Door LE Automatic'},
 {'model': u'Yaris', 'some_value': 42897, 'trim_name': u'5-Door L Automatic'},
 {'model': u'Yaris', 'some_value': 62045, 'trim_name': u'5-Door SE Automatic'},
 {'model': u'Yaris', 'some_value': 91913, 'trim_name': u'3-Door L Automatic'},
 {'model': u'Yaris', 'some_value': 94218, 'trim_name': u'5-Door SE Manual'},
 {'model': u'Yaris', 'some_value': 97979, 'trim_name': u'3-Door LE Automatic'}]