SaltyCrane Blog — Notes on JavaScript and web development

Tabular data structure conversion in Python

Here is a Python library to convert between various tabular data structures, including list of lists, list of dicts, dict of lists, and dict of dicts. My original attempts at these conversions required that the data be rectangular (i.e. each column has the same number of elements). However, further research led me to this ASPN Recipe, which uses `map` to transpose a list of lists even if it is not rectangular. With help from the mailing list, I rewrote the recipe without using `lambda`. (I did this because Guido suggested not using `map` with `lambda`, for the sake of clarity.)

I used list comprehensions wherever possible and a functional/declarative approach in general. It is likely there is a better way to do many of these conversions. (After all, I just learned how to use zip().) In particular, the functions with the comment "Better way?" chain several of the other conversion functions together to achieve the desired result. All of these could be optimized. Feedback on better methods is welcome.

Example data structures

Here are examples of the 8 different tabular data structures. Note that if a transpose is performed (i.e. rows switched with columns or vice versa), the output is padded with None. Otherwise, it is left as is.

# lorl- list of lists where each inner list is a row
lorl = [
    ['a1', 'b1', 'c1'],    # row 1
    ['a2', 'b2', 'c2'],    # row 2
    ['a3', 'b3', 'c3'],    # row 3
    ['a4', 'b4',     ],    # row 4 (short: no column c value)
    ]

# locl- list of lists where each inner list is a column
locl = [
    ['a1', 'a2', 'a3', 'a4'],    # col a
    ['b1', 'b2', 'b3', 'b4'],    # col b
    ['c1', 'c2', 'c3',     ],    # col c (short: no row 4 value)
    ]

# lord- list of dicts where each dict is a row, keyed by column key
lord = [
    {'a':'a1', 'b':'b1', 'c':'c1'},   # row 1
    {'a':'a2', 'b':'b2', 'c':'c2'},   # row 2
    {'a':'a3', 'b':'b3', 'c':'c3'},   # row 3
    {'a':'a4', 'b':'b4',         },   # row 4 (no 'c' entry)
    ]

# locd- list of dicts where each dict is a column, keyed by row key
locd = [
    {1:'a1', 2:'a2', 3:'a3', 4:'a4'},         # col a
    {1:'b1', 2:'b2', 3:'b3', 4:'b4'},         # col b
    {1:'c1', 2:'c2', 3:'c3',       },         # col c (no entry for row 4)
    ]

# dorl- dict of lists where each list is a row, keyed by row key
dorl = {
    1: ['a1', 'b1', 'c1'],            # row 1
    2: ['a2', 'b2', 'c2'],            # row 2
    3: ['a3', 'b3', 'c3'],            # row 3
    4: ['a4', 'b4',     ],            # row 4 (short: no column c value)
    }
# docl- dict of lists where each list is a column, keyed by column key
docl = {
    'a': ['a1', 'a2', 'a3', 'a4'],          # column a
    'b': ['b1', 'b2', 'b3', 'b4'],          # column b
    'c': ['c1', 'c2', 'c3',     ],          # column c (short: no row 4 value)
    }

# dord- dict of dicts where each inner dict is a row
# (outer keys are row keys, inner keys are column keys)
dord = {
    1: {'a':'a1', 'b':'b1', 'c':'c1'},  # row 1
    2: {'a':'a2', 'b':'b2', 'c':'c2'},  # row 2
    3: {'a':'a3', 'b':'b3', 'c':'c3'},  # row 3
    4: {'a':'a4', 'b':'b4',         },  # row 4 (no 'c' entry)
    }

# docd- dict of dicts where each inner dict is a column
# (outer keys are column keys, inner keys are row keys)
docd = {
    'a': {1:'a1', 2:'a2', 3:'a3', 4:'a4'},    # column a
    'b': {1:'b1', 2:'b2', 3:'b3', 4:'b4'},    # column b
    'c': {1:'c1', 2:'c2', 3:'c3',       },    # column c (no entry for row 4)
    }

# ordered lists of the row keys and column keys used by the dict-based
# structures above
rowkeys = [1, 2, 3, 4]
colkeys = ['a', 'b', 'c']

Code

Below is the library of functions.

"""tabular.py
Functions to convert tabular data structures
The following data structures are supported:
lorl- list of lists where each inner list is a row
locl- list of lists where each inner list is a column
lord- list of dicts where each dict is a row
locd- list of dicts where each dict is a column
dorl- dict of lists where each list is a row
docl- dict of lists where each list is a column
dord- dict of dicts where each inner dict is a row
docd- dict of dicts where each inner dict is a column
"""
#-------------------------------------------------------
# from lorl to ...
#-------------------------------------------------------
def lorl2locl(lorl):
    """Transpose a list of rows into a list of columns.

    lorl -- list of lists where each inner list is a row

    Rows may differ in length; short rows are padded with None so that
    every returned column has one entry per input row. An empty input
    gives an empty list.
    """
    # map(None, *rows) exists only on Python 2, and with a single row it
    # returns the bare cells rather than 1-tuples, so list('a1') would
    # split the cell into characters. zip_longest pads with None the same
    # way without either problem.
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2 names it izip_longest
        from itertools import izip_longest as zip_longest
    return [list(col) for col in zip_longest(*lorl)]

def lorl2lord(lorl, colkeys):
    """Turn each row list into a dict keyed by column key.

    Cells are paired with colkeys positionally via zip, so a row shorter
    than colkeys simply yields a dict with fewer entries.
    """
    records = []
    for cells in lorl:
        records.append(dict(zip(colkeys, cells)))
    return records

def lorl2locd(lorl, rowkeys):
    """Convert a list of rows into a list of column dicts.

    Works in two steps: transpose with lorl2locl, then key each column
    by rowkeys with locl2locd.
    """
    # better way?
    columns = lorl2locl(lorl)
    return locl2locd(columns, rowkeys)

def lorl2dorl(lorl, rowkeys):
    """Key each row list by its row key.

    Rows and rowkeys are paired positionally; the row lists themselves
    are reused, not copied.
    """
    rows = list(lorl)
    return dict(zip(rowkeys, rows))

def lorl2docl(lorl, colkeys):
    """Convert a list of rows into a dict of column lists.

    Works in two steps: transpose with lorl2locl, then key the columns
    by colkeys with locl2docl.
    """
    # better way?
    columns = lorl2locl(lorl)
    return locl2docl(columns, colkeys)

def lorl2dord(lorl, rowkeys, colkeys):
    """Convert a list of rows into a dict of row dicts.

    Each row becomes a dict keyed by colkeys; those dicts are then keyed
    by rowkeys. Both pairings are positional.
    """
    records = []
    for cells in lorl:
        records.append(dict(zip(colkeys, cells)))
    return dict(zip(rowkeys, records))

def lorl2docd(lorl, rowkeys, colkeys):
    """Convert a list of rows into a dict of column dicts.

    The rows are transposed with lorl2locl; each resulting column is
    keyed by rowkeys and the column dicts are keyed by colkeys.
    """
    # better way?
    col_dicts = []
    for col in lorl2locl(lorl):
        col_dicts.append(dict(zip(rowkeys, col)))
    return dict(zip(colkeys, col_dicts))

#-------------------------------------------------------
# from locl to ...
#-------------------------------------------------------
def locl2lorl(locl):
    """Transpose a list of columns into a list of rows.

    locl -- list of lists where each inner list is a column

    Columns may differ in length; short columns are padded with None so
    that every returned row has one entry per input column. An empty
    input gives an empty list.
    """
    # map(None, *cols) exists only on Python 2, and with a single column
    # it returns the bare cells rather than 1-tuples, so list('a1') would
    # split the cell into characters. zip_longest pads with None the same
    # way without either problem.
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2 names it izip_longest
        from itertools import izip_longest as zip_longest
    return [list(row) for row in zip_longest(*locl)]

def locl2lord(locl, colkeys):
    """Convert a list of columns into a list of row dicts.

    Works in two steps: transpose with locl2lorl, then key each row by
    colkeys with lorl2lord.
    """
    # better way?
    rows = locl2lorl(locl)
    return lorl2lord(rows, colkeys)

def locl2locd(locl, rowkeys):
    """Turn each column list into a dict keyed by row key.

    Cells are paired with rowkeys positionally via zip, so a column
    shorter than rowkeys yields a dict with fewer entries.
    """
    col_dicts = []
    for cells in locl:
        col_dicts.append(dict(zip(rowkeys, cells)))
    return col_dicts

def locl2dorl(locl, rowkeys):
    """Convert a list of columns into a dict of row lists.

    The columns are transposed with locl2lorl and the resulting rows are
    paired positionally with rowkeys.
    """
    # better way?
    rows = locl2lorl(locl)
    return dict(zip(rowkeys, rows))

def locl2docl(locl, colkeys):
    """Key each column list by its column key.

    Columns and colkeys are paired positionally; the column lists are
    reused, not copied.
    """
    keyed = {}
    for ckey, column in zip(colkeys, locl):
        keyed[ckey] = column
    return keyed

def locl2dord(locl, rowkeys, colkeys):
    """Convert a list of columns into a dict of row dicts.

    The columns are transposed with locl2lorl; each resulting row is
    keyed by colkeys and the row dicts are keyed by rowkeys.
    """
    # better way?
    records = []
    for row in locl2lorl(locl):
        records.append(dict(zip(colkeys, row)))
    return dict(zip(rowkeys, records))

def locl2docd(locl, rowkeys, colkeys):
    """Convert a list of columns into a dict of column dicts.

    Each column becomes a dict keyed by rowkeys; those dicts are then
    keyed by colkeys. Both pairings are positional.
    """
    col_dicts = []
    for cells in locl:
        col_dicts.append(dict(zip(rowkeys, cells)))
    return dict(zip(colkeys, col_dicts))

#-------------------------------------------------------
# from lord to ...
#-------------------------------------------------------
def lord2lorl(lord, colkeys):
    """Flatten each row dict into a list ordered by colkeys.

    Column keys missing from a row are skipped, so that row's list is
    shorter (no None padding is inserted).
    """
    rows = []
    for record in lord:
        cells = []
        for ckey in colkeys:
            if ckey in record:
                cells.append(record[ckey])
        rows.append(cells)
    return rows

def lord2locl(lord, colkeys):
    """Convert a list of row dicts into a list of column lists.

    Works in two steps: flatten the rows with lord2lorl, then transpose
    with lorl2locl.
    """
    # better way?
    rows = lord2lorl(lord, colkeys)
    return lorl2locl(rows)

def lord2locd(lord, rowkeys, colkeys):
    """Convert a list of row dicts into a list of column dicts.

    One dict is produced per column key, mapping row key -> value. Rows
    are paired with rowkeys positionally; a row that lacks the column
    contributes no entry to that column's dict.
    """
    col_dicts = []
    for ckey in colkeys:
        column = {}
        for rkey, record in zip(rowkeys, lord):
            if ckey in record:
                column[rkey] = record[ckey]
        col_dicts.append(column)
    return col_dicts

def lord2dorl(lord, rowkeys, colkeys):
    """Convert a list of row dicts into a dict of row lists.

    Each row dict is flattened in colkeys order (missing column keys are
    skipped), and the resulting lists are paired positionally with
    rowkeys.
    """
    rows = []
    for record in lord:
        cells = []
        for ckey in colkeys:
            if ckey in record:
                cells.append(record[ckey])
        rows.append(cells)
    return dict(zip(rowkeys, rows))

def lord2docl(lord, colkeys):
    """Convert a list of row dicts into a dict of column lists.

    For each column key, collects that column's value from every row
    that has it, in row order. Rows lacking the key are skipped, so the
    column list is shorter (no None padding is inserted).
    """
    columns = {}
    for ckey in colkeys:
        cells = []
        for record in lord:
            if ckey in record:
                cells.append(record[ckey])
        columns[ckey] = cells
    return columns

def lord2dord(lord, rowkeys):
    """Key each row dict by its row key.

    Rows and rowkeys are paired positionally; the row dicts are reused,
    not copied.
    """
    keyed = {}
    for rkey, record in zip(rowkeys, lord):
        keyed[rkey] = record
    return keyed

def lord2docd(lord, rowkeys, colkeys):
    """Convert a list of row dicts into a dict of column dicts.

    lord    -- list of dicts where each dict is a row
    rowkeys -- row keys, paired positionally with the rows in lord
    colkeys -- column keys to extract

    Returns a dict mapping each column key to a dict of row key -> value.
    A row that lacks a column contributes no entry to that column's
    dict.
    """
    # Pair each row with its row key *before* filtering on the column.
    # Filtering first and zipping afterwards would shift every value after
    # a missing cell onto the wrong row key.
    return dict((ckey,
                 dict((rkey, row[ckey])
                      for rkey, row in zip(rowkeys, lord) if ckey in row))
                for ckey in colkeys)

#-------------------------------------------------------
# from locd to ...
#-------------------------------------------------------
def locd2lorl(locd, rowkeys):
    """Convert a list of column dicts into a list of row lists.

    Works in two steps: flatten the columns with locd2locl, then
    transpose with locl2lorl.
    """
    # better way?
    columns = locd2locl(locd, rowkeys)
    return locl2lorl(columns)

def locd2locl(locd, rowkeys):
    """Flatten each column dict into a list ordered by rowkeys.

    Row keys missing from a column are skipped, so that column's list is
    shorter (no None padding is inserted).
    """
    columns = []
    for col in locd:
        cells = []
        for rkey in rowkeys:
            if rkey in col:
                cells.append(col[rkey])
        columns.append(cells)
    return columns

def locd2lord(locd, rowkeys, colkeys):
    """Convert a list of column dicts into a list of row dicts.

    One dict is produced per row key, mapping column key -> value.
    Columns are paired with colkeys positionally; a column that lacks
    the row contributes no entry to that row's dict.
    """
    records = []
    for rkey in rowkeys:
        record = {}
        for ckey, col in zip(colkeys, locd):
            if rkey in col:
                record[ckey] = col[rkey]
        records.append(record)
    return records

def locd2dorl(locd, rowkeys):
    """Convert a list of column dicts into a dict of row lists.

    For each row key, collects that row's value from every column that
    has it, in column order. Columns lacking the key are skipped, so the
    row list is shorter (no None padding is inserted).
    """
    rows = {}
    for rkey in rowkeys:
        cells = []
        for col in locd:
            if rkey in col:
                cells.append(col[rkey])
        rows[rkey] = cells
    return rows

def locd2docl(locd, rowkeys, colkeys):
    """Convert a list of column dicts into a dict of column lists.

    Each column dict is flattened in rowkeys order (missing row keys are
    skipped), and the resulting lists are paired positionally with
    colkeys.
    """
    columns = []
    for col in locd:
        cells = []
        for rkey in rowkeys:
            if rkey in col:
                cells.append(col[rkey])
        columns.append(cells)
    return dict(zip(colkeys, columns))

def locd2dord(locd, rowkeys, colkeys):
    """Convert a list of column dicts into a dict of row dicts.

    locd    -- list of dicts where each dict is a column
    rowkeys -- row keys to extract
    colkeys -- column keys, paired positionally with the columns in locd

    Returns a dict mapping each row key to a dict of column key -> value.
    A column that lacks a row contributes no entry to that row's dict.
    """
    # Pair each column with its column key *before* filtering on the row.
    # Filtering first and zipping afterwards would shift every value after
    # a missing cell onto the wrong column key.
    return dict((rkey,
                 dict((ckey, col[rkey])
                      for ckey, col in zip(colkeys, locd) if rkey in col))
                for rkey in rowkeys)

def locd2docd(locd, colkeys):
    """Key each column dict by its column key.

    Columns and colkeys are paired positionally; the column dicts are
    reused, not copied.
    """
    keyed = {}
    for ckey, col in zip(colkeys, locd):
        keyed[ckey] = col
    return keyed

#-------------------------------------------------------
# from dorl to ...
#-------------------------------------------------------
def dorl2lorl(dorl, rowkeys):
    """List the row lists of dorl in rowkeys order.

    Every key in rowkeys is looked up directly, so a key absent from
    dorl raises KeyError.
    """
    rows = []
    for rkey in rowkeys:
        rows.append(dorl[rkey])
    return rows

def dorl2locl(dorl, rowkeys):
    """Convert a dict of row lists into a list of column lists.

    Works in two steps: order the rows with dorl2lorl, then transpose
    with lorl2locl.
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    return lorl2locl(rows)

def dorl2lord(dorl, rowkeys, colkeys):
    """Convert a dict of row lists into a list of row dicts.

    Rows are taken in rowkeys order (a missing key raises KeyError) and
    each row's cells are paired positionally with colkeys.
    """
    records = []
    for rkey in rowkeys:
        cells = dorl[rkey]
        records.append(dict(zip(colkeys, cells)))
    return records

def dorl2locd(dorl, rowkeys):
    """Convert a dict of row lists into a list of column dicts.

    Chains dorl2lorl (order the rows), lorl2locl (transpose) and
    locl2locd (key each column by rowkeys).
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    columns = lorl2locl(rows)
    return locl2locd(columns, rowkeys)

def dorl2docl(dorl, rowkeys, colkeys):
    """Convert a dict of row lists into a dict of column lists.

    Chains dorl2lorl (order the rows), lorl2locl (transpose) and
    locl2docl (key the columns by colkeys).
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    columns = lorl2locl(rows)
    return locl2docl(columns, colkeys)

def dorl2dord(dorl, rowkeys, colkeys):
    """Convert a dict of row lists into a dict of row dicts.

    Orders the rows with dorl2lorl, then hands them to lorl2dord to be
    keyed by colkeys and rowkeys.
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    return lorl2dord(rows, rowkeys, colkeys)

def dorl2docd(dorl, rowkeys, colkeys):
    """Convert a dict of row lists into a dict of column dicts.

    Chains dorl2lorl (order the rows), lorl2locl (transpose) and
    locl2docd (key cells by rowkeys and columns by colkeys).
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    columns = lorl2locl(rows)
    return locl2docd(columns, rowkeys, colkeys)

#-------------------------------------------------------
# from docl to ...
#-------------------------------------------------------
def docl2lorl(docl, colkeys):
    """Convert a dict of column lists into a list of row lists.

    Works in two steps: order the columns with docl2locl, then transpose
    with locl2lorl.
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    return locl2lorl(columns)

def docl2locl(docl, colkeys):
    """List the column lists of docl in colkeys order.

    Every key in colkeys is looked up directly, so a key absent from
    docl raises KeyError.
    """
    columns = []
    for ckey in colkeys:
        columns.append(docl[ckey])
    return columns

def docl2lord(docl, rowkeys, colkeys):
    """Convert a dict of column lists into a list of row dicts.

    Chains docl2locl (order the columns), locl2lorl (transpose) and
    lorl2lord (key each row by colkeys). rowkeys is accepted but not
    used by this conversion.
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    rows = locl2lorl(columns)
    return lorl2lord(rows, colkeys)

def docl2locd(docl, rowkeys, colkeys):
    """Convert a dict of column lists into a list of column dicts.

    Columns are taken in colkeys order (a missing key raises KeyError)
    and each column's cells are paired positionally with rowkeys.
    """
    col_dicts = []
    for ckey in colkeys:
        cells = docl[ckey]
        col_dicts.append(dict(zip(rowkeys, cells)))
    return col_dicts

def docl2dorl(docl, rowkeys, colkeys):
    """Convert a dict of column lists into a dict of row lists.

    Chains docl2locl (order the columns), locl2lorl (transpose) and
    lorl2dorl (key the rows by rowkeys).
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    rows = locl2lorl(columns)
    return lorl2dorl(rows, rowkeys)

def docl2dord(docl, rowkeys, colkeys):
    """Convert a dict of column lists into a dict of row dicts.

    Chains docl2locl (order the columns), locl2lorl (transpose) and
    lorl2dord (key cells by colkeys and rows by rowkeys).
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    rows = locl2lorl(columns)
    return lorl2dord(rows, rowkeys, colkeys)

def docl2docd(docl, rowkeys, colkeys):
    """Convert a dict of column lists into a dict of column dicts.

    Orders the columns with docl2locl, then hands them to locl2docd to
    be keyed by rowkeys and colkeys.
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    return locl2docd(columns, rowkeys, colkeys)

#-------------------------------------------------------
# from dord to ...
#-------------------------------------------------------
def dord2lorl(dord, rowkeys, colkeys):
    """Convert a dict of row dicts into a list of row lists.

    Rows are emitted in rowkeys order and cells in colkeys order. Row
    keys absent from dord and column keys absent from a row are skipped
    rather than padded.
    """
    rows = []
    for rkey in rowkeys:
        if rkey not in dord:
            continue
        record = dord[rkey]
        cells = []
        for ckey in colkeys:
            if ckey in record:
                cells.append(record[ckey])
        rows.append(cells)
    return rows

def dord2locl(dord, rowkeys, colkeys):
    """Convert a dict of row dicts into a list of column lists.

    Works in two steps: flatten to row lists with dord2lorl, then
    transpose with lorl2locl.
    """
    # better way?
    rows = dord2lorl(dord, rowkeys, colkeys)
    return lorl2locl(rows)

def dord2lord(dord, rowkeys):
    """List the row dicts of dord in rowkeys order.

    Every key in rowkeys is looked up directly, so a key absent from
    dord raises KeyError. The row dicts are reused, not copied.
    """
    records = []
    for rkey in rowkeys:
        records.append(dord[rkey])
    return records

def dord2locd(dord, rowkeys, colkeys):
    """Convert a dict of row dicts into a list of column dicts.

    Orders the row dicts with dord2lord, then hands them to lord2locd.
    """
    # better way?
    records = dord2lord(dord, rowkeys)
    return lord2locd(records, rowkeys, colkeys)

def dord2dorl(dord, rowkeys, colkeys):
    """Convert a dict of row dicts into a dict of row lists.

    For each row key, the row's values are listed in colkeys order;
    column keys absent from the row are skipped.
    """
    # don't need zip
    rows = {}
    for rkey in rowkeys:
        cells = []
        for ckey in colkeys:
            # dord[rkey] is looked up per column key, so nothing is
            # looked up at all when colkeys is empty.
            if ckey in dord[rkey]:
                cells.append(dord[rkey][ckey])
        rows[rkey] = cells
    return rows

def dord2docl(dord, rowkeys, colkeys):
    """Convert a dict of row dicts into a dict of column lists.

    Chains dord2lorl (flatten to row lists), lorl2locl (transpose) and
    locl2docl (key the columns by colkeys).
    """
    # better way?
    rows = dord2lorl(dord, rowkeys, colkeys)
    columns = lorl2locl(rows)
    return locl2docl(columns, colkeys)

def dord2docd(dord, rowkeys, colkeys):
    """Convert a dict of row dicts into a dict of column dicts.

    Chains dord2lorl (flatten to row lists), lorl2locl (transpose) and
    locl2docd (key cells by rowkeys and columns by colkeys).
    """
    # better way?
    rows = dord2lorl(dord, rowkeys, colkeys)
    columns = lorl2locl(rows)
    return locl2docd(columns, rowkeys, colkeys)

#-------------------------------------------------------
# from docd to ...
#-------------------------------------------------------
def docd2lorl(docd, rowkeys, colkeys):
    """Convert a dict of column dicts into a list of row lists.

    Works in two steps: flatten to column lists with docd2locl, then
    transpose with locl2lorl.
    """
    # better way?
    columns = docd2locl(docd, rowkeys, colkeys)
    return locl2lorl(columns)

def docd2locl(docd, rowkeys, colkeys):
    """Convert a dict of column dicts into a list of column lists.

    Columns are emitted in colkeys order and cells in rowkeys order.
    Column keys absent from docd and row keys absent from a column are
    skipped rather than padded.
    """
    columns = []
    for ckey in colkeys:
        if ckey not in docd:
            continue
        col = docd[ckey]
        cells = []
        for rkey in rowkeys:
            if rkey in col:
                cells.append(col[rkey])
        columns.append(cells)
    return columns

def docd2lord(docd, rowkeys, colkeys):
    """Convert a dict of column dicts into a list of row dicts.

    Orders the column dicts with docd2locd, then hands them to
    locd2lord.
    """
    # better way?
    col_dicts = docd2locd(docd, colkeys)
    return locd2lord(col_dicts, rowkeys, colkeys)

def docd2locd(docd, colkeys):
    """List the column dicts of docd in colkeys order.

    Every key in colkeys is looked up directly, so a key absent from
    docd raises KeyError. The column dicts are reused, not copied.
    """
    col_dicts = []
    for ckey in colkeys:
        col_dicts.append(docd[ckey])
    return col_dicts

def docd2dorl(docd, rowkeys, colkeys):
    """Convert a dict of column dicts into a dict of row lists.

    Chains docd2locl (flatten to column lists), locl2lorl (transpose)
    and lorl2dorl (key the rows by rowkeys).
    """
    # better way?
    columns = docd2locl(docd, rowkeys, colkeys)
    rows = locl2lorl(columns)
    return lorl2dorl(rows, rowkeys)

def docd2docl(docd, rowkeys, colkeys):
    """Convert a dict of column dicts into a dict of column lists.

    For each column key, the column's values are listed in rowkeys
    order; row keys absent from the column are skipped.
    """
    # don't need zip
    columns = {}
    for ckey in colkeys:
        cells = []
        for rkey in rowkeys:
            # docd[ckey] is looked up per row key, so nothing is looked
            # up at all when rowkeys is empty.
            if rkey in docd[ckey]:
                cells.append(docd[ckey][rkey])
        columns[ckey] = cells
    return columns

def docd2dord(docd, rowkeys, colkeys):
    """Convert a dict of column dicts into a dict of row dicts.

    Chains docd2locl (flatten to column lists), locl2lorl (transpose)
    and lorl2dord (key cells by colkeys and rows by rowkeys).
    """
    # better way?
    columns = docd2locl(docd, rowkeys, colkeys)
    rows = locl2lorl(columns)
    return lorl2dord(rows, rowkeys, colkeys)

Comments