SaltyCrane Blog — Notes on JavaScript and web development

Some more python recursion examples

I had a number of yaml files that contained passwords that needed encrypting. After parsing the yaml file with pyyaml, the data looked something like this:

EXAMPLE_DATA = {
    'jobs': [{'frequency': '* * * * *',
              'jobconfig': [{'config': [('*',
                                         {'maxspeed': 1048576,
                                          'password': 'onesecretpassword',
                                          'port': 22,
                                          'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
                                          'username': 'myusername'})],
                             'hasbro': 'basset'},
                            {'config': [('*',
                                         {'field_delim': ',',
                                          'field_line': True,
                                          'no_blanks': True,
                                          'quote_char': '"'})],
                             'hasbro': 'pen'},
                            {'config': [('*',
                                         {'db_database': 'mydatabase',
                                          'db_host': 'myhost',
                                          'db_password': 'anothersecretpassword',
                                          'db_table': 'mytable',
                                          'db_user': 'myuser'})],
                             'hasbro': 'dart'}],
              'jobdesc': 'Data from tootsiepop',
              'jobname': 'tootsiepop',
              'max_records_fail': '110%',
              'min_failure_time': '1000y'}],
    'vendor': 'tootsiepop'}

Here is a recursive function that prints all the leaf nodes of my nested data structure.

def print_all_leaf_nodes(data):
    if isinstance(data, dict):
        for item in data.values():
            print_all_leaf_nodes(item)
    elif isinstance(data, list) or isinstance(data, tuple):
        for item in data:
            print_all_leaf_nodes(item)
    else:
        print data

print_all_leaf_nodes(EXAMPLE_DATA)

Results:

tootsiepop
1000y
tootsiepop
*
basset://basset1.domain.com/tootsiepop/123.csv
myusername
onesecretpassword
1048576
22
basset
*
True
"
,
True
pen
*
anothersecretpassword
mytable
myhost
mydatabase
myuser
dart
* * * * *
110%
Data from tootsiepop

Get all leaf nodes

This function returns all leaf nodes as a list instead of printing them. A wrapper function is used to create a Namespace instance to hold the results variable. This could alternatively be stored in a global (module-level) variable. See my notes on variable scope for more info about using a class as a namespace.

def get_all_leaf_nodes(data):
    class Namespace(object):
        pass
    ns = Namespace()
    ns.results = []

    def inner(data):
        if isinstance(data, dict):
            for item in data.values():
                inner(item)
        elif isinstance(data, list) or isinstance(data, tuple):
            for item in data:
                inner(item)
        else:
            ns.results.append(data)

    inner(data)
    return ns.results

from pprint import pprint
pprint(get_all_leaf_nodes(EXAMPLE_DATA))

Results:

['tootsiepop',
 '1000y',
 'tootsiepop',
 '*',
 'basset://basset1.domain.com/tootsiepop/123.csv',
 'myusername',
 'onesecretpassword',
 1048576,
 22,
 'basset',
 '*',
 True,
 '"',
 ',',
 True,
 'pen',
 '*',
 'anothersecretpassword',
 'mytable',
 'myhost',
 'mydatabase',
 'myuser',
 'dart',
 '* * * * *',
 '110%',
 'Data from tootsiepop']

Get all leaf key value pairs

This function gets all key value pairs where values are not compound data structures (i.e. dicts or lists)

def get_all_key_value_pairs_where_values_are_simple(data):
    class Namespace(object):
        pass
    ns = Namespace()
    ns.results = []

    def inner(data):
        if isinstance(data, dict):
            for k, v in data.iteritems():
                if (isinstance(v, dict) or
                    isinstance(v, list) or
                    isinstance(v, tuple)
                    ):
                    inner(v)
                else:
                    ns.results.append((k, v))
        elif isinstance(data, list) or isinstance(data, tuple):
            for item in data:
                inner(item)

    inner(data)
    return ns.results

from pprint import pprint
pprint(get_all_key_value_pairs_where_values_are_simple(EXAMPLE_DATA))

Results:

[('vendor', 'tootsiepop'),
 ('min_failure_time', '1000y'),
 ('jobname', 'tootsiepop'),
 ('url', 'basset://basset1.domain.com/tootsiepop/123.csv'),
 ('username', 'myusername'),
 ('password', 'onesecretpassword'),
 ('maxspeed', 1048576),
 ('port', 22),
 ('hasbro', 'basset'),
 ('field_line', True),
 ('quote_char', '"'),
 ('field_delim', ','),
 ('no_blanks', True),
 ('hasbro', 'pen'),
 ('db_password', 'anothersecretpassword'),
 ('db_table', 'mytable'),
 ('db_host', 'myhost'),
 ('db_database', 'mydatabase'),
 ('db_user', 'myuser'),
 ('hasbro', 'dart'),
 ('frequency', '* * * * *'),
 ('max_records_fail', '110%'),
 ('jobdesc', 'Data from tootsiepop')]

Modify values of terminal key value in a nested dict

This function modifies all the values of all dicts that are not compound data structures (i.e. dicts or lists). The modfn argument is a function that modifies the key value pair. It should accept two arguments: a key and value and it should return the modified value.

The example function, super_secure_encrypt is a function that checks if the string 'password' is in the key, and "encrypts" the value using the <sarcasm>super secure</sarcasm> ROT13 algorithm. (We are actually using the keyczar toolkit from google to do the encryption.)

def modify_all_simple_dict_values(data, modfn):
    if isinstance(data, dict):
        for k, v in data.iteritems():
            if (isinstance(v, dict) or
                isinstance(v, list) or
                isinstance(v, tuple)
                ):
                modify_all_simple_dict_values(v, modfn)
            else:
                data[k] = modfn(k, v)
    elif isinstance(data, list) or isinstance(data, tuple):
        for item in data:
            modify_all_simple_dict_values(item, modfn)

    return data


def super_secure_encrypt(key, value):
    if 'password' in key:
        value = value.encode('rot13')
    return value


from pprint import pprint
pprint(modify_all_simple_dict_values(EXAMPLE_DATA, super_secure_encrypt))

Results:

{'jobs': [{'frequency': '* * * * *',
           'jobconfig': [{'config': [('*',
                                      {'maxspeed': 1048576,
                                       'password': 'barfrpergcnffjbeq',
                                       'port': 22,
                                       'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
                                       'username': 'myusername'})],
                          'hasbro': 'basset'},
                         {'config': [('*',
                                      {'field_delim': ',',
                                       'field_line': True,
                                       'no_blanks': True,
                                       'quote_char': '"'})],
                          'hasbro': 'pen'},
                         {'config': [('*',
                                      {'db_database': 'mydatabase',
                                       'db_host': 'myhost',
                                       'db_password': 'nabgurefrpergcnffjbeq',
                                       'db_table': 'mytable',
                                       'db_user': 'myuser'})],
                          'hasbro': 'dart'}],
           'jobdesc': 'Data from tootsiepop',
           'jobname': 'tootsiepop',
           'max_records_fail': '110%',
           'min_failure_time': '1000y'}],
 'vendor': 'tootsiepop'}

Comments


#1 Stuart Powers commented on :

Great writeup!

I thought I should mention that isinstance() can test against multiple types at once, this lets you remove some of your ifs and ors if you wanted:

inner(data) could be rewritten as:

def inner(data):
    if isinstance(data, dict):
        for k, v in data.iteritems():
            if isinstance(v, (dict, list, tuple)):
                inner(v)
            else:
                ns.results.append((k, v))
    elif isinstance(data, (list, tuple)):
        for item in data:
            inner(item)

opposed to what you have:

def inner(data):
    if isinstance(data, dict):
        for k, v in data.iteritems():
            if (isinstance(v, dict) or
                isinstance(v, list) or
                isinstance(v, tuple)
                ):
                inner(v)
            else:
                ns.results.append((k, v))
    elif isinstance(data, list) or isinstance(data, tuple):
        for item in data:
            inner(item)

This isn't incredibly important to the subject of your post but I figured I'd mention it anyways.

Stuart


#2 Eliot commented on :

Stuart, thanks for the tip-- I will have to remember that!