Some more python recursion examples
I had a number of yaml files that contained passwords that needed encrypting. After parsing the yaml file with pyyaml, the data looked something like this:
EXAMPLE_DATA = {
    'jobs': [{'frequency': '* * * * *',
              'jobconfig': [{'config': [('*',
                                         {'maxspeed': 1048576,
                                          'password': 'onesecretpassword',
                                          'port': 22,
                                          'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
                                          'username': 'myusername'})],
                             'hasbro': 'basset'},
                            {'config': [('*',
                                         {'field_delim': ',',
                                          'field_line': True,
                                          'no_blanks': True,
                                          'quote_char': '"'})],
                             'hasbro': 'pen'},
                            {'config': [('*',
                                         {'db_database': 'mydatabase',
                                          'db_host': 'myhost',
                                          'db_password': 'anothersecretpassword',
                                          'db_table': 'mytable',
                                          'db_user': 'myuser'})],
                             'hasbro': 'dart'}],
              'jobdesc': 'Data from tootsiepop',
              'jobname': 'tootsiepop',
              'max_records_fail': '110%',
              'min_failure_time': '1000y'}],
    'vendor': 'tootsiepop'}
Print all leaf nodes¶
Here is a recursive function that prints all the leaf nodes of my nested data structure.
def print_all_leaf_nodes(data):
    if isinstance(data, dict):
        for item in data.values():
            print_all_leaf_nodes(item)
    elif isinstance(data, list) or isinstance(data, tuple):
        for item in data:
            print_all_leaf_nodes(item)
    else:
        print data
print_all_leaf_nodes(EXAMPLE_DATA)
Results:
tootsiepop 1000y tootsiepop * basset://basset1.domain.com/tootsiepop/123.csv myusername onesecretpassword 1048576 22 basset * True " , True pen * anothersecretpassword mytable myhost mydatabase myuser dart * * * * * 110% Data from tootsiepop
Get all leaf nodes¶
This function returns all leaf nodes as a list instead of printing them. A wrapper function is used to create a Namespace instance to hold the results variable. This could alternatively be stored in a global (module-level) variable. See my notes on variable scope for more info about using a class as a namespace.
def get_all_leaf_nodes(data):
    class Namespace(object):
        pass
    ns = Namespace()
    ns.results = []
    def inner(data):
        if isinstance(data, dict):
            for item in data.values():
                inner(item)
        elif isinstance(data, list) or isinstance(data, tuple):
            for item in data:
                inner(item)
        else:
            ns.results.append(data)
    inner(data)
    return ns.results
from pprint import pprint
pprint(get_all_leaf_nodes(EXAMPLE_DATA))
Results:
['tootsiepop', '1000y', 'tootsiepop', '*', 'basset://basset1.domain.com/tootsiepop/123.csv', 'myusername', 'onesecretpassword', 1048576, 22, 'basset', '*', True, '"', ',', True, 'pen', '*', 'anothersecretpassword', 'mytable', 'myhost', 'mydatabase', 'myuser', 'dart', '* * * * *', '110%', 'Data from tootsiepop']
Get all leaf key value pairs¶
This function gets all key value pairs where values are not compound data structures (i.e. dicts or lists)
def get_all_key_value_pairs_where_values_are_simple(data):
    class Namespace(object):
        pass
    ns = Namespace()
    ns.results = []
    def inner(data):
        if isinstance(data, dict):
            for k, v in data.iteritems():
                if (isinstance(v, dict) or
                    isinstance(v, list) or
                    isinstance(v, tuple)
                    ):
                    inner(v)
                else:
                    ns.results.append((k, v))
        elif isinstance(data, list) or isinstance(data, tuple):
            for item in data:
                inner(item)
    inner(data)
    return ns.results
from pprint import pprint
pprint(get_all_key_value_pairs_where_values_are_simple(EXAMPLE_DATA))
Results:
[('vendor', 'tootsiepop'),
 ('min_failure_time', '1000y'),
 ('jobname', 'tootsiepop'),
 ('url', 'basset://basset1.domain.com/tootsiepop/123.csv'),
 ('username', 'myusername'),
 ('password', 'onesecretpassword'),
 ('maxspeed', 1048576),
 ('port', 22),
 ('hasbro', 'basset'),
 ('field_line', True),
 ('quote_char', '"'),
 ('field_delim', ','),
 ('no_blanks', True),
 ('hasbro', 'pen'),
 ('db_password', 'anothersecretpassword'),
 ('db_table', 'mytable'),
 ('db_host', 'myhost'),
 ('db_database', 'mydatabase'),
 ('db_user', 'myuser'),
 ('hasbro', 'dart'),
 ('frequency', '* * * * *'),
 ('max_records_fail', '110%'),
 ('jobdesc', 'Data from tootsiepop')]Modify values of terminal key value in a nested dict¶
This function modifies all the values of all dicts that are not compound data structures (i.e. dicts or lists). The modfn argument is a function that modifies the key value pair. It should accept two arguments: a key and value and it should return the modified value.
The example function, super_secure_encrypt is a function that checks if the string 'password' is in the key, and "encrypts" the value using the <sarcasm>super secure</sarcasm> ROT13 algorithm. (We are actually using the keyczar toolkit from google to do the encryption.)
def modify_all_simple_dict_values(data, modfn):
    if isinstance(data, dict):
        for k, v in data.iteritems():
            if (isinstance(v, dict) or
                isinstance(v, list) or
                isinstance(v, tuple)
                ):
                modify_all_simple_dict_values(v, modfn)
            else:
                data[k] = modfn(k, v)
    elif isinstance(data, list) or isinstance(data, tuple):
        for item in data:
            modify_all_simple_dict_values(item, modfn)
    return data
def super_secure_encrypt(key, value):
    if 'password' in key:
        value = value.encode('rot13')
    return value
from pprint import pprint
pprint(modify_all_simple_dict_values(EXAMPLE_DATA, super_secure_encrypt))
Results:
{'jobs': [{'frequency': '* * * * *',
           'jobconfig': [{'config': [('*',
                                      {'maxspeed': 1048576,
                                       'password': 'barfrpergcnffjbeq',
                                       'port': 22,
                                       'url': 'basset://basset1.domain.com/tootsiepop/123.csv',
                                       'username': 'myusername'})],
                          'hasbro': 'basset'},
                         {'config': [('*',
                                      {'field_delim': ',',
                                       'field_line': True,
                                       'no_blanks': True,
                                       'quote_char': '"'})],
                          'hasbro': 'pen'},
                         {'config': [('*',
                                      {'db_database': 'mydatabase',
                                       'db_host': 'myhost',
                                       'db_password': 'nabgurefrpergcnffjbeq',
                                       'db_table': 'mytable',
                                       'db_user': 'myuser'})],
                          'hasbro': 'dart'}],
           'jobdesc': 'Data from tootsiepop',
           'jobname': 'tootsiepop',
           'max_records_fail': '110%',
           'min_failure_time': '1000y'}],
 'vendor': 'tootsiepop'}Related posts
- Find all combinations of a set of lists with itertools.product — posted 2011-11-01
- Free Computer Science courses online — posted 2009-06-30
- Find the N longest lines in a file with Python — posted 2009-06-28
- How to reverse words in a sentence using Python and C — posted 2009-04-22
- Python recursion example to navigate tree data — posted 2008-08-19
Comments
Great writeup!
I thought I should mention that isinstance() can test against multiple types at once, this lets you remove some of your ifs and ors if you wanted:
inner(data) could be rewritten as:
def inner(data):
    if isinstance(data, dict):
        for k, v in data.iteritems():
            if isinstance(v, (dict, list, tuple)):
                inner(v)
            else:
                ns.results.append((k, v))
    elif isinstance(data, (list, tuple)):
        for item in data:
            inner(item)
opposed to what you have:
def inner(data):
    if isinstance(data, dict):
        for k, v in data.iteritems():
            if (isinstance(v, dict) or
                isinstance(v, list) or
                isinstance(v, tuple)
                ):
                inner(v)
            else:
                ns.results.append((k, v))
    elif isinstance(data, list) or isinstance(data, tuple):
        for item in data:
            inner(item)
This isn't incredibly important to the subject of your post but I figured I'd mention it anyways.
Stuart
Stuart, thanks for the tip-- I will have to remember that!

 
  
  
 