Saltycrane logo

SaltyCrane Blog

Notes on Python, Django, and web development on Ubuntu Linux

    

Example parsing XML with lxml.objectify

The example below was run with lxml 2.3 and Python 2.6.6 on Ubuntu 10.10.

from lxml import objectify, etree

# Sample document: a <dataset> root holding one <statusthing> element and
# two <datathing> elements, each with a "gabble" attribute.
xml = '''
<dataset>
  <statusthing>success</statusthing>
  <datathing gabble="sent">joe@email.com</datathing>
  <datathing gabble="not sent"></datathing>
</dataset>
'''

# Parse the string; the object returned is the root <dataset> element.
root = objectify.fromstring(xml)

# Tag name, text and attributes of the root element. The expected output is
# listed below the calls.
print(root.tag)
print(root.text)
print(root.attrib)
# dataset
# None
# {}

# Child elements are reached as attributes named after their tag.
# The same three properties are shown for <statusthing>.
print(root.statusthing.tag)
print(root.statusthing.text)
print(root.statusthing.attrib)
# statusthing
# success
# {}

# Looping over root.datathing visits each <datathing> element in turn.
# Individual attribute values are read by indexing .attrib by name.
for e in root.datathing:
    print(e.tag)
    print(e.text)
    print(e.attrib)
    print(e.attrib['gabble'])
# datathing
# joe@email.com
# {'gabble': 'sent'}
# sent
# datathing
# None
# {'gabble': 'not sent'}
# not sent

# Two ways to walk every direct child of the root regardless of tag name.
# Both print the same three tags, in document order.
# NOTE(review): lxml documents getchildren() as deprecated; iterchildren()
# looks like the form to prefer in new code -- confirm against the lxml
# version in use.
for e in root.getchildren():
    print(e.tag)
# statusthing
# datathing
# datathing

for e in root.iterchildren():
    print(e.tag)
# statusthing
# datathing
# datathing

# You cannot modify the text attribute of an objectified element; assign to
# the element itself instead (shown in the next step).
# The assignment below is expected to fail with TypeError. Only that
# exception is caught, so anything unexpected (including KeyboardInterrupt)
# still propagates instead of being reported as if it were this error.
try:
    root.statusthing.text = 'failure'
except TypeError:
    # Imported here because it is only needed to display the failure.
    import traceback
    traceback.print_exc()
# Traceback (most recent call last):
#   File "lxml_ex.py", line 54, in <module>
#     root.statusthing.text = 'failure'
#   File "lxml.objectify.pyx", line 237, in lxml.objectify.ObjectifiedElement.__setattr__ (src/lxml/lxml.objectify.c:2980)
# TypeError: attribute 'text' of 'StringElement' objects is not writable

# Change the element's text by assigning a string to the child attribute,
# then serialise the tree back to XML. The output shows that the assignment
# also attached a py:pytype annotation to <statusthing>.
root.statusthing = 'failure'
xml_new = etree.tostring(root, pretty_print=True)
print(xml_new)
# <dataset>
#   <statusthing xmlns:py="http://codespeak.net/lxml/objectify/pytype" py:pytype="str">failure</statusthing>
#   <datathing gabble="sent">joe@email.com</datathing>
#   <datathing gabble="not sent"/>
# </dataset>

# deannotate() strips the 'py:pytype' information; with
# cleanup_namespaces=True the xmlns:py declaration disappears from the
# serialised output as well.
objectify.deannotate(root, cleanup_namespaces=True)
xml_new = etree.tostring(root, pretty_print=True)
print(xml_new)
# <dataset>
#   <statusthing>failure</statusthing>
#   <datathing gabble="sent">joe@email.com</datathing>
#   <datathing gabble="not sent"/>
# </dataset>

# Add a child element to the root. The tag given to the new element does not
# matter (neither at creation nor when reassigned): in the output it appears
# under the attribute name used in the assignment, "newchild".
c = etree.Element("thisdoesntmatter")
c.tag = "thisdoesntmattereither"
c.text = "mytext"
c.attrib['myattr'] = 'myvalue'
root.newchild = c

# Remove the type annotations again before serialising.
objectify.deannotate(root, cleanup_namespaces=True)
xml_new = etree.tostring(root, pretty_print=True)
print(xml_new)
# <dataset>
#   <statusthing>failure</statusthing>
#   <datathing gabble="sent">joe@email.com</datathing>
#   <datathing gabble="not sent"/>
#   <newchild myattr="myvalue">mytext</newchild>
# </dataset>

References:

2 Comments — feed icon Comments feed for this post


#1 Alvin Mites commented on 2011-09-16:

Thank you for the post, seems I end up referencing your blog quite a bit via Google.

Your examples are consistently simple and to the point.


#2 chebrian commented on 2012-11-26:

nice guide.

great examples.

Post a comment

Required
Required, but not displayed
Optional

Format using Markdown. (No HTML.)
  • Code blocks: prefix each line by at least 4 spaces or 1 tab (and a blank line before and after)
  • Code span: surround with backticks
  • Blockquotes: prefix lines to be quoted with >
  • Links: <URL>
  • Links w/ description: [description](URL)
Created with Django | Hosted by Linode