SaltyCrane Blog — Notes on Python and web development on Ubuntu Linux

Example parsing XML with lxml.objectify

Example run with lxml 2.3, Python 2.6.6 on Ubuntu 10.10

from lxml import objectify, etree

# Sample document used throughout the example: a <dataset> root containing
# one <statusthing> element and two <datathing> elements, each of which
# carries a "gabble" attribute (the second <datathing> has no text).
xml = '''
<dataset>
  <statusthing>success</statusthing>
  <datathing gabble="sent">joe@email.com</datathing>
  <datathing gabble="not sent"></datathing>
</dataset>
'''

# Parse the XML string with lxml.objectify; `root` is the <dataset> element.
root = objectify.fromstring(xml)

print root.tag
print root.text
print root.attrib
# dataset
# None
# {}

print root.statusthing.tag
print root.statusthing.text
print root.statusthing.attrib
# statusthing
# success
# {}

for e in root.datathing:
    print e.tag
    print e.text
    print e.attrib
    print e.attrib['gabble']
# datathing
# joe@email.com
# {'gabble': 'sent'}
# sent
# datathing
# None
# {'gabble': 'not sent'}
# not sent

for e in root.getchildren():
    print e.tag
# statusthing
# datathing
# datathing

for e in root.iterchildren():
    print e.tag
# statusthing
# datathing
# datathing

# The .text attribute of an objectified element is read-only: assigning to
# it raises TypeError (see the traceback below).  To change an element's
# text, assign the new value to the element itself instead.
import traceback

try:
    root.statusthing.text = 'failure'
except TypeError:
    # Catch only the error being demonstrated.  A bare "except:" would also
    # swallow unrelated failures, including KeyboardInterrupt and SystemExit.
    traceback.print_exc()
# Traceback (most recent call last):
#   File "lxml_ex.py", line 54, in <module>
#     root.statusthing.text = 'failure'
#   File "lxml.objectify.pyx", line 237, in lxml.objectify.ObjectifiedElement.__setattr__ (src/lxml/lxml.objectify.c:2980)
# TypeError: attribute 'text' of 'StringElement' objects is not writable

# modify element text and write it out as xml again
root.statusthing = 'failure'
xml_new = etree.tostring(root, pretty_print=True)
print xml_new
# <dataset>
#   <statusthing xmlns:py="http://codespeak.net/lxml/objectify/pytype" py:pytype="str">failure</statusthing>
#   <datathing gabble="sent">joe@email.com</datathing>
#   <datathing gabble="not sent">
# </datathing></dataset>

# Use deannotate() to get rid of 'py:pytype' information
objectify.deannotate(root, cleanup_namespaces=True)
xml_new = etree.tostring(root, pretty_print=True)
print xml_new
# <dataset>
#   <statusthing>failure</statusthing>
#   <datathing gabble="sent">joe@email.com</datathing>
#   <datathing gabble="not sent">
# </datathing></dataset>

# Add a child element to the root
c = etree.Element("thisdoesntmatter")
c.tag = "thisdoesntmattereither"
c.text = "mytext"
c.attrib['myattr'] = 'myvalue'
root.newchild = c
objectify.deannotate(root, cleanup_namespaces=True)
xml_new = etree.tostring(root, pretty_print=True)
print xml_new
# <dataset>
#   <statusthing>failure</statusthing>
#   <datathing gabble="sent">joe@email.com</datathing>
#   <datathing gabble="not sent">
#   <newchild myattr="myvalue">mytext</newchild>
# </datathing></dataset></module>

References:

2 Comments — Comments feed for this post


#1 Alvin Mites commented on 2011-09-16:

Thank you for the post, seems I end up referencing your blog quite a bit via Google.

Your examples are consistently simple and to the point.


#2 chebrian commented on 2012-11-26:

nice guide.

great examples.

Post a comment

Required
Required, but not displayed
Optional

Format using Markdown. (No HTML.)
  • Code blocks: prefix each line by at least 4 spaces or 1 tab (and a blank line before and after)
  • Code span: surround with backticks
  • Blockquotes: prefix lines to be quoted with >
  • Links: <URL>
  • Links w/ description: [description](URL)
Created with Django and Bootstrap | Hosted by Linode