# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
# light-weight XML support for Python 2.3 and later.
# 2005-11-12 fl added tostringlist/fromstringlist helpers
# 2006-07-05 fl merged in selected changes from the 1.3 sandbox
# 2006-07-05 fl removed support for 2.1 and earlier
# 2007-06-21 fl added deprecation/future warnings
# 2007-08-25 fl added doctype hook, added parser version attribute etc
# 2007-08-26 fl added new serializer code (better namespace handling, etc)
# 2007-08-27 fl warn for broken /tag searches on tree level
# 2007-09-02 fl added html/text methods to serializer (experimental)
# 2007-09-05 fl added method argument to tostring/tostringlist
# 2007-09-06 fl improved error handling
# 2007-09-13 fl added itertext, iterfind; assorted cleanups
# 2007-12-15 fl added C14N hooks, copy method (experimental)
# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
# http://www.pythonware.com
# --------------------------------------------------------------------
# The ElementTree toolkit is
# Copyright (c) 1999-2008 by Fredrik Lundh
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# --------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
"Element", "ElementTree",
"fromstring", "fromstringlist",
"iselement", "iterparse",
"PI", "ProcessingInstruction",
"tostring", "tostringlist",
"XMLParser", "XMLTreeBuilder",
# The <b>Element</b> type is a flexible container object, designed to
# store hierarchical data structures in memory. The type can be
# described as a cross between a list and a dictionary.
# Each element has a number of properties associated with it:
# <li>a <i>tag</i>. This is a string identifying what kind of data
# this element represents (the element type, in other words).</li>
# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
# <li>a <i>text</i> string.</li>
# <li>an optional <i>tail</i> string.</li>
# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
# To create an element instance, use the {@link #Element} constructor
# or the {@link #SubElement} factory function.
# The {@link #ElementTree} class can be used to wrap an element
# structure, and convert it from and to XML.
class _SimpleElementPath(object):
# emulate pre-1.2 find/findtext/findall behaviour
def find(self, element, tag, namespaces=None):
def findtext(self, element, tag, default=None, namespaces=None):
elem = self.find(element, tag)
def iterfind(self, element, tag, namespaces=None):
for elem in element.iter(tag[3:]):
def findall(self, element, tag, namespaces=None):
    """Return every match produced by ``iterfind`` as a list.

    The arguments are handed to ``self.iterfind`` unchanged and the
    resulting iterable is fully consumed before returning.
    """
    matches = self.iterfind(element, tag, namespaces)
    return list(matches)
from . import ElementPath
ElementPath = _SimpleElementPath()
# Parser error. This is a subclass of <b>SyntaxError</b>.
# In addition to the exception value, an exception instance contains a
# specific exception code in the <b>code</b> attribute, and the line and
# column of the error in the <b>position</b> attribute.
class ParseError(SyntaxError):
# --------------------------------------------------------------------
# Checks if an object appears to be a valid element object.
# @param element An element instance.
# @return A true value if this is an element object.
# FIXME: not sure about this; might be a better idea to look
# for tag/attrib/text attributes
return isinstance(element, Element) or hasattr(element, "tag")
# Element class. This class defines the Element interface, and
# provides a reference implementation of this interface.
# The element name, attribute names, and attribute values can be
# either ASCII strings (ordinary Python strings containing only 7-bit
# ASCII characters) or Unicode strings.
# @param tag The element name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @see ProcessingInstruction
# <tag attrib>text<child/>...</tag>tail
# (Attribute) Element tag.
# (Attribute) Element attribute dictionary. Where possible, use
# {@link #Element.get}, {@link #Element.set},
# {@link #Element.keys}, and {@link #Element.items} to access
# element attributes.
# (Attribute) Text before first subelement. This is either a
# string or the value None. Note that if there was no text, this
# attribute may be either None or an empty string, depending on
# the parser.
# (Attribute) Text after this element's end tag, but before the
# next sibling element's start tag. This is either a string or
# the value None. Note that if there was no text, this attribute
# may be either None or an empty string, depending on the parser.
tail = None # text after end tag, if any
def __init__(self, tag, attrib={}, **extra):
return "<Element %s at 0x%x>" % (repr(self.tag), id(self))
# Creates a new element object of the same type as this element.
# @param tag Element tag.
# @param attrib Element attributes, given as a dictionary.
# @return A new element instance.
def makeelement(self, tag, attrib):
    """Build a new element using the same class as this element.

    ``tag`` and ``attrib`` are passed positionally to the constructor
    of ``self.__class__``; the freshly created instance is returned.
    """
    factory = self.__class__
    return factory(tag, attrib)
# (Experimental) Copies the current element. This creates a
# shallow copy; subelements will be shared with the original tree.
# @return A new element instance.
elem = self.makeelement(self.tag, self.attrib)
# Returns the number of subelements. Note that this only counts
# full elements; to check if there's any content in an element, you
# have to check both the length and the <b>text</b> attribute.
# @return The number of subelements.
return len(self._children)
"The behavior of this method will change in future versions. "
"Use specific 'len(elem)' or 'elem is not None' test instead.",
FutureWarning, stacklevel=2
return len(self._children) != 0 # emulate old behaviour, for now
# Returns the given subelement, by index.
# @param index What subelement to return.
# @return The given subelement.
# @exception IndexError If the given element does not exist.
def __getitem__(self, index):
    """Return the subelement stored at *index*.

    The lookup is delegated to the child sequence, so any error it
    raises for a bad index propagates to the caller.
    """
    children = self._children
    return children[index]
# Replaces the given subelement, by index.
# @param index What subelement to replace.
# @param element The new element value.
# @exception IndexError If the given element does not exist.
def __setitem__(self, index, element):
    """Replace the subelement stored at *index* with *element*.

    The assignment is forwarded unchanged to the child sequence, so
    ``index`` may be anything that sequence accepts.  No check is made
    that ``element`` is actually an element object.
    """
    children = self._children
    children[index] = element
# Deletes the given subelement, by index.
# @param index What subelement to delete.
# @exception IndexError If the given element does not exist.
def __delitem__(self, index):
    """Delete the subelement stored at *index*.

    Deletion is delegated to the child sequence; errors it raises for
    a bad index propagate to the caller.
    """
    children = self._children
    del children[index]
# Adds a subelement to the end of this element. In document order,
# the new element will appear after the last existing subelement (or
# directly after the text, if it's the first subelement), but before
# the end tag for this element.
# @param element The element to add.
def append(self, element):
    """Add *element* as the last subelement of this element.

    No check is made that ``element`` is actually an element object.
    """
    children = self._children
    children.append(element)
# Appends subelements from a sequence.
# @param elements A sequence object with zero or more elements.
def extend(self, elements):
    """Add every item of *elements* after the existing subelements.

    The items are handed to the child sequence as a whole; they are
    not validated individually.
    """
    children = self._children
    children.extend(elements)
# Inserts a subelement at the given position in this element.
# @param index Where to insert the new subelement.
def insert(self, index, element):
    """Insert *element* at position *index* among the subelements.

    No check is made that ``element`` is actually an element object.
    """
    children = self._children
    children.insert(index, element)
# Removes a matching subelement. Unlike the <b>find</b> methods,
# this method compares elements based on identity, not on tag
# value or contents. To remove subelements by other means, the
# easiest way is often to use a list comprehension to select what
# elements to keep, and use slice assignment to update the parent
# @param element What element to remove.
# @exception ValueError If a matching element could not be found.
def remove(self, element):
    """Remove *element* from the subelements of this element.

    Removal is delegated to the child sequence's own ``remove``; the
    error it raises when nothing matches propagates to the caller.
    """
    children = self._children
    children.remove(element)
# (Deprecated) Returns all subelements. The elements are returned
# @return A list of subelements.
# @defreturn list of Element instances
"This method will be removed in future versions. "
"Use 'list(elem)' or iteration over elem instead.",
DeprecationWarning, stacklevel=2
# Finds the first matching subelement, by tag name or path.
# @param path What element to look for.
# @keyparam namespaces Optional namespace prefix map.
# @return The first matching element, or None if no element was found.
# @defreturn Element or None
def find(self, path, namespaces=None):
    """Look up the first subelement matching *path*.

    The search itself is performed by ``ElementPath.find``, which
    receives this element, ``path`` and ``namespaces`` unchanged; its
    result is returned as-is.
    """
    match = ElementPath.find(self, path, namespaces)
    return match
# Finds text for the first matching subelement, by tag name or path.
# @param path What element to look for.
# @param default What to return if the element was not found.
# @keyparam namespaces Optional namespace prefix map.
# @return The text content of the first matching element, or the
# default value if no element was found. Note that if the element
# is found, but has no text content, this method returns an
# empty string.
def findtext(self, path, default=None, namespaces=None):
    """Look up the text of the first subelement matching *path*.

    The work is done by ``ElementPath.findtext``, which receives this
    element, ``path``, ``default`` and ``namespaces`` unchanged; its
    result is returned as-is.
    """
    text = ElementPath.findtext(self, path, default, namespaces)
    return text
# Finds all matching subelements, by tag name or path.
# @param path What element to look for.
# @keyparam namespaces Optional namespace prefix map.
# @return A list or other sequence containing all matching elements,
# @defreturn list of Element instances
def findall(self, path, namespaces=None):
    """Collect all subelements matching *path*.

    The search itself is performed by ``ElementPath.findall``, which
    receives this element, ``path`` and ``namespaces`` unchanged; its
    result is returned as-is.
    """
    matches = ElementPath.findall(self, path, namespaces)
    return matches
# Finds all matching subelements, by tag name or path.
# @param path What element to look for.
# @keyparam namespaces Optional namespace prefix map.
# @return An iterator or sequence containing all matching elements,
# @defreturn a generated sequence of Element instances
def iterfind(self, path, namespaces=None):
    """Iterate over all subelements matching *path*.

    The search itself is performed by ``ElementPath.iterfind``, which
    receives this element, ``path`` and ``namespaces`` unchanged; its
    result is returned as-is.
    """
    matches = ElementPath.iterfind(self, path, namespaces)
    return matches
# Resets an element. This function removes all subelements, clears
# all attributes, and sets the <b>text</b> and <b>tail</b> attributes
# to None.
self.text = self.tail = None
# Gets an element attribute. Equivalent to <b>attrib.get</b>, but
# some implementations may handle this a bit more efficiently.
# @param key What attribute to look for.
# @param default What to return if the attribute was not found.
# @return The attribute value, or the default value, if the
# attribute was not found.
# @defreturn string or None
def get(self, key, default=None):
    """Return the value of attribute *key*, or *default* if absent.

    This is a thin wrapper around ``self.attrib.get``.
    """
    attributes = self.attrib
    return attributes.get(key, default)
# Sets an element attribute. Equivalent to <b>attrib[key] = value</b>,
# but some implementations may handle this a bit more efficiently.
# @param key What attribute to set.
# @param value The attribute value.
def set(self, key, value):
# Gets a list of attribute names. The names are returned in an
# arbitrary order (just like for an ordinary Python dictionary).
# Equivalent to <b>attrib.keys()</b>.
# @return A list of element attribute names.
# @defreturn list of strings
return self.attrib.keys()
# Gets element attributes, as a sequence. The attributes are
# returned in an arbitrary order. Equivalent to <b>attrib.items()</b>.
# @return A list of (name, value) tuples for all attributes.
# @defreturn list of (string, string) tuples
return self.attrib.items()
# Creates a tree iterator. The iterator loops over this element
# and all subelements, in document order, and returns all elements
# with a matching tag.
# If the tree structure is modified during iteration, new or removed
# elements may or may not be included. To get a stable set, use the
# list() function on the iterator, and loop over the resulting list.
# @param tag What tags to look for (default is to return all elements).
# @return An iterator containing all the matching elements.
def iter(self, tag=None):
if tag is None or self.tag == tag:
def getiterator(self, tag=None):
# Change for a DeprecationWarning in 1.4
"This method will be removed in future versions. "
"Use 'elem.iter()' or 'list(elem.iter())' instead.",
PendingDeprecationWarning, stacklevel=2
return list(self.iter(tag))
# Creates a text iterator. The iterator loops over this element
# and all subelements, in document order, and returns all inner
# text.
# @return An iterator containing all inner text.
if not isinstance(tag, basestring) and tag is not None: