"""Lightweight XML support for Python.
XML is an inherently hierarchical data format, and the most natural way to
represent it is with a tree. This module has two classes for this purpose:
1. ElementTree represents the whole XML document as a tree and
2. Element represents a single node in this tree.
Interactions with the whole document (reading and writing to/from files) are
usually done on the ElementTree level. Interactions with a single XML element
and its sub-elements are done on the Element level.
Element is a flexible container object designed to store hierarchical data
structures in memory. It can be described as a cross between a list and a
dictionary. Each Element has a number of properties associated with it:
'tag' - a string containing the element's name.
'attributes' - a Python dictionary storing the element's attributes.
'text' - a string containing the element's text content.
'tail' - an optional string containing text after the element's end tag.
And a number of child elements stored in a Python sequence.
To create an element instance, use the Element constructor,
or the SubElement factory function.
You can also use the ElementTree class to wrap an element structure
and convert it to and from XML.
#---------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
# http://www.pythonware.com
# --------------------------------------------------------------------
# The ElementTree toolkit is
# Copyright (c) 1999-2008 by Fredrik Lundh
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# --------------------------------------------------------------------
"Element", "ElementTree",
"fromstring", "fromstringlist",
"iselement", "iterparse",
"PI", "ProcessingInstruction",
"tostring", "tostringlist",
"XMLParser", "XMLPullParser",
from . import ElementPath
class ParseError(SyntaxError):
"""An error when parsing an XML document.
In addition to its exception value, a ParseError contains
'code' - the specific exception code
'position' - the line and column of the error
# --------------------------------------------------------------------
"""Return True if *element* appears to be an Element."""
return hasattr(element, 'tag')
This class is the reference implementation of the Element interface.
An element's length is its number of subelements. That means if you
want to check if an element is truly empty, you should check BOTH
its length AND its text attribute.
The element tag, attribute names, and attribute values can be either
*tag* is the element name. *attrib* is an optional dictionary containing
element attributes. *extra* are additional element attributes given as
<tag attrib>text<child/>...</tag>tail
"""The element's name."""
"""Dictionary of the element's attributes."""
Text before first subelement. This is either a string or the value None.
Note that if there is no text, this attribute may be either
None or the empty string, depending on the parser.
Text after this element's end tag, but before the next sibling element's
start tag. This is either a string or the value None. Note that if there
was no text, this attribute may be either None or an empty string,
def __init__(self, tag, attrib={}, **extra):
if not isinstance(attrib, dict):
raise TypeError("attrib must be dict, not %s" % (
attrib.__class__.__name__,))
return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))
def makeelement(self, tag, attrib):
"""Create a new element with the same type.
*tag* is a string containing the element name.
*attrib* is a dictionary containing the element attributes.
Do not call this method, use the SubElement factory function instead.
return self.__class__(tag, attrib)
"""Return copy of current element.
This creates a shallow copy. Subelements will be shared with the
elem = self.makeelement(self.tag, self.attrib)
return len(self._children)
"The behavior of this method will change in future versions. "
"Use specific 'len(elem)' or 'elem is not None' test instead.",
FutureWarning, stacklevel=2
return len(self._children) != 0 # emulate old behaviour, for now
def __getitem__(self, index):
return self._children[index]
def __setitem__(self, index, element):
# if isinstance(index, slice):
# assert iselement(element)
self._children[index] = element
def __delitem__(self, index):
del self._children[index]
def append(self, subelement):
"""Add *subelement* to the end of this element.
The new element will appear in document order after the last existing
subelement (or directly after the text, if it's the first subelement),
but before the end tag for this element.
self._assert_is_element(subelement)
self._children.append(subelement)
def extend(self, elements):
"""Append subelements from a sequence.
*elements* is a sequence with zero or more elements.
self._assert_is_element(element)
self._children.extend(elements)
def insert(self, index, subelement):
"""Insert *subelement* at position *index*."""
self._assert_is_element(subelement)
self._children.insert(index, subelement)
def _assert_is_element(self, e):
# Need to refer to the actual Python implementation, not the
# shadowing C implementation.
if not isinstance(e, _Element_Py):
raise TypeError('expected an Element, not %s' % type(e).__name__)
def remove(self, subelement):
"""Remove matching subelement.
Unlike the find methods, this method compares elements based on
identity, NOT ON tag value or contents. To remove subelements by
other means, the easiest way is to use a list comprehension to
select what elements to keep, and then use slice assignment to update
ValueError is raised if a matching element could not be found.
# assert iselement(element)
self._children.remove(subelement)
"""(Deprecated) Return all subelements.
Elements are returned in document order.
"This method will be removed in future versions. "
"Use 'list(elem)' or iteration over elem instead.",
DeprecationWarning, stacklevel=2
def find(self, path, namespaces=None):
"""Find first matching element by tag name or path.
*path* is a string having either an element tag or an XPath,
*namespaces* is an optional mapping from namespace prefix to full name.
Return the first matching element, or None if no element was found.
return ElementPath.find(self, path, namespaces)
def findtext(self, path, default=None, namespaces=None):
"""Find text for first matching element by tag name or path.
*path* is a string having either an element tag or an XPath,
*default* is the value to return if the element was not found,
*namespaces* is an optional mapping from namespace prefix to full name.
Return text content of first matching element, or default value if
none was found. Note that if an element is found having no text
content, the empty string is returned.
return ElementPath.findtext(self, path, default, namespaces)
def findall(self, path, namespaces=None):
"""Find all matching subelements by tag name or path.
*path* is a string having either an element tag or an XPath,
*namespaces* is an optional mapping from namespace prefix to full name.
Returns list containing all matching elements in document order.
return ElementPath.findall(self, path, namespaces)
def iterfind(self, path, namespaces=None):
"""Find all matching subelements by tag name or path.
*path* is a string having either an element tag or an XPath,
*namespaces* is an optional mapping from namespace prefix to full name.
Return an iterable yielding all matching elements in document order.
return ElementPath.iterfind(self, path, namespaces)
This function removes all subelements, clears all attributes, and sets
the text and tail attributes to None.
self.text = self.tail = None
def get(self, key, default=None):
"""Get element attribute.
Equivalent to attrib.get, but some implementations may handle this a
bit more efficiently. *key* is what attribute to look for, and
*default* is what to return if the attribute was not found.
Returns a string containing the attribute value, or the default if
return self.attrib.get(key, default)
def set(self, key, value):
"""Set element attribute.
Equivalent to attrib[key] = value, but some implementations may handle
this a bit more efficiently. *key* is what attribute to set, and
*value* is the attribute value to set it to.
"""Get list of attribute names.
Names are returned in an arbitrary order, just like an ordinary
Python dict. Equivalent to attrib.keys()
return self.attrib.keys()
"""Get element attributes as a sequence.
The attributes are returned in arbitrary order. Equivalent to
Return a list of (name, value) tuples.
return self.attrib.items()
def iter(self, tag=None):
The iterator loops over the element and all subelements in document
order, returning all elements with a matching tag.
If the tree structure is modified during iteration, new or removed
elements may or may not be included. To get a stable set, use the
list() function on the iterator, and loop over the resulting list.
*tag* is what tags to look for (default is to return all elements)
Return an iterator containing all the matching elements.
if tag is None or self.tag == tag:
def getiterator(self, tag=None):
# Change for a DeprecationWarning in 1.4
"This method will be removed in future versions. "
"Use 'elem.iter()' or 'list(elem.iter())' instead.",
PendingDeprecationWarning, stacklevel=2
return list(self.iter(tag))
The iterator loops over the element and all subelements in document
order, returning all inner text.
if not isinstance(tag, str) and tag is not None:
def SubElement(parent, tag, attrib={}, **extra):
"""Subelement factory which creates an element instance, and appends it
The element tag, attribute names, and attribute values can be either
bytes or Unicode strings.
*parent* is the parent element, *tag* is the subelements name, *attrib* is
an optional directory containing element attributes, *extra* are
additional attributes given as keyword arguments.
element = parent.makeelement(tag, attrib)
"""Comment element factory.
This function creates a special element which the standard serializer
serializes as an XML comment.
*text* is a string containing the comment string.
element = Element(Comment)
def ProcessingInstruction(target, text=None):
"""Processing Instruction element factory.
This function creates a special element which the standard serializer
serializes as an XML comment.
*target* is a string containing the processing instruction, *text* is a
string containing the processing instruction contents, if any.
element = Element(ProcessingInstruction)
element.text = element.text + " " + text
PI = ProcessingInstruction
"""Qualified name wrapper.
This class can be used to wrap a QName attribute value in order to get
proper namespace handing on output.
*text_or_uri* is a string containing the QName value either in the form