# Bind _Element and _ElementInterface as two further names for Element
# (presumably legacy aliases kept for backward compatibility -- confirm).
_Element = _ElementInterface = Element
# Subelement factory. This function creates an element instance, and
# appends it to an existing element.
# The element name, attribute names, and attribute values can be
# either 8-bit ASCII strings or Unicode strings.
# @param parent The parent element.
# @param tag The subelement name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @return An element instance.
def SubElement(parent, tag, attrib={}, **extra):
    """Create a subelement named *tag* and append it to *parent*.

    parent -- the parent element; must provide makeelement() and append().
    tag -- the subelement name.
    attrib -- optional dictionary of element attributes.
    extra -- additional attributes, given as keyword arguments; these
        override entries of the same name in *attrib*.

    Returns the newly created element.
    """
    # Work on a copy: neither the caller's dictionary nor the shared
    # default value is ever mutated.
    attributes = dict(attrib)
    attributes.update(extra)
    element = parent.makeelement(tag, attributes)
    parent.append(element)
    return element
# Comment element factory. This factory function creates a special
# element that will be serialized as an XML comment by the standard
# serializer.
# The comment string can be either an 8-bit ASCII string or a Unicode
# string.
# @param text A string containing the comment string.
# @return An element instance, representing a comment.
def Comment(text=None):
    """Create an element that represents an XML comment.

    The factory function itself is used as the element's tag, which is
    how such elements are told apart from ordinary ones.

    text -- a string containing the comment string.

    Returns the new element.
    """
    element = Element(Comment)
    element.text = text
    return element
# PI element factory. This factory function creates a special element
# that will be serialized as an XML processing instruction by the standard
# serializer.
# @param target A string containing the PI target.
# @param text A string containing the PI contents, if any.
# @return An element instance, representing a PI.
def ProcessingInstruction(target, text=None):
    """Create an element that represents an XML processing instruction.

    The factory function itself is used as the element's tag.

    target -- a string containing the PI target.
    text -- a string containing the PI contents, if any.

    Returns the new element; its text is the target, followed by a space
    and the contents when contents were given.
    """
    element = Element(ProcessingInstruction)
    element.text = target
    if text:
        # Only append contents that exist; concatenating None would raise.
        element.text = element.text + " " + text
    return element


# Short alias for the processing instruction factory.
PI = ProcessingInstruction
# QName wrapper. This can be used to wrap a QName attribute value, in
# order to get proper namespace handling on output.
# @param text A string containing the QName value, in the form {uri}local,
# or, if the tag argument is given, the URI part of a QName.
# @param tag Optional tag. If given, the first argument is interpreted as
# a URI, and this argument is interpreted as a local name.
# @return An opaque object, representing the QName.
class QName(object):
    """Wrapper for a qualified name.

    text_or_uri -- the QName value in the form {uri}local or, when *tag*
        is given, only the URI part.
    tag -- optional local name; if given, the first argument is
        interpreted as a URI.

    The resulting "{uri}local" string is stored in the ``text``
    attribute.
    """

    def __init__(self, text_or_uri, tag=None):
        if tag:
            # Two-argument form: combine URI and local name.
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __hash__(self):
        # Keep hashing consistent with __cmp__, which compares on text.
        return hash(self.text)

    def __cmp__(self, other):
        """Compare by text, against another QName or a plain string."""
        if isinstance(other, QName):
            return cmp(self.text, other.text)
        return cmp(self.text, other)
# --------------------------------------------------------------------
# ElementTree wrapper class. This class represents an entire element
# hierarchy, and adds some extra support for serialization to and from
# XML.
# @param element Optional root element.
# @keyparam file Optional file handle or file name. If given, the
# tree is initialized with the contents of this XML file.
class ElementTree(object):
    """Wrapper around an entire element hierarchy.

    element -- optional root element.
    file -- optional file handle or file name. If given, the tree is
        initialized with the contents of this XML file.
    """

    def __init__(self, element=None, file=None):
        # assert element is None or iselement(element)
        self._root = element  # first node
        if file:
            self.parse(file)

    def getroot(self):
        """Return the root element of this tree."""
        return self._root

    def _setroot(self, element):
        """Replace the root element of this tree.

        This discards the current contents of the tree and replaces it
        with the given element. Use with care.
        """
        # assert iselement(element)
        self._root = element

    def parse(self, source, parser=None):
        """Load an external XML document into this tree.

        source -- a file name or file object. A file object only has to
            implement a read(n) method.
        parser -- optional parser instance providing feed() and close().
            If not given, XMLParser(target=TreeBuilder()) is used.

        Returns the document root element. Errors raised by the parser
        propagate to the caller.
        """
        opened_here = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            opened_here = True
        try:
            if parser is None:
                parser = XMLParser(target=TreeBuilder())
            # Feed the parser in bounded chunks instead of reading the
            # whole document into memory.
            while True:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            # Only close what this method opened; a file object passed
            # in by the caller stays open.
            if opened_here:
                source.close()

    def iter(self, tag=None):
        """Return the root element's iterator over the tree.

        tag -- what tags to look for (default is to return all elements).
        """
        # assert self._root is not None
        return self._root.iter(tag)

    def getiterator(self, tag=None):
        """Compatibility wrapper: return list(self.iter(tag)).

        Emits a PendingDeprecationWarning on every call.
        """
        import warnings
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    @staticmethod
    def _relative_path(path):
        """Return *path* with a leading "/" rewritten to "./".

        Shared by find(), findtext(), findall() and iterfind(). A
        FutureWarning naming the rewritten path is emitted whenever the
        path had to be changed.
        """
        if path[:1] == "/":
            import warnings
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                # 3 = skip this helper and the public method calling it,
                # so the warning points at the user's own call site.
                FutureWarning, stacklevel=3
            )
        return path

    def find(self, path, namespaces=None):
        """Same as getroot().find(path), starting at the root of the tree.

        path -- what element to look for.
        namespaces -- optional namespace prefix map.

        Returns the first matching element, or None if no element was
        found.
        """
        # assert self._root is not None
        path = self._relative_path(path)
        return self._root.find(path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Same as getroot().findtext(path), starting at the tree root.

        path -- what element to look for.
        default -- what to return if the element was not found.
        namespaces -- optional namespace prefix map.

        Returns the text content of the first matching element, or the
        default value if no element was found.
        """
        # assert self._root is not None
        path = self._relative_path(path)
        return self._root.findtext(path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Same as getroot().findall(path), starting at the tree root.

        path -- what element to look for.
        namespaces -- optional namespace prefix map.

        Returns a list or iterator containing all matching elements.
        """
        # assert self._root is not None
        path = self._relative_path(path)
        return self._root.findall(path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        Same as getroot().iterfind(path).

        path -- what element to look for.
        namespaces -- optional namespace prefix map.

        Returns an iterator or sequence containing all matching elements.
        """
        # assert self._root is not None
        path = self._relative_path(path)
        return self._root.iterfind(path, namespaces)

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        """Write the element tree to a file, as XML.

        file_or_filename -- a file name, or a file object opened for
            writing.
        encoding -- optional output encoding (default is US-ASCII;
            UTF-8 when the method is "c14n").
        xml_declaration -- controls if an XML declaration should be
            added to the file. Use False for never, True for always,
            None for only if not US-ASCII or UTF-8. None is default.
        default_namespace -- sets the default XML namespace (for "xmlns").
        method -- optional output method ("xml", "html", "text" or
            "c14n"; default is "xml").

        Raises ValueError if *method* is not a known output method.
        """
        # assert self._root is not None
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if hasattr(file_or_filename, "write"):
            out = file_or_filename
        else:
            out = open(file_or_filename, "wb")
        try:
            write = out.write
            if not encoding:
                # Canonical XML is always UTF-8; everything else falls
                # back to US-ASCII.
                if method == "c14n":
                    encoding = "utf-8"
                else:
                    encoding = "us-ascii"
            elif xml_declaration or (xml_declaration is None and
                                     encoding not in ("utf-8", "us-ascii")):
                # An XML declaration only makes sense in XML output.
                if method == "xml":
                    write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # Close the file only if it was opened here, and do so even
            # when serialization fails.
            if file_or_filename is not out:
                out.close()

    def write_c14n(self, file):
        """Write the tree using the "c14n" output method."""
        # lxml.etree compatibility. use output method instead
        return self.write(file, method="c14n")
# --------------------------------------------------------------------
# Collect the qualified names and namespace URIs used below *elem* and
# return them as the pair (qnames, namespaces).
# NOTE(review): this definition looks truncated and de-indented in this
# copy -- the initial bindings of `qnames` and `namespaces`, the headers of
# the nested helpers and loops, and most guarding conditions are not
# visible. The comments below describe only the statements that are present.
def _namespaces(elem, encoding, default_namespace=None):
# identify namespaces used in this tree
# maps qnames to *encoded* prefix:local names
# the default namespace URI is registered under the empty prefix
namespaces[default_namespace] = ""
# encode a text value using the requested output encoding
return text.encode(encoding)
# calculate serialized qname representation
# drop the first character, then split "uri}local" at the last "}"
uri, tag = qname[1:].rsplit("}", 1)
# prefix candidates: this tree's table, the module-level _namespace_map,
# and a generated "ns<N>" name (presumably each is a fallback for the
# previous one -- the conditions choosing between them are not visible)
prefix = namespaces.get(uri)
prefix = _namespace_map.get(uri)
prefix = "ns%d" % len(namespaces)
# serialized form is "prefix:local", or the bare local name
qnames[qname] = encode("%s:%s" % (prefix, tag))
qnames[qname] = encode(tag) # default element
# FIXME: can this be handled in XML 1.0?
# message text about unqualified names combined with default_namespace
# (the statement that uses it is not visible here)
"cannot use non-qualified names with "
"default_namespace option"
# a name handled without a namespace part is stored encoded as-is
qnames[qname] = encode(qname)
_raise_serialization_error(qname)
# populate qname and namespaces table
iterate = elem.getiterator # cET compatibility
# tags may be QName instances (looked up by their text) or plain strings
if isinstance(tag, QName):
if tag.text not in qnames:
elif isinstance(tag, basestring):
# any other tag except None, Comment and PI is reported as an error
elif tag is not None and tag is not Comment and tag is not PI:
_raise_serialization_error(tag)
# attribute keys and values may be QName instances as well
for key, value in elem.items():
if isinstance(key, QName):
if isinstance(value, QName) and value.text not in qnames:
# a text value may also be a QName instance
if isinstance(text, QName) and text.text not in qnames:
return qnames, namespaces
# Serialize *elem* as XML by passing output fragments to the *write*
# callable.
# NOTE(review): this definition looks truncated and de-indented in this
# copy -- the bindings of `tag`, `text`, `items` and `e`, the opening `if`
# of the tag dispatch, and the writes for start/end tags are not visible.
# The comments below describe only the statements that are present.
def _serialize_xml(write, elem, encoding, qnames, namespaces):
# comment markup: the text goes through _encode
write("<!--%s-->" % _encode(text, encoding))
elif tag is ProcessingInstruction:
# processing instruction markup: the text goes through _encode
write("<?%s?>" % _encode(text, encoding))
# text written through _escape_cdata
write(_escape_cdata(text, encoding))
# recursive call passes None as the namespaces argument
_serialize_xml(write, e, encoding, qnames, None)
# each namespaces item is unpacked as (v, k); items are ordered by their
# second member
for v, k in sorted(namespaces.items(),
key=lambda x: x[1]): # sort on prefix
write(" xmlns%s=\"%s\"" % (
_escape_attrib(v, encoding)
# attributes are written in sorted order; the name comes from the qnames
# table and the value goes through _escape_attrib
for k, v in sorted(items): # lexical order
v = _escape_attrib(v, encoding)
write(" %s=\"%s\"" % (qnames[k], v))
write(_escape_cdata(text, encoding))
# recursive call passes None as the namespaces argument
_serialize_xml(write, e, encoding, qnames, None)
# the element's tail goes through _escape_cdata
write(_escape_cdata(elem.tail, encoding))
# Lower-cased tag names that _serialize_html tests with
# `ltag not in HTML_EMPTY` (presumably the HTML elements that never take
# an end tag -- confirm against the serializer).
HTML_EMPTY = set([
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
])
def _serialize_html(write, elem, encoding, qnames, namespaces):
write("<!--%s-->" % _escape_cdata(text, encoding))
elif tag is ProcessingInstruction:
write("<?%s?>" % _escape_cdata(text, encoding))
write(_escape_cdata(text, encoding))
_serialize_html(write, e, encoding, qnames, None)
for v, k in sorted(namespaces.items(),
key=lambda x: x[1]): # sort on prefix
write(" xmlns%s=\"%s\"" % (
_escape_attrib(v, encoding)
for k, v in sorted(items): # lexical order
v = _escape_attrib_html(v, encoding)
# FIXME: handle boolean attributes
write(" %s=\"%s\"" % (qnames[k], v))
if ltag == "script" or ltag == "style":
write(_encode(text, encoding))
write(_escape_cdata(text, encoding))
_serialize_html(write, e, encoding, qnames, None)
if ltag not in HTML_EMPTY: