{uri}local, or if the tag argument is given, the URI part of a QName.
*tag* is an optional argument which if given, will make the first
argument (text_or_uri) be interpreted as a URI, and this argument (tag)
be interpreted as a local name.
def __init__(self, text_or_uri, tag=None):
    """Store the qualified name as text on ``self.text``.

    *text_or_uri* is either the complete name (``{uri}local`` or a plain
    name) or, when *tag* is given, only the URI part.

    *tag* is an optional local name; when it is given the stored text
    becomes ``{text_or_uri}tag``.

    """
    # Only build the "{uri}local" form when a local name was supplied;
    # otherwise the first argument already is the complete name.
    if tag:
        text_or_uri = "{%s}%s" % (text_or_uri, tag)
    self.text = text_or_uri
# NOTE(review): the `def` headers of the methods these bodies belong to are
# not visible in this view; the notes below describe only the statements
# that are shown -- confirm against the full file.
# Representation: the class name followed by the repr of the stored text.
return '<%s %r>' % (self.__class__.__name__, self.text)
# "<=": against another QName the two texts are compared, otherwise this
# text is compared with *other* directly.
if isinstance(other, QName):
return self.text <= other.text
return self.text <= other
# "<": QName branch only; the fallback for other operands is not visible.
if isinstance(other, QName):
return self.text < other.text
# ">=": same two-way dispatch as "<=".
if isinstance(other, QName):
return self.text >= other.text
return self.text >= other
# ">": QName branch only; the fallback for other operands is not visible.
if isinstance(other, QName):
return self.text > other.text
# "==": texts are compared, so a QName can equal a value that equals its text.
if isinstance(other, QName):
return self.text == other.text
return self.text == other
# --------------------------------------------------------------------
"""An XML element hierarchy.
This class also provides support for serialization to and from
*element* is an optional root element node,
*file* is an optional file handle or file name of an XML file whose
contents will be used to initialize the tree with.
def __init__(self, element=None, file=None):
    """Create the tree wrapper, optionally loading it from *file*.

    *element* is an optional root element node.

    *file* is an optional file handle or file name of an XML file; when
    given it is handed to ``self.parse``.

    """
    # assert element is None or iselement(element)
    self._root = element  # first node
    # A supplied file is parsed after the root has been stored.
    if file:
        self.parse(file)
"""Return root element of this tree."""
def _setroot(self, element):
    """Replace root element of this tree.

    This will discard the current contents of the tree and replace it
    with the given element.  Use with care!

    """
    # assert iselement(element)
    self._root = element
def parse(self, source, parser=None):
    """Load external XML document into element tree.

    *source* is a file name or file object, *parser* is an optional parser
    instance that defaults to XMLParser.

    ParseError is raised if the parser fails to parse the document.

    Returns the root element of the given source document.

    """
    # Only a file opened here is closed here; a caller-supplied file
    # object stays under the caller's control.
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if parser is None:
            # If no parser was specified, create a default XMLParser
            parser = XMLParser()
            if hasattr(parser, '_parse_whole'):
                # The default XMLParser, when it comes from an accelerator,
                # can define an internal _parse_whole API for efficiency.
                # It can be used to parse the whole source without feeding
                # it with chunks.
                self._root = parser._parse_whole(source)
                return self._root
        # Feed the parser in 64 KiB chunks until the source is exhausted.
        while True:
            data = source.read(65536)
            if not data:
                break
            parser.feed(data)
        self._root = parser.close()
        return self._root
    finally:
        if close_source:
            source.close()
def iter(self, tag=None):
    """Create and return tree iterator for the root element.

    The iterator loops over all elements in this tree, in document order.

    *tag* is a string with the tag name to iterate over
    (default is to return all elements).

    """
    # assert self._root is not None
    return self._root.iter(tag)
def getiterator(self, tag=None):
    """Return a list of the elements produced by ``self.iter(tag)``.

    Emits a PendingDeprecationWarning; callers should use ``iter()``
    (or ``list(tree.iter())``) instead.

    """
    # Imported locally so the block does not depend on a module-level import.
    import warnings
    # Change for a DeprecationWarning in 1.4
    warnings.warn(
        "This method will be removed in future versions. "
        "Use 'tree.iter()' or 'list(tree.iter())' instead.",
        PendingDeprecationWarning, stacklevel=2
    )
    return list(self.iter(tag))
def find(self, path, namespaces=None):
    """Find first matching element by tag name or path.

    Same as getroot().find(path), which is Element.find()

    *path* is a string having either an element tag or an XPath,
    *namespaces* is an optional mapping from namespace prefix to full name.

    Return the first matching element, or None if no element was found.

    """
    # Imported locally so the block does not depend on a module-level import.
    import warnings
    # assert self._root is not None
    if path[:1] == "/":
        # A leading "/" is rewritten to a root-relative path and the caller
        # is told which spelling keeps the current behaviour.
        path = "." + path
        warnings.warn(
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r" % path,
            FutureWarning, stacklevel=2
        )
    return self._root.find(path, namespaces)
def findtext(self, path, default=None, namespaces=None):
    """Find the text of the first matching element by tag name or path.

    Same as getroot().findtext(path), which is Element.findtext()

    *path* is a string having either an element tag or an XPath,
    *default* is passed through to the root element's findtext(),
    *namespaces* is an optional mapping from namespace prefix to full name.

    Return whatever the root element's findtext() returns for these
    arguments.

    """
    # Imported locally so the block does not depend on a module-level import.
    import warnings
    # assert self._root is not None
    if path[:1] == "/":
        # A leading "/" is rewritten to a root-relative path and the caller
        # is told which spelling keeps the current behaviour.
        path = "." + path
        warnings.warn(
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r" % path,
            FutureWarning, stacklevel=2
        )
    return self._root.findtext(path, default, namespaces)
def findall(self, path, namespaces=None):
    """Find all matching subelements by tag name or path.

    Same as getroot().findall(path), which is Element.findall().

    *path* is a string having either an element tag or an XPath,
    *namespaces* is an optional mapping from namespace prefix to full name.

    Return list containing all matching elements in document order.

    """
    # Imported locally so the block does not depend on a module-level import.
    import warnings
    # assert self._root is not None
    if path[:1] == "/":
        # A leading "/" is rewritten to a root-relative path and the caller
        # is told which spelling keeps the current behaviour.
        path = "." + path
        warnings.warn(
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r" % path,
            FutureWarning, stacklevel=2
        )
    return self._root.findall(path, namespaces)
def iterfind(self, path, namespaces=None):
    """Find all matching subelements by tag name or path.

    Same as getroot().iterfind(path), which is element.iterfind()

    *path* is a string having either an element tag or an XPath,
    *namespaces* is an optional mapping from namespace prefix to full name.

    Return an iterable yielding all matching elements in document order.

    """
    # Imported locally so the block does not depend on a module-level import.
    import warnings
    # assert self._root is not None
    if path[:1] == "/":
        # A leading "/" is rewritten to a root-relative path and the caller
        # is told which spelling keeps the current behaviour.
        path = "." + path
        warnings.warn(
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r" % path,
            FutureWarning, stacklevel=2
        )
    return self._root.iterfind(path, namespaces)
# Serialise the tree rooted at self._root through the writer obtained from
# _get_writer(file_or_filename, <lower-cased encoding>).
#  - A *method* that is not a key of the _serialize table raises ValueError.
#  - For method "xml" the declaration line is written when *xml_declaration*
#    is truthy, or when it is None and the lower-cased encoding is none of
#    "utf-8", "us-ascii" or "unicode".
#  - With encoding "unicode" the declaration names the value returned by
#    locale.getpreferredencoding().
#  - In the visible serializer branch, qnames/namespaces come from
#    _namespaces() and the serializer looked up in _serialize is called with
#    short_empty_elements forwarded by keyword.
# NOTE(review): part of the signature and of the body (the remaining
# parameters, the defaulting of *method* and *encoding*, the guard around
# _serialize_text) is not visible in this view -- confirm against the full
# file before relying on these notes.
def write(self, file_or_filename,
short_empty_elements=True):
"""Write element tree to a file as XML.
*file_or_filename* -- file name or a file object opened for writing
*encoding* -- the output encoding (default: US-ASCII)
*xml_declaration* -- bool indicating if an XML declaration should be
added to the output. If None, an XML declaration
is added if encoding IS NOT either of:
US-ASCII, UTF-8, or Unicode
*default_namespace* -- sets the default XML namespace (for "xmlns")
*method* -- either "xml" (default), "html, "text", or "c14n"
*short_empty_elements* -- controls the formatting of elements
that contain no content. If True (default)
they are emitted as a single self-closed
tag, otherwise they are emitted as a pair
elif method not in _serialize:
raise ValueError("unknown method %r" % method)
enc_lower = encoding.lower()
with _get_writer(file_or_filename, enc_lower) as write:
if method == "xml" and (xml_declaration or
(xml_declaration is None and
enc_lower not in ("utf-8", "us-ascii", "unicode"))):
declared_encoding = encoding
if enc_lower == "unicode":
# Retrieve the default encoding for the xml declaration
declared_encoding = locale.getpreferredencoding()
write("<?xml version='1.0' encoding='%s'?>\n" % (
_serialize_text(write, self._root)
qnames, namespaces = _namespaces(self._root, default_namespace)
serialize = _serialize[method]
serialize(write, self._root, qnames, namespaces,
short_empty_elements=short_empty_elements)
def write_c14n(self, file):
    """Write the tree to *file* using the "c14n" output method.

    Thin wrapper that returns ``self.write(file, method="c14n")``.

    """
    # lxml.etree compatibility.  use output method instead
    return self.write(file, method="c14n")
# --------------------------------------------------------------------
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Yield a text ``write`` callable and release resources afterwards.

    *file_or_filename* is either a file name or an object with a ``write``
    method.

    *encoding* is the target encoding; the special value "unicode" means
    the target already accepts text and is used without re-encoding.

    A file opened here is closed on exit.  A caller-supplied object is left
    open: wrappers created around it are detached instead of closed.

    """
    # returns text write method and release all resources after using
    try:
        write = file_or_filename.write
    except AttributeError:
        # file_or_filename is a file name
        if encoding == "unicode":
            file = open(file_or_filename, "w")
        else:
            file = open(file_or_filename, "w", encoding=encoding,
                        errors="xmlcharrefreplace")
        with file:
            yield file.write
    else:
        # file_or_filename is a file-like object
        # encoding determines if it is a text or binary writer
        if encoding == "unicode":
            # use a text writer as is
            yield write
        else:
            # wrap a binary writer with TextIOWrapper
            with contextlib.ExitStack() as stack:
                if isinstance(file_or_filename, io.BufferedIOBase):
                    file = file_or_filename
                elif isinstance(file_or_filename, io.RawIOBase):
                    file = io.BufferedWriter(file_or_filename)
                    # Keep the original file open when the BufferedWriter is
                    # destroyed
                    stack.callback(file.detach)
                else:
                    # This is to handle passed objects that aren't in the
                    # IOBase hierarchy, but just have a write method
                    file = io.BufferedIOBase()
                    file.writable = lambda: True
                    file.write = write
                    try:
                        # TextIOWrapper uses this methods to determine
                        # if BOM (for UTF-16, etc) should be added
                        file.seekable = file_or_filename.seekable
                        file.tell = file_or_filename.tell
                    except AttributeError:
                        pass
                file = io.TextIOWrapper(file,
                                        encoding=encoding,
                                        errors="xmlcharrefreplace",
                                        newline="\n")
                # Keep the original file open when the TextIOWrapper is
                # destroyed
                stack.callback(file.detach)
                yield file.write
# Build and return the pair (qnames, namespaces) for the tree at *elem*.
# NOTE(review): several lines of this function are not visible in this view
# (creation of the two tables, the header of the qname helper, the loop
# header and most guards) -- confirm against the full file before relying
# on the notes below.
def _namespaces(elem, default_namespace=None):
# identify namespaces used in this tree
# maps qnames to *encoded* prefix:local names
# The default namespace URI is registered with an empty prefix.
namespaces[default_namespace] = ""
# calculate serialized qname representation
# "{uri}local" is split at the last "}" into its URI and local part.
uri, tag = qname[1:].rsplit("}", 1)
# Prefix sources seen here: this tree's table, _namespace_map, and a
# generated "ns<N>" name; the conditions choosing between them are not
# visible.
prefix = namespaces.get(uri)
prefix = _namespace_map.get(uri)
prefix = "ns%d" % len(namespaces)
qnames[qname] = "%s:%s" % (prefix, tag)
qnames[qname] = tag # default element
# FIXME: can this be handled in XML 1.0?
"cannot use non-qualified names with "
"default_namespace option"
_raise_serialization_error(qname)
# populate qname and namespaces table
# Tags may be QName instances (their .text is used) or plain strings; any
# other tag that is not None, Comment or PI is reported as an error.
if isinstance(tag, QName):
if tag.text not in qnames:
elif isinstance(tag, str):
elif tag is not None and tag is not Comment and tag is not PI:
_raise_serialization_error(tag)
# Attribute keys and values, and the element text, are also checked for
# QName instances.
for key, value in elem.items():
if isinstance(key, QName):
if isinstance(value, QName) and value.text not in qnames:
if isinstance(text, QName) and text.text not in qnames:
return qnames, namespaces
# Write the XML form of *elem* by calling write() with text pieces, recursing
# into child elements.
# NOTE(review): several lines of this function are not visible in this view
# (tag/text lookup, most branch headers, the tag-opening and tag-closing
# writes) -- confirm against the full file before relying on the notes below.
def _serialize_xml(write, elem, qnames, namespaces,
short_empty_elements, **kwargs):
# Comment text is written between "<!--" and "-->" without escaping.
write("<!--%s-->" % text)
elif tag is ProcessingInstruction:
write(_escape_cdata(text))
# Recursive calls pass None for namespaces.
_serialize_xml(write, e, qnames, None,
short_empty_elements=short_empty_elements)
items = list(elem.items())
# namespaces is iterated as (uri, prefix) pairs ordered by prefix.
for v, k in sorted(namespaces.items(),
key=lambda x: x[1]): # sort on prefix
write(" xmlns%s=\"%s\"" % (
for k, v in sorted(items): # lexical order
write(" %s=\"%s\"" % (qnames[k], v))
# This branch is taken for elements with text or children, or when
# short_empty_elements is false.
if text or len(elem) or not short_empty_elements:
write(_escape_cdata(text))
_serialize_xml(write, e, qnames, None,
short_empty_elements=short_empty_elements)
# The element's tail text is escaped and written last.
write(_escape_cdata(elem.tail))
# Tag names collected under HTML_EMPTY, stored as a set for membership tests.
HTML_EMPTY = set(
    "area base basefont br col frame hr "
    "img input isindex link meta param".split()
)
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
write("<!--%s-->" % _escape_cdata(text))
elif tag is ProcessingInstruction:
write("<?%s?>" % _escape_cdata(text))
write(_escape_cdata(text))
_serialize_html(write, e, qnames, None)
items = list(elem.items())
for v, k in sorted(namespaces.items(),
key=lambda x: x[1]): # sort on prefix
write(" xmlns%s=\"%s\"" % (
for k, v in sorted(items): # lexical order
v = _escape_attrib_html(v)
# FIXME: handle boolean attributes
write(" %s=\"%s\"" % (qnames[k], v))
if ltag == "script" or ltag == "style":
write(_escape_cdata(text))
_serialize_html(write, e, qnames, None)