write(_escape_cdata(elem.tail, encoding))
def _serialize_text(write, elem, encoding):
    """Write the text content of *elem* using the "text" output method.

    Every text fragment yielded by ``elem.itertext()`` is encoded with
    *encoding* and passed to *write*; markup is not emitted.

    @param write Callable accepting one encoded string fragment.
    @param elem Element whose text content is serialized.
    @param encoding Name of the output encoding.
    """
    for part in elem.itertext():
        write(part.encode(encoding))
    # An element without trailing text has tail None (or ""); encoding it
    # unconditionally would raise AttributeError.
    if elem.tail:
        write(elem.tail.encode(encoding))
# this optional method is imported at the end of the module
# "c14n": _serialize_c14n,
# Registers a namespace prefix. The registry is global, and any
# existing mapping for either the given prefix or the namespace URI
# will be removed.
# @param prefix Namespace prefix.
# @param uri Namespace uri. Tags and attributes in this namespace
# will be serialized with the given prefix, if at all possible.
# @exception ValueError If the prefix is reserved, or is otherwise
# invalid.
def register_namespace(prefix, uri):
    """Register *prefix* as the preferred prefix for namespace *uri*.

    The registry (``_namespace_map``, keyed by namespace URI) is global.
    Any existing entry that uses the same URI or the same prefix is
    dropped before the new mapping is stored, so a prefix never maps to
    two namespaces.

    @param prefix Namespace prefix.
    @param uri Namespace uri.
    @exception ValueError If the prefix has the form ``ns<digits>``,
        which is reserved for internal use.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot: entries are deleted while scanning.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
# Global registry used by register_namespace(): maps a namespace URI to
# the prefix that should be used for it when serializing.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    "http://purl.org/dc/elements/1.1/": "dc",
}
def _raise_serialization_error(text):
    """Report a value that cannot be serialized.

    @param text The offending value; its repr and type name are put in
        the error message.
    @exception TypeError Always raised.
    """
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
    )
def _encode(text, encoding):
    """Encode *text*, replacing unencodable characters by character references.

    @param text String to encode.
    @param encoding Name of the output encoding.
    @return The encoded string.
    """
    try:
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        # not a string-like object; report it through the shared helper
        _raise_serialization_error(text)
def _escape_cdata(text, encoding):
    """Escape and encode character data for use as element content.

    ``&``, ``<`` and ``>`` are replaced by their predefined entities;
    characters the encoding cannot represent become character references.

    @param text String to escape.
    @param encoding Name of the output encoding.
    @return The escaped, encoded string.
    """
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 characters, or so. assume that's, by far,
        # the most common case in most applications.
        # "&" must be handled first so the entities added below are not
        # escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
def _escape_attrib(text, encoding):
    """Escape and encode a string for use as an XML attribute value.

    ``&``, ``<``, ``>`` and ``"`` are replaced by their predefined
    entities. A newline is written as the character reference ``&#10;``
    because a literal newline in an attribute value is normalized to a
    space when the document is parsed again.

    @param text String to escape.
    @param encoding Name of the output encoding.
    @return The escaped, encoded string.
    """
    try:
        # "&" first, so entities added below are not escaped again
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
def _escape_attrib_html(text, encoding):
    """Escape and encode a string for use as an HTML attribute value.

    Only ``&``, ``>`` and ``"`` are replaced by entities; ``<`` is left
    as is.

    @param text String to escape.
    @param encoding Name of the output encoding.
    @return The escaped, encoded string.
    """
    try:
        # "&" first, so entities added below are not escaped again
        if "&" in text:
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
# --------------------------------------------------------------------
# Generates a string representation of an XML element, including all
# subelements.
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
# "c14n"; default is "xml").
# @return An encoded string containing the XML data.
def tostring(element, encoding=None, method=None):
    """Serialize *element* and its subelements to a single string.

    @param element An Element instance.
    @keyparam encoding Optional output encoding.
    @keyparam method Optional output method.
    @return An encoded string containing the XML data.
    """
    # Minimal file-like object: ElementTree.write() only needs a "write"
    # attribute, so each fragment is collected in a list.
    class _Sink(object):
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(fragments)
# Generates a string representation of an XML element, including all
# subelements. The string is returned as a sequence of string fragments.
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
# "c14n"; default is "xml").
# @return A sequence object containing the XML data.
def tostringlist(element, encoding=None, method=None):
    """Serialize *element* and its subelements to a list of fragments.

    @param element An Element instance.
    @keyparam encoding Optional output encoding.
    @keyparam method Optional output method.
    @return A list of string fragments, in output order.
    """
    # Minimal file-like object: ElementTree.write() only needs a "write"
    # attribute, so each fragment is collected in a list.
    class _Sink(object):
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
# Writes an element tree or element structure to sys.stdout. This
# function should be used for debugging only.
# The exact output format is implementation dependent. In this
# version, it's written as an ordinary XML file.
# @param elem An element tree or an individual element.
# NOTE(review): the "def" line of this debugging helper and several of its
# statements appear to be missing from this copy; "elem" is unbound here and
# both "if" statements below have no body. Restore from the upstream module
# (presumably the function is named "dump" -- confirm).
# A bare element is detected here, presumably to wrap it in an ElementTree.
if not isinstance(elem, ElementTree):
# tail text of the root element
tail = elem.getroot().tail
# true when there is no tail text, or it does not already end in a newline
if not tail or tail[-1] != "\n":
# --------------------------------------------------------------------
# Parses an XML document into an element tree.
# @param source A filename or file object containing XML data.
# @param parser An optional parser instance. If not given, the
# standard {@link XMLParser} parser is used.
# @return An ElementTree instance
def parse(source, parser=None):
    """Parse an XML document into an element tree.

    @param source A filename or file object containing XML data.
    @param parser An optional parser instance, passed through to
        ElementTree.parse().
    @return An ElementTree instance.
    """
    tree = ElementTree()
    tree.parse(source, parser)
    return tree
# Parses an XML document into an element tree incrementally, and reports
# what's going on to the user.
# @param source A filename or file object containing XML data.
# @param events A list of events to report back. If omitted, only "end"
# events are reported.
# @param parser An optional parser instance. If not given, the
# standard {@link XMLParser} parser is used.
# @return A (event, elem) iterator.
def iterparse(source, events=None, parser=None):
    """Parse an XML document incrementally.

    @param source A filename or file object containing XML data. Anything
        without a "read" attribute is treated as a filename and opened in
        binary mode.
    @param events A list of events to report back.
    @param parser An optional parser instance. If not given, an XMLParser
        feeding a TreeBuilder is created.
    @return A (event, elem) iterator.
    """
    # Only a file opened here is ours to close; a caller-supplied file
    # object stays under the caller's control.
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if parser is None:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except Exception:
        # do not leak the file handle if setting up the iterator fails
        if close_source:
            source.close()
        raise
# Iterator object returned by iterparse(). Parser callbacks installed in
# __init__ append (event, value) tuples to self._events; the source is read
# in chunks of 16384 bytes.
# NOTE(review): this copy of the class looks incomplete. self._file,
# self._events, self._index and self._parser are read but never assigned,
# "event" is not bound by any visible loop, the "if"/"try" lines that select
# between the handlers are missing, and the statements from
# "item = self._events[self._index]" onward are not inside any visible
# method. Restore from the upstream module before relying on this class.
class _IterParseIterator(object):
def __init__(self, source, events, parser, close_source=False):
# remember whether the source should be closed by this iterator
self._close_file = close_source
self.root = self._root = None
# wire up the parser for event reporting
# from here on "parser" is the low-level parser object wrapped by self._parser
parser = self._parser._parser
append = self._events.append
# expat parser flags -- presumably needed by _start_list; confirm against
# the pyexpat documentation
parser.ordered_attributes = 1
parser.specified_attributes = 1
# start-element handler built on the parser's _start_list callback
def handler(tag, attrib_in, event=event, append=append,
start=self._parser._start_list):
append((event, start(tag, attrib_in)))
parser.StartElementHandler = handler
# same handler built on the parser's _start callback
# NOTE(review): the condition choosing between the two variants is not visible
def handler(tag, attrib_in, event=event, append=append,
start=self._parser._start):
append((event, start(tag, attrib_in)))
parser.StartElementHandler = handler
# end-element handler
# NOTE(review): the parameter list below is unterminated; "end" is presumably
# a default argument bound on the missing line -- confirm
def handler(tag, event=event, append=append,
append((event, end(tag)))
parser.EndElementHandler = handler
elif event == "start-ns":
# namespace declaration: reports a (prefix, uri) pair
def handler(prefix, uri, event=event, append=append):
# convert the URI to an ASCII-encoded string
uri = (uri or "").encode("ascii")
append((event, (prefix or "", uri or "")))
parser.StartNamespaceDeclHandler = handler
# NOTE(review): the body of this handler is missing
def handler(prefix, event=event, append=append):
parser.EndNamespaceDeclHandler = handler
# any other event name is rejected
raise ValueError("unknown event %r" % event)
# next buffered event
item = self._events[self._index]
# read the next chunk of the source
data = self._file.read(16384)
except SyntaxError as exc:
# closing the parser yields the root element
self._root = self._parser.close()
# Parses an XML document from a string constant. This function can
# be used to embed "XML literals" in Python code.
# @param source A string containing XML data.
# @param parser An optional parser instance. If not given, the
# standard {@link XMLParser} parser is used.
# @return An Element instance.
def XML(text, parser=None):
    """Parse an XML document from a string constant.

    @param text A string containing XML data.
    @param parser An optional parser instance. If not given, an XMLParser
        feeding a TreeBuilder is created.
    @return An Element instance.
    """
    # only build a default parser when the caller did not supply one
    if parser is None:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    return parser.close()
# Parses an XML document from a string constant, and also returns
# a dictionary which maps from element id:s to elements.
# @param source A string containing XML data.
# @param parser An optional parser instance. If not given, the
# standard {@link XMLParser} parser is used.
# @return A tuple containing an Element instance and a dictionary.
# @defreturn (Element, dictionary)
def XMLID(text, parser=None):
    """Parse an XML document from a string and index elements by id.

    @param text A string containing XML data.
    @param parser An optional parser instance. If not given, an XMLParser
        feeding a TreeBuilder is created.
    @return A tuple (Element, dictionary); the dictionary maps each
        non-empty "id" attribute value to its element.
    """
    # only build a default parser when the caller did not supply one
    if parser is None:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    tree = parser.close()
    ids = {}
    for elem in tree.iter():
        elem_id = elem.get("id")
        if elem_id:
            ids[elem_id] = elem
    return tree, ids
# Parses an XML document from a string constant. Same as {@link #XML}.
# @param source A string containing XML data.
# @return An Element instance.
# Parses an XML document from a sequence of string fragments.
# @param sequence A list or other sequence containing XML data fragments.
# @param parser An optional parser instance. If not given, the
# standard {@link XMLParser} parser is used.
# @return An Element instance.
def fromstringlist(sequence, parser=None):
    """Parse an XML document from a sequence of string fragments.

    @param sequence A list or other sequence containing XML data fragments.
    @param parser An optional parser instance. If not given, an XMLParser
        feeding a TreeBuilder is created.
    @return An Element instance.
    """
    # only build a default parser when the caller did not supply one
    if parser is None:
        parser = XMLParser(target=TreeBuilder())
    for text in sequence:
        parser.feed(text)
    return parser.close()
# --------------------------------------------------------------------
# Generic element structure builder. This builder converts a sequence
# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
# #TreeBuilder.end} method calls to a well-formed element structure.
# You can use this class to build an element structure using a custom XML
# parser, or a parser for some other XML-like format.
# @param element_factory Optional element factory. This factory
# is called to create new Element instances, as necessary.
class TreeBuilder(object):
    """Generic element structure builder.

    Converts a sequence of start(), data() and end() calls into an
    element structure. Text passed to data() is buffered and attached to
    the most recent element as its text (before its end tag) or as its
    tail (after its end tag).

    @param element_factory Optional element factory, called as
        factory(tag, attrs) to create new elements. Defaults to Element.
    """

    def __init__(self, element_factory=None):
        self._data = []  # data collector
        self._elem = []  # element stack
        self._last = None  # last element
        self._tail = None  # true if we're after an end tag
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory

    def close(self):
        """Check that the document is complete and return its toplevel element.

        @return An Element instance.
        """
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        """Attach buffered character data to the last element, then clear it."""
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._tail:
                    # data seen after an end tag belongs to that element's tail
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    # data seen after a start tag is the element's own text
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            self._data = []

    def data(self, data):
        """Add text to the current element.

        @param data A string. This should be either an 8-bit string
            containing ASCII text, or a Unicode string.
        """
        self._data.append(data)

    def start(self, tag, attrs):
        """Open a new element.

        @param tag The element name.
        @param attrs A dictionary containing element attributes.
        @return The opened element.
        """
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        # the very first element has no parent to be appended to
        if self._elem:
            self._elem[-1].append(elem)
        self._elem.append(elem)
        self._tail = 0
        return elem

    def end(self, tag):
        """Close the current element.

        @param tag The element name.
        @return The closed element.
        """
        self._flush()
        self._last = self._elem.pop()
        assert self._last.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   self._last.tag, tag)
        self._tail = 1
        return self._last
# Element structure builder for XML source data, based on the
# expat parser.
# @keyparam target Target object. If omitted, the builder uses an
# instance of the standard {@link #TreeBuilder} class.
# @keyparam html Predefine HTML entities. This flag is not supported
# by the current implementation.
# @keyparam encoding Optional encoding. If given, the value overrides
# the encoding specified in the XML file.
def __init__(self, html=_sentinel, target=None, encoding=None):
if html is not _sentinel:
"The html argument of XMLParser() is deprecated",
DeprecationWarning, stacklevel=2)
from xml.parsers import expat
"No module named expat; use SimpleXMLTreeBuilder instead"
parser = expat.ParserCreate(encoding, "}")
# underscored names are provided for compatibility only
self.parser = self._parser = parser
self.target = self._target = target
self._error = expat.error
self._names = {} # name memo cache
parser.DefaultHandlerExpand = self._default
parser.StartElementHandler = self._start
parser.EndElementHandler = self._end
parser.CharacterDataHandler = self._data
parser.CommentHandler = self._comment
parser.ProcessingInstructionHandler = self._pi
# let expat do the buffering, if supported