if ltag not in HTML_EMPTY:
write(_escape_cdata(elem.tail))
def _serialize_text(write, elem):
for part in elem.itertext():
# this optional method is imported at the end of the module
# "c14n": _serialize_c14n,
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global, and any existing mapping for either the
    given prefix or the namespace URI will be removed.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot: entries are deleted while scanning.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
# "well-known" namespace prefixes
"http://www.w3.org/XML/1998/namespace": "xml",
"http://www.w3.org/1999/xhtml": "html",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
"http://schemas.xmlsoap.org/wsdl/": "wsdl",
"http://www.w3.org/2001/XMLSchema": "xs",
"http://www.w3.org/2001/XMLSchema-instance": "xsi",
"http://purl.org/dc/elements/1.1/": "dc",
# For tests and troubleshooting
register_namespace._namespace_map = _namespace_map
def _raise_serialization_error(text):
"cannot serialize %r (type %s)" % (text, type(text).__name__)
# it's worth avoiding do-nothing calls for strings that are
# shorter than 500 characters, or so. assume that's, by far,
# the most common case in most applications.
text = text.replace("&", "&")
text = text.replace("<", "<")
text = text.replace(">", ">")
except (TypeError, AttributeError):
_raise_serialization_error(text)
def _escape_attrib(text):
text = text.replace("&", "&")
text = text.replace("<", "<")
text = text.replace(">", ">")
text = text.replace("\"", """)
# The following business with carriage returns is to satisfy
# Section 2.11 of the XML specification, stating that
# CR or CR LN should be replaced with just LN
# http://www.w3.org/TR/REC-xml/#sec-line-ends
text = text.replace("\r\n", "\n")
text = text.replace("\r", "\n")
#The following four lines are issue 17582
text = text.replace("\n", " ")
text = text.replace("\t", "	")
except (TypeError, AttributeError):
_raise_serialization_error(text)
def _escape_attrib_html(text):
text = text.replace("&", "&")
text = text.replace(">", ">")
text = text.replace("\"", """)
except (TypeError, AttributeError):
_raise_serialization_error(text)
# --------------------------------------------------------------------
def tostring(element, encoding=None, method=None, *,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  If encoding is "unicode", a string
    is returned. Otherwise a bytestring is returned.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output which can
    be one of "xml" (default), "html", "text" or "c14n".

    Returns an (optionally) encoded string containing the XML data.

    """
    # Text output needs a text buffer; every other encoding produces bytes.
    stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ElementTree(element).write(stream, encoding, method=method,
                               short_empty_elements=short_empty_elements)
    return stream.getvalue()
class _ListDataStream(io.BufferedIOBase):
"""An auxiliary stream accumulating into a list reference."""
def tostringlist(element, encoding=None, method=None, *,
                 short_empty_elements=True):
    """Generate the serialized form of *element* as a list of fragments.

    Takes the same arguments as tostring(); the chunks written by the
    serializer are collected in a list, which is returned.
    """
    lst = []
    stream = _ListDataStream(lst)
    ElementTree(element).write(stream, encoding, method=method,
                               short_empty_elements=short_empty_elements)
    return lst
"""Write element tree or element structure to sys.stdout.
This function should be used for debugging only.
*elem* is either an ElementTree, or a single Element. The exact output
format is implementation dependent. In this version, it's written as an
if not isinstance(elem, ElementTree):
elem.write(sys.stdout, encoding="unicode")
tail = elem.getroot().tail
if not tail or tail[-1] != "\n":
# --------------------------------------------------------------------
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Return an ElementTree instance.

    """
    tree = ElementTree()
    tree.parse(source, parser)
    return tree
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    This class also reports what's going on to the user based on the
    *events* it is initialized with.  The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information).  If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.

    """
    # The ABC aliases in the top-level "collections" namespace were removed
    # in Python 3.10, so the ABC is taken from collections.abc.
    import collections.abc

    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    def iterator():
        try:
            while True:
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            # Only close what this function opened itself.
            if close_source:
                source.close()

    class IterParseIterator(collections.abc.Iterator):
        __next__ = iterator().__next__

    it = IterParseIterator()
    # The root element becomes available once the input is exhausted.
    it.root = None
    del iterator, IterParseIterator

    # The generator body has not started running yet, so rebinding *source*
    # here is seen by it on the first next() call.
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True

    return it
def __init__(self, events=None, *, _parser=None):
# The _parser argument is for internal use only and must not be relied
# upon in user code. It will be removed in a future release.
# See http://bugs.python.org/issue17741 for more details.
self._events_queue = collections.deque()
self._parser = _parser or XMLParser(target=TreeBuilder())
# wire up the parser for event reporting
self._parser._setevents(self._events_queue, events)
"""Feed encoded data to parser."""
raise ValueError("feed() called after end of stream")
except SyntaxError as exc:
self._events_queue.append(exc)
def _close_and_return_root(self):
# iterparse needs this to set its root attribute properly :(
root = self._parser.close()
"""Finish feeding data to parser.
Unlike XMLParser, does not return the root element. Use
read_events() to consume elements from XMLPullParser.
self._close_and_return_root()
"""Return an iterator over currently available (event, elem) pairs.
Events are consumed from the internal event queue as they are
retrieved from the iterator.
events = self._events_queue
if isinstance(event, Exception):
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    # Only build a default parser when the caller did not supply one.
    if parser is None:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    return parser.close()
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements.

    """
    # Only build a default parser when the caller did not supply one.
    if parser is None:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    tree = parser.close()
    ids = {}
    for elem in tree.iter():
        elem_id = elem.get("id")
        # Elements without an (or with an empty) id are not indexed.
        if elem_id:
            ids[elem_id] = elem
    return tree, ids
# Parse XML document from string constant. Alias for XML().
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence, *parser* is an optional parser
    instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    # Only build a default parser when the caller did not supply one.
    if parser is None:
        parser = XMLParser(target=TreeBuilder())
    for text in sequence:
        parser.feed(text)
    return parser.close()
# --------------------------------------------------------------------
"""Generic element structure builder.
This builder converts a sequence of start, data, and end method
calls to a well-formed element structure.
You can use this class to build an element structure using a custom XML
parser, or a parser for some other XML-like format.
*element_factory* is an optional element factory which is called
to create new Element instances, as necessary.
def __init__(self, element_factory=None):
    """Start with empty buffers and no element seen yet.

    *element_factory* is the callable used to create elements; Element
    is used when it is omitted.
    """
    self._factory = Element if element_factory is None else element_factory
    self._tail = None  # true if we're after an end tag
    self._last = None  # last element
    self._elem = []  # element stack
    self._data = []  # data collector
"""Flush builder buffers and return toplevel document Element."""
assert len(self._elem) == 0, "missing end tags"
assert self._last is not None, "missing toplevel element"
if self._last is not None:
text = "".join(self._data)
assert self._last.tail is None, "internal error (tail)"
assert self._last.text is None, "internal error (text)"
"""Add text to current element."""
def start(self, tag, attrs):
"""Open new element and return it.
*tag* is the element name, *attrs* is a dict containing element
self._last = elem = self._factory(tag, attrs)
self._elem[-1].append(elem)
"""Close and return current Element.
*tag* is the element name.
self._last = self._elem.pop()
assert self._last.tag == tag,\
"end tag mismatch (expected %s, got %s)" % (
# also see ElementTree and TreeBuilder
"""Element structure builder for XML source data based on the expat parser.
*html* are predefined HTML entities (deprecated and not supported),
*target* is an optional target object which defaults to an instance of the
standard TreeBuilder class, *encoding* is an optional encoding string
which if given, overrides the encoding specified in the XML file:
http://www.iana.org/assignments/character-sets
def __init__(self, html=0, target=None, encoding=None):
from xml.parsers import expat
"No module named expat; use SimpleXMLTreeBuilder instead"
parser = expat.ParserCreate(encoding, "}")
# underscored names are provided for compatibility only
self.parser = self._parser = parser
self.target = self._target = target
self._error = expat.error
self._names = {} # name memo cache
parser.DefaultHandlerExpand = self._default
if hasattr(target, 'start'):
parser.StartElementHandler = self._start
if hasattr(target, 'end'):
parser.EndElementHandler = self._end
if hasattr(target, 'data'):
parser.CharacterDataHandler = target.data
# miscellaneous callbacks
if hasattr(target, 'comment'):
parser.CommentHandler = target.comment
if hasattr(target, 'pi'):
parser.ProcessingInstructionHandler = target.pi
# Configure pyexpat: buffering, new-style attribute handling.
parser.ordered_attributes = 1
parser.specified_attributes = 1
self.version = "Expat %d.%d.%d" % expat.version_info
def _setevents(self, events_queue, events_to_report):
# Internal API for XMLPullParser
# events_to_report: a list of events to report during parsing (same as
# the *events* of XMLPullParser's constructor).
# events_queue: a list of actual parsing events that will be populated
# by the underlying parser.
append = events_queue.append
for event_name in events_to_report:
if event_name == "start":
parser.ordered_attributes = 1
parser.specified_attributes = 1