self._parser.buffer_text = 1
# use new-style attribute handling, if supported
self._parser.ordered_attributes = 1
self._parser.specified_attributes = 1
parser.StartElementHandler = self._start_list
self.version = "Expat %d.%d.%d" % expat.version_info
# Annotates an error object with the position reported by `value`.
# @param value Object exposing `lineno` and `offset` attributes.
# NOTE(review): `err` is not bound in the lines visible here; presumably it
# is built from `value` earlier in this method and raised afterwards --
# confirm against the full file.
def _raiseerror(self, value):
# store the failure location as a (lineno, offset) tuple
err.position = value.lineno, value.offset
def _fixtext(self, text):
# convert text string to ascii, if possible
return text.encode("ascii")
# expand qname, and convert name string to ascii, if possible
self._names[key] = name = self._fixtext(name)
# Start-of-element handling for attributes supplied as a mapping.
# @param tag Element tag, forwarded to self.target.start().
# @param attrib_in Mapping of attribute names to values (iterated with
#     .items()).
# @return Whatever self.target.start() returns.
# NOTE(review): `attrib`, `fixname` and `fixtext` are not bound in the lines
# visible here; presumably `attrib` is a fresh dict and the other two are
# local aliases of self._fixname / self._fixtext -- confirm.
def _start(self, tag, attrib_in):
# normalise every attribute name and value before storing it
for key, value in attrib_in.items():
attrib[fixname(key)] = fixtext(value)
return self.target.start(tag, attrib)
# Start-of-element handling for attributes supplied as a flat sequence
# [name0, value0, name1, value1, ...]; this is the method installed as
# the parser's StartElementHandler.
# @param tag Element tag, forwarded to self.target.start().
# @param attrib_in Flat sequence alternating attribute names and values.
# @return Whatever self.target.start() returns.
# NOTE(review): `attrib`, `fixname` and `fixtext` are not bound in the lines
# visible here; presumably `attrib` is a fresh dict and the other two are
# local aliases of self._fixname / self._fixtext -- confirm.
def _start_list(self, tag, attrib_in):
# step through the sequence two items at a time: index i holds the
# attribute name, index i+1 its value
for i in range(0, len(attrib_in), 2):
attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
return self.target.start(tag, attrib)
return self.target.data(self._fixtext(text))
return self.target.end(self._fixname(tag))
# Forwards comment text to the target's comment() method.
# @param data Comment text; passed through self._fixtext() first.
# @return Whatever the target's comment() method returns.
def _comment(self, data):
# the attribute lookup happens before the text is converted, so a target
# without a comment attribute fails here
# NOTE(review): no handling of that case is visible in these lines
comment = self.target.comment
return comment(self._fixtext(data))
# Forwards a processing instruction to `pi`.
# @param target Processing-instruction target; passed through
#     self._fixtext().
# @param data Processing-instruction data; passed through self._fixtext().
# @return Whatever `pi` returns.
# NOTE(review): `pi` is not bound in the lines visible here; presumably it
# is looked up on self.target earlier in this method -- confirm.
def _pi(self, target, data):
return pi(self._fixtext(target), self._fixtext(data))
# Handles text routed to the parser's default handler: resolves entity
# references through self.entity, reports undefined ones, and collects the
# pieces of a <!DOCTYPE ...> declaration so the doctype can be reported.
# @param text Raw text fragment to examine.
# NOTE(review): `prefix`, `err`, `n`, `type` and (on the SYSTEM branch)
# `pubid` are not bound in the lines visible here, and the branch that opens
# the if/elif chain is missing -- confirm details against the full file.
def _default(self, text):
# deal with undefined entities
# text[1:-1] drops the first and last character of the fragment
# (presumably the "&" and ";" of an entity reference -- confirm) and the
# remainder is the lookup key in self.entity
self.target.data(self.entity[text[1:-1]])
from xml.parsers import expat
# error message: the offending text plus the parser's current error
# line and column
"undefined entity %s: line %d, column %d" %
(text, self._parser.ErrorLineNumber,
self._parser.ErrorColumnNumber)
err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
err.lineno = self._parser.ErrorLineNumber
err.offset = self._parser.ErrorColumnNumber
# a fragment beginning with "<!DOCTYPE" switches on doctype collection
elif prefix == "<" and text[:9] == "<!DOCTYPE":
self._doctype = [] # inside a doctype declaration
# while collection is active (self._doctype is a list, not None), every
# fragment seen is appended to it
elif self._doctype is not None:
self._doctype.append(text)
# PUBLIC form: four collected items; SYSTEM form: three collected items
# (`n` is presumably len(self._doctype) -- confirm); note that the local
# name `type` shadows the builtin of the same name
if type == "PUBLIC" and n == 4:
name, type, pubid, system = self._doctype
elif type == "SYSTEM" and n == 3:
name, type, system = self._doctype
# system[1:-1] drops the first and last character of the system
# identifier (presumably its surrounding quotes -- confirm)
if hasattr(self.target, "doctype"):
self.target.doctype(name, pubid, system[1:-1])
# otherwise, if doctype differs from the stored _XMLParser__doctype
# reference, both are called with the same arguments
elif self.doctype != self._XMLParser__doctype:
# warn about deprecated call
self._XMLParser__doctype(name, pubid, system[1:-1])
self.doctype(name, pubid, system[1:-1])
# (Deprecated) Handles a doctype declaration.
# @param name Doctype name.
# @param pubid Public identifier.
# @param system System identifier.
# NOTE(review): the two string literals after the docstring read as the text
# of a deprecation warning, but the call that would emit it is not among the
# lines visible here -- confirm against the full file.
def doctype(self, name, pubid, system):
"""This method of XMLParser is deprecated."""
"This method of XMLParser is deprecated. Define doctype() "
"method on the TreeBuilder target.",
# sentinel, if doctype is redefined in a subclass
# Feeds data to the parser.
# @param data Encoded data.
self._parser.Parse(data, 0)
# Finishes feeding data to the parser.
# @return An element structure.
self._parser.Parse("", 1) # end of data
tree = self.target.close()
del self.target, self._parser # get rid of circular references
# XMLTreeBuilder is a second name bound to the XMLParser class
# (presumably kept for backward compatibility -- confirm).
XMLTreeBuilder = XMLParser
# workaround circular import.
# the import is done here, after the definitions above, rather than at the
# top of the module
from ElementC14N import _serialize_c14n
# register the C14N serializer under the "c14n" key of _serialize
# NOTE(review): _serialize is not defined in the lines visible here
_serialize["c14n"] = _serialize_c14n