Edit File by line

"""

[0] Fix | Delete

SAX driver for the pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.

[2] Fix | Delete

"""

[3] Fix | Delete

[4] Fix | Delete

version = "0.20"

[5] Fix | Delete

[6] Fix | Delete

from xml.sax._exceptions import *

[7] Fix | Delete

from xml.sax.handler import feature_validation, feature_namespaces

[8] Fix | Delete

from xml.sax.handler import feature_namespace_prefixes

[9] Fix | Delete

from xml.sax.handler import feature_external_ges, feature_external_pes

[10] Fix | Delete

from xml.sax.handler import feature_string_interning

[11] Fix | Delete

from xml.sax.handler import property_xml_string, property_interning_dict

[12] Fix | Delete

[13] Fix | Delete

# xml.parsers.expat does not raise ImportError in Jython, so the Java
# platform is detected explicitly and the reader reported as unavailable.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    # The import succeeded, but without ParserCreate the module cannot be
    # used by this driver.
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)

[26] Fix | Delete

from xml.sax import xmlreader, saxutils, handler

[27] Fix | Delete

[28] Fix | Delete

# Module-level shortcuts for the attribute containers handed to the
# content handler by the element callbacks.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

[30] Fix | Delete

[31] Fix | Delete

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        """Fallback when weak references are unavailable: return o itself."""
        return o
else:
    import weakref
    _mkproxy = weakref.proxy
    # Only _mkproxy is meant to stay in the module namespace.
    del weakref, _weakref

[43] Fix | Delete

[44] Fix | Delete

class _ClosedParser:
    """Plain attribute holder that stands in for a discarded expat parser.

    ExpatParser.close() stores ErrorColumnNumber and ErrorLineNumber on an
    instance of this class so the position can still be read afterwards.
    """

[46] Fix | Delete

[47] Fix | Delete

# --- ExpatLocator

[48] Fix | Delete

[49] Fix | Delete

class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """

    def __init__(self, parser):
        """Remember parser through _mkproxy (a weak proxy when available)."""
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return the parser's ErrorColumnNumber, or None without a parser."""
        parser = self._ref
        if parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the parser's ErrorLineNumber, or 1 without a parser."""
        parser = self._ref
        if parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the parser's current source."""
        parser = self._ref
        # NOTE(review): _ref is never None when it is a weakref proxy, so
        # this branch only guards the case of an explicit None parser.
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the parser's current source."""
        parser = self._ref
        # NOTE(review): see getPublicId() regarding the None check.
        if parser is None:
            return None
        return parser._source.getSystemId()

[81] Fix | Delete

[82] Fix | Delete

[83] Fix | Delete

# --- ExpatParser

[84] Fix | Delete

[85] Fix | Delete

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module."""

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create an idle reader; the expat parser is built by reset().

        namespaceHandling -- initial value of the namespaces feature
        bufsize -- forwarded to IncrementalParser.__init__
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        # Stack of (parser, source) pairs saved while an external entity
        # is being parsed.
        self._entity_stack = []
        # NOTE(review): external general entities are enabled by default;
        # callers handling untrusted XML may want to turn this off via
        # setFeature(feature_external_ges, 0).
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        try:
            self.reset()
            self._cont_handler.setDocumentLocator(ExpatLocator(self))
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # bpo-30264: Close the source on error to not leak resources:
            # xml.sax.parse() doesn't give access to the underlying parser
            # to the caller
            self._close_source()
            raise

    def prepareParser(self, source):
        """Tell expat the base to use, taken from the source's system id."""
        base = source.getSystemId()
        if base is not None:
            # Only Python 2 has a separate unicode type that has to be
            # encoded first; the 'str is bytes' guard keeps the name
            # 'unicode' from being evaluated on Python 3.
            if str is bytes and isinstance(base, unicode):
                base = base.encode('utf-8')
            self._parser.SetBase(base)

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install handler and, while parsing, rebind the expat callbacks."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature name.

        Raises SAXNotRecognizedException for unknown feature names.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature name to state.

        Raises SAXNotSupportedException while parsing or when an
        unsupported feature is switched on, and SAXNotRecognizedException
        for unknown feature names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dictionary if there is one.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property name.

        Raises SAXNotSupportedException when the XML string is requested
        without an active parser, and SAXNotRecognizedException for
        unknown properties or an expat lacking GetInputContext.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property name to value.

        Raises SAXNotSupportedException for the read-only XML string
        property and SAXNotRecognizedException for unknown properties.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal=0):
        """Pass data to expat, starting a new document if none is active.

        expat errors are wrapped in SAXParseException and handed to the
        error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def _close_source(self):
        """Close the character and byte streams of the current source."""
        source = self._source
        try:
            stream = source.getCharacterStream()
            if stream is not None:
                stream.close()
        finally:
            # Attempt the byte stream even if the step above raised.
            stream = source.getByteStream()
            if stream is not None:
                stream.close()

    def close(self):
        """Finish the document and release the parser and the source."""
        if (self._entity_stack or self._parser is None or
            isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal=1)
            self._cont_handler.endDocument()
            self._parsing = 0
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            if self._parser is not None:
                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
                parser = _ClosedParser()
                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
                parser.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = parser
            self._close_source()

    def _reset_cont_handler(self):
        """Point expat's PI and character callbacks at the content handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Bind or clear the expat callbacks served by the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through self to reorder the arguments for startDTD.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and install all callbacks on it."""
        if self._namespaces:
            # " " is the separator expat puts between the parts of a
            # namespace-qualified name; the *_ns callbacks split on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern=self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return the parser's ErrorColumnNumber, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the parser's ErrorLineNumber, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Split expat's space-joined names and report startElementNS."""
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            # three parts: keep the first two, drop the third
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Split expat's space-joined name and report endElementNS."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        # startDTD takes the public id before the system id.
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external entity with a sub-parser.

        Returns 1 on success or when external general entities are
        disabled, and 0 when parsing the entity raised.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the outer parser and source while the entity is parsed.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)

[434] Fix | Delete

[435] Fix | Delete

# ---

[436] Fix | Delete

[437] Fix | Delete

def create_parser(*args, **kwargs):
    """Return a new ExpatParser built with the given arguments."""
    return ExpatParser(*args, **kwargs)

[439] Fix | Delete

[440] Fix | Delete

# ---

[441] Fix | Delete

[442] Fix | Delete

if __name__ == "__main__":
    # Demo: parse a remote document and echo it through XMLGenerator.
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")

[448] Fix | Delete

[449] Fix | Delete