Edit File by line
/home/barbar84/public_h.../wp-conte.../plugins/sujqvwi/ShExBy/shex_roo.../lib64/python2..../xml/sax
File: expatreader.py
"""
SAX driver for the pyexpat C module. This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# has to be checked explicitly before attempting the import below.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Report a missing or unusable expat as SAXReaderNotAvailable rather than
# letting a plain ImportError escape from this module.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Module-level shortcuts used by the element event handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl
[30] Fix | Delete
[31] Fix | Delete
# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        """Fallback used when weak references are unavailable: return *o* itself."""
        return o
else:
    import weakref
    # _mkproxy(obj) returns a weak proxy, so holding the result does not
    # keep obj alive.
    _mkproxy = weakref.proxy
    del weakref, _weakref
[43] Fix | Delete
[44] Fix | Delete
class _ClosedParser:
    """Empty attribute holder.

    ExpatParser.close() stores an instance of this class in place of the
    real expat parser after copying ErrorColumnNumber and ErrorLineNumber
    onto it, so those values stay readable after closing.
    """
    pass
[46] Fix | Delete
[47] Fix | Delete
# --- ExpatLocator
[48] Fix | Delete
[49] Fix | Delete
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """
    def __init__(self, parser):
        # `parser` is the reader object (it must expose `_parser` and
        # `_source`); only the result of _mkproxy() is stored.
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return the reader's expat ErrorColumnNumber, or None if it has no parser."""
        parser = self._ref
        if parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the reader's expat ErrorLineNumber, or 1 if it has no parser."""
        parser = self._ref
        if parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the reader's current input source."""
        parser = self._ref
        # NOTE(review): self._ref is whatever _mkproxy() returned at
        # construction time; this guard is kept exactly as in the original.
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the reader's current input source."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getSystemId()
[81] Fix | Delete
[82] Fix | Delete
[83] Fix | Delete
# --- ExpatParser
[84] Fix | Delete
[85] Fix | Delete
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module.

    The reader wraps an expat parser object (created in reset()) and
    forwards expat callbacks to the SAX content, DTD, entity and lexical
    handlers.  It also acts as its own Locator.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a reader.

        namespaceHandling -- initial value of the namespaces feature.
        bufsize -- passed through to xmlreader.IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []
        # NOTE(security): external general entities are enabled by default
        # here, so external_entity_ref() will resolve and parse them.
        # Callers handling untrusted XML should turn this off with
        # setFeature(feature_external_ges, False).
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        """Parse an XML document from a URL or an InputSource.

        On any exception the input source's streams are closed before the
        exception is re-raised.
        """
        source = saxutils.prepare_input_source(source)

        self._source = source
        try:
            self.reset()
            self._cont_handler.setDocumentLocator(ExpatLocator(self))
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # bpo-30264: Close the source on error to not leak resources:
            # xml.sax.parse() doesn't give access to the underlying parser
            # to the caller
            self._close_source()
            raise

    def prepareParser(self, source):
        """Set the expat base URI from the source's system id, if it has one."""
        if source.getSystemId() is not None:
            base = source.getSystemId()
            # Python 2: SetBase is given a byte string, so unicode system
            # ids are encoded first.
            if isinstance(base, unicode):
                base = base.encode('utf-8')
            self._parser.SetBase(base)

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install a content handler; rebinds expat callbacks if already parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the current state of the named SAX feature.

        Raises SAXNotRecognizedException for unknown feature names.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the named SAX feature.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable validation, external parameter entities or namespace
        prefixes; raises SAXNotRecognizedException for unknown names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dictionary if there is one.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of the named SAX property.

        The XML-string property is only available while a parser object
        exists; unknown names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the named SAX property.

        The XML-string property is read-only (SAXNotSupportedException);
        unknown names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Feed a chunk of the document to expat.

        The first call after a reset creates the parser and emits
        startDocument().  expat errors are wrapped in SAXParseException
        and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def _close_source(self):
        """Close the character and byte streams of the current input source."""
        source = self._source
        try:
            char_stream = source.getCharacterStream()
            if char_stream is not None:
                char_stream.close()
        finally:
            # Runs even if closing the character stream raised.
            byte_stream = source.getByteStream()
            if byte_stream is not None:
                byte_stream.close()

    def close(self):
        """Finish parsing: flush expat, emit endDocument() and close the source.

        Does nothing while an external entity is being processed, or when
        there is no live parser.
        """
        if (self._entity_stack or self._parser is None or
            isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            if self._parser is not None:
                # Only reached when the try block raised before clearing
                # the parser.
                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
                parser = _ClosedParser()
                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
                parser.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = parser
            self._close_source()

    def _reset_cont_handler(self):
        """Point expat's PI and character-data callbacks at the content handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Bind (or clear) the expat callbacks served by the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through self so the argument order can be adapted.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and wire up all callbacks."""
        if self._namespaces:
            # " " is the separator expat puts between the parts of a
            # namespaced name; the *_ns handlers split on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None if there is no parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 if there is no parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current input source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """expat start-tag callback (namespaces off)."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """expat end-tag callback (namespaces off)."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """expat start-tag callback (namespaces on).

        Names arrive as whitespace-separated parts: one part means no
        namespace, two parts are (uri, localname), three parts are
        (uri, localname, prefix).
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                # Rebuild "prefix:localname" for the qualified name.
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """expat end-tag callback (namespaces on); see start_element_ns."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """expat namespace-declaration start callback."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """expat namespace-declaration end callback."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """expat DOCTYPE callback; forwards to the lexical handler's startDTD.

        Note the argument order change: startDTD receives (name, pubid, sysid).
        """
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """expat unparsed-entity callback; forwards to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """expat notation callback; forwards to the DTD handler."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat external-entity callback.

        Returns 1 when the entity was skipped or parsed, 0 when parsing
        it raised.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the outer parser/source and switch to a sub-parser for the
        # entity; they are restored after a successful parse.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): on this path the entity stack is not popped, so
            # self._parser / self._source still refer to the entity.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """expat skipped-entity callback; forwards to the content handler."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
[434] Fix | Delete
[435] Fix | Delete
# ---
[436] Fix | Delete
[437] Fix | Delete
def create_parser(*args, **kwargs):
    """Return a new ExpatParser, forwarding all arguments to its constructor."""
    return ExpatParser(*args, **kwargs)
[439] Fix | Delete
[440] Fix | Delete
# ---
[441] Fix | Delete
[442] Fix | Delete
if __name__ == "__main__":
    # Manual smoke test: parse a remote document and echo it through
    # XMLGenerator.  Requires network access.
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")
[448] Fix | Delete
[449] Fix | Delete
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function