Edit File by line
/home/barbar84/public_h.../wp-conte.../plugins/sujqvwi/ShExBy/shex_roo.../lib64/python2....
File: xmllib.py
"""A parser for XML, using the derived class as static DTD."""
[0] Fix | Delete
[1] Fix | Delete
# Author: Sjoerd Mullender.
[2] Fix | Delete
[3] Fix | Delete
import re
[4] Fix | Delete
import string
[5] Fix | Delete
[6] Fix | Delete
import warnings
[7] Fix | Delete
warnings.warn("The xmllib module is obsolete. Use xml.sax instead.",
[8] Fix | Delete
DeprecationWarning, 2)
[9] Fix | Delete
del warnings
[10] Fix | Delete
[11] Fix | Delete
# Release identifier of this module.
version = '0.3'


class Error(RuntimeError):
    """Exception type raised by the XML parser defined in this module."""
[15] Fix | Delete
[16] Fix | Delete
# Regular expressions used for parsing
[17] Fix | Delete
[18] Fix | Delete
# Building blocks shared by the compiled patterns below.
_S = '[ \t\r\n]+'                       # white space
_opS = '[ \t\r\n]*'                     # optional white space
_Name = '[a-zA-Z_:][-a-zA-Z0-9._:]*'    # valid XML name
_QStr = "(?:'[^']*'|\"[^\"]*\")"        # quoted XML string

illegal = re.compile('[^\t\r\n -\176\240-\377]') # illegal chars in content
# Characters at which goahead() stops scanning plain character data.
interesting = re.compile('[]&<]')

# References: "&name;", "&#NNN;" and "&#xHHH;".  The trailing character
# class also matches a missing ";" so that callers can report it.
amp = re.compile('&')
ref = re.compile(''.join([
    '&(', _Name, '|#[0-9]+|#x[0-9a-fA-F]+)[^-a-zA-Z0-9._:]',
]))
entityref = re.compile(''.join([
    '&(?P<name>', _Name, ')[^-a-zA-Z0-9._:]',
]))
charref = re.compile('&#(?P<char>[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])')
space = re.compile(_S + '$')
newline = re.compile('\n')

# One attribute inside a start tag: a name with an optional "=value",
# where the value is either quoted or a run of unquoted characters.
attrfind = re.compile(''.join([
    _S, '(?P<name>', _Name, ')',
    '(', _opS, '=', _opS,
    '(?P<value>', _QStr, '|[-a-zA-Z0-9.:+*%?!\(\)_#=~]+))?',
]))
starttagopen = re.compile('<' + _Name)
starttagend = re.compile(_opS + '(?P<slash>/?)>')
starttagmatch = re.compile(''.join([
    '<(?P<tagname>', _Name, ')',
    '(?P<attrs>(?:', attrfind.pattern, ')*)',
    starttagend.pattern,
]))
endtagopen = re.compile('</')
endbracket = re.compile(_opS + '>')
endbracketfind = re.compile(''.join(['(?:[^>\'"]|', _QStr, ')*>']))
tagfind = re.compile(_Name)
cdataopen = re.compile(r'<!\[CDATA\[')
cdataclose = re.compile(r'\]\]>')

# _ExternalId matches one of the following:
#       SYSTEM SystemLiteral
#       PUBLIC PubidLiteral SystemLiteral
# The two templates take the name of the capturing group via "%s".
_SystemLiteral = ''.join(['(?P<%s>', _QStr, ')'])
_PublicLiteral = (
    '(?P<%s>"[-\'\(\)+,./:=?;!*#@$_%% \n\ra-zA-Z0-9]*"|'
    "'[-\(\)+,./:=?;!*#@$_%% \n\ra-zA-Z0-9]*')"
)
_ExternalId = (
    '(?:SYSTEM|'
    'PUBLIC' + _S + (_PublicLiteral % 'pubid') +
    ')' + _S + (_SystemLiteral % 'syslit')
)
doctype = re.compile(''.join([
    '<!DOCTYPE', _S, '(?P<name>', _Name, ')',
    '(?:', _S, _ExternalId, ')?', _opS,
]))

# The <?xml ...?> declaration: mandatory version, optional encoding and
# optional standalone pseudo-attributes (values are captured with quotes).
xmldecl = re.compile(''.join([
    '<\?xml', _S,
    'version', _opS, '=', _opS, '(?P<version>', _QStr, ')',
    '(?:', _S, 'encoding', _opS, '=', _opS,
    "(?P<encoding>'[A-Za-z][-A-Za-z0-9._]*'|",
    '"[A-Za-z][-A-Za-z0-9._]*"))?',
    '(?:', _S, 'standalone', _opS, '=', _opS,
    '(?P<standalone>\'(?:yes|no)\'|"(?:yes|no)"))?',
    _opS, '\?>',
]))
procopen = re.compile(''.join([r'<\?(?P<proc>', _Name, ')', _opS]))
procclose = re.compile(_opS + r'\?>')
commentopen = re.compile('<!--')
commentclose = re.compile('-->')
doubledash = re.compile('--')
[71] Fix | Delete
# Translation table mapping every XML white space character (space, CR,
# LF, TAB) to a plain space.  maketrans() requires both arguments to have
# the same length, so the replacement must be exactly four spaces; it is
# written as a product so that white space collapsing cannot shorten it.
# string.maketrans only exists on Python 2; where it is missing the
# equivalent str.maketrans is used instead.
_maketrans = getattr(string, 'maketrans', None) or str.maketrans
attrtrans = _maketrans(' \r\n\t', ' ' * 4)
del _maketrans
[72] Fix | Delete
[73] Fix | Delete
# definitions for XML namespaces
[74] Fix | Delete
_NCName = '[a-zA-Z_][-a-zA-Z0-9._]*'    # XML Name, minus the ":"
# A complete string that is a single NCName.
ncname = re.compile(_NCName + '$')
# A complete qualified name: "local" or "prefix:local".
qname = re.compile(''.join([
    '(?:(?P<prefix>', _NCName, '):)?',  # optional prefix
    '(?P<local>', _NCName, ')$',
]))

# A namespace declaration attribute name: "xmlns" or "xmlns:prefix".
xmlns = re.compile(''.join(['xmlns(?::(?P<ncname>', _NCName, '))?$']))
[80] Fix | Delete
[81] Fix | Delete
# XML parser base class -- find tags and call handler functions.
[82] Fix | Delete
# Usage: p = XMLParser(); p.feed(data); ...; p.close().
[83] Fix | Delete
# The dtd is defined by deriving a class which defines methods with
[84] Fix | Delete
# special names to handle tags: start_foo and end_foo to handle <foo>
[85] Fix | Delete
# and </foo>, respectively. The data between tags is passed to the
[86] Fix | Delete
# parser by calling self.handle_data() with some data as argument (the
[87] Fix | Delete
# data may be split up in arbitrary chunks).
[88] Fix | Delete
[89] Fix | Delete
class XMLParser:
    """Base class that finds tags in XML text and calls handler methods.

    Usage: p = XMLParser(); p.feed(data); ...; p.close().  A derived class
    supplies the DTD by defining start_foo / end_foo methods for <foo> and
    </foo>; text between tags is passed to handle_data(), possibly split
    into arbitrary chunks.
    """

    attributes = dict()                 # default, to be overridden
    elements = dict()                   # default, to be overridden

    # parsing options, settable using keyword args in __init__
    __accept_unquoted_attributes = 0
    __accept_missing_endtag_name = 0
    __map_case = 0
    __accept_utf8 = 0
    __translate_attribute_references = 1
[99] Fix | Delete
[100] Fix | Delete
# Interface -- initialize and reset this instance
[101] Fix | Delete
def __init__(self, **kw):
    """Store any parsing options given as keywords, then reset().

    Recognised keywords: accept_unquoted_attributes,
    accept_missing_endtag_name, map_case, accept_utf8 and
    translate_attribute_references.  Options that are not passed keep
    their class-level defaults; other keywords are ignored.
    """
    self.__fixed = 0
    # The attribute names carry the class-private prefix explicitly so
    # that setattr() stores exactly what `self.__<option>` reads inside
    # XMLParser.
    for option in ('accept_unquoted_attributes',
                   'accept_missing_endtag_name',
                   'map_case',
                   'accept_utf8',
                   'translate_attribute_references'):
        if option in kw:
            setattr(self, '_XMLParser__' + option, kw[option])
    self.reset()
[114] Fix | Delete
[115] Fix | Delete
def __fixelements(self):
    """Build a per-instance `elements` table from start_*/end_* handlers.

    Marks the instance as "fixed" so that close() knows it has to remove
    the generated table again.
    """
    self.__fixed = 1
    self.elements = dict()
    # Instance attributes are scanned first, then the class and its bases.
    self.__fixdict(vars(self))
    self.__fixclass(self.__class__)
[120] Fix | Delete
[121] Fix | Delete
def __fixclass(self, kl):
    """Register handlers found in class *kl*, then in each of its bases."""
    self.__fixdict(vars(kl))
    for base in kl.__bases__:
        self.__fixclass(base)
[125] Fix | Delete
[126] Fix | Delete
def __fixdict(self, dict):
    """Add the start_*/end_* names found in *dict* to self.elements.

    Each entry of self.elements is a (start handler, end handler) pair
    keyed by tag name.  A slot that is already filled is left alone, so
    whichever namespace is scanned first wins.
    """
    for attr in dict.keys():
        if attr.startswith('start_'):
            tag = attr[6:]
            start, end = self.elements.get(tag, (None, None))
            if start is None:
                self.elements[tag] = (getattr(self, attr), end)
        elif attr.startswith('end_'):
            tag = attr[4:]
            start, end = self.elements.get(tag, (None, None))
            if end is None:
                self.elements[tag] = (start, getattr(self, attr))
[138] Fix | Delete
[139] Fix | Delete
# Interface -- reset this instance. Loses all unprocessed data
[140] Fix | Delete
def reset(self):
    """Return the parser to its initial state, dropping unprocessed data."""
    self.rawdata = ''
    self.stack = []
    self.lineno = 1
    self.nomoretags = self.literal = 0
    self.__at_start = 1
    self.__seen_doctype = None
    self.__seen_starttag = self.__use_namespaces = 0
    self.__namespaces = {'xml':None}    # xml is implicitly declared
    # backward compatibility hack: when `elements` is still the shared
    # default of XMLParser (i.e. not overridden), derive the table from
    # the start_*/end_* methods
    if self.elements is XMLParser.elements:
        self.__fixelements()
[155] Fix | Delete
[156] Fix | Delete
# For derived classes only -- enter literal mode (CDATA) till EOF
[157] Fix | Delete
def setnomoretags(self):
    """Switch to literal (CDATA) mode for the remainder of the input."""
    self.nomoretags = 1
    self.literal = 1
[159] Fix | Delete
[160] Fix | Delete
# For derived classes only -- enter literal mode (CDATA)
[161] Fix | Delete
def setliteral(self, *args):
    """Switch to literal (CDATA) mode; positional arguments are ignored."""
    self.literal = 1
[163] Fix | Delete
[164] Fix | Delete
# Interface -- feed some data to the parser. Call this as
[165] Fix | Delete
# often as you want, with as little or as much text as you
[166] Fix | Delete
# want (may include '\n'). (This just saves the text, all the
[167] Fix | Delete
# processing is done by goahead().)
[168] Fix | Delete
def feed(self, data):
    """Append *data* to the pending input and process as much as possible."""
    self.rawdata += data
    self.goahead(0)
[171] Fix | Delete
[172] Fix | Delete
# Interface -- handle the remaining data
[173] Fix | Delete
def close(self):
    """Process all remaining data as if it were followed by end of file."""
    self.goahead(1)
    if not self.__fixed:
        return
    self.__fixed = 0
    # remove self.elements so that we don't leak
    del self.elements
[179] Fix | Delete
[180] Fix | Delete
# Interface -- translate references
[181] Fix | Delete
def translate_references(self, data, all = 1):
    """Return *data* with character and entity references replaced.

    Nothing is translated when the translate_attribute_references option
    is off.  Character references are always replaced; entity references
    are looked up in self.entitydefs only when *all* is true.  Malformed
    references are reported through self.syntax_error().
    """
    if not self.__translate_attribute_references:
        return data
    pos = 0
    while True:
        found = amp.search(data, pos)
        if found is None:
            return data
        start = found.start(0)
        found = ref.match(data, start)
        if found is None:
            self.syntax_error("bogus `&'")
            pos = start + 1
            continue
        pos = found.end(0)
        text = found.group(1)
        rescan = 0
        if text[0] == '#':
            # character reference, decimal or hexadecimal
            if text[1] == 'x':
                text = chr(int(text[2:], 16))
            else:
                text = chr(int(text[1:]))
            if data[pos - 1] != ';':
                self.syntax_error("`;' missing after char reference")
                pos -= 1
        elif all:
            if text in self.entitydefs:
                # the replacement text may itself contain references
                text = self.entitydefs[text]
                rescan = 1
            elif data[pos - 1] != ';':
                self.syntax_error("bogus `&'")
                pos = start + 1         # just past the &
                continue
            else:
                self.syntax_error("reference to unknown entity `&%s;'" % text)
                text = '&' + text + ';'
        elif data[pos - 1] != ';':
            self.syntax_error("bogus `&'")
            pos = start + 1             # just past the &
            continue

        # at this point `text` is the replacement and `pos` marks the end
        # of the span of `data` that it replaces
        data = data[:start] + text + data[pos:]
        pos = start if rescan else start + len(text)
[229] Fix | Delete
[230] Fix | Delete
# Interface - return a dictionary of all namespaces currently valid
[231] Fix | Delete
def getnamespace(self):
    """Return a dictionary of all namespaces that are currently valid.

    The second item of every stack entry is merged in stack order, so
    entries nearer the top of the stack override earlier ones.
    """
    merged = {}
    for _tag, declared, _nstag in self.stack:
        merged.update(declared)
    return merged
[236] Fix | Delete
[237] Fix | Delete
# Internal -- handle data as far as reasonable. May leave state
[238] Fix | Delete
# and data to be processed by a subsequent call. If 'end' is
[239] Fix | Delete
# true, force handling all data as if followed by EOF marker.
[240] Fix | Delete
def goahead(self, end):
    """Process the buffered data as far as is reasonable.

    State and unprocessed data may be left for a later call.  When *end*
    is true all data is handled as if it were followed by an EOF marker,
    and missing end tags are reported and closed.
    """
    rawdata = self.rawdata
    i = 0
    n = len(rawdata)
    while i < n:
        if i > 0:
            self.__at_start = 0
        if self.nomoretags:
            # everything that is left is plain character data
            text = rawdata[i:n]
            self.handle_data(text)
            self.lineno += text.count('\n')
            i = n
            break
        match = interesting.search(rawdata, i)
        j = match.start(0) if match else n
        if i < j:
            # character data up to the next interesting character
            text = rawdata[i:j]
            if self.__at_start and space.match(text) is None:
                self.syntax_error('illegal data at start of file')
            self.__at_start = 0
            if not self.stack and space.match(text) is None:
                self.syntax_error('data not in content')
            if not self.__accept_utf8 and illegal.search(text):
                self.syntax_error('illegal character in content')
            self.handle_data(text)
            self.lineno += text.count('\n')
        i = j
        if i == n:
            break
        ch = rawdata[i]
        if ch == '<':
            if starttagopen.match(rawdata, i):
                if self.literal:
                    # in literal mode markup is passed on as data
                    self.handle_data(ch)
                    self.lineno += ch.count('\n')
                    i += 1
                    continue
                k = self.parse_starttag(i)
                if k < 0:
                    break
                self.__seen_starttag = 1
                self.lineno += rawdata[i:k].count('\n')
                i = k
                continue
            if endtagopen.match(rawdata, i):
                k = self.parse_endtag(i)
                if k < 0:
                    break
                self.lineno += rawdata[i:k].count('\n')
                i = k
                continue
            if commentopen.match(rawdata, i):
                if self.literal:
                    self.handle_data(ch)
                    self.lineno += ch.count('\n')
                    i += 1
                    continue
                k = self.parse_comment(i)
                if k < 0:
                    break
                self.lineno += rawdata[i:k].count('\n')
                i = k
                continue
            if cdataopen.match(rawdata, i):
                k = self.parse_cdata(i)
                if k < 0:
                    break
                self.lineno += rawdata[i:k].count('\n')
                i = k
                continue
            match = xmldecl.match(rawdata, i)
            if match:
                if not self.__at_start:
                    self.syntax_error("<?xml?> declaration not at start of document")
                version, encoding, standalone = match.group('version',
                                                            'encoding',
                                                            'standalone')
                # the captured values still carry their quotes
                if version[1:-1] != '1.0':
                    raise Error('only XML version 1.0 supported')
                if encoding:
                    encoding = encoding[1:-1]
                if standalone:
                    standalone = standalone[1:-1]
                self.handle_xml(encoding, standalone)
                i = match.end(0)
                continue
            match = procopen.match(rawdata, i)
            if match:
                k = self.parse_proc(i)
                if k < 0:
                    break
                self.lineno += rawdata[i:k].count('\n')
                i = k
                continue
            match = doctype.match(rawdata, i)
            if match:
                if self.literal:
                    self.handle_data(ch)
                    self.lineno += ch.count('\n')
                    i += 1
                    continue
                if self.__seen_doctype:
                    self.syntax_error('multiple DOCTYPE elements')
                if self.__seen_starttag:
                    self.syntax_error('DOCTYPE not at beginning of document')
                k = self.parse_doctype(match)
                if k < 0:
                    break
                self.__seen_doctype = match.group('name')
                if self.__map_case:
                    self.__seen_doctype = self.__seen_doctype.lower()
                self.lineno += rawdata[i:k].count('\n')
                i = k
                continue
        elif ch == '&':
            if self.literal:
                self.handle_data(ch)
                i += 1
                continue
            match = charref.match(rawdata, i)
            if match is not None:
                i = match.end(0)
                if rawdata[i-1] != ';':
                    self.syntax_error("`;' missing in charref")
                    i -= 1
                if not self.stack:
                    self.syntax_error('data not in content')
                # the group includes the terminating character; drop it
                self.handle_charref(match.group('char')[:-1])
                self.lineno += match.group(0).count('\n')
                continue
            match = entityref.match(rawdata, i)
            if match is not None:
                i = match.end(0)
                if rawdata[i-1] != ';':
                    self.syntax_error("`;' missing in entityref")
                    i -= 1
                name = match.group('name')
                if self.__map_case:
                    name = name.lower()
                if name in self.entitydefs:
                    # splice the replacement text into the buffer and
                    # continue parsing from where the reference started
                    self.rawdata = rawdata = rawdata[:match.start(0)] + self.entitydefs[name] + rawdata[i:]
                    n = len(rawdata)
                    i = match.start(0)
                else:
                    self.unknown_entityref(name)
                self.lineno += match.group(0).count('\n')
                continue
        elif ch == ']':
            if self.literal:
                self.handle_data(ch)
                i += 1
                continue
            if n-i < 3:
                # not enough data yet to tell whether this is "]]>"
                break
            if cdataclose.match(rawdata, i):
                self.syntax_error("bogus `]]>'")
            self.handle_data(ch)
            i += 1
            continue
        else:
            raise Error('neither < nor & ??')
        # We get here only if incomplete matches but
        # nothing else
        break
    # end while
    if i > 0:
        self.__at_start = 0
    if end and i < n:
        # at EOF the unparsable character is reported, passed on as data,
        # and parsing is retried on what follows it
        text = rawdata[i]
        self.syntax_error("bogus `%s'" % text)
        if not self.__accept_utf8 and illegal.search(text):
            self.syntax_error('illegal character in content')
        self.handle_data(text)
        self.lineno += text.count('\n')
        self.rawdata = rawdata[i+1:]
        return self.goahead(end)
    self.rawdata = rawdata[i:]
    if end:
        if not self.__seen_starttag:
            self.syntax_error('no elements in file')
        if self.stack:
            self.syntax_error('missing end tags')
            while self.stack:
                self.finish_endtag(self.stack[-1][0])
[422] Fix | Delete
[423] Fix | Delete
# Internal -- parse comment, return length or -1 if not terminated
[424] Fix | Delete
def parse_comment(self, i):
    """Parse the comment that starts at index *i* of self.rawdata.

    Returns the index just past the closing "-->", or -1 when the comment
    is not terminated in the data seen so far.  The comment text is passed
    to self.handle_comment(); problems inside the comment are reported
    through self.syntax_error().  Raises Error when rawdata[i:] does not
    start with "<!--".
    """
    rawdata = self.rawdata
    if rawdata[i:i+4] != '<!--':
        # name this function, consistent with the check in parse_cdata
        raise Error('unexpected call to parse_comment')
    res = commentclose.search(rawdata, i+4)
    if res is None:
        return -1
    body_start = i + 4
    body_end = res.start(0)
    if doubledash.search(rawdata, body_start, body_end):
        self.syntax_error("`--' inside comment")
    # Only look at the last character of the comment text itself.  For the
    # empty comment "<!---->" the preceding character is part of the
    # opening "<!--" and must not be reported as a third dash.
    if body_end > body_start and rawdata[body_end-1] == '-':
        self.syntax_error('comment cannot end in three dashes')
    if not self.__accept_utf8 and \
       illegal.search(rawdata, body_start, body_end):
        self.syntax_error('illegal character in comment')
    self.handle_comment(rawdata[body_start:body_end])
    return res.end(0)
[440] Fix | Delete
[441] Fix | Delete
# Internal -- handle DOCTYPE tag, return length or -1 if not terminated
[442] Fix | Delete
def parse_doctype(self, res):
    """Finish parsing a DOCTYPE declaration matched by *res*.

    *res* is the match object of the `doctype` pattern against
    self.rawdata.  Returns the index just past the closing ">", or -1 when
    the declaration is not complete yet.  On success the name, public id,
    system literal and the text of the internal subset (None when there is
    none) are passed to self.handle_doctype().
    """
    rawdata = self.rawdata
    n = len(rawdata)
    name = res.group('name')
    if self.__map_case:
        name = name.lower()
    pubid, syslit = res.group('pubid', 'syslit')
    if pubid is not None:
        pubid = pubid[1:-1]             # remove quotes
        pubid = ' '.join(pubid.split()) # normalize
    if syslit is not None:
        syslit = syslit[1:-1]           # remove quotes
    j = k = res.end(0)
    if k >= n:
        return -1
    if rawdata[k] == '[':
        # Scan the internal subset: track quoting and <...> nesting so
        # that only a "]" outside of both ends the subset.
        level = 0
        k += 1
        dq = sq = 0
        while k < n:
            c = rawdata[k]
            if not sq and c == '"':
                dq = not dq
            elif not dq and c == "'":
                sq = not sq
            elif sq or dq:
                pass
            elif level <= 0 and c == ']':
                closing = endbracket.match(rawdata, k+1)
                if closing is None:
                    return -1
                self.handle_doctype(name, pubid, syslit, rawdata[j+1:k])
                return closing.end(0)
            elif c == '<':
                level += 1
            elif c == '>':
                level -= 1
                if level < 0:
                    self.syntax_error("bogus `>' in DOCTYPE")
            k += 1
    closing = endbracketfind.match(rawdata, k)
    if closing is None:
        return -1
    if endbracket.match(rawdata, k) is None:
        self.syntax_error('garbage in DOCTYPE')
    self.handle_doctype(name, pubid, syslit, None)
    return closing.end(0)
[488] Fix | Delete
[489] Fix | Delete
# Internal -- handle CDATA tag, return length or -1 if not terminated
[490] Fix | Delete
def parse_cdata(self, i):
[491] Fix | Delete
rawdata = self.rawdata
[492] Fix | Delete
if rawdata[i:i+9] != '<![CDATA[':
[493] Fix | Delete
raise Error('unexpected call to parse_cdata')
[494] Fix | Delete
res = cdataclose.search(rawdata, i+9)
[495] Fix | Delete
if res is None:
[496] Fix | Delete
return -1
[497] Fix | Delete
if not self.__accept_utf8 and \
[498] Fix | Delete
illegal.search(rawdata, i+9, res.start(0)):
[499] Fix | Delete
12
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function