Edit File by line
/home/barbar84/www/wp-conte.../plugins/sujqvwi/AnonR/smanonr..../lib64/python3....
File: sre_parse.py
#
[0] Fix | Delete
# Secret Labs' Regular Expression Engine
[1] Fix | Delete
#
[2] Fix | Delete
# convert re-style regular expression to sre pattern
[3] Fix | Delete
#
[4] Fix | Delete
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
[5] Fix | Delete
#
[6] Fix | Delete
# See the sre.py file for information on usage and redistribution.
[7] Fix | Delete
#
[8] Fix | Delete
[9] Fix | Delete
"""Internal support module for sre"""
[10] Fix | Delete
[11] Fix | Delete
# XXX: show string offset and offending character for all errors
[12] Fix | Delete
[13] Fix | Delete
from sre_constants import *
[14] Fix | Delete
[15] Fix | Delete
# Pattern metacharacters, and the ones that start a repeat operator.
SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"

# Character sets consulted by the escape handlers further down.
DIGITS = frozenset("0123456789")
OCTDIGITS = frozenset("01234567")
HEXDIGITS = frozenset("0123456789abcdefABCDEF")
ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
WHITESPACE = frozenset(" \t\n\r\v\f")

# Opcode groups used by SubPattern.getwidth(): repeat operators, and
# operators that always account for exactly one character.
_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT})
_UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY})

# Two-character escapes that stand for one literal character.  Each key
# is a backslash followed by the letter; the value is the code point of
# the control character that letter denotes.
ESCAPES = {
    "\\" + letter: (LITERAL, ord(char))
    for letter, char in zip("abfnrtv\\", "\a\b\f\n\r\t\v\\")
}

# Escapes that stand for an anchor (AT ...) or a character category
# wrapped in an IN set.
CATEGORIES = {
    r"\A": (AT, AT_BEGINNING_STRING),  # start of string
    r"\b": (AT, AT_BOUNDARY),
    r"\B": (AT, AT_NON_BOUNDARY),
    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    r"\Z": (AT, AT_END_STRING),  # end of string
}

# Single-letter flag names mapped to their SRE flag bits.
FLAGS = {
    # standard flags
    "i": SRE_FLAG_IGNORECASE,
    "L": SRE_FLAG_LOCALE,
    "m": SRE_FLAG_MULTILINE,
    "s": SRE_FLAG_DOTALL,
    "x": SRE_FLAG_VERBOSE,
    # extensions
    "a": SRE_FLAG_ASCII,
    "t": SRE_FLAG_TEMPLATE,
    "u": SRE_FLAG_UNICODE,
}

# Bit masks built from the flag constants above.
TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE
GLOBAL_FLAGS = SRE_FLAG_DEBUG | SRE_FLAG_TEMPLATE
[68] Fix | Delete
[69] Fix | Delete
class Verbose(Exception):
    """Plain Exception subclass carrying no state of its own.

    Where it is raised and caught is not visible in this part of the file.
    """
[71] Fix | Delete
[72] Fix | Delete
class State:
    """Mutable bookkeeping shared while one pattern is being parsed.

    Attributes:
        flags: flag bits, starting at 0.
        groupdict: maps a group name to its numeric id.
        groupwidths: one slot per group; ``None`` until ``closegroup``
            stores that group's ``getwidth()`` result.  Slot 0 is
            reserved for group 0.
        lookbehindgroups: ``None``, or the group id threshold used by
            ``checklookbehindgroup``.
    """

    def __init__(self):
        self.flags = 0
        self.groupdict = {}
        self.groupwidths = [None]  # group 0
        self.lookbehindgroups = None

    @property
    def groups(self):
        """Number of group slots allocated so far (group 0 included)."""
        return len(self.groupwidths)

    def opengroup(self, name=None):
        """Allocate the next group id, optionally binding *name* to it.

        Raises ``error`` when the group count exceeds ``MAXGROUPS`` or
        when *name* is already bound to an earlier group.
        """
        new_id = self.groups
        self.groupwidths.append(None)
        if self.groups > MAXGROUPS:
            raise error("too many groups")
        if name is None:
            return new_id
        earlier_id = self.groupdict.get(name)
        if earlier_id is not None:
            raise error("redefinition of group name %r as group %d; "
                        "was group %d" % (name, new_id, earlier_id))
        self.groupdict[name] = new_id
        return new_id

    def closegroup(self, gid, p):
        """Record the width of subpattern *p* as the width of group *gid*."""
        self.groupwidths[gid] = p.getwidth()

    def checkgroup(self, gid):
        """Return True when *gid* exists and its width has been recorded."""
        if gid >= self.groups:
            return False
        return self.groupwidths[gid] is not None

    def checklookbehindgroup(self, gid, source):
        """Validate a reference to group *gid* while lookbehind tracking is on.

        Does nothing when ``lookbehindgroups`` is ``None``.  Otherwise
        raises ``source.error(...)`` if the group is still open or if its
        id is not below the ``lookbehindgroups`` threshold.
        """
        if self.lookbehindgroups is None:
            return
        if not self.checkgroup(gid):
            raise source.error('cannot refer to an open group')
        if gid >= self.lookbehindgroups:
            raise source.error('cannot refer to group defined in the same '
                               'lookbehind subpattern')
[106] Fix | Delete
[107] Fix | Delete
class SubPattern:
    """A parsed (sub)pattern in intermediate form.

    ``data`` is a list of ``(opcode, argument)`` pairs, ``state`` is the
    shared parser state, and ``width`` caches the result of
    ``getwidth()`` once it has been computed.
    """

    def __init__(self, state, data=None):
        self.state = state
        self.data = [] if data is None else data
        self.width = None

    def dump(self, level=0):
        """Print an indented, human-readable listing of this subpattern."""
        # NOTE(review): the indent unit is a single space as it appears in
        # this copy of the file -- confirm against the upstream source.
        pad = level * " "
        child_pad = (level + 1) * " "
        for opcode, arg in self.data:
            print(pad + str(opcode), end='')
            if opcode is IN:
                # member sublanguage: one "(op, value)" entry per line
                print()
                for member_op, member_arg in arg:
                    print(child_pad + str(member_op), member_arg)
            elif opcode is BRANCH:
                print()
                for position, alternative in enumerate(arg[1]):
                    if position:
                        print(pad + "OR")
                    alternative.dump(level + 1)
            elif opcode is GROUPREF_EXISTS:
                condgroup, item_yes, item_no = arg
                print('', condgroup)
                item_yes.dump(level + 1)
                if item_no:
                    print(pad + "ELSE")
                    item_no.dump(level + 1)
            elif isinstance(arg, (tuple, list)):
                # Scalars go on the current line separated by spaces;
                # nested subpatterns are dumped on lines of their own.
                at_line_start = False
                for part in arg:
                    if isinstance(part, SubPattern):
                        if not at_line_start:
                            print()
                        part.dump(level + 1)
                        at_line_start = True
                    else:
                        if not at_line_start:
                            print(' ', end='')
                        print(part, end='')
                        at_line_start = False
                if not at_line_start:
                    print()
            else:
                print('', arg)

    def __repr__(self):
        return repr(self.data)

    def __len__(self):
        return len(self.data)

    def __delitem__(self, index):
        del self.data[index]

    def __getitem__(self, index):
        # A slice yields a new SubPattern sharing the same state; a plain
        # index yields the stored (opcode, argument) pair itself.
        picked = self.data[index]
        if isinstance(index, slice):
            return SubPattern(self.state, picked)
        return picked

    def __setitem__(self, index, code):
        self.data[index] = code

    def insert(self, index, code):
        self.data.insert(index, code)

    def append(self, code):
        self.data.append(code)

    def getwidth(self):
        """Return ``(min, max)`` width of this subpattern, caching it.

        The bounds are clamped to ``MAXREPEAT - 1`` and ``MAXREPEAT``
        respectively.  Scanning stops at a SUCCESS opcode; opcodes not
        listed below contribute nothing.
        """
        if self.width is not None:
            return self.width
        total_min = total_max = 0
        for opcode, arg in self.data:
            if opcode is BRANCH:
                # narrowest minimum and widest maximum over alternatives
                step_min = MAXREPEAT - 1
                step_max = 0
                for alternative in arg[1]:
                    alt_min, alt_max = alternative.getwidth()
                    step_min = min(step_min, alt_min)
                    step_max = max(step_max, alt_max)
            elif opcode is CALL:
                step_min, step_max = arg.getwidth()
            elif opcode is SUBPATTERN:
                step_min, step_max = arg[-1].getwidth()
            elif opcode in _REPEATCODES:
                # arg is (min count, max count, repeated subpattern)
                body_min, body_max = arg[2].getwidth()
                step_min = body_min * arg[0]
                step_max = body_max * arg[1]
            elif opcode in _UNITCODES:
                step_min = step_max = 1
            elif opcode is GROUPREF:
                step_min, step_max = self.state.groupwidths[arg]
            elif opcode is GROUPREF_EXISTS:
                step_min, step_max = arg[1].getwidth()
                if arg[2] is None:
                    # no "else" branch: the construct may match nothing
                    step_min = 0
                else:
                    no_min, no_max = arg[2].getwidth()
                    step_min = min(step_min, no_min)
                    step_max = max(step_max, no_max)
            elif opcode is SUCCESS:
                break
            else:
                continue
            total_min += step_min
            total_max += step_max
        self.width = min(total_min, MAXREPEAT - 1), min(total_max, MAXREPEAT)
        return self.width
[220] Fix | Delete
[221] Fix | Delete
class Tokenizer:
    """Cursor over a pattern that yields one token at a time.

    A token is a single character, or a backslash together with the
    character after it.  ``next`` holds the look-ahead token (``None`` at
    end of input) and ``index`` is the offset just past that token.
    Bytes patterns are decoded as latin1 for scanning; the original
    object is kept in ``string`` for error reporting.
    """

    def __init__(self, string):
        is_text = isinstance(string, str)
        self.istext = is_text
        self.string = string
        self.decoded_string = string if is_text else str(string, 'latin1')
        self.index = 0
        self.next = None
        self.__next()

    def __next(self):
        # Load the token starting at self.index into self.next.
        cursor = self.index
        text = self.decoded_string
        try:
            token = text[cursor]
        except IndexError:
            self.next = None
            return
        if token == "\\":
            # a backslash always travels with the following character
            cursor += 1
            try:
                token += text[cursor]
            except IndexError:
                raise error("bad escape (end of pattern)",
                            self.string, len(self.string) - 1) from None
        self.index = cursor + 1
        self.next = token

    def match(self, char):
        """Consume the look-ahead token if it equals *char*; report success."""
        if char != self.next:
            return False
        self.__next()
        return True

    def get(self):
        """Return the look-ahead token and advance past it."""
        token = self.next
        self.__next()
        return token

    def getwhile(self, n, charset):
        """Consume up to *n* tokens that belong to *charset*; return them joined."""
        taken = []
        for _ in range(n):
            token = self.next
            if token not in charset:
                break
            taken.append(token)
            self.__next()
        return ''.join(taken)

    def getuntil(self, terminator, name):
        """Consume tokens up to and including *terminator*; return the text before it.

        Raises ``error`` when the input ends first or when nothing
        precedes the terminator; *name* is used in the message.
        """
        collected = []
        while True:
            token = self.next
            self.__next()
            if token is None:
                if not collected:
                    raise self.error("missing " + name)
                raise self.error("missing %s, unterminated name" % terminator,
                                 len(''.join(collected)))
            if token == terminator:
                if not collected:
                    raise self.error("missing " + name, 1)
                return ''.join(collected)
            collected.append(token)

    @property
    def pos(self):
        """Offset at which the look-ahead token starts."""
        pending = len(self.next) if self.next else 0
        return self.index - pending

    def tell(self):
        """Return the offset at which the look-ahead token starts."""
        pending = len(self.next) if self.next else 0
        return self.index - pending

    def seek(self, index):
        """Reposition to *index* and reload the look-ahead token."""
        self.index = index
        self.__next()

    def error(self, msg, offset=0):
        """Build (not raise) an ``error`` located *offset* before the current position."""
        return error(msg, self.string, self.tell() - offset)
[292] Fix | Delete
[293] Fix | Delete
def _class_escape(source, escape):
[294] Fix | Delete
# handle escape code inside character class
[295] Fix | Delete
code = ESCAPES.get(escape)
[296] Fix | Delete
if code:
[297] Fix | Delete
return code
[298] Fix | Delete
code = CATEGORIES.get(escape)
[299] Fix | Delete
if code and code[0] is IN:
[300] Fix | Delete
return code
[301] Fix | Delete
try:
[302] Fix | Delete
c = escape[1:2]
[303] Fix | Delete
if c == "x":
[304] Fix | Delete
# hexadecimal escape (exactly two digits)
[305] Fix | Delete
escape += source.getwhile(2, HEXDIGITS)
[306] Fix | Delete
if len(escape) != 4:
[307] Fix | Delete
raise source.error("incomplete escape %s" % escape, len(escape))
[308] Fix | Delete
return LITERAL, int(escape[2:], 16)
[309] Fix | Delete
elif c == "u" and source.istext:
[310] Fix | Delete
# unicode escape (exactly four digits)
[311] Fix | Delete
escape += source.getwhile(4, HEXDIGITS)
[312] Fix | Delete
if len(escape) != 6:
[313] Fix | Delete
raise source.error("incomplete escape %s" % escape, len(escape))
[314] Fix | Delete
return LITERAL, int(escape[2:], 16)
[315] Fix | Delete
elif c == "U" and source.istext:
[316] Fix | Delete
# unicode escape (exactly eight digits)
[317] Fix | Delete
escape += source.getwhile(8, HEXDIGITS)
[318] Fix | Delete
if len(escape) != 10:
[319] Fix | Delete
raise source.error("incomplete escape %s" % escape, len(escape))
[320] Fix | Delete
c = int(escape[2:], 16)
[321] Fix | Delete
chr(c) # raise ValueError for invalid code
[322] Fix | Delete
return LITERAL, c
[323] Fix | Delete
elif c == "N" and source.istext:
[324] Fix | Delete
import unicodedata
[325] Fix | Delete
# named unicode escape e.g. \N{EM DASH}
[326] Fix | Delete
if not source.match('{'):
[327] Fix | Delete
raise source.error("missing {")
[328] Fix | Delete
charname = source.getuntil('}', 'character name')
[329] Fix | Delete
try:
[330] Fix | Delete
c = ord(unicodedata.lookup(charname))
[331] Fix | Delete
except KeyError:
[332] Fix | Delete
raise source.error("undefined character name %r" % charname,
[333] Fix | Delete
len(charname) + len(r'\N{}'))
[334] Fix | Delete
return LITERAL, c
[335] Fix | Delete
elif c in OCTDIGITS:
[336] Fix | Delete
# octal escape (up to three digits)
[337] Fix | Delete
escape += source.getwhile(2, OCTDIGITS)
[338] Fix | Delete
c = int(escape[1:], 8)
[339] Fix | Delete
if c > 0o377:
[340] Fix | Delete
raise source.error('octal escape value %s outside of '
[341] Fix | Delete
'range 0-0o377' % escape, len(escape))
[342] Fix | Delete
return LITERAL, c
[343] Fix | Delete
elif c in DIGITS:
[344] Fix | Delete
raise ValueError
[345] Fix | Delete
if len(escape) == 2:
[346] Fix | Delete
if c in ASCIILETTERS:
[347] Fix | Delete
raise source.error('bad escape %s' % escape, len(escape))
[348] Fix | Delete
return LITERAL, ord(escape[1])
[349] Fix | Delete
except ValueError:
[350] Fix | Delete
pass
[351] Fix | Delete
raise source.error("bad escape %s" % escape, len(escape))
[352] Fix | Delete
[353] Fix | Delete
def _escape(source, escape, state):
[354] Fix | Delete
# handle escape code in expression
[355] Fix | Delete
code = CATEGORIES.get(escape)
[356] Fix | Delete
if code:
[357] Fix | Delete
return code
[358] Fix | Delete
code = ESCAPES.get(escape)
[359] Fix | Delete
if code:
[360] Fix | Delete
return code
[361] Fix | Delete
try:
[362] Fix | Delete
c = escape[1:2]
[363] Fix | Delete
if c == "x":
[364] Fix | Delete
# hexadecimal escape
[365] Fix | Delete
escape += source.getwhile(2, HEXDIGITS)
[366] Fix | Delete
if len(escape) != 4:
[367] Fix | Delete
raise source.error("incomplete escape %s" % escape, len(escape))
[368] Fix | Delete
return LITERAL, int(escape[2:], 16)
[369] Fix | Delete
elif c == "u" and source.istext:
[370] Fix | Delete
# unicode escape (exactly four digits)
[371] Fix | Delete
escape += source.getwhile(4, HEXDIGITS)
[372] Fix | Delete
if len(escape) != 6:
[373] Fix | Delete
raise source.error("incomplete escape %s" % escape, len(escape))
[374] Fix | Delete
return LITERAL, int(escape[2:], 16)
[375] Fix | Delete
elif c == "U" and source.istext:
[376] Fix | Delete
# unicode escape (exactly eight digits)
[377] Fix | Delete
escape += source.getwhile(8, HEXDIGITS)
[378] Fix | Delete
if len(escape) != 10:
[379] Fix | Delete
raise source.error("incomplete escape %s" % escape, len(escape))
[380] Fix | Delete
c = int(escape[2:], 16)
[381] Fix | Delete
chr(c) # raise ValueError for invalid code
[382] Fix | Delete
return LITERAL, c
[383] Fix | Delete
elif c == "N" and source.istext:
[384] Fix | Delete
import unicodedata
[385] Fix | Delete
# named unicode escape e.g. \N{EM DASH}
[386] Fix | Delete
if not source.match('{'):
[387] Fix | Delete
raise source.error("missing {")
[388] Fix | Delete
charname = source.getuntil('}', 'character name')
[389] Fix | Delete
try:
[390] Fix | Delete
c = ord(unicodedata.lookup(charname))
[391] Fix | Delete
except KeyError:
[392] Fix | Delete
raise source.error("undefined character name %r" % charname,
[393] Fix | Delete
len(charname) + len(r'\N{}'))
[394] Fix | Delete
return LITERAL, c
[395] Fix | Delete
elif c == "0":
[396] Fix | Delete
# octal escape
[397] Fix | Delete
escape += source.getwhile(2, OCTDIGITS)
[398] Fix | Delete
return LITERAL, int(escape[1:], 8)
[399] Fix | Delete
elif c in DIGITS:
[400] Fix | Delete
# octal escape *or* decimal group reference (sigh)
[401] Fix | Delete
if source.next in DIGITS:
[402] Fix | Delete
escape += source.get()
[403] Fix | Delete
if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
[404] Fix | Delete
source.next in OCTDIGITS):
[405] Fix | Delete
# got three octal digits; this is an octal escape
[406] Fix | Delete
escape += source.get()
[407] Fix | Delete
c = int(escape[1:], 8)
[408] Fix | Delete
if c > 0o377:
[409] Fix | Delete
raise source.error('octal escape value %s outside of '
[410] Fix | Delete
'range 0-0o377' % escape,
[411] Fix | Delete
len(escape))
[412] Fix | Delete
return LITERAL, c
[413] Fix | Delete
# not an octal escape, so this is a group reference
[414] Fix | Delete
group = int(escape[1:])
[415] Fix | Delete
if group < state.groups:
[416] Fix | Delete
if not state.checkgroup(group):
[417] Fix | Delete
raise source.error("cannot refer to an open group",
[418] Fix | Delete
len(escape))
[419] Fix | Delete
state.checklookbehindgroup(group, source)
[420] Fix | Delete
return GROUPREF, group
[421] Fix | Delete
raise source.error("invalid group reference %d" % group, len(escape) - 1)
[422] Fix | Delete
if len(escape) == 2:
[423] Fix | Delete
if c in ASCIILETTERS:
[424] Fix | Delete
raise source.error("bad escape %s" % escape, len(escape))
[425] Fix | Delete
return LITERAL, ord(escape[1])
[426] Fix | Delete
except ValueError:
[427] Fix | Delete
pass
[428] Fix | Delete
raise source.error("bad escape %s" % escape, len(escape))
[429] Fix | Delete
[430] Fix | Delete
def _uniq(items):
[431] Fix | Delete
return list(dict.fromkeys(items))
[432] Fix | Delete
[433] Fix | Delete
def _parse_sub(source, state, verbose, nested):
[434] Fix | Delete
# parse an alternation: a|b|c
[435] Fix | Delete
[436] Fix | Delete
items = []
[437] Fix | Delete
itemsappend = items.append
[438] Fix | Delete
sourcematch = source.match
[439] Fix | Delete
start = source.tell()
[440] Fix | Delete
while True:
[441] Fix | Delete
itemsappend(_parse(source, state, verbose, nested + 1,
[442] Fix | Delete
not nested and not items))
[443] Fix | Delete
if not sourcematch("|"):
[444] Fix | Delete
break
[445] Fix | Delete
[446] Fix | Delete
if len(items) == 1:
[447] Fix | Delete
return items[0]
[448] Fix | Delete
[449] Fix | Delete
subpattern = SubPattern(state)
[450] Fix | Delete
[451] Fix | Delete
# check if all items share a common prefix
[452] Fix | Delete
while True:
[453] Fix | Delete
prefix = None
[454] Fix | Delete
for item in items:
[455] Fix | Delete
if not item:
[456] Fix | Delete
break
[457] Fix | Delete
if prefix is None:
[458] Fix | Delete
prefix = item[0]
[459] Fix | Delete
elif item[0] != prefix:
[460] Fix | Delete
break
[461] Fix | Delete
else:
[462] Fix | Delete
# all subitems start with a common "prefix".
[463] Fix | Delete
# move it out of the branch
[464] Fix | Delete
for item in items:
[465] Fix | Delete
del item[0]
[466] Fix | Delete
subpattern.append(prefix)
[467] Fix | Delete
continue # check next one
[468] Fix | Delete
break
[469] Fix | Delete
[470] Fix | Delete
# check if the branch can be replaced by a character set
[471] Fix | Delete
set = []
[472] Fix | Delete
for item in items:
[473] Fix | Delete
if len(item) != 1:
[474] Fix | Delete
break
[475] Fix | Delete
op, av = item[0]
[476] Fix | Delete
if op is LITERAL:
[477] Fix | Delete
set.append((op, av))
[478] Fix | Delete
elif op is IN and av[0][0] is not NEGATE:
[479] Fix | Delete
set.extend(av)
[480] Fix | Delete
else:
[481] Fix | Delete
break
[482] Fix | Delete
else:
[483] Fix | Delete
# we can store this as a character set instead of a
[484] Fix | Delete
# branch (the compiler may optimize this even more)
[485] Fix | Delete
subpattern.append((IN, _uniq(set)))
[486] Fix | Delete
return subpattern
[487] Fix | Delete
[488] Fix | Delete
subpattern.append((BRANCH, (None, items)))
[489] Fix | Delete
return subpattern
[490] Fix | Delete
[491] Fix | Delete
def _parse(source, state, verbose, nested, first=False):
[492] Fix | Delete
# parse a simple pattern
[493] Fix | Delete
subpattern = SubPattern(state)
[494] Fix | Delete
[495] Fix | Delete
# precompute constants into local variables
[496] Fix | Delete
subpatternappend = subpattern.append
[497] Fix | Delete
sourceget = source.get
[498] Fix | Delete
sourcematch = source.match
[499] Fix | Delete
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function