# File: sre_compile.py
#
# Secret Labs' Regular Expression Engine
#
# convert template to internal format
#
# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""
[10] Fix | Delete
[11] Fix | Delete
import _sre
[12] Fix | Delete
import sre_parse
[13] Fix | Delete
from sre_constants import *
[14] Fix | Delete
[15] Fix | Delete
# Refuse to load if the C engine and the Python constants disagree.
assert _sre.MAGIC == MAGIC, "SRE module mismatch"

# Opcode groups tested with ``op in ...`` by the compiler functions below.
_LITERAL_CODES = {LITERAL, NOT_LITERAL}
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
_SUCCESS_CODES = {SUCCESS, FAILURE}
_ASSERT_CODES = {ASSERT, ASSERT_NOT}
_UNIT_CODES = _LITERAL_CODES | {ANY, IN}

# Sets of lowercase characters which have the same uppercase.
_equivalences = (
    # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I
    (0x69, 0x131),  # iı
    # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S
    (0x73, 0x17f),  # sſ
    # MICRO SIGN, GREEK SMALL LETTER MU
    (0xb5, 0x3bc),  # µμ
    # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI
    (0x345, 0x3b9, 0x1fbe),  # \u0345ιι
    # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
    (0x390, 0x1fd3),  # ΐΐ
    # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
    (0x3b0, 0x1fe3),  # ΰΰ
    # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL
    (0x3b2, 0x3d0),  # βϐ
    # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL
    (0x3b5, 0x3f5),  # εϵ
    # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL
    (0x3b8, 0x3d1),  # θϑ
    # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL
    (0x3ba, 0x3f0),  # κϰ
    # GREEK SMALL LETTER PI, GREEK PI SYMBOL
    (0x3c0, 0x3d6),  # πϖ
    # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL
    (0x3c1, 0x3f1),  # ρϱ
    # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA
    (0x3c2, 0x3c3),  # ςσ
    # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL
    (0x3c6, 0x3d5),  # φϕ
    # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE
    (0x1e61, 0x1e9b),  # ṡẛ
    # LATIN SMALL LIGATURE LONG S T, LATIN SMALL LIGATURE ST
    (0xfb05, 0xfb06),  # ſtst
)

# Maps the lowercase code to lowercase codes which have the same uppercase.
# Every member of an equivalence tuple becomes a key whose value is the tuple
# of the *other* members, in their original order.
_ignorecase_fixes = {i: tuple(j for j in t if i != j)
                     for t in _equivalences for i in t}

def _combine_flags(flags, add_flags, del_flags,
                   TYPE_FLAGS=sre_parse.TYPE_FLAGS):
    """Return `flags` with `add_flags` switched on and `del_flags` switched off.

    If `add_flags` contains any bit of `TYPE_FLAGS`, every `TYPE_FLAGS` bit
    already set in `flags` is cleared first, so the newly added type flag
    replaces the previous one instead of being OR-ed together with it.
    `del_flags` is applied last and therefore wins over `add_flags`.
    """
    if add_flags & TYPE_FLAGS:
        flags &= ~TYPE_FLAGS
    return (flags | add_flags) & ~del_flags

def _compile(code, pattern, flags):
    """Compile a (sub)pattern, appending the generated words to `code`.

    `pattern` is iterated as ``(op, av)`` pairs and `flags` is the bitmask of
    SRE_FLAG_* values in effect for it.  Nothing is returned; `code` is
    extended in place.  Operators with a variable-length body first emit a
    placeholder word which is patched, once the body has been emitted, with
    the distance from the placeholder to the current end of `code`.

    Raises `error` for a repeat operator under SRE_FLAG_TEMPLATE, for a
    look-behind assertion whose body is not fixed-width, and for an operator
    that is not handled here.
    """
    emit = code.append

    # Case-insensitive helpers; they stay None for case-sensitive and for
    # locale-dependent matching.  `fixes` is only set in Unicode mode.
    iscased = None
    tolower = None
    fixes = None
    if flags & SRE_FLAG_IGNORECASE and not flags & SRE_FLAG_LOCALE:
        if flags & SRE_FLAG_UNICODE:
            iscased = _sre.unicode_iscased
            tolower = _sre.unicode_tolower
            fixes = _ignorecase_fixes
        else:
            iscased = _sre.ascii_iscased
            tolower = _sre.ascii_tolower

    for op, av in pattern:
        if op in _LITERAL_CODES:
            if not flags & SRE_FLAG_IGNORECASE:
                emit(op)
                emit(av)
            elif flags & SRE_FLAG_LOCALE:
                emit(OP_LOCALE_IGNORE[op])
                emit(av)
            elif not iscased(av):
                # An uncased character needs no case-insensitive opcode.
                emit(op)
                emit(av)
            else:
                lo = tolower(av)
                if not fixes:  # ascii
                    emit(OP_IGNORE[op])
                    emit(lo)
                elif lo not in fixes:
                    emit(OP_UNICODE_IGNORE[op])
                    emit(lo)
                else:
                    # The character has extra case equivalents: emit a small
                    # set holding the lowercase form and all of its fixes.
                    emit(IN_UNI_IGNORE)
                    skip = len(code)
                    emit(0)
                    if op is NOT_LITERAL:
                        emit(NEGATE)
                    for k in (lo,) + fixes[lo]:
                        emit(LITERAL)
                        emit(k)
                    emit(FAILURE)
                    code[skip] = len(code) - skip
        elif op is IN:
            charset, hascased = _optimize_charset(av, iscased, tolower, fixes)
            if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
                emit(IN_LOC_IGNORE)
            elif not hascased:
                emit(IN)
            elif not fixes:  # ascii
                emit(IN_IGNORE)
            else:
                emit(IN_UNI_IGNORE)
            skip = len(code)
            emit(0)
            _compile_charset(charset, flags, code)
            code[skip] = len(code) - skip
        elif op is ANY:
            if flags & SRE_FLAG_DOTALL:
                emit(ANY_ALL)
            else:
                emit(ANY)
        elif op in _REPEATING_CODES:
            if flags & SRE_FLAG_TEMPLATE:
                raise error("internal: unsupported template operator %r" % (op,))
            if _simple(av[2]):
                # Single-unit body: use the *_ONE opcodes, body ends in SUCCESS.
                if op is MAX_REPEAT:
                    emit(REPEAT_ONE)
                else:
                    emit(MIN_REPEAT_ONE)
                skip = len(code)
                emit(0)
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                emit(SUCCESS)
                code[skip] = len(code) - skip
            else:
                # General body: REPEAT ... followed by MAX_UNTIL / MIN_UNTIL.
                emit(REPEAT)
                skip = len(code)
                emit(0)
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                code[skip] = len(code) - skip
                if op is MAX_REPEAT:
                    emit(MAX_UNTIL)
                else:
                    emit(MIN_UNTIL)
        elif op is SUBPATTERN:
            group, add_flags, del_flags, p = av
            if group:
                emit(MARK)
                emit((group - 1) * 2)
            _compile(code, p, _combine_flags(flags, add_flags, del_flags))
            if group:
                emit(MARK)
                emit((group - 1) * 2 + 1)
        elif op in _SUCCESS_CODES:
            emit(op)
        elif op in _ASSERT_CODES:
            emit(op)
            skip = len(code)
            emit(0)
            if av[0] >= 0:
                emit(0)  # look ahead
            else:
                lo, hi = av[1].getwidth()
                if lo != hi:
                    raise error("look-behind requires fixed-width pattern")
                emit(lo)  # look behind
            _compile(code, av[1], flags)
            emit(SUCCESS)
            code[skip] = len(code) - skip
        elif op is CALL:
            emit(op)
            skip = len(code)
            emit(0)
            _compile(code, av, flags)
            emit(SUCCESS)
            code[skip] = len(code) - skip
        elif op is AT:
            emit(op)
            if flags & SRE_FLAG_MULTILINE:
                av = AT_MULTILINE.get(av, av)
            if flags & SRE_FLAG_LOCALE:
                av = AT_LOCALE.get(av, av)
            elif flags & SRE_FLAG_UNICODE:
                av = AT_UNICODE.get(av, av)
            emit(av)
        elif op is BRANCH:
            emit(op)
            # Positions of the JUMP operands, patched once the end is known.
            jump_slots = []
            for alternative in av[1]:
                skip = len(code)
                emit(0)
                _compile(code, alternative, flags)
                emit(JUMP)
                jump_slots.append(len(code))
                emit(0)
                code[skip] = len(code) - skip
            emit(FAILURE)  # end of branch
            for slot in jump_slots:
                code[slot] = len(code) - slot
        elif op is CATEGORY:
            emit(op)
            if flags & SRE_FLAG_LOCALE:
                av = CH_LOCALE[av]
            elif flags & SRE_FLAG_UNICODE:
                av = CH_UNICODE[av]
            emit(av)
        elif op is GROUPREF:
            if not flags & SRE_FLAG_IGNORECASE:
                emit(op)
            elif flags & SRE_FLAG_LOCALE:
                emit(GROUPREF_LOC_IGNORE)
            elif not fixes:  # ascii
                emit(GROUPREF_IGNORE)
            else:
                emit(GROUPREF_UNI_IGNORE)
            emit(av - 1)
        elif op is GROUPREF_EXISTS:
            emit(op)
            emit(av[0] - 1)
            skipyes = len(code)
            emit(0)
            _compile(code, av[1], flags)
            if av[2]:
                emit(JUMP)
                skipno = len(code)
                emit(0)
                code[skipyes] = len(code) - skipyes + 1
                _compile(code, av[2], flags)
                code[skipno] = len(code) - skipno
            else:
                code[skipyes] = len(code) - skipyes + 1
        else:
            raise error("internal: unsupported operand type %r" % (op,))

def _compile_charset(charset, flags, code):
    """Append the set subprogram for `charset` to `code`.

    `charset` is iterated as ``(op, av)`` pairs.  Each opcode is emitted
    followed by its operand words (none for NEGATE), and the subprogram is
    terminated with FAILURE.  CATEGORY operands are translated through
    CH_LOCALE or CH_UNICODE when the matching flag is set in `flags`.

    Raises `error` for an operator that is not handled here; note that the
    offending opcode has already been appended to `code` at that point.
    """
    emit = code.append
    for op, av in charset:
        emit(op)
        if op is NEGATE:
            pass
        elif op is LITERAL:
            emit(av)
        elif op is RANGE or op is RANGE_UNI_IGNORE:
            emit(av[0])
            emit(av[1])
        elif op is CHARSET:
            code.extend(av)
        elif op is BIGCHARSET:
            code.extend(av)
        elif op is CATEGORY:
            if flags & SRE_FLAG_LOCALE:
                emit(CH_LOCALE[av])
            elif flags & SRE_FLAG_UNICODE:
                emit(CH_UNICODE[av])
            else:
                emit(av)
        else:
            raise error("internal: unsupported set operator %r" % (op,))
    emit(FAILURE)

def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
    """Return ``(charset, hascased)`` with `charset` in a compact form.

    `charset` is iterated as ``(op, av)`` pairs.  LITERAL and RANGE items are
    collected in a byte map (one byte per character code); NEGATE items are
    kept at the front of the result and every other item is appended
    unchanged after the optimized part.

    When `fixup` is given, characters are passed through it before being
    recorded, members of `fixes` are expanded to their listed equivalents,
    and `iscased` is used to work out `hascased`.

    The result uses, in order of preference: up to two LITERAL/RANGE items
    (the original `charset` object is returned instead if that is no shorter
    and no case mapping took place), one CHARSET bitmap for codes below 256,
    or one BIGCHARSET for codes below 65536.
    """
    out = []
    tail = []
    charmap = bytearray(256)
    hascased = False
    for op, av in charset:
        # The loop body runs once, or twice if the map had to be enlarged.
        while True:
            try:
                if op is LITERAL:
                    if fixup:
                        lo = fixup(av)
                        charmap[lo] = 1
                        if fixes and lo in fixes:
                            for k in fixes[lo]:
                                charmap[k] = 1
                        if not hascased and iscased(av):
                            hascased = True
                    else:
                        charmap[av] = 1
                elif op is RANGE:
                    r = range(av[0], av[1] + 1)
                    if fixup:
                        if fixes:
                            for i in map(fixup, r):
                                charmap[i] = 1
                                if i in fixes:
                                    for k in fixes[i]:
                                        charmap[k] = 1
                        else:
                            for i in map(fixup, r):
                                charmap[i] = 1
                        if not hascased:
                            hascased = any(map(iscased, r))
                    else:
                        for i in r:
                            charmap[i] = 1
                elif op is NEGATE:
                    out.append((op, av))
                else:
                    tail.append((op, av))
            except IndexError:
                if len(charmap) == 256:
                    # character set contains non-UCS1 character codes:
                    # grow the map to 65536 entries and redo this item
                    charmap += b'\0' * 0xff00
                    continue
                # Character set contains non-BMP character codes.
                if fixup:
                    hascased = True
                    # There are only two ranges of cased non-BMP characters:
                    # 10400-1044F (Deseret) and 118A0-118DF (Warang Citi),
                    # and for both ranges RANGE_UNI_IGNORE works.
                    if op is RANGE:
                        op = RANGE_UNI_IGNORE
                tail.append((op, av))
            break

    # compress character map: collect runs of consecutive set entries as
    # half-open (start, stop) pairs, giving up (runs = None) on a third run
    runs = []
    q = 0
    while True:
        p = charmap.find(1, q)
        if p < 0:
            break
        if len(runs) >= 2:
            runs = None
            break
        q = charmap.find(0, p)
        if q < 0:
            runs.append((p, len(charmap)))
            break
        runs.append((p, q))
    if runs is not None:
        # use literal/range
        for p, q in runs:
            if q - p == 1:
                out.append((LITERAL, p))
            else:
                out.append((RANGE, (p, q - 1)))
        out += tail
        # if the case was changed or new representation is more compact
        if hascased or len(out) < len(charset):
            return out, hascased
        # else original character set is good enough
        return charset, hascased

    # use bitmap
    if len(charmap) == 256:
        data = _mk_bitmap(charmap)
        out.append((CHARSET, data))
        out += tail
        return out, hascased

    # To represent a big charset, first a bitmap of all characters in the
    # set is constructed. Then, this bitmap is sliced into chunks of 256
    # characters, duplicate chunks are eliminated, and each chunk is
    # given a number. In the compiled expression, the charset is
    # represented by a 32-bit word sequence, consisting of one word for
    # the number of different chunks, a sequence of 256 bytes (64 words)
    # of chunk numbers indexed by their original chunk position, and a
    # sequence of 256-bit chunks (8 words each).

    # Compression is normally good: in a typical charset, large ranges of
    # Unicode will be either completely excluded (e.g. if only cyrillic
    # letters are to be matched), or completely included (e.g. if large
    # subranges of Kanji match). These ranges will be represented by
    # chunks of all one-bits or all zero-bits.

    # Matching can be also done efficiently: the more significant byte of
    # the Unicode character is an index into the chunk number, and the
    # less significant byte is a bit index in the chunk (just like the
    # CHARSET matching).

    charmap = bytes(charmap)  # should be hashable
    comps = {}
    mapping = bytearray(256)
    block = 0
    data = bytearray()
    for i in range(0, 65536, 256):
        chunk = charmap[i: i + 256]
        if chunk in comps:
            mapping[i // 256] = comps[chunk]
        else:
            mapping[i // 256] = comps[chunk] = block
            block += 1
            data += chunk
    data = _mk_bitmap(data)
    data[0:0] = [block] + _bytes_to_codes(mapping)
    out.append((BIGCHARSET, data))
    out += tail
    return out, hascased

# Width of one code word in bits, and the largest value such a word can hold.
_CODEBITS = _sre.CODESIZE * 8
MAXCODE = (1 << _CODEBITS) - 1
# Translation table turning byte 0 into b'0' and every other byte into b'1'.
_BITS_TRANS = b'0' + b'1' * 255


def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
    """Pack a bytes-like map of flags into a list of `_CODEBITS`-bit integers.

    Every non-zero byte of `bits` counts as a set bit.  Byte ``i`` ends up as
    bit ``i % _CODEBITS`` (least significant first) of word ``i // _CODEBITS``.
    The callers in this file pass maps whose length is a multiple of 256;
    a length that is not a multiple of `_CODEBITS` is not supported.
    """
    # Reverse the '0'/'1' text so each slice reads most-significant-bit first.
    s = bits.translate(_BITS_TRANS)[::-1]
    return [_int(s[i - _CODEBITS: i], 2)
            for i in range(len(s), 0, -_CODEBITS)]

def _bytes_to_codes(b):
    """Reinterpret the buffer `b` as a list of native unsigned-int words.

    Used to convert block indices to a word array.  The asserts check that
    the native ``'I'`` item size equals ``_sre.CODESIZE`` and that `b` was
    consumed completely.
    """
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

def _simple(p):
    """Return True if subpattern `p` is a "simple" operator.

    `p` must hold exactly one ``(op, av)`` item.  That item is simple when
    its opcode is in `_UNIT_CODES`, or when it is a SUBPATTERN whose first
    field (the group) is None and whose last field is itself simple.
    """
    if len(p) != 1:
        return False
    op, av = p[0]
    if op is SUBPATTERN:
        return av[0] is None and _simple(av[-1])
    return op in _UNIT_CODES

def _generate_overlap_table(prefix):
    """
    Generate an overlap table for the following prefix.
    An overlap table is a table of the same size as the prefix which
    informs about the potential self-overlap for each index in the prefix:
    - if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...]
    - if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with
      prefix[0:k]
    """
    table = [0] * len(prefix)
    for i in range(1, len(prefix)):
        # Start from the overlap found for the previous index and fall back
        # to shorter overlaps until prefix[i] extends one of them.
        idx = table[i - 1]
        while prefix[i] != prefix[idx]:
            if idx == 0:
                table[i] = 0
                break
            idx = table[idx - 1]
        else:
            # Loop ended without break: prefix[i] matched prefix[idx].
            table[i] = idx + 1
    return table

def _get_iscased(flags):
    """Return the "is this character cased?" predicate matching `flags`.

    Returns None when SRE_FLAG_IGNORECASE is not set, the Unicode predicate
    when SRE_FLAG_UNICODE is also set, and the ASCII predicate otherwise.
    """
    if not flags & SRE_FLAG_IGNORECASE:
        return None
    elif flags & SRE_FLAG_UNICODE:
        return _sre.unicode_iscased
    else:
        return _sre.ascii_iscased

def _get_literal_prefix(pattern, flags):
    """Collect the literal character codes that `pattern` starts with.

    Walks ``pattern.data`` and gathers LITERAL operands, descending into
    SUBPATTERN items, until something else is met, a cased character is met
    while ignoring case, or a subpattern combines IGNORECASE with LOCALE.

    Returns ``(prefix, prefix_skip, got_all)``:
    - `prefix` is the list of collected character codes;
    - `prefix_skip` is the length of `prefix` at the point where the first
      subpattern with a group that is not None begins (taking nested
      subpatterns into account), or None if there is none;
    - `got_all` is True only if every item of the pattern was consumed.
    """
    prefix = []
    prefixappend = prefix.append
    prefix_skip = None
    iscased = _get_iscased(flags)
    for op, av in pattern.data:
        if op is LITERAL:
            if iscased and iscased(av):
                break
            prefixappend(av)
        elif op is SUBPATTERN:
            group, add_flags, del_flags, p = av
            flags1 = _combine_flags(flags, add_flags, del_flags)
            if flags1 & SRE_FLAG_IGNORECASE and flags1 & SRE_FLAG_LOCALE:
                break
            prefix1, prefix_skip1, got_all = _get_literal_prefix(p, flags1)
            # Only the first position found is kept.
            if prefix_skip is None:
                if group is not None:
                    prefix_skip = len(prefix)
                elif prefix_skip1 is not None:
                    prefix_skip = len(prefix) + prefix_skip1
            prefix.extend(prefix1)
            if not got_all:
                break
        else:
            break
    else:
        # The loop was never broken: the whole pattern is a literal prefix.
        return prefix, prefix_skip, True
    return prefix, prefix_skip, False

def _get_charset_prefix(pattern, flags):
[491] Fix | Delete
while True:
[492] Fix | Delete
if not pattern.data:
[493] Fix | Delete
return None
[494] Fix | Delete
op, av = pattern.data[0]
[495] Fix | Delete
if op is not SUBPATTERN:
[496] Fix | Delete
break
[497] Fix | Delete
group, add_flags, del_flags, pattern = av
[498] Fix | Delete
flags = _combine_flags(flags, add_flags, del_flags)
[499] Fix | Delete
12
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function