"""Filename matching with shell patterns.
fnmatch(FILENAME, PATTERN) matches according to the local convention.
fnmatchcase(FILENAME, PATTERN) always takes case into account.
The functions operate by translating the pattern into a regular
expression. They cache the compiled regular expressions for speed.
The function translate(PATTERN) returns a regular expression
corresponding to PATTERN. (It does not compile it.)
"""
# Names exported by a star-import of this module.
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
# Build a thread-safe incrementing counter to help create unique regexp group
# names across calls (the g<N> groups emitted when translating `*`).
from itertools import count
_nextgroupnum = count().__next__
"""Test whether FILENAME matches PATTERN.
Patterns are Unix shell style:
*       matches everything
?       matches any single character
[seq]   matches any character in seq
[!seq]  matches any char not in seq
An initial period in FILENAME is not special.
Both FILENAME and PATTERN are first case-normalized
if the operating system requires it.
If you don't want this, use fnmatchcase(FILENAME, PATTERN).
"""
# Case-normalize both arguments with os.path.normcase, then delegate to the
# case-sensitive matcher so the comparison follows the local convention.
name = os.path.normcase(name)
pat = os.path.normcase(pat)
return fnmatchcase(name, pat)
@functools.lru_cache(maxsize=256, typed=True)
def _compile_pattern(pat):
    """Compile the shell pattern PAT and return the regex's bound ``match``.

    PAT may be ``str`` or ``bytes``; the compiled regex has the same type
    as PAT.  Results are memoized by the LRU cache, and ``typed=True``
    keeps cache entries for arguments of different types separate.
    """
    if isinstance(pat, bytes):
        # translate() is called with a str, so decode the bytes pattern
        # first.  ISO-8859-1 maps every byte value to the code point with
        # the same number, so the decode/encode round trip is lossless.
        pat_str = str(pat, 'ISO-8859-1')
        res_str = translate(pat_str)
        res = bytes(res_str, 'ISO-8859-1')
    else:
        # str patterns need no transcoding.
        res = translate(pat)
    return re.compile(res).match
"""Construct a list from those elements of the iterable NAMES that match PAT."""
# Normalize and compile the pattern once, before any name is examined.
pat = os.path.normcase(pat)
match = _compile_pattern(pat)
# normcase on posix is NOP. Optimize it away from the loop.
# NOTE(review): the loop and the posix fast path mentioned above are not
# visible in this excerpt; the test below is the variant that still
# case-normalizes each name before matching.
if match(os.path.normcase(name)):
def fnmatchcase(name, pat):
    """Return True when FILENAME matches PATTERN, honoring case.

    Unlike fnmatch(), neither argument is case-normalized before the
    comparison is made.
    """
    matcher = _compile_pattern(pat)
    hit = matcher(name)
    return hit is not None
"""Translate a shell PATTERN to a regular expression.
There is no way to quote meta-characters.
"""
# Compress consecutive `*` into one: the condition holds only when the
# result is empty or does not already end with a STAR.
if (not res) or res[-1] is not STAR:
# A '!' at this position is the negation marker of a [!seq] set.
if j < n and pat[j] == '!':
# Test for a ']' at this position before scanning for the closing one.
if j < n and pat[j] == ']':
# Keep scanning while in bounds and the closing ']' has not been reached.
while j < n and pat[j] != ']':
# Double every backslash so it stays a literal backslash in the regex.
stuff = stuff.replace('\\', r'\\')
# The starting offset depends on whether the set opens with '!'.
k = i+2 if pat[i] == '!' else i+1
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
# A set whose first character is '^' or '[' is handled separately.
# NOTE(review): presumably because both are special at the start of a
# regex character class -- confirm against the branch body.
elif stuff[0] in ('^', '['):
# Fixed pieces at the start?
# The loop runs until the first STAR or the end of the input.
while i < n and inp[i] is not STAR:
# Now deal with STAR fixed STAR fixed ...
# For an interior `STAR fixed` pairing, we want to do a minimal
# .*? match followed by `fixed`, with no possibility of backtracking.
# We can't spell that directly, but can trick it into working by matching
# in a lookahead assertion, save the matched part in a group, then
# consume that group via a backreference. If the overall match fails,
# the lookahead assertion won't try alternatives. So the translation is:
# (?=(?P<name>.*?fixed))(?P=name)
# Group names are created as needed: g0, g1, g2, ...
# The numbers are obtained from _nextgroupnum() to ensure they're unique
# across calls and across threads. This is because people rely on the
# undocumented ability to join multiple translate() results together via
# "|" to build large regexps matching "one of many" shell patterns.
# Sanity check: the piece at this position must not be a STAR.
assert inp[i] is not STAR
# The loop covers the run of non-STAR pieces up to the next STAR or the end.
while i < n and inp[i] is not STAR:
# Fresh number so the group name cannot collide with another translation.
groupnum = _nextgroupnum()
# Emit the lookahead-capture followed by the backreference that consumes it.
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")