"""A lexical analyzer class for simple shell-like syntaxes."""

# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.

import os

[9] Fix | Delete

import re

[10] Fix | Delete

import sys

[11] Fix | Delete

from collections import deque

[12] Fix | Delete

[13] Fix | Delete

from io import StringIO

[14] Fix | Delete

[15] Fix | Delete

__all__ = ["shlex", "split", "quote"]

[16] Fix | Delete

[17] Fix | Delete

class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    The lexer reads characters one at a time from ``instream`` and groups
    them into tokens.  Its behaviour is controlled by public attributes
    that may be changed after construction:

    commenters        characters that start a comment running to end of line
    wordchars         characters that accumulate into multi-character tokens
    whitespace        characters that separate tokens
    whitespace_split  if true, split tokens only on whitespace
    quotes            characters that open/close a quoted string
    escape            escape characters (honoured in POSIX mode only)
    escapedquotes     quote characters inside which ``escape`` is honoured
    punctuation_chars characters returned as runs of punctuation
    source            if not None, the token that triggers file inclusion
    debug             0 = silent, >=1 tokens, >=2 state notes, >=3 every char
    eof               value returned at end of input (None if posix, else '')

    ``state`` holds the current lexer state: ``' '`` between tokens,
    ``'a'`` inside a word, ``'c'`` inside a punctuation run, a quote
    character while inside that kind of quote, an escape character right
    after an escape, and ``None`` once end of file has been seen.
    """

    def __init__(self, instream=None, infile=None, posix=False,
                 punctuation_chars=False):
        """Create a lexer.

        instream          a string or a file-like object offering ``read``
                          and ``readline``; ``sys.stdin`` is used when None.
        infile            name reported by error_leader() and used by
                          sourcehook() to resolve relative inclusions; it is
                          ignored when ``instream`` is None.
        posix             select POSIX-like rules (quotes stripped, escapes
                          honoured, EOF signalled by None instead of '').
        punctuation_chars False/empty to disable, True for ``'();<>|&'``,
                          or an explicit string of characters.
        """
        if isinstance(instream, str):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        if self.posix:
            self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                               'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        self.state = ' '
        # Tokens pushed back by push_token() (and, without punctuation_chars,
        # single characters pushed back by read_token()).
        self.pushback = deque()
        self.lineno = 1
        self.debug = 0
        self.token = ''
        # Saved (infile, instream, lineno) triples of suspended sources.
        self.filestack = deque()
        self.source = None
        if not punctuation_chars:
            punctuation_chars = ''
        elif punctuation_chars is True:
            punctuation_chars = '();<>|&'
        self.punctuation_chars = punctuation_chars
        if punctuation_chars:
            # _pushback_chars is a push back queue used by lookahead logic
            self._pushback_chars = deque()
            # these chars added because allowed in file names, args, wildcards
            self.wordchars += '~-./*?='
            # remove any punctuation chars from wordchars
            t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
            self.wordchars = self.wordchars.translate(t)

    def push_token(self, tok):
        """Push a token onto the stack popped by the get_token method."""
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        ``newstream`` may be a string (wrapped in a StringIO) or a file-like
        object.  The current source and its line number are saved so that
        pop_source() can resume them; line counting restarts at 1.
        """
        if isinstance(newstream, str):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Pop the input source stack.

        Closes the current stream, restores the most recently saved source
        and resets the lexer state to "between tokens".
        """
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Handles source inclusion (when ``self.source`` is set) and resumes
        the enclosing source when an included one is exhausted.  Returns
        ``self.eof`` once every source has been consumed.
        """
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                # The token after the source keyword names the file.
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input source.

        Ignores the token pushback stack and does not interpret source
        requests.  Returns '' at end of input, or None in POSIX mode when
        the empty result did not come from a quoted string.

        Raises ValueError if input ends inside a quoted string or right
        after an escape character.
        """
        quoted = False
        escapedstate = ' '
        while True:
            # Characters set aside by the punctuation lookahead are consumed
            # before anything new is read from the stream.
            if self.punctuation_chars and self._pushback_chars:
                nextchar = self._pushback_chars.pop()
            else:
                nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno += 1
            if self.debug >= 3:
                print("shlex: in state %r I see character: %r" % (self.state,
                                                                  nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno += 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.punctuation_chars:
                    self.token = nextchar
                    self.state = 'c'
                elif nextchar in self.quotes:
                    # Non-POSIX mode keeps the quote characters in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    # Any other character is a one-character token.
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        # Closing quote ends the token and is kept in it.
                        self.token += nextchar
                        self.state = ' '
                        break
                    else:
                        # Quote is dropped; the word may continue.
                        self.state = 'a'
                elif (self.posix and nextchar in self.escape and self.state
                      in self.escapedquotes):
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token += nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if (escapedstate in self.quotes and
                        nextchar != self.state and nextchar != escapedstate):
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
            elif self.state in ('a', 'c'):
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno += 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.state == 'c':
                    if nextchar in self.punctuation_chars:
                        self.token += nextchar
                    else:
                        # End of the punctuation run: keep the lookahead
                        # character (unless it is whitespace) for next time.
                        if nextchar not in self.whitespace:
                            self._pushback_chars.append(nextchar)
                        self.state = ' '
                        break
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif (nextchar in self.wordchars or nextchar in self.quotes
                      or (self.whitespace_split and
                          nextchar not in self.punctuation_chars)):
                    # whitespace_split must not swallow punctuation
                    # characters, otherwise "a&&b" would come back as a
                    # single token when punctuation_chars is in use.
                    self.token += nextchar
                else:
                    if self.punctuation_chars:
                        self._pushback_chars.append(nextchar)
                    else:
                        self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        # In POSIX mode an empty unquoted result means end of input.
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Strips surrounding double quotes from ``newfile``, resolves a
        relative name against the directory of the current ``infile`` and
        returns ``(filename, open_file)``.  The caller owns the opened file;
        pop_source() closes it once it has been pushed as a source.
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if isinstance(self.infile, str) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        ``infile`` and ``lineno`` default to the lexer's current values.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        """Return the lexer itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next token, raising StopIteration at end of input."""
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

[297] Fix | Delete

[298] Fix | Delete

def split(s, comments=False, posix=True):
    """Split *s* into a list of tokens using shell-like syntax.

    Tokens are separated on whitespace only.  Unless *comments* is true,
    comment recognition is switched off so that '#' is treated like any
    other character.  *posix* is passed through to the lexer.
    """
    tokenizer = shlex(s, posix=posix)
    tokenizer.whitespace_split = True
    if not comments:
        # An empty commenters string disables comment handling entirely.
        tokenizer.commenters = ''
    return [token for token in tokenizer]

[304] Fix | Delete

[305] Fix | Delete

[306] Fix | Delete

# Finds the first character that is not an ASCII word character or one of
# @ % + = : , . / - ; a match means the string must be quoted.
_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search


def quote(s):
    """Return a shell-escaped version of the string *s*."""
    if not s:
        # An empty argument still has to appear as an explicit empty word.
        return "''"
    if _find_unsafe(s) is not None:
        # Wrap in single quotes; each embedded single quote is emitted
        # inside double quotes, so $'b becomes '$'"'"'b'.
        escaped = s.replace("'", "'\"'\"'")
        return "'" + escaped + "'"
    return s

[318] Fix | Delete

[319] Fix | Delete

[320] Fix | Delete

def _print_tokens(lexer):

[321] Fix | Delete

while 1:

[322] Fix | Delete

tt = lexer.get_token()

[323] Fix | Delete

if not tt:

[324] Fix | Delete

break

[325] Fix | Delete

print("Token: " + repr(tt))

[326] Fix | Delete

[327] Fix | Delete

if __name__ == '__main__':
    # Script mode: tokenize the file named by the first argument, or
    # standard input when no argument is given, printing each token.
    if len(sys.argv) != 1:
        fn = sys.argv[1]
        with open(fn) as f:
            _print_tokens(shlex(f, fn))
    else:
        _print_tokens(shlex())

[334] Fix | Delete

[335] Fix | Delete