Edit File by line
/home/barbar84/public_h.../wp-conte.../plugins/sujqvwi/ShExBy/shex_roo.../lib64/python3..../urllib
File: parse.py
"""Parse (absolute and relative) URLs.
[0] Fix | Delete
[1] Fix | Delete
urlparse module is based upon the following RFC specifications.
[2] Fix | Delete
[3] Fix | Delete
RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
[4] Fix | Delete
and L. Masinter, January 2005.
[5] Fix | Delete
[6] Fix | Delete
RFC 2732 : "Format for Literal IPv6 Addresses in URL's" by R.Hinden, B.Carpenter
[7] Fix | Delete
and L.Masinter, December 1999.
[8] Fix | Delete
[9] Fix | Delete
RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
[10] Fix | Delete
Berners-Lee, R. Fielding, and L. Masinter, August 1998.
[11] Fix | Delete
[12] Fix | Delete
RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998.
[13] Fix | Delete
[14] Fix | Delete
RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
[15] Fix | Delete
1995.
[16] Fix | Delete
[17] Fix | Delete
RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
[18] Fix | Delete
McCahill, December 1994
[19] Fix | Delete
[20] Fix | Delete
RFC 3986 is considered the current standard and any future changes to
[21] Fix | Delete
urlparse module should conform with it. The urlparse module is
[22] Fix | Delete
currently not entirely compliant with this RFC due to de facto
[23] Fix | Delete
scenarios for parsing, and for backward compatibility purposes, some
[24] Fix | Delete
parsing quirks from older RFCs are retained. The testcases in
[25] Fix | Delete
test_urlparse.py provide a good indicator of parsing behavior.
[26] Fix | Delete
[27] Fix | Delete
The WHATWG URL Parser spec should also be considered. We are not compliant with
[28] Fix | Delete
it either due to existing user code API behavior expectations (Hyrum's Law).
[29] Fix | Delete
It serves as a useful guide when making changes.
[30] Fix | Delete
"""
[31] Fix | Delete
[32] Fix | Delete
import re
[33] Fix | Delete
import os
[34] Fix | Delete
import sys
[35] Fix | Delete
import collections
[36] Fix | Delete
[37] Fix | Delete
# Names exported by a star-import of this module.
__all__ = [
    "urlparse", "urlunparse", "urljoin", "urldefrag",
    "urlsplit", "urlunsplit", "urlencode", "parse_qs",
    "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
    "unquote", "unquote_plus", "unquote_to_bytes",
    "DefragResult", "ParseResult", "SplitResult",
    "DefragResultBytes", "ParseResultBytes", "SplitResultBytes",
]
[43] Fix | Delete
[44] Fix | Delete
# Scheme classification tables.
# The empty string stands for "no scheme given", which is the default
# scheme value returned by urlsplit() and urlparse().

uses_relative = [
    '', 'ftp', 'http', 'gopher', 'nntp', 'imap',
    'wais', 'file', 'https', 'shttp', 'mms',
    'prospero', 'rtsp', 'rtspu', 'sftp',
    'svn', 'svn+ssh', 'ws', 'wss',
]

uses_netloc = [
    '', 'ftp', 'http', 'gopher', 'nntp', 'telnet',
    'imap', 'wais', 'file', 'mms', 'https', 'shttp',
    'snews', 'prospero', 'rtsp', 'rtspu', 'rsync',
    'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
    'ws', 'wss',
]

uses_params = [
    '', 'ftp', 'hdl', 'prospero', 'http', 'imap',
    'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
    'mms', 'sftp', 'tel',
]

# The three lists below are no longer consulted by this module. They are
# undocumented but carry public-looking names, so they are kept for
# backwards compatibility.

non_hierarchical = [
    'gopher', 'hdl', 'mailto', 'news',
    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips',
]

uses_query = [
    '', 'http', 'wais', 'imap', 'https', 'shttp', 'mms',
    'gopher', 'rtsp', 'rtspu', 'sip', 'sips',
]

uses_fragment = [
    '', 'ftp', 'hdl', 'http', 'gopher', 'news',
    'nntp', 'wais', 'https', 'shttp', 'snews',
    'file', 'prospero',
]

# Every character that may appear in a scheme name.
scheme_chars = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
    '+-.'
)
[81] Fix | Delete
[82] Fix | Delete
# Leading and trailing C0 control characters and space, stripped per the
# WHATWG spec: every code point from U+0000 through U+0020 inclusive.
_WHATWG_C0_CONTROL_OR_SPACE = ''.join(map(chr, range(0x20 + 1)))

# Characters removed from anywhere inside a URL per the WHATWG spec.
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']

# XXX: Consider replacing with functools.lru_cache
MAX_CACHE_SIZE = 20
_parse_cache = {}
[92] Fix | Delete
[93] Fix | Delete
def clear_cache():
    """Empty both module-level caches: parsed URLs and quoter objects."""
    # Parse cache first, then the quoter cache, each cleared in place so
    # existing references to the dict objects remain valid.
    _parse_cache.clear()
    _safe_quoters.clear()
[97] Fix | Delete
[98] Fix | Delete
[99] Fix | Delete
# Helpers for bytes handling
# For 3.2, we deliberately require applications that
# handle improperly quoted URLs to do their own
# decoding and encoding. If valid use cases are
# presented, we may relax this by using latin-1
# decoding internally for 3.3
_implicit_encoding = 'ascii'
_implicit_errors = 'strict'


def _noop(obj):
    """Return *obj* unchanged (result coercion used for str input)."""
    return obj


def _encode_result(obj, encoding=_implicit_encoding,
                   errors=_implicit_errors):
    """Encode *obj* via its own ``encode`` method (used for bytes input)."""
    return obj.encode(encoding, errors)


def _decode_args(args, encoding=_implicit_encoding,
                 errors=_implicit_errors):
    """Decode each item of *args*; falsy items become the empty str."""
    return tuple('' if not item else item.decode(encoding, errors)
                 for item in args)


def _coerce_args(*args):
    """Normalize arguments to str and pick the matching result coercion.

    The type of the first argument decides the mode.  Returns the
    (possibly decoded) arguments followed by one extra element: ``_noop``
    when the first argument is a str, ``_encode_result`` otherwise.

    Raises TypeError when a later, non-empty argument disagrees with the
    first one about being a str.
    """
    str_input = isinstance(args[0], str)
    # Empty values are exempt from the check so that default arguments
    # such as "scheme=''" can be combined with bytes input.
    mixed = any(arg and isinstance(arg, str) != str_input
                for arg in args[1:])
    if mixed:
        raise TypeError("Cannot mix str and non-str arguments")
    if not str_input:
        return _decode_args(args) + (_encode_result,)
    return args + (_noop,)
[134] Fix | Delete
[135] Fix | Delete
# Result objects are more helpful than simple tuples
[136] Fix | Delete
class _ResultMixinStr:
    """Standard approach to encoding parsed results from str to bytes"""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        """Return the bytes counterpart with every field encoded.

        Relies on ``_encoded_counterpart`` being set on the concrete class.
        """
        encoded_fields = [field.encode(encoding, errors) for field in self]
        return self._encoded_counterpart(*encoded_fields)
[142] Fix | Delete
[143] Fix | Delete
[144] Fix | Delete
class _ResultMixinBytes:
    """Standard approach to decoding parsed results from bytes to str"""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        """Return the str counterpart with every field decoded.

        Relies on ``_decoded_counterpart`` being set on the concrete class.
        """
        decoded_fields = [field.decode(encoding, errors) for field in self]
        return self._decoded_counterpart(*decoded_fields)
[150] Fix | Delete
[151] Fix | Delete
[152] Fix | Delete
class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element"""
    __slots__ = ()

    @property
    def username(self):
        """First element of the subclass-provided ``_userinfo`` pair."""
        return self._userinfo[0]

    @property
    def password(self):
        """Second element of the subclass-provided ``_userinfo`` pair."""
        return self._userinfo[1]

    @property
    def hostname(self):
        """Lower-cased host name, or None when the host part is empty."""
        hostname = self._hostinfo[0]
        if not hostname:
            return None
        # Scoped IPv6 address may have zone info, which must not be lowercased
        # like http://[fe80::822a:a8ff:fe49:470c%tESt]:1234/keys
        separator = '%' if isinstance(hostname, str) else b'%'
        hostname, percent, zone = hostname.partition(separator)
        return hostname.lower() + percent + zone

    @property
    def port(self):
        """Port as an int, or None when the netloc carries no port.

        Raises ValueError when the port is not made up solely of ASCII
        digits or lies outside 0-65535.
        """
        port = self._hostinfo[1]
        if port is None:
            return None
        # int() on its own also accepts a sign, surrounding whitespace,
        # underscores and non-ASCII digits, none of which form a valid URL
        # port.  strip() leaves a non-empty remainder exactly when some
        # character is not an ASCII digit; this works for str and bytes.
        digits = '0123456789' if isinstance(port, str) else b'0123456789'
        if not port or port.strip(digits):
            raise ValueError(
                "Port could not be cast to integer value as %r" % (port,))
        port = int(port, 10)
        if not (0 <= port <= 65535):
            raise ValueError("Port out of range 0-65535")
        return port
[183] Fix | Delete
[184] Fix | Delete
[185] Fix | Delete
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from before the last '@'.

        Both are None without an '@'; password is None without a ':'.
        """
        userinfo, at_sign, _ = self.netloc.rpartition('@')
        if not at_sign:
            return None, None
        username, colon, password = userinfo.partition(':')
        return username, (password if colon else None)

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from after the last '@'.

        A bracketed host has its brackets removed; an empty port is None.
        """
        hostinfo = self.netloc.rpartition('@')[2]
        _, open_bracket, bracketed = hostinfo.partition('[')
        if open_bracket:
            # Bracketed literal: the port, if any, follows the closing ']'.
            hostname, _, after_bracket = bracketed.partition(']')
            port = after_bracket.partition(':')[2]
        else:
            hostname, _, port = hostinfo.partition(':')
        return hostname, (port or None)
[213] Fix | Delete
[214] Fix | Delete
[215] Fix | Delete
class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from before the last b'@'.

        Both are None without a b'@'; password is None without a b':'.
        """
        userinfo, at_sign, _ = self.netloc.rpartition(b'@')
        if not at_sign:
            return None, None
        username, colon, password = userinfo.partition(b':')
        return username, (password if colon else None)

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from after the last b'@'.

        A bracketed host has its brackets removed; an empty port is None.
        """
        hostinfo = self.netloc.rpartition(b'@')[2]
        _, open_bracket, bracketed = hostinfo.partition(b'[')
        if open_bracket:
            # Bracketed literal: the port, if any, follows the closing b']'.
            hostname, _, after_bracket = bracketed.partition(b']')
            port = after_bracket.partition(b':')[2]
        else:
            hostname, _, port = hostinfo.partition(b':')
        return hostname, (port or None)
[243] Fix | Delete
[244] Fix | Delete
[245] Fix | Delete
from collections import namedtuple

# Tuple layouts shared by the str and bytes flavours of each result type.
_DefragResultBase = namedtuple('DefragResult', ['url', 'fragment'])
_SplitResultBase = namedtuple(
    'SplitResult', ['scheme', 'netloc', 'path', 'query', 'fragment'])
_ParseResultBase = namedtuple(
    'ParseResult',
    ['scheme', 'netloc', 'path', 'params', 'query', 'fragment'])

# --- DefragResult documentation ---
_DefragResultBase.__doc__ = """
DefragResult(url, fragment)

A 2-tuple that contains the url without fragment identifier and the fragment
identifier as a separate argument.
"""

_DefragResultBase.url.__doc__ = """The URL with no fragment identifier."""

_DefragResultBase.fragment.__doc__ = """
Fragment identifier separated from URL, that allows indirect identification of a
secondary resource by reference to a primary resource and additional identifying
information.
"""

# --- SplitResult documentation ---
_SplitResultBase.__doc__ = """
SplitResult(scheme, netloc, path, query, fragment)

A 5-tuple that contains the different components of a URL. Similar to
ParseResult, but does not split params.
"""

_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request."""

_SplitResultBase.netloc.__doc__ = """
Network location where the request is made to.
"""

_SplitResultBase.path.__doc__ = """
The hierarchical path, such as the path to a file to download.
"""

_SplitResultBase.query.__doc__ = """
The query component, that contains non-hierarchical data, that along with data
in path component, identifies a resource in the scope of URI's scheme and
network location.
"""

_SplitResultBase.fragment.__doc__ = """
Fragment identifier, that allows indirect identification of a secondary resource
by reference to a primary resource and additional identifying information.
"""

# --- ParseResult documentation ---
_ParseResultBase.__doc__ = """
ParseResult(scheme, netloc, path, params, query, fragment)

A 6-tuple that contains components of a parsed URL.
"""

# Fields that ParseResult has in common with SplitResult reuse its text.
for _field in ('scheme', 'netloc', 'path', 'query', 'fragment'):
    getattr(_ParseResultBase, _field).__doc__ = (
        getattr(_SplitResultBase, _field).__doc__)
del _field

_ParseResultBase.params.__doc__ = """
Parameters for last path element used to dereference the URI in order to provide
access to perform some operation on the resource.
"""
[312] Fix | Delete
[313] Fix | Delete
[314] Fix | Delete
# Backwards-compatibility alias for _NetlocResultMixinStr.  ResultBase is
# no longer part of the documented API, but it is retained since
# deprecating it isn't worth the hassle.
ResultBase = _NetlocResultMixinStr
[318] Fix | Delete
[319] Fix | Delete
# Structured result objects for string data
[320] Fix | Delete
class DefragResult(_DefragResultBase, _ResultMixinStr):
    __slots__ = ()

    def geturl(self):
        """Return the URL, with '#' and the fragment appended if non-empty."""
        if not self.fragment:
            return self.url
        return self.url + '#' + self.fragment
[327] Fix | Delete
[328] Fix | Delete
class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    __slots__ = ()

    def geturl(self):
        """Return the URL string rebuilt from this 5-tuple by urlunsplit()."""
        return urlunsplit(self)
[332] Fix | Delete
[333] Fix | Delete
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    __slots__ = ()

    def geturl(self):
        """Return the URL string rebuilt from this 6-tuple by urlunparse()."""
        return urlunparse(self)
[337] Fix | Delete
[338] Fix | Delete
# Structured result objects for bytes data
[339] Fix | Delete
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    __slots__ = ()

    def geturl(self):
        """Return the URL, with b'#' and the fragment appended if non-empty."""
        if not self.fragment:
            return self.url
        return self.url + b'#' + self.fragment
[346] Fix | Delete
[347] Fix | Delete
class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    __slots__ = ()

    def geturl(self):
        """Return the URL rebuilt from this 5-tuple by urlunsplit()."""
        return urlunsplit(self)
[351] Fix | Delete
[352] Fix | Delete
class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    __slots__ = ()

    def geturl(self):
        """Return the URL rebuilt from this 6-tuple by urlunparse()."""
        return urlunparse(self)
[356] Fix | Delete
[357] Fix | Delete
# Set up the encode/decode result pairs
[358] Fix | Delete
def _fix_result_transcoding():
    """Link each str result class with its bytes twin, in both directions."""
    str_classes = (DefragResult, SplitResult, ParseResult)
    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
    for str_cls, bytes_cls in zip(str_classes, bytes_classes):
        # encode() on the str class builds the bytes class and vice versa.
        str_cls._encoded_counterpart = bytes_cls
        bytes_cls._decoded_counterpart = str_cls


# Run once at import time, then drop the helper from the module namespace.
_fix_result_transcoding()
del _fix_result_transcoding
[370] Fix | Delete
[371] Fix | Delete
def urlparse(url, scheme='', allow_fragments=True):
    """Split a URL into six parts:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Returns a 6-tuple (scheme, netloc, path, params, query, fragment).
    The pieces are not subdivided any further (netloc stays one string)
    and % escapes are left unexpanded.
    """
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(
        url, scheme, allow_fragments)
    params = ''
    # Parameters are only split off for schemes listed in uses_params.
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    parsed = ParseResult(scheme, netloc, path, params, query, fragment)
    return _coerce_result(parsed)
[386] Fix | Delete
[387] Fix | Delete
def _splitparams(url):
    """Split *url* into (path, params) at the ';' of its last path segment.

    Only a ';' located after the final '/' counts as a separator; when the
    path has no '/', the first ';' anywhere is used.  Returns ``(url, '')``
    when no such ';' exists.
    """
    # rfind() gives -1 without a '/'; clamp to 0 so the search then covers
    # the whole string instead of only its last character.
    search_from = max(url.rfind('/'), 0)
    i = url.find(';', search_from)
    if i < 0:
        # No separator: without this guard the slices below would use -1
        # and silently drop the last character of the path.
        return url, ''
    return url[:i], url[i+1:]
[395] Fix | Delete
[396] Fix | Delete
def _splitnetloc(url, start=0):
    """Return (netloc, rest), cutting *url* at the first '/', '?' or '#'.

    The search begins at index *start*, which is also where the returned
    netloc begins.  With no delimiter the netloc runs to the end of *url*
    and the rest is empty.
    """
    candidates = (url.find(delimiter, start) for delimiter in '/?#')
    found = [position for position in candidates if position >= 0]
    # The earliest delimiter wins; the order the three are tried in is
    # irrelevant.
    end = min(found, default=len(url))
    return url[start:end], url[end:]
[403] Fix | Delete
[404] Fix | Delete
def _checknetloc(netloc):
    """Reject a netloc whose NFKC-normalized form gains URL delimiters.

    Returns None when *netloc* is empty, is pure ASCII, or is unchanged by
    normalization.  Raises ValueError when normalization yields any of
    '/', '?', '#', '@' or ':'.
    """
    if not netloc or all(ord(ch) <= 127 for ch in netloc):
        return
    # looking for characters like \u2100 that expand to 'a/c'
    # IDNA uses NFKC equivalence, so normalize for this check
    import unicodedata
    # Delimiters already present are ignored, but the surrounding text
    # is kept for the comparison.
    stripped = netloc.translate({ord(ch): None for ch in '@:#?'})
    normalized = unicodedata.normalize('NFKC', stripped)
    if stripped == normalized:
        return
    if any(ch in normalized for ch in '/?#@:'):
        raise ValueError("netloc '" + netloc + "' contains invalid " +
                         "characters under NFKC normalization")
[421] Fix | Delete
[422] Fix | Delete
def _remove_unsafe_bytes_from_url(url):
    """Return *url* with each entry of _UNSAFE_URL_BYTES_TO_REMOVE deleted."""
    cleaned = url
    for unsafe in _UNSAFE_URL_BYTES_TO_REMOVE:
        cleaned = cleaned.replace(unsafe, "")
    return cleaned
[426] Fix | Delete
[427] Fix | Delete
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    *scheme* is the default used when the URL names none.  When
    *allow_fragments* is false, '#' is not treated as a separator and the
    fragment is left empty.

    Raises ValueError for a netloc with an unmatched '[' or ']', and for
    whatever _checknetloc() rejects.
    """
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    url = _remove_unsafe_bytes_from_url(url)
    scheme = _remove_unsafe_bytes_from_url(scheme)
    # Only lstrip url as some applications rely on preserving trailing space.
    # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
    url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
    scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http':
            # Common case: accepted without the scheme-character and
            # port-number checks applied to other prefixes.
            scheme, url = 'http', url[i+1:]
        elif all(c in scheme_chars for c in url[:i]):
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            rest = url[i+1:]
            if not rest or any(c not in '0123456789' for c in rest):
                # not a port number
                scheme, url = url[:i].lower(), rest

    # From here on every URL takes the same path, whichever branch (if
    # any) recognized a scheme above.
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    _checknetloc(netloc)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return _coerce_result(v)
[490] Fix | Delete
[491] Fix | Delete
def urlunparse(components):
[492] Fix | Delete
"""Put a parsed URL back together again. This may result in a
[493] Fix | Delete
slightly different, but equivalent URL, if the URL that was parsed
[494] Fix | Delete
originally had redundant delimiters, e.g. a ? with an empty query
[495] Fix | Delete
(the draft states that these are equivalent)."""
[496] Fix | Delete
scheme, netloc, url, params, query, fragment, _coerce_result = (
[497] Fix | Delete
_coerce_args(*components))
[498] Fix | Delete
if params:
[499] Fix | Delete
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function