Edit File by line

"""Open an arbitrary URL.

[0] Fix | Delete

[1] Fix | Delete

See the following document for more info on URLs:

[2] Fix | Delete

"Names and Addresses, URIs, URLs, URNs, URCs", at

[3] Fix | Delete

http://www.w3.org/pub/WWW/Addressing/Overview.html

[4] Fix | Delete

[5] Fix | Delete

See also the HTTP spec (from which the error codes are derived):

[6] Fix | Delete

"HTTP - Hypertext Transfer Protocol", at

[7] Fix | Delete

http://www.w3.org/pub/WWW/Protocols/

[8] Fix | Delete

[9] Fix | Delete

Related standards and specs:

[10] Fix | Delete

- RFC1808: the "relative URL" spec. (authoritative status)

[11] Fix | Delete

- RFC1738 - the "URL standard". (authoritative status)

[12] Fix | Delete

- RFC1630 - the "URI spec". (informational status)

[13] Fix | Delete

[14] Fix | Delete

The object returned by URLopener().open(file) will differ per

[15] Fix | Delete

protocol. All you know is that is has methods read(), readline(),

[16] Fix | Delete

readlines(), fileno(), close() and info(). The read*(), fileno()

[17] Fix | Delete

and close() methods work like those of open files.

[18] Fix | Delete

The info() method returns a mimetools.Message object which can be

[19] Fix | Delete

used to query various info about the object, if available.

[20] Fix | Delete

(mimetools.Message objects are queried with the getheader() method.)

[21] Fix | Delete

"""

[22] Fix | Delete

[23] Fix | Delete

import string

[24] Fix | Delete

import socket

[25] Fix | Delete

import os

[26] Fix | Delete

import time

[27] Fix | Delete

import sys

[28] Fix | Delete

import base64

[29] Fix | Delete

import re

[30] Fix | Delete

[31] Fix | Delete

from urlparse import urljoin as basejoin

[32] Fix | Delete

[33] Fix | Delete

__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",

[34] Fix | Delete

"urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",

[35] Fix | Delete

"urlencode", "url2pathname", "pathname2url", "splittag",

[36] Fix | Delete

"localhost", "thishost", "ftperrors", "basejoin", "unwrap",

[37] Fix | Delete

"splittype", "splithost", "splituser", "splitpasswd", "splitport",

[38] Fix | Delete

"splitnport", "splitquery", "splitattr", "splitvalue",

[39] Fix | Delete

"getproxies"]

[40] Fix | Delete

[41] Fix | Delete

__version__ = '1.17' # XXX This version is not always updated :-(

[42] Fix | Delete

[43] Fix | Delete

MAXFTPCACHE = 10 # Trim the ftp cache beyond this size

[44] Fix | Delete

[45] Fix | Delete

# Helper for non-unix systems

[46] Fix | Delete

if os.name == 'nt':

[47] Fix | Delete

from nturl2path import url2pathname, pathname2url

[48] Fix | Delete

elif os.name == 'riscos':

[49] Fix | Delete

from rourl2path import url2pathname, pathname2url

[50] Fix | Delete

else:

[51] Fix | Delete

def url2pathname(pathname):

[52] Fix | Delete

"""OS-specific conversion from a relative URL of the 'file' scheme

[53] Fix | Delete

to a file system path; not recommended for general use."""

[54] Fix | Delete

return unquote(pathname)

[55] Fix | Delete

[56] Fix | Delete

def pathname2url(pathname):

[57] Fix | Delete

"""OS-specific conversion from a file system path to a relative URL

[58] Fix | Delete

of the 'file' scheme; not recommended for general use."""

[59] Fix | Delete

return quote(pathname)

[60] Fix | Delete

[61] Fix | Delete

# This really consists of two pieces:

[62] Fix | Delete

# (1) a class which handles opening of all sorts of URLs

[63] Fix | Delete

# (plus assorted utilities etc.)

[64] Fix | Delete

# (2) a set of functions for parsing URLs

[65] Fix | Delete

# XXX Should these be separated out into different modules?

[66] Fix | Delete

[67] Fix | Delete

[68] Fix | Delete

# Shortcut for basic usage

[69] Fix | Delete

_urlopener = None

[70] Fix | Delete

def urlopen(url, data=None, proxies=None, context=None):

[71] Fix | Delete

"""Create a file-like object for the specified URL to read from."""

[72] Fix | Delete

from warnings import warnpy3k

[73] Fix | Delete

warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "

[74] Fix | Delete

"favor of urllib2.urlopen()", stacklevel=2)

[75] Fix | Delete

[76] Fix | Delete

global _urlopener

[77] Fix | Delete

if proxies is not None or context is not None:

[78] Fix | Delete

opener = FancyURLopener(proxies=proxies, context=context)

[79] Fix | Delete

elif not _urlopener:

[80] Fix | Delete

opener = FancyURLopener()

[81] Fix | Delete

_urlopener = opener

[82] Fix | Delete

else:

[83] Fix | Delete

opener = _urlopener

[84] Fix | Delete

if data is None:

[85] Fix | Delete

return opener.open(url)

[86] Fix | Delete

else:

[87] Fix | Delete

return opener.open(url, data)

[88] Fix | Delete

def urlretrieve(url, filename=None, reporthook=None, data=None, context=None):

[89] Fix | Delete

global _urlopener

[90] Fix | Delete

if context is not None:

[91] Fix | Delete

opener = FancyURLopener(context=context)

[92] Fix | Delete

elif not _urlopener:

[93] Fix | Delete

_urlopener = opener = FancyURLopener()

[94] Fix | Delete

else:

[95] Fix | Delete

opener = _urlopener

[96] Fix | Delete

return opener.retrieve(url, filename, reporthook, data)

[97] Fix | Delete

def urlcleanup():

[98] Fix | Delete

if _urlopener:

[99] Fix | Delete

_urlopener.cleanup()

[100] Fix | Delete

_safe_quoters.clear()

[101] Fix | Delete

ftpcache.clear()

[102] Fix | Delete

[103] Fix | Delete

# check for SSL

[104] Fix | Delete

try:

[105] Fix | Delete

import ssl

[106] Fix | Delete

except:

[107] Fix | Delete

_have_ssl = False

[108] Fix | Delete

else:

[109] Fix | Delete

_have_ssl = True

[110] Fix | Delete

[111] Fix | Delete

# exception raised when downloaded size does not match content-length

[112] Fix | Delete

class ContentTooShortError(IOError):

[113] Fix | Delete

def __init__(self, message, content):

[114] Fix | Delete

IOError.__init__(self, message)

[115] Fix | Delete

self.content = content

[116] Fix | Delete

[117] Fix | Delete

ftpcache = {}

[118] Fix | Delete

class URLopener:

[119] Fix | Delete

"""Class to open URLs.

[120] Fix | Delete

This is a class rather than just a subroutine because we may need

[121] Fix | Delete

more than one set of global protocol-specific options.

[122] Fix | Delete

Note -- this is a base class for those who don't want the

[123] Fix | Delete

automatic handling of errors type 302 (relocated) and 401

[124] Fix | Delete

(authorization needed)."""

[125] Fix | Delete

[126] Fix | Delete

__tempfiles = None

[127] Fix | Delete

[128] Fix | Delete

version = "Python-urllib/%s" % __version__

[129] Fix | Delete

[130] Fix | Delete

# Constructor

[131] Fix | Delete

def __init__(self, proxies=None, context=None, **x509):

[132] Fix | Delete

if proxies is None:

[133] Fix | Delete

proxies = getproxies()

[134] Fix | Delete

assert hasattr(proxies, 'has_key'), "proxies must be a mapping"

[135] Fix | Delete

self.proxies = proxies

[136] Fix | Delete

self.key_file = x509.get('key_file')

[137] Fix | Delete

self.cert_file = x509.get('cert_file')

[138] Fix | Delete

self.context = context

[139] Fix | Delete

self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]

[140] Fix | Delete

self.__tempfiles = []

[141] Fix | Delete

self.__unlink = os.unlink # See cleanup()

[142] Fix | Delete

self.tempcache = None

[143] Fix | Delete

# Undocumented feature: if you assign {} to tempcache,

[144] Fix | Delete

# it is used to cache files retrieved with

[145] Fix | Delete

# self.retrieve(). This is not enabled by default

[146] Fix | Delete

# since it does not work for changing documents (and I

[147] Fix | Delete

# haven't got the logic to check expiration headers

[148] Fix | Delete

# yet).

[149] Fix | Delete

self.ftpcache = ftpcache

[150] Fix | Delete

# Undocumented feature: you can use a different

[151] Fix | Delete

# ftp cache by assigning to the .ftpcache member;

[152] Fix | Delete

# in case you want logically independent URL openers

[153] Fix | Delete

# XXX This is not threadsafe. Bah.

[154] Fix | Delete

[155] Fix | Delete

def __del__(self):

[156] Fix | Delete

self.close()

[157] Fix | Delete

[158] Fix | Delete

def close(self):

[159] Fix | Delete

self.cleanup()

[160] Fix | Delete

[161] Fix | Delete

def cleanup(self):

[162] Fix | Delete

# This code sometimes runs when the rest of this module

[163] Fix | Delete

# has already been deleted, so it can't use any globals

[164] Fix | Delete

# or import anything.

[165] Fix | Delete

if self.__tempfiles:

[166] Fix | Delete

for file in self.__tempfiles:

[167] Fix | Delete

try:

[168] Fix | Delete

self.__unlink(file)

[169] Fix | Delete

except OSError:

[170] Fix | Delete

pass

[171] Fix | Delete

del self.__tempfiles[:]

[172] Fix | Delete

if self.tempcache:

[173] Fix | Delete

self.tempcache.clear()

[174] Fix | Delete

[175] Fix | Delete

def addheader(self, *args):

[176] Fix | Delete

"""Add a header to be used by the HTTP interface only

[177] Fix | Delete

e.g. u.addheader('Accept', 'sound/basic')"""

[178] Fix | Delete

self.addheaders.append(args)

[179] Fix | Delete

[180] Fix | Delete

# External interface

[181] Fix | Delete

def open(self, fullurl, data=None):

[182] Fix | Delete

"""Use URLopener().open(file) instead of open(file, 'r')."""

[183] Fix | Delete

fullurl = unwrap(toBytes(fullurl))

[184] Fix | Delete

# percent encode url, fixing lame server errors for e.g, like space

[185] Fix | Delete

# within url paths.

[186] Fix | Delete

fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")

[187] Fix | Delete

if self.tempcache and fullurl in self.tempcache:

[188] Fix | Delete

filename, headers = self.tempcache[fullurl]

[189] Fix | Delete

fp = open(filename, 'rb')

[190] Fix | Delete

return addinfourl(fp, headers, fullurl)

[191] Fix | Delete

urltype, url = splittype(fullurl)

[192] Fix | Delete

if not urltype:

[193] Fix | Delete

urltype = 'file'

[194] Fix | Delete

if urltype in self.proxies:

[195] Fix | Delete

proxy = self.proxies[urltype]

[196] Fix | Delete

urltype, proxyhost = splittype(proxy)

[197] Fix | Delete

host, selector = splithost(proxyhost)

[198] Fix | Delete

url = (host, fullurl) # Signal special case to open_*()

[199] Fix | Delete

else:

[200] Fix | Delete

proxy = None

[201] Fix | Delete

name = 'open_' + urltype

[202] Fix | Delete

self.type = urltype

[203] Fix | Delete

name = name.replace('-', '_')

[204] Fix | Delete

[205] Fix | Delete

# bpo-35907: disallow the file reading with the type not allowed

[206] Fix | Delete

if not hasattr(self, name) or name == 'open_local_file':

[207] Fix | Delete

if proxy:

[208] Fix | Delete

return self.open_unknown_proxy(proxy, fullurl, data)

[209] Fix | Delete

else:

[210] Fix | Delete

return self.open_unknown(fullurl, data)

[211] Fix | Delete

try:

[212] Fix | Delete

if data is None:

[213] Fix | Delete

return getattr(self, name)(url)

[214] Fix | Delete

else:

[215] Fix | Delete

return getattr(self, name)(url, data)

[216] Fix | Delete

except socket.error, msg:

[217] Fix | Delete

raise IOError, ('socket error', msg), sys.exc_info()[2]

[218] Fix | Delete

[219] Fix | Delete

def open_unknown(self, fullurl, data=None):

[220] Fix | Delete

"""Overridable interface to open unknown URL type."""

[221] Fix | Delete

type, url = splittype(fullurl)

[222] Fix | Delete

raise IOError, ('url error', 'unknown url type', type)

[223] Fix | Delete

[224] Fix | Delete

def open_unknown_proxy(self, proxy, fullurl, data=None):

[225] Fix | Delete

"""Overridable interface to open unknown URL type."""

[226] Fix | Delete

type, url = splittype(fullurl)

[227] Fix | Delete

raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)

[228] Fix | Delete

[229] Fix | Delete

# External interface

[230] Fix | Delete

def retrieve(self, url, filename=None, reporthook=None, data=None):

[231] Fix | Delete

"""retrieve(url) returns (filename, headers) for a local object

[232] Fix | Delete

or (tempfilename, headers) for a remote object."""

[233] Fix | Delete

url = unwrap(toBytes(url))

[234] Fix | Delete

if self.tempcache and url in self.tempcache:

[235] Fix | Delete

return self.tempcache[url]

[236] Fix | Delete

type, url1 = splittype(url)

[237] Fix | Delete

if filename is None and (not type or type == 'file'):

[238] Fix | Delete

try:

[239] Fix | Delete

fp = self.open_local_file(url1)

[240] Fix | Delete

hdrs = fp.info()

[241] Fix | Delete

fp.close()

[242] Fix | Delete

return url2pathname(splithost(url1)[1]), hdrs

[243] Fix | Delete

except IOError:

[244] Fix | Delete

pass

[245] Fix | Delete

fp = self.open(url, data)

[246] Fix | Delete

try:

[247] Fix | Delete

headers = fp.info()

[248] Fix | Delete

if filename:

[249] Fix | Delete

tfp = open(filename, 'wb')

[250] Fix | Delete

else:

[251] Fix | Delete

import tempfile

[252] Fix | Delete

garbage, path = splittype(url)

[253] Fix | Delete

garbage, path = splithost(path or "")

[254] Fix | Delete

path, garbage = splitquery(path or "")

[255] Fix | Delete

path, garbage = splitattr(path or "")

[256] Fix | Delete

suffix = os.path.splitext(path)[1]

[257] Fix | Delete

(fd, filename) = tempfile.mkstemp(suffix)

[258] Fix | Delete

self.__tempfiles.append(filename)

[259] Fix | Delete

tfp = os.fdopen(fd, 'wb')

[260] Fix | Delete

try:

[261] Fix | Delete

result = filename, headers

[262] Fix | Delete

if self.tempcache is not None:

[263] Fix | Delete

self.tempcache[url] = result

[264] Fix | Delete

bs = 1024*8

[265] Fix | Delete

size = -1

[266] Fix | Delete

read = 0

[267] Fix | Delete

blocknum = 0

[268] Fix | Delete

if "content-length" in headers:

[269] Fix | Delete

size = int(headers["Content-Length"])

[270] Fix | Delete

if reporthook:

[271] Fix | Delete

reporthook(blocknum, bs, size)

[272] Fix | Delete

while 1:

[273] Fix | Delete

block = fp.read(bs)

[274] Fix | Delete

if block == "":

[275] Fix | Delete

break

[276] Fix | Delete

read += len(block)

[277] Fix | Delete

tfp.write(block)

[278] Fix | Delete

blocknum += 1

[279] Fix | Delete

if reporthook:

[280] Fix | Delete

reporthook(blocknum, bs, size)

[281] Fix | Delete

finally:

[282] Fix | Delete

tfp.close()

[283] Fix | Delete

finally:

[284] Fix | Delete

fp.close()

[285] Fix | Delete

[286] Fix | Delete

# raise exception if actual size does not match content-length header

[287] Fix | Delete

if size >= 0 and read < size:

[288] Fix | Delete

raise ContentTooShortError("retrieval incomplete: got only %i out "

[289] Fix | Delete

"of %i bytes" % (read, size), result)

[290] Fix | Delete

[291] Fix | Delete

return result

[292] Fix | Delete

[293] Fix | Delete

# Each method named open_<type> knows how to open that type of URL

[294] Fix | Delete

[295] Fix | Delete

def open_http(self, url, data=None):

[296] Fix | Delete

"""Use HTTP protocol."""

[297] Fix | Delete

import httplib

[298] Fix | Delete

user_passwd = None

[299] Fix | Delete

proxy_passwd= None

[300] Fix | Delete

if isinstance(url, str):

[301] Fix | Delete

host, selector = splithost(url)

[302] Fix | Delete

if host:

[303] Fix | Delete

user_passwd, host = splituser(host)

[304] Fix | Delete

host = unquote(host)

[305] Fix | Delete

realhost = host

[306] Fix | Delete

else:

[307] Fix | Delete

host, selector = url

[308] Fix | Delete

# check whether the proxy contains authorization information

[309] Fix | Delete

proxy_passwd, host = splituser(host)

[310] Fix | Delete

# now we proceed with the url we want to obtain

[311] Fix | Delete

urltype, rest = splittype(selector)

[312] Fix | Delete

url = rest

[313] Fix | Delete

user_passwd = None

[314] Fix | Delete

if urltype.lower() != 'http':

[315] Fix | Delete

realhost = None

[316] Fix | Delete

else:

[317] Fix | Delete

realhost, rest = splithost(rest)

[318] Fix | Delete

if realhost:

[319] Fix | Delete

user_passwd, realhost = splituser(realhost)

[320] Fix | Delete

if user_passwd:

[321] Fix | Delete

selector = "%s://%s%s" % (urltype, realhost, rest)

[322] Fix | Delete

if proxy_bypass(realhost):

[323] Fix | Delete

host = realhost

[324] Fix | Delete

[325] Fix | Delete

#print "proxy via http:", host, selector

[326] Fix | Delete

if not host: raise IOError, ('http error', 'no host given')

[327] Fix | Delete

[328] Fix | Delete

if proxy_passwd:

[329] Fix | Delete

proxy_passwd = unquote(proxy_passwd)

[330] Fix | Delete

proxy_auth = base64.b64encode(proxy_passwd).strip()

[331] Fix | Delete

else:

[332] Fix | Delete

proxy_auth = None

[333] Fix | Delete

[334] Fix | Delete

if user_passwd:

[335] Fix | Delete

user_passwd = unquote(user_passwd)

[336] Fix | Delete

auth = base64.b64encode(user_passwd).strip()

[337] Fix | Delete

else:

[338] Fix | Delete

auth = None

[339] Fix | Delete

h = httplib.HTTP(host)

[340] Fix | Delete

if data is not None:

[341] Fix | Delete

h.putrequest('POST', selector)

[342] Fix | Delete

h.putheader('Content-Type', 'application/x-www-form-urlencoded')

[343] Fix | Delete

h.putheader('Content-Length', '%d' % len(data))

[344] Fix | Delete

else:

[345] Fix | Delete

h.putrequest('GET', selector)

[346] Fix | Delete

if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)

[347] Fix | Delete

if auth: h.putheader('Authorization', 'Basic %s' % auth)

[348] Fix | Delete

if realhost: h.putheader('Host', realhost)

[349] Fix | Delete

for args in self.addheaders: h.putheader(*args)

[350] Fix | Delete

h.endheaders(data)

[351] Fix | Delete

errcode, errmsg, headers = h.getreply()

[352] Fix | Delete

fp = h.getfile()

[353] Fix | Delete

if errcode == -1:

[354] Fix | Delete

if fp: fp.close()

[355] Fix | Delete

# something went wrong with the HTTP status line

[356] Fix | Delete

raise IOError, ('http protocol error', 0,

[357] Fix | Delete

'got a bad status line', None)

[358] Fix | Delete

# According to RFC 2616, "2xx" code indicates that the client's

[359] Fix | Delete

# request was successfully received, understood, and accepted.

[360] Fix | Delete

if (200 <= errcode < 300):

[361] Fix | Delete

return addinfourl(fp, headers, "http:" + url, errcode)

[362] Fix | Delete

else:

[363] Fix | Delete

if data is None:

[364] Fix | Delete

return self.http_error(url, fp, errcode, errmsg, headers)

[365] Fix | Delete

else:

[366] Fix | Delete

return self.http_error(url, fp, errcode, errmsg, headers, data)

[367] Fix | Delete

[368] Fix | Delete

def http_error(self, url, fp, errcode, errmsg, headers, data=None):

[369] Fix | Delete

"""Handle http errors.

[370] Fix | Delete

Derived class can override this, or provide specific handlers

[371] Fix | Delete

named http_error_DDD where DDD is the 3-digit error code."""

[372] Fix | Delete

# First check if there's a specific handler for this error

[373] Fix | Delete

name = 'http_error_%d' % errcode

[374] Fix | Delete

if hasattr(self, name):

[375] Fix | Delete

method = getattr(self, name)

[376] Fix | Delete

if data is None:

[377] Fix | Delete

result = method(url, fp, errcode, errmsg, headers)

[378] Fix | Delete

else:

[379] Fix | Delete

result = method(url, fp, errcode, errmsg, headers, data)

[380] Fix | Delete

if result: return result

[381] Fix | Delete

return self.http_error_default(url, fp, errcode, errmsg, headers)

[382] Fix | Delete

[383] Fix | Delete

def http_error_default(self, url, fp, errcode, errmsg, headers):

[384] Fix | Delete

"""Default error handler: close the connection and raise IOError."""

[385] Fix | Delete

fp.close()

[386] Fix | Delete

raise IOError, ('http error', errcode, errmsg, headers)

[387] Fix | Delete

[388] Fix | Delete

if _have_ssl:

[389] Fix | Delete

def open_https(self, url, data=None):

[390] Fix | Delete

"""Use HTTPS protocol."""

[391] Fix | Delete

[392] Fix | Delete

import httplib

[393] Fix | Delete

user_passwd = None

[394] Fix | Delete

proxy_passwd = None

[395] Fix | Delete

if isinstance(url, str):

[396] Fix | Delete

host, selector = splithost(url)

[397] Fix | Delete

if host:

[398] Fix | Delete

user_passwd, host = splituser(host)

[399] Fix | Delete

host = unquote(host)

[400] Fix | Delete

realhost = host

[401] Fix | Delete

else:

[402] Fix | Delete

host, selector = url

[403] Fix | Delete

# here, we determine, whether the proxy contains authorization information

[404] Fix | Delete

proxy_passwd, host = splituser(host)

[405] Fix | Delete

urltype, rest = splittype(selector)

[406] Fix | Delete

url = rest

[407] Fix | Delete

user_passwd = None

[408] Fix | Delete

if urltype.lower() != 'https':

[409] Fix | Delete

realhost = None

[410] Fix | Delete

else:

[411] Fix | Delete

realhost, rest = splithost(rest)

[412] Fix | Delete

if realhost:

[413] Fix | Delete

user_passwd, realhost = splituser(realhost)

[414] Fix | Delete

if user_passwd:

[415] Fix | Delete

selector = "%s://%s%s" % (urltype, realhost, rest)

[416] Fix | Delete

#print "proxy via https:", host, selector

[417] Fix | Delete

if not host: raise IOError, ('https error', 'no host given')

[418] Fix | Delete

if proxy_passwd:

[419] Fix | Delete

proxy_passwd = unquote(proxy_passwd)

[420] Fix | Delete

proxy_auth = base64.b64encode(proxy_passwd).strip()

[421] Fix | Delete

else:

[422] Fix | Delete

proxy_auth = None

[423] Fix | Delete

if user_passwd:

[424] Fix | Delete

user_passwd = unquote(user_passwd)

[425] Fix | Delete

auth = base64.b64encode(user_passwd).strip()

[426] Fix | Delete

else:

[427] Fix | Delete

auth = None

[428] Fix | Delete

h = httplib.HTTPS(host, 0,

[429] Fix | Delete

key_file=self.key_file,

[430] Fix | Delete

cert_file=self.cert_file,

[431] Fix | Delete

context=self.context)

[432] Fix | Delete

if data is not None:

[433] Fix | Delete

h.putrequest('POST', selector)

[434] Fix | Delete

h.putheader('Content-Type',

[435] Fix | Delete

'application/x-www-form-urlencoded')

[436] Fix | Delete

h.putheader('Content-Length', '%d' % len(data))

[437] Fix | Delete

else:

[438] Fix | Delete

h.putrequest('GET', selector)

[439] Fix | Delete

if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)

[440] Fix | Delete

if auth: h.putheader('Authorization', 'Basic %s' % auth)

[441] Fix | Delete

if realhost: h.putheader('Host', realhost)

[442] Fix | Delete

for args in self.addheaders: h.putheader(*args)

[443] Fix | Delete

h.endheaders(data)

[444] Fix | Delete

errcode, errmsg, headers = h.getreply()

[445] Fix | Delete

fp = h.getfile()

[446] Fix | Delete

if errcode == -1:

[447] Fix | Delete

if fp: fp.close()

[448] Fix | Delete

# something went wrong with the HTTP status line

[449] Fix | Delete

raise IOError, ('http protocol error', 0,

[450] Fix | Delete

'got a bad status line', None)

[451] Fix | Delete

# According to RFC 2616, "2xx" code indicates that the client's

[452] Fix | Delete

# request was successfully received, understood, and accepted.

[453] Fix | Delete

if (200 <= errcode < 300):

[454] Fix | Delete

return addinfourl(fp, headers, "https:" + url, errcode)

[455] Fix | Delete

else:

[456] Fix | Delete

if data is None:

[457] Fix | Delete

return self.http_error(url, fp, errcode, errmsg, headers)

[458] Fix | Delete

else:

[459] Fix | Delete

return self.http_error(url, fp, errcode, errmsg, headers,

[460] Fix | Delete

data)

[461] Fix | Delete

[462] Fix | Delete

def open_file(self, url):

[463] Fix | Delete

"""Use local file or FTP depending on form of URL."""

[464] Fix | Delete

if not isinstance(url, str):

[465] Fix | Delete

raise IOError, ('file error', 'proxy support for file protocol currently not implemented')

[466] Fix | Delete

if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':

[467] Fix | Delete

return self.open_ftp(url)

[468] Fix | Delete

else:

[469] Fix | Delete

return self.open_local_file(url)

[470] Fix | Delete

[471] Fix | Delete

def open_local_file(self, url):

[472] Fix | Delete

"""Use local file."""

[473] Fix | Delete

import mimetypes, mimetools, email.utils

[474] Fix | Delete

try:

[475] Fix | Delete

from cStringIO import StringIO

[476] Fix | Delete

except ImportError:

[477] Fix | Delete

from StringIO import StringIO

[478] Fix | Delete

host, file = splithost(url)

[479] Fix | Delete

localname = url2pathname(file)

[480] Fix | Delete

try:

[481] Fix | Delete

stats = os.stat(localname)

[482] Fix | Delete

except OSError, e:

[483] Fix | Delete

raise IOError(e.errno, e.strerror, e.filename)

[484] Fix | Delete

size = stats.st_size

[485] Fix | Delete

modified = email.utils.formatdate(stats.st_mtime, usegmt=True)

[486] Fix | Delete

mtype = mimetypes.guess_type(url)[0]

[487] Fix | Delete

headers = mimetools.Message(StringIO(

[488] Fix | Delete

'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %

[489] Fix | Delete

(mtype or 'text/plain', size, modified)))

[490] Fix | Delete

if not host:

[491] Fix | Delete

urlfile = file

[492] Fix | Delete

if file[:1] == '/':

[493] Fix | Delete

urlfile = 'file://' + file

[494] Fix | Delete

elif file[:2] == './':

[495] Fix | Delete

raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)

[496] Fix | Delete

return addinfourl(open(localname, 'rb'),

[497] Fix | Delete

headers, urlfile)

[498] Fix | Delete

host, port = splitport(host)

[499] Fix | Delete

12 3 4