sys/lib/python/urllib.py

   1 """Open an arbitrary URL.
   2
   3 See the following document for more info on URLs:
   4 "Names and Addresses, URIs, URLs, URNs, URCs", at
   5 http://www.w3.org/pub/WWW/Addressing/Overview.html
   6
   7 See also the HTTP spec (from which the error codes are derived):
   8 "HTTP - Hypertext Transfer Protocol", at
   9 http://www.w3.org/pub/WWW/Protocols/
  10
  11 Related standards and specs:
  12 - RFC1808: the "relative URL" spec. (authoritative status)
  13 - RFC1738 - the "URL standard". (authoritative status)
  14 - RFC1630 - the "URI spec". (informational status)
  15
  16 The object returned by URLopener().open(file) will differ per
  17 protocol.  All you know is that is has methods read(), readline(),
  18 readlines(), fileno(), close() and info().  The read*(), fileno()
  19 and close() methods work like those of open files.
  20 The info() method returns a mimetools.Message object which can be
  21 used to query various info about the object, if available.
  22 (mimetools.Message objects are queried with the getheader() method.)
  23 """
  24
  25 import string
  26 import socket
  27 import os
  28 import time
  29 import sys
  30 from urlparse import urljoin as basejoin
  31
  32 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
  33            "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
  34            "urlencode", "url2pathname", "pathname2url", "splittag",
  35            "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
  36            "splittype", "splithost", "splituser", "splitpasswd", "splitport",
  37            "splitnport", "splitquery", "splitattr", "splitvalue",
  38            "splitgophertype", "getproxies"]
  39
  40 __version__ = '1.17'    # XXX This version is not always updated :-(
  41
  42 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
  43
  44 # Helper for non-unix systems
  45 if os.name == 'mac':
  46     from macurl2path import url2pathname, pathname2url
  47 elif os.name == 'nt':
  48     from nturl2path import url2pathname, pathname2url
  49 elif os.name == 'riscos':
  50     from rourl2path import url2pathname, pathname2url
  51 else:
  52     def url2pathname(pathname):
  53         """OS-specific conversion from a relative URL of the 'file' scheme
  54         to a file system path; not recommended for general use."""
  55         return unquote(pathname)
  56
  57     def pathname2url(pathname):
  58         """OS-specific conversion from a file system path to a relative URL
  59         of the 'file' scheme; not recommended for general use."""
  60         return quote(pathname)
  61
  62 # This really consists of two pieces:
  63 # (1) a class which handles opening of all sorts of URLs
  64 #     (plus assorted utilities etc.)
  65 # (2) a set of functions for parsing URLs
  66 # XXX Should these be separated out into different modules?
  67
  68
  69 # Shortcut for basic usage
  70 _urlopener = None
  71 def urlopen(url, data=None, proxies=None):
  72     """urlopen(url [, data]) -> open file-like object"""
  73     global _urlopener
  74     if proxies is not None:
  75         opener = FancyURLopener(proxies=proxies)
  76     elif not _urlopener:
  77         opener = FancyURLopener()
  78         _urlopener = opener
  79     else:
  80         opener = _urlopener
  81     if data is None:
  82         return opener.open(url)
  83     else:
  84         return opener.open(url, data)
  85 def urlretrieve(url, filename=None, reporthook=None, data=None):
  86     global _urlopener
  87     if not _urlopener:
  88         _urlopener = FancyURLopener()
  89     return _urlopener.retrieve(url, filename, reporthook, data)
  90 def urlcleanup():
  91     if _urlopener:
  92         _urlopener.cleanup()
  93
  94 # exception raised when downloaded size does not match content-length
  95 class ContentTooShortError(IOError):
  96     def __init__(self, message, content):
  97         IOError.__init__(self, message)
  98         self.content = content
  99
 100 ftpcache = {}
 101 class URLopener:
 102     """Class to open URLs.
 103     This is a class rather than just a subroutine because we may need
 104     more than one set of global protocol-specific options.
 105     Note -- this is a base class for those who don't want the
 106     automatic handling of errors type 302 (relocated) and 401
 107     (authorization needed)."""
 108
 109     __tempfiles = None
 110
 111     version = "Python-urllib/%s" % __version__
 112
 113     # Constructor
 114     def __init__(self, proxies=None, **x509):
 115         if proxies is None:
 116             proxies = getproxies()
 117         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
 118         self.proxies = proxies
 119         self.key_file = x509.get('key_file')
 120         self.cert_file = x509.get('cert_file')
 121         self.addheaders = [('User-Agent', self.version)]
 122         self.__tempfiles = []
 123         self.__unlink = os.unlink # See cleanup()
 124         self.tempcache = None
 125         # Undocumented feature: if you assign {} to tempcache,
 126         # it is used to cache files retrieved with
 127         # self.retrieve().  This is not enabled by default
 128         # since it does not work for changing documents (and I
 129         # haven't got the logic to check expiration headers
 130         # yet).
 131         self.ftpcache = ftpcache
 132         # Undocumented feature: you can use a different
 133         # ftp cache by assigning to the .ftpcache member;
 134         # in case you want logically independent URL openers
 135         # XXX This is not threadsafe.  Bah.
 136
 137     def __del__(self):
 138         self.close()
 139
 140     def close(self):
 141         self.cleanup()
 142
 143     def cleanup(self):
 144         # This code sometimes runs when the rest of this module
 145         # has already been deleted, so it can't use any globals
 146         # or import anything.
 147         if self.__tempfiles:
 148             for file in self.__tempfiles:
 149                 try:
 150                     self.__unlink(file)
 151                 except OSError:
 152                     pass
 153             del self.__tempfiles[:]
 154         if self.tempcache:
 155             self.tempcache.clear()
 156
 157     def addheader(self, *args):
 158         """Add a header to be used by the HTTP interface only
 159         e.g. u.addheader('Accept', 'sound/basic')"""
 160         self.addheaders.append(args)
 161
 162     # External interface
 163     def open(self, fullurl, data=None):
 164         """Use URLopener().open(file) instead of open(file, 'r')."""
 165         fullurl = unwrap(toBytes(fullurl))
 166         if self.tempcache and fullurl in self.tempcache:
 167             filename, headers = self.tempcache[fullurl]
 168             fp = open(filename, 'rb')
 169             return addinfourl(fp, headers, fullurl)
 170         urltype, url = splittype(fullurl)
 171         if not urltype:
 172             urltype = 'file'
 173         if urltype in self.proxies:
 174             proxy = self.proxies[urltype]
 175             urltype, proxyhost = splittype(proxy)
 176             host, selector = splithost(proxyhost)
 177             url = (host, fullurl) # Signal special case to open_*()
 178         else:
 179             proxy = None
 180         name = 'open_' + urltype
 181         self.type = urltype
 182         name = name.replace('-', '_')
 183         if not hasattr(self, name):
 184             if proxy:
 185                 return self.open_unknown_proxy(proxy, fullurl, data)
 186             else:
 187                 return self.open_unknown(fullurl, data)
 188         try:
 189             if data is None:
 190                 return getattr(self, name)(url)
 191             else:
 192                 return getattr(self, name)(url, data)
 193         except socket.error, msg:
 194             raise IOError, ('socket error', msg), sys.exc_info()[2]
 195
 196     def open_unknown(self, fullurl, data=None):
 197         """Overridable interface to open unknown URL type."""
 198         type, url = splittype(fullurl)
 199         raise IOError, ('url error', 'unknown url type', type)
 200
 201     def open_unknown_proxy(self, proxy, fullurl, data=None):
 202         """Overridable interface to open unknown URL type."""
 203         type, url = splittype(fullurl)
 204         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
 205
 206     # External interface
 207     def retrieve(self, url, filename=None, reporthook=None, data=None):
 208         """retrieve(url) returns (filename, headers) for a local object
 209         or (tempfilename, headers) for a remote object."""
 210         url = unwrap(toBytes(url))
 211         if self.tempcache and url in self.tempcache:
 212             return self.tempcache[url]
 213         type, url1 = splittype(url)
 214         if filename is None and (not type or type == 'file'):
 215             try:
 216                 fp = self.open_local_file(url1)
 217                 hdrs = fp.info()
 218                 del fp
 219                 return url2pathname(splithost(url1)[1]), hdrs
 220             except IOError, msg:
 221                 pass
 222         fp = self.open(url, data)
 223         headers = fp.info()
 224         if filename:
 225             tfp = open(filename, 'wb')
 226         else:
 227             import tempfile
 228             garbage, path = splittype(url)
 229             garbage, path = splithost(path or "")
 230             path, garbage = splitquery(path or "")
 231             path, garbage = splitattr(path or "")
 232             suffix = os.path.splitext(path)[1]
 233             (fd, filename) = tempfile.mkstemp(suffix)
 234             self.__tempfiles.append(filename)
 235             tfp = os.fdopen(fd, 'wb')
 236         result = filename, headers
 237         if self.tempcache is not None:
 238             self.tempcache[url] = result
 239         bs = 1024*8
 240         size = -1
 241         read = 0
 242         blocknum = 0
 243         if reporthook:
 244             if "content-length" in headers:
 245                 size = int(headers["Content-Length"])
 246             reporthook(blocknum, bs, size)
 247         while 1:
 248             block = fp.read(bs)
 249             if block == "":
 250                 break
 251             read += len(block)
 252             tfp.write(block)
 253             blocknum += 1
 254             if reporthook:
 255                 reporthook(blocknum, bs, size)
 256         fp.close()
 257         tfp.close()
 258         del fp
 259         del tfp
 260
 261         # raise exception if actual size does not match content-length header
 262         if size >= 0 and read < size:
 263             raise ContentTooShortError("retrieval incomplete: got only %i out "
 264                                        "of %i bytes" % (read, size), result)
 265
 266         return result
 267
 268     # Each method named open_<type> knows how to open that type of URL
 269
 270     def open_http(self, url, data=None):
 271         """Use HTTP protocol."""
 272         import httplib
 273         user_passwd = None
 274         proxy_passwd= None
 275         if isinstance(url, str):
 276             host, selector = splithost(url)
 277             if host:
 278                 user_passwd, host = splituser(host)
 279                 host = unquote(host)
 280             realhost = host
 281         else:
 282             host, selector = url
 283             # check whether the proxy contains authorization information
 284             proxy_passwd, host = splituser(host)
 285             # now we proceed with the url we want to obtain
 286             urltype, rest = splittype(selector)
 287             url = rest
 288             user_passwd = None
 289             if urltype.lower() != 'http':
 290                 realhost = None
 291             else:
 292                 realhost, rest = splithost(rest)
 293                 if realhost:
 294                     user_passwd, realhost = splituser(realhost)
 295                 if user_passwd:
 296                     selector = "%s://%s%s" % (urltype, realhost, rest)
 297                 if proxy_bypass(realhost):
 298                     host = realhost
 299
 300             #print "proxy via http:", host, selector
 301         if not host: raise IOError, ('http error', 'no host given')
 302
 303         if proxy_passwd:
 304             import base64
 305             proxy_auth = base64.b64encode(proxy_passwd).strip()
 306         else:
 307             proxy_auth = None
 308
 309         if user_passwd:
 310             import base64
 311             auth = base64.b64encode(user_passwd).strip()
 312         else:
 313             auth = None
 314         h = httplib.HTTP(host)
 315         if data is not None:
 316             h.putrequest('POST', selector)
 317             h.putheader('Content-Type', 'application/x-www-form-urlencoded')
 318             h.putheader('Content-Length', '%d' % len(data))
 319         else:
 320             h.putrequest('GET', selector)
 321         if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
 322         if auth: h.putheader('Authorization', 'Basic %s' % auth)
 323         if realhost: h.putheader('Host', realhost)
 324         for args in self.addheaders: h.putheader(*args)
 325         h.endheaders()
 326         if data is not None:
 327             h.send(data)
 328         errcode, errmsg, headers = h.getreply()
 329         if errcode == -1:
 330             # something went wrong with the HTTP status line
 331             raise IOError, ('http protocol error', 0,
 332                             'got a bad status line', None)
 333         fp = h.getfile()
 334         if errcode == 200:
 335             return addinfourl(fp, headers, "http:" + url)
 336         else:
 337             if data is None:
 338                 return self.http_error(url, fp, errcode, errmsg, headers)
 339             else:
 340                 return self.http_error(url, fp, errcode, errmsg, headers, data)
 341
 342     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
 343         """Handle http errors.
 344         Derived class can override this, or provide specific handlers
 345         named http_error_DDD where DDD is the 3-digit error code."""
 346         # First check if there's a specific handler for this error
 347         name = 'http_error_%d' % errcode
 348         if hasattr(self, name):
 349             method = getattr(self, name)
 350             if data is None:
 351                 result = method(url, fp, errcode, errmsg, headers)
 352             else:
 353                 result = method(url, fp, errcode, errmsg, headers, data)
 354             if result: return result
 355         return self.http_error_default(url, fp, errcode, errmsg, headers)
 356
 357     def http_error_default(self, url, fp, errcode, errmsg, headers):
 358         """Default error handler: close the connection and raise IOError."""
 359         void = fp.read()
 360         fp.close()
 361         raise IOError, ('http error', errcode, errmsg, headers)
 362
 363     if hasattr(socket, "ssl"):
 364         def open_https(self, url, data=None):
 365             """Use HTTPS protocol."""
 366             import httplib
 367             user_passwd = None
 368             proxy_passwd = None
 369             if isinstance(url, str):
 370                 host, selector = splithost(url)
 371                 if host:
 372                     user_passwd, host = splituser(host)
 373                     host = unquote(host)
 374                 realhost = host
 375             else:
 376                 host, selector = url
 377                 # here, we determine, whether the proxy contains authorization information
 378                 proxy_passwd, host = splituser(host)
 379                 urltype, rest = splittype(selector)
 380                 url = rest
 381                 user_passwd = None
 382                 if urltype.lower() != 'https':
 383                     realhost = None
 384                 else:
 385                     realhost, rest = splithost(rest)
 386                     if realhost:
 387                         user_passwd, realhost = splituser(realhost)
 388                     if user_passwd:
 389                         selector = "%s://%s%s" % (urltype, realhost, rest)
 390                 #print "proxy via https:", host, selector
 391             if not host: raise IOError, ('https error', 'no host given')
 392             if proxy_passwd:
 393                 import base64
 394                 proxy_auth = base64.b64encode(proxy_passwd).strip()
 395             else:
 396                 proxy_auth = None
 397             if user_passwd:
 398                 import base64
 399                 auth = base64.b64encode(user_passwd).strip()
 400             else:
 401                 auth = None
 402             h = httplib.HTTPS(host, 0,
 403                               key_file=self.key_file,
 404                               cert_file=self.cert_file)
 405             if data is not None:
 406                 h.putrequest('POST', selector)
 407                 h.putheader('Content-Type',
 408                             'application/x-www-form-urlencoded')
 409                 h.putheader('Content-Length', '%d' % len(data))
 410             else:
 411                 h.putrequest('GET', selector)
 412             if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
 413             if auth: h.putheader('Authorization', 'Basic %s' % auth)
 414             if realhost: h.putheader('Host', realhost)
 415             for args in self.addheaders: h.putheader(*args)
 416             h.endheaders()
 417             if data is not None:
 418                 h.send(data)
 419             errcode, errmsg, headers = h.getreply()
 420             if errcode == -1:
 421                 # something went wrong with the HTTP status line
 422                 raise IOError, ('http protocol error', 0,
 423                                 'got a bad status line', None)
 424             fp = h.getfile()
 425             if errcode == 200:
 426                 return addinfourl(fp, headers, "https:" + url)
 427             else:
 428                 if data is None:
 429                     return self.http_error(url, fp, errcode, errmsg, headers)
 430                 else:
 431                     return self.http_error(url, fp, errcode, errmsg, headers,
 432                                            data)
 433
 434     def open_gopher(self, url):
 435         """Use Gopher protocol."""
 436         if not isinstance(url, str):
 437             raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
 438         import gopherlib
 439         host, selector = splithost(url)
 440         if not host: raise IOError, ('gopher error', 'no host given')
 441         host = unquote(host)
 442         type, selector = splitgophertype(selector)
 443         selector, query = splitquery(selector)
 444         selector = unquote(selector)
 445         if query:
 446             query = unquote(query)
 447             fp = gopherlib.send_query(selector, query, host)
 448         else:
 449             fp = gopherlib.send_selector(selector, host)
 450         return addinfourl(fp, noheaders(), "gopher:" + url)
 451
 452     def open_file(self, url):
 453         """Use local file or FTP depending on form of URL."""
 454         if not isinstance(url, str):
 455             raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
 456         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
 457             return self.open_ftp(url)
 458         else:
 459             return self.open_local_file(url)
 460
 461     def open_local_file(self, url):
 462         """Use local file."""
 463         import mimetypes, mimetools, email.Utils
 464         try:
 465             from cStringIO import StringIO
 466         except ImportError:
 467             from StringIO import StringIO
 468         host, file = splithost(url)
 469         localname = url2pathname(file)
 470         try:
 471             stats = os.stat(localname)
 472         except OSError, e:
 473             raise IOError(e.errno, e.strerror, e.filename)
 474         size = stats.st_size
 475         modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
 476         mtype = mimetypes.guess_type(url)[0]
 477         headers = mimetools.Message(StringIO(
 478             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
 479             (mtype or 'text/plain', size, modified)))
 480         if not host:
 481             urlfile = file
 482             if file[:1] == '/':
 483                 urlfile = 'file://' + file
 484             return addinfourl(open(localname, 'rb'),
 485                               headers, urlfile)
 486         host, port = splitport(host)
 487         if not port \
 488            and socket.gethostbyname(host) in (localhost(), thishost()):
 489             urlfile = file
 490             if file[:1] == '/':
 491                 urlfile = 'file://' + file
 492             return addinfourl(open(localname, 'rb'),
 493                               headers, urlfile)
 494         raise IOError, ('local file error', 'not on local host')
 495
 496     def open_ftp(self, url):
 497         """Use FTP protocol."""
 498         if not isinstance(url, str):
 499             raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
 500         import mimetypes, mimetools
 501         try:
 502             from cStringIO import StringIO
 503         except ImportError:
 504             from StringIO import StringIO
 505         host, path = splithost(url)
 506         if not host: raise IOError, ('ftp error', 'no host given')
 507         host, port = splitport(host)
 508         user, host = splituser(host)
 509         if user: user, passwd = splitpasswd(user)
 510         else: passwd = None
 511         host = unquote(host)
 512         user = unquote(user or '')
 513         passwd = unquote(passwd or '')
 514         host = socket.gethostbyname(host)
 515         if not port:
 516             import ftplib
 517             port = ftplib.FTP_PORT
 518         else:
 519             port = int(port)
 520         path, attrs = splitattr(path)
 521         path = unquote(path)
 522         dirs = path.split('/')
 523         dirs, file = dirs[:-1], dirs[-1]
 524         if dirs and not dirs[0]: dirs = dirs[1:]
 525         if dirs and not dirs[0]: dirs[0] = '/'
 526         key = user, host, port, '/'.join(dirs)
 527         # XXX thread unsafe!
 528         if len(self.ftpcache) > MAXFTPCACHE:
 529             # Prune the cache, rather arbitrarily
 530             for k in self.ftpcache.keys():
 531                 if k != key:
 532                     v = self.ftpcache[k]
 533                     del self.ftpcache[k]
 534                     v.close()
 535         try:
 536             if not key in self.ftpcache:
 537                 self.ftpcache[key] = \
 538                     ftpwrapper(user, passwd, host, port, dirs)
 539             if not file: type = 'D'
 540             else: type = 'I'
 541             for attr in attrs:
 542                 attr, value = splitvalue(attr)
 543                 if attr.lower() == 'type' and \
 544                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
 545                     type = value.upper()
 546             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
 547             mtype = mimetypes.guess_type("ftp:" + url)[0]
 548             headers = ""
 549             if mtype:
 550                 headers += "Content-Type: %s\n" % mtype
 551             if retrlen is not None and retrlen >= 0:
 552                 headers += "Content-Length: %d\n" % retrlen
 553             headers = mimetools.Message(StringIO(headers))
 554             return addinfourl(fp, headers, "ftp:" + url)
 555         except ftperrors(), msg:
 556             raise IOError, ('ftp error', msg), sys.exc_info()[2]
 557
 558     def open_data(self, url, data=None):
 559         """Use "data" URL."""
 560         if not isinstance(url, str):
 561             raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
 562         # ignore POSTed data
 563         #
 564         # syntax of data URLs:
 565         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 566         # mediatype := [ type "/" subtype ] *( ";" parameter )
 567         # data      := *urlchar
 568         # parameter := attribute "=" value
 569         import mimetools
 570         try:
 571             from cStringIO import StringIO
 572         except ImportError:
 573             from StringIO import StringIO
 574         try:
 575             [type, data] = url.split(',', 1)
 576         except ValueError:
 577             raise IOError, ('data error', 'bad data URL')
 578         if not type:
 579             type = 'text/plain;charset=US-ASCII'
 580         semi = type.rfind(';')
 581         if semi >= 0 and '=' not in type[semi:]:
 582             encoding = type[semi+1:]
 583             type = type[:semi]
 584         else:
 585             encoding = ''
 586         msg = []
 587         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
 588                                             time.gmtime(time.time())))
 589         msg.append('Content-type: %s' % type)
 590         if encoding == 'base64':
 591             import base64
 592             data = base64.decodestring(data)
 593         else:
 594             data = unquote(data)
 595         msg.append('Content-Length: %d' % len(data))
 596         msg.append('')
 597         msg.append(data)
 598         msg = '\n'.join(msg)
 599         f = StringIO(msg)
 600         headers = mimetools.Message(f, 0)
 601         #f.fileno = None     # needed for addinfourl
 602         return addinfourl(f, headers, url)
 603
 604
 605 class FancyURLopener(URLopener):
 606     """Derived class with handlers for errors we can handle (perhaps)."""
 607
 608     def __init__(self, *args, **kwargs):
 609         URLopener.__init__(self, *args, **kwargs)
 610         self.auth_cache = {}
 611         self.tries = 0
 612         self.maxtries = 10
 613
 614     def http_error_default(self, url, fp, errcode, errmsg, headers):
 615         """Default error handling -- don't raise an exception."""
 616         return addinfourl(fp, headers, "http:" + url)
 617
 618     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
 619         """Error 302 -- relocated (temporarily)."""
 620         self.tries += 1
 621         if self.maxtries and self.tries >= self.maxtries:
 622             if hasattr(self, "http_error_500"):
 623                 meth = self.http_error_500
 624             else:
 625                 meth = self.http_error_default
 626             self.tries = 0
 627             return meth(url, fp, 500,
 628                         "Internal Server Error: Redirect Recursion", headers)
 629         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
 630                                         data)
 631         self.tries = 0
 632         return result
 633
 634     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
 635         if 'location' in headers:
 636             newurl = headers['location']
 637         elif 'uri' in headers:
 638             newurl = headers['uri']
 639         else:
 640             return
 641         void = fp.read()
 642         fp.close()
 643         # In case the server sent a relative URL, join with original:
 644         newurl = basejoin(self.type + ":" + url, newurl)
 645         return self.open(newurl)
 646
 647     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
 648         """Error 301 -- also relocated (permanently)."""
 649         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 650
 651     def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
 652         """Error 303 -- also relocated (essentially identical to 302)."""
 653         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 654
 655     def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
 656         """Error 307 -- relocated, but turn POST into error."""
 657         if data is None:
 658             return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 659         else:
 660             return self.http_error_default(url, fp, errcode, errmsg, headers)
 661
 662     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
 663         """Error 401 -- authentication required.
 664         This function supports Basic authentication only."""
 665         if not 'www-authenticate' in headers:
 666             URLopener.http_error_default(self, url, fp,
 667                                          errcode, errmsg, headers)
 668         stuff = headers['www-authenticate']
 669         import re
 670         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 671         if not match:
 672             URLopener.http_error_default(self, url, fp,
 673                                          errcode, errmsg, headers)
 674         scheme, realm = match.groups()
 675         if scheme.lower() != 'basic':
 676             URLopener.http_error_default(self, url, fp,
 677                                          errcode, errmsg, headers)
 678         name = 'retry_' + self.type + '_basic_auth'
 679         if data is None:
 680             return getattr(self,name)(url, realm)
 681         else:
 682             return getattr(self,name)(url, realm, data)
 683
 684     def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
 685         """Error 407 -- proxy authentication required.
 686         This function supports Basic authentication only."""
 687         if not 'proxy-authenticate' in headers:
 688             URLopener.http_error_default(self, url, fp,
 689                                          errcode, errmsg, headers)
 690         stuff = headers['proxy-authenticate']
 691         import re
 692         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 693         if not match:
 694             URLopener.http_error_default(self, url, fp,
 695                                          errcode, errmsg, headers)
 696         scheme, realm = match.groups()
 697         if scheme.lower() != 'basic':
 698             URLopener.http_error_default(self, url, fp,
 699                                          errcode, errmsg, headers)
 700         name = 'retry_proxy_' + self.type + '_basic_auth'
 701         if data is None:
 702             return getattr(self,name)(url, realm)
 703         else:
 704             return getattr(self,name)(url, realm, data)
 705
 706     def retry_proxy_http_basic_auth(self, url, realm, data=None):
 707         host, selector = splithost(url)
 708         newurl = 'http://' + host + selector
 709         proxy = self.proxies['http']
 710         urltype, proxyhost = splittype(proxy)
 711         proxyhost, proxyselector = splithost(proxyhost)
 712         i = proxyhost.find('@') + 1
 713         proxyhost = proxyhost[i:]
 714         user, passwd = self.get_user_passwd(proxyhost, realm, i)
 715         if not (user or passwd): return None
 716         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 717         self.proxies['http'] = 'http://' + proxyhost + proxyselector
 718         if data is None:
 719             return self.open(newurl)
 720         else:
 721             return self.open(newurl, data)
 722
 723     def retry_proxy_https_basic_auth(self, url, realm, data=None):
 724         host, selector = splithost(url)
 725         newurl = 'https://' + host + selector
 726         proxy = self.proxies['https']
 727         urltype, proxyhost = splittype(proxy)
 728         proxyhost, proxyselector = splithost(proxyhost)
 729         i = proxyhost.find('@') + 1
 730         proxyhost = proxyhost[i:]
 731         user, passwd = self.get_user_passwd(proxyhost, realm, i)
 732         if not (user or passwd): return None
 733         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 734         self.proxies['https'] = 'https://' + proxyhost + proxyselector
 735         if data is None:
 736             return self.open(newurl)
 737         else:
 738             return self.open(newurl, data)
 739
 740     def retry_http_basic_auth(self, url, realm, data=None):
 741         host, selector = splithost(url)
 742         i = host.find('@') + 1
 743         host = host[i:]
 744         user, passwd = self.get_user_passwd(host, realm, i)
 745         if not (user or passwd): return None
 746         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 747         newurl = 'http://' + host + selector
 748         if data is None:
 749             return self.open(newurl)
 750         else:
 751             return self.open(newurl, data)
 752
 753     def retry_https_basic_auth(self, url, realm, data=None):
 754         host, selector = splithost(url)
 755         i = host.find('@') + 1
 756         host = host[i:]
 757         user, passwd = self.get_user_passwd(host, realm, i)
 758         if not (user or passwd): return None
 759         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 760         newurl = 'https://' + host + selector
 761         if data is None:
 762             return self.open(newurl)
 763         else:
 764             return self.open(newurl, data)
 765
 766     def get_user_passwd(self, host, realm, clear_cache = 0):
 767         key = realm + '@' + host.lower()
 768         if key in self.auth_cache:
 769             if clear_cache:
 770                 del self.auth_cache[key]
 771             else:
 772                 return self.auth_cache[key]
 773         user, passwd = self.prompt_user_passwd(host, realm)
 774         if user or passwd: self.auth_cache[key] = (user, passwd)
 775         return user, passwd
 776
 777     def prompt_user_passwd(self, host, realm):
 778         """Override this in a GUI environment!"""
 779         import getpass
 780         try:
 781             user = raw_input("Enter username for %s at %s: " % (realm,
 782                                                                 host))
 783             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
 784                 (user, realm, host))
 785             return user, passwd
 786         except KeyboardInterrupt:
 787             print
 788             return None, None
 789
 790
 791 # Utility functions
 792
 793 _localhost = None
 794 def localhost():
 795     """Return the IP address of the magic hostname 'localhost'."""
 796     global _localhost
 797     if _localhost is None:
 798         _localhost = socket.gethostbyname('localhost')
 799     return _localhost
 800
 801 _thishost = None
 802 def thishost():
 803     """Return the IP address of the current host."""
 804     global _thishost
 805     if _thishost is None:
 806         _thishost = socket.gethostbyname(socket.gethostname())
 807     return _thishost
 808
 809 _ftperrors = None
 810 def ftperrors():
 811     """Return the set of errors raised by the FTP class."""
 812     global _ftperrors
 813     if _ftperrors is None:
 814         import ftplib
 815         _ftperrors = ftplib.all_errors
 816     return _ftperrors
 817
 818 _noheaders = None
 819 def noheaders():
 820     """Return an empty mimetools.Message object."""
 821     global _noheaders
 822     if _noheaders is None:
 823         import mimetools
 824         try:
 825             from cStringIO import StringIO
 826         except ImportError:
 827             from StringIO import StringIO
 828         _noheaders = mimetools.Message(StringIO(), 0)
 829         _noheaders.fp.close()   # Recycle file descriptor
 830     return _noheaders
 831
 832
 833 # Utility classes
 834
 835 class ftpwrapper:
 836     """Class used by open_ftp() for cache of open FTP connections."""
 837
 838     def __init__(self, user, passwd, host, port, dirs):
 839         self.user = user
 840         self.passwd = passwd
 841         self.host = host
 842         self.port = port
 843         self.dirs = dirs
 844         self.init()
 845
 846     def init(self):
 847         import ftplib
 848         self.busy = 0
 849         self.ftp = ftplib.FTP()
 850         self.ftp.connect(self.host, self.port)
 851         self.ftp.login(self.user, self.passwd)
 852         for dir in self.dirs:
 853             self.ftp.cwd(dir)
 854
 855     def retrfile(self, file, type):
 856         import ftplib
 857         self.endtransfer()
 858         if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
 859         else: cmd = 'TYPE ' + type; isdir = 0
 860         try:
 861             self.ftp.voidcmd(cmd)
 862         except ftplib.all_errors:
 863             self.init()
 864             self.ftp.voidcmd(cmd)
 865         conn = None
 866         if file and not isdir:
 867             # Try to retrieve as a file
 868             try:
 869                 cmd = 'RETR ' + file
 870                 conn = self.ftp.ntransfercmd(cmd)
 871             except ftplib.error_perm, reason:
 872                 if str(reason)[:3] != '550':
 873                     raise IOError, ('ftp error', reason), sys.exc_info()[2]
 874         if not conn:
 875             # Set transfer mode to ASCII!
 876             self.ftp.voidcmd('TYPE A')
 877             # Try a directory listing
 878             if file: cmd = 'LIST ' + file
 879             else: cmd = 'LIST'
 880             conn = self.ftp.ntransfercmd(cmd)
 881         self.busy = 1
 882         # Pass back both a suitably decorated object and a retrieval length
 883         return (addclosehook(conn[0].makefile('rb'),
 884                              self.endtransfer), conn[1])
 885     def endtransfer(self):
 886         if not self.busy:
 887             return
 888         self.busy = 0
 889         try:
 890             self.ftp.voidresp()
 891         except ftperrors():
 892             pass
 893
 894     def close(self):
 895         self.endtransfer()
 896         try:
 897             self.ftp.close()
 898         except ftperrors():
 899             pass
 900
 901 class addbase:
 902     """Base class for addinfo and addclosehook."""
 903
 904     def __init__(self, fp):
 905         self.fp = fp
 906         self.read = self.fp.read
 907         self.readline = self.fp.readline
 908         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
 909         if hasattr(self.fp, "fileno"):
 910             self.fileno = self.fp.fileno
 911         else:
 912             self.fileno = lambda: None
 913         if hasattr(self.fp, "__iter__"):
 914             self.__iter__ = self.fp.__iter__
 915             if hasattr(self.fp, "next"):
 916                 self.next = self.fp.next
 917
 918     def __repr__(self):
 919         return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
 920                                              id(self), self.fp)
 921
 922     def close(self):
 923         self.read = None
 924         self.readline = None
 925         self.readlines = None
 926         self.fileno = None
 927         if self.fp: self.fp.close()
 928         self.fp = None
 929
 930 class addclosehook(addbase):
 931     """Class to add a close hook to an open file."""
 932
 933     def __init__(self, fp, closehook, *hookargs):
 934         addbase.__init__(self, fp)
 935         self.closehook = closehook
 936         self.hookargs = hookargs
 937
 938     def close(self):
 939         addbase.close(self)
 940         if self.closehook:
 941             self.closehook(*self.hookargs)
 942             self.closehook = None
 943             self.hookargs = None
 944
 945 class addinfo(addbase):
 946     """class to add an info() method to an open file."""
 947
 948     def __init__(self, fp, headers):
 949         addbase.__init__(self, fp)
 950         self.headers = headers
 951
 952     def info(self):
 953         return self.headers
 954
 955 class addinfourl(addbase):
 956     """class to add info() and geturl() methods to an open file."""
 957
 958     def __init__(self, fp, headers, url):
 959         addbase.__init__(self, fp)
 960         self.headers = headers
 961         self.url = url
 962
 963     def info(self):
 964         return self.headers
 965
 966     def geturl(self):
 967         return self.url
 968
 969
 970 # Utilities to parse URLs (most of these return None for missing parts):
 971 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 972 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 973 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 974 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
 975 # splitpasswd('user:passwd') -> 'user', 'passwd'
 976 # splitport('host:port') --> 'host', 'port'
 977 # splitquery('/path?query') --> '/path', 'query'
 978 # splittag('/path#tag') --> '/path', 'tag'
 979 # splitattr('/path;attr1=value1;attr2=value2;...') ->
 980 #   '/path', ['attr1=value1', 'attr2=value2', ...]
 981 # splitvalue('attr=value') --> 'attr', 'value'
 982 # splitgophertype('/Xselector') --> 'X', 'selector'
 983 # unquote('abc%20def') -> 'abc def'
 984 # quote('abc def') -> 'abc%20def')
 985
 986 try:
 987     unicode
 988 except NameError:
 989     def _is_unicode(x):
 990         return 0
 991 else:
 992     def _is_unicode(x):
 993         return isinstance(x, unicode)
 994
 995 def toBytes(url):
 996     """toBytes(u"URL") --> 'URL'."""
 997     # Most URL schemes require ASCII. If that changes, the conversion
 998     # can be relaxed
 999     if _is_unicode(url):
1000         try:
1001             url = url.encode("ASCII")
1002         except UnicodeError:
1003             raise UnicodeError("URL " + repr(url) +
1004                                " contains non-ASCII characters")
1005     return url
1006
1007 def unwrap(url):
1008     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1009     url = url.strip()
1010     if url[:1] == '<' and url[-1:] == '>':
1011         url = url[1:-1].strip()
1012     if url[:4] == 'URL:': url = url[4:].strip()
1013     return url
1014
1015 _typeprog = None
1016 def splittype(url):
1017     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1018     global _typeprog
1019     if _typeprog is None:
1020         import re
1021         _typeprog = re.compile('^([^/:]+):')
1022
1023     match = _typeprog.match(url)
1024     if match:
1025         scheme = match.group(1)
1026         return scheme.lower(), url[len(scheme) + 1:]
1027     return None, url
1028
1029 _hostprog = None
1030 def splithost(url):
1031     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1032     global _hostprog
1033     if _hostprog is None:
1034         import re
1035         _hostprog = re.compile('^//([^/?]*)(.*)$')
1036
1037     match = _hostprog.match(url)
1038     if match: return match.group(1, 2)
1039     return None, url
1040
1041 _userprog = None
1042 def splituser(host):
1043     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1044     global _userprog
1045     if _userprog is None:
1046         import re
1047         _userprog = re.compile('^(.*)@(.*)$')
1048
1049     match = _userprog.match(host)
1050     if match: return map(unquote, match.group(1, 2))
1051     return None, host
1052
1053 _passwdprog = None
1054 def splitpasswd(user):
1055     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1056     global _passwdprog
1057     if _passwdprog is None:
1058         import re
1059         _passwdprog = re.compile('^([^:]*):(.*)$')
1060
1061     match = _passwdprog.match(user)
1062     if match: return match.group(1, 2)
1063     return user, None
1064
1065 # splittag('/path#tag') --> '/path', 'tag'
1066 _portprog = None
1067 def splitport(host):
1068     """splitport('host:port') --> 'host', 'port'."""
1069     global _portprog
1070     if _portprog is None:
1071         import re
1072         _portprog = re.compile('^(.*):([0-9]+)$')
1073
1074     match = _portprog.match(host)
1075     if match: return match.group(1, 2)
1076     return host, None
1077
1078 _nportprog = None
1079 def splitnport(host, defport=-1):
1080     """Split host and port, returning numeric port.
1081     Return given default port if no ':' found; defaults to -1.
1082     Return numerical port if a valid number are found after ':'.
1083     Return None if ':' but not a valid number."""
1084     global _nportprog
1085     if _nportprog is None:
1086         import re
1087         _nportprog = re.compile('^(.*):(.*)$')
1088
1089     match = _nportprog.match(host)
1090     if match:
1091         host, port = match.group(1, 2)
1092         try:
1093             if not port: raise ValueError, "no digits"
1094             nport = int(port)
1095         except ValueError:
1096             nport = None
1097         return host, nport
1098     return host, defport
1099
1100 _queryprog = None
1101 def splitquery(url):
1102     """splitquery('/path?query') --> '/path', 'query'."""
1103     global _queryprog
1104     if _queryprog is None:
1105         import re
1106         _queryprog = re.compile('^(.*)\?([^?]*)$')
1107
1108     match = _queryprog.match(url)
1109     if match: return match.group(1, 2)
1110     return url, None
1111
1112 _tagprog = None
1113 def splittag(url):
1114     """splittag('/path#tag') --> '/path', 'tag'."""
1115     global _tagprog
1116     if _tagprog is None:
1117         import re
1118         _tagprog = re.compile('^(.*)#([^#]*)$')
1119
1120     match = _tagprog.match(url)
1121     if match: return match.group(1, 2)
1122     return url, None
1123
1124 def splitattr(url):
1125     """splitattr('/path;attr1=value1;attr2=value2;...') ->
1126         '/path', ['attr1=value1', 'attr2=value2', ...]."""
1127     words = url.split(';')
1128     return words[0], words[1:]
1129
1130 _valueprog = None
1131 def splitvalue(attr):
1132     """splitvalue('attr=value') --> 'attr', 'value'."""
1133     global _valueprog
1134     if _valueprog is None:
1135         import re
1136         _valueprog = re.compile('^([^=]*)=(.*)$')
1137
1138     match = _valueprog.match(attr)
1139     if match: return match.group(1, 2)
1140     return attr, None
1141
1142 def splitgophertype(selector):
1143     """splitgophertype('/Xselector') --> 'X', 'selector'."""
1144     if selector[:1] == '/' and selector[1:2]:
1145         return selector[1], selector[2:]
1146     return None, selector
1147
1148 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1149 _hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1150
1151 def unquote(s):
1152     """unquote('abc%20def') -> 'abc def'."""
1153     res = s.split('%')
1154     for i in xrange(1, len(res)):
1155         item = res[i]
1156         try:
1157             res[i] = _hextochr[item[:2]] + item[2:]
1158         except KeyError:
1159             res[i] = '%' + item
1160         except UnicodeDecodeError:
1161             res[i] = unichr(int(item[:2], 16)) + item[2:]
1162     return "".join(res)
1163
1164 def unquote_plus(s):
1165     """unquote('%7e/abc+def') -> '~/abc def'"""
1166     s = s.replace('+', ' ')
1167     return unquote(s)
1168
1169 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1170                'abcdefghijklmnopqrstuvwxyz'
1171                '0123456789' '_.-')
1172 _safemaps = {}
1173
1174 def quote(s, safe = '/'):
1175     """quote('abc def') -> 'abc%20def'
1176
1177     Each part of a URL, e.g. the path info, the query, etc., has a
1178     different set of reserved characters that must be quoted.
1179
1180     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1181     the following reserved characters.
1182
1183     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1184                   "$" | ","
1185
1186     Each of these characters is reserved in some component of a URL,
1187     but not necessarily in all of them.
1188
1189     By default, the quote function is intended for quoting the path
1190     section of a URL.  Thus, it will not encode '/'.  This character
1191     is reserved, but in typical usage the quote function is being
1192     called on a path where the existing slash characters are used as
1193     reserved characters.
1194     """
1195     cachekey = (safe, always_safe)
1196     try:
1197         safe_map = _safemaps[cachekey]
1198     except KeyError:
1199         safe += always_safe
1200         safe_map = {}
1201         for i in range(256):
1202             c = chr(i)
1203             safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1204         _safemaps[cachekey] = safe_map
1205     res = map(safe_map.__getitem__, s)
1206     return ''.join(res)
1207
1208 def quote_plus(s, safe = ''):
1209     """Quote the query fragment of a URL; replacing ' ' with '+'"""
1210     if ' ' in s:
1211         s = quote(s, safe + ' ')
1212         return s.replace(' ', '+')
1213     return quote(s, safe)
1214
1215 def urlencode(query,doseq=0):
1216     """Encode a sequence of two-element tuples or dictionary into a URL query string.
1217
1218     If any values in the query arg are sequences and doseq is true, each
1219     sequence element is converted to a separate parameter.
1220
1221     If the query arg is a sequence of two-element tuples, the order of the
1222     parameters in the output will match the order of parameters in the
1223     input.
1224     """
1225
1226     if hasattr(query,"items"):
1227         # mapping objects
1228         query = query.items()
1229     else:
1230         # it's a bother at times that strings and string-like objects are
1231         # sequences...
1232         try:
1233             # non-sequence items should not work with len()
1234             # non-empty strings will fail this
1235             if len(query) and not isinstance(query[0], tuple):
1236                 raise TypeError
1237             # zero-length sequences of all types will get here and succeed,
1238             # but that's a minor nit - since the original implementation
1239             # allowed empty dicts that type of behavior probably should be
1240             # preserved for consistency
1241         except TypeError:
1242             ty,va,tb = sys.exc_info()
1243             raise TypeError, "not a valid non-string sequence or mapping object", tb
1244
1245     l = []
1246     if not doseq:
1247         # preserve old behavior
1248         for k, v in query:
1249             k = quote_plus(str(k))
1250             v = quote_plus(str(v))
1251             l.append(k + '=' + v)
1252     else:
1253         for k, v in query:
1254             k = quote_plus(str(k))
1255             if isinstance(v, str):
1256                 v = quote_plus(v)
1257                 l.append(k + '=' + v)
1258             elif _is_unicode(v):
1259                 # is there a reasonable way to convert to ASCII?
1260                 # encode generates a string, but "replace" or "ignore"
1261                 # lose information and "strict" can raise UnicodeError
1262                 v = quote_plus(v.encode("ASCII","replace"))
1263                 l.append(k + '=' + v)
1264             else:
1265                 try:
1266                     # is this a sufficient test for sequence-ness?
1267                     x = len(v)
1268                 except TypeError:
1269                     # not a sequence
1270                     v = quote_plus(str(v))
1271                     l.append(k + '=' + v)
1272                 else:
1273                     # loop over the sequence
1274                     for elt in v:
1275                         l.append(k + '=' + quote_plus(str(elt)))
1276     return '&'.join(l)
1277
1278 # Proxy handling
1279 def getproxies_environment():
1280     """Return a dictionary of scheme -> proxy server URL mappings.
1281
1282     Scan the environment for variables named <scheme>_proxy;
1283     this seems to be the standard convention.  If you need a
1284     different way, you can pass a proxies dictionary to the
1285     [Fancy]URLopener constructor.
1286
1287     """
1288     proxies = {}
1289     for name, value in os.environ.items():
1290         name = name.lower()
1291         if value and name[-6:] == '_proxy':
1292             proxies[name[:-6]] = value
1293     return proxies
1294
1295 if sys.platform == 'darwin':
1296     def getproxies_internetconfig():
1297         """Return a dictionary of scheme -> proxy server URL mappings.
1298
1299         By convention the mac uses Internet Config to store
1300         proxies.  An HTTP proxy, for instance, is stored under
1301         the HttpProxy key.
1302
1303         """
1304         try:
1305             import ic
1306         except ImportError:
1307             return {}
1308
1309         try:
1310             config = ic.IC()
1311         except ic.error:
1312             return {}
1313         proxies = {}
1314         # HTTP:
1315         if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1316             try:
1317                 value = config['HTTPProxyHost']
1318             except ic.error:
1319                 pass
1320             else:
1321                 proxies['http'] = 'http://%s' % value
1322         # FTP: XXXX To be done.
1323         # Gopher: XXXX To be done.
1324         return proxies
1325
1326     def proxy_bypass(x):
1327         return 0
1328
1329     def getproxies():
1330         return getproxies_environment() or getproxies_internetconfig()
1331
1332 elif os.name == 'nt':
1333     def getproxies_registry():
1334         """Return a dictionary of scheme -> proxy server URL mappings.
1335
1336         Win32 uses the registry to store proxies.
1337
1338         """
1339         proxies = {}
1340         try:
1341             import _winreg
1342         except ImportError:
1343             # Std module, so should be around - but you never know!
1344             return proxies
1345         try:
1346             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1347                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1348             proxyEnable = _winreg.QueryValueEx(internetSettings,
1349                                                'ProxyEnable')[0]
1350             if proxyEnable:
1351                 # Returned as Unicode but problems if not converted to ASCII
1352                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1353                                                        'ProxyServer')[0])
1354                 if '=' in proxyServer:
1355                     # Per-protocol settings
1356                     for p in proxyServer.split(';'):
1357                         protocol, address = p.split('=', 1)
1358                         # See if address has a type:// prefix
1359                         import re
1360                         if not re.match('^([^/:]+)://', address):
1361                             address = '%s://%s' % (protocol, address)
1362                         proxies[protocol] = address
1363                 else:
1364                     # Use one setting for all protocols
1365                     if proxyServer[:5] == 'http:':
1366                         proxies['http'] = proxyServer
1367                     else:
1368                         proxies['http'] = 'http://%s' % proxyServer
1369                         proxies['ftp'] = 'ftp://%s' % proxyServer
1370             internetSettings.Close()
1371         except (WindowsError, ValueError, TypeError):
1372             # Either registry key not found etc, or the value in an
1373             # unexpected format.
1374             # proxies already set up to be empty so nothing to do
1375             pass
1376         return proxies
1377
1378     def getproxies():
1379         """Return a dictionary of scheme -> proxy server URL mappings.
1380
1381         Returns settings gathered from the environment, if specified,
1382         or the registry.
1383
1384         """
1385         return getproxies_environment() or getproxies_registry()
1386
1387     def proxy_bypass(host):
1388         try:
1389             import _winreg
1390             import re
1391         except ImportError:
1392             # Std modules, so should be around - but you never know!
1393             return 0
1394         try:
1395             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1396                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1397             proxyEnable = _winreg.QueryValueEx(internetSettings,
1398                                                'ProxyEnable')[0]
1399             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1400                                                      'ProxyOverride')[0])
1401             # ^^^^ Returned as Unicode but problems if not converted to ASCII
1402         except WindowsError:
1403             return 0
1404         if not proxyEnable or not proxyOverride:
1405             return 0
1406         # try to make a host list from name and IP address.
1407         rawHost, port = splitport(host)
1408         host = [rawHost]
1409         try:
1410             addr = socket.gethostbyname(rawHost)
1411             if addr != rawHost:
1412                 host.append(addr)
1413         except socket.error:
1414             pass
1415         try:
1416             fqdn = socket.getfqdn(rawHost)
1417             if fqdn != rawHost:
1418                 host.append(fqdn)
1419         except socket.error:
1420             pass
1421         # make a check value list from the registry entry: replace the
1422         # '<local>' string by the localhost entry and the corresponding
1423         # canonical entry.
1424         proxyOverride = proxyOverride.split(';')
1425         i = 0
1426         while i < len(proxyOverride):
1427             if proxyOverride[i] == '<local>':
1428                 proxyOverride[i:i+1] = ['localhost',
1429                                         '127.0.0.1',
1430                                         socket.gethostname(),
1431                                         socket.gethostbyname(
1432                                             socket.gethostname())]
1433             i += 1
1434         # print proxyOverride
1435         # now check if we match one of the registry values.
1436         for test in proxyOverride:
1437             test = test.replace(".", r"\.")     # mask dots
1438             test = test.replace("*", r".*")     # change glob sequence
1439             test = test.replace("?", r".")      # change glob char
1440             for val in host:
1441                 # print "%s <--> %s" %( test, val )
1442                 if re.match(test, val, re.I):
1443                     return 1
1444         return 0
1445
1446 else:
1447     # By default use environment variables
1448     getproxies = getproxies_environment
1449
1450     def proxy_bypass(host):
1451         return 0
1452
1453 # Test and time quote() and unquote()
1454 def test1():
1455     s = ''
1456     for i in range(256): s = s + chr(i)
1457     s = s*4
1458     t0 = time.time()
1459     qs = quote(s)
1460     uqs = unquote(qs)
1461     t1 = time.time()
1462     if uqs != s:
1463         print 'Wrong!'
1464     print repr(s)
1465     print repr(qs)
1466     print repr(uqs)
1467     print round(t1 - t0, 3), 'sec'
1468
1469
1470 def reporthook(blocknum, blocksize, totalsize):
1471     # Report during remote transfers
1472     print "Block number: %d, Block size: %d, Total size: %d" % (
1473         blocknum, blocksize, totalsize)
1474
1475 # Test program
1476 def test(args=[]):
1477     if not args:
1478         args = [
1479             '/etc/passwd',
1480             'file:/etc/passwd',
1481             'file://localhost/etc/passwd',
1482             'ftp://ftp.gnu.org/pub/README',
1483 ##          'gopher://gopher.micro.umn.edu/1/',
1484             'http://www.python.org/index.html',
1485             ]
1486         if hasattr(URLopener, "open_https"):
1487             args.append('https://synergy.as.cmu.edu/~geek/')
1488     try:
1489         for url in args:
1490             print '-'*10, url, '-'*10
1491             fn, h = urlretrieve(url, None, reporthook)
1492             print fn
1493             if h:
1494                 print '======'
1495                 for k in h.keys(): print k + ':', h[k]
1496                 print '======'
1497             fp = open(fn, 'rb')
1498             data = fp.read()
1499             del fp
1500             if '\r' in data:
1501                 table = string.maketrans("", "")
1502                 data = data.translate(table, "\r")
1503             print data
1504             fn, h = None, None
1505         print '-'*40
1506     finally:
1507         urlcleanup()
1508
1509 def main():
1510     import getopt, sys
1511     try:
1512         opts, args = getopt.getopt(sys.argv[1:], "th")
1513     except getopt.error, msg:
1514         print msg
1515         print "Use -h for help"
1516         return
1517     t = 0
1518     for o, a in opts:
1519         if o == '-t':
1520             t = t + 1
1521         if o == '-h':
1522             print "Usage: python urllib.py [-t] [url ...]"
1523             print "-t runs self-test;",
1524             print "otherwise, contents of urls are printed"
1525             return
1526     if t:
1527         if t > 1:
1528             test1()
1529         test(args)
1530     else:
1531         if not args:
1532             print "Use -h for help"
1533         for url in args:
1534             print urlopen(url).read(),
1535
1536 # Run test program when run as a script
1537 if __name__ == '__main__':
1538     main()