# Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information.
# Copyright 2010 The Update Framework.  See LICENSE for licensing information.

import httplib
import sys
import urllib2

import tuf.hash
import tuf.log
import tuf.util

# Module-level logger obtained from tuf.log; the download code in this file
# reports progress and failures through it.
logger = tuf.log.get_logger()


class BaseDownloadJob(object):
    """Abstract description of something to download and of how to fetch it.

    This base class carries no behavior of its own: download() always raises
    NotImplementedError, so a usable job has to override it.
    """

    # Meta-information, such as headers for HTTP requests.  None by default.
    info = None

    # Location of the thing to download.  None by default.
    url = None

    def download(self):
        """Fetch the resource; this base implementation only raises.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError()


class DownloadJob(BaseDownloadJob):
    """A download job that supports urls supported by urllib2."""

    def __init__(self, url, wantHashes=None, wantLength=None):
        """Create a new DownloadJob for 'url'.

        Nothing is fetched until download() is called.

        Args:
            url: Location of the file to fetch.  Backslashes are replaced
                with forward slashes before the url is opened.
            wantHashes: Optional dict mapping a hash algorithm name to the
                digest the downloaded data must have, in the form produced
                by tuf.hash.Digest.format().  If None, no hash is checked.
            wantLength: Optional exact number of bytes the downloaded data
                must have.  If None, the length is not checked.
        """
        self.url = url
        self._wantHashes = wantHashes
        self._wantLength = wantLength
        self._temp_file_obj = None

    def download(self):
        """Downloads the file, blocking until download is complete.

        Returns:
            A tuf.util.TempFile instance where the downloaded data is stored.

        Raises:
            tuf.DownloadError: If any error occurred that prevented the
                download or storage of the file from being successful.
        """
        try:
            self._download()
            return self._temp_file_obj
        except Exception:
            # Catching Exception rather than using a bare "except:" lets
            # KeyboardInterrupt and SystemExit propagate instead of being
            # disguised as a DownloadError.
            err = sys.exc_info()[1]

            # Failures that are expected in normal operation; these are
            # wrapped without logging a traceback.
            # TODO: document which types of situations cause each error.
            #   urllib2.HTTPError can be caused by 404.
            #   OSError, httplib.error, IOError, urllib2.URLError: could be
            #   the mirror; could be the network.  Hard to say.
            expected = (urllib2.HTTPError, tuf.DownloadError, OSError,
                        httplib.error, IOError, urllib2.URLError)
            if not isinstance(err, expected):
                logger.exception(err)
            raise tuf.DownloadError(err)

    def _check_hash(self):
        """Helper: check whether the downloaded temporary file matches
           every digest listed in self._wantHashes.

        Raises:
            tuf.BadHash: If a computed digest differs from the expected one.
        """
        if self._wantHashes is None:
            return
        wanted = self._wantHashes.items()
        if not wanted:
            return

        # Read the downloaded data once and feed the same bytes to every
        # algorithm, instead of re-reading the temporary file per hash.
        data = self._temp_file_obj.read()
        for algorithm, expected in wanted:
            digest_obj = tuf.hash.Digest(algorithm)
            digest_obj.update(data)
            actual = digest_obj.format()
            if actual != expected:
                raise tuf.BadHash("Expected %s, got %s." % (expected, actual))
            logger.info("Correct hash: %s" % expected)

    def _download(self):
        """Implementation function.  Unlike download(), can throw exceptions.

        Streams the url into a fresh tuf.util.TempFile stored on
        self._temp_file_obj, records the response meta-information on
        self.info, enforces self._wantLength and finally checks the hashes.

        Raises:
            tuf.DownloadError: If more or fewer bytes than self._wantLength
                were received.
            tuf.BadHash: If a digest does not match (see _check_hash).
            Any exception raised while opening or reading the url.
        """
        self._temp_file_obj = tuf.util.TempFile()
        f_in = None

        try:
            url = self.url.replace("\\", "/")
            logger.info("Downloading %s", url)
            f_in = open_url(url)
            logger.debug("Connected to %s", url)

            self.info = f_in.info()

            # Content-Length is optional (for example, HTTP responses using
            # chunked transfer encoding do not carry it) and comes from the
            # server, so a missing, non-numeric or negative value is treated
            # as "unknown" rather than being allowed to abort the download
            # or to turn into an unbounded read().
            try:
                expectLength = int(self.info.get("Content-Length"))
            except (TypeError, ValueError):
                expectLength = None
            if expectLength is not None and expectLength < 0:
                expectLength = None

            if expectLength is None:
                shownLength = "???"
            else:
                shownLength = expectLength

            # TODO: if expectLength is too large, we don't even need to
            # continue.

            total = 0
            while True:
                chunk_size = 8192
                if expectLength is not None:
                    # Never ask for more than the server announced; once
                    # everything has arrived this requests 0 bytes, which
                    # yields an empty result and ends the loop.
                    chunk_size = min(chunk_size, expectLength - total)
                c = f_in.read(chunk_size)
                if not c:
                    logger.debug("Got %s/%s bytes from %s", total,
                                 shownLength, url)
                    break
                self._temp_file_obj.write(c, auto_flush=False)
                total += len(c)
                if self._wantLength is not None and total > self._wantLength:
                    msg = ("Read too many bytes from %s; got %s, but "
                           "wanted %s" % (url, total, self._wantLength))
                    logger.warn(msg)
                    raise tuf.DownloadError(msg)

            if self._wantLength is not None and total != self._wantLength:
                logger.warn("Length wrong on file %s", url)
                raise tuf.DownloadError("Wrong length. Expected %s, got %s" %
                                        (self._wantLength, total))
        finally:
            if f_in is not None:
                f_in.close()
            self._temp_file_obj.flush()

        try:
            self._check_hash()
        except tuf.FormatException:
            # NOTE(review): _check_hash raises tuf.BadHash on a mismatch;
            # whether that is covered by tuf.FormatException (and so whether
            # the temporary file is deleted on a bad hash) is not visible
            # from this file -- confirm against the tuf exception hierarchy.
            self._temp_file_obj.delete()
            raise


def open_url(url):
    """Return an open urllib2 connection to 'url'.

    The supported schemes are the ones urllib handles -- http:, ftp:, and
    file: -- plus https: when python has the ssl module available
    (Python 2.6+).
    """
    # TODO: Do proper ssl cert/name checking.
    # TODO: Disallow SSLv2.
    # TODO: Support ssl with MCrypto.
    # TODO: Determine whether this follows http redirects and decide if we like
    # that. For example, would we not want to allow redirection from ssl to
    # non-ssl urls?
    return urllib2.urlopen(urllib2.Request(url))

if __name__ == '__main__':
    # Trivial CLI to test out downloading.

    import getopt
    options, args = getopt.getopt(sys.argv[1:], "")

    url = args[0]

    job = DownloadJob(url)
    temp_file_obj = job.download()
    print temp_file_obj.read()