"""
Requests a resource using proxies defined in a file.
A proxy is used only if the country the proxy is located in has not been the
origin of a request before.
The proxy file is expected to contain one proxy IP address per line in the
format <proxy_ip>:<proxy_port>

Caveats:
Currently, this script will work correctly only if the remote resource is
available and accessible. This is because we use wget to fetch the resource
and we only check the return value and not the output. This may change in the
future. Or we might use something other than wget...
Moreover, we should provide some means to permanently store the countries we
already used so that future invocations of this script will only use countries
which have not been used in previous runs.

Copyright (C) 2013 University of Bonn
Code: Martin Lambertz <lambertz@cs.uni-bonn.de>
"""

import GeoIP
import subprocess
import sys

# URL every proxy is asked to fetch; passed to wget as its final argument.
REMOTE_RESOURCE = "http://www.google.com"

# Browser identification handed to wget through its --user-agent option.
USERAGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0"
# Value handed to wget's --timeout option (wget interprets it as seconds).
TIMEOUT = 25


class CountryDict(dict):
    """Dictionary keyed by every country code listed in GeoIP.country_codes.

    Each value starts out as the empty string; callers overwrite it with
    the proxy that was used successfully for that country.
    """

    def __init__(self):
        """Create one entry per GeoIP country code, each mapped to ""."""
        self.update(dict.fromkeys(GeoIP.country_codes, ""))

    def is_still_required(self, countrycode):
        """Return True while no proxy has been stored for *countrycode*.

        Raises KeyError if *countrycode* is not a key of this dictionary.
        """
        return self[countrycode] == ""


class Requester:
    """Fetch REMOTE_RESOURCE through proxies, at most once per country.

    Attributes:
        countries: CountryDict mapping each country code to the proxy that
            successfully fetched the resource ("" while none has).
        geoip: GeoIP handle used to look up the country of a proxy IP.
    """

    def __init__(self):
        self.countries = CountryDict()
        self.geoip = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)

    def request_with_proxy_file(self, proxyfile):
        """Try the proxies listed in *proxyfile*, one ``ip:port`` per line.

        A proxy is only used when its country is known and no earlier proxy
        has succeeded for that country. Blank lines are ignored; lines that
        are not of the form ``<ip>:<port>`` are reported on stderr and
        skipped instead of aborting the whole run.
        """
        with open(proxyfile, "r") as fd:
            for line in fd:
                proxy = line.strip()
                if not proxy:
                    # Blank line (e.g. a trailing newline): nothing to try.
                    continue
                parts = proxy.split(":")
                if len(parts) != 2 or not all(parts):
                    sys.stderr.write(
                        "Skipping malformed proxy entry: %s\n" % proxy)
                    continue
                proxy_ip = parts[0]
                countrycode = self.get_country_code(proxy_ip)
                if countrycode is None:
                    # GeoIP could not place this address in any country.
                    continue
                if not self.countries.is_still_required(countrycode):
                    continue
                # Single parenthesised argument: prints the same text under
                # both the Python 2 print statement and Python 3 print().
                print("Trying %s with %s" % (countrycode, proxy))
                if self.request_with_proxy(proxy):
                    self.countries[countrycode] = proxy

    def get_country_code(self, proxy_ip):
        """Return the GeoIP country code for *proxy_ip*.

        The result of ``country_code_by_addr`` is passed through unchanged;
        the caller treats ``None`` as "country unknown".
        """
        return self.geoip.country_code_by_addr(proxy_ip)

    def request_with_proxy(self, proxy):
        """Return True if the download through *proxy* succeeded.

        A non-zero exit status of the downloader (CalledProcessError) is
        reported as False; other exceptions propagate.
        """
        try:
            self.call_downloader(proxy)
        except subprocess.CalledProcessError:
            return False
        else:
            return True

    def call_downloader(self, proxy):
        """Download the resource through *proxy* (currently via wget)."""
        self.call_wget(proxy)

    def call_wget(self, proxy):
        """Run wget once against REMOTE_RESOURCE using *proxy*.

        Raises subprocess.CalledProcessError when wget exits non-zero.
        """
        commandline = [
            "wget",
            "--tries=1",
            "--timeout=%s" % TIMEOUT,
            "--no-check-certificate",
            # No shell is involved, so the value must not be quoted: quotes
            # would be sent to the server as part of the User-Agent header.
            "--user-agent=" + USERAGENT,
            REMOTE_RESOURCE,
        ]
        # The child process gets an environment containing only http_proxy.
        env = {"http_proxy": proxy}
        subprocess.check_call(commandline, env=env)


def process_proxy_file(proxyfile):
    """Run a Requester over *proxyfile* and print a per-country summary.

    Prints one line for every country that has a proxy stored, followed by
    an overall "used / total" line. Returns None.
    """
    requester = Requester()
    requester.request_with_proxy_file(proxyfile)

    countries = requester.countries
    served = 0
    for countrycode, proxy in countries.items():
        if proxy == "":
            # No proxy succeeded for this country.
            continue
        served += 1
        # Single-argument form keeps the output identical to the original
        # comma-separated print statement (note the double space).
        print("%s :  %s" % (countrycode, proxy))
    print("Overall:  %s / %s" % (served, len(countries)))


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv`` when None.

    Returns:
        2 when the argument count is wrong (after printing the usage line),
        otherwise whatever process_proxy_file() returns.
    """
    if argv is None:
        argv = sys.argv
    # Inspect the argv that was passed in, not sys.argv, so that callers
    # supplying their own argument list are honoured.
    if len(argv) != 2:
        print("Usage: request_from_countries.py <proxylist>")
        return 2
    return process_proxy_file(argv[1])


# Run main() only when executed as a script; its return value is handed to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
