#! /usr/bin/python3
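"""Calculate a metric for source package importance based on how many source
packages become compilable in a bootstrapping scenario through the
compilability of a given source package.

The script reads two dependency graphs in GraphML format, one built from
strong dependencies and one from the full dependency closure, and prints one
tab separated line per source package. With --online, popcon installation
counts for the affected source packages are added to the output.
"""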

from __future__ import print_function
import networkx as nx
import sys
import gzip
from collections import defaultdict


def calcportsmetric(g, verbose=False):
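    """Return, for every source package name in g, the set of source package
    names that only become compilable once that package is compilable.

    g must be a directed graph with an edge from each source package to the
    source packages building its dependencies and a "name" attribute on every
    vertex. Source packages that appear in several versions are merged into a
    single entry of the returned dictionary.
    """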
    if verbose:
        print("calculating sccs", file=sys.stderr)
    scc = list(nx.strongly_connected_components(g))
    if verbose:
        print("condensing", file=sys.stderr)
    h = nx.condensation(g, scc)
    if verbose:
        print("get roots", file=sys.stderr)
    i = h.reverse()
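    # in the reversed condensation every package that depends on a node is
    # reachable from that node, so a depth-first postorder guarantees that
    # all predecessors (in h) of a node are processed before the node itself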
    for node in nx.dfs_postorder_nodes(i):
        pred = list(h.predecessors(node))
        # do not include the version so that source packages which exist in
        # multiple versions are only counted once
        mynames = set([g.nodes[n]["name"] for n in h.nodes[node]["members"]])
        if not pred:
            h.nodes[node]["k"] = mynames
        else:
            h.nodes[node]["k"] = set.union(mynames, *[h.nodes[n]["k"] for n in pred])
    # # slow and stupid version of the above (to check the fast version for
    # # correctness)
    # for node in h.nodes():
    #     print(scc[node], [scc[n] for n in nx.dfs_tree(i, node).nodes()])
    #     descendants = nx.dfs_tree(i, node).nodes()
    #     if descendants:
    #         h.nodes[node]['k'] = set.union(*[set([g.nodes[v]['name']
    #                                               for v in scc[n]])
    #                                          for n in descendants])
    #     else:
    #         h.nodes[node]['k'] = set([g.nodes[n]['name'] for n in scc[node]])
    if verbose:
        print("getting order", file=sys.stderr)
    # we want the results per source package, so we need to merge the values
    # for those source packages which exist in different versions
    srcmetrics = defaultdict(set)
    for n in h.nodes():
        a = h.nodes[n]["k"]
        for node in h.nodes[n]["members"]:
            srcmetrics[g.nodes[node]["name"]].update(a)
    return srcmetrics


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=(
            "Calculate a metric for source package importance "
            + "based on how many source packages (a minimum via strong "
            + "dependencies and a maximum via the dependency closure) "
            + "become compilable in a bootstrapping scenario through "
            + "the compilability of a given source package"
        )
    )
    parser.add_argument(
        "strongsrcgraph",
        metavar="strongsrcgraph.xml",
        type=nx.read_graphml,
        help="A dependency graph in GraphML format connecting source "
        + "package vertices with the source packages that build their "
        + "strong dependencies.",
    )
    parser.add_argument(
        "closuresrcgraph",
        metavar="closuresrcgraph.xml",
        type=nx.read_graphml,
        help="A dependency graph in GraphML format connecting source "
        + "package vertices with the source packages that build their "
        + "dependency closure.",
    )
    parser.add_argument(
        "--online",
        action="store_true",
        help="Retrieve popcon results for source packages",
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="be verbose")
    args = parser.parse_args()
    strongres = calcportsmetric(args.strongsrcgraph, args.verbose)
    closureres = calcportsmetric(args.closuresrcgraph, args.verbose)

    if args.online:
        if args.verbose:
            print("getting popcon results for source packages", file=sys.stderr)
            print("downloading...", file=sys.stderr)
        try:
            import urllib.request as urllib
        except ImportError:
            import urllib
        from io import BytesIO

        r = BytesIO(urllib.urlopen("http://popcon.debian.org/source/by_inst.gz").read())
        if args.verbose:
            print("done downloading, opening...", file=sys.stderr)
        popconvalues = dict()
        with gzip.GzipFile(fileobj=r) as popconf:
            # parse popcon file
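            # comment lines and lines that do not consist of exactly seven
            # whitespace separated fields are skipped; the second field is
            # the source package name, the third its installation count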
            for line in popconf:
                if line.startswith(b"#"):
                    continue
                tokens = line.split()
                if len(tokens) != 7:
                    continue
                popconvalues[tokens[1].decode()] = int(tokens[2])

        # now replace every entry by a tuple of the number of source packages
        # in the set and the sum of their popcon values
        for k, v in list(strongres.items()):
            psum = sum([popconvalues.get(n, 0) for n in v])
            strongres[k] = (len(v), psum)
        for k, v in list(closureres.items()):
            psum = sum([popconvalues.get(n, 0) for n in v])
            closureres[k] = (len(v), psum)

        if args.verbose:
            print("printing results", file=sys.stderr)
        # we take the set intersection because if a package is not
        # installable, there exist no strong dependencies for it but its
        # dependency closure will still be calculated. So in that case there
        # may be packages which are in the closure graph but not in the
        # strong dependency graph. We only want those packages that are in
        # both graphs because we don't care about the subgraphs of
        # uninstallable packages.
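        # output columns: source package name, number of source packages made
        # compilable via strong dependencies and via the dependency closure,
        # followed by the summed popcon values of both sets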
        for srcname in sorted(set(strongres.keys()) & set(closureres.keys())):
            print(
                "src:%s\t%d\t%d\t%d\t%d"
                % (
                    srcname,
                    strongres[srcname][0],
                    closureres[srcname][0],
                    strongres[srcname][1],
                    closureres[srcname][1],
                )
            )
    else:
        if args.verbose:
            print("printing results", file=sys.stderr)
        for srcname in sorted(set(strongres.keys()) & set(closureres.keys())):
            print(
                "src:%s\t%d\t%d"
                % (srcname, len(strongres[srcname]), len(closureres[srcname]))
            )
