#! /usr/bin/env python3

import argparse
import gzip
import json
import time
import os.path
from perceval.backends.core.git import Git

# Keys of the repositories handled by earlier runs. Start from an empty list
# and replace it with the saved one when 'already.json' exists.
already = []
if os.path.isfile('already.json'):
    with open('already.json') as previous_run:
        already = json.load(previous_run)
    
# Walk every gzipped GitHub repository dump ("github-repos-*.json.gz") in the
# current directory and, for each listed repository that is not yet recorded
# in `already`, write its commits to "<owner>_<repo>-<YYYYMMDD>-commits.json".
for filename in os.listdir("."):
    if not (filename.startswith("github-repos-")
            and filename.endswith(".json.gz")):
        continue

    # Read and parse the dump up front so the archive is closed before the
    # potentially long-running fetches below start.
    with gzip.open(filename, 'rb') as fb:
        parsed_json = json.loads(fb.read().decode("utf-8"))

    for repository in parsed_json["items"]:
        full_name = repository["full_name"]

        if ".errors.txt" in full_name:
            continue
        # The name must be exactly "<owner>/<repo>"; report and skip anything
        # else instead of aborting the whole run when unpacking the split.
        if full_name.count('/') != 1:
            print("Error with following repo:", full_name)
            continue
        owner, reponame = full_name.split('/')
        repo_url = 'https://github.com/{}/{}.git'.format(owner, reponame)
        repo_key = owner + "_" + reponame

        # Check the bookkeeping first so skipped repositories cost nothing.
        if repo_key in already:
            print("Already analyzed", repo_url)
            continue

        # The repository is recorded (and the record persisted) before the
        # fetch starts, so an interrupted run will not revisit it.
        already.append(repo_key)
        with open('already.json', 'w') as f:
            json.dump(already, f)

        # directory for letting Perceval clone the git repo
        repo_dir = 'repos/{}__{}.git'.format(owner, reponame)
        # create a Git object, pointing to repo_url, using repo_dir for cloning
        repo = Git(uri=repo_url, gitpath=repo_dir)

        # UTC date stamp (YYYYMMDD) used in the output file name.
        timevalue = time.strftime("%Y%m%d", time.gmtime())
        output_name = repo_key + "-" + timevalue + "-commits.json"
        # Append mode: commits are written one after another as
        # pretty-printed JSON documents with no separator between them.
        with open(output_name, 'a') as output_file:
            for commit in repo.fetch():
                json.dump(commit, output_file, indent=4)
