#! /usr/bin/env python3

import argparse
import json
import time
import os.path
from perceval.backends.core.git import Git

# Parse command line arguments
parser = argparse.ArgumentParser(
    description="Simple parser for Git logs"
)
# --input is mandatory: the script opens args.input unconditionally, so
# without it the run would fail later with an opaque TypeError instead of
# a usage message.
parser.add_argument("-i", "--input", required=True,
                    help="Text file with repos")
args = parser.parse_args()

# Names of the repositories recorded by previous runs. Start from an empty
# list and replace it with the contents of 'already.json' when that file
# exists in the current directory.
already = []
if os.path.isfile('already.json'):
    with open('already.json') as previous_run:
        already = json.load(previous_run)

# Walk the listing given with --input and, for every usable entry, fetch the
# commits of the matching GitHub repository with Perceval and write them to
# "<owner>_<reponame>-<YYYYMMDD>-commits.json" in the current directory.
#
# NOTE(review): each line is presumably an `ls -l`-style record whose fifth
# field is a size and whose last field is a name of the form
# "<owner>_<reponame>-2023..." -- confirm against the tool that produces
# the input file.
with open(args.input) as input_file:
    for line in input_file:
        # split() already discards the trailing newline, so the final line
        # is handled correctly even when the file does not end with one.
        fields = line.split()
        if not fields:
            # Blank line: nothing to analyze.
            continue

        entry = fields[-1]
        if ".errors.txt" in entry:
            continue

        # Everything before the first '2023', minus the one character that
        # precedes it, is "<owner>_<reponame>"; split on the first '_' only,
        # so underscores in the repository name are preserved.
        owner, separator, reponame = entry.split('2023')[0][:-1].partition('_')
        if not separator:
            print("Error with following repo:", entry)
            continue

        if len(fields) < 5:
            print("Error with following line (missing size field):", line.rstrip())
            continue
        size = fields[4]
        # Entries whose size field is "-1" are not analyzed.
        if size == "-1":
            continue

        repo_key = owner + "_" + reponame
        # url for the git repo to analyze
        repo_url = 'https://github.com/{}/{}.git'.format(owner, reponame)

        # Check for previous work before setting anything else up.
        if repo_key in already:
            print("Already analyzed", repo_url)
            continue

        # directory for letting Perceval clone the git repo; the owner is
        # part of the path so that same-named repositories belonging to
        # different owners do not share (and corrupt) one clone.
        repo_dir = '/tmp/{}.git'.format(repo_key)

        # create a Git object, pointing to repo_url, using repo_dir for cloning
        git_repo = Git(uri=repo_url, gitpath=repo_dir)

        # UTC date used to stamp the output file name
        timevalue = time.strftime("%Y%m%d", time.gmtime())

        # NOTE(review): the repository is recorded as analyzed *before* its
        # commits are fetched, so a run that fails during fetch() will not
        # retry it next time -- confirm this is intended.
        already.append(repo_key)
        with open('already.json', 'w') as f:
            json.dump(already, f)

        # fetch all commits as an iterator and append each one to the output
        # file; the documents are written back to back, with no separator.
        with open(repo_key + "-" + timevalue + "-commits.json", 'a') as output_file:
            for commit in git_repo.fetch():
                json.dump(commit, output_file, indent=4)
