#! /usr/bin/env python3

import json
import os
import re

# Accumulated statistics, filled in by the directory scan below:
# resultsDict[repo][source] = (number of items, number of distinct authors).
resultsDict = {}

# For each known data source, the (outer key, inner key) pair that locates
# the author of an item, i.e. item[outer][inner].
infoDict = {
    "commits": ("data", "Author"),   # Commits: item["data"]["Author"]
    "issues": ("user", "login"),     # Issues: item["user"]["login"]
    "pulls": ("user", "login"),      # Pulls: item["user"]["login"]
}

# Matches "<repo>-<8 digits>-<source>.json" and captures (repo, source).
# The dot is escaped so that only a literal ".json" suffix is accepted.
prog = re.compile(r"^(.*)-\d{8}-(.*)\.json$")

def to_csv(repo, commits, issues, pulls):
    """Print one comma-separated row of statistics for *repo*.

    Each of *commits*, *issues* and *pulls* is indexed at positions 0 and 1;
    the row is *repo* followed by those six values, joined by ", ".
    """
    fields = [
        repo,
        commits[0], commits[1],
        issues[0], issues[1],
        pulls[0], pulls[1],
    ]
    print(", ".join(format(field) for field in fields))

def _iter_json_items(path):
    """Yield every JSON object stored back-to-back in the file at *path*.

    Objects are delimited by a line consisting solely of "}{" (the end of
    one object glued to the start of the next); the last object ends with a
    lone "}" line, with or without a trailing newline.

    Raises:
        ValueError: if an object (including a trailing, truncated one) is
            not valid JSON.
    """
    chunks = []
    # The dumps are read as UTF-8 rather than the locale's default encoding.
    with open(path, encoding="utf-8") as f:
        for line in f:
            if line == "}{\n" or line == "}":
                chunks.append("}")
                yield json.loads("".join(chunks))
                # The "{" half of the delimiter opens the next object.
                chunks = ["{"]
            else:
                chunks.append(line)
    # A file ending in "}\n" leaves its last object in the buffer; parse it
    # instead of silently dropping it.  A lone "{" is just delimiter residue.
    tail = "".join(chunks).strip()
    if tail and tail != "{":
        yield json.loads(tail)


# Sorted so that output order is deterministic and, when several dated dumps
# exist for the same repo/source, the most recent date is the one kept.
for filename in sorted(os.listdir()):
    if not filename.endswith(".json") or filename == "already.json":
        continue

    match = prog.match(filename)
    if match is None:
        # Some other JSON file that does not follow the dump naming scheme.
        continue
    repo, source = match.groups()
    if source not in infoDict:
        # Unknown data source: there is no rule for locating its authors.
        continue

    outer_key, inner_key = infoDict[source]
    has_comments = source in ("issues", "pulls")
    number_of_items = 0
    authors = set()
    for item in _iter_json_items(filename):
        number_of_items += 1
        authors.add(item[outer_key][inner_key])
        if has_comments:
            # Commenters on issues and pull requests count as authors too.
            authors.update(
                comment["user"]["login"] for comment in item["comments_data"]
            )
    resultsDict.setdefault(repo, {})[source] = (number_of_items, len(authors))

for repo, stats in resultsDict.items():
    # A repository may lack one of the three dumps; report it as (0, 0)
    # rather than failing with a KeyError.
    to_csv(
        repo,
        stats.get("commits", (0, 0)),
        stats.get("issues", (0, 0)),
        stats.get("pulls", (0, 0)),
    )
