#! /usr/bin/env python3

import csv
import sys
from collections import Counter, defaultdict, namedtuple

# CSV columns: Number,Domain,Company,Country,Public,Subsidiary,Employees

# Company sizes: inclusive upper bounds (number of employees) of the size
# classes, in ascending order.
company_sizes = [100, 250, 500, 5000, 500000]

# Label of each size class, keyed by its upper bound. "N/A" is the label used
# when the number of employees is not known.
company_sizes_dict = {
    100: "Small",
    250: "Medium",
    500: "Medium big",
    5000: "Big",
    500000: "Very big",
    "N/A": "N/A"
}


def company_employees2size(employees):
    """Return the size label for a company with *employees* employees.

    employees is the head count as an integer or a string of digits, or the
    marker "N/A".  The label is the one of the first bound in company_sizes
    that is greater than or equal to the head count.  Head counts above the
    largest bound get the label of the largest class ("Very big"), and values
    that cannot be parsed as an integer (such as "N/A") yield "N/A".
    """
    try:
        headcount = int(employees)
    except ValueError:
        # "N/A" or any other non-numeric text: the size is unknown.
        return company_sizes_dict["N/A"]
    # Walk the numeric bounds only; the dict also holds the "N/A" key, which
    # cannot be compared with an integer.
    for upper_bound in company_sizes:
        if headcount <= upper_bound:
            return company_sizes_dict[upper_bound]
    # Bigger than every bound: it still belongs to the largest class.
    return company_sizes_dict[company_sizes[-1]]
            
def big_companies_sharing_project(company_list):
    """Tell whether more than one big company takes part in a project.

    company_list holds one sequence per company; the last item of each
    sequence is the company's size label.  A company counts as big when that
    label is 'Big' or 'Very big'.  Returns True when at least two entries are
    big, False otherwise.
    """
    big_labels = ['Big', 'Very big']
    big_entries = [entry for entry in company_list if entry[-1] in big_labels]
    return len(big_entries) > 1
    
def small_companies_in_project(company_list):
    """Tell whether at least one smaller company takes part in a project.

    company_list holds one sequence per company; the last item of each
    sequence is the company's size label.  A company counts as small when
    that label is 'Small', 'Medium' or 'Medium big'.  Returns True when at
    least one entry is small, False otherwise.
    """
    small_labels = ['Small', 'Medium', 'Medium big']
    small_entries = [entry for entry in company_list if entry[-1] in small_labels]
    return bool(small_entries)


# The project name is the only, mandatory, command-line argument.
if len(sys.argv) < 2:
    sys.exit("Usage: python3 analyze-companies.py project")

# Load the company list of the project.  Each row becomes a namedtuple whose
# fields are named after the column headers found in the first line.
inputfile = "companies-" + sys.argv[1] + ".csv"
# newline="" leaves newline handling to the csv module, as its documentation
# asks for; otherwise line breaks inside quoted fields are misread.
with open(inputfile, newline="") as csvfile:
    csvreader = csv.reader(csvfile)
    Data = namedtuple("Data", next(csvreader))  # get names from column headers
    # The reader yields an empty list for a blank line; skip those instead of
    # failing while building the namedtuple.
    companies = [Data(*line) for line in csvreader if line]


###
###
# Analysis

## How many Companies vs. Univ vs. Other
## (one line per distinct value of the "Public" column, with its count)
types = [company.Public for company in companies]
# Counter tallies everything in a single pass and keeps first-seen order, so
# the listing is the same on every run (iterating a set of strings is not).
for public_value, count in Counter(types).items():
    print(public_value, count)
print()

## By Country, printed from the least to the most frequent one
countries = [company.Country for company in companies]
countries_dict = Counter(countries)
print(dict(sorted(countries_dict.items(), key=lambda item: item[1])))
print()

## Companies and committers per value of the "Public" column, and, for the
## values "Yes" and "No", broken down further by company size.
ctype = defaultdict(int)        # companies per Public value
committers = defaultdict(int)   # sum of company.Number per Public value

# Companies per size label, for Public == "Yes" and Public == "No".
ctype_size = {
    "Yes": defaultdict(int),
    "No": defaultdict(int)
}

# Sum of company.Number per size label, for Public == "Yes" and "No".
committers_size = {
    "Yes": defaultdict(int),
    "No": defaultdict(int)
}

# Company records keyed by domain; the size label is the last item, which is
# where the project analysis further down looks for it.
company_dict = {}
for company in companies:
    # Classify once and reuse the label everywhere, so the per-size totals
    # agree with the label stored in company_dict.  Companies of unknown size
    # are counted once under "N/A", and the largest ones under the top class.
    size_label = company_employees2size(company.Employees)
    # NOTE(review): Number is summed into "committers", so it is presumably
    # the committer count of the company - confirm against the CSV.
    num_committers = int(company.Number)

    company_dict[company.Domain] = [company.Number, company.Company, company.Country, company.Public, company.Subsidiary, size_label]
    ctype[company.Public] += 1
    committers[company.Public] += num_committers
    if company.Public in ctype_size:
        ctype_size[company.Public][size_label] += 1
        committers_size[company.Public][size_label] += num_committers

print(ctype)
print(ctype_size)
print(committers)
print(committers_size)

#############################
#############################
## Look for projects with developers from different companies collaborating together
## 

# Developer list of the ecosystem, one row per repository.  The first four
# columns are the repository, its license, its number of commits and its
# number of developers; they are followed by one (developer, affiliation,
# commits) triple per developer.
inputfile = "../2023-skovde-" + sys.argv[1] + "/ecosystem/developers.csv"
license_dict = defaultdict(int)      # matching projects per license
license_dict100 = defaultdict(int)   # same, projects with more than 100 commits
license_dict1000 = defaultdict(int)  # same, projects with more than 1000 commits
# newline="" leaves newline handling to the csv module, as its documentation
# asks for.
with open(inputfile, newline="") as csvfile:
    for row in csv.reader(csvfile):
        # Skip blank lines and the header row.
        if not row or row[0] == "Repository":
            continue
        project, project_license, num_commits, num_developers = row[:4]

        # Commits per affiliation (a company domain) in this project.  The
        # developer name, first item of each triple, is not needed.
        commits_by_affiliation = defaultdict(int)
        for start in range(4, 4 + 3 * int(num_developers), 3):
            affiliation = row[start + 1]
            commits_by_affiliation[affiliation] += int(row[start + 2])
        # NOTE(review): affiliation '0' is discarded; presumably it marks
        # developers without a known affiliation - confirm.
        commits_by_affiliation.pop('0', None)

        # Only affiliations found in the company list can be classified by
        # size; the others are left out on purpose.
        project_list = [company_dict[domain]
                        for domain in commits_by_affiliation
                        if domain in company_dict]

        # True only when at least two of the known companies are big, which
        # also implies that several companies share the project.
        if big_companies_sharing_project(project_list):
            print(project, project_license, num_commits, project_list)
            license_dict[project_license] += 1
            total_commits = int(num_commits)
            if total_commits > 100:
                license_dict100[project_license] += 1
            if total_commits > 1000:
                license_dict1000[project_license] += 1

print("All:", sum(license_dict.values()), license_dict)
print("100:", sum(license_dict100.values()), license_dict100)
print("1000:", sum(license_dict1000.values()), license_dict1000)
