#!/usr/bin/python3
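"""Refresh the COVID portal's list of publications with Greek author affiliations.

Steps: rotate the previous run's outputs, download the latest
articles_by_popularity.csv from Zenodo, diff the two DOI sets, screen the
changed DOIs for Greek affiliations via the Crossref API, merge old and new
Greek DOIs, and rebuild publications.json with formatted scores.
"""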

import json
import requests
import time
import os
from decimal import Decimal
from shutil import copyfile

start_time = time.time()

# All relative paths below resolve against the publications scripts directory.
os.chdir("/var/www/html/covid-portal/scripts/publications")

# DOI lists from the previous and the freshly downloaded popularity files.
dois = {"dois": []}
new_dois = {"dois": []}


# Rotate last run's outputs so the new download can be diffed against them.
# Guard the removals so a missing file (e.g. on a first run) does not abort the script.
if os.path.exists("greek-dois-old.json"):
    os.remove("greek-dois-old.json")
os.rename("greek-dois-all.json", "greek-dois-old.json")
if os.path.exists("articles_by_popularity_previous.csv"):
    os.remove("articles_by_popularity_previous.csv")
os.rename("articles_by_popularity.csv", "articles_by_popularity_previous.csv")

# Collect the DOIs recorded in the previous update.
with open("articles_by_popularity_previous.csv", "r") as f:
    for line in f:
        sp = line.split()
        if len(sp) > 2 and sp[2] != 'N/A':
            dois["dois"].append(sp[2])

# Download the updated popularity file. 10.5281/zenodo.3723281 is the Zenodo
# concept DOI; requests follows its redirect, so r.url points at the record
# for the latest version, under which the CSV is published.
r = requests.get('https://doi.org/10.5281/zenodo.3723281')
CSV_URL = r.url + '/files/articles_by_popularity.csv?download=1'
with requests.Session() as s:
    download = s.get(CSV_URL)
    with open("articles_by_popularity.csv", "w") as f:
        f.write(download.content.decode('utf-8'))

# Collect the DOIs in the current update.
with open("articles_by_popularity.csv", "r") as f:
    for line in f:
        sp = line.split()
        if len(sp) > 2 and sp[2] != 'N/A':
            new_dois["dois"].append(sp[2])

print(len(dois["dois"]))
print(len(new_dois["dois"]))

# DOIs that appear in exactly one of the two snapshots (symmetric difference).
different_dois = list(set(dois["dois"]) ^ set(new_dois["dois"]))


print(len(different_dois))
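
# Only the changed DOIs are screened against Crossref; DOIs present in both
# snapshots keep their previous verdict via greek-dois-old.json below. The
# screening assumes the usual Crossref /works/{doi} payload shape (abridged):
#   {"status": "ok", "message": {"author": [{"affiliation": [{"name": "..."}]}]}}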
greek_dois = []
errors = []
# A DOI is flagged as Greek when any author affiliation contains one of these tokens.
keywords = ['Greece', 'greece', ' GR ', ' Gr ', ' gr ']
for doi in different_dois:
    try:
        r = requests.get('https://api.crossref.org/works/{}'.format(doi))
        time.sleep(0.01)
    except requests.RequestException:
        # Log the failure and skip this DOI instead of reusing a stale response.
        errors.append(doi)
        with open('errors.json', 'w') as outfile:
            json.dump(errors, outfile)
        continue
    if r.status_code != 200 or r.text == 'Resource not found.':
        continue
    resp = r.json()
    if resp['status'] != 'ok':
        continue
    for author in resp['message'].get('author', []):
        for affil in author.get('affiliation') or []:
            if any(x in affil['name'] for x in keywords) and doi not in greek_dois:
                greek_dois.append(doi)
                # Checkpoint after every match so an interrupted run loses nothing.
                with open('greek-dois-new.json', 'w') as outfile:
                    json.dump(greek_dois, outfile)

print(len(greek_dois))
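
# Merge the carried-over Greek DOIs with the newly detected ones for the
# metadata pass below.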
all_data=[]

# Greek DOIs carried over from the previous run.
with open('greek-dois-old.json', 'r') as infile:
    all_data.extend(json.load(infile))

# Greek DOIs newly found in this run; the file only exists if the screening
# above produced at least one match, hence the length guard.
if len(greek_dois) > 0:
    with open('greek-dois-new.json', 'r') as infile:
        all_data.extend(json.load(infile))

# Persist the merged list; it becomes greek-dois-old.json on the next run.
with open('greek-dois-all.json', 'w') as outfile:
    json.dump(all_data, outfile)

# Load the full Greek DOI list back as a set for fast membership tests.
with open('greek-dois-all.json') as f:
    data = set(json.load(f))

# Fetch Crossref metadata for each Greek DOI in the current file and attach
# the scores from its CSV row.
errors = []
metadata = {}
with open("articles_by_popularity.csv", "r") as f:
    for line in f:
        sp = line.split()
        if len(sp) > 7 and sp[2] in data:
            try:
                r = requests.get('https://api.crossref.org/works/{}'.format(sp[2]))
                time.sleep(0.01)
            except requests.RequestException:
                errors.append(sp[2])
                with open('errors.json', 'w') as outfile:
                    json.dump(errors, outfile)
                continue
            if r.status_code != 200 or r.text == 'Resource not found.':
                continue
            resp = r.json()
            if resp['status'] == 'ok':
                message = resp['message']
                metadata.setdefault("data", []).append({
                    "doi": sp[2], "title": message['title'],
                    "venue": message['container-title'], "date": message['issued'],
                    "influence": sp[3], "popularity_alt": sp[4], "popularity": sp[5],
                    "influence_alt": sp[6], "social": sp[7],
                })

with open('metadata.json', 'w') as outfile:
    json.dump(metadata, outfile)

# Edit the publications' metadata into the final shape for the portal.
with open('metadata.json', 'r', encoding='utf-8') as f:
    data = json.load(f)['data']

def format_e(n):
    """Render n in scientific notation with trailing zeros trimmed,
    e.g. format_e(Decimal('0.000123')) -> '1.23E-04'."""
    a = '%E' % n
    return a.split('E')[0].rstrip('0').rstrip('.') + 'E' + a.split('E')[1]

new_data = {}
for dat in data:
    # str methods return new strings, so the cleaned title must be reassigned.
    title = dat['title'][0].strip().replace("\n", "")
    venue = dat['venue'][0] if dat['venue'] else ''
    year = dat['date']['date-parts'][0][0]
    new_data.setdefault("data", []).append({
        "doi": dat['doi'], "title": title, "venue": venue, "date": year,
        "popularity": format_e(Decimal(dat['popularity'])),
        "popularity_alt": format_e(Decimal(dat['popularity_alt'])),
        "influence": format_e(Decimal(dat['influence'])),
        "influence_alt": format_e(Decimal(dat['influence_alt'])),
        "social": dat['social'],
    })

with open('publications.json', 'w') as f:
    json.dump(new_data, f)

# Keep a copy of the final output under the portal's backup_data directory.
copyfile('publications.json', '/var/www/html/covid-portal/backup_data/publications.json')

print("--- %s minutes ---" % ((time.time() - start_time)/60))          
    

