#!/usr/bin/python3

import json
import collections
import os
import time
from urllib.request import urlopen
OrderedDict = collections.OrderedDict
from shutil import copyfile




# All relative paths below (the assembled output file) resolve against this
# directory, so pin the working directory first.
os.chdir("/var/www/html/covid-portal/scripts/assembled")

start_time = time.time()

# Output locations: the live JSON consumed by the portal, plus a backup copy.
dst = 'assembled.json'
dst_backup = '/var/www/html/covid-portal/backup_data/assembled.json'

# COVID-19 Data Portal API query: up to 1000 viral-sequence entries for
# country=Greece, requesting the metadata fields used by the loop below.
greek_data = []
url = 'https://www.covid19dataportal.org/api/backend/viral-sequences/sequences?db=embl-covid19&size=1000&facets=country:Greece&fields=collection_date,country,host,strain,isolate,lat_lon,molecule_type,TAXON,creation_date,last_modification_date,center_name,coverage,lineage,phylogeny,who&crossReferencesOption=all#search-content'

# Use the response as a context manager so the HTTP connection is closed
# promptly instead of being leaked (the original never closed it).
with urlopen(url) as response:
    wjdata = json.loads(response.read())

greek_data.extend(wjdata['entries'])
print(len(greek_data))

def _format_collection_date(raw):
    """Convert an API YYYYMMDD collection date to ISO-style YYYY-MM-DD.

    Values that are not exactly 8 digits (e.g. a year-only date, or one
    already containing hyphens) are returned unchanged — the original blind
    slicing would have mangled them into strings like "2020--".
    """
    if len(raw) == 8 and raw.isdigit():
        return raw[:4] + '-' + raw[4:6] + '-' + raw[6:]
    return raw


# Reshape each API entry in place into the portal's "publications" record
# schema: title, entity, published, country, link, host, labels, coverage,
# taxonomy, xrefs.  (The dead `data = []` from the original is removed — it
# was never used and shadowed the earlier `data` name.)
for row in greek_data:
    # The ENA accession id becomes the record title.
    row['title'] = row.pop('id')
    row['entity'] = 'publications'
    row['published'] = _format_collection_date(row['fields']['collection_date'][0])
    row['country'] = "Greece"
    # NOTE(review): this is the bare ENA browser base URL, exactly as before —
    # presumably the accession (row['title']) should be appended; confirm
    # with whatever consumes assembled.json before changing it.
    row['link'] = "https://www.ebi.ac.uk/ena/browser/view/"
    row['host'] = row['fields']['host'][0]
    row['labels'] = {"Category: Genomics & transcriptomics": None}
    row['coverage'] = row['fields']['coverage'][0]
    row['taxonomy'] = "Severe acute respiratory syndrome coronavirus 2"
    row['xrefs'] = [{"db": "URL"}]


# Nest the transformed records under the top-level "publications" key that
# the portal expects, then serialize to the live output file.
greek_data = {"publications": greek_data}
with open(dst, 'w') as out_file:
    json.dump(greek_data, out_file, indent=2)

# Mirror the freshly written file into the backup directory.
copyfile(dst, dst_backup)

# Report total wall-clock runtime of the script.
print("--- %s seconds ---" % (time.time() - start_time))
