#!/usr/bin/python3

import json
import csv
import collections
import os
import math
import time
import requests
from urllib.request import urlopen
OrderedDict = collections.OrderedDict
from shutil import copyfile


# aliases
# NOTE(review): this repeats the identical assignment already made among the
# imports above, and OrderedDict is not referenced anywhere else in the
# visible part of this script.
OrderedDict = collections.OrderedDict

# Every relative file name used below resolves against this directory.
os.chdir("/var/www/html/covid-portal/scripts/raw")

start_time = time.time()

dst = 'raw_viral_sequences.json'
previous_dst = 'raw_viral_sequences_old.json'

# Rotate the last export to the "_old" name before a fresh one is written.
# os.replace overwrites an existing "_old" file in a single step, so there is
# no separate delete that could destroy the only remaining copy.
# NOTE(review): the rotation still happens before the download, so if a
# request below fails the live file stays absent until the next good run.
try:
    os.replace(dst, previous_dst)
except FileNotFoundError:
    # First run, or the previous run failed after rotating: there is no
    # current export to rotate, and any existing "_old" file is left untouched.
    pass

# Probe request: only the total number of matching records ('hitCount') is
# read from this response; the records themselves are fetched page by page
# further down.
url='https://www.covid19dataportal.org/api/backend/viral-sequences/raw-reads?&facets=country:Greece&crossReferencesOption=all#search-content'
# The context manager closes the HTTP response deterministically, and the
# timeout (seconds) keeps an unresponsive server from hanging the script.
with urlopen(url, timeout=60) as response:
    wjdata = json.loads(response.read())
number_of_entries = wjdata['hitCount']
print(number_of_entries)


# Number of records requested per page; used both for the page-count
# arithmetic and for the 'size' query parameter so the two cannot drift apart.
PAGE_SIZE = 800

greek_data = []   # every entry returned by the API, in page order
entry_ids = []    # the 'id' of every entry, in the same order

page_count = math.ceil(number_of_entries / PAGE_SIZE)

# Pages are numbered from 1 in the request URL.
for page in range(1, page_count + 1):
    url = (
        'https://www.covid19dataportal.org/api/backend/viral-sequences/raw-reads'
        f'?&format=json&size={PAGE_SIZE}&page={page}'
        '&facets=country:Greece'
        '&fields=collection_date,acc,description,TAXON,creation_date,center_name,'
        'instrument_platform,instrument_model,library_strategy,library_selection'
        '&crossReferencesOption=all#search-content'
    )
    # Close each response as soon as it has been read; the timeout (seconds)
    # keeps a stalled page request from blocking the run forever.
    with urlopen(url, timeout=60) as response:
        entries = json.loads(response.read())['entries']

    entry_ids.extend(entry['id'] for entry in entries)
    greek_data.extend(entries)
        

# Flatten every downloaded entry in place: 'id' is renamed to 'accesion', a
# constant 'country' is added, and the first value of selected entries under
# row['fields'] is copied to a top-level key. The original 'fields' mapping is
# left on the row.
def _first_value(values, default=""):
    """Return the first item of ``values``, or ``default`` if it is empty or None."""
    return values[0] if values else default


# Fields copied verbatim (same key name) from row['fields'], in output order.
_COPIED_FIELDS = (
    'center_name',
    'instrument_platform',
    'instrument_model',
    'library_strategy',
    'library_selection',
)

for row in greek_data:
    fields = row.get('fields') or {}
    # The key spelling 'accesion' is kept as-is: it is written to the JSON
    # output, so changing it would change the file format.
    row['accesion'] = row.pop('id')
    # A record with an empty or absent field list gets an empty string
    # instead of aborting the whole export with an IndexError/KeyError.
    row['descriptions'] = _first_value(fields.get('description'))
    row['country'] = "Greece"
    for field_name in _COPIED_FIELDS:
        row[field_name] = _first_value(fields.get(field_name))
        
        


# Serialise all records under a single top-level key and store them in dst.
payload = {"raw_viral_sequences": greek_data}
serialised = json.dumps(payload, indent=2)
with open(dst, 'w') as out_file:
    out_file.write(serialised)

# Keep a second copy of the fresh export in the portal's backup directory.
backup_path = '/var/www/html/covid-portal/backup_data/raw_viral_sequences.json'
copyfile(dst, backup_path)

# Report the wall-clock duration of the whole run.
elapsed = time.time() - start_time
print("--- %s seconds ---" % elapsed)

