#!/usr/bin/env python
# coding: utf-8
# CSV_NEX-GDDP-CMIP6_one_lat_lon.py
# # User input
# In[1]:
# Short name of the climate variable to extract.
# Names mentioned in this script: tasmax, tasmin, tas, sfcWind, rsds, pr, hurs
variable_of_interest = 'rsds'
# Unit of the extracted values AFTER conversion; it is appended to the column title and
# used in the name of the output folder.
# Values used so far: 'MJ.m-2.day-1' (rsds), 'm_s-1' (sfcWind), 'Celsius' (tas, tasmin, tasmax), 'K', '%'
unit = 'MJ.m-2.day-1'
# Title of the data column of the output table. Titles used so far (each followed by the unit):
#   'Surface Downwelling Shortwave Radiation '
#   'Daily Near-Surface Air Temperature '
#   'Daily Minimum Near-Surface Air Temperature '
#   'Daily Maximum Near-Surface Air Temperature '
#   'Mean of the daily precipitation rate '
#   'Daily-Mean Near-Surface Wind Speed '
#   'Near-Surface Relative Humidity '
title_column = 'Surface Downwelling Shortwave Radiation '+unit
# Every value read from the files is converted as: value * conversion_factor + conversion_addition
# conversion_factor: 1 means no scaling; 86400 was used for precipitation, 8.64 * 10**(-2) for rsds
conversion_factor = 8.64 * 10**(-2)
# conversion_addition: 0 means no offset; -273.15 was used to convert K to Celsius
conversion_addition = 0
# convert precipitation data from kg.m^(-2).s^(-1) to mm/day : 1 kg/m2/s = 86400 mm/day
# original unit of tas and tasmax is K: conversion from K to degrees Celsius: Celsius = Kelvin - 273.15
# original unit of rsds is W m-2, but it is needed in MJ.m-2.day-1. The conversion factor is 8.64 * 10**(-2)
# the user should indicate the first and last year of the period of interest (both included)
start_year = 1950
stop_year = 2100 # if the user only wants one year, stop_year should be equal to start_year
# # Import Packages and functions
# In[2]:
import requests
import pandas as pd
import os
import os.path
os.environ['HDF5_USE_FILE_LOCKING'] = 'False'
from netCDF4 import Dataset
import xarray as xr
import numpy as np
import numpy.ma as ma
# to measure elapsed time
import time
from timeit import default_timer as timer
import multiprocessing as mp# to download several file in parrallel
from multiprocessing.pool import ThreadPool
# # Functions
# In[3]:
# function to extract the name of the file from its url
# the input is an url
def extract_name_file(url):
    """Return the name of the file located at the end of an url.

    Parameters
    ----------
    url : str
        Address of a file; the file name is everything after the last '/'.

    Returns
    -------
    str
        The file name, suffix included (for example '.nc'). The whole string
        is returned when it contains no '/'.
    """
    # Cutting once from the right isolates what follows the last '/'.
    return url.rsplit('/', 1)[-1]
# function 'produce_name_list' produce a list of files' name, with the suffix '.nc'
# 'produce_name_list' use the function 'extract_name_file' to have the name of a file from its url
# the input is a list of url, from which we want to extract the corresponding names of files
def produce_name_list(url_list):
    """Return the file names found at the end of each url of a list.

    Parameters
    ----------
    url_list : list of str
        Urls from which the file names are extracted.

    Returns
    -------
    list of str
        One file name per url, in the same order, suffix included.
    """
    # 'extract_name_file' keeps what follows the last '/' of an url.
    return [extract_name_file(address) for address in url_list]
# In[4]:
# function produce_year produce:
# year: a vector containing all the year in the period of interest
# year_str: a array containing all the year in the period of interest in the string format
# index: a array containing the index of the year and year_str
#### Parameters of the function
# first_year: number in int format, of the first year of the period of interest
# last_year: number in int format, of the last year of the period of interest
def produce_year(first_year, last_year):
    """Build the years of the period of interest in several formats.

    Parameters
    ----------
    first_year : int
        First year of the period of interest.
    last_year : int
        Last year of the period of interest (included).

    Returns
    -------
    tuple
        (year, year_str, index) where year is a numpy array of the years,
        year_str is a list with the same years as strings, and index is a
        numpy array with the positions 0..len(year)-1.
    """
    year = np.arange(first_year, (last_year + 1), 1)
    # same years, written as text
    year_str = [str(value) for value in year]
    index = np.arange(0, len(year))
    return (year, year_str, index)
# In[5]:
# this functions aims to regroup all the scenarios, models, time_aggregation and variables in vectors
# the function use the function 'data_information'
def information_files_in_vectors(name_list):
    """Collect the distinct variables, time aggregations, models and scenarios of a list of files.

    The information is read from each file name with 'data_information'.

    Parameters
    ----------
    name_list : list of str
        Names of the files to analyse.

    Returns
    -------
    tuple of lists
        (variables, time_aggregations, models, scenarios); each list holds
        every distinct value once, in order of first appearance.
    """
    variables, time_aggregations, models, scenarios = [], [], [], []
    collected = (variables, time_aggregations, models, scenarios)
    for file_name in name_list:
        # the year is also returned by 'data_information' but it is not collected here
        variable, time_aggregation, model, scenario, _year = data_information(file_name)
        found = (variable, time_aggregation, model, scenario)
        for already_seen, value in zip(collected, found):
            if value not in already_seen:
                already_seen.append(value)
    return variables, time_aggregations, models, scenarios
# In[6]:
# this functions aims to return the closest latitudes and longitudes to the projects, and the respectives index
# in the lat and lon vectors of the file
def _lat_lon(path, lat_projects, lon_projects):
    """Find, for every project, the closest latitude and longitude available in a file.

    Parameters
    ----------
    path : str
        Path of the file opened with xr.open_dataset; its 'lat' and 'lon'
        coordinates are read.
    lat_projects, lon_projects : sequence of float
        Latitude and longitude of each project; both are indexed with the
        positions 0..len(lat_projects)-1.

    Returns
    -------
    tuple of lists
        (index_closest_lat, index_closest_lon, closest_value_lat,
        closest_value_lon), with one element per project: the index in the
        'lat'/'lon' vector of the file and the coordinate value found there.

    Raises
    ------
    Whatever xr.open_dataset raises when the file cannot be opened, and
    IndexError when 'closest_lat_lon_to_proj' finds no element.
    """
    # the 'with' block closes the file even if reading a coordinate fails;
    # .values copies the coordinates in memory, so they stay usable afterwards
    with xr.open_dataset(path) as ds:
        lat = ds.lat.values
        lon = ds.lon.values

    index_closest_lat = []
    index_closest_lon = []
    closest_value_lat = []
    closest_value_lon = []
    for j in np.arange(0, len(lat_projects)):
        # 'closest_lat_lon_to_proj' returns arrays: only the first match is kept
        (lat_index, lat_value) = closest_lat_lon_to_proj(lat_projects[j], lat)
        index_closest_lat.append(lat_index[0])
        closest_value_lat.append(lat_value[0])
        (lon_index, lon_value) = closest_lat_lon_to_proj(lon_projects[j], lon)
        index_closest_lon.append(lon_index[0])
        closest_value_lon.append(lon_value[0])
    return index_closest_lat, index_closest_lon, closest_value_lat, closest_value_lon
# In[7]:
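# this function aims to select, in a vector of coordinates, the point that is the closest to the geographical point of the project
# the function takes as input
# location_project, which is a numpy.float64
# vector, which is a numpy.ndarray
# the function returns
# index_closest, a numpy.ndarray containing the index of the closest element of vector
# closest_value, a numpy.ndarray containing the closest element of vector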
def closest_lat_lon_to_proj(location_project, vector):
    """Find the element of a vector that is the closest to a location.

    Parameters
    ----------
    location_project : float
        Coordinate (latitude or longitude) of the project.
    vector : array-like of float
        Coordinates available in the file.

    Returns
    -------
    tuple of numpy.ndarray
        (index_closest, closest_value): the index (or indexes) of the closest
        element of vector and the corresponding value(s).

    Raises
    ------
    ValueError
        If vector is empty.
    """
    vector = np.asarray(vector)
    difference = vector - location_project
    smallest_distance = np.min(np.abs(difference))
    # The closest element is looked for above the location first, then below.
    # The test is done on the number of matches and not on the indexes
    # themselves: index 0 is a valid match although it is "falsy".
    index_closest = np.where(difference == smallest_distance)[0]
    if index_closest.size == 0:
        index_closest = np.where(difference == -smallest_distance)[0]
    closest_value = vector[index_closest]
    return index_closest, closest_value
# the function returns
# first, the value of the index of the element of vector, that is the closest to location_project
# second, the array containing the element of vector, that is the closest to location_project
# In[8]:
## those three function are used to have the information concerning a file
## information are in the name of the file, so the name of the file is used to find its related information
## information mean variable, time_aggregation, model, scenario, year of the file
### this function permit to extract the word before the first character '_' in the input 'name'
### the input name is in format str
### returning the new_name, without the word found, will permit to re-use the function to find all
# the information concerning the studied file
def name_next_boundary(name):
    """Split a name at its first '_' character.

    Calling the function again on the returned remainder gives the next
    piece of information contained in a file name.

    Parameters
    ----------
    name : str
        Name (or remaining part of the name) of the studied file.

    Returns
    -------
    tuple of str
        (word, new_name): the text before the first '_' and the text after
        it. When name contains no '_', word is the whole name and new_name
        is an empty string.
    """
    # partition cuts at the first '_' only, so an identical word located
    # further in the name is left untouched
    word, _separator, new_name = name.partition('_')
    return word, new_name
# this function permit to extract the year of the studied file
# the year is always writen at the end of the name's file
# the input name is in format str
def find_year(name):
    """Return the year written at the end of a file name.

    Parameters
    ----------
    name : str
        Name of the studied file, with or without the suffix '.nc'; the year
        is expected after the last '_' character.

    Returns
    -------
    str
        The text located after the last '_' of name, without the suffix
        '.nc' if there is one.
    """
    # text after the last '_' (the whole name if there is no '_')
    ending = name.rpartition('_')[2]
    if ending.endswith('.nc'):
        # drop the three characters of the suffix
        return ending[:-3]
    return ending
# This function use the functions 'name_next_boundary' and 'find_year' to extract the information of the file studied
# the input name is in format str, the name of the file from which we want information
def data_information(name):
    """Extract the information contained in the name of a file.

    The first four words of the name, separated by '_', are read with
    'name_next_boundary'; the year is read at the end of the name with
    'find_year'.

    Parameters
    ----------
    name : str
        Name of the file from which the information is wanted.

    Returns
    -------
    tuple of str
        (variable, time_aggregation, model, scenario, year)
    """
    words = []
    shorten_name = name
    # each call gives the next word and the name without this word
    for _ in range(4):
        word, shorten_name = name_next_boundary(shorten_name)
        words.append(word)
    variable, time_aggregation, model, scenario = words
    year = find_year(shorten_name)
    return variable, time_aggregation, model, scenario, year
# In[9]:
# this function aims to create the empty dataframe that will be filled
def create_empty_dataframe(name_project,scenarios,models,closest_value_lat,closest_value_lon,name_climate_var,start_year,stop_year):
    """Create the empty dataframe that will be filled with the values of the files.

    The dataframe has one row per project, scenario, model and day, and one
    column named name_climate_var. The scenario 'historical' covers the years
    up to 2014 included; every other scenario covers the years from 2015.

    Parameters
    ----------
    name_project : sequence of str
        Names of the projects.
    scenarios : list of str
        Scenarios of interest; 'historical' is handled separately.
    models : list of str
        Models of interest.
    closest_value_lat, closest_value_lon : sequence of float
        For each project, latitude and longitude registered in the index.
    name_climate_var : str
        Title of the column of the dataframe.
    start_year, stop_year : int
        First and last year (included) of the period of interest.

    Returns
    -------
    pandas.DataFrame
        Dataframe without values, indexed by ('Name project', 'Experiment',
        'Model', 'Latitude', 'Longitude', 'Date'), the date being a string
        with format '%d-%m-%Y'. The dataframe is empty if no scenario covers
        the period.
    """
    last_historical_year = 2014
    df = pd.DataFrame()
    for i in np.arange(0,len(name_project)):
        for scenario in scenarios:
            # restrict the period of interest to the years covered by the scenario
            if scenario == 'historical':
                first_year = start_year
                last_year = min(stop_year, last_historical_year)
            else:
                first_year = max(start_year, last_historical_year + 1)
                last_year = stop_year
            if first_year > last_year:
                # the scenario does not cover any year of the period of interest
                continue
            Variable_dataframe = _empty_frame_for_period(name_project[i], scenario, models, closest_value_lat[i], closest_value_lon[i], name_climate_var, first_year, last_year)
            df = pd.concat([df,Variable_dataframe])
    return df


def _empty_frame_for_period(project, scenario, models, latitude, longitude, name_climate_var, first_year, last_year):
    """Return an empty one-column dataframe for one project and one scenario, with one row per model and per day."""
    # ISO dates are used to avoid any ambiguity between day and month
    time = pd.date_range(str(first_year)+'-01-01', str(last_year)+'-12-31', freq='D').strftime('%d-%m-%Y').values
    midx = pd.MultiIndex.from_product([(project,),(scenario,), models, (latitude,),(longitude,),time],names=['Name project','Experiment', 'Model', 'Latitude','Longitude','Date'])
    return pd.DataFrame(data = [],
                        index = midx,
                        columns = [name_climate_var])
# In[10]:
# this function is used in 'create_dataframe'. The function aims to return the path of the file of interest
# The function looks into a list of name which name in the list has every input
# The inputs are:
# out_path: a general file path where the files are registered,
# name_file_list: a list of files' names
# variable: the name of the variable of interest
# model: the model of interest (example: ACCESS-CM2)
# scenario: the scenario of interest (example:ssp245)
# year: the year of interest
# ensemble: the ensemble of interest (example: r1i1p1f1_gn)
# the output is:
# the path of the file corresponding to all the parameters indicated in input
def find_path_file(out_path,name_file_list,variable,temporal_resolution,model,scenario,year,ensemble):
    """Return the path of the file corresponding to all the parameters given as inputs.

    A name is selected when each parameter is one of the words of the name,
    the words being separated by '_'. Comparing whole words avoids selecting
    the file of a model whose name only starts with the wanted one (for
    example 'EC-Earth3-Veg-LR' when 'EC-Earth3' is asked).

    Parameters
    ----------
    out_path : str
        Folder where the files are registered.
    name_file_list : list of str
        Names of the files in which to look.
    variable : str
        Name of the variable of interest.
    temporal_resolution : str
        Temporal resolution of the variable.
    model : str
        Model of interest (example: ACCESS-CM2).
    scenario : str
        Scenario of interest (example: ssp245).
    year : str
        Year of interest.
    ensemble : str
        Ensemble of interest (example: r1i1p1f1_gn).

    Returns
    -------
    str or list
        Path of the first file found, or an empty list if no name of the
        list contains all the parameters.
    """
    wanted_words = ['_' + str(word) + '_' for word in (variable, temporal_resolution, model, scenario, year, ensemble)]
    for name in name_file_list:
        # the suffix is removed and '_' added at both ends, so that the first
        # and the last words of the name are delimited like the others
        stem = name[:-3] if name.endswith('.nc') else name
        delimited_name = '_' + stem + '_'
        if all(word in delimited_name for word in wanted_words):
            print('The name of the file is ' + name)
            return os.path.join(out_path, name)
    # no name with all the parameters indicated as inputs was found
    return []
# In[11]:
# the register_data_in_dataframe function aims to test if data with the specific parameters exist in the folder of concern
# As inputs :
# the list of urls of the files of interest. The name of the file will be extracted from them
# temporal_resolution: the temporal resolution of the climate variable in question in string format
# year_str: a vector containing the year of the period of interest in a string format
# scenarios: a list of the scenarios of interest in string format
# models: a list of the models of interest in string format
# out_path: the out_path in a string format
# name_variable: the name of the variable of interest (example: 'pr' for precipitation)
# name_project: the list of names of the project of interest
# index_closest_lat: array containing an index for each project,
# corresponding to the index of the value in latitude vector which is the closest to
# the project latitude
# index_closest_lon: array containing an index for each project,
# corresponding to the index of the value in the longitude vector which is the closest to
# the project longitude
# closest_value_lat: array containing a value for each project, corresponding to the value in the
# latitude vector which is the closest to the project's latitude
# closest_value_lon: array containing a value for each project, corresponding to the value in the
# longitude vector which is the closest to the project's longitude
# df : empty dataframe to fill
# Outputs are:
# df: the filled dataframe with the values of interest
# path_file_not_found: the list of files that were not found with the parameters asked
# ds_did_not_open: the list of files that could not be read
def register_data_in_dataframe(name_list,temporal_resolution,year_str,scenarios,models,out_path, name_variable, name_project,index_closest_lat,index_closest_lon,closest_value_lat,closest_value_lon,conversion_factor,conversion_addition,df):
    """Fill the dataframe with the values read in the files, for each year, scenario, model and project.

    The years up to 2014 included are read with the scenario 'historical',
    the following years with every other scenario of the list. Only the files
    of the ensemble 'r1i1p1f1_gn' are looked for.

    Parameters
    ----------
    name_list : list of str
        Names of the files of interest.
    temporal_resolution : str
        Temporal resolution of the climate variable.
    year_str : list of str
        Years of the period of interest.
    scenarios : list of str
        Scenarios of interest.
    models : list of str
        Models of interest.
    out_path : str
        Folder where the files are registered.
    name_variable : str
        Name of the variable of interest (example: 'pr'); it is used to find
        the files and to read the variable in them.
    name_project : sequence of str
        Names of the projects.
    index_closest_lat, index_closest_lon : sequence of int
        For each project, index in the latitude and longitude vectors of the
        files of the closest point to the project.
    closest_value_lat, closest_value_lon : sequence of float
        For each project, latitude and longitude used in the index of df.
    conversion_factor, conversion_addition : float
        Each value is registered as value * conversion_factor + conversion_addition.
    df : pandas.DataFrame
        Empty dataframe to fill (see 'create_empty_dataframe').

    Returns
    -------
    tuple
        (df, path_file_not_found, ds_did_not_open): the filled dataframe, the
        description of the files that were not found in name_list, and the
        paths of the files that could not be read or registered.
    """
    ensemble = 'r1i1p1f1_gn'
    path_file_not_found = [] # files that were not found with the corresponding parameters
    ds_did_not_open = [] # files that could not be read or registered
    # the scenarios are selected by name and not by position, so the function
    # also works when 'historical' is missing or when there are fewer scenarios
    historical_scenarios = [name for name in scenarios if name == 'historical']
    future_scenarios = [name for name in scenarios if name != 'historical']
    for year in year_str:
        scenario = historical_scenarios if int(year) <= 2014 else future_scenarios
        for SSP in scenario:
            for model_simulation in models:
                # for each year, each scenario and each model, test if a corresponding file exists
                climate_variable_path = find_path_file(out_path,name_list,name_variable,temporal_resolution,model_simulation,SSP,year,ensemble)
                if climate_variable_path == []:
                    # NO file with the corresponding parameters was found
                    path_file_not_found.append(name_variable+'_'+temporal_resolution+'_'+model_simulation+'_'+SSP+'_'+year+'_'+ensemble)
                    continue
                try:
                    # the 'with' block closes the dataset, even if an error occurs
                    with xr.open_dataset(climate_variable_path) as ds:
                        print('The file did open')
                        time = ds.indexes['time'].strftime('%d-%m-%Y').values # dates of the file
                        for i in np.arange(0,len(name_project)):
                            print('For the year '+year+' and project '+name_project[i]+', test with scenario '+SSP+', with model '+model_simulation)
                            # the values of the closest point to the project are registered in the
                            # rows of df corresponding to the parameters of the loops
                            df.loc[(name_project[i],SSP,model_simulation,closest_value_lat[i],closest_value_lon[i],time)] = ds.variables[name_variable].isel(lat=index_closest_lat[i],lon=index_closest_lon[i]).values.reshape(len(time),1)*conversion_factor + conversion_addition
                except Exception as error:
                    # the file is skipped, but it is reported to the caller instead of being ignored silently
                    ds_did_not_open.append(climate_variable_path)
                    print(climate_variable_path + ' could not be registered: ' + str(error))
    return df,path_file_not_found,ds_did_not_open
# the function df_to_csv aims to return the filled dataframe in a csv format
# Inputs are:
# df: the dataframe that should be register in a csv file
# path_for_csv: this is the path where the csv file should be registered, in a string format
# title_file: this is the name of the csv file to be created in a string format
# CAREFUL --> title_file MUST have the extension of the file in the string (.csv for example)
# Output is:
# in the case where the dataframe is not empty, the ouput is the full path to the created csv file
# in the case where the dataframe is empty, the output is an empty list
def df_to_csv(df,path_for_csv,title_file):
    """Register a dataframe in a csv file, unless the dataframe is empty.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to register.
    path_for_csv : str
        Folder where the csv file should be registered; it is created if it
        does not exist.
    title_file : str
        Name of the csv file, extension included (for example '.csv').

    Returns
    -------
    str or list
        Full path of the created csv file, or an empty list when the
        dataframe is empty (nothing is written in that case).
    """
    if df.empty:
        # no value was found, there is nothing to register
        print('The dataframe is empty')
        return []
    if not os.path.isdir(path_for_csv):
        os.makedirs(path_for_csv) # to ensure creation of the folder
    full_name = os.path.join(path_for_csv,title_file)
    df.to_csv(full_name)
    print('Path for csv file is: ' + full_name)
    return full_name
# # Projects information
#
# In[12]:
# Project information
# One entry per project; the three series below must be in the same order, because the
# element i of each series describes the same project
name_project_data = np.array(['WTP_Mutua_EIB', 'Gorongosa_EIB', 'Chimoio_WTP_EIB', 'Pemba_EIB'])
name_project = pd.Series(name_project_data)
# Longitude of each project, compared with the 'lon' vector of the files
# NOTE(review): presumably decimal degrees, in the same convention as the files - confirm
lon_projects_data = np.array([34.5927839939706, 34.07824286310398 , 33.47333313659342, 40.52545156033736])
lon_projects = pd.Series(lon_projects_data)
# Latitude of each project, compared with the 'lat' vector of the files
lat_projects_data = np.array([-19.495079648575242, -18.68063728746643, -19.125095255188334,-12.973942656747809])
lat_projects = pd.Series(lat_projects_data)
# # Define Paths
# In[13]:
# folder containing the files to read and the csv file listing the urls; the results are registered in a sub-folder of it
out_path=r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\NEX-GDDP-CMIP6-AllMoz'
# # Complete list of url of the files
# In[14]:
# register information from csv file
# (former location of the csv file, kept for reference)
#all_urls = pd.read_csv(r'C:\Users\CLMRX\OneDrive - COWI\Documents\GitHub\CRVA_tool\outputs\NEX-GDDP-CMIP6\gddp-cmip6-thredds-fileserver.csv')
csv_path = os.path.join(out_path,'gddp-cmip6-thredds-fileserver.csv')
all_urls = pd.read_csv(csv_path)
# In[15]:
### make all elements of the url column of the csv into a readable list
# the label of the column starts with a space (presumably as it is written in the header of the csv file)
temp_list = all_urls[[' fileUrl']].T# transpose, so that the urls are in one single row
temp_list=temp_list.values.tolist()
temp_list=temp_list[0] # keep this single row: one element per url
url_list=[s.replace(' ', '') for s in temp_list] # remove the spaces contained in the elements
# In[16]:
## keep only the urls of the variable of interest, between start_year and stop_year, for the ensemble 'r1i1p1f1_gn'
# the year is read in the 4 characters located just before the 3 last characters of the url
url_list_climate_var = [url for url in url_list if variable_of_interest+'_' in url and int(url[len(url)-7:len(url)-3])>=start_year and int(url[len(url)-7:len(url)-3])<=stop_year and 'r1i1p1f1_gn' in url]
# In[17]:
# names of the files, extracted from the urls
name_list_climate_var = produce_name_list(url_list_climate_var)
# # Produce csv files with data to use
# In[18]:
# years of the period of interest, as numbers and as strings
(year, year_str, index_year) = produce_year(start_year,stop_year)
# In[19]:
# variables, time_aggregations,models,scenarios in the name_list_climate_var
(variables, time_aggregations,models,scenarios)=information_files_in_vectors(name_list_climate_var)
# In[20]:
# remove model NESM3 because there is no data associate to this model
#models.remove('NESM3')
# In[22]:
# this cell aims to extract the index in the lat_projects and lon_projects vectors, corresponding to the values of the
# closest latitude and longitude to the projects
index_closest_lat = []
# The files are tested one after the other, until one of them can be opened: the function
# '_lat_lon' uses xr.open_dataset(path), which fails if there is a problem with the path or
# with the file. The first file that works is enough to extract the information of interest
# (the index and value of the closest latitude and longitude).
for name in name_list_climate_var:
    path = os.path.join(out_path,name)
    print(path)
    try:
        (index_closest_lat,index_closest_lon,closest_value_lat,closest_value_lon)=_lat_lon(path,lat_projects,lon_projects)
    except Exception as error:
        # the information was not extracted: report it and test the next file
        print('Could not read the latitudes and longitudes of ' + path + ': ' + str(error))
        continue
    print(index_closest_lat)
    break # once the information was extracted, no need to continue looking
else:
    # every file was tested without success (or there is no file): stop here instead of
    # testing the same files again and again
    raise RuntimeError('None of the ' + str(len(name_list_climate_var)) + ' files of the list could be opened in ' + out_path + ' to read the latitudes and longitudes')
# In[23]:
# create the empty dataframe, based on the information from the names' files
# the column of the dataframe is named with title_column; the rows cover every project, scenario, model and day
df_climate_var=create_empty_dataframe(name_project,scenarios,models,closest_value_lat,closest_value_lon,title_column,start_year,stop_year)
# test with one project, one scenario, one model with the following line
#df_climate_var=create_empty_dataframe([name_project[0]],[scenarios[0]],[models[0]],[closest_value_lat[0]],[closest_value_lon[0]],title_column,start_year,stop_year)
# In[24]:
# bare expression: presumably displays the dataframe when this cell is run in a notebook
df_climate_var
# In[ ]:
# measure the time needed to fill the dataframe
start_t = timer()
# 16h for historic period
# only the first time aggregation found in the names of the files is used
(df_climate_var,path_file_not_found,ds_did_not_open)=register_data_in_dataframe(name_list_climate_var,time_aggregations[0],year_str,scenarios,models,out_path, variable_of_interest, name_project,index_closest_lat,index_closest_lon,closest_value_lat,closest_value_lon,conversion_factor,conversion_addition,df_climate_var)
# test with one project, one scenario, one model with the following line, takes 1h20
# NOTE(review): the commented call below does not pass conversion_factor and conversion_addition,
# which register_data_in_dataframe expects just before df; add them before using this line
#(df_climate_var,path_file_not_found,ds_did_not_open)=register_data_in_dataframe(name_list_climate_var,time_aggregations[0],year_str,[scenarios[0]],[models[0]],out_path, variable_of_interest, [name_project[0]],[index_closest_lat[0]],[index_closest_lon[0]],[closest_value_lat[0]],[closest_value_lon[0]],df_climate_var)
end_t = timer()
print('It took '+str(round(end_t - start_t,2))+' seconds to register the data of interest in dataframe')
print('It took '+str(round((end_t - start_t)/3600,2))+' hours to register the data of interest in dataframe')
# In[ ]:
df_climate_var
# In[ ]:
# Register information
# register dataframe in a csv format
# name of the csv file and of the folder where it is registered
period_label = str(start_year) + '-' + str(stop_year)
title_file = variable_of_interest + '_' + period_label + '_projectsMoz.csv'
path_for_csv = os.path.join(
    out_path,
    'csv_file',
    variable_of_interest,
    '_'.join([variable_of_interest, unit, time_aggregations[0], period_label]),
)
# register the dataframe in a csv format
path_csv = df_to_csv(df_climate_var, path_for_csv, title_file)

# the folder is not created by df_to_csv when the dataframe is empty, but the
# two following files may still have to be written in it
if not os.path.isdir(path_for_csv):
    os.makedirs(path_for_csv)

# register the files that were not found, one per line
if path_file_not_found == []:
    print('Every files were found')
else:
    txt_file_path = os.path.join(path_for_csv, 'Path_file_not_found_' + variable_of_interest + '.csv')
    with open(txt_file_path, 'w') as text_file:
        text_file.writelines(missing + '\n' for missing in path_file_not_found)
    print('Not every files were found')

# register the files that could not be opened, one per line
if ds_did_not_open == []:
    print('Every files were opened')
else:
    txt_file_path = os.path.join(path_for_csv, 'ds_did_not_open_' + variable_of_interest + '.csv')
    with open(txt_file_path, 'w') as text_file:
        text_file.writelines(unread + '\n' for unread in ds_did_not_open)
    print('Not every files were opened successfully')
# In[ ]:
# In[ ]: