# -*- coding: utf-8 -*-
"""
Created on Thu 16 Dec 2021
@author: Alexandre Kenshilik Coche
@contact: alexandre.co@hotmail.fr
This module is a collection of tools for manipulating hydrological space-time
data, especially netcdf data. It was originally developed to provide
preprocessing tools for CWatM (https://cwatm.iiasa.ac.at/) and HydroModPy
(https://gitlab.com/Alex-Gauvain/HydroModPy), but most functions have been
designed to be of general use.
"""
#%% Imports:
import xarray as xr
xr.set_options(keep_attrs = True)
import rioxarray as rio #Not necessary, the rio module from xarray is enough
import pandas as pd
import geopandas as gpd
import numpy as np
import rasterio
import rasterio.features
from affine import Affine
from shapely.geometry import Point
# from shapely.geometry import Polygon
# from shapely.geometry import mapping
import os
import re
import sys
import gc # garbage collector
from pathlib import Path
import fiona
import datetime
from geop4th import (
geobricks as geo,
utils,
)
from geop4th.workflows.standardize.standardize_sim2 import standardize_sim2 # redirected to new module
# ========== see reproject() §Rasterize ======================================
# import geocube
# from geocube.api.core import make_geocube
# from geocube.rasterize import rasterize_points_griddata, rasterize_points_radial
# import functools
# =============================================================================
# import whitebox
# wbt = whitebox.WhiteboxTools()
# wbt.verbose = False
#%% LEGEND:
# ---- ° = to keep, but needs updating
# ---- * = to merge into another function, or to remove
#%% LIST ALL FUNCTIONALITIES
def standardize_available():
    """Print the list of standardization functionalities offered by this module."""
    # Internal helpers that should not appear in the listing
    helper_funcs = {
        'georef',
        'convert_to_cwatm',
        'shortname',
        'remove_double',
        'name_xml_attributes',
        'standardize_data',
        'by_name',
    }
    # Functions that are still work-in-progress
    draft_funcs = {
        'standardize_era5',
        'standardize_drias',
        'standardize_corine',
        'standardize_obb',
        'standardize_cesoso',
        'standardize_ucsbretagne',
        'standardize_cropcoeff',
        'process_rht',
        'convert_from_h5_newsurfex',
        'convert_from_h5_oldsurfex',
        'correct_bias',
        'secondary_era5_climvar',
    }
    utils.available(__name__,
                    ignore = helper_funcs | draft_funcs,
                    )
#%% FILE MANAGEMENT
###############################################################################
# Shorten names
def standardize_shortname(data, data_type, ext = 'nc'):
    """
    Shorten file/folder names coming from a given data portal, in place.

    Parameters
    ----------
    data : str
        Path to a file, or to a folder whose content will be renamed.
    data_type : str
        Data portal identifier ('indicateurs DRIAS-Eau 2024'...) or the
        special value 'remove date' (strip the '_YYYY-MM-DD_HHhMM' suffixes).
    ext : str, optional, default 'nc'
        File extension (with or without leading dot) targeted by the
        'remove date' renaming.
    """
    if ext is not None:
        ext = ext.replace('.', '')

    # Parent folder if `data` is a file, `data` itself otherwise
    if os.path.isfile(data):
        root_folder = os.path.split(data)[0]
    else:
        root_folder = data

    # Elements to process, as full paths
    if os.path.isdir(data):
        content_list = [os.path.join(data, f)
                        for f in os.listdir(data)]
    else:
        content_list = [data]

    # ---- DRIAS-Eau 2024 Indicateurs (netcdf)
    # SAFRAN indicators (SWIAV, SSWI-3...) "EXPLORE2-SIM2 2024"
    if data_type.casefold().replace(' ', '').replace('-', '') in [
            'indicateursim2024', 'indicateurssim2024', 'indicateurdriaseau2024',
            'indicateursdriaseau2024']:
        folder_pattern = re.compile(r'Indicateurs_(.*Annuel|.*Saisonnier)_(.*)_MF_ADAMONT_(.*)_SIM2')
        for elem in content_list:
            # Shorten the folder name
            if os.path.isdir(elem):
                res = folder_pattern.findall(elem)
                if len(res) > 0:
                    outpath = '_'.join(res[0])
                    os.rename(elem, os.path.join(root_folder, outpath))

    # ---- Remove custom added date-time
    elif data_type.casefold().replace(' ', '').replace('-', '') in [
            'removedate']:
        # This option removes the '2024-11-06_12h53'-like suffixes that were
        # added to file names (in order to trace the date of creation, but it
        # is unnecessary and causes file names to be too long for Windows).
        datetime_pattern = re.compile(r'(.*)_\d{4,4}-\d{2,2}-\d{2,2}_\d{2,2}h\d{2,2}.' + f'{ext}')
        for elem in content_list:
            res = datetime_pattern.findall(elem)
            if len(res) > 0:
                # `elem` is already a full path, and the captured group keeps
                # its folder prefix: rename in place.
                # (fix: it was previously re-joined with `root_folder`, which
                # broke when `data` was a relative path)
                os.rename(elem, res[0] + f'.{ext}')
            elif os.path.isdir(elem):
                # Recurse into subfolders
                # (fix: previously called the old function name `shortname`,
                # which raised a NameError)
                standardize_shortname(elem, data_type = data_type, ext = ext)
###############################################################################
def standardize_remove_double(data_folder, data_type):
    r"""
    Move duplicated DRIAS data sets to a "doublons" subfolder.

    The data downloaded from the DRIAS website contains *some* data sets
    available for different periods (it is the same data, but one version
    goes from 1950 to 2005, and another from 1970 to 2005). For some models,
    there are no duplicates of that sort.
    These duplicates are unnecessary: for each model with several versions,
    the file covering the shortest period (latest start year) is moved to a
    subfolder named "doublons".

    Parameters
    ----------
    data_folder : str
        Folder containing the .nc files to deduplicate.
    data_type : str
        Data portal identifier (e.g. "indicateurs drias eau 2024").

    Example
    -------
    folder_list = [r"Eau-SWIAV_Saisonnier_EXPLORE2-2024_historical",
                   r"Eau-SWIAV_Saisonnier_EXPLORE2-2024_rcp45",
                   r"Eau-SWIAV_Saisonnier_EXPLORE2-2024_rcp85"]
    root_folder = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\DRIAS\DRIAS-Eau\EXPLORE2-SIM2 2024\Pays Basque\Indicateurs"
    for folder in folder_list:
        base_folder = os.path.join(root_folder, folder)
        subfolder_list = [os.path.join(base_folder, f)
                          for f in os.listdir(base_folder)
                          if os.path.isdir(os.path.join(base_folder, f))]
        for subfolder in subfolder_list:
            standardize_remove_double(subfolder, data_type = "indicateurs drias eau 2024")
    """
    # ---- DRIAS-Eau 2024 Indicateurs (netcdf)
    # SAFRAN indicators (SWIAV, SSWI-3...) "EXPLORE2-SIM2 2024"
    if data_type.casefold().replace(' ', '').replace('-', '') in [
            'indicateursim2024', 'indicateurssim2024', 'indicateurdriaseau2024',
            'indicateursdriaseau2024']:
        content_list = [os.path.join(data_folder, f)
                        for f in os.listdir(data_folder)
                        if (os.path.isfile(os.path.join(data_folder, f))
                            & (os.path.splitext(os.path.join(data_folder, f))[-1] == '.nc'))]
        model_pattern = re.compile(r"(.*)_(\d{4,4})-(\d{4,4})_(historical|rcp45|rcp85)_(.*)_(SIM2.*)")
        # For each model, collect (start year, file name) pairs
        # (fix: the file name is stored per file; previously the duplicate's
        # name was rebuilt from pattern groups left over from the LAST file of
        # the listing loop, mixing fields across models and depending on the
        # os.listdir order)
        model_dict = {}
        for elem in content_list:
            filename = os.path.split(elem)[-1]
            _var, date, _datef, _scenario, model, _creation = model_pattern.findall(filename)[0]
            model_dict.setdefault(model, []).append((int(date), filename))
        # Move duplicates
        if not os.path.exists(os.path.join(data_folder, "doublons")):
            os.makedirs(os.path.join(data_folder, "doublons"))
        for model, versions in model_dict.items():
            if len(versions) > 1:
                # Latest start year = shortest period = the duplicate to move
                _, filename = max(versions)
                os.rename(
                    os.path.join(data_folder, filename),
                    os.path.join(data_folder, 'doublons', filename)
                    )
###############################################################################
def name_xml_attributes(*, output_file, fields):
    """
    Write a GDAL PAM sidecar file ('<output_file>.aux.xml') naming the bands.

    Parameters
    ----------
    output_file : str
        Path of the raster file (the '.aux.xml' suffix is appended).
    fields : list of str
        Band names, in band order (band="1", band="2", ...).
    """
    header = ''.join(['<PAMDataset>\n',
                      '\t<Metadata>\n',
                      '\t\t<MDI key="CRS">epsg:27572</MDI>\n',
                      '\t</Metadata>\n'])
    band_template = '\t<PAMRasterBand band="{}">\t\t<Description>{}</Description>\t</PAMRasterBand>\n'
    # One <PAMRasterBand> element per band, numbered from 1
    bands = ''.join(band_template.format(i, name)
                    for i, name in enumerate(fields, start = 1))
    # (fix: the closing </PAMDataset> tag is now written once, at the end of
    # the document; it was previously repeated after every band, producing
    # malformed XML for multi-band files)
    with open(output_file + '.aux.xml', 'w') as file_object:
        file_object.write(header + bands + '</PAMDataset>')
#%% PORTAL STANDARDIZATION
def standardize_by_name(data, data_name, **kwargs):
    """
    Dispatch ``data`` to the standardization function matching ``data_name``.

    Parameters
    ----------
    data :
        Data (path or variable) forwarded to the matching function.
    data_name : str
        Name of the data portal/product. Matching is case-insensitive and,
        depending on the product, also ignores spaces, hyphens or underscores.
    **kwargs :
        Extra arguments forwarded to the matching function (BNPE, eaufrance).

    Returns
    -------
    Output of the matching standardization function, or None when
    ``data_name`` is not recognized.
    """
    # Normalized spellings of `data_name`, from least to most aggressive
    plain = data_name.casefold()
    nospace = plain.replace(' ', '')
    compact = nospace.replace('-', '')
    flat = compact.replace('_', '')

    # ---- Topographic data
    # IGN BD ALTI 25m (DEM)
    if nospace in ('alti', 'bdalti', 'ignalti'):
        return standardize_bdalti(data)
    # ---- Climate data
    # SIM2 (csv to netcdf): SAFRAN-ISBA historical reanalysis
    # (T, PRELIQ, ETP, EVAPC, SWI, RUNC...)
    if nospace == 'sim2':
        return standardize_sim2(data)
    # DRIAS-2020 and DRIAS-2020-SIM2 (previously called EXPLORE2-2021) (netcdf)
    if compact in ('drias2020', 'explore22021'):
        return standardize_drias2020(data)
    # DRIAS-Climat 2022 (netcdf): SAFRAN data (tas, prtot, rayonnement, vent...)
    # "EXPLORE2-Climat 2022"
    if compact in ('drias2022', 'explore2climat', 'explore22022'):
        return standardize_explore2climat(data)
    # DRIAS-Eau 2024 (netcdf): SURFEX data (evapc, drainc, runoffc, swe, swi...)
    # "EXPLORE2-SIM2 2024"
    if compact in ('drias2024', 'driassim22024', 'driaseau2024',
                   'explore2eau', 'explore22024',
                   'explore2sim2', 'explore2sim22024'):
        return standardize_explore2eau(data)
    # C3S seasonal forecast
    # https://cds.climate.copernicus.eu/datasets/seasonal-original-single-levels?tab=overview
    if plain == 'c3s':
        return standardize_c3s(data)
    # ---- Water-use data
    # Water withdrawals BNPE
    if plain == 'bnpe':
        return standardize_bnpe(data, **kwargs)
    # ---- Discharge data
    # Hydrometry eaufrance
    if plain in ('eaufrance',
                 'hydrometrie', 'hydrométrie',  # 'hydrometry'
                 'débit', 'debit',  # 'discharge'
                 ):
        return standardize_eaufrance(data, **kwargs)
    # ---- Land-cover data
    # ° Crop coefficients, version "better"
    if plain in ('crop', 'crop coeff', 'crop coefficient'):
        return standardize_cropcoeff(data)
    # ° CES OSO
    if plain in ('ces oso', 'oso', 'ces'):
        return standardize_cesoso(data)
    # ° CORINE Land Cover
    if plain in ('corine', 'cld', 'corine land cover'):
        return standardize_corine(data)
    # ° Observatoire botanique de Brest (2018-2019)
    if plain in ('observatoire', 'obb'):
        return standardize_obb(data)
    # ---- Soil data
    # ° Soil depth UCS Bretagne
    if flat in ('ucsbretagne', 'ucsbzh', 'ucsppbzh'):
        return standardize_ucsbretagne(data)
#%% STANDARDIZE FUNCTIONS
# =============================================================================
# def convert_to_cwatm(data, data_type, reso_m = None, EPSG_out=None,
# EPSG_in = None, coords_extent = 'Bretagne'):
# # =============================================================================
# # previously prepare_CWatM_input(*, data, data_type, reso_m = None,
# # EPSG_out, EPSG_in = None,
# # coords_extent = 'Bretagne'):
# # =============================================================================
#
#
# # ====== Ancienne version du script ===========================================
# # (input_folder, _file_name) = os.path.split(data)
# # (file_name, file_extension) = os.path.splitext(_file_name)
# # =============================================================================
# =============================================================================
def standardize_bdalti(data,
                       *, to_file = False):
    r"""
    Standardize the DEM data from IGN's ALTI Database (https://geoservices.ign.fr/bdalti):

    - merge tiles
    - georeference (embed CRS = 2154, standardize `_FillValue`, `grid_mapping` and `time`)
    - replace nodata values (-99999.0) with np.nan

    Parameters
    ----------
    data : (list of) str or pathlib.Path, or variable (xarray.Dataset or xarray.DataArray)
        ``data`` is usually the folder containing all the raw BDALTI tiles downloaded from
        IGN website. It can also be a list of filepaths or xarray variables.
        It can also be a single filepath or xarray variable.
    to_file : bool or path (str or pathlib.Path), default False
        If True and if ``data`` is a path (str or pathlib.Path), the resulting
        dataset will be exported to the same location as ``data``, with the name 'BDALTIV2_stz.tif'.
        If ``to_file`` is a path, the resulting dataset will be exported to this specified filepath.

    Returns
    -------
    data_ds : xarray.Dataset
        Standardized DEM.
        If ``to_file`` argument is used, the standardized DEM is exported to a file.
        If ``to_file = True``, the standardized DEM is exported to the default GeoTIFF file
        'BDALTIV2_stz.tif'.

    Examples
    --------
    >>> stzfr.bdalti(
            r"E:\Inputs\DEM\IGN\BDALTIV2_2-0_25M_ASC_LAMB93-IGN69",
            to_file = True
            )
    Exporting...
       _ Success: The data has been exported to the file 'E:\Inputs\DEM\IGN\BDALTIV2_2-0_25M_ASC_LAMB93-IGN69_PAYS_BASQUE\BDALTIV2_stz.tif'
    <xarray.Dataset> Size: 252MB
    Dimensions:      (x: 7000, y: 9000)
    Coordinates:
      * x            (x) float64 56kB 3e+05 3e+05 3e+05 ... 4.75e+05 4.75e+05
      * y            (y) float64 72kB 6.4e+06 6.4e+06 ... 6.175e+06 6.175e+06
        spatial_ref  int64 8B 0
    Data variables:
        elev         (y, x) float32 252MB nan nan nan nan nan ... nan nan nan nan
    """
    # ---- Merge tiles (if relevant)
    data_ds = geo.merge(data, extension = '.asc')
    # ---- Rename the main variable (ASCII files do not have a variable name)
    # (in case file is exported into a netCDF)
    if isinstance(data_ds, xr.Dataset):
        main_var = geo.main_vars(data_ds)[0]
        data_ds = data_ds.rename({main_var: 'elev'})
    elif isinstance(data_ds, xr.DataArray):
        data_ds = data_ds.rename('elev')
        data_ds = data_ds.to_dataset()  # convert into xarray.Dataset
    else:
        print(f"Error: BD ALTI data is not supposed to be a {type(data_ds)}")
        return
    # ---- Georeference (Lambert-93, EPSG:2154)
    data_ds = geo.georef(data_ds, crs = 2154)
    # ---- Replace nodata values (-99999.0) with NaN
    data_ds['elev'].encoding['_FillValue'] = np.nan
    # ---- Export
    if to_file == True:
        if isinstance(data, (str, Path)):
            print('\nExporting...')
            # Determine the filename
            if os.path.isdir(data):
                stem = os.path.join(data, 'BDALTIV2.tif')
            elif os.path.isfile(data):
                stem = data
            # Export
            export_filepath = os.path.splitext(stem)[0] + "_stz" + ".tif"
            geo.export(data_ds, export_filepath)
        else:
            # (fix: 'Warning;' -> 'Warning:' for consistency with the other messages)
            print("Warning: `data` should be a path (str or pathlib.Path) for using `to_file=True`.")
    elif isinstance(to_file, (str, Path)):
        print('\nExporting...')
        geo.export(data_ds, to_file)
    # ---- Return result as a GEOP4TH variable (xr.Dataset)
    return data_ds
# SIM2 standardization has been moved to geop4th.workflows.standardize.standardize_sim2
# The import at the top of this file ensures backward compatibility:
# from geop4th.workflows.standardize.standardize_sim2 import standardize_sim2
def standardize_drias2020(
        data,
        *,
        to_file = False
        ):
    """
    Standardize DRIAS-2020 / DRIAS-2020-SIM2 climate projection data (netCDF).

    Creates missing 'x'/'y' coordinates from the 'i'/'j' grid indices (old
    versions of this dataset), then georeferences the dataset in EPSG:27572.

    Parameters
    ----------
    data : path (str or pathlib.Path) or xarray variable
        Raw DRIAS-2020 data.
    to_file : bool or path (str or pathlib.Path), default False
        If True and if ``data`` is a path, the result is exported next to
        ``data`` with a '_stz' suffix. If ``to_file`` is a path, the result
        is exported to that filepath.

    Returns
    -------
    data_ds : xarray.Dataset
        Standardized dataset.
    """
    # ---- Load
    data_ds = geo.load(data, decode_times = True, decode_coords = 'all')
    # ---- Coords
    # For old versions of this datasets, 'x' and 'y' variables
    # were absent and needed to be retrieved from 'i' and 'j':
    # Create 'x' and 'y' coordinates if necessary, from 'i' and 'j'
    space_dims_lowcase = [d.casefold() for xy in geo.main_space_dims(data_ds, include_coords = True, include_vars = True) for d in xy]
    if ('x' not in space_dims_lowcase) | ('y' not in space_dims_lowcase):
        print("\nFormating coords...")
        if 'x' not in space_dims_lowcase:
            # NOTE(review): the 52000 m offset and 8000 m step presumably
            # describe the SAFRAN 8 km grid in Lambert II étendu
            # (EPSG:27572, embedded below) — confirm against the grid spec
            data_ds = data_ds.assign_coords(
                x = ('i', 52000 + data_ds.i.values*8000))
            print(" _ 'x' values created from 'i'")
        if 'y' not in space_dims_lowcase:
            data_ds = data_ds.assign_coords(
                y = ('j', 1609000 + data_ds.j.values*8000))
            print(" _ 'y' values created from 'j'")
        # Replace X and Y as coordinates
        data_ds = data_ds.swap_dims(i = 'x', j = 'y')
        print(" _ Coordinates 'i' and 'j' replaced with 'x' and 'y'")
        # Ensure that lat, lon, i and j will be further loaded by xarray as coords
        var = geo.main_vars(data_ds)[0]
        data_ds[var].encoding['coordinates'] = 'x y i j lat lon'
        if 'coordinates' in data_ds[var].attrs:
            data_ds[var].attrs.pop('coordinates')
        print(" _ x, y, i, j, lat, lon ensured to be read as coordinates")
    # ---- Standardize variables
    # TODO
    # =============================================================================
    #     var_list = geo.main_vars(data_ds)
    #     for var in var_list:
    #         data_ds[var] = geo.convert_units(data_ds[var], ...coeff...)
    # =============================================================================
    # ---- CRS
    data_ds = geo.georef(data_ds, crs = 27572, infer_xy_from = 'all')
    # ---- Export
    if to_file == True:
        if isinstance(data, (str, Path)):
            print('\nExporting...')
            data = Path(data)
            geo.export(data_ds, data.with_stem(data.stem + '_stz'))
        else:
            print("Warning; `data` should be a path (str or pathlib.Path) for using `to_file=True`. Otherwise use `to_file = <path>`")
    elif isinstance(to_file, (str, Path)):
        print('\nExporting...')
        geo.export(data_ds, to_file)
    return data_ds
def standardize_explore2climat(
        data,
        *,
        to_file = False,
        ):
    """
    Standardize DRIAS-Climat 2022 ("EXPLORE2-Climat 2022") netCDF data.

    Parameters
    ----------
    data : path (str or pathlib.Path) or xarray variable
        Raw EXPLORE2-Climat 2022 data.
    to_file : bool or path (str or pathlib.Path), default False
        If True and if ``data`` is a path, the result is exported next to
        ``data`` with a '_stz' suffix. If ``to_file`` is a path, the result
        is exported to that filepath.

    Returns
    -------
    data_ds : xarray.Dataset
        Standardized dataset, georeferenced in EPSG:27572.

    Notes
    -----
    Indicators from explore2climat usually contain the coordinates
    'time_bnds' and the dimension without coordinates 'bnds'. These refer to
    the period corresponding to the dimension 'time'. For instance, the date
    ``ds.time[0]`` ('1951-01-14') corresponds to the 3-month centered period
    from ``ds.time_bnds.loc[{'time': ds.time[0], 'bnds': 0}]`` ('1950-12-01')
    to ``ds.time_bnds.loc[{'time': ds.time[0], 'bnds': 1}]`` ('1951-02-28').
    """
    # Load (netCDF), dropping the 'LambertParisII' grid-mapping variable:
    # some dtype errors can arise when xarray loads it. These errors should
    # be handled by geobricks.load(), but a safeguard is kept here.
    data_ds = geo.load(data, drop_variables = 'LambertParisII')
    # Make sure lat and lon will be read back as coordinates by xarray
    first_var = geo.main_vars(data_ds)[0]
    data_ds[first_var].encoding['coordinates'] = 'x y lat lon'
    if 'coordinates' in data_ds[first_var].attrs:
        data_ds[first_var].attrs.pop('coordinates')
    print(" _ x, y, lat, lon ensured to be read as coordinates [safety]")
    # ---- Standardize units (not implemented yet)
    # TODO
    print("Warning: unit standardization is not implemented yet for `explore2climat()`")
    # ---- CRS (Lambert zone II)
    data_ds = geo.georef(data_ds, crs = 27572)
    # ---- Export
    if to_file == True:
        if not isinstance(data, (str, Path)):
            print("Warning; `data` should be a path (str or pathlib.Path) for using `to_file=True`. Otherwise use `to_file = <path>`")
        else:
            print('\nExporting...')
            data = Path(data)
            geo.export(data_ds, data.with_stem(data.stem + '_stz'))
    elif isinstance(to_file, (str, Path)):
        print('\nExporting...')
        geo.export(data_ds, to_file)
    return data_ds
def standardize_explore2eau(
        data,
        *,
        to_file = False,
        ):
    """
    Standardize DRIAS-Eau 2024 ("EXPLORE2-SIM2 2024") netCDF data.

    Parameters
    ----------
    data : path (str or pathlib.Path) or xarray variable
        Raw EXPLORE2-SIM2 2024 (SURFEX) data.
    to_file : bool or path (str or pathlib.Path), default False
        If True and if ``data`` is a path, the result is exported next to
        ``data`` with a '_stz' suffix. If ``to_file`` is a path, the result
        is exported to that filepath.

    Returns
    -------
    data_ds : xarray.Dataset
        Standardized dataset, georeferenced in EPSG:27572, with DRAINC,
        EVAPC and RUNOFFC converted from mm/d to m/d.

    Notes
    -----
    Indicators from this portal usually contain the coordinates 'time_bnds'
    and the dimension without coordinates 'bnds'. These refer to the period
    corresponding to the dimension 'time'. For instance, the date
    ``ds.time[0]`` ('1951-01-14') corresponds to the 3-month centered period
    from ``ds.time_bnds.loc[{'time': ds.time[0], 'bnds': 0}]`` ('1950-12-01')
    to ``ds.time_bnds.loc[{'time': ds.time[0], 'bnds': 1}]`` ('1951-02-28').
    """
    # Load, dropping the 'LambertParisII' grid-mapping variable: some dtype
    # errors can arise when xarray loads it. These errors should be handled
    # by geobricks.load(), but a safeguard is kept here.
    data_ds = geo.load(data, drop_variables = 'LambertParisII')
    # Make sure lat and lon will be read back as coordinates by xarray
    first_var = geo.main_vars(data_ds)[0]
    data_ds[first_var].encoding['coordinates'] = 'x y lat lon'
    if 'coordinates' in data_ds[first_var].attrs:
        data_ds[first_var].attrs.pop('coordinates')
    print(" _ x, y, lat, lon ensured to be read as coordinates [safety]")
    # ---- Standardize units
    for var in geo.main_vars(data_ds):
        if var in ('DRAINC', 'EVAPC', 'RUNOFFC'):
            # conversion from mm.d-1 to m.d-1
            data_ds[var] = geo.convert_units(data_ds[var], '/1000')
            print(' _ Units converted from mm/d to m/d')
    # ---- CRS (Lambert zone II)
    data_ds = geo.georef(data_ds, crs = 27572)
    # ---- Export
    if to_file == True:
        if not isinstance(data, (str, Path)):
            print("Warning; `data` should be a path (str or pathlib.Path) for using `to_file=True`. Otherwise use `to_file = <path>`")
        else:
            print('\nExporting...')
            data = Path(data)
            geo.export(data_ds, data.with_stem(data.stem + '_stz'))
    elif isinstance(to_file, (str, Path)):
        print('\nExporting...')
        geo.export(data_ds, to_file)
    return data_ds
def standardize_c3s(data):
    """
    Standardize C3S seasonal forecast data
    (https://cds.climate.copernicus.eu/datasets/seasonal-original-single-levels?tab=overview).

    Parameters
    ----------
    data : path (str or pathlib.Path) or xarray variable
        Raw C3S data.

    Returns
    -------
    data_ds : xarray.Dataset
        Dataset with the forecast-specific 'forecast_reference_time' and
        'forecast_period' coordinates/variables removed.
        (Time standardization itself is handled by `geo.load`.)
    """
    data_ds = geo.load(data)
    # Get main variables
    var = geo.main_var(data_ds)
    print(f" _ Main variables are: {', '.join(var)}")
    # Format time coordinate: drop the forecast-specific dimensions
    # (fix: removed unused `attrs`/`encod` backup dicts that were built but
    # never restored)
    if ('forecast_reference_time' in data_ds.coords) | ('forecast_reference_time' in data_ds.data_vars):
        data_ds = data_ds.squeeze('forecast_reference_time').drop('forecast_reference_time')
    if ('forecast_period' in data_ds.coords) | ('forecast_period' in data_ds.data_vars):
        data_ds = data_ds.drop('forecast_period')
    return data_ds
def standardize_bnpe(
        data,
        extension = '.json'):
    """
    Standardize BNPE water withdrawal data (https://bnpe.eaufrance.fr/).

    Merges the yearly withdrawal timeseries files with the 'ouvrages'
    information (water origin), then concatenates everything into a single
    vector variable with a yearly 'time' column (set to Dec 31 of 'annee').

    Parameters
    ----------
    data : str or pathlib.Path
        Folder containing the yearly withdrawal files, with an 'ouvrages'
        subfolder containing the ouvrage files.
    extension : str, default '.json'
        Extension of the files to process.

    Returns
    -------
    merged_gdf : geopandas.GeoDataFrame
        All withdrawal timeseries, completed with 'code_type_milieu' and
        'libelle_type_milieu' from the ouvrages, and a datetime 'time' column.
    """
    data_folder, filelist = geo.get_filelist(data, extension = extension)
    # (fix: removed stray '[docs]' Sphinx artifact and the unused
    # `folder_root` local)

    # ---- Retrieve info on 'ouvrages'
    folder_ouvrage, filelist_ouvrage = geo.get_filelist(
        os.path.join(data, "ouvrages"), extension = extension)
    ouvrage_list = [geo.load(os.path.join(folder_ouvrage, f), sep = ';')
                    for f in filelist_ouvrage]
    ouvrages = pd.concat(ouvrage_list, axis = 0, ignore_index = True)
    # List-like cells are unhashable and would break drop_duplicates():
    # convert them to tuples first
    for col in ['codes_points_prelevements', 'uri_bdlisa']:
        if col in ouvrages.columns:
            ouvrages[col] = ouvrages[col].apply(
                lambda v: tuple(v.tolist()) if isinstance(v, np.ndarray) else (tuple(v) if isinstance(v, list) else v)
                )
    ouvrages = ouvrages.drop_duplicates()
    ouvrages.reset_index(drop = True, inplace = True)

    # ---- Withdrawal timeseries ("chroniques de prélèvement")
    allinfo_gdf_list = []
    for f in filelist:
        temp_gdf = geo.load(os.path.join(data_folder, f), sep = ";")
        # Case where the files are in JSON format instead of GeoJSON:
        if isinstance(temp_gdf, dict):
            temp_gdf = pd.DataFrame.from_dict(temp_gdf['data'])
            geometry = [Point(xy) for xy in zip(temp_gdf.longitude,
                                                temp_gdf.latitude)]
            temp_gdf = gpd.GeoDataFrame(
                temp_gdf,
                crs = 4326,
                geometry = geometry)
        # Combine ouvrage info and withdrawal timeseries
        temp_gdf = pd.merge(temp_gdf, ouvrages[['code_ouvrage',
                                                'code_type_milieu',
                                                'libelle_type_milieu']],
                            on = 'code_ouvrage', how = 'left')
        allinfo_gdf_list.append(temp_gdf)

    # ---- Merge all files into a single vector one
    merged_gdf = geo.merge(data = allinfo_gdf_list)
    # One timestamp per year, set to Dec 31
    merged_gdf['time'] = merged_gdf.annee.astype(str) + '-12-31'
    merged_gdf['time'] = pd.to_datetime(merged_gdf['time'], format = "%Y-%m-%d")
    return merged_gdf
def standardize_eaufrance(
        data,
        extension = '.json'):
    """
    Standardize eaufrance hydrometry (discharge) data
    (https://hydro.eaufrance.fr/).

    Merges the discharge timeseries files with the station information, then
    concatenates everything into a single vector variable.

    Parameters
    ----------
    data : str or pathlib.Path
        Folder containing the discharge files, with a 'stations' subfolder
        containing the station files.
    extension : str, default '.json'
        Extension of the files to process.

    Returns
    -------
    merged_gdf : geopandas.GeoDataFrame
        All discharge timeseries, completed with station info, with a
        datetime 'time' column and 'resultat_obs_elab' divided by 1000
        (l/s -> m3/s, or mm -> m).
    """
    data_folder, filelist = geo.get_filelist(data, extension = extension)
    # (fix: removed stray '[docs]' Sphinx artifact and the unused
    # `folder_root` local)

    # ---- Retrieve info on stations
    folder_stations, filelist_stations = geo.get_filelist(
        os.path.join(data, "stations"), extension = extension)
    stations_list = [gpd.read_file(os.path.join(folder_stations, f))
                     for f in filelist_stations]
    stations = pd.concat(stations_list, axis = 0, ignore_index = True).drop_duplicates()
    stations.reset_index(drop = True, inplace = True)

    # ---- Discharge timeseries
    allinfo_gdf_list = []
    for f in filelist:
        temp_gdf = geo.load(os.path.join(data_folder, f))
        # Case where the files are in JSON format instead of GeoJSON:
        if isinstance(temp_gdf, dict):
            temp_gdf = pd.DataFrame.from_dict(temp_gdf['data'])
            geometry = [Point(xy) for xy in zip(temp_gdf.longitude,
                                                temp_gdf.latitude)]
            temp_gdf = gpd.GeoDataFrame(
                temp_gdf,
                crs = 4326,
                geometry = geometry)
        # Combine station info and discharge timeseries
        temp_gdf = pd.merge(temp_gdf, stations[['code_site',
                                                'code_station',
                                                'code_cours_eau',
                                                'libelle_cours_eau',
                                                'uri_cours_eau',
                                                'commentaire_influence_locale_station',
                                                'date_ouverture_station',
                                                ]],
                            on = 'code_station', how = 'left')
        allinfo_gdf_list.append(temp_gdf)

    # ---- Merge all files into a single vector one
    merged_gdf = geo.merge(data = allinfo_gdf_list)
    merged_gdf['time'] = pd.to_datetime(merged_gdf['date_obs_elab'], format = "%Y-%m-%d")
    # ---- Correct units: convert l/s into m3/s, or mm into m
    merged_gdf = geo.convert_units(merged_gdf, '/1000', var = 'resultat_obs_elab')
    return merged_gdf
def standardize_rrp(
        *, shp_file,
        xlsx_file,
        low_space = True,
        to_file = False):
    r"""
    Standardize the "Référentiel Régional Pédologique" (RRP) soil database.

    Combines the vector file (shapefile) with the semantic database (excel
    file): UCS (soil map units), UTS (soil typological units), 'strate'
    descriptions and their quantitative variables.

    Parameters
    ----------
    shp_file : str or pathlib.Path
        Path to the UCS shapefile.
    xlsx_file : str or pathlib.Path
        Path to the excel database (sheets 'ucs', 'l_ucs_uts', 'strate'
        and 'strate_quant').
    low_space : bool, default True
        If True, the geometry (shapefile) merge is skipped and the result is
        a plain table (exported as '.csv' when ``to_file`` is used).
    to_file : bool or path (str or pathlib.Path), default False
        If True, export next to ``xlsx_file`` (when ``low_space``) or
        ``shp_file``, with a '_stz' suffix. If a path, export to that path.

    Returns
    -------
    val : pandas.DataFrame or geopandas.GeoDataFrame
        Combined soil information, with depth/thickness variables converted
        from cm to m.

    Examples
    --------
    >>> stzfr.rrp(shp_file = r"D:\...\RRP_64\RRP64_32360_VF_pigma.shp",
    ...           xlsx_file = r"D:\...\32360_base_semantique\32360_RRP64_Niv2.xlsx")
    """
    # ---- Load
    ucs = geo.load(shp_file)
    val_ucs = pd.read_excel(
        xlsx_file,
        sheet_name = "ucs",
        )
    val_uts = pd.read_excel(
        xlsx_file,
        sheet_name = "l_ucs_uts",
        )
    val_strate = pd.read_excel(
        xlsx_file,
        sheet_name = "strate",
        )
    val_strate_quant = pd.read_excel(
        xlsx_file,
        sheet_name = "strate_quant",
        )
    # ---- Handle duplicated analyses
    # pivot() requires unique (id_uts, no_strate, nom_var) triplets: rename
    # duplicated variables by appending a counter ('<var>2', '<var>3'...)
    dupli_row = val_strate_quant[val_strate_quant.duplicated(['id_uts', 'no_strate', 'nom_var'], keep = 'first')]
    for _, r in dupli_row.iterrows():
        idx_list = val_strate_quant[(val_strate_quant.id_uts == r.id_uts)
                                    & (val_strate_quant.no_strate == r.no_strate)
                                    & (val_strate_quant.nom_var == r.nom_var)].index
        for i in range(1, len(idx_list)):
            val_strate_quant.loc[idx_list[i], 'nom_var'] = val_strate_quant.loc[idx_list[i], 'nom_var'] + str(i+1)
    # One column per (statistic, variable) pair
    val_strate_quant = val_strate_quant.pivot(
        index = ['id_uts', 'no_strate'],
        columns = 'nom_var',
        values = ['val_min', 'val_mod', 'val_max', 'unite',
                  'info_var', 'id_methode_physique', 'id_methode'],
        )
    val_strate_quant.reset_index(inplace = True, drop = False)
    # Flatten the column multiIndex: ('val_min', '<var>') -> '<var>_val_min'
    val_strate_quant.columns = ['_'.join(col[::-1]) if col[1] != '' else col[0] for col in val_strate_quant.columns.values]
    # ---- Combine UTS, strate and quantification informations
    val = pd.merge(val_uts, val_strate, on = 'id_uts')
    val = pd.merge(val, val_strate_quant, on = ['id_uts', 'no_strate'])
    val = pd.merge(val, val_ucs, on = 'id_ucs')
    if not low_space:
        val = pd.merge(ucs, val, left_on = 'NO_UCS', right_on = 'no_ucs')
        # Remove duplicated fields
        # (grouped under `not low_space`: 'NO_UCS'/'nom_UCS' only exist after
        # the shapefile merge, dropping them otherwise raises a KeyError)
        val = val.drop(['NO_UCS', 'nom_UCS'], axis = 1)
    # Reorder columns
    col_list = ['id_strate', 'no_strate', 'id_uts', 'pourcent', 'id_ucs']
    val = val[col_list + list(set(val.columns) - set(col_list))]
    # ---- Standardize variable names and units
    # Depth/thickness variables are provided in cm
    val[['prof_appar_min', 'prof_appar_moy', 'prof_appar_max',
         'epais_min', 'epais_moy', 'epais_max', 'epaisseur', 'prof_inf_moy'
         ]] = val[['prof_appar_min', 'prof_appar_moy', 'prof_appar_max',
                   'epais_min', 'epais_moy', 'epais_max', 'epaisseur', 'prof_inf_moy'
                   ]]/100  # convert cm to m
    # ---- Export
    if low_space:
        file = xlsx_file
        ext = '.csv'
        kwargs = {'sep': ';', 'index': False}
    else:
        file = shp_file
        ext = os.path.splitext(file)[-1]
        kwargs = dict()
    if to_file == True:
        if isinstance(file, (str, Path)):
            print('\nExporting...')
            # Determine the filename
            if os.path.isdir(file):
                export_filepath = os.path.join(file, 'RRP_stz.gpkg')
            elif os.path.isfile(file):
                export_filepath = os.path.splitext(file)[0] + '_stz' + ext
            # Export
            geo.export(val, export_filepath, **kwargs)
        else:
            # (fix: the message referenced a nonexistent `data` parameter)
            print("Warning: `shp_file`/`xlsx_file` should be paths (str or pathlib.Path) for using `to_file=True`.")
    elif isinstance(to_file, (str, Path)):
        print('\nExporting...')
        geo.export(val, to_file, **kwargs)
    return val
#%%% ° CORINE Land Cover
def standardize_corine(data):
    """
    Standardize CORINE Land Cover data into CWatM-ready land-cover
    fraction maps over Brittany.

    Workflow (all intermediate files are written next to ``data``):
      1. Crop the (pan-European) raster to Brittany using a shapefile mask.
      2. For each of the 6 CWatM land-cover classes, build a binary
         presence map at the initial resolution.
      3. Reproject/aggregate each binary map onto a 1000 m grid with
         average resampling, which turns presence into a fraction [0-1].
      4. Stack the 6 fraction maps into a single NetCDF file.

    Parameters
    ----------
    data : str
        Path to the CORINE Land Cover GeoTIFF (epsg:3035).

    Returns
    -------
    None. Results are written to disk.

    Notes
    -----
    NOTE(review): unfinished/legacy function — the crop-mask shapefile
    path is hard-coded and the processed year is fixed to 2018.
    """
    # The 'mask' and 'warp' submodules are not loaded by a bare
    # ``import rasterio``; without these imports the calls to
    # rasterio.mask.mask / rasterio.warp.reproject below raise
    # AttributeError at runtime.
    import rasterio.mask
    import rasterio.warp

    (folder_name, file_name) = os.path.split(data)
    (file_name, extension) = os.path.splitext(file_name)
    data_years = [1990, 2000, 2006, 2012, 2018]
    y = data_years[4]  # NOTE: processed year hard-coded to 2018
    # CORINE level-3 codes aggregated into each CWatM land-cover class
    land_convert = [
        [23, 24, 25, 26, 27, 28, 29], # 1. forests
        [12, 15, 16, 17, 18, 19, 20, 21, 22], # 2. grasslands and non-irrigated crops
        [14], # 3. rice fields
        [13, ], # 4. irrigated crops
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], # 5. sealed lands
        [40, 41, 42, 43], # 6. water surfaces
        # [7, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], # x. bare soil
        ]

    #%%%% Crop of the (pan-European) file over Brittany
    # -------------------------------------------------
    with rasterio.open(data, 'r') as data:
        # Crop perimeter (hard-coded mask shapefile -- TODO: parameterize)
        with fiona.open(r"D:\2- Postdoc\2- Travaux\2- Suivi\2- CWatM\2022-01-12) Synthèse inputs\Mask_Bretagne_3035_1km.shp", "r") as shapefile:
            shapes = [feature["geometry"] for feature in shapefile]
        # Crop
        out_raster, out_transform = rasterio.mask.mask(data,
                                                       shapes,
                                                       crop = True)
        crp_data = out_raster[0]
        # Update the profile to the cropped extent
        data_profile = data.profile
        data_profile.update({
            "height": crp_data.shape[0],
            "width": crp_data.shape[1],
            "transform": out_transform})
        # Export (optional intermediate file)
        output_file = os.path.join(folder_name, file_name + '_Brittany.tif')
        with rasterio.open(output_file, 'w', **data_profile) as output_f:
            output_f.write_band(1, crp_data)
            print('\nCropped file has been successfully exported')

    #%%%% Extraction of the land-cover classes
    # ----------------------------------------
    data_profile['dtype'] = 'float32' # to limit disk usage
    for i in range(0,6):
        # Binary map with the same dimensions: 1 on pixels belonging to
        # the target land cover, 0 elsewhere
        print('\nClass ' + str(i+1) + ":")
        count = np.zeros(crp_data.shape, dtype = data_profile['dtype'])
        count = count.astype('float32') # to limit disk usage
        for code in land_convert[i]:
            count[crp_data == code] = 1
        # Export at the initial resolution (optional intermediate files)
        output_file = os.path.join(folder_name, file_name + '_class' + str(i+1) + '.tif')
        with rasterio.open(output_file, 'w', **data_profile) as output_f:
            output_f.write_band(1, count)
            print(' - Successfully exported into initial-resolution *.tif')
        # Reprojection onto a 1 km grid; average resampling converts the
        # binary presence map into a fractional cover [0-1]
        dst_trsfm = Affine(1000, 0, 3165000,
                           0, -1000, 2954000)
        data_reprj = np.zeros((258, 407), dtype = np.float32)
        dst_data_profile = data_profile.copy()
        dst_data_profile.update({
            "height": data_reprj.shape[0],
            "width": data_reprj.shape[1],
            "transform": dst_trsfm,
            "nodata": np.nan})
        rasterio.warp.reproject(count,
                                destination = data_reprj,
                                src_transform = data_profile['transform'],
                                src_crs = data_profile['crs'],
                                src_nodata = data_profile['nodata'],
                                dst_transform = dst_data_profile['transform'],
                                dst_crs = dst_data_profile['crs'],
                                dst_nodata = dst_data_profile['nodata'],
                                resampling = rasterio.enums.Resampling(5), # 5 = average
                                )
        # Export at the final resolution (required intermediate files)
        output_file = os.path.join(folder_name,
                                   '_'.join([file_name, '_class' + str(i+1), '1000m.tif'])
                                   )
        with rasterio.open(output_file, 'w', **dst_data_profile) as output_f:
            output_f.write_band(1, data_reprj)
            print(' - Successfully exported into coarse-resolution *.tif')

    #%%%% Conversion of the last created files (*.tif) into one *.nc
    # --------------------------------------------------------------
    var_names = [
        'fracforest',
        'fracgrassland',
        'fracirrNonPaddy',
        'fracirrPaddy',
        'fracsealed',
        'fracwater',
        ]
    # Initialization with the first class:
    i = 0
    with xr.open_dataset(os.path.join(folder_name, '_'.join([file_name, '_class' + str(i+1), '1000m.tif']))) as data_ds:
        data_ds.load()
    data_ds = data_ds.rename(band = 'time')
    data_ds = data_ds.assign_coords({'time': ('time', [datetime.datetime(y, 1, 31)])})
    data_ds = data_ds.rename(band_data = var_names[i])
    # The remaining classes are merged in as additional variables:
    for i in range(1,6):
        with xr.open_dataset(os.path.join(folder_name, '_'.join([file_name, '_class' + str(i+1), '1000m.tif']))) as data_tmp:
            data_tmp.load()
        data_tmp = data_tmp.rename(band = 'time')
        data_tmp = data_tmp.assign_coords({'time': ('time', [datetime.datetime(y, 1, 31)])})
        data_tmp = data_tmp.rename(band_data = var_names[i])
        data_ds[var_names[i]] = data_tmp[var_names[i]]
    # Georeferencing attributes (QGIS conventions):
    data_ds.rio.write_crs('epsg:3035', inplace = True)
    data_ds.x.attrs = {'standard_name': 'projection_x_coordinate',
                       'long_name': 'x coordinate of projection',
                       'units': 'Meter'}
    data_ds.y.attrs = {'standard_name': 'projection_y_coordinate',
                       'long_name': 'y coordinate of projection',
                       'units': 'Meter'}
    output_file = os.path.join(folder_name, 'fractionLandcover_Bretagne_CORINE_{}.nc'.format(str(y)))
    data_ds.to_netcdf(output_file)
#%%% ° Observatoire botanique de Brest (2018-2019)
def standardize_obb(data):
    """
    Standardize the "Observatoire botanique de Brest" land-cover data
    (2018-2019).

    Parameters
    ----------
    data : any
        Unused for now.

    Returns
    -------
    str
        Placeholder message: the function is not implemented yet.
    """
    # Placeholder class mapping, to be filled with the real source codes
    # of the six CWatM land-cover classes (same layout as in
    # standardize_corine): forests, grasslands/non-irrigated crops,
    # rice fields, irrigated crops, sealed lands, water surfaces.
    _placeholder_classes = [[0, 0, 0] for _ in range(6)]
    return 'Pas encore implémenté'
#%%% ° CES OSO
def standardize_cesoso(data):
    """
    Standardize the CES OSO land-cover data.

    Not implemented yet: *data* is ignored and a placeholder message
    is returned.
    """
    return "Pas encore implémenté"
#%%% ° Soil depth from UCS Bretagne
def standardize_ucsbretagne(data):
    """
    Standardize soil-depth data from the "UCS Bretagne" soil map.

    Converts the categorical depth classes of the vector map into a
    weighted-average numeric depth [m], rasterizes it onto a regular
    grid, and exports the result — plus the CWatM topsoil/subsoil
    split — to NetCDF.

    Parameters
    ----------
    data : str
        Path to the UCS Bretagne vector file (readable by geopandas).

    Returns
    -------
    xarray.Dataset
        The rasterized soil-depth dataset (variable 'soildepth').

    Notes
    -----
    NOTE(review): unfinished function — it relies on module-level names
    (``EPSG_out``, ``reso_m``, ``nearest``, ``file_name``,
    ``input_folder``) that are not defined in this module; confirm
    where they are supposed to come from before using it.
    """
    # 'rasterio.warp' is a submodule not loaded by a bare
    # ``import rasterio``; required by rasterio.warp.transform below.
    import rasterio.warp

    data_df = gpd.read_file(data)  # data_df = soil depth (vector)
    data_epsg = data_df.crs.to_epsg()

    # ---- Weighted-average local depths
    print('\nComputing local weighted depth averages...')
    soil_depth_limit = 140 # semi-arbitrary cap [cm]
    # Depth class -> representative depth [cm]
    class_depth_convert = {'0':0,
                           '1':soil_depth_limit - (soil_depth_limit-100)/2,
                           '2':90,
                           '3':70,
                           '4':50,
                           '5':30,
                           '6':10,
                           }
    data_df['EPAIS_1_NUM'] = [class_depth_convert[val] for val in data_df['EPAIS_DOM'].values]
    data_df['EPAIS_2_NUM'] = [class_depth_convert[val] for val in data_df['EPAIS_2'].values]
    # Average of the two dominant depth classes, weighted by areal share
    data_df['EPAIS_1_2'] = (data_df['EPAIS_1_NUM'] * data_df['P_EPAISDOM'] + data_df['EPAIS_2_NUM'] * data_df['P_EPAIS2']) / (data_df['P_EPAISDOM'] + data_df['P_EPAIS2'])
    data_df['EPAIS_1_2'] = data_df['EPAIS_1_2']/100 # convert cm to m

    # ---- Reprojection into the target CRS
    print('\nReprojecting...')
    print(' from epsg:{} to epsg:{}'.format(str(data_epsg), str(EPSG_out)))
    data_df.to_crs('epsg:'+str(EPSG_out), inplace = True)

    # ---- Rasterization
    print('\nRasterizing...')
    # (Wide) bounds of Brittany expressed in Lambert-93 (74000, 6901000),
    # converted into the target CRS
    bounds_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(2154),
                                          rasterio.crs.CRS.from_epsg(EPSG_out),
                                          [74000], [6901000])
    x_min = bounds_conv[0][0] // reso_m * reso_m
    y_max = bounds_conv[1][0] // reso_m * reso_m
    # Half-pixel shift so that pixel centers fall on round coordinates
    if reso_m == 75:
        x_min = nearest(x = x_min) - 75/2
        y_max = nearest(x = y_max) + 75/2
    else:
        x_min = x_min - reso_m/2
        y_max = y_max + reso_m/2
    transform_ = Affine(reso_m, 0.0, x_min,
                        0.0, -reso_m, y_max)
    width_ = 377000 // reso_m + 1
    height_ = 227000 // reso_m + 1
    raster_data = rasterio.features.rasterize(
        [(val['geometry'], val['EPAIS_1_2']) for i, val in data_df.iterrows()],
        out_shape = (height_, width_),
        transform = transform_,
        fill = np.nan,  # np.NaN was removed in NumPy 2.0
        all_touched = True)

    # ---- Conversion into a Dataset
    print('\nConverting into xarray.Dataset...')
    # Converting to xarray dataarray:
    # NB: 'range()' is avoided because it yields integers, and QGIS
    # fails to detect the CRS when the axes are integers.
    dataarray_ = xr.DataArray(raster_data,
                              coords = [np.arange(y_max,
                                                  y_max - reso_m * height_,
                                                  -reso_m).astype(np.float32),
                                        np.arange(x_min,
                                                  x_min + reso_m * width_,
                                                  reso_m).astype(np.float32)],
                              dims = ['y', 'x'])
    # Converting to xarray dataset:
    dataset_ = dataarray_.to_dataset(name = 'soildepth')
    # Adding georeferencing attributes (QGIS conventions):
    dataset_.rio.write_crs("epsg:"+str(EPSG_out), inplace = True)
    dataset_.x.attrs = {'standard_name': 'projection_x_coordinate',
                        'long_name': 'x coordinate of projection',
                        'units': 'Meter'}
    dataset_.y.attrs = {'standard_name': 'projection_y_coordinate',
                        'long_name': 'y coordinate of projection',
                        'units': 'Meter'}
    dataset_.soildepth.attrs = {'grid_mapping': "spatial_ref"}

    # ---- Export
    print('\nCompressing and saving...')
    full_file_name = ' '.join([file_name,
                               'res='+str(reso_m)+'m',
                               'CRS='+str(EPSG_out),
                               'lim='+str(soil_depth_limit),
                               ])
    output_file = os.path.join(input_folder,
                               full_file_name + '.nc')
    print(' --> result exported to:\n --> ' + output_file)
    dataset_.to_netcdf(output_file)

    # ---- Files for CWatM: topsoil (capped at 0.30 m) and subsoil (remainder)
    topsoil_ds = dataset_.where(dataset_['soildepth']<0.30, 0.30)
    topsoil_ds = topsoil_ds.where(~np.isnan(dataset_), np.nan)
    subsoil_ds = dataset_ - topsoil_ds
    output_top = os.path.join(input_folder,
                              'soildepth1_' + full_file_name + '.nc')
    output_sub = os.path.join(input_folder,
                              'soildepth2_' + full_file_name + '.nc')
    # NB: compression (27x smaller) makes CWatM crash when it uses the
    # use_soildepth_as_GWtop option, so files are written uncompressed.
    topsoil_ds.to_netcdf(output_top)
    subsoil_ds.to_netcdf(output_sub)
    return dataset_
#%%% ° Crop coefficients, version "better"
def standardize_cropcoeff(data):
    """
    Reproject a CWatM crop-coefficient NetCDF file (one per land cover)
    onto a regular grid over Brittany and export it as a new NetCDF.

    Parameters
    ----------
    data : str
        Path to the crop-coefficient NetCDF file.

    Returns
    -------
    xarray.Dataset
        The reprojected dataset (also written to disk).

    Notes
    -----
    NOTE(review): unfinished function — it relies on module-level names
    (``file_name``, ``EPSG_in``, ``EPSG_out``, ``reso_m``, ``nearest``,
    ``input_folder``) that are not defined in this module. In particular,
    ``EPSG_in`` is assigned below, which makes it a local variable and
    turns the first ``if not EPSG_in`` into an UnboundLocalError.
    """
    #%%%% Initialization:
    # Extract the land-cover name from the file name
    # (e.g. 'cropCoefficientForest...' -> landcover = 'Forest')
    landcover_motif = re.compile('cropCoefficient[a-zA-Z]*')
    pos = landcover_motif.search(file_name).span()
    landcover = file_name[15:pos[1]]
    name = os.path.splitext(file_name)[0][0:pos[1]]
    print(f'\nProcessed landcover = {landcover}')
    print("___________________")
    with xr.open_dataset(
        data,
        decode_coords = 'all',
        ) as data:
        data.load()  # to unlock the resource

    #%%%% Reprojection:
    print('\nReprojecting...')
    if not EPSG_in: # EPSG = None, by default
        if 'spatial_ref' not in list(data.coords) + list(data.data_vars):
            # CRS missing from the input data: default to WGS84
            print(" Le SCR n'est pas renseigné dans les données initiales. Par défaut, le WGS84 est utilisé")
            EPSG_in = 4326
        else:
            EPSG_in = data.rio.crs.to_epsg()
    print(" From epsg:{} to espg:{}".format(EPSG_in, EPSG_out))
    data.rio.write_crs("epsg:{}".format(EPSG_in), inplace = True)
    # Bounding box over Brittany (depends on the target CRS)
    if EPSG_out == 3035: #LAEA
        x_min = 3164000
        x_max = 3572000
        y_max = 2980000
        y_min = 2720000
    elif EPSG_out == 2154: #Lambert-93
        x_min = 70000
        x_max = 460000
        y_max = 6909000
        y_min = 6651000
    # (A WGS84 case would require a resolution in degrees instead of
    # meters — future development.)
    # Align the coordinates on the resolution (round values)
    x_min = nearest(x = x_min, res = reso_m, x0 = 0)
    x_max = nearest(x = x_max, res = reso_m, x0 = 0)
    y_max = nearest(y = y_max, res = reso_m, y0 = 0)
    y_min = nearest(y = y_min, res = reso_m, y0 = 0)
    print(" Final bounds:")
    print(" {}".format(y_max))
    print(" {} {}".format(x_min, x_max))
    print(" {}".format(y_min))
    # Reproject
    if reso_m is None:
        # Infer the resolution from the spacing of the second coordinate axis
        coord1 = list(data.coords)[1]
        print('NB : La resolution est déterminée à partir des {}\n'.format(coord1))
        reso = float(data[coord1][1] - data[coord1][0])
        if data[coord1].units[0:6].casefold() == 'degree':
            reso_m = round(reso*111200)  # rough degree -> meter conversion
        elif data[coord1].units[0:1].casefold() == 'm':
            reso_m = reso
    print(' Resolution: {} km'.format(str(reso_m/1000)))
    dst_height = (y_max - y_min)//reso_m
    dst_width = (x_max - x_min)//reso_m
    data_reprj = data.rio.reproject(
        dst_crs = 'epsg:'+str(EPSG_out),
        shape = (dst_height, dst_width),
        transform = Affine(reso_m, 0.0, x_min,
                           0.0, -reso_m, y_max),
        resampling = rasterio.enums.Resampling(5),  # 5 = average
        nodata = np.nan)

    #%%%% Preparing export:
    print("\nPreparing export...")
    # Formatting to match standards (encodings and attributes)
    print(" Formating encodings and attributes")
    # Copy the coordinate encodings from the source file
    # NOTE(review): assumes the source coordinates are named 'lon'/'lat'
    # and carry an 'original_shape' encoding key — confirm with the input
    data_reprj.x.encoding = data.lon.encoding
    data_reprj.y.encoding = data.lat.encoding
    data_reprj.x.encoding.pop('original_shape')
    data_reprj.y.encoding.pop('original_shape')
    var = geo.main_var(data)
    # If it is the PrecipitationMaps, it is necessary to erase the x and y
    # '_FillValue' encoding, because precipitation inputs are taken as
    # format model by CWatM
    if var == 'tp':
        data_reprj.x.encoding['_FillValue'] = None
        data_reprj.y.encoding['_FillValue'] = None
    # Compression (not lossy)
    data_reprj[var].encoding = data[var].encoding
    data_reprj[var].encoding.pop('chunksizes')
    data_reprj[var].encoding.pop('original_shape')
    # NB: The packing induces a loss of precision of apprx. 1/1000 of unit,
    # for a quantity with an interval of 150 units. The packing is
    # initially used in the original ERA5-Land data.
    print(" All attributes and encodings transfered")

    #%%%% Export :
    print('\nSaving...')
    output_file = os.path.join(input_folder,
                               '_'.join([file_name,
                                         'res='+str(reso_m)+'m',
                                         'epsg'+str(EPSG_out),
                                         ]) + '.nc')
    data_reprj.to_netcdf(output_file)
    print("\nLe resultat a bien été exporté dans le fichier : {}".format(output_file))
    return data_reprj
###############################################################################
#%%% * Formate les données RHT
def process_rht(shp_file, attrs_file, fields = 'all'):
    """
    Join the external RHT attribute table onto the RHT shapefile, so
    that the result can be displayed directly in QGIS.

    Parameters
    ----------
    shp_file : str
        Path to the input shapefile (must contain an 'ID_DRAIN' field).
    attrs_file : str
        Path to the external attribute table (';'-separated CSV with an
        'id_drain' column).
    fields : str or list of str, default 'all'
        Column name(s) from the attribute table to merge into the
        shapefile. 'all' merges every column. The join key 'id_drain'
        is always included.

    Returns
    -------
    geopandas.GeoDataFrame
        The merged layer; also written to '<shp_file basename>_merged.shp'.
    """
    # ---- Inputs
    gdf_in = gpd.read_file(shp_file)
    df_attrs = pd.read_csv(attrs_file, sep = r";", header = 0, skiprows = 0)
    if fields == 'all':
        fields = list(df_attrs.columns)
    elif not isinstance(fields, list):
        fields = [fields]
    else:
        # Copy so that appending the join key below does not mutate the
        # caller's list (the original code modified it in place)
        fields = list(fields)
    if 'id_drain' not in fields:
        fields.append('id_drain')
    print('Fields :\n{}'.format(fields))
    # ---- Merge the attributes onto the geometries
    gdf_out = gdf_in.merge(df_attrs[fields], left_on = "ID_DRAIN", right_on = "id_drain")
    # ---- Export
    out_name = os.path.splitext(shp_file)[0] + '_merged.shp'
    gdf_out.to_file(out_name)
    return gdf_out
###############################################################################
#%%% * convert_from_h5_newsurfex (with recessions)
def convert_from_h5_newsurfex(*, input_file, mesh_file, scenario = 'historic',
                              output_format = 'NetCDF', **kwargs):
    r"""
    Convert SURFEX *.h5 files (Ronan's format) into NetCDF files or
    GeoTIFF images, so that the outputs are readable by QGIS.

    Parameters
    ----------
    input_file : str
        Path to the *.h5 file (HDF keys are '<field>/<scenario>').
    mesh_file : str
        Path to the shapefile of the SURFEX mesh: one polygon per cell,
        with 'num_id', 'Xlamb' and 'Ylamb' fields (Lambert Zone II).
    scenario : str, default 'historic'
        'historic' | 'RCP2.6' | 'RCP4.5' | 'RCP8.5'
    output_format : str, default 'NetCDF'
        'NetCDF' | 'GeoTIFF'
    **kwargs
        fields : str or list of str (required)
            Variable(s) to convert, among ['ETP', 'PPT', 'REC', 'RUN',
            'TAS']. One output file is created per variable.
        dates : str or list of str, optional
            e.g. ['2019-01-01', '2019-01-02', ...]. If not specified,
            all dates from the input file are considered.

    Returns
    -------
    list of str
        The list of processed fields.

    Raises
    ------
    ValueError
        If ``fields`` is not provided.

    Example
    -------
    >> import geoconvert as gc
    >> gc.convert_from_h5_newsurfex(input_file = r"D:\...\REA.h5",
                                    mesh_file = r"D:\...\maille_meteo_fr_pr93.shp",
                                    output_format = "NetCDF",
                                    fields = ["REC", "TAS"])
    """
    # ---- Get the fields argument
    if 'fields' in kwargs:
        fields = kwargs['fields']
        if isinstance(fields, str):
            fields = [fields]
        else:
            fields = list(fields)  # in case fields is a tuple
    else:
        # The original code only printed a warning here and then crashed
        # later with a NameError; fail fast with an explicit error.
        raise ValueError("The 'fields' argument needs to be specified")

    for _field in fields:
        print('Processing ' + _field + '...')
        print(' Loading data...')
        # ---- Load data sets
        # _data contains the values, loaded as a pandas dataframe
        _data = pd.read_hdf(input_file, _field + '/' + scenario)
        _data.index.freq = 'D'  # restore the daily frequency, altered on import
        _data = _data.iloc[:, 0:-1]  # get rid of the last 'MEAN' column
        _data = _data.transpose()
        # _dataset contains the spatial metadata, and is used as the
        # final output structure
        _dataset = gpd.read_file(mesh_file)
        _dataset.num_id = _dataset.num_id.astype(int)  # 'num_id' str -> int
        # Only cells present in _data are considered:
        _datasubset = _dataset[_dataset.num_id.isin(_data.index)]
        _datasubset = _datasubset.merge(_data, left_on = "num_id",
                                        right_index = True)
        _datasubset.index = _datasubset.num_id  # replace indexes by id values

        # ---- Get the dates argument (and build the output name)
        _basename = os.path.splitext(input_file)[0]
        if 'dates' in kwargs:
            dates = kwargs['dates']
            if isinstance(dates, str):
                output_file = '_'.join([_basename, dates, _field])
                dates = [dates]
            else:
                dates = list(dates)  # in case dates is a tuple
                output_file = '_'.join([_basename, dates[0], 'to', dates[-1], _field])
        else:
            dates = _data.columns  # by default, all available dates
            output_file = '_'.join([_basename, scenario, _field])

        # ---- Exports
        profile_base = {'driver': 'NetCDF',
                        'dtype': 'float32',
                        'nodata': None,
                        'width': 37,
                        'height': 23,
                        # one band per date (was hard-coded to 1, which
                        # broke the multi-band GeoTIFF export)
                        'count': len(dates),
                        # rioxarray ('rio') has no 'crs' submodule: the
                        # CRS must be built with rasterio
                        'crs': rasterio.crs.CRS.from_epsg(27572),
                        'transform': Affine(8000, 0.0, 56000, 0.0, 8000, 2261000),
                        # values deduced from Xlamb and Ylamb; can also
                        # be read in QGIS ("X (or Y) of nearest vertex")
                        'tiled': False,
                        'interleave': 'band'}
        # ---- Add the center of each polygon, as a POINT object
        # (coordinates are in Lambert Zone II, epsg:27572)
        _datasubset['centers'] = [Point(x_y)
                                  for x_y in zip(_datasubset.loc[:, 'Xlamb'],
                                                 _datasubset.loc[:, 'Ylamb'])]
        # ---- Create a raster mask holding the cell ids
        rasterized_mask = rasterio.features.rasterize(
            [(_dataval.loc['centers'], _dataval.loc['num_id'])
             for i, _dataval in _datasubset.iterrows()],
            out_shape = (profile_base['height'], profile_base['width']),
            transform = profile_base['transform'],
            fill = np.nan,  # np.NaN was removed in NumPy 2.0
            all_touched = True)
        # ---- Build an xarray based on the previous mask
        print(' Building an xarray...')
        # Initialization:
        array3D = np.full((len(dates),
                           profile_base['height'],
                           profile_base['width']), np.nan)
        # Filling: paste each rasterized cell's time series into the cube
        # NOTE(review): assumes the date columns of _datasubset start at
        # position 10 — confirm against the mesh shapefile layout
        for (_y, _x), _id_val in np.ndenumerate(rasterized_mask):
            if not np.isnan(_id_val):
                array3D[:, _y, _x] = _datasubset.loc[_id_val].iloc[10:-1]
        # Converting to xarray dataarray:
        # NB: 'range()' is avoided because it yields integers, and QGIS
        # fails to detect the CRS when the axes are integers
        dataarray = xr.DataArray(array3D,
                                 coords = [dates,
                                           np.arange(2265000,
                                                     2265000 + 8000*profile_base['height'],
                                                     8000).astype(np.float32),
                                           np.arange(60000,
                                                     60000 + 8000*profile_base['width'],
                                                     8000).astype(np.float32)],
                                 dims = ['time', 'y', 'x'])
        # Converting to xarray dataset:
        dataset = dataarray.to_dataset(name = _field)
        # Adding attributes (QGIS conventions):
        dataset.rio.write_crs("epsg:27572", inplace = True)
        dataset.x.attrs = {'standard_name': 'projection_x_coordinate',
                           'long_name': 'x coordinate of projection',
                           'units': 'Meter'}
        dataset.y.attrs = {'standard_name': 'projection_y_coordinate',
                           'long_name': 'y coordinate of projection',
                           'units': 'Meter'}
        # ---- Export
        if output_format.casefold() in ['nc', 'netcdf']:
            dataset.to_netcdf(output_file + '.nc')
            (folder_name, file_name) = os.path.split(output_file + '.nc')
            print(" The dataset has been successfully exported to the file '{}' in the folder '{}'\n".format(file_name, folder_name))
        elif output_format.casefold() in ['tif', 'tiff', 'geotiff']:
            with rasterio.open(output_file + '.tiff', 'w', **profile_base) as out_f:
                for i, d in enumerate(dates):
                    # rasterio band indexes are 1-based (was 'i', which
                    # raises on the first band), and write_band needs the
                    # field's 2D values, not the whole Dataset
                    out_f.write_band(i + 1, dataset[_field].sel(time = d).values)
            # report the actual '.tiff' file (was wrongly '.nc')
            (folder_name, file_name) = os.path.split(output_file + '.tiff')
            print(" The dataset has been successfully exported to the file '{}' in the folder '{}'\n".format(file_name, folder_name))
    return fields
# =============================================================================
# #- - - - - - SANDBOX - - - - -
# #% Single day:
# # date = _data.loc[:,_data.columns == '2019-10-15'].columns.values
# date = _data.loc[:,_data.columns == _date].columns[0]
# _datasingleday = _datasubset.loc[:, date]
#
# #% Matplot:
# _datasubset.plot(column = date, cmap = 'RdYlBu_r', vmin = 10.5, vmax = 15)
# =============================================================================
#%%% * convert_from_h5_oldsurfex (data from Quentin)
###############################################################################
def convert_from_h5_oldsurfex(*, output_format = 'csv', **kwargs):
r"""
% DESCRIPTION :
Cette fonction formate les fichiers Surfex de Quentin en fichiers *.csv
organisés par dates (lignes) et identifiants de mailles (colonnes).
% EXEMPLES :
>> import surfexconvert as sc #(NB : il faut au préalable que le dossier soit ajouté dans le PYTHONPATH))
>> sc.convert_from_h5_oldsurfex(output_format = "csv",
start_years = list(range(2005, 2011, 1)),
variables = ['DRAIN', 'ETR'])
>> sc.convert_from_oldsurfex(output_format = "nc",
mesh_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\Surfex\BZH\shapefile\maille_meteo_fr_pr93.shp")
% ARGUMENTS (OPTIONNELS) :
> output_format = 'csv' (défault) | 'NetCDF' | 'GeoTIFF'
> kwargs:
> input_folder = dossier contenant les fichiers à traiter.
Si rien n'est spécifié, le dossier du script est pris en compte
> variables = variable(s) à traiter (parmi DRAIN, ETR, PRCP et RUNOFF)
Si ce n'est pas spécifié, toutes les variables sont considérées
> start_years = années à traiter
(par ex : 2012 correspond au fichier blabla_2012_2013)
Si rien n'est spécifié, toutes les années sont considérées
> mesh_file = chemin d'accès au fichier de correspondances entre les
id des tuiles et leurs coordonnées
(nécessaire uniquement pour NetCDF et GeoTIFF)
"""
# ---- RECUPERATION DES INPUTS
#--------------------------
if 'input_folder' in kwargs:
input_folder = kwargs['input_folder']
else:
input_folder = os.path.dirname(os.path.realpath(__file__))
# Si 'variables' est renseigné, il est converti en liste de strings.
# Sinon, toutes les variables sont prises en compte (DRAIN, ETR, PRCP...).
if 'variables' in kwargs:
variables = kwargs['variables']
if isinstance(variables, str): variables = [variables]
else: variables = list(variables)
else:
variables = ['DRAIN', 'ETR', 'PRCP', 'RUNOFF']
print('> variables = ' + ', '.join(variables))
# Si 'start_years' est renseigné, il est converti en liste de strings.
# Sinon, toutes les années sont considérées (1970-1971 -> 2011-2012).
if 'start_years' in kwargs:
start_years = kwargs['start_years']
if isinstance(start_years, str): start_years = [start_years]
else:
start_years = list(start_years)
start_years = [str(y) for y in start_years]
else:
start_years = [str(y) for y in range(1970, 2012, 1)]
print("> années de départ = " + ', '.join(start_years))
if 'mesh_file' in kwargs:
mesh_file = kwargs['mesh_file']
#% Localisation des dossiers et fichiers de base :
#-------------------------------------------------
idlist_file = os.path.join(input_folder,
r"numero_mailles_bretagne_etendue.txt")
# (= fichier des identifiants des mailles)
mesh_indexes = np.loadtxt(idlist_file, dtype = int)
# ---- TRAITEMENT DES DONNEES
for _variable in variables:
print("Traitement de " + _variable + "...")
dataset_allyears = pd.DataFrame(columns = mesh_indexes) #(initialisation)
for _year in start_years:
print(_year)
input_file = os.path.join(input_folder,
"_".join([_variable, _year,
str(int(_year)+1)]))
# (= fichier de données)
#% Importation :
#---------------
raw_data = pd.read_table(input_file, delimiter = " ",
engine = 'python', header = None)
# Redimensionnement :
#--------------------
if pd.to_datetime(str(int(_year) + 1)).is_leap_year: n_days = 366
else: n_days = 365
reshp_array = np.array(raw_data).reshape((n_days, 610),
order = 'C')
reshp_array = reshp_array[:, 0:-6] # Array de 365 lignes et 604 col
# Ajout des étiquettes :
#-----------------------
_start_date = _year + '-07-31'
date_indexes = pd.date_range(start = _start_date,
periods = n_days, freq = '1D')
dataframe_1y = pd.DataFrame(data = reshp_array,
index = date_indexes,
columns = mesh_indexes, copy = True)
dataset_allyears = dataset_allyears.append(dataframe_1y)
# =================================================================
# #% Rajouter des moyennes ou sommes annuelles comme sur les
# précédentes données Surfex :
# T['mean'] = T.mean(axis = 1)
# T['sum'] = T.sum(axis = 1)
# =================================================================
# ---- EXPORTATION
output_file = os.path.join(input_folder,
"_".join([_variable, start_years[0],
str(int(start_years[-1])+1)]
)
)
#% Export en CSV :
#-----------------
if output_format in ['csv', 'CSV']:
dataset_allyears.to_csv(output_file + ".csv", sep = '\t',
header = True)
(folder_name, file_name) = os.path.split(output_file + ".csv")
print("> Le fichier \'{}\' a été créé dans le dossier \'{}\'.".format(
file_name, folder_name))
#% Export en NetCDF ou TIFF :
#----------------------------
elif output_format in ['NetCDF', 'nc', 'TIFF', 'tiff', 'tif', 'GeoTIFF']:
    # Rasterize the per-mesh-cell time series onto a regular 8 km grid
    # (Lambert Zone II) and export it as NetCDF or GeoTIFF.
    #% Georeferencing of the data / formatting
    print("Formatage...")
    # (NB: dataset_allyears will be overwritten later on)
    all_date_indexes = dataset_allyears.index
    n_dates = len(all_date_indexes)
    # Transpose so that rows are mesh-cell ids and columns are dates
    _data = dataset_allyears.transpose(copy = True)
    # Assumes mesh_file provides 'num_id', 'Xlamb' and 'Ylamb' columns
    # (cell id and Lambert II center coordinates) — read below.
    _geodataset = gpd.read_file(mesh_file)
    _geodataset.num_id = _geodataset.num_id.astype(int)
    # Keep only the mesh cells for which data is available, then attach
    # the time series columns to the geometries
    _geodataset = _geodataset[_geodataset.num_id.isin(_data.index)]
    _geodataset = _geodataset.merge(_data, left_on = "num_id",
                                    right_index = True)
    res_x = 8000 # Surfex resolution [m]
    res_y = 8000
    n_x = len(pd.unique(_geodataset.loc[:, 'Xlamb']))
    n_y = len(pd.unique(_geodataset.loc[:, 'Ylamb']))
    # Shift by half a cell: Xlamb/Ylamb are cell centers, the transform
    # origin must be a cell corner
    x_min = min(_geodataset.loc[:, 'Xlamb']) - res_x/2 #NB : not 56000
    y_min = min(_geodataset.loc[:, 'Ylamb']) - res_y/2
    # NOTE(review): positive y pixel size with y_min as origin means a
    # south-up ("bottom-left origin") grid; the ascending coordinate
    # sort further below is consistent with this — confirm downstream
    # readers expect that orientation.
    profile_base = {'driver': 'NetCDF',
                    'dtype': 'float64',
                    'nodata': None,
                    # 'time': n_dates,
                    'height': n_y, #23
                    'width': n_x, #37
                    'count': n_dates,
                    'crs': rio.crs.CRS.from_epsg(27572),
                    'transform': Affine(res_x, 0.0, x_min,
                                        0.0, res_y, y_min),
                    # values deduced from Xlamb and Ylamb and QGIS
                    'tiled': False,
                    'interleave': 'band'}
    # One point geometry per cell center, used as the burn shapes below
    _geodataset['centers'] = [Point(x_y) for x_y in
                              zip(_geodataset.loc[:, 'Xlamb'],
                                  _geodataset.loc[:, 'Ylamb'])]
    # NB: Coordinates are here in Lambert Zone II (epsg:27572)
    raster_data_allyears = np.empty(shape = (n_dates,
                                             profile_base['height'],
                                             profile_base['width']))
    print("    Rasterisation... ({} éléments. Peut prendre plusieurs minutes)".format(len(all_date_indexes)))
    # NOTE(review): this loop is O(n_dates x n_cells) because the
    # (geometry, value) pairs are rebuilt per date; np.NaN was removed
    # in NumPy 2.0 — np.nan is the portable spelling. Confirm the
    # pinned NumPy version before upgrading.
    for d, _date in enumerate(all_date_indexes):
        # Burn each cell-center point with that date's value; fill the
        # rest of the grid with NaN
        _raster_data = rasterio.features.rasterize(
            [(_dataval.loc['centers'], _dataval.loc[pd.to_datetime(_date)])
             for i, _dataval in _geodataset.iterrows()],
            out_shape = (profile_base['height'], profile_base['width']),
            transform = profile_base['transform'],
            fill = np.NaN,
            all_touched = True)
        raster_data_allyears[d, :, :] = _raster_data
    #*** Could this step be done as-is with data including all
    # time steps? ***
    print("    xarray...")
    dataarray_allyears = xr.DataArray(
        raster_data_allyears,
        #raster_data_allyears.astype('float32'),
        coords = [all_date_indexes,
                  np.sort(pd.unique(_geodataset.loc[:, 'Ylamb'])),
                  np.sort(pd.unique(_geodataset.loc[:, 'Xlamb']))],
        # <!> NB <!> It is extremely important to sort the
        # coordinates in ascending order!!! (they are unordered in
        # _geodataset). Otherwise QGIS cannot read the files!
        dims = ['time', 'y', 'x'])
    # =============================================================================
    #         # [BEFORE: dims = ['time', 'latitude', 'longitude'])]
    # =============================================================================
    # The recommended dimension order is t, lat, lon, but it has
    # no impact.
    # Conversion to a Dataset:
    dataset_allyears = dataarray_allyears.to_dataset(name = _variable)
    print("Exportation...")
    if output_format in ['NetCDF', 'nc']:
        #% For NetCDF:
        # - - - - - - - -
        # Attribute formatting following the QGIS conventions:
        # Insert georeferencing metadata :
        dataset_allyears.rio.write_crs("epsg:27572", inplace = True)
        dataset_allyears.x.attrs = {'standard_name': 'projection_x_coordinate',
                                    'long_name': 'x coordinate of projection',
                                    'units': 'Meter'}
        dataset_allyears.y.attrs = {'standard_name': 'projection_y_coordinate',
                                    'long_name': 'y coordinate of projection',
                                    'units': 'Meter'}
        # =============================================================================
        #         # BEFORE:
        #         dataset_allyears.longitude.attrs = {'units': 'degrees_east',
        #                                             'long_name': 'longitude'}
        #         dataset_allyears.latitude.attrs = {'units': 'degrees_north',
        #                                            'long_name': 'latitude'}
        # =============================================================================
        # dataset_allyears.time.attrs = {'units': 'days since 1970-01-01',
        #                                'calendar': 'gregorian',
        #                                'long_name': 'time'}
        dataset_allyears.attrs = {'Conventions': 'CF-1.6'}
        dataset_allyears.to_netcdf(output_file + ".nc")
        (folder_name, file_name) = os.path.split(output_file + ".nc")
        print("> Le fichier \'{}\' a été créé dans le dossier \'{}\'.".format(
            file_name, folder_name))
    else:
        #% For TIFF:
        # - - - - - - -
        dataset_allyears[_variable].rio.to_raster(output_file + '.tiff')
        # =============================================================================
        #         # Method without rioxarray:
        #         # (Problem: the coordinate values seem to be unordered)
        #         with rasterio.open(output_file + ".tiff", 'w',
        #                            **profile_base) as out_f:
        #             for d, _date in enumerate(all_date_indexes):
        #                 # out_f.write_band(d + 1, raster_data_allyears[d, :, :])
        #                 out_f.write_band(d + 1,
        #                                  dataset_allyears[_variable].sel(time = _date))
        # =============================================================================
        (folder_name, file_name) = os.path.split(output_file + ".tiff")
        print("> Le fichier \'{}\' a été créé dans le dossier \'{}\'.".format(
            file_name, folder_name))
    return dataset_allyears
#%% main
if __name__ == "__main__":
    # Format the inputs (assumed to be strings) into floats
    # NOTE(review): raises IndexError if fewer than two command-line
    # arguments are given, and ValueError if they are not numeric.
    # Mutating sys.argv in place is unusual — presumably the converted
    # values are consumed by code past this excerpt; confirm.
    sys.argv[1] = float(sys.argv[1])
    sys.argv[2] = float(sys.argv[2])