Source code for o3api.tco3_zm

# -*- coding: utf-8 -*-
#
# Copyright (c) 2017 - 2020 Karlsruhe Institute of Technology - Steinbuch Centre for Computing
# This code is distributed under the MIT License
# Please, see the LICENSE file
#
# @author: vykozlov

import logging
import numpy as np
import o3api.config as cfg
#import o3api.debug as dbg
from o3api.prepare import PrepareData
import pandas as pd
from scipy import signal


logger = logging.getLogger('__name__')
logger.setLevel(cfg.log_level)

# configuration for netCDF
TIME = cfg.netCDF_conf['t_c']
LAT = cfg.netCDF_conf['lat_c']
TCO3 = cfg.netCDF_conf['tco3']
VMRO3 = cfg.netCDF_conf['vmro3']
TCO3Return = cfg.netCDF_conf['tco3_r']

# configuration for API
api_c = cfg.api_conf


[docs]class ProcessForTCO3Zm(PrepareData): """Subclass of :class:`PrepareData` to calculate tco3_zm """ def __init__(self, data, **kwargs): super().__init__(data, **kwargs) self.ref_meas = kwargs[api_c['ref_meas']] self.ref_year = kwargs[api_c['ref_year']] self.ref_fillna = kwargs[api_c['ref_fillna']] self.ref_value, self.ref_data = self.get_ref_value() def __smooth_boxcar(self, data, bwindow): """Function to apply boxcar, following https://scipy-cookbook.readthedocs.io/items/SignalSmooth.html N.B. 'valid' replaced with 'same' ! """ boxcar = np.ones(bwindow) logger.debug("signal(raw) (len={}): {}".format(len(data),data)) # 1. select data slice with valid data, i.e. avoid NaNs at start & end # 2. if there are NaN _within_ the data range, interpolate # 3. apply boxcar smoothing on this data slice # 4. update original data with smoothed values first_idx = data.first_valid_index() last_idx = data.last_valid_index() data_smooth = data.loc[first_idx:last_idx] data_smooth = data_smooth.interpolate() logger.debug("signal(for smoothing) (len={}): {}".format(len(data_smooth),data_smooth)) # mirror start and end of the original signal: sgnl = np.r_[data_smooth[bwindow-1:0:-1],data_smooth,data_smooth[-2:-bwindow-1:-1]] logger.debug("Signal (len={}): {}".format(len(sgnl),sgnl)) boxcar_values = signal.convolve(sgnl, boxcar, mode='same')/bwindow logger.debug("Signal+boxcar (len={}): {}".format(len(boxcar_values), boxcar_values)) data_out = data data_out.loc[first_idx:last_idx] = boxcar_values[bwindow-1:-(bwindow-1)] return data_out
[docs] def get_ensemble_yearly(self, models) -> pd.DataFrame: """Rebin tco3_zm data for yearly entries: (0). optionally interpolate missing values in the reference measurement 1. group by year 2. average by applying mean() :param models: Models to process for tco3 plots :return: yearly averaged data points :rtype: pd.DataFrame """ data = super().get_raw_ensemble_pd(models) # try to interpolate values in the ref_meas if self.ref_meas in models and self.ref_fillna: data[self.ref_meas] = data[self.ref_meas].interpolate(method='linear', limit_direction='forward', axis=0) data = data.groupby([data.index.year], dropna=True).mean() # select data according to the years requested #return data[(data.index>=self.begin) & (data.index<=self.end)] return data
[docs] def get_ref_value(self): """Get reference value for the reference year :return: reference value (tco3_zm at reference year) """ # get ref_meas averaged over a year ref_data = self.get_ensemble_yearly([self.ref_meas]) # select ref_year and find ref_value ref_value = ref_data[ref_data.index == self.ref_year].iloc[0][self.ref_meas] logger.debug("ref_value: {} => {}".format(ref_data, ref_value)) return ref_value, ref_data
[docs] def get_ensemble_smoothed(self, models, smooth_win) -> pd.DataFrame: """Smooth tco3_zm data using boxcar :param models: Models to process for tco3_return :return: smoothed data points :rtype: pd.DataFrame """ data = self.get_ensemble_yearly(models) # NB: __smooth_boxcar smooths data only with valid values, # i.e. we avoid NaNs at beginning and end of dataframe # see __smooth_boxcar() for details data = data.apply(self.__smooth_boxcar, args = [smooth_win], axis = 0, result_type = 'broadcast' ) return data
[docs] def get_ensemble_shifted(self, data) -> pd.DataFrame: """Shift tco3_zm data to reference year :param data: data to process as pd.DataFrame :return: shifted data points for plotting :rtype: pd.DataFrame """ data_ref_year = data[data.index == self.ref_year].mean() shift = self.ref_value - data_ref_year # in the case of no data at ref_year, "shift" contains NaNs # Option 1: # => replace NaNs with "0.", i.e. do NOT apply shift! #shift = shift.fillna(0.) # Option 2: -> taken # do nothing, then data_shifted is filled with NaNs as shift is NaN data_shifted = data + shift.values logger.debug("data_shifted: {} ".format(data_shifted)) return data_shifted
[docs] def get_ensemble_stats(self, data) -> pd.DataFrame: """Calculate Mean, Std, Median for tco3_zm data :param data: data to process as pd.DataFrame :return: updated pd.DateFrame with stats columns :rtype: pd.DataFrame """ # make data copy, as adding a new "Mean" column # affects "Median" calculation, issue#52 data_in = data.copy(deep=True) # exclude also ref_measurement from the calculation of statistics models = data_in.columns.to_list() if self.ref_meas in models: data_in.drop(self.ref_meas, axis=1, inplace=True) # calculate stats data['MMMean'] = data_in.mean(axis=1, skipna=True) data_std = data_in.std(axis=1, skipna=True) data['MMMean-Std'] = data['MMMean'] - data_std data['MMMean+Std'] = data['MMMean'] + data_std data['MMMedian'] = data_in.median(axis=1, skipna=True) return data
#@dbg._profile
[docs] def get_ensemble_for_plot(self, models) -> pd.DataFrame: """Build the ensemble of tco3_zm models for plotting, include reference :param models: Models to process for tco3_zm :return: ensemble of models, including the reference, as pd.DataFrame :rtype: pd.DataFrame """ boxcar_win = cfg.O3AS_TCO3Return_BOXCAR_WINDOW data = self.get_ensemble_smoothed(models, boxcar_win) data_shift = self.get_ensemble_shifted(data) data_plot = self.get_ensemble_stats(data_shift) if self.ref_meas in models: data_plot[self.ref_meas] = self.ref_data # inject 'reference_value' data_plot['reference_value'] = self.ref_value return data_plot
[docs]class ProcessForTCO3ZmReturn(ProcessForTCO3Zm): """Subclass of :class:`ProcessForTCO3Zm` to calculate tco3_return """ def __init__(self, data, **kwargs): # to properly retrieve skimmed data, set to TCO3 data kwargs[api_c['plot_t']] = TCO3 super().__init__(data, **kwargs) self.region = kwargs['region']
[docs] def get_return_years(self, data): """Calculate return year for every model :param data: data to process :return: return years for models :rtype: pd.DataFrame """ refMargin = cfg.O3AS_TCO3Return_REF_YEAR_MARGIN logger.debug(F"{self.ref_year}: {self.ref_value} (ref_value)") def __get_return_year(data): # 1. search for years with tco3_zm > ref_value # 2. remove duplicates # 3. only two rows will be left (False, True) data_return = (data[data.index>(self.ref_year+refMargin)]>self.ref_value).drop_duplicates() try: idx = data_return.values.tolist().index(True) return_year = data_return.index[idx] except: return_year = np.nan return return_year return_years = {} # model: year models = data.columns [ return_years.update({m: __get_return_year(data[m])}) for m in models ] logger.debug(F"return_years: {return_years}") data_return_years = pd.DataFrame(return_years, index=[self.region]) #data_return_years['mean_year'] = data_return_years.mean(axis=1, skipna=True) return data_return_years
[docs] def get_ensemble_for_plot(self, models): """Build the ensemble of tco3_return points for plotting :param models: Models to process for tco3_zm :return: ensemble of models, including the mean, as pd.DataFrame :rtype: pd.DataFrame """ boxcar_win = cfg.O3AS_TCO3Return_BOXCAR_WINDOW data = super().get_ensemble_smoothed(models, boxcar_win) data_shift = super().get_ensemble_shifted(data) data_tco3 = super().get_ensemble_stats(data_shift) data_return_years = self.get_return_years(data_tco3) return data_return_years