# Source code for o3api.load

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 13 16:29:07 2022

@author: valentin
"""

import glob
import o3api.config as cfg
import os
import logging
import xarray as xr

import cProfile
import io
import pstats
from functools import wraps

# to check size of data in the memory
# https://github.com/pympler/pympler
#from pympler import asizeof

# Module-level logger.
# NOTE(review): the name passed here is the literal string '__name__', not the
# module's __name__ variable, so the logger is literally called "__name__".
# Presumably `logging.getLogger(__name__)` (or 'o3api') was intended - confirm.
logger = logging.getLogger('__name__')
# NOTE(review): this level is overridden by logger.setLevel(logging.INFO)
# further down, so cfg.log_level has no lasting effect on this logger.
logger.setLevel(cfg.log_level)

# Names looked up in the netCDF configuration (cfg.netCDF_conf).
# TIME is used below as the dimension name passed to convert_calendar();
# the others are presumably coordinate/variable names - verify against
# o3api.config.
TIME = cfg.netCDF_conf['t_c']
LAT = cfg.netCDF_conf['lat_c']
TCO3 = cfg.netCDF_conf['tco3']
VMRO3 = cfg.netCDF_conf['vmro3']
TCO3Return = cfg.netCDF_conf['tco3_r']

# configuration for API
api_c = cfg.api_conf

# Second logger setup: fetches the same '__name__' logger again, installs a
# root handler format via basicConfig and forces the level to INFO.
# NOTE(review): with INFO in effect, logger.debug() calls in this module are
# not emitted regardless of cfg.log_level - confirm this is intended.
logger = logging.getLogger('__name__')
logging.basicConfig(format='%(asctime)s [%(levelname)s]: %(message)s')
logger.setLevel(logging.INFO)


def _profile(func):
    """Decorate a function so that every call is profiled with cProfile.

    Each call of the decorated function runs under a fresh
    ``cProfile.Profile``. When the call finishes, the collected statistics,
    sorted by cumulative time, are printed to standard output.

    The profiler is always disabled and the report is always printed, even
    if the wrapped function raises; the exception then propagates unchanged.

    :param func: The function to profile
    :return: Wrapper with the same signature and return value as ``func``
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        profiler = cProfile.Profile()
        profiler.enable()
        try:
            return func(*args, **kwargs)
        finally:
            # Always switch the profiler off: a profiler left enabled after an
            # exception keeps collecting and can prevent later profilers from
            # being enabled.
            profiler.disable()
            stream = io.StringIO()
            stats = pstats.Stats(profiler, stream=stream)
            stats.sort_stats('cumulative')
            stats.print_stats()
            print(stream.getvalue())

    return wrapper

class LoadData:
    """Base class to locate and load the datasets of one plot type.

    :param data_basepath: Base directory; data files are searched in its
        direct sub-directories (one sub-directory per model)
    :param plot_type: The plot type (e.g. tco3_zm, tco3_return, vmro3_zm, ...)
    """

    def __init__(self, data_basepath, plot_type):
        """Constructor method

        Stores the base path and plot type and derives the file name
        pattern used to find the data files.
        """
        self.data_basepath = data_basepath
        self.plot_type = plot_type
        if plot_type == "tco3_return":
            # tco3_return uses the same data as tco3_zm
            self._data_pattern = "tco3*.nc"
        else:
            self._data_pattern = self.plot_type + "*.nc"
        self._datafile_paths = []

    def __set_datafile_paths(self):
        """Set the sorted list of datafile paths for the plot type.

        Stores in ``self._datafile_paths`` every file matching
        ``<data_basepath>/<directory>/<data pattern>``. ``glob`` is called
        without ``recursive=True``, so ``'**'`` matches exactly one
        directory level below ``data_basepath``.
        """
        search_pattern = os.path.join(self.data_basepath, '**',
                                      self._data_pattern)
        self._datafile_paths = sorted(glob.glob(search_pattern))

    def load_dataset(self, model_path):
        """Load dataset from the datafile path (one model)

        :param model_path: Full path to the model data
        :return: xarray Dataset with the model data
        :rtype: xarray.Dataset
        """
        # open without CF decoding, then decode explicitly in a second step
        ds = xr.open_dataset(model_path,
                             cache=True,
                             decode_cf=False)
        ds = xr.decode_cf(ds)
        # we are using monthly data, with the 'middle' date,
        # in this case converting with align_on='date' will not miss dates
        # see https://xarray.pydata.org/en/stable/generated/xarray.Dataset.convert_calendar.html
        ds = ds.convert_calendar('standard', TIME,
                                 align_on='date',
                                 use_cftime=False)
        return ds

    def load_dataset_ensemble(self):
        """Load data from the list of datafiles in memory.

        The list of datafiles is refreshed first. Each dataset is stored
        under the name of the directory that contains its file; if one
        directory holds several matching files, the last one in sorted
        order replaces the earlier ones.

        :return: dictionary of datasets as {'model': xarray dataset }
        """
        self.__set_datafile_paths()
        logger.debug("get_dataset_ensemble (total: %d): %s",
                     len(self._datafile_paths), self._datafile_paths)

        # dictionary to hold all data as {'model': dataset}
        ds_ensemble = {}
        for mp in self._datafile_paths:
            # name of the dataset is the name of the containing directory
            model = os.path.basename(os.path.dirname(mp))
            ds_ensemble[model] = self.load_dataset(mp)
            # immediately load it in memory
            ds_ensemble[model].load()

        print(F"Loaded {len(ds_ensemble)} {self.plot_type} (zonal mean) models")
        return ds_ensemble
# initialize an empty dictionary
#data = {}
#tco3_zm = LoadDataset("tco3_zm")
#data["tco3_zm"] = tco3_zm.load_dataset_ensemble()
#vmro3_zm = LoadDataset("vmro3_zm")
#data["vmro3_zm"] = vmro3_zm.load_dataset_ensemble()
#print("Memory, tco3_zm:", asizeof.asizeof(data["tco3_zm"]))
#print(F"Loaded {len(data['tco3_zm'])} TCO3 (zonal mean) models")
#print("Memory, vmro3_zm:", asizeof.asizeof(data["vmro3_zm"]))
#print(F"Loaded {len(data['vmro3_zm'])} VMRO3 (zonal mean) models")
#print(data["tco3_zm"]["CCMI-1_ACCESS_ACCESS-CCM-refC2"])
#print(data["tco3_zm"]["SBUV_GSFC_merged-SAT-ozone"])
#print(data["vmro3_zm"])