# Source code for o3skim.loads

"""Module in charge of model data loading."""
import pandas as pd
import numpy as np
import logging
import xarray as xr
import o3skim.utils as utils


logger = logging.getLogger('o3skim.loads')


def ccmi(variable, paths):
    """Loads a CCMI-1 variable together with the dataset attributes.

    :param variable: Variable to load from the dataset.
    :type variable: str

    :param paths: Paths expression to the dataset netCDF files.
    :type paths: str or [str]

    :return: Tuple with the DataArray selected by ``variable`` and the
        attributes of the opened dataset.
    :rtype: (:class:`xarray.DataArray`, dict)
    """
    logger.debug("Loading CCMI-1 data from: %s", paths)
    # A one-element sequence is unwrapped so that its single item (rather
    # than a list holding it) is handed to open_mfdataset.
    source = paths[0] if len(paths) == 1 else paths
    with xr.open_mfdataset(source) as dataset:
        return dataset[variable], dataset.attrs


def ecmwf(variable, paths):
    """Loads an ECMWF variable together with the dataset attributes.

    :param variable: Variable to load from the dataset.
    :type variable: str

    :param paths: Paths expression to the dataset netCDF files.
    :type paths: str or [str]

    :return: Tuple with the DataArray selected by ``variable`` and the
        attributes of the opened dataset.
    :rtype: (:class:`xarray.DataArray`, dict)
    """
    logger.debug("Loading ECMWF data from: %s", paths)
    # A one-element sequence is unwrapped so that its single item (rather
    # than a list holding it) is handed to open_mfdataset.
    source = paths[0] if len(paths) == 1 else paths
    with xr.open_mfdataset(source) as dataset:
        return dataset[variable], dataset.attrs


def esacci(variable, time_position, paths):
    """Loads an ESACCI variable together with the dataset attributes.

    ESACCI files carry no usable time coordinate for this loader, so the
    time is taken from the file name, which is composed of sections
    separated by ``-``. For example:
    ESACCI-OZONE-L3S-TC-MERGED-DLR_1M-20010302-fv0100.
    Therefore it is needed to indicate the zero-based position of the
    section holding the dataset time (6 or -2 for the case above).

    :param variable: Variable to load from the dataset.
    :type variable: str

    :param time_position: Zero-based index (negative values count from
        the end) of the file name section holding the dataset time.
    :type time_position: int

    :param paths: Paths expression to the dataset netCDF files.
    :type paths: str or [str]

    :return: Tuple with the DataArray selected by ``variable`` and the
        attributes of the opened dataset.
    :rtype: (:class:`xarray.DataArray`, dict)
    """
    # Imported locally so this function does not rely on a file-level
    # import being present.
    import os

    logger.debug("Loading ESACCI data from: %s", paths)
    # A one-element sequence is unwrapped so that its single item (rather
    # than a list holding it) is handed to open_mfdataset.
    if len(paths) == 1:
        paths = paths[0]

    def add_time_from_filename(ds):
        """Adds a length-one ``time`` dimension parsed from the file name."""
        # basename copes with the separators of the running platform,
        # unlike a hard-coded split on '/'.
        fname = os.path.basename(ds.encoding["source"])
        fdate = fname.split('-')[time_position]
        return ds.expand_dims(time=[pd.to_datetime(fdate)])

    # The preprocess hook runs on every file before they are combined, so
    # each one gets its own time stamp.
    with xr.open_mfdataset(paths, preprocess=add_time_from_filename) as dataset:
        return dataset[variable], dataset.attrs


def sbuv(textfile, delimiter):
    """Loads a SBUV DataArray model and an empty attributes dict.
    Note SBUV models do not have longitude coordinate.

    The text file is split into chunks by ``utils.chunkio``; each chunk
    yields a header line and a table that is read as one year of monthly
    values indexed by latitude.

    :param textfile: Location to the textfile with model information.
    :type textfile: str

    :param delimiter: Delimiter character for row values on the table.
    :type delimiter: str or [str]

    :return: Tuple with the yearly arrays concatenated along ``time``
        and an empty dict (no dataset attributes are available).
    :rtype: (:class:`xarray.DataArray`, {})
    """
    arrays = []
    with open(textfile, 'r') as strio:
        # Consume the chunks while the file is still open: if chunkio
        # reads lazily, iterating after the file is closed would fail.
        for head, chunk in utils.chunkio('SBUV', strio):
            # The last two characters of the header line are discarded
            # before splitting it into space separated fields.
            header = head[0:-2].split(' ')
            year = int(header[0])
            table = pd.read_table(chunk, sep=delimiter, index_col=[0, 1])
            # The two index columns are collapsed into their mean value,
            # which is used as the latitude coordinate.
            table.index = [(int(l1) + int(l2)) / 2 for l1, l2 in table.index]
            array = xr.DataArray(
                data=table,
                attrs=dict(
                    version=header[3],
                    name=header[1],
                    description=' '.join(header[-3:])),
                dims=['lat', 'time'],
                coords=dict(
                    lat=table.index,
                    # pd.datetime was removed from pandas; Timestamp is
                    # the supported equivalent. One entry per month.
                    time=[pd.Timestamp(year, m, 1) for m in range(1, 13)]))
            # Zeros are replaced in place by NaN.
            values = array.values
            values[values == 0.0] = np.nan
            arrays.append(array)
    return xr.concat(arrays, 'time'), {}