Source code for time_funcs

#!/usr/bin/env python
# coding: utf-8

import pandas as pd
import click

def resample_to_month(df: pd.DataFrame, stat_func='mean', suffix=None) -> pd.DataFrame:
    """Resample a timeseries at sub-monthly time step to monthly values.

    The dataframe is binned with the pandas frequency alias 'M' and the
    selected statistic is applied to each bin. If desired, a suffix can be
    appended to the column names of the result to tell it apart from the
    input; by default the column names are unaltered.

    Args:
        df (pd.DataFrame): dataframe containing the timeseries. Note, only
            tested with dataframes containing one column.
        stat_func (str, optional): statistic to apply. Either 'mean', 'max',
            'min' or 'sum'. Defaults to 'mean'.
        suffix (str, optional): suffix added to the columns of the returned
            dataframe. Defaults to None (no suffix).

    Returns:
        pd.DataFrame: dataframe containing the resampled timeseries.

    Raises:
        ValueError: if stat_func is not one of the supported statistics.
    """
    resampler = resample_time(df, 'M')
    click.echo('INFO -- resampling data to monthly time scale.')

    # Every supported statistic is a method of the resampler with the same
    # name, so one whitelist check replaces the if/elif chain.
    if stat_func not in ('mean', 'max', 'min', 'sum'):
        raise ValueError('no supported statistical function provided - choose between mean, max, min or sum')
    df_out = getattr(resampler, stat_func)()

    # Identity test rather than "!= None": None is a singleton and the
    # comparison must not depend on how the suffix object implements "!=".
    if suffix is not None:
        df_out = df_out.add_suffix(suffix)

    return df_out
def resample_to_annual(df: pd.DataFrame, stat_func='mean', suffix=None) -> pd.DataFrame:
    """Resample a timeseries at sub-annual time step to annual values.

    The dataframe is binned with the pandas frequency alias 'Y' and the
    selected statistic is applied to each bin. If desired, a suffix can be
    appended to the column names of the result to tell it apart from the
    input; by default the column names are unaltered.

    Args:
        df (pd.DataFrame): dataframe containing the timeseries. Note, only
            tested with dataframes containing one column.
        stat_func (str, optional): statistic to apply. Either 'mean', 'max',
            'min' or 'sum'. Defaults to 'mean'.
        suffix (str, optional): suffix added to the columns of the returned
            dataframe. Defaults to None (no suffix).

    Returns:
        pd.DataFrame: dataframe containing the resampled timeseries.

    Raises:
        ValueError: if stat_func is not one of the supported statistics.
    """
    resampler = resample_time(df, 'Y')
    click.echo('INFO -- resampling data to yearly time scale.')

    # Every supported statistic is a method of the resampler with the same
    # name, so one whitelist check replaces the if/elif chain.
    if stat_func not in ('mean', 'max', 'min', 'sum'):
        raise ValueError('no supported statistical function provided - choose between mean, max, min or sum')
    df_out = getattr(resampler, stat_func)()

    # Identity test rather than "!= None": None is a singleton and the
    # comparison must not depend on how the suffix object implements "!=".
    if suffix is not None:
        df_out = df_out.add_suffix(suffix)

    return df_out
def resample_time(df: pd.DataFrame, resampling_period: str) -> "pd.core.resample.Resampler":
    """Create a pandas resampler for a dataframe.

    The bin width is set with 'resampling_period' and needs to follow the
    pandas frequency-alias conventions. The returned object holds no
    aggregated data yet: it has to be combined with a statistic, such as
    ".mean()", to obtain a dataframe.

    Args:
        df (pd.DataFrame): dataframe to be resampled.
        resampling_period (str): resampling duration, e.g. 'D'.

    Returns:
        pd.core.resample.Resampler: the lazy resampler produced by
        ``df.resample`` (not a dataframe).
    """
    # The former explicit convention='start' is omitted: 'start' is already
    # the pandas default, so the result is the same, and newer pandas
    # releases appear to emit a deprecation warning when the keyword is
    # passed explicitly.
    return df.resample(resampling_period)
def calc_monthly_climatology(df_in: pd.DataFrame, col_name=None) -> pd.DataFrame:
    """Calculate the climatological mean of each calendar month.

    All values of a timeseries at sub-monthly timestep are grouped by the
    month number of their index, and the mean of every group is returned.

    Args:
        df_in (pd.DataFrame): dataframe containing the timeseries at
            sub-monthly timestep. Its index must expose a ``month``
            attribute, as a datetime index does.
        col_name (str, optional): name of the only column to be considered.
            Defaults to None, in which case all columns are used.

    Returns:
        pd.DataFrame: mean of each month, indexed by month number. When
        col_name selects a single column, the result is a pandas Series
        rather than a dataframe.
    """
    # Identity test rather than "!= None": an array-like selection (e.g. a
    # pd.Index of column names) compares elementwise against None and would
    # make the truth test ambiguous.
    df = df_in if col_name is None else df_in[col_name]

    # Group values by the month number of their timestamp, then average.
    return df.groupby(df.index.month).mean()