"""
The following functions are used on Python 3.6 with packages
Numpy version: 1.14.2
Pandas version: 0.23.0
"""

import pandas as pd
import numpy as np

def _safe_fraction(count, total):
    '''Return count/total as a float, or NaN when total is zero.'''
    return count / total if total else float('nan')


def basicStats(df, thres, quantiles=(90, 95, 99)):
    '''
    Calculate basic statistical parameters for a time series.
    Input:
        df: one-dimensional data: a pandas Series, a data frame with a
            single column, or any array-like convertible to float
        thres: threshold for wet/rainy day; a value is wet when it is
            strictly greater than thres and dry when it is <= thres
        quantiles: quantiles (in percentage)
    Returns:
        Mean
        Standard deviation (numpy default, i.e. ddof=0)
        fraction of wet days among the non-NaN values
        transition probabilities as a list [p00, p01, p10, p11], where
            pij is the fraction of consecutive pairs starting in state i
            that end in state j (0 = dry, 1 = wet)
        quantileVal: numpy array with the value of corresponding quantiles
    Usage:
        basicStats(df.obs, 0.1)

    Note, NaN values are ignored by every statistic: they are excluded
    from the mean, standard deviation, quantiles and wet-day fraction, and
    any consecutive pair containing a NaN is left out of the transition
    counts. A probability whose denominator is zero (e.g. no dry day is
    followed by a valid day) is returned as NaN.
    '''
    # Flatten to a 1-D float array so that a Series, a single-column
    # DataFrame, a list and an ndarray are all handled identically.
    values = np.asarray(df, dtype=float).ravel()

    avg = np.nanmean(values)
    sd = np.nanstd(values)

    # Any comparison against NaN is False, so a missing value is neither
    # wet nor dry; errstate silences the warning older numpy emits for it.
    with np.errstate(invalid='ignore'):
        wet = values > thres
        dry = values <= thres

    # Use the number of valid observations as denominator so that missing
    # days are not silently counted as dry days.
    n_valid = np.count_nonzero(~np.isnan(values))
    p_rain = _safe_fraction(np.count_nonzero(wet), n_valid)

    # State on day t (…0) paired with the state on day t+1 (…1).
    dry0, dry1 = dry[:-1], dry[1:]
    wet0, wet1 = wet[:-1], wet[1:]
    n00 = np.count_nonzero(dry0 & dry1)
    n01 = np.count_nonzero(dry0 & wet1)
    n10 = np.count_nonzero(wet0 & dry1)
    n11 = np.count_nonzero(wet0 & wet1)

    from_dry = n00 + n01
    from_wet = n10 + n11
    p00 = _safe_fraction(n00, from_dry)
    p01 = _safe_fraction(n01, from_dry)
    p10 = _safe_fraction(n10, from_wet)
    p11 = _safe_fraction(n11, from_wet)

    quantileVal = np.nanpercentile(values, quantiles)

    return avg, sd, p_rain, [p00, p01, p10, p11], quantileVal
    


def modifiedKGE(file, index_true, index_pred):
    '''
    Compute the modified Kling-Gupta efficiency and its components.
    Input:
        file: a data frame with different data sources in each column.
        index_true: the index of the dataframe corresponding to observed (or true) value
        index_pred: the index of the dataframe corresponding to model/predicted value
    Returns:
        Correlation: linear correlation coefficient between the two columns
        beta: Bias ratio, mean(pred) / mean(true)
        gamma: Variability ratio, ratio of the coefficients of variation
               (std/mean) of pred and true
        KGE': modified Kling-Gupta Efficiency,
              1 - sqrt((1-r)^2 + (1-beta)^2 + (1-gamma)^2)
    Usage:
        modifiedKGE(df, 'obs','sim')

    Note, only rows where both selected columns are non-missing are used;
    missing values in any other column of the data frame have no effect.
    The input data frame is not modified.
    '''
    true_col = file[index_true]
    pred_col = file[index_pred]

    # Keep a row only when BOTH compared series have a value. Dropping on
    # the whole frame would let unrelated columns discard usable pairs.
    paired = (true_col.notna() & pred_col.notna()).values
    y_true = true_col.values[paired]
    y_pred = pred_col.values[paired]

    r = np.corrcoef(y_true, y_pred)[0, 1]

    mean_true = np.mean(y_true)
    mean_pred = np.mean(y_pred)
    std_true = np.std(y_true)
    std_pred = np.std(y_pred)

    beta = mean_pred / mean_true

    # gamma compares coefficients of variation. The original (Gupta et al
    # 2009) formulation uses alpha = std_pred / std_true instead.
    gamma = (mean_true * std_pred) / (mean_pred * std_true)

    mKGE = 1 - np.sqrt((1 - r) ** 2 + (1 - beta) ** 2 + (1 - gamma) ** 2)

    return (r, beta, gamma, mKGE)
	