# Source code for Foresight.eval_inspect

"""
This module includes a set of functions that are used to evaluate and
inspect the time series in the dataset.

Author: Oliver Boom
Github Alias: OliverJBoom
"""

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error


def check_length(universe_dict):
    """Prints the length of every DataFrame in the dictionary of time series.

    One line is printed per dictionary entry, in the dictionary's iteration
    order, containing only the ``len`` of that entry.

    :param universe_dict:       The dictionary of time series
    :type  universe_dict:       dict
    """
    for df in universe_dict.values():
        print(len(df))


def visualise_df(df):
    """Visualises each time series in a DataFrame.

    Every column is drawn on its own axes, laid out in a two-column grid,
    and the figure is then displayed with ``plt.show()``. Nothing is drawn
    when the DataFrame has no columns.

    :param df:                  The DataFrame of time series to visualise
    :type  df:                  pd.DataFrame
    """
    n_series = len(df.columns)
    if n_series == 0:
        # A grid with zero rows cannot be created, so there is nothing to do.
        return

    # Round the row count up so an odd number of columns still gets an axes
    # for its last series (flooring dropped it, and produced zero rows for a
    # single-column DataFrame).
    n_rows = (n_series + 1) // 2

    _, ax_arr = plt.subplots(n_rows, 2,
                             figsize=(4 * 10, 4 * n_series))
    flat_axes = ax_arr.flatten()

    for axes, df_name in zip(flat_axes, df.columns):
        axes.set_title(df_name)
        # The label gives legend() an entry to display.
        axes.plot(df.index, df[df_name], label=df_name)
        axes.grid()
        axes.legend()

    # Hide the spare axes left over when the number of series is odd.
    for spare_axes in flat_axes[n_series:]:
        spare_axes.set_visible(False)

    plt.show()


def check_day_frequency(df, col_name='ds'):
    """Creates a bar chart showing the frequency of the days of the week.

    Used to check that only business days are included in the dataset, and
    that there is a roughly equal distribution of entries across the week.

    The day names are written to a ``"day"`` column of ``df`` (the DataFrame
    passed in is modified in place), the counts per day are printed, and the
    same counts are drawn as a bar chart.

    :param df:               A DataFrame containing the time series to check
    :type  df:               pd.DataFrame

    :param col_name:     The name of the column of interest
    :type  col_name:     string
    """
    # Timestamp.weekday_name was removed in pandas 1.0; day_name() is the
    # replacement and returns the same English day names.
    df["day"] = df[col_name].apply(lambda x: x.day_name())

    # Count once and reuse the result for both the printout and the chart.
    day_counts = df["day"].value_counts()
    print(day_counts)
    day_counts.plot(kind='bar')


def df_std(df, col_name):
    """Calculates standard deviation of a DataFrames column.

    The values selected by ``col_name`` are pooled into a single series and
    the sample standard deviation (pandas default, ``ddof=1``) is taken,
    ignoring missing values.

    :param df:                    A DataFrame of time series
    :type  df:                    pd.DataFrame

    :param col_name:              The column of interest
    :type  col_name:              string

    :return:                      The standard deviation of the series
    :rtype:                       float
    """
    # Flatten the selected column(s) directly rather than via
    # DataFrame.stack(), whose default implementation is deprecated in
    # recent pandas releases. Series.std() skips NaN, which matches the
    # NaN-dropping behaviour of the stack-based computation.
    pooled_values = df[[col_name]].to_numpy().ravel()
    return pd.Series(pooled_values).std()


def inverse_log_returns(original_prices, log_returns, lag=5, offset=0):
    """Takes a DataFrame of predicted log returns and original
    prices and returns an array of predicted absolute prices

    The offset parameter moves the series forwards or backwards to
    align the series with the DataFrame it might be appended to.

    When ``offset`` is 0 the last ``lag`` rows of both inputs are dropped
    before they are combined; with ``lag=0`` nothing is dropped. When
    ``offset`` is non-zero the prices are shifted by ``offset`` rows and
    ``lag`` is not used.

    :param original_prices:  A DataFrame of absolute prices
    :type  original_prices:  pd.DataFrame

    :param log_returns:      A DataFrame of log returns
    :type  log_returns:      pd.DataFrame

    :param lag:              The lag in days between series
    :type  lag:              int

    :param offset:          Amount to offset the series forwards of backwards
    :type  offset:          int

    :return:                The raw prices given by the log returns,
                            flattened to one dimension
    :rtype:                 np.ndarray
    """
    assert isinstance(log_returns, pd.DataFrame)
    assert isinstance(original_prices, pd.DataFrame)

    shifted_prices = original_prices.shift(offset).values

    if offset != 0:
        return (shifted_prices * np.exp(log_returns)).values.ravel()

    # A plain [:-lag] slice is empty when lag == 0, so use an open-ended
    # slice in that case to keep every row.
    stop = -lag if lag else None
    return (shifted_prices[:stop] *
            np.exp(log_returns.iloc[:stop])).values.ravel()


def mean_absolute_percentage_error(y_true, y_pred):
    """Calculates the mean absolute percentage error between two arrays.

    Each error is taken relative to the magnitude of the observed value, so
    negative observations contribute a positive percentage. Observed values
    of zero are not handled specially and lead to a division by zero.

    :param y_true:            The observed values
    :type  y_true:            np.array

    :param y_pred:            The predicted values
    :type  y_pred:            np.array

    :return:                  The mean absolute percentage error of the series
    :rtype:                   float
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Take the absolute value of the whole ratio: dividing by a signed
    # y_true would let errors on negative observations cancel the others.
    return 100 * np.mean(np.abs((y_true - y_pred) / y_true))


def evaluate(y_true, y_pred, log_ret=False):
    """Calculates the error metrics between two arrays.

    The error metrics calculated are:
        Mean Squared Error
        Mean Absolute Error
        Mean Directional Accuracy

    For a log returns series the definition of mean directional accuracy
    changes. A log return series is already the first difference of the
    original series, so the signum of its values gives the direction in
    which the series has moved. For raw prices the signal needs to be
    differenced before the signum function is applied.

    :param y_true:            The observed values
    :type  y_true:            np.array

    :param y_pred:            The predicted values
    :type  y_pred:            np.array

    :param log_ret:           Whether the series compared are log returns
    :type  log_ret:           bool

    :return error_metrics:    The error metrics of the series, in the order
                              [mse, mae, mda]
    :rtype:                   List
    """
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)

    # Log returns are already differenced, so they use the variant that
    # compares signs directly.
    if log_ret:
        directional_accuracy = mean_directional_accuracy_log_ret
    else:
        directional_accuracy = mean_directional_accuracy
    mda = directional_accuracy(y_true, y_pred)

    return [mse, mae, mda]


def mean_directional_accuracy_log_ret(y_true, y_pred):
    """Calculates the mean directional accuracy error metric between
    two series of log returns.

    The result is the fraction of positions at which the observed and the
    predicted value have the same sign. Both inputs are flattened first, so
    they are compared element by element.

    :param y_true:           The observed values
    :type  y_true:           np.array

    :param y_pred:           The predicted values
    :type  y_pred:           np.array

    :return:                 The mean directional accuracy of the series
    :rtype:                  float
    """
    # Flatten both inputs so that e.g. an (n,) array against an (n, 1) array
    # is compared pairwise instead of being broadcast to an n-by-n grid.
    true_direction = np.sign(np.ravel(y_true))
    pred_direction = np.sign(np.ravel(y_pred))
    return np.mean(true_direction == pred_direction)


def mean_directional_accuracy(y_true, y_pred):
    """Calculates the mean directional accuracy error metric
    between two series.

    Each series is differenced along its first axis and the result is the
    fraction of steps at which the observed and the predicted series moved
    in the same direction (same sign of the difference). One-dimensional
    inputs are treated as a single column.

    :param y_true:           The observed values
    :type  y_true:           np.array

    :param y_pred:           The predicted values
    :type  y_pred:           np.array

    :return:                 The mean directional accuracy of the series
    :rtype:                  float
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Promote 1-D input to a single column so that it can be compared with
    # 2-D input without being broadcast across it.
    if y_true.ndim == 1:
        y_true = y_true.reshape(-1, 1)
    if y_pred.ndim == 1:
        y_pred = y_pred.reshape(-1, 1)

    # np.diff along axis 0 is the row-to-row change: a[1:] - a[:-1].
    pred_direction = np.sign(np.diff(y_pred, axis=0))
    true_direction = np.sign(np.diff(y_true, axis=0))
    return np.mean(pred_direction == true_direction)