Source code for diamond.integration_tests.test_clogistic_simulated

import logging
import os
import subprocess
import unittest

import numpy as np
import pandas as pd

from diamond.glms.cumulative_logistic import CumulativeLogisticRegression

# Module-level logger named after this module; its threshold is set to INFO
# so that INFO-and-above records pass this logger's own level check.
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)


class TestCumulativeLogistic(unittest.TestCase):
    """Integration test for ``CumulativeLogisticRegression`` on simulated data.

    The model is fitted on a simulated data set and the estimated
    coefficients and intercepts are compared with the true parameter values
    stored alongside the data.
    """

    @staticmethod
    def _relative_error(estimate, truth):
        """Return the mean squared error of ``estimate`` over mean ``|truth|``."""
        return np.mean((estimate - truth) ** 2) / np.mean(np.abs(truth))

    def _assert_close(self, label, estimate, truth, tol):
        """Log the relative error of ``estimate`` and assert it is below ``tol``."""
        rel_error = self._relative_error(estimate, truth)
        if rel_error > tol:
            LOGGER.warning("relative error of %s = %f > tolerance = %f",
                           label, rel_error, tol)
        else:
            LOGGER.info("relative error of %s = %f < tolerance = %f",
                        label, rel_error, tol)
        self.assertLess(rel_error, tol)

    def test_setUp(self, tol=0.02):
        """Fit the model on simulated data and compare it with the truth.

        All fixture paths are relative, so the test assumes it is run from
        the directory that contains the ``diamond/`` package folder. When the
        simulated data or the covariance file is missing, the R script in the
        fixture folder is run first to produce them.

        Args:
            tol: exclusive upper bound on the relative error (mean squared
                error divided by the mean absolute true value) allowed for
                both the coefficients and the intercepts.

        Raises:
            subprocess.CalledProcessError: if the R script exits with a
                non-zero status.
        """
        # assumes working directory is diamond/
        folder = "diamond/integration_tests/clogistic"
        simulated_data_loc = os.path.join(folder, "simulated_clogistic_df.csv")
        estimated_covariance_loc = os.path.join(
            folder, "simulated_clogistic_covariance.csv")

        resources_exist = (os.path.exists(simulated_data_loc)
                           and os.path.exists(estimated_covariance_loc))
        if not resources_exist:
            LOGGER.info("Simulating data and estimating covariances in R")
            # An argument list avoids the shell, and check_call fails loudly
            # here instead of letting a missing CSV surface later.
            subprocess.check_call(
                ["Rscript",
                 os.path.join(folder, "clogistic_generate_and_fit.R")])

        LOGGER.info(
            "Reading in training data and R::ordinal-estimated covariance matrix")
        df_train = pd.read_csv(simulated_data_loc)
        df_estimated_covariance = pd.read_csv(estimated_covariance_loc)

        self.formula = "y ~ x + (1 + x | level)"
        self.model = CumulativeLogisticRegression(
            train_df=df_train,
            priors_df=df_estimated_covariance,
            copy=True,
            test_df=None)

        LOGGER.info("Fitting model in diamond")
        results = self.model.fit(self.formula, tol=1e-3, max_its=5,
                                 verbose=True)

        # the format of the coefficient vector is:
        # fixed effects, then [random intercept, random slope] for each level
        fixed_effects = results["main"]["main_value"].values
        random_effects = (pd.melt(results["level"], "level")
                          .sort_values(["level", "variable"])["value"]
                          .values)
        beta_hat = np.append(fixed_effects, random_effects)

        # drop the 0 value at the head of beta_true
        # this is a placeholder, which reflects that there is no fixed
        # intercept in this model
        beta_true = pd.read_csv(
            os.path.join(folder, "simulated_clogistic_true_parameters.csv")
        )["x"].values[1:]

        # make sure the coefficients are very close
        self._assert_close("coefs", beta_hat, beta_true, tol)

        # check intercepts, too
        # `.loc` is label-based and inclusive: rows labelled 1 through 3 of
        # column "x". It replaces the `.ix` indexer removed from pandas.
        alpha_true = pd.read_csv(
            os.path.join(folder, "simulated_clogistic_true_intercepts.csv")
        ).loc[1:3, "x"].values
        alpha_hat = results["intercepts"]
        self._assert_close("intercepts", alpha_hat, alpha_true, tol)
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__': unittest.main()