Source code for pharmpy.tools.bootstrap.results

import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import scipy.stats

import pharmpy.visualization
from pharmpy.model_factory import Model
from pharmpy.results import ModelfitResults, Results
from pharmpy.tools.psn_helpers import cmd_line_model_path, model_paths


[docs]class BootstrapResults(Results): rst_path = Path(__file__).parent / 'report.rst' # FIXME: Should inherit from results that take multiple runs like bootstrap, cdd etc. def __init__( self, parameter_statistics=None, parameter_distribution=None, covariance_matrix=None, ofv_distribution=None, ofv_statistics=None, included_individuals=None, ofvs=None, parameter_estimates=None, ofv_plot=None, parameter_estimates_correlation_plot=None, dofv_quantiles_plot=None, parameter_estimates_histogram=None, ): self.parameter_statistics = parameter_statistics self.parameter_distribution = parameter_distribution self.covariance_matrix = covariance_matrix self.ofv_distribution = ofv_distribution self.ofv_statistics = ofv_statistics self.included_individuals = included_individuals self.ofvs = ofvs self.parameter_estimates = parameter_estimates self.ofv_plot = ofv_plot self.parameter_estimates_correlation_plot = parameter_estimates_correlation_plot self.dofv_quantiles_plot = dofv_quantiles_plot self.parameter_estimates_histogram = parameter_estimates_histogram
[docs] def add_plots(self): self.ofv_plot = self.plot_ofv() self.parameter_estimates_correlation_plot = self.plot_parameter_estimates_correlation() self.dofv_quantiles_plot = self.plot_dofv_quantiles() self.parameter_estimates_histogram = self.plot_parameter_estimates_histogram()
[docs] def plot_ofv(self): plot = pharmpy.visualization.histogram( self.ofvs['bootstrap_bootdata_ofv'], title='Bootstrap OFV' ) return plot
[docs] def plot_dofv_quantiles(self): ofvs = self.ofvs dofvs = ofvs['delta_bootdata'].sort_values().reset_index(drop=True) dofvs_boot_base = ofvs['delta_origdata'].sort_values().reset_index(drop=True) quantiles = np.linspace(0.0, 1.0, num=len(dofvs)) degrees = len(self.parameter_distribution) chi2_dist = scipy.stats.chi2(df=degrees) chi2 = chi2_dist.ppf(quantiles) degrees_dofvs = self.ofv_statistics['mean']['delta_bootdata'] degrees_boot_base = self.ofv_statistics['mean']['delta_origdata'] df_dict = {'quantiles': quantiles, f'Reference χ²({degrees})': chi2} if not np.isnan(degrees_dofvs): df_dict[ ( 'Original model OFV - Bootstrap model OFV (both using bootstrap datasets)', f'Estimated df = {degrees_dofvs:.2f}', ) ] = dofvs if not np.isnan(degrees_boot_base): df_dict[ ( 'Bootstrap model OFV - Original model OFV (both using original dataset)', f'Estimated df = {degrees_boot_base:.2f}', ) ] = dofvs_boot_base df = pd.DataFrame(df_dict) plot = pharmpy.visualization.line_plot( df, 'quantiles', xlabel='Distribution quantiles', ylabel='dOFV', legend_title='Distribution', title='dOFV quantiles', ) return plot
[docs] def plot_parameter_estimates_correlation(self): pe = self.parameter_estimates plot = pharmpy.visualization.scatter_matrix(pe) return plot
[docs] def plot_parameter_estimates_histogram(self): pe = self.parameter_estimates plot = pharmpy.visualization.facetted_histogram(pe) return plot
[docs]def calculate_results( bootstrap_models, original_model=None, included_individuals=None, dofv_results=None ): results = [m.modelfit_results for m in bootstrap_models if m.modelfit_results is not None] if original_model: original_results = original_model.modelfit_results else: original_results = None if original_results is None: warnings.warn( 'No results for the base model could be read. Cannot calculate bias and ' 'original_bootdata_ofv' ) df = pd.DataFrame() for res in results: df = df.append(res.parameter_estimates, ignore_index=True, sort=False) df = df.reindex(results[0].parameter_estimates.index, axis=1) parameter_estimates = df.reset_index(drop=True) covariance_matrix = df.cov() df = parameter_estimates.copy() mean = df.mean() if original_results is not None: orig = original_results.parameter_estimates bias = mean - orig else: bias = np.nan statistics = pd.DataFrame( {'mean': mean, 'median': df.median(), 'bias': bias, 'stderr': df.std()} ) statistics['RSE'] = statistics['stderr'] / statistics['mean'] df = parameter_estimates distribution = create_distribution(df) boot_ofvs = [x.ofv for x in results] ofvs = pd.Series(boot_ofvs, name='bootstrap_bootdata_ofv') ofvs = pd.DataFrame(ofvs) ofvs['original_bootdata_ofv'] = np.nan ofvs['bootstrap_origdata_ofv'] = np.nan if original_results is not None: base_iofv = original_results.individual_ofv if included_individuals and base_iofv is not None: for i, included in enumerate(included_individuals): base_ofv = base_iofv[included].sum() ofvs.at[i, 'original_bootdata_ofv'] = base_ofv if dofv_results is not None: for i, res in enumerate(dofv_results): if res is not None: ofvs.at[i, 'bootstrap_origdata_ofv'] = res.ofv if original_results is not None: base_ofv = original_results.ofv ofvs['original_origdata_ofv'] = base_ofv ofvs['delta_bootdata'] = ofvs['original_bootdata_ofv'] - ofvs['bootstrap_bootdata_ofv'] if original_results is not None: ofvs['delta_origdata'] = ofvs['bootstrap_origdata_ofv'] - base_ofv else: ofvs['delta_origdata'] = np.nan with warnings.catch_warnings(): # Catch numpy warnings beause of NaN in ofvs warnings.filterwarnings('ignore', r'All-NaN slice encountered') warnings.filterwarnings('ignore', 'Mean of empty slice') ofv_dist = create_distribution(ofvs) ofv_stats = pd.DataFrame( {'mean': ofvs.mean(), 'median': ofvs.median(), 'stderr': ofvs.std()} ) res = BootstrapResults( covariance_matrix=covariance_matrix, parameter_statistics=statistics, parameter_distribution=distribution, ofv_distribution=ofv_dist, ofv_statistics=ofv_stats, included_individuals=included_individuals, ofvs=ofvs, parameter_estimates=parameter_estimates, ) return res
[docs]def create_distribution(df): dist = pd.DataFrame( { 'min': df.min(), '0.05%': df.quantile(0.0005), '0.5%': df.quantile(0.005), '2.5%': df.quantile(0.025), '5%': df.quantile(0.05), 'median': df.median(), '95%': df.quantile(0.95), '97.5%': df.quantile(0.975), '99.5%': df.quantile(0.995), '99.95%': df.quantile(0.9995), 'max': df.max(), } ) return dist
[docs]def psn_bootstrap_results(path): """Create bootstrapresults from a PsN bootstrap run :param path: Path to PsN boostrap run directory :return: A :class:`BootstrapResults` object """ path = Path(path) models = [Model(p) for p in model_paths(path, 'bs_pr1_*.mod')] # Read the results already now to give an appropriate error if no results exists results = [m.modelfit_results for m in models if m.modelfit_results is not None] if not results: raise FileNotFoundError("No model results available in m1") try: base_model = Model(cmd_line_model_path(path)) except FileNotFoundError: base_model = None # Read dOFV results in NONMEM specific way. Models have multiple $PROBLEM # Create proper result objects to pass to calculate_results dofv_results = None if (path / 'm1' / 'dofv_1.mod').is_file(): from pharmpy.plugins.nonmem.table import NONMEMTableFile dofv_results = [] for table_path in (path / 'm1').glob('dofv_*.ext'): table_file = NONMEMTableFile(table_path) next_table = 1 for table in table_file: while next_table != table.number: dofv_results.append(None) next_table += 1 res = ModelfitResults(ofv=table.final_ofv) dofv_results.append(res) next_table += 1 incinds = pd.read_csv(path / 'included_individuals1.csv', header=None).values.tolist() res = calculate_results( models, original_model=base_model, included_individuals=incinds, dofv_results=dofv_results ) return res