Source code for pharmpy.tools.bootstrap.results

import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import scipy.stats

import pharmpy.visualization
from pharmpy.model_factory import Model
from pharmpy.results import ModelfitResults, Results
from pharmpy.tools.psn_helpers import cmd_line_model_path, model_paths


[docs]class BootstrapResults(Results):
    rst_path = Path(__file__).parent / 'report.rst'

    # FIXME: Should inherit from results that take multiple runs like bootstrap, cdd etc.
    def __init__(
        self,
        parameter_statistics=None,
        parameter_distribution=None,
        covariance_matrix=None,
        ofv_distribution=None,
        ofv_statistics=None,
        included_individuals=None,
        ofvs=None,
        parameter_estimates=None,
        ofv_plot=None,
        parameter_estimates_correlation_plot=None,
        dofv_quantiles_plot=None,
        parameter_estimates_histogram=None,
    ):
        self.parameter_statistics = parameter_statistics
        self.parameter_distribution = parameter_distribution
        self.covariance_matrix = covariance_matrix
        self.ofv_distribution = ofv_distribution
        self.ofv_statistics = ofv_statistics
        self.included_individuals = included_individuals
        self.ofvs = ofvs
        self.parameter_estimates = parameter_estimates
        self.ofv_plot = ofv_plot
        self.parameter_estimates_correlation_plot = parameter_estimates_correlation_plot
        self.dofv_quantiles_plot = dofv_quantiles_plot
        self.parameter_estimates_histogram = parameter_estimates_histogram

[docs]    def add_plots(self):
        self.ofv_plot = self.plot_ofv()
        self.parameter_estimates_correlation_plot = self.plot_parameter_estimates_correlation()
        self.dofv_quantiles_plot = self.plot_dofv_quantiles()
        self.parameter_estimates_histogram = self.plot_parameter_estimates_histogram()

[docs]    def plot_ofv(self):
        plot = pharmpy.visualization.histogram(
            self.ofvs['bootstrap_bootdata_ofv'], title='Bootstrap OFV'
        )
        return plot

[docs]    def plot_dofv_quantiles(self):
        ofvs = self.ofvs
        dofvs = ofvs['delta_bootdata'].sort_values().reset_index(drop=True)
        dofvs_boot_base = ofvs['delta_origdata'].sort_values().reset_index(drop=True)
        quantiles = np.linspace(0.0, 1.0, num=len(dofvs))
        degrees = len(self.parameter_distribution)
        chi2_dist = scipy.stats.chi2(df=degrees)
        chi2 = chi2_dist.ppf(quantiles)
        degrees_dofvs = self.ofv_statistics['mean']['delta_bootdata']
        degrees_boot_base = self.ofv_statistics['mean']['delta_origdata']
        df_dict = {'quantiles': quantiles, f'Reference χ²({degrees})': chi2}
        if not np.isnan(degrees_dofvs):
            df_dict[
                (
                    'Original model OFV - Bootstrap model OFV (both using bootstrap datasets)',
                    f'Estimated df = {degrees_dofvs:.2f}',
                )
            ] = dofvs
        if not np.isnan(degrees_boot_base):
            df_dict[
                (
                    'Bootstrap model OFV - Original model OFV (both using original dataset)',
                    f'Estimated df = {degrees_boot_base:.2f}',
                )
            ] = dofvs_boot_base
        df = pd.DataFrame(df_dict)
        plot = pharmpy.visualization.line_plot(
            df,
            'quantiles',
            xlabel='Distribution quantiles',
            ylabel='dOFV',
            legend_title='Distribution',
            title='dOFV quantiles',
        )
        return plot

[docs]    def plot_parameter_estimates_correlation(self):
        pe = self.parameter_estimates
        plot = pharmpy.visualization.scatter_matrix(pe)
        return plot

[docs]    def plot_parameter_estimates_histogram(self):
        pe = self.parameter_estimates
        plot = pharmpy.visualization.facetted_histogram(pe)
        return plot


[docs]def calculate_results(
    bootstrap_models, original_model=None, included_individuals=None, dofv_results=None
):
    results = [m.modelfit_results for m in bootstrap_models if m.modelfit_results is not None]
    if original_model:
        original_results = original_model.modelfit_results
    else:
        original_results = None

    if original_results is None:
        warnings.warn(
            'No results for the base model could be read. Cannot calculate bias and '
            'original_bootdata_ofv'
        )

    df = pd.DataFrame()
    for res in results:
        df = df.append(res.parameter_estimates, ignore_index=True, sort=False)
    df = df.reindex(results[0].parameter_estimates.index, axis=1)
    parameter_estimates = df.reset_index(drop=True)

    covariance_matrix = df.cov()

    df = parameter_estimates.copy()
    mean = df.mean()
    if original_results is not None:
        orig = original_results.parameter_estimates
        bias = mean - orig
    else:
        bias = np.nan
    statistics = pd.DataFrame(
        {'mean': mean, 'median': df.median(), 'bias': bias, 'stderr': df.std()}
    )
    statistics['RSE'] = statistics['stderr'] / statistics['mean']

    df = parameter_estimates
    distribution = create_distribution(df)

    boot_ofvs = [x.ofv for x in results]
    ofvs = pd.Series(boot_ofvs, name='bootstrap_bootdata_ofv')
    ofvs = pd.DataFrame(ofvs)
    ofvs['original_bootdata_ofv'] = np.nan
    ofvs['bootstrap_origdata_ofv'] = np.nan
    if original_results is not None:
        base_iofv = original_results.individual_ofv
        if included_individuals and base_iofv is not None:
            for i, included in enumerate(included_individuals):
                base_ofv = base_iofv[included].sum()
                ofvs.at[i, 'original_bootdata_ofv'] = base_ofv

    if dofv_results is not None:
        for i, res in enumerate(dofv_results):
            if res is not None:
                ofvs.at[i, 'bootstrap_origdata_ofv'] = res.ofv

    if original_results is not None:
        base_ofv = original_results.ofv
        ofvs['original_origdata_ofv'] = base_ofv

    ofvs['delta_bootdata'] = ofvs['original_bootdata_ofv'] - ofvs['bootstrap_bootdata_ofv']

    if original_results is not None:
        ofvs['delta_origdata'] = ofvs['bootstrap_origdata_ofv'] - base_ofv
    else:
        ofvs['delta_origdata'] = np.nan

    with warnings.catch_warnings():
        # Catch numpy warnings beause of NaN in ofvs
        warnings.filterwarnings('ignore', r'All-NaN slice encountered')
        warnings.filterwarnings('ignore', 'Mean of empty slice')
        ofv_dist = create_distribution(ofvs)
        ofv_stats = pd.DataFrame(
            {'mean': ofvs.mean(), 'median': ofvs.median(), 'stderr': ofvs.std()}
        )

    res = BootstrapResults(
        covariance_matrix=covariance_matrix,
        parameter_statistics=statistics,
        parameter_distribution=distribution,
        ofv_distribution=ofv_dist,
        ofv_statistics=ofv_stats,
        included_individuals=included_individuals,
        ofvs=ofvs,
        parameter_estimates=parameter_estimates,
    )

    return res


[docs]def create_distribution(df):
    dist = pd.DataFrame(
        {
            'min': df.min(),
            '0.05%': df.quantile(0.0005),
            '0.5%': df.quantile(0.005),
            '2.5%': df.quantile(0.025),
            '5%': df.quantile(0.05),
            'median': df.median(),
            '95%': df.quantile(0.95),
            '97.5%': df.quantile(0.975),
            '99.5%': df.quantile(0.995),
            '99.95%': df.quantile(0.9995),
            'max': df.max(),
        }
    )
    return dist


[docs]def psn_bootstrap_results(path):
    """Create bootstrapresults from a PsN bootstrap run

    :param path: Path to PsN boostrap run directory
    :return: A :class:`BootstrapResults` object
    """
    path = Path(path)

    models = [Model(p) for p in model_paths(path, 'bs_pr1_*.mod')]
    # Read the results already now to give an appropriate error if no results exists
    results = [m.modelfit_results for m in models if m.modelfit_results is not None]
    if not results:
        raise FileNotFoundError("No model results available in m1")
    try:
        base_model = Model(cmd_line_model_path(path))
    except FileNotFoundError:
        base_model = None

    # Read dOFV results in NONMEM specific way. Models have multiple $PROBLEM
    # Create proper result objects to pass to calculate_results
    dofv_results = None
    if (path / 'm1' / 'dofv_1.mod').is_file():
        from pharmpy.plugins.nonmem.table import NONMEMTableFile

        dofv_results = []
        for table_path in (path / 'm1').glob('dofv_*.ext'):
            table_file = NONMEMTableFile(table_path)
            next_table = 1
            for table in table_file:
                while next_table != table.number:
                    dofv_results.append(None)
                    next_table += 1
                res = ModelfitResults(ofv=table.final_ofv)
                dofv_results.append(res)
                next_table += 1

    incinds = pd.read_csv(path / 'included_individuals1.csv', header=None).values.tolist()
    res = calculate_results(
        models, original_model=base_model, included_individuals=incinds, dofv_results=dofv_results
    )
    return res
Quick search

Table of Contents

Source code for pharmpy.tools.bootstrap.results