Source code for pharmpy.modeling.summarize_individuals

import importlib.util
import warnings
from typing import Any, Callable, Dict, List, Union

from pharmpy.deps import numpy as np
from pharmpy.deps import pandas as pd
from pharmpy.model import Model, ModelfitResultsError
from pharmpy.modeling.ml import predict_influential_individuals, predict_outliers

# Annotation-only aliases used by the signatures in this module. They are bound
# to Any rather than to the pandas classes so that annotating with them does
# not force pandas to be loaded (see the lazy-loading notes below).
DataFrame = Any  # NOTE should be pd.DataFrame but we want lazy loading
Series = Any  # NOTE same with pd.Series


def summarize_individuals(models: List[Model]) -> Union[DataFrame, None]:
    """Creates a summary dataframe keyed by model-individual pairs for an input
    list of models.

    Content of the various columns:

    +-------------------------+----------------------------------------------------------------------+
    | Column                  | Description                                                          |
    +=========================+======================================================================+
    | ``outlier_count``       | Number of observations with abs(CWRES) > 5                           |
    +-------------------------+----------------------------------------------------------------------+
    | ``ofv``                 | Individual OFV                                                       |
    +-------------------------+----------------------------------------------------------------------+
    | ``dofv_vs_parent``      | Difference in individual OFV between this model and its parent model |
    +-------------------------+----------------------------------------------------------------------+
    | ``predicted_dofv``      | Predicted dOFV if this individual was excluded                       |
    +-------------------------+----------------------------------------------------------------------+
    | ``predicted_residual``  | Predicted residual                                                   |
    +-------------------------+----------------------------------------------------------------------+

    Parameters
    ----------
    models : List[Model]
        Input models

    Returns
    -------
    pd.DataFrame | None
        The summary as a dataframe, indexed by model name and individual.
        None if ``models`` is empty.

    Examples
    --------
    >>> from pharmpy.modeling import *
    >>> model = load_example_model("pheno")
    >>> from pharmpy.tools import fit
    >>> fit(model)
    <Pharmpy model object pheno>
    >>> from pharmpy.tools import run_tool # doctest: +SKIP
    >>> results = run_tool(
    ...   'modelsearch',
    ...   model=model,
    ...   mfl='ABSORPTION(ZO);PERIPHERALS([1, 2])',
    ...   algorithm='reduced_stepwise'
    ... ) # doctest: +SKIP
    >>> summarize_individuals([results.start_model, *results.models]) # doctest: +SKIP

    """  # noqa: E501
    if not models:
        # pd.concat raises on an empty sequence of frames; return the
        # documented "nothing to summarize" value instead.
        return None

    # Lookup table used to resolve each model's parent by name
    modelsDict = {model.name: model for model in models}

    # The predictions rely on tflite; without it the prediction columns
    # are filled with NaN, so tell the user why.
    spec = importlib.util.find_spec('tflite_runtime')
    if spec is None:
        warnings.warn("tflite is not installed, using NaN for predictions")

    # One per-individual frame per model, stacked under an outer 'model'
    # index level. verify_integrity makes duplicated (model, individual)
    # pairs an error rather than silently producing an ambiguous index.
    df = pd.concat(
        [groupedByIDAddColumnsOneModel(modelsDict, model) for model in models],
        keys=[model.name for model in models],
        names=['model'],
        axis=0,
        verify_integrity=True,
    )

    return df
def parent_model_name(model: Model) -> str:
    """Return the ``parent_model`` attribute of ``model``."""
    return model.parent_model


def model_name(model: Model) -> str:
    """Return the ``name`` attribute of ``model``."""
    return model.name


def outlier_count_func(df: DataFrame) -> float:
    """Count the entries of ``df`` whose absolute value is greater than 5."""
    exceeds_threshold = abs(df) > 5
    # NOTE the count is converted to float because it will potentially be
    # concatenated with NaNs
    return float(exceeds_threshold.sum())


def outlier_count(model: Model) -> Union[Series, float]:
    """Count outlying CWRES values per ID for ``model``.

    Returns NaN when the model has no modelfit results or the results have
    no residuals.
    """
    results = model.modelfit_results
    if results is None:
        return np.nan

    residuals = results.residuals
    if residuals is None:
        return np.nan

    cwres_by_id = residuals.groupby('ID')['CWRES']
    return cwres_by_id.agg(outlier_count_func)


def _predicted(
    predict: Callable[[Model], DataFrame], model: Model, column: str
) -> Union[Series, float]:
    """Run ``predict`` on ``model`` and return the requested ``column``.

    Returns NaN if ``predict`` raises ModelfitResultsError or ImportError,
    or if it returns None.
    """
    try:
        prediction = predict(model)
    except (ModelfitResultsError, ImportError):
        return np.nan

    return np.nan if prediction is None else prediction[column]


def predicted_residual(model: Model) -> Union[Series, float]:
    """Return the 'residual' column of ``predict_outliers``, or NaN."""
    return _predicted(predict_outliers, model, 'residual')


def predicted_dofv(model: Model) -> Union[Series, float]:
    """Return the 'dofv' column of ``predict_influential_individuals``, or NaN."""
    return _predicted(predict_influential_individuals, model, 'dofv')


def ofv(model: Model) -> Union[Series, float]:
    """Return ``individual_ofv`` of the model's fit results, or NaN if there are none."""
    results = model.modelfit_results
    if results is None:
        return np.nan
    return results.individual_ofv


def dofv(parent_model: Union[Model, None], candidate_model: Model) -> Union[Series, float]:
    """Return parent OFV minus candidate OFV, or NaN if ``parent_model`` is None."""
    if parent_model is None:
        return np.nan
    return ofv(parent_model) - ofv(candidate_model)


def groupedByIDAddColumnsOneModel(modelsDict: Dict[str, Model], model: Model) -> DataFrame:
    """Build the summary frame of a single model.

    The frame is indexed by the unique values of the dataset's ID column.
    The parent model is looked up by name in ``modelsDict``; when it is not
    present the ``dofv_vs_parent`` column is NaN.
    """
    id_name = model.datainfo.id_column.name
    individuals = pd.Index(data=model.dataset[id_name].unique(), name=id_name)

    columns = {
        'parent_model': parent_model_name(model),
        'outlier_count': outlier_count(model),
        'ofv': ofv(model),
        'dofv_vs_parent': dofv(modelsDict.get(model.parent_model), model),
        'predicted_dofv': predicted_dofv(model),
        'predicted_residual': predicted_residual(model),
    }

    return pd.DataFrame(columns, index=individuals)
def summarize_individuals_count_table(models=None, df=None):
    r"""Create a count table for individual data

    Content of the various columns:

    +-------------------------+------------------------------------------------------------------------------------------------+
    | Column                  | Description                                                                                    |
    +=========================+================================================================================================+
    | ``inf_selection``       | Number of subjects influential on model selection.                                             |
    |                         | :math:`\mathrm{OFV}_{parent} - \mathrm{OFV} > 3.84 \veebar`                                    |
    |                         | :math:`\mathrm{OFV}_{parent} - \mathrm{iOFV}_{parent} - (\mathrm{OFV} - \mathrm{iOFV}) > 3.84` |
    +-------------------------+------------------------------------------------------------------------------------------------+
    | ``inf_params``          | Number of subjects influential on parameters. predicted_dofv > 3.84                            |
    +-------------------------+------------------------------------------------------------------------------------------------+
    | ``out_obs``             | Number of subjects having at least one outlying observation (CWRES > 5)                        |
    +-------------------------+------------------------------------------------------------------------------------------------+
    | ``out_ind``             | Number of outlying subjects. predicted_residual > 3.0                                          |
    +-------------------------+------------------------------------------------------------------------------------------------+
    | ``inf_outlier``         | Number of subjects both influential by any criteria and outlier by any criteria                |
    +-------------------------+------------------------------------------------------------------------------------------------+

    Parameters
    ----------
    models : list of models
        List of models to summarize. If non-empty, it takes precedence
        over ``df``.
    df : pd.DataFrame
        Output from a previous call to summarize_individuals.

    Returns
    -------
    pd.DataFrame | None
        Table with one row per model. None if neither a non-empty
        ``models`` nor a ``df`` is available.

    See also
    --------
    summarize_individuals : Get raw individual data

    """  # noqa: E501
    if models:
        df = summarize_individuals(models)
    if df is None:
        return None

    # Per-individual boolean flags
    is_out_obs = df['outlier_count'] > 0.0
    is_out_ind = df['predicted_residual'] > 3.0
    is_inf_params = df['predicted_dofv'] > 3.84

    # Number of flagged individuals per model, in order of appearance
    out_obs = is_out_obs.groupby(level='model', sort=False).sum().astype('int32')
    out_ind = is_out_ind.groupby(level='model', sort=False).sum().astype('int32')
    inf_params = is_inf_params.groupby(level='model', sort=False).sum().astype('int32')

    # Take every ninds-th row to get one parent name per model.
    # NOTE(review): this relies on each model contributing exactly ninds
    # consecutive rows - confirm against how df is produced.
    ninds = len(df.index.unique(level='ID'))
    parents = df['parent_model'].iloc[::ninds]
    # Individual OFVs of each model's parent, re-labelled so that they line
    # up row by row with the model's own individual OFVs
    parent_ofvs = df.loc[parents]['ofv'].reset_index(drop=True)
    parent_ofvs.index = df.index

    # The start model is the first model listed as its own parent. There does
    # not have to be one, so start_name must stay defined when none is found.
    start_name = None
    for name in df.index.unique(level='model'):
        if name == df.loc[name]['parent_model'].iloc[0]:
            start_name = name
            break

    ofv_sums = df['ofv'].groupby('model').sum()
    parent_sums = parent_ofvs.groupby('model').sum()
    full_ofv_diff = parent_sums - ofv_sums
    if start_name is not None:
        # A start model is not compared against itself
        full_ofv_diff.loc[start_name] = 0
    # OFV difference between parent and model with one individual removed
    removed_diff = (parent_sums - parent_ofvs) - (ofv_sums - df['ofv'])
    # Influential on selection: the 3.84 criterion is met either with or
    # without the individual, but not both
    is_inf_selection = (full_ofv_diff > 3.84) ^ (removed_diff > 3.84)
    inf_selection = is_inf_selection.groupby(level='model', sort=False).sum().astype('int32')

    is_inf_outlier = (is_out_obs | is_out_ind) & (is_inf_params | is_inf_selection)
    inf_outlier = is_inf_outlier.groupby(level='model', sort=False).sum().astype('int32')

    # Re-key the parent names by model so they align with the count columns
    parents.index = inf_selection.index
    res = pd.DataFrame(
        {
            'parent_model': parents,
            'inf_selection': inf_selection,
            'inf_params': inf_params,
            'out_obs': out_obs,
            'out_ind': out_ind,
            'inf_outlier': inf_outlier,
        }
    )
    return res