Source code for pharmpy.modeling.ml

from pathlib import Path

from pharmpy.deps import numpy as np
from pharmpy.deps import pandas as pd

from ..model import ModelfitResultsError
from .common import get_model_covariates
from .data import (
    get_ids,
    get_number_of_individuals,
    get_number_of_observations,
    get_number_of_observations_per_individual,
)


def _all_parameters(model):
    # All parameter estimates including fixed
    # This might warrant a function in modeling
    d = {p.name: p.init for p in model.parameters}
    d.update(dict(model.modelfit_results.parameter_estimates))
    return pd.Series(d)


def _create_dataset(model):
    res = model.modelfit_results
    if res is None:
        raise ModelfitResultsError("Need ModelfitResults for model")
    if res.residuals is None:
        raise ModelfitResultsError("No residuals available in ModelfitResults")
    if 'CWRES' not in res.residuals.columns:
        raise ModelfitResultsError("CWRES not an available residual in ModelfitResults")
    idcol = model.datainfo.id_column.name
    nids = get_number_of_individuals(model)
    nobs = get_number_of_observations(model)
    nobsi = get_number_of_observations_per_individual(model)
    cwres = res.residuals['CWRES']
    iofv = res.individual_estimates
    npar = len(model.parameters)
    ofv = res.ofv

    # Max ratio of abs(ETAi) and omegai
    variance_omegas = [
        model.random_variables.get_variance(rv.name).name for rv in model.random_variables.etas
    ]
    all_paramvals = _all_parameters(model)
    omega_estimates = np.sqrt(all_paramvals[variance_omegas])
    abs_ebes = res.individual_estimates.abs()
    ebe_ratio = abs_ebes / list(omega_estimates)
    max_ebe_ratio = ebe_ratio.max(axis=1).fillna(1.0)
    # Set to 1 if division by zero, e.g. from omega fix to 0

    # exp(OFVi / nobsi) / exp(OFV / nobs)
    iofv = res.individual_ofv
    ofv_ratio = np.exp(iofv / nobsi) / np.exp(model.modelfit_results.ofv / nobs)

    # mean(ETA / OMEGA)
    cov = res.individual_estimates_covariance
    etc_diag = np.sqrt(pd.DataFrame([np.diag(y) for y in cov], columns=cov.iloc[0].columns))
    etc_ratio = etc_diag / list(omega_estimates)
    mean_etc_ratio = etc_ratio.mean(axis=1).fillna(1.0)
    mean_etc_ratio.index = ofv_ratio.index

    # max((abs(indcov - mean(cov))) / sd(cov))
    cov_names = get_model_covariates(model, strings=True)
    if len(cov_names) > 0:
        covariates = model.dataset[cov_names + [idcol]].set_index(idcol)
        mean_covs = covariates.groupby(idcol).mean()
        maxcov = (abs(mean_covs - mean_covs.mean()) / mean_covs.std()).max(axis=1)
    else:
        maxcov = 0.0

    df = pd.DataFrame(
        {
            'nids': nids,
            'nobs': nobs,
            'nobs_subj': nobsi / (nobs / nids),
            'nobsi': nobsi,
            'ncovs': len(cov_names),
            'maxcov': maxcov,
            'max_cwres': abs(cwres).groupby(idcol).max(),
            'median_cwres': abs(cwres).groupby(idcol).median(),
            'max_ebe_ratio': max_ebe_ratio,
            'ofv_ratio': ofv_ratio,
            'etc_ratio': mean_etc_ratio,
            'iofv': iofv,
            'npar': npar,
            'ofv': ofv,
        }
    )
    return df


[docs]def predict_outliers(model, cutoff=3.0): """Predict outliers for a model using a machine learning model. See the :ref:`simeval <Individual OFV summary>` documentation for a definition of the `residual` Parameters ---------- model : Model Pharmpy model cutoff : float Cutoff threshold for a residual singalling an outlier Returns ------- pd.Dataframe Dataframe over the individuals with a `residual` column containing the raw predicted residuals and a `outlier` column with a boolean to tell whether the individual is an outlier or not. Examples -------- >>> from pharmpy.modeling import * >>> model = load_example_model("pheno") >>> predict_outliers(model) # doctest: +SKIP residual outlier ID 1 -0.281443 False 2 1.057392 False 3 -0.119105 False 4 -0.846849 False 5 0.600540 False 6 1.014008 False 7 -0.750734 False 8 0.247175 False 9 0.117002 False 10 -0.835389 False 11 3.529582 True 12 -0.035670 False 13 0.292333 False 14 0.303278 False 15 -0.565949 False 16 -0.078192 False 17 -0.291295 False 18 2.466421 False 19 -0.402343 False 20 -0.699996 False 21 -0.567987 False 22 -0.526776 False 23 0.303918 False 24 -0.177588 False 25 1.272142 False 26 -0.390000 False 27 0.775876 False 28 -0.501528 False 29 -0.700951 False 30 -0.352599 False 31 0.294196 False 32 0.744014 False 33 -0.215364 False 34 0.208691 False 35 1.713130 False 36 0.300293 False 37 -0.810736 False 38 0.459877 False 39 -0.675125 False 40 -0.563835 False 41 -0.526945 False 42 4.449199 True 43 0.720714 False 44 -0.792248 False 45 -0.860923 False 46 -0.731858 False 47 -0.247131 False 48 1.894190 False 49 -0.282737 False 50 -0.153398 False 51 1.200546 False 52 0.902774 False 53 0.586427 False 54 0.183329 False 55 1.036930 False 56 -0.639868 False 57 -0.765279 False 58 -0.209665 False 59 -0.225693 False See also -------- predict_influential_individuals predict_influential_outliers """ model_path = Path(__file__).parent.resolve() / 'ml_models' / 'outliers.tflite' data = _create_dataset(model) output = _predict_with_tflite(model_path, data) df = pd.DataFrame({'residual': output, 'outlier': output > cutoff}, index=get_ids(model)) df.index.name = model.datainfo.id_column.name return df
[docs]def predict_influential_individuals(model, cutoff=3.84): """Predict influential individuals for a model using a machine learning model. Parameters ---------- model : Model Pharmpy model cutoff : float Cutoff threshold for a dofv signalling an influential individual Returns ------- pd.Dataframe Dataframe over the individuals with a `dofv` column containing the raw predicted delta-OFV and an `influential` column with a boolean to tell whether the individual is influential or not. See also -------- predict_influential_outliers predict_outliers """ model_path = Path(__file__).parent.resolve() / 'ml_models' / 'infinds.tflite' data = _create_dataset(model) output = _predict_with_tflite(model_path, data) df = pd.DataFrame({'dofv': output, 'influential': output > cutoff}, index=get_ids(model)) df.index.name = model.datainfo.id_column.name return df
[docs]def predict_influential_outliers(model, outlier_cutoff=3, influential_cutoff=3.84): """Predict influential outliers for a model using a machine learning model. Parameters ---------- model : Model Pharmpy model outlier_cutoff : float Cutoff threshold for a residual singalling an outlier influential_cutoff : float Cutoff threshold for a dofv signalling an influential individual Returns ------- pd.Dataframe Dataframe over the individuals with a `outliers` and `dofv` columns containing the raw predictions and `influential`, `outlier` and `influential_outlier` boolean columns. See also -------- predict_influential_individuals predict_outliers """ outliers = predict_outliers(model, cutoff=outlier_cutoff) infinds = predict_influential_individuals(model, cutoff=influential_cutoff) df = pd.concat([outliers, infinds], axis=1) df['influential_outlier'] = df['outlier'] & df['influential'] return df
def _predict_with_tflite(model_path, data): import tflite_runtime.interpreter as tflite interpreter = tflite.Interpreter(str(model_path)) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() npdata = data.astype('float32').values nrows = len(data) output = np.empty(nrows) for i in range(0, nrows): interpreter.set_tensor(input_details[0]['index'], npdata[i : (i + 1), :]) interpreter.invoke() output_data = interpreter.get_tensor(output_details[0]['index']) output[i] = output_data return output