from __future__ import annotations
import warnings
from functools import partial
from typing import Literal, Optional, Union
from pharmpy.deps import numpy as np
from pharmpy.deps import pandas as pd
from pharmpy.internals.math import is_posdef, nearest_positive_semidefinite
from pharmpy.model import Model
[docs]
def create_rng(seed: Optional[Union[np.random.Generator, int]] = None):
"""Create a new random number generator
Pharmpy functions that use random sampling take a random number generator or seed as input.
This function can be used to create a default new random number generator.
Parameters
----------
seed : int or rng
Seed for the random number generator or None (default) for a randomized seed. If seed
is generator it will be passed through.
Returns
-------
Generator
Initialized numpy random number generator object
Examples
--------
>>> from pharmpy.modeling import create_rng
>>> rng = create_rng(23)
>>> rng.standard_normal()
0.5532605888887387
"""
if isinstance(seed, np.random.Generator):
rng = seed
elif isinstance(seed, float) and int(seed) == seed:
# Case to support int-like floats in pharmr
rng = np.random.default_rng(int(seed))
else:
rng = np.random.default_rng(seed)
return rng
def _sample_truncated_joint_normal(sigma, mu, a, b, n, rng):
"""Give an array of samples from the truncated joint normal distribution using sample rejection
- mu, sigma - parameters for the normal distribution
- a, b - vectors of lower and upper limits for each random variable
- n - number of samples
"""
if not is_posdef(sigma):
raise ValueError("Covariance matrix not positive definite")
kept_samples = np.empty((0, len(mu)))
remaining = n
while remaining > 0:
samples = rng.multivariate_normal(
mu, sigma, size=remaining, method="cholesky", check_valid='ignore'
)
in_range = np.logical_and(samples > a, samples < b).all(axis=1)
kept_samples = np.concatenate((kept_samples, samples[in_range]))
remaining = n - len(kept_samples)
return kept_samples
def _sample_from_function(
model,
parameter_estimates,
samplingfn,
force_posdef_samples=None,
n=1,
seed=None,
):
"""Sample parameter vectors using a general function
The sampling function will be given three arguments:
- estimated parameter values
- lower - lower bounds of parameters
- upper - upper bounds of parameters
- n - number of samples
"""
rng = create_rng(seed)
parameter_estimates = parameter_estimates[
parameter_estimates.index.isin(model.parameters.nonfixed.names)
]
pe = parameter_estimates.to_numpy()
parameter_summary = model.parameters.to_dataframe().loc[parameter_estimates.keys()]
parameter_summary = parameter_summary[~parameter_summary['fix']]
lower = parameter_summary.lower.astype('float64').to_numpy()
upper = parameter_summary.upper.astype('float64').to_numpy()
# reject non-posdef
kept_samples = pd.DataFrame()
remaining = n
if force_posdef_samples == 0:
force_posdef = True
else:
force_posdef = False
i = 0
while remaining > 0:
samples = samplingfn(pe, lower, upper, n=remaining, rng=rng)
df = pd.DataFrame(samples, columns=parameter_estimates.keys())
if not force_posdef:
selected = df[df.apply(model.random_variables.validate_parameters, axis=1)]
else:
rvs = model.random_variables
selected = df.transform(
lambda row: pd.Series(rvs.nearest_valid_parameters(row)), axis=1
)
kept_samples = pd.concat((kept_samples, selected))
remaining = n - len(kept_samples)
i += 1
if not force_posdef and force_posdef_samples is not None and i >= force_posdef_samples:
force_posdef = True
return kept_samples.reset_index(drop=True)
[docs]
def sample_parameters_from_covariance_matrix(
model: Model,
parameter_estimates: pd.Series,
covariance_matrix: pd.DataFrame,
force_posdef_samples: Optional[int] = None,
force_posdef_covmatrix: bool = False,
n: int = 1,
seed: Optional[Union[np.random.Generator, int]] = None,
):
"""Sample parameter vectors using the covariance matrix
If parameters is not provided all estimated parameters will be used
Parameters
----------
model : Model
Input model
parameter_estimates : pd.Series
Parameter estimates to use as means in sampling
covariance_matrix : pd.DataFrame
Parameter uncertainty covariance matrix
force_posdef_samples : int
Set to how many iterations to do before forcing all samples to be positive definite. None is
default and means never and 0 means always
force_posdef_covmatrix : bool
Set to True to force the input covariance matrix to be positive definite
n : int
Number of samples
seed : Generator
Random number generator
Returns
-------
pd.DataFrame
A dataframe with one sample per row
Example
-------
>>> from pharmpy.modeling import *
>>> from pharmpy.tools import load_example_modelfit_results
>>> model = load_example_model("pheno")
>>> results = load_example_modelfit_results("pheno")
>>> rng = create_rng(23)
>>> cov = results.covariance_matrix
>>> pe = results.parameter_estimates
>>> sample_parameters_from_covariance_matrix(model, pe, cov, n=3, seed=rng)
POP_CL POP_VC COVAPGR IIV_CL IIV_VC SIGMA
0 0.004887 1.000761 0.198184 0.034860 0.031391 0.013750
1 0.004631 1.024746 0.071056 0.031726 0.026824 0.012597
2 0.004631 0.991088 0.130841 0.027464 0.024589 0.013215
See also
--------
sample_parameters_uniformly : Sample parameter vectors using uniform distribution
sample_individual_estimates : Sample individual estiates given their covariance
"""
sigma = covariance_matrix.loc[parameter_estimates.keys(), parameter_estimates.keys()].to_numpy()
if not is_posdef(sigma):
if force_posdef_covmatrix:
old_sigma = sigma
sigma = nearest_positive_semidefinite(sigma)
delta_frobenius = np.linalg.norm(old_sigma) - np.linalg.norm(sigma)
delta_max = np.abs(old_sigma).max() - np.abs(sigma).max()
warnings.warn(
f'Covariance matrix was forced to become positive definite.\n'
f' Difference in the frobenius norm: {delta_frobenius:.3e}\n'
f' Difference in the max norm: {delta_max:.3e}\n'
)
else:
raise ValueError("Uncertainty covariance matrix not positive-definite")
fn = partial(_sample_truncated_joint_normal, sigma)
samples = _sample_from_function(
model, parameter_estimates, fn, force_posdef_samples=force_posdef_samples, n=n, seed=seed
)
return samples
[docs]
def sample_individual_estimates(
model: Model,
individual_estimates: pd.DataFrame,
individual_estimates_covariance: pd.DataFrame,
parameters: Optional[list[str]] = None,
samples_per_id: int = 100,
seed: Optional[Union[np.random.Generator, int]] = None,
):
"""Sample individual estimates given their covariance.
Parameters
----------
model : Model
Pharmpy model
individual_estimates : pd.DataFrame
Individual estimates to use
individual_estimates_covariance : pd.DataFrame
Uncertainty covariance of the individual estimates
parameters : list
A list of a subset of individual parameters to sample. Default is None, which means all.
samples_per_id : int
Number of samples per individual
seed : rng or int
Random number generator or seed
Returns
-------
pd.DataFrame
Pool of samples in a DataFrame
Example
-------
>>> from pharmpy.modeling import create_rng, load_example_model, sample_individual_estimates
>>> from pharmpy.tools import load_example_modelfit_results
>>> model = load_example_model("pheno")
>>> results = load_example_modelfit_results("pheno")
>>> rng = create_rng(23)
>>> ie = results.individual_estimates
>>> iec = results.individual_estimates_covariance
>>> sample_individual_estimates(model, ie, iec, samples_per_id=2, seed=rng)
ETA_CL ETA_VC
ID sample
1 0 -0.127941 0.037273
1 -0.065492 -0.182851
2 0 -0.263323 -0.265849
1 -0.295883 -0.060346
3 0 -0.012108 0.219967
... ... ...
57 1 -0.034279 -0.040988
58 0 -0.187879 -0.143184
1 -0.088845 -0.034655
59 0 -0.187779 -0.014214
1 -0.019953 -0.151151
<BLANKLINE>
[118 rows x 2 columns]
See also
--------
sample_parameters_from_covariance_matrix : Sample parameter vectors using the
uncertainty covariance matrix
sample_parameters_uniformly : Sample parameter vectors using uniform distribution
"""
rng = create_rng(seed)
assert rng is not None
ests = individual_estimates
covs = individual_estimates_covariance
if parameters is None:
parameters = ests.columns
ests = ests[parameters]
samples = pd.DataFrame()
for (idx, mu), sigma in zip(ests.iterrows(), covs):
sigma = sigma.loc[parameters, parameters]
sigma = nearest_positive_semidefinite(sigma)
id_samples = rng.multivariate_normal(mu.values, sigma.values, size=samples_per_id)
id_df = pd.DataFrame(id_samples, columns=ests.columns)
id_df['ID'] = idx
id_df['sample'] = list(range(0, samples_per_id))
id_df.set_index(['ID', 'sample'], drop=True, inplace=True)
samples = pd.concat((samples, id_df))
return samples