import math
import numpy as np
import sympy
# This module could probably be made private.
[docs]def triangular_root(x):
"""Calculate the triangular root of x. I.e. if x is a triangular number T_n what is n?"""
return math.floor(math.sqrt(2 * x))
[docs]def flattened_to_symmetric(x):
"""Convert a vector containing the elements of a lower triangular matrix into a full symmetric
matrix
"""
n = triangular_root(len(x))
new = np.zeros((n, n))
inds = np.tril_indices_from(new)
new[inds] = x
new[(inds[1], inds[0])] = x
return new
[docs]def cov2corr(cov):
"""Convert covariance matrix to correlation matrix"""
v = np.sqrt(np.diag(cov))
outer_v = np.outer(v, v)
corr = cov / outer_v
corr[cov == 0] = 0
return corr
[docs]def corr2cov(corr, sd):
"""Convert correlation matrix to covariance matrix
Parameters
----------
corr
Correlation matrix (ones on diagonal)
sd
One dimensional array of standard deviations
"""
sd_matrix = np.diag(sd)
cov = sd_matrix @ corr @ sd_matrix
return cov
[docs]def round_and_keep_sum(x, s):
"""Round values in Series x and their sum must be s
Algorithm: Floor all elements in series. If sum not correct add one to element with
highest fractional part until sum is reached.
"""
sorted_fractions = x.apply(lambda x: math.modf(x)[0]).sort_values(ascending=False)
rounded_sample_sizes = x.apply(lambda x: math.modf(x)[1])
for (group_index, _) in sorted_fractions.iteritems():
num_samples = rounded_sample_sizes.sum()
diff = s - num_samples
if diff == 0:
break
step = math.copysign(1, diff)
rounded_sample_sizes[group_index] += step
return rounded_sample_sizes.astype('int64')
[docs]def se_delta_method(expr, values, cov):
"""Use the delta method to estimate the standard error of a function of parameters with covariance
matrix available.
Parameters
----------
expr
A sympy expression for the function of parameters
cov
Dataframe with symbol names as indices must include at least all parameters needed for expr
values
dict/series parameter estimates. Must include at least all parameters needed for expr
Returns
-------
Approximation of the standard error
"""
symbs = expr.free_symbols
names_unsorted = [s.name for s in symbs]
# Sort names according to order in cov
names = [y for x in cov.columns for y in names_unsorted if y == x]
cov = cov[names].loc[names]
symb_gradient = [sympy.diff(expr, sympy.Symbol(name)) for name in names]
num_gradient = np.array([float(x.subs(values)) for x in symb_gradient])
se = np.sqrt(num_gradient @ cov.values @ num_gradient.T)
return se
[docs]def is_posdef(A):
"""Checks whether a matrix is positive definite"""
try:
_ = np.linalg.cholesky(A)
return True
except np.linalg.LinAlgError:
return False
[docs]def nearest_posdef(A):
"""Return the nearest positive definite matrix in the Frobenius norm to a matrix
A Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which
credits [2].
[1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd
[2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6
[3] https://gist.github.com/fasiha/fdb5cec2054e6f1c6ae35476045a0bbd
"""
if is_posdef(A):
return A
B = (A + A.T) / 2
_, s, V = np.linalg.svd(B)
H = np.dot(V.T, np.dot(np.diag(s), V))
A2 = (B + H) / 2
A3 = (A2 + A2.T) / 2
if is_posdef(A3):
return A3
spacing = np.spacing(np.linalg.norm(A))
# The above is different from [1]. It appears that MATLAB's `chol` Cholesky
# decomposition will accept matrixes with exactly 0-eigenvalue, whereas
# Numpy's will not. So where [1] uses `eps(mineig)` (where `eps` is Matlab
# for `np.spacing`), we use the above definition. CAVEAT: our `spacing`
# will be much larger than [1]'s `eps(mineig)`, since `mineig` is usually on
# the order of 1e-16, and `eps(1e-16)` is on the order of 1e-34, whereas
# `spacing` will, for Gaussian random matrixes of small dimension, be on
# othe order of 1e-16. In practice, both ways converge, as the unit test
# below suggests.
Id = np.eye(A.shape[0])
k = 1
while not is_posdef(A3):
mineig = np.min(np.real(np.linalg.eigvals(A3)))
A3 += Id * (-mineig * k ** 2 + spacing)
k += 1
return A3
[docs]def sample_truncated_joint_normal(mu, sigma, a, b, n, seed=None):
"""Give an array of samples from the truncated joint normal distributon using sample rejection
- mu, sigma - parameters for the normal distribution
- a, b - vectors of lower and upper limits for each random variable
- n - number of samples
"""
if not is_posdef(sigma):
raise ValueError("Covariance matrix not positive definite")
if seed is None or isinstance(seed, int):
seed = np.random.default_rng(seed)
kept_samples = np.empty((0, len(mu)))
remaining = n
while remaining > 0:
samples = seed.multivariate_normal(mu, sigma, size=remaining, check_valid='ignore')
in_range = np.logical_and(samples > a, samples < b).all(axis=1)
kept_samples = np.concatenate((kept_samples, samples[in_range]))
remaining = n - len(kept_samples)
return kept_samples
[docs]def conditional_joint_normal(mu, sigma, a):
"""Give parameters of the conditional joint normal distribution
The condition is the last len(a) values
See https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Conditional_distributions
"""
# partition mu and sigma
nfirst = len(mu) - len(a)
S11 = sigma[0:nfirst, 0:nfirst]
S12 = sigma[0:nfirst, nfirst:]
S21 = sigma[nfirst:, 0:nfirst]
S22 = sigma[nfirst:, nfirst:]
M1 = mu[0:nfirst]
M2 = mu[nfirst:]
S22_inv = np.linalg.inv(S22)
mu_bar = M1 + S12 @ S22_inv @ (a - M2)
sigma_bar = S11 - S12 @ S22_inv @ S21
return mu_bar, sigma_bar
[docs]def round_to_n_sigdig(x, n):
if x == 0:
return x
else:
return round(x, -int(math.floor(math.log10(abs(x)))) + (n - 1))
[docs]def is_near_target(x, target, zero_limit, significant_digits):
if target == 0:
return abs(x) < abs(zero_limit)
else:
return round_to_n_sigdig(x, n=significant_digits) == round_to_n_sigdig(
target, n=significant_digits
)