from copy import copy
from numpy import asarray, diag, dot, exp
from numpy.linalg import pinv, solve
from glimix_core._ep import EPLinearKernel
from ._glmm import GLMM
[docs]class GLMMExpFam(GLMM):
r"""Generalised Linear Gaussian Processes implementation.
It implements inference over GLMMs via the Expectation Propagation [Min01]_
algorithm.
It currently supports the ``"Bernoulli"``, ``"Probit"``, ``"Binomial"``, and
``"Poisson"`` likelihoods. (For heterogeneous Normal likelihood, please refer to
:class:`glimix_core.glmm.GLMMNormal` for a closed-form inference.)
Parameters
----------
y : array_like
Outcome variable.
lik : tuple
Likelihood definition. The first item is one of the following likelihood names:
``"Bernoulli"``, ``"Binomial"``, ``"Normal"``, and ``"Poisson"``. For
`Binomial`, the second item is an array of outcomes.
X : array_like
Covariates.
QS : tuple
Economic eigendecomposition.
Example
-------
.. doctest::
>>> from numpy import dot, sqrt, zeros
>>> from numpy.random import RandomState
>>>
>>> from numpy_sugar.linalg import economic_qs
>>>
>>> from glimix_core.glmm import GLMMExpFam
>>>
>>> random = RandomState(0)
>>> nsamples = 10
>>>
>>> X = random.randn(nsamples, 2)
>>> G = random.randn(nsamples, 100)
>>> K = dot(G, G.T)
>>> ntrials = random.randint(1, 100, nsamples)
>>> z = dot(G, random.randn(100)) / sqrt(100)
>>>
>>> successes = zeros(len(ntrials), int)
>>> for i in range(len(ntrials)):
... successes[i] = sum(z[i] + 0.2 * random.randn(ntrials[i]) > 0)
>>>
>>> QS = economic_qs(K)
>>>
>>> glmm = GLMMExpFam(successes, ('binomial', ntrials), X, QS)
>>> print('Before: %.2f' % glmm.lml())
Before: -16.40
>>> glmm.fit(verbose=False)
>>> print('After: %.2f' % glmm.lml())
After: -13.43
"""
def __init__(self, y, lik, X, QS=None, n_int=1000, rtol=1.49e-05, atol=1.49e-08):
from liknorm import LikNormMachine
GLMM.__init__(self, y, lik, X, QS)
self._ep = EPLinearKernel(self._X.shape[0], rtol=rtol, atol=atol)
self._ep.set_compute_moments(self.compute_moments)
self._machine = LikNormMachine(self._lik[0], n_int)
self.update_approx = True
self._variables.get("beta").listen(self.set_update_approx)
self._variables.get("logscale").listen(self.set_update_approx)
self._variables.get("logitdelta").listen(self.set_update_approx)
def __copy__(self):
gef = GLMMExpFam(self._y, self._lik, self._X, self._QS)
gef.__dict__["_ep"] = copy(self._ep)
gef.__dict__["_ep"].set_compute_moments(gef.compute_moments)
gef.update_approx = self.update_approx
GLMM._copy_to(self, gef)
return gef
def __del__(self):
if hasattr(self, "_machine"):
self._machine.finish()
def _update_approx(self):
if not self.update_approx:
return
self._ep.set_prior(self.mean(), self.covariance())
self.update_approx = False
@property
def beta(self):
return GLMM.beta.fget(self)
@beta.setter
def beta(self, v):
GLMM.beta.fset(self, v)
self.set_update_approx()
def compute_moments(self, eta, tau, moments):
y = (self._y,) + self._lik[1:]
self._machine.moments(y, eta, tau, moments)
[docs] def covariance(self):
return dict(QS=self._QS, scale=self.scale, delta=self.delta)
[docs] def fit(self, verbose=True, factr=1e5, pgtol=1e-7):
self._ep.verbose = verbose
super(GLMMExpFam, self).fit(verbose=verbose, factr=factr, pgtol=pgtol)
self._ep.verbose = False
[docs] def fix(self, var_name):
GLMM.fix(self, var_name)
self.set_update_approx()
[docs] def posteriori_mean(self):
r"""Mean of the estimated posteriori.
This is also the maximum a posteriori estimation of the latent variable.
"""
from numpy_sugar.linalg import rsolve
Sigma = self.posteriori_covariance()
eta = self._ep._posterior.eta
return dot(Sigma, eta + rsolve(GLMM.covariance(self), self.mean()))
[docs] def posteriori_covariance(self):
r"""Covariance of the estimated posteriori."""
K = GLMM.covariance(self)
tau = self._ep._posterior.tau
return pinv(pinv(K) + diag(1 / tau))
[docs] def gradient(self):
r"""Gradient of the log of the marginal likelihood.
Returns
-------
dict
Map between variables to their gradient values.
"""
self._update_approx()
g = self._ep.lml_derivatives(self._X)
ed = exp(-self.logitdelta)
es = exp(self.logscale)
grad = dict()
grad["logitdelta"] = g["delta"] * (ed / (1 + ed)) / (1 + ed)
grad["logscale"] = g["scale"] * es
grad["beta"] = g["mean"]
return grad
@property
def logitdelta(_):
return super().logitdelta
@logitdelta.setter
def logitdelta(self, v):
GLMM.logitdelta.fset(self, v)
self.set_update_approx()
@property
def logscale(_):
return super().logscale
@logscale.setter
def logscale(self, v):
GLMM.logscale.fset(self, v)
self.set_update_approx()
def set_update_approx(self, _=None):
self.update_approx = True
def set_variable_bounds(self, var_name, bounds):
GLMM.set_variable_bounds(self, var_name, bounds)
self.set_update_approx()
@property
def site(self):
return self._ep.site
[docs] def unfix(self, var_name):
GLMM.unfix(self, var_name)
self.set_update_approx()
def value(self):
self._update_approx()
return self._ep.lml()
def predictive_mean(self, Xstar, ks, kss):
mstar = self.mean_star(Xstar)
ks = self.covariance_star(ks)
m = self.mean()
eta = self._ep.posterior.eta
tau = self._ep.posterior.tau
mu = eta / tau
K = GLMM.covariance(self)
return mstar + dot(ks, solve(K, mu - m))
def predictive_covariance(self, Xstar, ks, kss):
from numpy_sugar.linalg import sum2diag
kss = self.variance_star(kss)
ks = self.covariance_star(ks)
tau = self._ep.posterior.tau
K = GLMM.covariance(self)
KT = sum2diag(K, 1 / tau)
ktk = solve(KT, ks.T)
b = []
for i in range(len(kss)):
b += [dot(ks[i, :], ktk[:, i])]
b = asarray(b)
return kss - b