# Source code for GPy.inference.latent_function_inference.posterior

# Copyright (c) 2012, GPy authors (see AUTHORS.txt).

import numpy as np
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot
from GPy.core.parameterization.variational import VariationalPosterior

class Posterior(object):
"""
An object to represent a Gaussian posterior over latent function values, p(f|D).
This may be computed exactly for Gaussian likelihoods, or approximated for
non-Gaussian likelihoods.

The purpose of this class is to serve as an interface between the inference
schemes and the model classes.  the model class can make predictions for
the function at any new point x_* by integrating over this posterior.
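
    A minimal construction sketch (the shapes and values here are
    illustrative assumptions, not part of the API):

    >>> import numpy as np
    >>> K = np.eye(3)                         # prior covariance
    >>> post = Posterior(K=K, mean=np.ones((3, 1)), cov=0.5 * np.eye(3))
    >>> v = post.woodbury_vector              # computed lazily from K and the mean
    >>> v.shape
    (3, 1)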

"""

    def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None,
                 woodbury_inv=None, prior_mean=0):
        """
        woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
        woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
        K : the prior covariance (required for lazy computation of various quantities)
        mean : the posterior mean
        cov : the posterior covariance

        Not all of the above need to be supplied! You *must* supply:

          K (for lazy computation)
        or
          K_chol (for lazy computation)

        You may supply any one of the following combinations:

          woodbury_chol and woodbury_vector
        or
          woodbury_inv and woodbury_vector (or woodbury_inv and mean)
        or
          mean and cov

        Of course, you can supply more than that, but this class will lazily
        compute all other quantities on demand.
        """
        # obligatory
        self._K = K

        if ((woodbury_chol is not None) and (woodbury_vector is not None)) \
                or ((woodbury_inv is not None) and (woodbury_vector is not None)) \
                or ((woodbury_inv is not None) and (mean is not None)) \
                or ((mean is not None) and (cov is not None)):
            pass  # we have sufficient information to compute the posterior
        else:
            raise ValueError("insufficient information to compute the posterior")

        self._K_chol = K_chol
        # option 1: Woodbury Cholesky factor and Woodbury vector
        self._woodbury_chol = woodbury_chol
        self._woodbury_vector = woodbury_vector

        # option 2: Woodbury inverse, together with the Woodbury vector or the mean
        self._woodbury_inv = woodbury_inv

        # option 3: posterior mean and covariance
        self._mean = mean
        self._covariance = cov
        self._prior_mean = prior_mean

        # computed lazily on demand
        self._precision = None

    @property
    def mean(self):
        """
        Posterior mean
        $$K_{xx}v$$
        where $v := \texttt{Woodbury vector}$
        """
        if self._mean is None:
            self._mean = np.dot(self._K, self.woodbury_vector)
        return self._mean

    @property
    def covariance(self):
        """
        Posterior covariance
        $$K_{xx} - K_{xx}W_{xx}K_{xx}$$
        where $W_{xx} := \texttt{Woodbury inv}$
        """
        if self._covariance is None:
            # K - K W K, evaluated slice-wise so that a 3D Woodbury inverse
            # (one slice per output, as in missing-data models) is also handled
            self._covariance = (
                np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K,
                                                      [1, 0]).T).squeeze()
            # equivalently, in the 2D case: self._K - self._K.dot(self.woodbury_inv).dot(self._K)
        return self._covariance

    def covariance_between_points(self, kern, X, X1, X2):
        """
        Computes the posterior covariance between points.

        :param kern: GP kernel
        :param X: current input observations
        :param X1: some input observations
        :param X2: other input observations
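
        The identity used below, sketched in plain numpy/scipy (``W`` is a
        stand-in Woodbury matrix and ``L`` its lower Cholesky factor; the
        names and values are illustrative only):

        >>> import numpy as np
        >>> from scipy.linalg import solve_triangular
        >>> W = np.array([[2.0, 0.3], [0.3, 1.0]])
        >>> L = np.linalg.cholesky(W)
        >>> Kx1, Kx2 = np.ones((2, 3)), np.ones((2, 4))
        >>> t1 = solve_triangular(L, Kx1, lower=True)
        >>> t2 = solve_triangular(L, Kx2, lower=True)
        >>> np.allclose(t1.T.dot(t2), Kx1.T.dot(np.linalg.inv(W)).dot(Kx2))
        True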
"""
# ndim == 3 is a model for missing data
if self.woodbury_chol.ndim != 2:
raise RuntimeError("This method does not support posterior for missing data models")

Kx1 = kern.K(X, X1)
Kx2 = kern.K(X, X2)
K12 = kern.K(X1, X2)

tmp1 = dtrtrs(self.woodbury_chol, Kx1)[0]
tmp2 = dtrtrs(self.woodbury_chol, Kx2)[0]
var = K12 - tmp1.T.dot(tmp2)

return var

    @property
    def precision(self):
        """
        Inverse of posterior covariance
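
        A small check of the slice-wise inversion (illustrative values):

        >>> import numpy as np
        >>> post = Posterior(K=np.eye(2), mean=np.zeros((2, 1)), cov=0.5 * np.eye(2))
        >>> np.allclose(post.precision[:, :, 0], np.linalg.inv(post.covariance))
        True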
"""
if self._precision is None:
cov = np.atleast_3d(self.covariance)
self._precision = np.zeros(cov.shape)  # if one covariance per dimension
for p in range(cov.shape[-1]):
self._precision[:, :, p] = pdinv(cov[:, :, p])[0]
return self._precision

    @property
    def woodbury_chol(self):
        """
        return $L_{W}$, where $L_{W}$ is the lower triangular Cholesky decomposition of the Woodbury matrix
        $$L_{W}L_{W}^{\top} = W$$
        where $W^{-1} := \texttt{Woodbury inv}$
        """
        if self._woodbury_chol is None:
            # compute the Woodbury chol from the Woodbury inverse
            if self._woodbury_inv is not None:
                winv = np.atleast_3d(self._woodbury_inv)
                self._woodbury_chol = np.zeros(winv.shape)
                for p in range(winv.shape[-1]):
                    self._woodbury_chol[:, :, p] = pdinv(winv[:, :, p])[2]
                # Li = jitchol(self._woodbury_inv)
                # self._woodbury_chol, _ = dtrtri(Li)
                # W, _, _, _, = pdinv(self._woodbury_inv)
                # symmetrify(W)
                # self._woodbury_chol = jitchol(W)
            # try computing the Woodbury chol from the covariance
            elif self._covariance is not None:
                raise NotImplementedError("TODO: check code here")
                B = self._K - self._covariance
                tmp, _ = dpotrs(self.K_chol, B)
                self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
                _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
            else:
                raise ValueError("insufficient information to compute posterior")
        return self._woodbury_chol

    @property
    def woodbury_inv(self):
        """
        The inverse of the Woodbury matrix; in the Gaussian likelihood case it is defined as
        $$(K_{xx} + \Sigma_{xx})^{-1}$$
        where $\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}$
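
        This is the matrix appearing in the Gaussian-posterior covariance
        $K - K(K + \Sigma)^{-1}K$. A quick check of the equivalent
        matrix-inversion-lemma form (values here are illustrative):

        >>> import numpy as np
        >>> K = np.array([[2.0, 0.5], [0.5, 1.0]])
        >>> S = np.eye(2)                        # likelihood covariance
        >>> W = np.linalg.inv(K + S)             # the Woodbury inverse
        >>> np.allclose(K - K.dot(W).dot(K),
        ...             np.linalg.inv(np.linalg.inv(K) + np.linalg.inv(S)))
        True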
"""
if self._woodbury_inv is None:
if self._woodbury_chol is not None:
self._woodbury_inv, _ = dpotri(self._woodbury_chol, lower=1)
# self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
symmetrify(self._woodbury_inv)
elif self._covariance is not None:
B = np.atleast_3d(self._K) - np.atleast_3d(self._covariance)
self._woodbury_inv = np.empty_like(B)
for i in range(B.shape[-1]):
tmp, _ = dpotrs(self.K_chol, B[:, :, i])
self._woodbury_inv[:, :, i], _ = dpotrs(self.K_chol, tmp.T)
return self._woodbury_inv

    @property
    def woodbury_vector(self):
        """
        The Woodbury vector; in the Gaussian likelihood case it is defined as
        $$(K_{xx} + \Sigma)^{-1}Y$$
        where $\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}$
        """
        if self._woodbury_vector is None:
            # solve K v = (mean - prior_mean) for v using the prior Cholesky
            self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean - self._prior_mean)
        return self._woodbury_vector

    @property
    def K_chol(self):
        """
        Cholesky of the prior covariance K
        """
        if self._K_chol is None:
            self._K_chol = jitchol(self._K)
        return self._K_chol

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
        woodbury_vector = self.woodbury_vector
        woodbury_inv = self.woodbury_inv

        if not isinstance(Xnew, VariationalPosterior):
            # deterministic test inputs: mu = Kx^T v, var = Kxx - Kx^T W Kx
            Kx = kern.K(pred_var, Xnew)
            mu = np.dot(Kx.T, woodbury_vector)
            if len(mu.shape) == 1:
                mu = mu.reshape(-1, 1)
            if full_cov:
                Kxx = kern.K(Xnew)
                if woodbury_inv.ndim == 2:
                    var = Kxx - np.dot(Kx.T, np.dot(woodbury_inv, Kx))
                elif woodbury_inv.ndim == 3:  # missing data: one Woodbury inverse per output
                    var = np.empty((Kxx.shape[0], Kxx.shape[1], woodbury_inv.shape[2]))
                    from ...util.linalg import mdot
                    for i in range(var.shape[2]):
                        var[:, :, i] = (Kxx - mdot(Kx.T, woodbury_inv[:, :, i], Kx))
            else:
                Kxx = kern.Kdiag(Xnew)
                if woodbury_inv.ndim == 2:
                    var = (Kxx - np.sum(np.dot(woodbury_inv.T, Kx) * Kx, 0))[:, None]
                elif woodbury_inv.ndim == 3:  # missing data: one Woodbury inverse per output
                    var = np.empty((Kxx.shape[0], woodbury_inv.shape[2]))
                    for i in range(var.shape[1]):
                        var[:, i] = (Kxx - (np.sum(np.dot(woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
                var = np.clip(var, 1e-15, np.inf)
        else:
            # uncertain (variational) test inputs: predict via the kernel's psi statistics
            psi0_star = kern.psi0(pred_var, Xnew)
            psi1_star = kern.psi1(pred_var, Xnew)
            psi2_star = kern.psi2n(pred_var, Xnew)
            la = woodbury_vector
            mu = np.dot(psi1_star, la)  # TODO: dimensions?
            N, M, D = psi0_star.shape[0], psi1_star.shape[1], la.shape[1]

            if full_cov:
                raise NotImplementedError(
                    "Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
                var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
                di = np.diag_indices(la.shape[1])
            else:
                tmp = psi2_star - psi1_star[:, :, None] * psi1_star[:, None, :]
                var = (tmp.reshape(-1, M).dot(la).reshape(N, M, D) * la[None, :, :]).sum(1) + psi0_star[:, None]
                if woodbury_inv.ndim == 2:
                    var += -psi2_star.reshape(N, -1).dot(woodbury_inv.flat)[:, None]
                else:
                    var += -psi2_star.reshape(N, -1).dot(woodbury_inv.reshape(-1, D))
                var = np.clip(var, 1e-15, np.inf)
        return mu, var

class PosteriorExact(Posterior):
    """
    A posterior for exact (Gaussian likelihood) inference. Predictive
    variances come from triangular solves against the Woodbury Cholesky
    factor rather than from an explicit Woodbury inverse.
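
    A usage sketch for Gaussian-likelihood regression (``GPy.kern.RBF``, the
    data and the noise variance here are illustrative assumptions):

    >>> import numpy as np, GPy
    >>> X = np.linspace(0, 1, 5)[:, None]
    >>> Y = np.sin(2 * np.pi * X)
    >>> kern = GPy.kern.RBF(1)
    >>> Ky = kern.K(X) + 0.1 * np.eye(5)          # K + sigma^2 I
    >>> post = PosteriorExact(woodbury_chol=jitchol(Ky),
    ...                       woodbury_vector=np.linalg.solve(Ky, Y), K=kern.K(X))
    >>> mu, var = post._raw_predict(kern, np.linspace(0, 1, 11)[:, None], X)
    >>> mu.shape, var.shape
    ((11, 1), (11, 1))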
    """

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
        Kx = kern.K(pred_var, Xnew)
        mu = np.dot(Kx.T, self.woodbury_vector)
        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)
        if full_cov:
            Kxx = kern.K(Xnew)
            if self._woodbury_chol.ndim == 2:
                tmp = dtrtrs(self._woodbury_chol, Kx)[0]
                var = Kxx - tdot(tmp.T)
            elif self._woodbury_chol.ndim == 3:  # missing data: one factor per output
                var = np.empty((Kxx.shape[0], Kxx.shape[1], self._woodbury_chol.shape[2]))
                for i in range(var.shape[2]):
                    tmp = dtrtrs(self._woodbury_chol[:, :, i], Kx)[0]
                    var[:, :, i] = (Kxx - tdot(tmp.T))
        else:
            Kxx = kern.Kdiag(Xnew)
            if self._woodbury_chol.ndim == 2:
                tmp = dtrtrs(self._woodbury_chol, Kx)[0]
                var = (Kxx - np.square(tmp).sum(0))[:, None]
            elif self._woodbury_chol.ndim == 3:  # missing data: one factor per output
                var = np.empty((Kxx.shape[0], self._woodbury_chol.shape[2]))
                for i in range(var.shape[1]):
                    tmp = dtrtrs(self._woodbury_chol[:, :, i], Kx)[0]
                    var[:, i] = (Kxx - np.square(tmp).sum(0))
        return mu, var

class PosteriorEP(Posterior):
    """
    A posterior for approximate (e.g. EP) inference, where the Woodbury
    inverse is supplied directly by the approximation and used as-is in the
    predictive equations.
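
    A construction sketch (illustrative values; any symmetric positive
    definite matrix stands in for the approximation's Woodbury inverse):

    >>> import numpy as np
    >>> K = np.eye(3)
    >>> W = 0.5 * np.eye(3)                       # stand-in Woodbury inverse
    >>> post = PosteriorEP(woodbury_inv=W, woodbury_vector=np.ones((3, 1)), K=K)
    >>> post.mean.shape
    (3, 1)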
    """

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
        Kx = kern.K(pred_var, Xnew)
        mu = np.dot(Kx.T, self.woodbury_vector)
        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        if full_cov:
            Kxx = kern.K(Xnew)
            if self._woodbury_inv.ndim == 2:
                tmp = np.dot(Kx.T, np.dot(self._woodbury_inv, Kx))
                var = Kxx - tmp
            elif self._woodbury_inv.ndim == 3:  # missing data: one Woodbury inverse per output
                var = np.empty((Kxx.shape[0], Kxx.shape[1], self._woodbury_inv.shape[2]))
                for i in range(var.shape[2]):
                    tmp = np.dot(Kx.T, np.dot(self._woodbury_inv[:, :, i], Kx))
                    var[:, :, i] = (Kxx - tmp)
        else:
            Kxx = kern.Kdiag(Xnew)
            if self._woodbury_inv.ndim == 2:
                tmp = (np.dot(self._woodbury_inv, Kx) * Kx).sum(0)
                var = (Kxx - tmp)[:, None]
            elif self._woodbury_inv.ndim == 3:  # missing data: one Woodbury inverse per output
                var = np.empty((Kxx.shape[0], self._woodbury_inv.shape[2]))
                for i in range(var.shape[1]):
                    tmp = (Kx * np.dot(self._woodbury_inv[:, :, i], Kx)).sum(0)
                    var[:, i] = (Kxx - tmp)

        return mu, var

class StudentTPosterior(PosteriorExact):
    """
    Exact posterior for Student-t process regression: the Gaussian predictive
    variance is rescaled by (nu + beta - 2) / (nu + N - 2).
    """

    def __init__(self, deg_free, **kwargs):
        super(StudentTPosterior, self).__init__(**kwargs)
        self.nu = deg_free

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
        mu, var = super(StudentTPosterior, self)._raw_predict(kern, Xnew, pred_var, full_cov)
        # beta is the data-fit term: the Woodbury vector dotted with the posterior mean
        beta = np.sum(self.woodbury_vector * self.mean)
        N = self.woodbury_vector.shape[0]
        tp_var_scale = (self.nu + beta - 2) / (self.nu + N - 2)
        return mu, tp_var_scale * var