# Source code for GPy.likelihoods.student_t

# Copyright (c) 2012-2014 Ricardo Andrade, Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import scipy as sp
from scipy import stats, special, integrate
from scipy.special import gammaln, gamma
from scipy.special import psi as digamma

from paramz.transformations import Logexp

from . import link_functions
from .likelihood import Likelihood
from ..core.parameterization import Param

class StudentT(Likelihood):
    """
    Student-t likelihood.

    For nomenclature see Bayesian Data Analysis 2003 p576

    .. math::
        p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}

    :param gp_link: link function mapping the latent f to the likelihood's
        location; defaults to the identity link
    :param deg_free: initial degrees of freedom v (kept positive via Logexp)
    :param sigma2: initial squared scale of the t distribution
    """
    def __init__(self, gp_link=None, deg_free=5, sigma2=2):
        if gp_link is None:
            gp_link = link_functions.Identity()
        super(StudentT, self).__init__(gp_link, name='Student_T')
        # sigma2 is not a noise parameter, it is a squared scale.
        self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
        self.v = Param('deg_free', float(deg_free), Logexp())
        self.link_parameter(self.sigma2)
        self.link_parameter(self.v)
        # The Student-t log-likelihood is not log-concave in f, which some
        # inference methods check for.
        self.log_concave = False
[docs] def update_gradients(self, grads): """ Pull out the gradients, be careful as the order must match the order in which the parameters are added """ self.sigma2.gradient = grads[0] self.v.gradient = grads[1]
[docs] def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ Hessian at y, given link(f), w.r.t link(f) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) The hessian will be 0 unless i == j .. math:: \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} :param inv_link_f: latent variables inv_link(f) :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array .. Note:: Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ e = y - inv_link_f hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess
[docs] def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) .. math:: \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3} :param inv_link_f: latent variables link(f) :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ e = y - inv_link_f d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / ((e**2 + self.sigma2*self.v)**3) ) return d3lik_dlink3
[docs] def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None): """ Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) .. math:: \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} :param inv_link_f: latent variables link(f) :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ e = y - inv_link_f d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) ) return d2logpdf_dlink2_dvar
[docs] def d2logpdf_dlink2_dv(self, inv_link_f, y, Y_metadata=None): e = y - inv_link_f e2 = np.square(e) df = float(self.v[:]) s2 = float(self.sigma2[:]) e2_s2v = e**2 + s2*df d2logpdf_df2_dv = (-s2*(df+1) + e2 - s2*df)/e2_s2v**2 - 2*s2*(df+1)*(e2 - s2*df)/e2_s2v**3 return d2logpdf_df2_dv
[docs] def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dv = self.d2logpdf_dlink2_dv(f, y, Y_metadata=Y_metadata) return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
[docs] def predictive_mean(self, mu, sigma, Y_metadata=None): # The comment here confuses mean and median. return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
[docs] def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): if self.deg_free<=2.: return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2. else: return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
[docs] def conditional_mean(self, gp): return self.gp_link.transf(gp)
[docs] def conditional_variance(self, gp): return self.deg_free/(self.deg_free - 2.)
[docs] def samples(self, gp, Y_metadata=None): """ Returns a set of samples of observations based on a given value of the latent variable. :param gp: latent variable """ orig_shape = gp.shape gp = gp.flatten() #FIXME: Very slow as we are computing a new random variable per input! #Can't get it to sample all at the same time #student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp]) dfs = np.ones_like(gp)*self.v scales = np.ones_like(gp)*np.sqrt(self.sigma2) student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp), scale=scales) return student_t_samples.reshape(orig_shape)