# Source code for GPy.likelihoods.bernoulli

# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)

import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf, derivLogCdfNormal, logCdfNormal
from .likelihood import Likelihood

# NOTE(review): the "[docs]" prefix below is a Sphinx viewcode scrape
# artifact, not Python syntax — remove it when restoring this file.
[docs]class Bernoulli(Likelihood):
"""
Bernoulli likelihood

.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}

.. Note::
Y takes values in either {-1, 1} or {0, 1}.
The link function should have the range [0, 1], e.g. probit (default) or Heaviside.

See likelihood.py for the parent class.
"""

# NOTE(review): this assignment belongs inside __init__, which was lost
# when this page was scraped — `self` is undefined at class scope as written.
self.log_concave = True

[docs]    def to_dict(self):
"""
Convert the object into a json serializable dictionary.

Note: It uses the private method _save_to_input_dict of the parent.

:return dict: json serializable dictionary containing the needed information to instantiate the object
"""

input_dict = super(Bernoulli, self)._save_to_input_dict()
input_dict["class"] = "GPy.likelihoods.Bernoulli"
return input_dict

def _preprocess_values(self, Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.

..Note:: Binary classification algorithm works better with classes {-1, 1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0, 1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep

[docs]    def moments_match_ep(self, Y_i, tau_i, v_i, Y_metadata_i=None):
"""
Moments match of the marginal approximation in EP algorithm

:param Y_i: observed binary label for this site (in {0, 1} or {-1, 1})
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
:returns: (Z_hat, mu_hat, sigma2_hat) — normalizer, mean and variance
    of the tilted (moment-matched) site distribution
"""
# Fold the label into a sign so the site integral reduces to a single
# normal-CDF evaluation.
if Y_i == 1:
sign = 1.
elif Y_i == 0 or Y_i == -1:
sign = -1
else:
raise ValueError("bad value for Bernoulli observation (0, 1)")
# NOTE(review): the original source dispatches here on the link function
# (probit vs. Heaviside); the `if isinstance(self.gp_link, ...):` header
# lines were lost when this page was scraped, so the two alternative
# computations below appear concatenated and the later `else:` dangles.
# --- first branch (presumably probit link) ---
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
phi_div_Phi = derivLogCdfNormal(z)
log_Z_hat = logCdfNormal(z)

mu_hat = v_i/tau_i + sign*phi_div_Phi/np.sqrt(tau_i**2 + tau_i)
sigma2_hat = 1./tau_i - (phi_div_Phi/(tau_i**2+tau_i))*(z+phi_div_Phi)

# --- second branch (presumably Heaviside link) — clobbers the values
# computed above as written.
z = sign*v_i/np.sqrt(tau_i)
phi_div_Phi = derivLogCdfNormal(z)
log_Z_hat = logCdfNormal(z)
mu_hat = v_i/tau_i + sign*phi_div_Phi/np.sqrt(tau_i)
# NOTE(review): `a` is undefined in this fragment — it looks like it
# should be `z`, or a variable defined on one of the stripped lines;
# confirm against the upstream source.
sigma2_hat = (1. - a*phi_div_Phi - np.square(phi_div_Phi))/tau_i
# NOTE(review): dangling `else:` — its `if`/`elif` header was lost in
# scraping.
else:
#TODO: do we want to revert to numerical quadrature here?

# TODO: Output log_Z_hat instead of Z_hat (needs to be change in all others likelihoods)
return np.exp(log_Z_hat), mu_hat, sigma2_hat

[docs]    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
"""
Variational expectation F = E_q[log p(y|f)] under a Gaussian q(f) with
mean `m` and variance `v`, evaluated by Gauss-Hermite quadrature,
together with its gradients w.r.t. m and v.

NOTE(review): the original method wraps the body below in an
`if isinstance(self.gp_link, ...):` guard that was lost when this page
was scraped, which leaves the trailing `else:` dangling.
"""
if gh_points is None:
gh_x, gh_w = self._gh_points()
else:
gh_x, gh_w = gh_points

# Normalize the Gauss-Hermite weights for the N(m, v) change of variables.
gh_w = gh_w / np.sqrt(np.pi)
shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
# Encode the labels as +/-1 so one CDF expression covers both classes.
Ysign = np.where(Y==1,1,-1)
# Quadrature nodes mapped onto the (sign-adjusted) latent distribution.
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None]
p = std_norm_cdf(X)
p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability
N = std_norm_pdf(X)
F = np.log(p).dot(gh_w)
NoverP = N/p
dF_dm = (NoverP*Ysign[:,None]).dot(gh_w)
dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(gh_w)
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), None
# NOTE(review): dangling `else:` — see the note in the docstring above.
else:
raise NotImplementedError

[docs]    def predictive_mean(self, mu, variance, Y_metadata=None):
"""
Predictive mean E[y*] = p(y* = 1) for a Gaussian latent with mean `mu`
and variance `variance`.

NOTE(review): the original dispatches on the link function; the
`if`/`elif` header lines were lost when this page was scraped, so two
unconditional `return` statements and a dangling `else:` remain below.
"""
# Probit link: \int Phi(f) N(f|mu, var) df = Phi(mu / sqrt(1 + var)).
return std_norm_cdf(mu/np.sqrt(1+variance))

# Presumably the Heaviside-link branch — unreachable as written.
return std_norm_cdf(mu/np.sqrt(variance))

else:
raise NotImplementedError

[docs]    def predictive_variance(self, mu, variance, pred_mean, Y_metadata=None):
"""
NOTE(review): the guard above these returns was lost when this page was
scraped — presumably an `if isinstance(self.gp_link, ...):` choosing
between 0. and np.nan (confirm against upstream). As written,
`return 0.` always fires and the `else:` dangles.
"""
return 0.
else:
return np.nan

"""
Likelihood function given inverse link of f.

.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}

:param y: data
:type y: Nx1 array
:returns: likelihood evaluated for this point
:rtype: float

.. Note:
Each y_i must be in {0, 1}
"""

"""
Log Likelihood function given inverse link of f.

.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})

:param y: data
:type y: Nx1 array
:returns: log likelihood evaluated at points inverse link of f.
:rtype: float
"""
return np.log(np.clip(p, 1e-9 ,np.inf))

"""

.. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}

:param y: data
:type y: Nx1 array
:returns: gradient of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array
"""
denom = np.where(y==1, ff, -(1-ff))
return 1./denom

"""
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j  w.r.t inverse link of f_i and inverse link of f_j.

.. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}

:param y: data
:type y: Nx1 array
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array

.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
"""
ret =  -1./np.square(np.clip(arg, 1e-9, 1e9))
if np.any(np.isinf(ret)):
stop
return ret

"""
Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f

.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}

:param y: data
:type y: Nx1 array
:returns: third derivative of log likelihood evaluated at points inverse_link(f)
:rtype: Nx1 array
"""
state = np.seterr(divide='ignore')
# TODO check y \in {0, 1} or {-1, 1}
np.seterr(**state)

[docs]    def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
"""
Get the "quantiles" of the binary labels (Bernoulli draws). all the
quantiles must be either 0 or 1, since those are the only values the
draw can take!
"""
p = self.predictive_mean(mu, var)
return [np.asarray(p>(q/100.), dtype=np.int32) for q in quantiles]

"""
Returns a set of samples of observations based on a given value of the latent variable.

:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
ns = np.ones_like(gp, dtype=int)