Source code for GPy.kern.src.eq_ode1

# Copyright (c) 2014, Cristian Guarnizo.
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from scipy.special import erf, erfcx
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
from paramz.caching import Cache_this

class EQ_ODE1(Kern):
    r"""
    Covariance function for a first order differential equation driven by an
    exponentiated quadratic covariance.

    The outputs of this kernel have the form

    .. math::
       \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_j y_j(t)

    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the
    sensitivity of the :math:`j`th output to the :math:`i`th latent function,
    :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)`
    are independent latent Gaussian processes governed by an exponentiated
    quadratic covariance.

    :param output_dim: number of outputs driven by the latent functions.
    :type output_dim: int
    :param W: sensitivities of each output to the latent driving function.
    :type W: ndarray (output_dim x rank).
    :param rank: If rank is greater than 1 then there are assumed to be a
        total of rank latent forces independently driving the system, each
        with identical covariance.
    :type rank: int
    :param decay: decay rates for the first order system.
    :type decay: array of length output_dim.
    :param delay: delay between latent force and output response.
    :type delay: array of length output_dim.
    :param kappa: diagonal term that allows each latent output to have an
        independent component to the response.
    :type kappa: array of length output_dim.

    .. Note: see first order differential equation examples in
       GPy.examples.regression for some usage.
    """
    def __init__(self, input_dim=2, output_dim=1, rank=1, W=None,
                 lengthscale=None, decay=None, active_dims=None, name='eq_ode1'):
        assert input_dim == 2, "only defined for 2 input dims (time and output index)"
        super(EQ_ODE1, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name)

        self.rank = rank
        self.output_dim = output_dim

        if lengthscale is None:
            lengthscale = .5 + np.random.rand(self.rank)
        else:
            lengthscale = np.asarray(lengthscale)
            assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
            if lengthscale.size != self.rank:
                lengthscale = np.ones(self.rank)*lengthscale

        if W is None:
            W = .5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
        else:
            assert W.shape == (self.output_dim, self.rank)

        if decay is None:
            decay = np.ones(self.output_dim)
        else:
            decay = np.asarray(decay)
            assert decay.size in [1, self.output_dim], "Bad number of decay"
            if decay.size != self.output_dim:
                decay = np.ones(self.output_dim)*decay

        # if kappa is None:
        #     kappa = np.ones(self.output_dim)
        # else:
        #     kappa = np.asarray(kappa)
        #     assert kappa.size in [1, self.output_dim], "Bad number of kappa"
        #     if kappa.size != self.output_dim:
        #         kappa = np.ones(self.output_dim)*kappa
        # self.kappa = Param('kappa', kappa, Logexp())
        # self.delay = Param('delay', delay, Logexp())
        # self.is_normalized = True
        # self.is_stationary = False
        # self.gaussian_initial = False

        self.lengthscale = Param('lengthscale', lengthscale, Logexp())
        self.decay = Param('decay', decay, Logexp())
        self.W = Param('W', W)
        self.link_parameters(self.lengthscale, self.decay, self.W)
    @Cache_this(limit=3)
    def K(self, X, X2=None):
        # Indices are lost after k._slice_X, so recover the output/latent
        # index from the second input column instead.
        # index = np.asarray(X, dtype=np.int)
        # index = index.reshape(index.size,)
        if hasattr(X, 'values'):
            X = X.values
        index = np.int_(np.round(X[:, 1]))
        index = index.reshape(index.size,)
        X_flag = index[0] >= self.output_dim
        if X2 is None:
            if X_flag:
                # Calculate covariance function for the latent functions
                index -= self.output_dim
                return self._Kuu(X, index)
            else:
                raise NotImplementedError
        else:
            # index2 = np.asarray(X2, dtype=np.int)
            # index2 = index2.reshape(index2.size,)
            if hasattr(X2, 'values'):
                X2 = X2.values
            index2 = np.int_(np.round(X2[:, 1]))
            index2 = index2.reshape(index2.size,)
            X2_flag = index2[0] >= self.output_dim
            # Calculate cross-covariance function
            if not X_flag and X2_flag:
                index2 -= self.output_dim
                return self._Kfu(X, index, X2, index2)      # Kfu
            elif X_flag and not X2_flag:
                index -= self.output_dim
                return self._Kfu(X2, index2, X, index).T    # Kuf
            elif X_flag and X2_flag:
                index -= self.output_dim
                index2 -= self.output_dim
                return self._Kusu(X, index, X2, index2)     # Ku_s u
            else:
                raise NotImplementedError                   # Kf_s f
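    # -----------------------------------------------------------------------
    # Usage sketch (illustrative only, not part of the original module).  It
    # assumes GPy is installed and exercises only the constructor and K
    # defined above.  Inputs are two-column arrays: column 0 holds the time,
    # column 1 an integer index whose values in [0, output_dim) select
    # outputs y_j and whose values in [output_dim, output_dim + rank) select
    # latent forces u_q.
    #
    #   import numpy as np
    #   from GPy.kern.src.eq_ode1 import EQ_ODE1
    #
    #   kern = EQ_ODE1(input_dim=2, output_dim=2, rank=1,
    #                  decay=[1.0, 0.5], lengthscale=[0.8])
    #   t = np.linspace(0., 5., 10)[:, None]
    #   Xf = np.hstack([t, np.zeros_like(t)])     # rows of output 0
    #   Xu = np.hstack([t, 2.*np.ones_like(t)])   # latent-force rows (index = output_dim)
    #
    #   Kuu = kern.K(Xu)          # latent-force covariance
    #   Kfu = kern.K(Xf, Xu)      # cross-covariance between output 0 and the force
    # -----------------------------------------------------------------------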
    # Calculate the covariance function for diag(Kff(X, X))
    def Kdiag(self, X):
        if hasattr(X, 'values'):
            index = np.int_(np.round(X[:, 1].values))
        else:
            index = np.int_(np.round(X[:, 1]))
        index = index.reshape(index.size,)
        X_flag = index[0] >= self.output_dim
        if X_flag:
            # Kuu diagonal
            return np.ones(X[:, 0].shape)
        else:
            # Kff diagonal
            kdiag = self._Kdiag(X)
            return np.sum(kdiag, axis=1)
    def _Kdiag(self, X):
        # Indices are lost after k._slice_X, so recover them from X.
        # index = np.asarray(X, dtype=np.int)
        # index = index.reshape(index.size,)
        if hasattr(X, 'values'):
            X = X.values
        index = np.int_(X[:, 1])
        index = index.reshape(index.size,)
        # Terms that move along t
        t = X[:, 0].reshape(X.shape[0], 1)
        d = np.unique(index)    # output indices
        B = self.decay.values[d]
        S = self.W.values[d, :]
        # Index transformation
        indd = np.arange(self.output_dim)
        indd[d] = np.arange(d.size)
        index = indd[index]
        B = B.reshape(B.size, 1)
        # Terms that move along q
        lq = self.lengthscale.values.reshape(1, self.rank)
        S2 = S*S
        kdiag = np.empty((t.size, ))
        # Dx1 terms
        c0 = (S2/B)*((.5*np.sqrt(np.pi))*lq)
        # DxQ terms
        nu = lq*(B*.5)
        nu2 = nu*nu
        # Nx1 terms
        gamt = -2.*B
        gamt = gamt[index]*t
        # NxQ terms
        t_lq = t/lq
        # Upsilon calculations
        # erfnu = erf(nu)
        upm = np.exp(nu2[index, :] + lnDifErf(nu[index, :], t_lq + nu[index, :]))
        upm[t[:, 0] == 0, :] = 0.
        upv = np.exp(nu2[index, :] + gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]))
        upv[t[:, 0] == 0, :] = 0.
        # Covariance calculation
        # kdiag = np.sum(c0[index, :]*(upm-upv), axis=1)
        kdiag = c0[index, :]*(upm - upv)
        return kdiag
    def update_gradients_full(self, dL_dK, X, X2=None):
        # index = np.asarray(X, dtype=np.int)
        # index = index.reshape(index.size,)
        if hasattr(X, 'values'):
            X = X.values
        self.decay.gradient = np.zeros(self.decay.shape)
        self.W.gradient = np.zeros(self.W.shape)
        self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
        index = np.int_(np.round(X[:, 1]))
        index = index.reshape(index.size,)
        X_flag = index[0] >= self.output_dim
        if X2 is None:
            if X_flag:
                # Kuu or Kmm
                index -= self.output_dim
                tmp = dL_dK*self._gkuu_lq(X, index)
                for q in np.unique(index):
                    ind = np.where(index == q)
                    self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum()
            else:
                raise NotImplementedError
        else:
            # Kfu or Knm
            # index2 = np.asarray(X2, dtype=np.int)
            # index2 = index2.reshape(index2.size,)
            if hasattr(X2, 'values'):
                X2 = X2.values
            index2 = np.int_(np.round(X2[:, 1]))
            index2 = index2.reshape(index2.size,)
            X2_flag = index2[0] >= self.output_dim
            if not X_flag and X2_flag:
                # Kfu
                index2 -= self.output_dim
            else:
                # Kuf: transpose so that we obtain dL_dKfu
                dL_dK = dL_dK.T
                indtemp = index - self.output_dim
                Xtemp = X
                X = X2
                X2 = Xtemp
                index = index2
                index2 = indtemp
            glq, gSdq, gB = self._gkfu(X, index, X2, index2)
            tmp = dL_dK*glq
            for q in np.unique(index2):
                ind = np.where(index2 == q)
                self.lengthscale.gradient[q] = tmp[:, ind].sum()
            tmpB = dL_dK*gB
            tmp = dL_dK*gSdq
            for d in np.unique(index):
                ind = np.where(index == d)
                self.decay.gradient[d] = tmpB[ind, :].sum()
                for q in np.unique(index2):
                    ind2 = np.where(index2 == q)
                    self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum()
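    # -----------------------------------------------------------------------
    # Hedged sketch (illustrative only): how this hook can be driven outside
    # of a full GPy model.  Reusing kern and Xu from the usage sketch after K,
    # restricting the inputs to latent-force rows exercises the Kuu branch and
    # fills kern.lengthscale.gradient (decay and W gradients stay zero):
    #
    #   dL_dK = np.ones((Xu.shape[0], Xu.shape[0]))   # dummy upstream gradient
    #   kern.update_gradients_full(dL_dK, Xu)
    #   print(kern.lengthscale.gradient)
    # -----------------------------------------------------------------------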
    def update_gradients_diag(self, dL_dKdiag, X):
        # index = np.asarray(X, dtype=np.int)
        # index = index.reshape(index.size,)
        if hasattr(X, 'values'):
            X = X.values
        self.decay.gradient = np.zeros(self.decay.shape)
        self.W.gradient = np.zeros(self.W.shape)
        self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
        index = np.int_(X[:, 1])
        index = index.reshape(index.size,)
        glq, gS, gB = self._gkdiag(X, index)
        if dL_dKdiag.size == X.shape[0]:
            dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
        tmp = dL_dKdiag*glq
        self.lengthscale.gradient = tmp.sum(0)
        tmpB = dL_dKdiag*gB
        tmp = dL_dKdiag*gS
        for d in np.unique(index):
            ind = np.where(index == d)
            self.decay.gradient[d] = tmpB[ind, :].sum()
            self.W.gradient[d, :] = tmp[ind].sum(0)
    def gradients_X(self, dL_dK, X, X2=None):
        # index = np.asarray(X, dtype=np.int)
        # index = index.reshape(index.size,)
        if hasattr(X, 'values'):
            X = X.values
        index = np.int_(np.round(X[:, 1]))
        index = index.reshape(index.size,)
        X_flag = index[0] >= self.output_dim
        # If input_dim == 1, gX = np.zeros((X.shape[0], 1)) would suffice.
        # With input_dim == 2 we allocate both columns and only fill the
        # gradient with respect to the time column.
        gX = np.zeros(X.shape)
        if X2 is None:
            # Kuu or Kmm
            if X_flag:
                index -= self.output_dim
                gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0)
                return gX
            else:
                raise NotImplementedError
        else:
            # Kuf or Kmn
            # index2 = np.asarray(X2, dtype=np.int)
            # index2 = index2.reshape(index2.size,)
            if hasattr(X2, 'values'):
                X2 = X2.values
            index2 = np.int_(np.round(X2[:, 1]))
            index2 = index2.reshape(index2.size,)
            X2_flag = index2[0] >= self.output_dim
            if X_flag and not X2_flag:
                # Gradient of Kuf(Z, X) wrt Z
                index -= self.output_dim
                gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1)
                return gX
            else:
                raise NotImplementedError
    #---------------------------------------#
    #             Helper functions          #
    #---------------------------------------#

    # Evaluation of the squared exponential covariance for the latent forces
    def _Kuu(self, X, index):
        index = index.reshape(index.size,)
        t = X[:, 0].reshape(X.shape[0],)
        lq = self.lengthscale.values.reshape(self.rank,)
        lq2 = lq*lq
        # Covariance matrix initialization
        kuu = np.zeros((t.size, t.size))
        # Assign 1. to diagonal terms
        kuu[np.diag_indices(t.size)] = 1.
        # Upper triangular indices
        indtri1, indtri2 = np.triu_indices(t.size, 1)
        # Block diagonal indices among upper triangular indices
        ind = np.where(index[indtri1] == index[indtri2])
        indr = indtri1[ind]
        indc = indtri2[ind]
        r = t[indr] - t[indc]
        r2 = r*r
        # Calculation of covariance function
        kuu[indr, indc] = np.exp(-r2/lq2[index[indr]])
        # Completion of lower triangular part
        kuu[indc, indr] = kuu[indr, indc]
        return kuu

    def _Kusu(self, X, index, X2, index2):
        index = index.reshape(index.size,)
        index2 = index2.reshape(index2.size,)
        t = X[:, 0].reshape(X.shape[0], 1)
        t2 = X2[:, 0].reshape(1, X2.shape[0])
        lq = self.lengthscale.values.reshape(self.rank,)
        # Covariance matrix initialization
        kuu = np.zeros((t.size, t2.size))
        for q in range(self.rank):
            ind1 = index == q
            ind2 = index2 == q
            r = t[ind1]/lq[q] - t2[0, ind2]/lq[q]
            r2 = r*r
            # Calculation of covariance function
            kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
        return kuu

    # Evaluation of the cross-covariance function
    def _Kfu(self, X, index, X2, index2):
        # Terms that move along t
        t = X[:, 0].reshape(X.shape[0], 1)
        d = np.unique(index)    # output indices
        B = self.decay.values[d]
        S = self.W.values[d, :]
        # Index transformation
        indd = np.arange(self.output_dim)
        indd[d] = np.arange(d.size)
        index = indd[index]
        # Output related variables must be column-wise
        B = B.reshape(B.size, 1)
        # Input related variables must be row-wise
        z = X2[:, 0].reshape(1, X2.shape[0])
        lq = self.lengthscale.values.reshape((1, self.rank))
        kfu = np.empty((t.size, z.size))
        # DxQ terms
        c0 = S*((.5*np.sqrt(np.pi))*lq)
        nu = B*(.5*lq)
        nu2 = nu**2
        # 1xM terms
        z_lq = z/lq[0, index2]
        # NxM terms
        tz = t - z
        tz_lq = tz/lq[0, index2]
        # Upsilon calculations
        fullind = np.ix_(index, index2)
        upsi = np.exp(nu2[fullind] - B[index]*tz
                      + lnDifErf(-tz_lq + nu[fullind], z_lq + nu[fullind]))
        upsi[t[:, 0] == 0, :] = 0.
        # Covariance calculation
        kfu = c0[fullind]*upsi
        return kfu

    # Gradient of Kuu wrt the lengthscale
    def _gkuu_lq(self, X, index):
        t = X[:, 0].reshape(X.shape[0],)
        index = index.reshape(X.shape[0],)
        lq = self.lengthscale.values.reshape(self.rank,)
        lq2 = lq*lq
        # Covariance matrix initialization
        glq = np.zeros((t.size, t.size))
        # Upper triangular indices
        indtri1, indtri2 = np.triu_indices(t.size, 1)
        # Block diagonal indices among upper triangular indices
        ind = np.where(index[indtri1] == index[indtri2])
        indr = indtri1[ind]
        indc = indtri2[ind]
        r = t[indr] - t[indc]
        r2 = r*r
        r2_lq2 = r2/lq2[index[indr]]
        # Calculation of covariance function
        er2_lq2 = np.exp(-r2_lq2)
        # Gradient wrt lq
        c = 2.*r2_lq2/lq[index[indr]]
        glq[indr, indc] = er2_lq2*c
        # Complete the lower triangular part
        glq[indc, indr] = glq[indr, indc]
        return glq

    # Be careful: this derivative should be transposed
    def _gkuu_X(self, X, index):
        # Diagonal terms are always zero
        t = X[:, 0].reshape(X.shape[0],)
        index = index.reshape(index.size,)
        lq = self.lengthscale.values.reshape(self.rank,)
        lq2 = lq*lq
        # Covariance matrix initialization
        gt = np.zeros((t.size, t.size))
        # Upper triangular indices (offset of 1 from the diagonal)
        indtri1, indtri2 = np.triu_indices(t.size, 1)
        # Block diagonal indices among upper triangular indices
        ind = np.where(index[indtri1] == index[indtri2])
        indr = indtri1[ind]
        indc = indtri2[ind]
        r = t[indr] - t[indc]
        r2 = r*r
        r2_lq2 = r2/(-lq2[index[indr]])
        # Calculation of covariance function
        er2_lq2 = np.exp(r2_lq2)
        # Gradient wrt t
        c = 2.*r/lq2[index[indr]]
        gt[indr, indc] = er2_lq2*c
        # Complete the lower triangular part
        gt[indc, indr] = -gt[indr, indc]
        return gt

    # Gradients for the diagonal of Kff
    def _gkdiag(self, X, index):
        index = index.reshape(index.size,)
        # Terms that move along t
        d = np.unique(index)
        B = self.decay[d].values
        S = self.W[d, :].values
        # Index transformation
        indd = np.arange(self.output_dim)
        indd[d] = np.arange(d.size)
        index = indd[index]
        # Output related variables must be column-wise
        t = X[:, 0].reshape(X.shape[0], 1)
        B = B.reshape(B.size, 1)
        S2 = S*S
        # Input related variables must be row-wise
        lq = self.lengthscale.values.reshape(1, self.rank)
        gB = np.empty((t.size,))
        glq = np.empty((t.size, lq.size))
        gS = np.empty((t.size, lq.size))
        # Dx1 terms
        c0 = S2*lq*np.sqrt(np.pi)
        # DxQ terms
        nu = (.5*lq)*B
        nu2 = nu*nu
        # Nx1 terms
        gamt = -B[index]*t
        egamt = np.exp(gamt)
        e2gamt = egamt*egamt
        # NxQ terms
        t_lq = t/lq
        t2_lq2 = -t_lq*t_lq
        etlq2gamt = np.exp(t2_lq2 + gamt)
        # Upsilon calculations
        # erfnu = erf(nu)  TODO: this can be improved
        upm = np.exp(nu2[index, :] + lnDifErf(nu[index, :], t_lq + nu[index, :]))
        upm[t[:, 0] == 0, :] = 0.
        upv = np.exp(nu2[index, :] + 2.*gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]))  # egamt*upv
        upv[t[:, 0] == 0, :] = 0.
        # Gradient wrt S
        c0_S = (S/B)*(lq*np.sqrt(np.pi))
        gS = c0_S[index]*(upm - upv)
        # Terms for the gradient wrt B
        CB1 = (.5*lq)**2 - .5/B**2          # DxQ
        lq2_2B = (.5*lq**2)*(S2/B)          # DxQ
        CB2 = 2.*etlq2gamt - e2gamt - 1.    # NxQ
        # Gradient wrt B
        gB = c0[index, :]*(CB1[index, :]*upm - (CB1[index, :] - t/B[index])*upv) + \
            lq2_2B[index, :]*CB2
        # Gradient wrt the lengthscale
        # DxQ terms
        c0 = (.5*np.sqrt(np.pi))*(S2/B)*(1. + .5*(lq*B)**2)
        Clq1 = S2*(lq*.5)
        glq = c0[index]*(upm - upv) + Clq1[index]*CB2
        return glq, gS, gB

    def _gkfu(self, X, index, Z, index2):
        index = index.reshape(index.size,)
        # TODO: reduce memory usage
        # Terms that move along t
        d = np.unique(index)
        B = self.decay[d].values
        S = self.W[d, :].values
        # Index transformation
        indd = np.arange(self.output_dim)
        indd[d] = np.arange(d.size)
        index = indd[index]
        # t column
        t = X[:, 0].reshape(X.shape[0], 1)
        B = B.reshape(B.size, 1)
        # z row
        z = Z[:, 0].reshape(1, Z.shape[0])
        index2 = index2.reshape(index2.size,)
        lq = self.lengthscale.values.reshape((1, self.rank))
        # kfu = np.empty((t.size, z.size))
        glq = np.empty((t.size, z.size))
        gSdq = np.empty((t.size, z.size))
        gB = np.empty((t.size, z.size))
        # Dx1 terms
        B_2 = B*.5
        S_pi = S*(.5*np.sqrt(np.pi))
        # DxQ terms
        c0 = S_pi*lq    # lq*Sdq*sqrt(pi)
        nu = B*lq*.5
        nu2 = nu*nu
        # 1xM terms
        z_lq = z/lq[0, index2]
        # NxM terms
        tz = t - z
        tz_lq = tz/lq[0, index2]
        etz_lq2 = -np.exp(-tz_lq*tz_lq)
        ez_lq_Bt = np.exp(-z_lq*z_lq - B[index]*t)
        # Upsilon calculations
        fullind = np.ix_(index, index2)
        upsi = np.exp(nu2[fullind] - B[index]*tz
                      + lnDifErf(-tz_lq + nu[fullind], z_lq + nu[fullind]))
        upsi[t[:, 0] == 0., :] = 0.
        # Gradient wrt S
        # DxQ term
        Sa1 = lq*(.5*np.sqrt(np.pi))
        gSdq = Sa1[0, index2]*upsi
        # Gradient wrt lq
        la1 = S_pi*(1. + 2.*nu2)
        Slq = S*lq
        uplq = etz_lq2*(tz_lq/lq[0, index2] + B_2[index])
        uplq += ez_lq_Bt*(-z_lq/lq[0, index2] + B_2[index])
        glq = la1[fullind]*upsi
        glq += Slq[fullind]*uplq
        # Gradient wrt B
        Slq = Slq*lq
        nulq = nu*lq
        upBd = etz_lq2 + ez_lq_Bt
        gB = c0[fullind]*(nulq[fullind] - tz)*upsi + .5*Slq[fullind]*upBd
        return glq, gSdq, gB

    # TODO: reduce memory usage
    def _gkfu_z(self, X, index, Z, index2):
        # Gradient of Kfu(t, z) wrt z
        index = index.reshape(index.size,)
        # Terms that move along t
        d = np.unique(index)
        B = self.decay[d].values
        S = self.W[d, :].values
        # Index transformation
        indd = np.arange(self.output_dim)
        indd[d] = np.arange(d.size)
        index = indd[index]
        # t column
        t = X[:, 0].reshape(X.shape[0], 1)
        B = B.reshape(B.size, 1)
        # z row
        z = Z[:, 0].reshape(1, Z.shape[0])
        index2 = index2.reshape(index2.size,)
        lq = self.lengthscale.values.reshape((1, self.rank))
        # kfu = np.empty((t.size, z.size))
        gz = np.empty((t.size, z.size))
        # Dx1 terms
        S_pi = S*(.5*np.sqrt(np.pi))
        # DxQ terms
        # Slq = S*lq
        c0 = S_pi*lq    # lq*Sdq*sqrt(pi)
        nu = (.5*lq)*B
        nu2 = nu*nu
        # 1xM terms
        z_lq = z/lq[0, index2]
        z_lq2 = -z_lq*z_lq
        # NxQ terms
        t_lq = t/lq
        # NxM terms
        zt_lq = z_lq - t_lq[:, index2]
        zt_lq2 = -zt_lq*zt_lq
        # Upsilon calculations
        fullind = np.ix_(index, index2)
        z2 = z_lq + nu[fullind]
        z1 = z2 - t_lq[:, index2]
        upsi = np.exp(nu2[fullind] - B[index]*(t - z) + lnDifErf(z1, z2))
        upsi[t[:, 0] == 0., :] = 0.
        # Gradient wrt z
        za1 = c0*B
        gz = za1[fullind]*upsi + S[fullind]*(np.exp(z_lq2 - B[index]*t) - np.exp(zt_lq2))
        return gz
def lnDifErf(z1, z2):
    """Stable evaluation of log(erf(z2) - erf(z1)); z2 is assumed positive."""
    logdiferf = np.zeros(z1.shape)
    ind = np.where(z1 > 0.)
    ind2 = np.where(z1 <= 0.)
    if ind[0].size > 0:
        # For positive z1 use erfcx to avoid cancellation between two values
        # of erf that are both close to one.
        z1i = z1[ind]
        z12 = z1i*z1i
        z2i = z2[ind]
        logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i)*np.exp(z12 - z2i**2))
    if ind2[0].size > 0:
        z1i = z1[ind2]
        z2i = z2[ind2]
        logdiferf[ind2] = np.log(erf(z2i) - erf(z1i))
    return logdiferf
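if __name__ == '__main__':
    # Illustrative numerical sketch (not part of the original module).  It
    # shows why lnDifErf works through erfcx: for large positive arguments
    # erf saturates at 1.0 in double precision, so the naive difference
    # underflows to zero, while the erfcx-based branch above stays finite.
    z1 = np.array([[-1.0, 0.5, 6.0]])
    z2 = np.array([[0.5, 1.0, 6.5]])
    with np.errstate(divide='ignore'):
        naive = np.log(erf(z2) - erf(z1))
    print('naive :', naive)              # last entry underflows to -inf
    print('stable:', lnDifErf(z1, z2))   # all entries stay finite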