#===============================================================================
# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of GPy nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#===============================================================================
import numpy as np
from scipy import sparse
import itertools
from ...models import WarpedGP
[docs]def in_ipynb():
try:
cfg = get_ipython().config
return 'IPKernelApp' in cfg
except NameError:
return False
[docs]def find_best_layout_for_subplots(num_subplots):
r, c = 1, 1
while (r*c) < num_subplots:
if (c==(r+1)) or (r==c):
c += 1
elif c==(r+2):
r += 1
c -= 1
return r, c
[docs]def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, which_data_ycols, predict_kw, samples=0):
"""
Make the right decisions for prediction with a model
based on the standard arguments of plotting.
This is quite complex and will take a while to understand,
so do not change anything in here lightly!!!
"""
# Put some standards into the predict_kw so that prediction is done automatically:
if predict_kw is None:
predict_kw = {}
if 'likelihood' not in predict_kw:
if plot_raw:
from ...likelihoods import Gaussian
from ...likelihoods.link_functions import Identity
lik = Gaussian(Identity(), 1e-9) # Make the likelihood not add any noise
else:
lik = None
predict_kw['likelihood'] = lik
if 'Y_metadata' not in predict_kw:
predict_kw['Y_metadata'] = {}
if 'output_index' not in predict_kw['Y_metadata']:
predict_kw['Y_metadata']['output_index'] = Xgrid[:,-1:].astype(np.int)
mu, _ = self.predict(Xgrid, **predict_kw)
if percentiles is not None:
percentiles = self.predict_quantiles(Xgrid, quantiles=percentiles, **predict_kw)
else: percentiles = []
if samples > 0:
fsamples = self.posterior_samples(Xgrid, size=samples, **predict_kw)
fsamples = fsamples[:, which_data_ycols, :]
else:
fsamples = None
# Filter out the ycolums which we want to plot:
retmu = mu[:, which_data_ycols]
percs = [p[:, which_data_ycols] for p in percentiles]
if plot_raw and apply_link:
for i in range(len(which_data_ycols)):
retmu[:, [i]] = self.likelihood.gp_link.transf(mu[:, [i]])
for perc in percs:
perc[:, [i]] = self.likelihood.gp_link.transf(perc[:, [i]])
if fsamples is not None:
for s in range(fsamples.shape[-1]):
fsamples[:, i, s] = self.likelihood.gp_link.transf(fsamples[:, i, s])
return retmu, percs, fsamples
[docs]def helper_for_plot_data(self, X, plot_limits, visible_dims, fixed_inputs, resolution):
"""
Figure out the data, free_dims and create an Xgrid for
the prediction.
This is only implemented for two dimensions for now!
"""
#work out what the inputs are for plotting (1D or 2D)
if fixed_inputs is None:
fixed_inputs = []
fixed_dims = get_fixed_dims(fixed_inputs)
free_dims = get_free_dims(self, visible_dims, fixed_dims)
if len(free_dims) == 1:
#define the frame on which to plot
resolution = resolution or 200
Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits, resolution=resolution)
Xgrid = np.zeros((Xnew.shape[0],self.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
x = Xgrid
y = None
elif len(free_dims) == 2:
#define the frame for plotting on
resolution = resolution or 35
Xnew, x, y, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution)
Xgrid = np.zeros((Xnew.shape[0], self.input_dim))
Xgrid[:,free_dims] = Xnew
#xmin = Xgrid.min(0)[free_dims]
#xmax = Xgrid.max(0)[free_dims]
for i,v in fixed_inputs:
Xgrid[:,i] = v
else:
raise TypeError("calculated free_dims {} from visible_dims {} and fixed_dims {} is neither 1D nor 2D".format(free_dims, visible_dims, fixed_dims))
return fixed_dims, free_dims, Xgrid, x, y, xmin, xmax, resolution
[docs]def scatter_label_generator(labels, X, visible_dims, marker=None):
ulabels = []
for lab in labels:
if not lab in ulabels:
ulabels.append(lab)
if marker is not None:
marker = itertools.cycle(list(marker))
else:
m = None
try:
input_1, input_2, input_3 = visible_dims
except:
try:
# tuple or int?
input_1, input_2 = visible_dims
input_3 = None
except:
input_1 = visible_dims
input_2 = input_3 = None
for ul in ulabels:
from numbers import Number
if isinstance(ul, str):
try:
this_label = unicode(ul)
except NameError:
#python3
this_label = ul
elif isinstance(ul, Number):
this_label = 'class {!s}'.format(ul)
else:
this_label = ul
if marker is not None:
m = next(marker)
index = np.nonzero(labels == ul)[0]
if input_2 is None:
x = X[index, input_1]
y = np.zeros(index.size)
z = None
elif input_3 is None:
x = X[index, input_1]
y = X[index, input_2]
z = None
else:
x = X[index, input_1]
y = X[index, input_2]
z = X[index, input_3]
yield x, y, z, this_label, index, m
[docs]def subsample_X(X, labels, num_samples=1000):
"""
Stratified subsampling if labels are given.
This means due to rounding errors you might get a little differences between the
num_samples and the returned subsampled X.
"""
if X.shape[0] > num_samples:
print("Warning: subsampling X, as it has more samples then {}. X.shape={!s}".format(int(num_samples), X.shape))
if labels is not None:
subsample = []
for _, _, _, _, index, _ in scatter_label_generator(labels, X, (0, None, None)):
subsample.append(np.random.choice(index, size=max(2, int(index.size*(float(num_samples)/X.shape[0]))), replace=False))
subsample = np.hstack(subsample)
else:
subsample = np.random.choice(X.shape[0], size=1000, replace=False)
X = X[subsample]
labels = labels[subsample]
#=======================================================================
# <<<WORK IN PROGRESS>>>
# <<<DO NOT DELETE>>>
# plt.close('all')
# fig, ax = plt.subplots(1,1)
# from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions
# import matplotlib.patches as mpatches
# i1, i2 = most_significant_input_dimensions(m, None)
# xmin, xmax = 100, -100
# ymin, ymax = 100, -100
# legend_handles = []
#
# X = m.X.mean[:, [i1, i2]]
# X = m.X.variance[:, [i1, i2]]
#
# xmin = X[:,0].min(); xmax = X[:,0].max()
# ymin = X[:,1].min(); ymax = X[:,1].max()
# range_ = [[xmin, xmax], [ymin, ymax]]
# ul = np.unique(labels)
#
# for i, l in enumerate(ul):
# #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])],
# # green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])],
# # blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])],
# # alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)])
# #cmap = LinearSegmentedColormap('{}'.format(l), cdict)
# cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255)
# cmap._init()
# #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
# alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
# cmap._lut[:, -1] = alphas
# print l
# x, y = X[labels==l].T
#
# heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_)
# #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
#
# im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l))
# legend_handles.append(mpatches.Patch(color=colors[i], label=l))
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
# plt.legend(legend_handles, [l.get_label() for l in legend_handles])
# plt.draw()
# plt.show()
#=======================================================================
return X, labels
[docs]def update_not_existing_kwargs(to_update, update_from):
"""
This function updates the keyword aguments from update_from in
to_update, only if the keys are not set in to_update.
This is used for updated kwargs from the default dicts.
"""
if to_update is None:
to_update = {}
to_update.update({k:v for k,v in update_from.items() if k not in to_update})
return to_update
[docs]def get_x_y_var(model):
"""
Either the the data from a model as
X the inputs,
X_variance the variance of the inputs ([default: None])
and Y the outputs
If (X, X_variance, Y) is given, this just returns.
:returns: (X, X_variance, Y)
"""
# model given
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean.values
X_variance = model.X.variance.values
else:
try:
X = model.X.values
except AttributeError:
X = model.X
X_variance = None
try:
Y = model.Y.values
except AttributeError:
Y = model.Y
if isinstance(model, WarpedGP) and not model.predict_in_warped_space:
Y = model.Y_normalized
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
return X, X_variance, Y
[docs]def get_free_dims(model, visible_dims, fixed_dims):
"""
work out what the inputs are for plotting (1D or 2D)
The visible dimensions are the dimensions, which are visible.
the fixed_dims are the fixed dimensions for this.
The free_dims are then the visible dims without the fixed dims.
"""
if visible_dims is None:
visible_dims = np.arange(model.input_dim)
dims = np.asanyarray(visible_dims)
if fixed_dims is not None:
dims = [dim for dim in dims if dim not in fixed_dims]
return np.asanyarray([dim for dim in dims if dim is not None])
[docs]def get_fixed_dims(fixed_inputs):
"""
Work out the fixed dimensions from the fixed_inputs list of tuples.
"""
return np.array([i for i,_ in fixed_inputs])
[docs]def get_which_data_ycols(model, which_data_ycols):
"""
Helper to get the data columns to plot.
"""
if which_data_ycols == 'all' or which_data_ycols is None:
return np.arange(model.output_dim)
return which_data_ycols
[docs]def get_which_data_rows(model, which_data_rows):
"""
Helper to get the data rows to plot.
"""
if which_data_rows == 'all' or which_data_rows is None:
return slice(None)
return which_data_rows
[docs]def x_frame1D(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
assert X.shape[1] ==1, "x_frame1D is defined for one-dimensional inputs"
if plot_limits is None:
from GPy.core.parameterization.variational import VariationalPosterior
if isinstance(X, VariationalPosterior):
xmin,xmax = X.mean.min(0),X.mean.max(0)
else:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.25*(xmax-xmin), xmax+0.25*(xmax-xmin)
elif len(plot_limits) == 2:
xmin, xmax = map(np.atleast_1d, plot_limits)
else:
raise ValueError("Bad limits for plotting")
Xnew = np.linspace(float(xmin),float(xmax),int(resolution) or 200)[:,None]
return Xnew, xmin, xmax
[docs]def x_frame2D(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
assert X.shape[1]==2, "x_frame2D is defined for two-dimensional inputs"
if plot_limits is None:
xmin, xmax = X.min(0), X.max(0)
xmin, xmax = xmin-0.075*(xmax-xmin), xmax+0.075*(xmax-xmin)
elif len(plot_limits) == 2:
xmin, xmax = plot_limits
try:
xmin = xmin[0], xmin[1]
except:
# only one limit given, copy over to other lim
xmin = [plot_limits[0], plot_limits[0]]
xmax = [plot_limits[1], plot_limits[1]]
elif len(plot_limits) == 4:
xmin, xmax = (plot_limits[0], plot_limits[2]), (plot_limits[1], plot_limits[3])
else:
raise ValueError("Bad limits for plotting")
resolution = resolution or 50
xx, yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.c_[xx.flat, yy.flat]
return Xnew, xx, yy, xmin, xmax