# Source code for GPy.util.subarray_and_sorting

'''
.. module:: GPy.util.subarray_and_sorting

.. moduleauthor:: Max Zwiessele <ibinbei@gmail.com>

'''
__updated__ = '2014-05-21'

import numpy as np, logging

def common_subarrays(X, axis=0):
    """
    Find common subarrays of 2 dimensional X, where axis is the axis to apply
    the search over.

    Common subarrays are returned as a dictionary of <subarray, [index]>
    pairs, where the subarray is a tuple representing the subarray and the
    index list holds the positions (along the searched axis) at which that
    subarray occurs in X, in increasing order.

    :param :class:`np.ndarray` X: 2d array to check for common subarrays in
    :param int axis: axis to apply subarray detection over.
        When axis is 0, rows are compared; when it is 1, columns are compared.
    :returns: a ``defaultdict(list)`` mapping each distinct subarray (as a
        tuple) to the list of indices where it occurs. Because it is a
        defaultdict, looking up a subarray that does not occur yields
        (and inserts) an empty list.
    :raises AssertionError: if X is not 2 dimensional or axis is not 0 or 1.

    Examples:
    =========

    In a 2d array:

    >>> import numpy as np
    >>> X = np.zeros((3,6), dtype=bool)
    >>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1
    >>> X.astype(int).tolist()
    [[0, 0, 0, 0, 0, 0], [1, 0, 1, 1, 1, 1], [0, 0, 1, 1, 0, 0]]
    >>> d = common_subarrays(X,axis=1)
    >>> len(d)
    3
    >>> X[:, d[tuple(X[:,0])]].astype(int).tolist()
    [[0, 0, 0], [1, 1, 1], [0, 0, 0]]
    >>> d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5]
    True
    >>> d[tuple(X[:,1])]
    [1]
    """
    from collections import defaultdict

    # Raised explicitly (rather than via an ``assert`` statement) so the
    # check is not stripped under ``python -O``; the exception type and
    # message are kept so existing callers see the same error.
    if not (X.ndim == 2 and axis in (0, 1)):
        raise AssertionError("Only implemented for 2D arrays")

    # Iterating a 2d array yields its rows; transpose to walk the columns.
    slices = X if axis == 0 else X.T

    subarrays = defaultdict(list)
    for index, subarray in enumerate(slices):
        # Tuples are hashable, so identical subarrays share one dict entry.
        subarrays[tuple(subarray)].append(index)
    return subarrays
if __name__ == '__main__':
    # When executed as a script, run the examples embedded in this module's
    # docstrings as doctests.
    import doctest
    doctest.testmod()