-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdissimilarity.py
55 lines (44 loc) · 1.73 KB
/
dissimilarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""Computation of the dissimilarity representation of a dataset of
objects from a set of k prototypes (landmarks), given a distance
function. The result is a Euclidean embedding of the dataset with k
dimensions.
See Olivetti E., Nguyen T.B., Garyfallidis, E., The Approximation of
the Dissimilarity Projection, http://dx.doi.org/10.1109/PRNI.2012.13
"""
import numpy as np
from subsampling import compute_subset
def compute_dissimilarity(dataset, distance, k,
                          prototype_policy='sff', verbose=False):
    """Compute the dissimilarity representation of a dataset.

    A subset of ``k`` objects (the prototypes, or landmarks) is chosen
    from ``dataset`` by ``compute_subset`` and the distance between the
    whole dataset and those prototypes is then evaluated with a single
    call to ``distance``.

    Parameters
    ----------
    dataset : list or array of objects
        Indexable collection of objects; prototypes are retrieved as
        ``dataset[i]`` for each selected index ``i``.
    distance : function
        Distance function between groups (sets) of objects. It is
        forwarded to ``compute_subset`` and also called here as
        ``distance(dataset, prototypes)``.
    k : int
        The number of prototypes/landmarks to select.
    prototype_policy : string
        The prototype selection policy, passed to ``compute_subset``
        as ``landmark_policy``. Default is 'sff'.
    verbose : bool
        If true, print a short progress message. Default is False.

    Returns
    -------
    dissimilarity_matrix : array
        The value returned by ``distance(dataset, prototypes)``;
        expected to have shape (N, k) for a dataset of N objects.
    prototype_idx : sequence of int
        Indices into ``dataset`` of the selected prototypes, as
        returned by ``compute_subset``.

    See Also
    --------
    subsampling.compute_subset,
    subsampling.furthest_first_traversal,
    subsampling.subset_furthest_first
    """
    if verbose:
        message = "Generating %s prototypes with policy %s." % (k, prototype_policy)
        print(message)

    # Choose which objects of the dataset act as landmarks.
    landmark_indices = compute_subset(
        dataset,
        distance,
        k,
        landmark_policy=prototype_policy,
    )

    # Gather the landmark objects one by one, so that plain lists (which
    # do not support fancy indexing) work as well as arrays.
    landmarks = [dataset[index] for index in landmark_indices]

    # The indices are returned alongside the matrix so that callers can
    # recover which objects were used as prototypes.
    return distance(dataset, landmarks), landmark_indices