Skip to content

Commit

Permalink
Fix issues re. clustering with a custom distance(nodeP, nodeQ) func…
Browse files Browse the repository at this point in the history
…tion

- Only AP, k-medoids, and HCA should allow this two-argument distance function.
- Revise HCA and k-medoids to allow for use of `nodeP` and `nodeQ`.
- Add tests for HCA and k-medoids with `distance(nodeP, nodeQ)`. AP already uses this format in its main test. The new tests verify both the arguments passed to the distance function and the end result.
- Remove the erroneous `distance()` signatures from the docs for k-means and fuzzy c-means.

Ref: Clustering without attributes #2368
  • Loading branch information
maxkfranz committed Apr 24, 2019
1 parent 8ec83f6 commit 98b6149
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 14 deletions.
14 changes: 4 additions & 10 deletions documentation/docmaker.json
Original file line number Diff line number Diff line change
Expand Up @@ -3278,11 +3278,8 @@
{ "name": "function( length, getPAt, getQAt[, nodeP, nodeQ] )", "descr": "A custom function that returns the distance between attribute vectors `p` and `q`.", "fields": [
{ "name": "length", "descr": "The length of the vectors." },
{ "name": "getPAt(i)", "descr": "A function that returns the ith value of the `p` vector." },
{ "name": "getQAt(i)", "descr": "A function that returns the ith value of the `q` vector." },
{ "name": "nodeP", "optional": true, "descr": "An optionally-used reference to the node associated with the `p` attribute vector. It is useful for affecting the weights with information outside of the attributes, such as connectivity." },
{ "name": "nodeQ", "optional": true, "descr": "An optionally-used reference to the node associated with the `q` attribute vector. It is useful for affecting the weights with information outside of the attributes, such as connectivity." }
] },
{ "name": "function( nodeP, nodeQ )", "descr": "A custom function that returns the distance between `nodeP` and `nodeQ`. This allows for specifying the distance matrix directly, forgoing attributes." }
{ "name": "getQAt(i)", "descr": "A function that returns the ith value of the `q` vector." }
] }
] },
{ "name": "maxIterations", "descr": "The maximum number of iterations of the algorithm to run (default `10`).", "optional": true },
{ "name": "sensitivityThreshold", "descr": "The coefficient difference threshold used to determine whether the algorithm has converged (default `0.001`).", "optional": true }
Expand Down Expand Up @@ -3342,11 +3339,8 @@
{ "name": "function( length, getPAt, getQAt[, nodeP, nodeQ] )", "descr": "A custom function that returns the distance between attribute vectors `p` and `q`.", "fields": [
{ "name": "length", "descr": "The length of the vectors." },
{ "name": "getPAt(i)", "descr": "A function that returns the ith value of the `p` vector." },
{ "name": "getQAt(i)", "descr": "A function that returns the ith value of the `q` vector." },
{ "name": "nodeP", "optional": true, "descr": "An optionally-used reference to the node associated with the `p` attribute vector. It is useful for affecting the weights with information outside of the attributes, such as connectivity." },
{ "name": "nodeQ", "optional": true, "descr": "An optionally-used reference to the node associated with the `q` attribute vector. It is useful for affecting the weights with information outside of the attributes, such as connectivity." }
] },
{ "name": "function( nodeP, nodeQ )", "descr": "A custom function that returns the distance between `nodeP` and `nodeQ`. This allows for specifying the distance matrix directly, forgoing attributes." }
{ "name": "getQAt(i)", "descr": "A function that returns the ith value of the `q` vector." }
] }
] },
{ "name": "maxIterations", "descr": "The maximum number of iterations of the algorithm to run (default `10`).", "optional": true },
{ "name": "sensitivityThreshold", "descr": "The coefficient difference threshold used to determine whether the algorithm has converged (default `0.001`).", "optional": true }
Expand Down
4 changes: 2 additions & 2 deletions src/collection/algorithms/hierarchical-clustering.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ let mergeClosest = function( clusters, index, dists, mins, opts ) {
let dist;
let attrs = opts.attributes;

let getDist = (n1, n2) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2) );
let getDist = (n1, n2) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2), n1, n2 );

for ( let i = 0; i < clusters.length; i++ ) {
let key = clusters[i].key;
Expand Down Expand Up @@ -241,7 +241,7 @@ let hierarchicalClustering = function( options ){
let opts = setOptions( options );

let attrs = opts.attributes;
let getDist = (n1, n2) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2) );
let getDist = (n1, n2) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2), n1, n2 );

// Begin hierarchical algorithm
let clusters = [];
Expand Down
7 changes: 5 additions & 2 deletions src/collection/algorithms/k-clustering.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ if( process.env.NODE_ENV !== 'production' ){ /* eslint-disable no-console, no-un
} /* eslint-enable */

/**
 * Compute the distance between a centroid (P) and a node (Q).
 *
 * @param {*} type - Distance specifier, forwarded unchanged to `clusteringDistance`.
 * @param {*} node - The node whose attribute values form the Q vector; also forwarded as `nodeQ`.
 * @param {*} centroid - In `'kMedoids'` mode this is read through the attribute functions
 *   (i.e. treated like a node); in every other mode it is indexed directly (`centroid[i]`).
 *   It is forwarded as `nodeP` in both cases.
 * @param {Function[]} attributes - Attribute accessors; `attributes[i](x)` yields the ith value for `x`.
 * @param {string} mode - Clustering mode; only `'kMedoids'` changes how `centroid` is read.
 * @returns {*} Whatever `clusteringDistance` returns for the given arguments.
 */
let getDist = function(type, node, centroid, attributes, mode){
  // Outside k-medoids the centroid is indexed as a plain vector rather than read via the accessors.
  let noNodeP = mode !== 'kMedoids';
  let getP = noNodeP ? ( i => centroid[i] ) : ( i => attributes[i](centroid) );
  let getQ = i => attributes[i](node);

  // NOTE(review): when `noNodeP` is true, `nodeP` is the raw centroid value rather than a node —
  // confirm that `clusteringDistance` / custom distance functions tolerate this.
  let nodeP = centroid;
  let nodeQ = node;

  // Forward the node references so a custom distance function can use them.
  return clusteringDistance( type, attributes.length, getP, getQ, nodeP, nodeQ );
};

let randomCentroids = function( nodes, k, attributes ) {
Expand Down
41 changes: 41 additions & 0 deletions test/collection-hierarchical.js
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,47 @@ describe('Algorithms', function(){
expect(clustersAtLevel10[4][0].id()).to.equal('F');
expect(clustersAtLevel10[5][0].id()).to.equal('D');
});

it('allows a custom 2-arg distance function', function(){
  // Euclidean distance over the X1/X2 data fields, read directly from the two nodes.
  var nodeDistance = function(nodeP, nodeQ){
    expect(nodeP).to.exist;
    expect(nodeQ).to.exist;

    // both arguments should be collection-like, i.e. expose id()
    expect(nodeP.id()).to.exist;
    expect(nodeQ.id()).to.exist;

    var deltaX1 = nodeP.data('X1') - nodeQ.data('X1');
    var deltaX2 = nodeP.data('X2') - nodeQ.data('X2');

    return Math.sqrt( (deltaX1 * deltaX1) + (deltaX2 * deltaX2) );
  };

  var clusters = cy.elements().hierarchicalClustering({
    linkage: 'min',
    distance: nodeDistance,
    mode: 'dendrogram',
    dendrogramDepth: 2,
    addDendrogram: false
  });

  expect(clusters).to.exist;

  // The outcome should match the 'Check level 2 of dendrogram' case:
  // 4 clusters are expected at level 2 for this example.
  expect(clusters.length).to.equal(4);

  // Expected member ids, per cluster, in order.
  var expectedIds = [
    ['B'],
    ['A'],
    ['C'],
    ['E', 'F', 'D']
  ];

  for ( var c = 0; c < expectedIds.length; c++ ) {
    for ( var m = 0; m < expectedIds[c].length; m++ ) {
      expect(clusters[c][m].id()).to.equal(expectedIds[c][m]);
    }
  }
});
}

});
Expand Down
37 changes: 37 additions & 0 deletions test/collection-k-medoids.js
Original file line number Diff line number Diff line change
Expand Up @@ -233,5 +233,42 @@ describe('Algorithms', function(){

});

it('allows a custom 2-arg distance function', function(){
  // Manhattan distance over attrA/attrB, read directly from the two nodes.
  var nodeDistance = function(nodeP, nodeQ){
    expect(nodeP).to.exist;
    expect(nodeQ).to.exist;

    // both arguments should be collection-like, i.e. expose id()
    expect(nodeP.id()).to.exist;
    expect(nodeQ.id()).to.exist;

    var deltaA = Math.abs(nodeP.data('attrA') - nodeQ.data('attrA'));
    var deltaB = Math.abs(nodeP.data('attrB') - nodeQ.data('attrB'));

    return deltaA + deltaB;
  };

  var clusters = cy.elements().kMedoids({
    k: 2,
    maxIterations: 10,
    testMode: true,
    testCentroids: [n2, n8],
    distance: nodeDistance
  });

  // True when the ith resulting cluster holds the same elements as the ith expected cluster.
  var matchesExpectedCluster = function(index){
    var mergeInto = function(collection, ele){
      return collection.merge(ele);
    };
    var wanted = expectedClusters[index].elements.reduce(mergeInto, cy.collection());

    return wanted.same(clusters[index]);
  };

  expect(clusters).to.exist;
  expect(clusters.length).to.equal(2);

  expect(matchesExpectedCluster(0), '0th cluster expected').to.be.true;
  expect(matchesExpectedCluster(1), '1st cluster expected').to.be.true;
});

});
});

0 comments on commit 98b6149

Please sign in to comment.