-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
542 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,52 @@ | ||
# dbscan | ||
Implements the DBSCAN Clustering algorithm | ||
# The DBSCAN Clustering Algorithm | ||
|
||
In this project, we implement the DBSCAN clustering algorithm. For | ||
further details, please visit my | ||
[homepage](http://yaikhom.com/2015/09/04/implementing-the-dbscan-clustering-algorithm.html), | ||
or view the NOWEB generated documentation `dbscan.pdf`. | ||
|
||
## Source code | ||
|
||
This repository contains the following source code and data files: | ||
|
||
* `dbscan.c` - A C programming language implementation (uses 3D data points). | ||
* `dbscan.js` - A JavaScript implementation (uses 2D data points). | ||
* `dbscan.min.js` - A minified JavaScript implementation. | ||
* `example.dat` - Example data file. | ||
|
||
## Usage | ||
|
||
To run the algorithm on the supplied example data, first compile | ||
|
||
$ clang -O2 -Wall -g -o dbscan dbscan.c -lm | ||
|
||
and then run the program: | ||
|
||
$ cat example.dat | ./dbscan | ||
|
||
This will produce output as follows: | ||
|
||
Epsilon: 1.000000 | ||
Minimum points: 2 | ||
Number of points: 53 | ||
x y z cluster_id | ||
---------------------------------------------- | ||
1.00 3.00 1.00: 0 | ||
1.00 4.00 1.00: 0 | ||
1.00 5.00 1.00: 0 | ||
1.00 6.00 1.00: 0 | ||
2.00 2.00 1.00: 2 | ||
2.00 3.00 0.00: 1 | ||
2.00 4.00 0.00: 1 | ||
2.00 5.00 0.00: 1 | ||
2.00 6.00 0.00: 1 | ||
2.00 7.00 1.00: 3 | ||
3.00 1.00 1.00: 2 | ||
3.00 2.00 1.00: 2 | ||
... | ||
|
||
If you wish to try the algorithm interactively, a JavaScript | ||
implementation is available | ||
[here](http://yaikhom.com/2015/09/04/implementing-the-dbscan-clustering-algorithm.html). This | ||
example uses HTML5 canvas and was implemented using | ||
[d3js](http://d3js.org) for DOM manipulation and user interaction. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,335 @@ | ||
/* Copyright 2015 Gagarine Yaikhom (MIT License) */ | ||
#include <limits.h> | ||
#include <math.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
/* Labels stored in point_t.cluster_id for points that belong to no cluster:
 * UNCLASSIFIED is the initial label given while parsing, NOISE is assigned
 * by expand() to a point with too few epsilon-neighbours. */
#define UNCLASSIFIED (-1)
#define NOISE (-2)

/* Results reported by expand(). */
#define CORE_POINT 1
#define NOT_CORE_POINT 0

/* Generic status codes returned by the list and clustering helpers. */
#define SUCCESS 0
#define FAILURE (-3)
|
||
/* One input sample: three coordinates plus the label the algorithm assigns. */
typedef struct point_s {
    double x, y, z;  /* coordinates read from the input */
    int cluster_id;  /* UNCLASSIFIED, NOISE, or a cluster number */
} point_t;
|
||
/* Singly linked list cell that refers to a point by its array index. */
typedef struct node_s {
    unsigned int index;   /* position of the point in the points array */
    struct node_s *next;  /* following cell, or NULL at the end of the list */
} node_t;
|
||
/* A list of point indices with O(1) append: head/tail cells plus a count. */
typedef struct epsilon_neighbours_s {
    unsigned int num_members;  /* number of cells currently in the list */
    struct node_s *head;       /* first cell, NULL while the list is empty */
    struct node_s *tail;       /* last cell, where new cells are linked in */
} epsilon_neighbours_t;
|
||
node_t *create_node(unsigned int index); | ||
int append_at_end( | ||
unsigned int index, | ||
epsilon_neighbours_t *en); | ||
epsilon_neighbours_t *get_epsilon_neighbours( | ||
unsigned int index, | ||
point_t *points, | ||
unsigned int num_points, | ||
double epsilon, | ||
double (*dist)(point_t *a, point_t *b)); | ||
void print_epsilon_neighbours( | ||
point_t *points, | ||
epsilon_neighbours_t *en); | ||
void destroy_epsilon_neighbours(epsilon_neighbours_t *en); | ||
void dbscan( | ||
point_t *points, | ||
unsigned int num_points, | ||
double epsilon, | ||
unsigned int minpts, | ||
double (*dist)(point_t *a, point_t *b)); | ||
int expand( | ||
unsigned int index, | ||
unsigned int cluster_id, | ||
point_t *points, | ||
unsigned int num_points, | ||
double epsilon, | ||
unsigned int minpts, | ||
double (*dist)(point_t *a, point_t *b)); | ||
int spread( | ||
unsigned int index, | ||
epsilon_neighbours_t *seeds, | ||
unsigned int cluster_id, | ||
point_t *points, | ||
unsigned int num_points, | ||
double epsilon, | ||
unsigned int minpts, | ||
double (*dist)(point_t *a, point_t *b)); | ||
double euclidean_dist(point_t *a, point_t *b); | ||
double adjacent_intensity_dist(point_t *a, point_t *b); | ||
unsigned int parse_input( | ||
FILE *file, | ||
point_t **points, | ||
double *epsilon, | ||
unsigned int *minpts); | ||
void print_points( | ||
point_t *points, | ||
unsigned int num_points); | ||
|
||
node_t *create_node(unsigned int index) | ||
{ | ||
node_t *n = (node_t *) calloc(1, sizeof(node_t)); | ||
if (n == NULL) | ||
perror("Failed to allocate node."); | ||
else { | ||
n->index = index; | ||
n->next = NULL; | ||
} | ||
return n; | ||
} | ||
|
||
int append_at_end( | ||
unsigned int index, | ||
epsilon_neighbours_t *en) | ||
{ | ||
node_t *n = create_node(index); | ||
if (n == NULL) { | ||
free(en); | ||
return FAILURE; | ||
} | ||
if (en->head == NULL) { | ||
en->head = n; | ||
en->tail = n; | ||
} else { | ||
en->tail->next = n; | ||
en->tail = n; | ||
} | ||
++(en->num_members); | ||
return SUCCESS; | ||
} | ||
|
||
epsilon_neighbours_t *get_epsilon_neighbours( | ||
unsigned int index, | ||
point_t *points, | ||
unsigned int num_points, | ||
double epsilon, | ||
double (*dist)(point_t *a, point_t *b)) | ||
{ | ||
epsilon_neighbours_t *en = (epsilon_neighbours_t *) | ||
calloc(1, sizeof(epsilon_neighbours_t)); | ||
if (en == NULL) { | ||
perror("Failed to allocate epsilon neighbours."); | ||
return en; | ||
} | ||
for (int i = 0; i < num_points; ++i) { | ||
if (i == index) | ||
continue; | ||
if (dist(&points[index], &points[i]) > epsilon) | ||
continue; | ||
else { | ||
if (append_at_end(i, en) == FAILURE) { | ||
destroy_epsilon_neighbours(en); | ||
en = NULL; | ||
break; | ||
} | ||
} | ||
} | ||
return en; | ||
} | ||
|
||
void print_epsilon_neighbours( | ||
point_t *points, | ||
epsilon_neighbours_t *en) | ||
{ | ||
if (en) { | ||
node_t *h = en->head; | ||
while (h) { | ||
printf("(%lfm, %lf, %lf)\n", | ||
points[h->index].x, | ||
points[h->index].y, | ||
points[h->index].z); | ||
h = h->next; | ||
} | ||
} | ||
} | ||
|
||
/*
 * Release a neighbour list: every cell first, then the list header.
 * Passing NULL is allowed and does nothing.
 */
void destroy_epsilon_neighbours(epsilon_neighbours_t *en)
{
    if (en == NULL)
        return;

    node_t *cursor = en->head;
    while (cursor != NULL) {
        node_t *doomed = cursor;
        cursor = cursor->next; /* step forward before the cell is freed */
        free(doomed);
    }
    free(en);
}
|
||
void dbscan( | ||
point_t *points, | ||
unsigned int num_points, | ||
double epsilon, | ||
unsigned int minpts, | ||
double (*dist)(point_t *a, point_t *b)) | ||
{ | ||
unsigned int i, cluster_id = 0; | ||
for (i = 0; i < num_points; ++i) { | ||
if (points[i].cluster_id == UNCLASSIFIED) { | ||
if (expand(i, cluster_id, points, | ||
num_points, epsilon, minpts, | ||
dist) == CORE_POINT) | ||
++cluster_id; | ||
} | ||
} | ||
} | ||
|
||
int expand( | ||
unsigned int index, | ||
unsigned int cluster_id, | ||
point_t *points, | ||
unsigned int num_points, | ||
double epsilon, | ||
unsigned int minpts, | ||
double (*dist)(point_t *a, point_t *b)) | ||
{ | ||
int return_value = NOT_CORE_POINT; | ||
epsilon_neighbours_t *seeds = | ||
get_epsilon_neighbours(index, points, | ||
num_points, epsilon, | ||
dist); | ||
if (seeds == NULL) | ||
return FAILURE; | ||
|
||
if (seeds->num_members < minpts) | ||
points[index].cluster_id = NOISE; | ||
else { | ||
points[index].cluster_id = cluster_id; | ||
node_t *h = seeds->head; | ||
while (h) { | ||
points[h->index].cluster_id = cluster_id; | ||
h = h->next; | ||
} | ||
|
||
h = seeds->head; | ||
while (h) { | ||
spread(h->index, seeds, cluster_id, points, | ||
num_points, epsilon, minpts, dist); | ||
h = h->next; | ||
} | ||
|
||
return_value = CORE_POINT; | ||
} | ||
destroy_epsilon_neighbours(seeds); | ||
return return_value; | ||
} | ||
|
||
int spread( | ||
unsigned int index, | ||
epsilon_neighbours_t *seeds, | ||
unsigned int cluster_id, | ||
point_t *points, | ||
unsigned int num_points, | ||
double epsilon, | ||
unsigned int minpts, | ||
double (*dist)(point_t *a, point_t *b)) | ||
{ | ||
epsilon_neighbours_t *spread = | ||
get_epsilon_neighbours(index, points, | ||
num_points, epsilon, | ||
dist); | ||
if (spread == NULL) | ||
return FAILURE; | ||
if (spread->num_members >= minpts) { | ||
node_t *n = spread->head; | ||
point_t *d; | ||
while (n) { | ||
d = &points[n->index]; | ||
if (d->cluster_id == NOISE || | ||
d->cluster_id == UNCLASSIFIED) { | ||
if (d->cluster_id == UNCLASSIFIED) { | ||
if (append_at_end(n->index, seeds) | ||
== FAILURE) { | ||
destroy_epsilon_neighbours(spread); | ||
return FAILURE; | ||
} | ||
} | ||
d->cluster_id = cluster_id; | ||
} | ||
n = n->next; | ||
} | ||
} | ||
|
||
destroy_epsilon_neighbours(spread); | ||
return SUCCESS; | ||
} | ||
|
||
/*
 * Straight-line distance between `a` and `b` in three dimensions:
 * the square root of the sum of the squared coordinate differences.
 */
double euclidean_dist(point_t *a, point_t *b)
{
    const double dx = a->x - b->x;
    const double dy = a->y - b->y;
    const double dz = a->z - b->z;
    const double sum_of_squares = pow(dx, 2) + pow(dy, 2) + pow(dz, 2);

    return sqrt(sum_of_squares);
}
|
||
unsigned int parse_input( | ||
FILE *file, | ||
point_t **points, | ||
double *epsilon, | ||
unsigned int *minpts) | ||
{ | ||
unsigned int num_points, i = 0; | ||
fscanf(file, "%lf %u %u\n", | ||
epsilon, minpts, &num_points); | ||
point_t *p = (point_t *) | ||
calloc(num_points, sizeof(point_t)); | ||
if (p == NULL) { | ||
perror("Failed to allocate points array"); | ||
return 0; | ||
} | ||
while (i < num_points) { | ||
fscanf(file, "%lf %lf %lf\n", | ||
&(p[i].x), &(p[i].y), &(p[i].z)); | ||
p[i].cluster_id = UNCLASSIFIED; | ||
++i; | ||
} | ||
*points = p; | ||
return num_points; | ||
} | ||
|
||
void print_points( | ||
point_t *points, | ||
unsigned int num_points) | ||
{ | ||
unsigned int i = 0; | ||
printf("Number of points: %u\n" | ||
" x y z cluster_id\n" | ||
"-----------------------------\n" | ||
, num_points); | ||
while (i < num_points) { | ||
printf("%5.2lf %5.2lf %5.2lf: %d\n", | ||
points[i].x, | ||
points[i].y, points[i].z, | ||
points[i].cluster_id); | ||
++i; | ||
} | ||
} | ||
|
||
int main(void) { | ||
point_t *points; | ||
double epsilon; | ||
unsigned int minpts; | ||
unsigned int num_points = | ||
parse_input(stdin, &points, &epsilon, &minpts); | ||
if (num_points) { | ||
dbscan(points, num_points, epsilon, | ||
minpts, euclidean_dist); | ||
printf("Epsilon: %lf\n", epsilon); | ||
printf("Minimum points: %u\n", minpts); | ||
print_points(points, num_points); | ||
} | ||
free(points); | ||
return 0; | ||
} |
Oops, something went wrong.