Skip to content

Commit

Permalink
Initial commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
gyaikhom committed Oct 21, 2015
1 parent 5f73a3f commit 45b450a
Show file tree
Hide file tree
Showing 6 changed files with 542 additions and 2 deletions.
54 changes: 52 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,52 @@
# dbscan
Implements the DBSCAN Clustering algorithm
# The DBSCAN Clustering Algorithm

In this project, we implement the DBSCAN clustering algorithm. For
further details, please visit my
[homepage](http://yaikhom.com/2015/09/04/implementing-the-dbscan-clustering-algorithm.html),
or view the NOWEB generated documentation `dbscan.pdf`.

## Source code

This repository contains the following source code and data files:

* `dbscan.c` - A C programming language implementation (uses 3D data points).
* `dbscan.js` - A JavaScript implementation (uses 2D data points).
* `dbscan.min.js` - A minified JavaScript implementation.
* `example.dat` - Example data file.

## Usage

To run the algorithm on the supplied example data, first compile

$ clang -O2 -Wall -g -o dbscan dbscan.c -lm

and then run the program:

$ cat example.dat | ./dbscan

This will produce output as follows:

Epsilon: 1.000000
Minimum points: 2
Number of points: 53
x y z cluster_id
----------------------------------------------
1.00 3.00 1.00: 0
1.00 4.00 1.00: 0
1.00 5.00 1.00: 0
1.00 6.00 1.00: 0
2.00 2.00 1.00: 2
2.00 3.00 0.00: 1
2.00 4.00 0.00: 1
2.00 5.00 0.00: 1
2.00 6.00 0.00: 1
2.00 7.00 1.00: 3
3.00 1.00 1.00: 2
3.00 2.00 1.00: 2
...

If you wish to try the algorithm interactively, a JavaScript
implementation is available
[here](http://yaikhom.com/2015/09/04/implementing-the-dbscan-clustering-algorithm.html). This
example uses HTML5 canvas and was implemented using
[d3js](http://d3js.org) for DOM manipulation and user interaction.
335 changes: 335 additions & 0 deletions dbscan.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
/* Copyright 2015 Gagarine Yaikhom (MIT License) */
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Labels stored in point_t.cluster_id for points that do not (yet) belong
 * to a cluster. Real cluster ids are assigned by dbscan() starting at 0.
 * Negative replacement lists are parenthesized so that the macros expand
 * safely inside any surrounding expression.
 */
#define UNCLASSIFIED (-1)
#define NOISE (-2)

/* Results of expand(): whether the examined point started a cluster. */
#define CORE_POINT 1
#define NOT_CORE_POINT 0

/* Status codes returned by the list and clustering helpers. */
#define SUCCESS 0
#define FAILURE (-3)

/* A 3D data point together with the label given to it by the clustering. */
typedef struct point_s point_t;
struct point_s {
    double x;
    double y;
    double z;
    int cluster_id; /* cluster index, or UNCLASSIFIED / NOISE */
};

/* Singly linked list node that records the index of one point. */
typedef struct node_s node_t;
struct node_s {
    unsigned int index; /* position of the point in the points array */
    node_t *next;       /* following node, or NULL at the end of the list */
};

/* A list of point indices, kept with its length and both end nodes. */
typedef struct epsilon_neighbours_s epsilon_neighbours_t;
struct epsilon_neighbours_s {
    unsigned int num_members; /* number of nodes currently in the list */
    node_t *head;             /* first node, or NULL when the list is empty */
    node_t *tail;             /* last node; new nodes are linked after it */
};

/* --- Neighbour list helpers --------------------------------------- */

/* Allocate a node holding `index`; NULL when allocation fails. */
node_t *create_node(unsigned int index);

/* Append `index` to the end of `en`; SUCCESS or FAILURE. */
int append_at_end(unsigned int index, epsilon_neighbours_t *en);

/* Collect every other point whose distance from points[index] does not
 * exceed `epsilon`; NULL on allocation failure. */
epsilon_neighbours_t *get_epsilon_neighbours(unsigned int index,
                                             point_t *points,
                                             unsigned int num_points,
                                             double epsilon,
                                             double (*dist)(point_t *a, point_t *b));

/* Print the coordinates of every point referenced by `en`. */
void print_epsilon_neighbours(point_t *points, epsilon_neighbours_t *en);

/* Free every node of `en` and `en` itself; NULL is accepted. */
void destroy_epsilon_neighbours(epsilon_neighbours_t *en);

/* --- Clustering ---------------------------------------------------- */

/* Label every point in `points` with a cluster id or NOISE. */
void dbscan(point_t *points,
            unsigned int num_points,
            double epsilon,
            unsigned int minpts,
            double (*dist)(point_t *a, point_t *b));

/* Try to start cluster `cluster_id` at points[index];
 * CORE_POINT, NOT_CORE_POINT or FAILURE. */
int expand(unsigned int index,
           unsigned int cluster_id,
           point_t *points,
           unsigned int num_points,
           double epsilon,
           unsigned int minpts,
           double (*dist)(point_t *a, point_t *b));

/* Grow cluster `cluster_id` through points[index], queueing newly
 * reached points on `seeds`; SUCCESS or FAILURE. */
int spread(unsigned int index,
           epsilon_neighbours_t *seeds,
           unsigned int cluster_id,
           point_t *points,
           unsigned int num_points,
           double epsilon,
           unsigned int minpts,
           double (*dist)(point_t *a, point_t *b));

/* --- Distance functions -------------------------------------------- */

/* Straight-line distance between two points. */
double euclidean_dist(point_t *a, point_t *b);

/* NOTE(review): declared here, but no definition is visible in this
 * part of the file - confirm that one exists before calling it. */
double adjacent_intensity_dist(point_t *a, point_t *b);

/* --- Input and output ---------------------------------------------- */

/* Read epsilon, minpts and the points from `file`; returns the number
 * of points and stores the allocated array in `*points`. */
unsigned int parse_input(FILE *file,
                         point_t **points,
                         double *epsilon,
                         unsigned int *minpts);

/* Print a table of the points and their cluster ids. */
void print_points(point_t *points, unsigned int num_points);

/*
 * Allocate a list node that records `index`.
 *
 * Returns the new node with its `next` link set to NULL. When the
 * allocation fails the error is reported through perror() and NULL is
 * returned.
 */
node_t *create_node(unsigned int index)
{
    node_t *node = calloc(1, sizeof *node);

    if (node == NULL) {
        perror("Failed to allocate node.");
        return NULL;
    }
    node->index = index;
    node->next = NULL;
    return node;
}

/*
 * Append a node holding `index` to the end of the list `en`.
 *
 * Returns SUCCESS after linking the node and incrementing
 * en->num_members, or FAILURE when the node cannot be allocated.
 *
 * On failure the list is left exactly as it was and is NOT freed: `en`
 * belongs to the caller, and the callers in this file go on to use or
 * destroy it after a failed append, so releasing it here would lead to a
 * use-after-free and a double free.
 */
int append_at_end(
    unsigned int index,
    epsilon_neighbours_t *en)
{
    node_t *n = create_node(index);
    if (n == NULL)
        return FAILURE;

    if (en->head == NULL) {
        /* First element: it is both the head and the tail. */
        en->head = n;
        en->tail = n;
    } else {
        en->tail->next = n;
        en->tail = n;
    }
    ++(en->num_members);
    return SUCCESS;
}

/*
 * Build the list of points that lie within `epsilon` of points[index].
 *
 * Every point other than `index` itself is tested with `dist`; a point is
 * skipped only when its distance is greater than `epsilon`. Indices are
 * appended in increasing order.
 *
 * Returns a newly allocated list that the caller must release with
 * destroy_epsilon_neighbours(), or NULL when an allocation fails.
 */
epsilon_neighbours_t *get_epsilon_neighbours(
    unsigned int index,
    point_t *points,
    unsigned int num_points,
    double epsilon,
    double (*dist)(point_t *a, point_t *b))
{
    /* Same type as num_points and index, so the comparisons below are not
     * mixed signed/unsigned and the counter cannot overflow as an int. */
    unsigned int i;
    epsilon_neighbours_t *en = (epsilon_neighbours_t *)
        calloc(1, sizeof(epsilon_neighbours_t));
    if (en == NULL) {
        perror("Failed to allocate epsilon neighbours.");
        return en;
    }
    for (i = 0; i < num_points; ++i) {
        if (i == index)
            continue;
        if (dist(&points[index], &points[i]) > epsilon)
            continue;
        if (append_at_end(i, en) == FAILURE) {
            /* Give up and release whatever was collected so far. */
            destroy_epsilon_neighbours(en);
            en = NULL;
            break;
        }
    }
    return en;
}

/*
 * Print the coordinates of every point referenced by the list `en`, one
 * "(x, y, z)" line per point, in list order. Does nothing when `en` is
 * NULL.
 */
void print_epsilon_neighbours(
    point_t *points,
    epsilon_neighbours_t *en)
{
    if (en) {
        node_t *h = en->head;
        while (h) {
            /* The format previously read "(%lfm, ..." which appended a
             * stray 'm' to the x coordinate only. */
            printf("(%lf, %lf, %lf)\n",
                   points[h->index].x,
                   points[h->index].y,
                   points[h->index].z);
            h = h->next;
        }
    }
}

/*
 * Release a neighbour list: every node first, then the list header.
 * Passing NULL is allowed and does nothing.
 */
void destroy_epsilon_neighbours(epsilon_neighbours_t *en)
{
    node_t *cur, *following;

    if (en == NULL)
        return;

    for (cur = en->head; cur != NULL; cur = following) {
        following = cur->next; /* read the link before the node is freed */
        free(cur);
    }
    free(en);
}

/*
 * Run the clustering over `points`.
 *
 * Points are visited in array order. Each point still labelled
 * UNCLASSIFIED is handed to expand() with the next unused cluster id;
 * the id advances only when expand() reports CORE_POINT, so any other
 * result (NOT_CORE_POINT or FAILURE) leaves the id available for the
 * next attempt. Points that already carry another label are skipped.
 */
void dbscan(
    point_t *points,
    unsigned int num_points,
    double epsilon,
    unsigned int minpts,
    double (*dist)(point_t *a, point_t *b))
{
    unsigned int next_id = 0;
    unsigned int p;

    for (p = 0; p < num_points; ++p) {
        if (points[p].cluster_id != UNCLASSIFIED)
            continue;
        if (expand(p, next_id, points, num_points,
                   epsilon, minpts, dist) == CORE_POINT)
            ++next_id;
    }
}

/*
 * Try to start cluster `cluster_id` at points[index].
 *
 * The neighbours of the point are collected first. With fewer than
 * `minpts` neighbours the point is labelled NOISE and NOT_CORE_POINT is
 * returned. Otherwise the point and all of its neighbours receive
 * `cluster_id`, spread() is called for every entry of the seed list, and
 * CORE_POINT is returned. FAILURE is returned when the neighbour list
 * cannot be built. The result of spread() is not inspected.
 */
int expand(
    unsigned int index,
    unsigned int cluster_id,
    point_t *points,
    unsigned int num_points,
    double epsilon,
    unsigned int minpts,
    double (*dist)(point_t *a, point_t *b))
{
    node_t *cur;
    epsilon_neighbours_t *seeds =
        get_epsilon_neighbours(index, points, num_points, epsilon, dist);

    if (seeds == NULL)
        return FAILURE;

    if (seeds->num_members < minpts) {
        points[index].cluster_id = NOISE;
        destroy_epsilon_neighbours(seeds);
        return NOT_CORE_POINT;
    }

    /* Claim the point and its direct neighbours for this cluster. */
    points[index].cluster_id = cluster_id;
    for (cur = seeds->head; cur != NULL; cur = cur->next)
        points[cur->index].cluster_id = cluster_id;

    /* spread() may append to `seeds` while it is being walked; the link
     * is read only after each call, so appended entries are visited too. */
    for (cur = seeds->head; cur != NULL; cur = cur->next)
        spread(cur->index, seeds, cluster_id, points,
               num_points, epsilon, minpts, dist);

    destroy_epsilon_neighbours(seeds);
    return CORE_POINT;
}

/*
 * Grow cluster `cluster_id` through points[index].
 *
 * The neighbours of the point are collected; when there are at least
 * `minpts` of them, every neighbour labelled NOISE or UNCLASSIFIED is
 * given `cluster_id`. Neighbours that were UNCLASSIFIED are also appended
 * to `seeds` so that the caller processes them in turn.
 *
 * Returns SUCCESS, or FAILURE when the neighbour list cannot be built or
 * an append to `seeds` fails.
 */
int spread(
    unsigned int index,
    epsilon_neighbours_t *seeds,
    unsigned int cluster_id,
    point_t *points,
    unsigned int num_points,
    double epsilon,
    unsigned int minpts,
    double (*dist)(point_t *a, point_t *b))
{
    node_t *cur;
    epsilon_neighbours_t *reach =
        get_epsilon_neighbours(index, points, num_points, epsilon, dist);

    if (reach == NULL)
        return FAILURE;

    if (reach->num_members < minpts) {
        /* Too few neighbours: nothing is relabelled. */
        destroy_epsilon_neighbours(reach);
        return SUCCESS;
    }

    for (cur = reach->head; cur != NULL; cur = cur->next) {
        point_t *candidate = &points[cur->index];
        int label = candidate->cluster_id;

        /* Points that already belong to a cluster are left alone. */
        if (label != NOISE && label != UNCLASSIFIED)
            continue;

        /* Only UNCLASSIFIED points are queued; NOISE points are relabelled
         * without being added to the seed list. */
        if (label == UNCLASSIFIED &&
            append_at_end(cur->index, seeds) == FAILURE) {
            destroy_epsilon_neighbours(reach);
            return FAILURE;
        }
        candidate->cluster_id = cluster_id;
    }

    destroy_epsilon_neighbours(reach);
    return SUCCESS;
}

/*
 * Euclidean distance between two 3D points: the square root of the sum
 * of the squared coordinate differences.
 */
double euclidean_dist(point_t *a, point_t *b)
{
    const double dx = a->x - b->x;
    const double dy = a->y - b->y;
    const double dz = a->z - b->z;

    return sqrt(pow(dx, 2) + pow(dy, 2) + pow(dz, 2));
}

/*
 * Read a clustering problem from `file`.
 *
 * Expected input: a header "epsilon minpts num_points" followed by
 * num_points lines of "x y z". Every point read is labelled UNCLASSIFIED.
 *
 * On success the number of points is returned and `*points` receives a
 * calloc()-allocated array that the caller must free().
 *
 * 0 is returned, and `*points` is set to NULL so that the caller may
 * free() it unconditionally, when:
 *   - the header cannot be parsed,
 *   - the header declares zero points,
 *   - the array cannot be allocated, or
 *   - fewer complete points than declared can be read.
 * `*epsilon` and `*minpts` are only meaningful when the return value is
 * non-zero.
 */
unsigned int parse_input(
    FILE *file,
    point_t **points,
    double *epsilon,
    unsigned int *minpts)
{
    unsigned int num_points = 0, i;
    point_t *p;

    /* Never leave the caller's pointer indeterminate on a failure path. */
    *points = NULL;

    /* fscanf() returns the number of items converted; anything short of
     * three means the header is missing or malformed. */
    if (fscanf(file, "%lf %u %u\n",
               epsilon, minpts, &num_points) != 3) {
        fprintf(stderr, "Failed to parse input header.\n");
        return 0;
    }
    if (num_points == 0)
        return 0;

    p = calloc(num_points, sizeof *p);
    if (p == NULL) {
        perror("Failed to allocate points array");
        return 0;
    }
    for (i = 0; i < num_points; ++i) {
        if (fscanf(file, "%lf %lf %lf\n",
                   &(p[i].x), &(p[i].y), &(p[i].z)) != 3) {
            /* Reject truncated or malformed data instead of clustering
             * zero-filled placeholder points. */
            fprintf(stderr, "Failed to parse point %u of %u.\n",
                    i + 1, num_points);
            free(p);
            return 0;
        }
        p[i].cluster_id = UNCLASSIFIED;
    }
    *points = p;
    return num_points;
}

/*
 * Print the number of points, a table header, and then one line per
 * point with its three coordinates and its cluster id, in array order.
 */
void print_points(
    point_t *points,
    unsigned int num_points)
{
    unsigned int row;

    printf("Number of points: %u\n"
           " x y z cluster_id\n"
           "-----------------------------\n"
           , num_points);

    for (row = 0; row < num_points; ++row) {
        const point_t *pt = &points[row];

        printf("%5.2lf %5.2lf %5.2lf: %d\n",
               pt->x, pt->y, pt->z, pt->cluster_id);
    }
}

/*
 * Read a problem from standard input, cluster it with the Euclidean
 * distance, and print the parameters followed by the labelled points.
 * Nothing is printed when no points could be read.
 */
int main(void) {
    /* Initialised so that free() below is well defined even when
     * parse_input() returns 0 without storing an array. */
    point_t *points = NULL;
    double epsilon = 0.0;
    unsigned int minpts = 0;
    unsigned int num_points =
        parse_input(stdin, &points, &epsilon, &minpts);
    if (num_points) {
        dbscan(points, num_points, epsilon,
               minpts, euclidean_dist);
        printf("Epsilon: %lf\n", epsilon);
        printf("Minimum points: %u\n", minpts);
        print_points(points, num_points);
    }
    free(points);
    return 0;
}
Loading

0 comments on commit 45b450a

Please sign in to comment.