-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathK-Means Sequentail.cpp
145 lines (119 loc) · 3.82 KB
/
K-Means Sequentail.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
#define DIMENSIONS 2
#define MAX_ITERATIONS 100
typedef struct {
double x;
double y;
} Point;
double euclidean_distance(Point a, Point b) {
return sqrt(pow((a.x - b.x), 2) + pow((a.y - b.y), 2));
}
void read_points_from_file(const char *filename, int num_points, Point *points) {
FILE *file = fopen(filename, "r");
if (!file) {
fprintf(stderr, "Unable to open file %s.\n", filename);
exit(1);
}
// Read points from file
for (int i = 0; i < num_points; i++) {
fscanf(file, "%lf %lf", &points[i].x, &points[i].y);
}
fclose(file);
}
void k_means_clustering(const char *filename, int num_points, Point *points, int num_clusters) {
Point centroids[num_clusters];
// Initialize centroids
for (int i = 0; i < num_clusters; i++) {
centroids[i].x = points[i].x;
centroids[i].y = points[i].y;
}
clock_t start_time = clock();
// Perform K-means clustering
for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
int cluster_counts[num_clusters] = {0};
Point sum[num_clusters] = {{0.0, 0.0}};
int converged = 1;
for (int i = 0; i < num_points; i++) {
double min_dist = INFINITY;
int closest_centroid = -1;
for (int j = 0; j < num_clusters; j++) {
double dist = euclidean_distance(points[i], centroids[j]);
if (dist < min_dist) {
min_dist = dist;
closest_centroid = j;
}
}
// Update cluster assignment
if (closest_centroid != -1) {
cluster_counts[closest_centroid]++;
sum[closest_centroid].x += points[i].x;
sum[closest_centroid].y += points[i].y;
}
}
// Update centroids
for (int i = 0; i < num_clusters; i++) {
if (cluster_counts[i] > 0) {
Point new_centroid = {sum[i].x / cluster_counts[i], sum[i].y / cluster_counts[i]};
if (euclidean_distance(centroids[i], new_centroid) > 0.0001) {
centroids[i] = new_centroid;
converged = 0;
}
}
}
if (converged) {
break;
}
}
clock_t end_time = clock();
// Print final centroids
printf("Final Centroids for %s:\n", filename);
for (int i = 0; i < num_clusters; i++) {
printf("Centroid %d: %.4lf %.4lf\n", i + 1, centroids[i].x, centroids[i].y);
}
// Calculate and print execution time
double execution_time = (double)(end_time - start_time) / CLOCKS_PER_SEC;
printf("Execution Time for %s: %f seconds\n", filename, execution_time);
}
int main() {
// Define files
const char *file_names[] = {
"points_250_000.txt",
"points_50_000.txt",
"points_500.txt",
"points_100.txt",
"points_1_000_000.txt",
"points_100_000.txt",
"points_10_000.txt",
"points_1_000.txt"
};
const int num_points[] = {
250000,
50000,
500,
100,
1000000,
100000,
10000,
1000
};
// Iterate over files
for (int f = 0; f < sizeof(file_names) / sizeof(file_names[0]); f++) {
char filename[50];
strcpy(filename, "DataPoints/");
strcat(filename, file_names[f]);
int num = num_points[f];
Point *points = (Point *)malloc(num * sizeof(Point));
if (!points) {
fprintf(stderr, "Memory allocation failed.\n");
return 1;
}
read_points_from_file(filename, num, points);
k_means_clustering(filename, num, points, 10); // Assuming 10 clusters
free(points);
}
return 0;
}