-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cu
128 lines (97 loc) · 4.72 KB
/
main.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "utils.h"
#include "ga_kernels.cuh"
#include <stdio.h>
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;
const int NUM_CITIES = 100;
const int SIZE_INDIVIDUAL = NUM_CITIES + 1;
const int POPULATION_SIZE = 10000;
const int NUM_GENERATIONS = 1500;
const int TOURNAMENT_SIZE = 50;
const float MUTATION_RATE = 1.0;
int main() {
vector<vector<int>> population = initialize_population(POPULATION_SIZE, NUM_CITIES);
cout << "TSP GA CUDA IMPLEMENTATION START" << endl <<
"================================" << endl;
/*cout << "Population:" << endl;
for (int i = 0; i < POPULATION_SIZE; ++i) {
cout << "Permutation " << i << ": ";
for (int city : population[i]) {
cout << city << " ";
}
cout << endl;
}*/
int *d_population, *d_selected_individ, * d_offspring;
float *d_fitness_scores, *d_distance_matrix;
curandState *d_states;
cudaMalloc(&d_population, POPULATION_SIZE * SIZE_INDIVIDUAL * sizeof(int));
cudaMalloc(&d_selected_individ, POPULATION_SIZE * SIZE_INDIVIDUAL * sizeof(int));
cudaMalloc(&d_offspring, POPULATION_SIZE * SIZE_INDIVIDUAL * sizeof(int));
cudaMalloc(&d_fitness_scores, POPULATION_SIZE * sizeof(float));
cudaMalloc(&d_distance_matrix, NUM_CITIES * NUM_CITIES * sizeof(float));
cudaMalloc(&d_states, POPULATION_SIZE * sizeof(curandState));
vector<int> flat_population;
flat_population.reserve(POPULATION_SIZE * SIZE_INDIVIDUAL);
for (const auto& individual : population) {
flat_population.insert(flat_population.end(), individual.begin(), individual.end());
}
/*cout << "flat_population: ";
for (const int& element : flat_population) {
cout << element << " ";
}
cout << endl;*/
cudaMemcpy(d_population, flat_population.data(), POPULATION_SIZE * SIZE_INDIVIDUAL * sizeof(int), cudaMemcpyHostToDevice);
float distance_matrix[NUM_CITIES * NUM_CITIES];
generate_distance_matrix(distance_matrix, NUM_CITIES);
/*for (int i = 0; i < NUM_CITIES; ++i) {
for (int j = 0; j < NUM_CITIES; ++j) {
cout << distance_matrix[i * NUM_CITIES + j] << "\t";
}
cout << endl;
}*/
cudaMemcpy(d_distance_matrix, distance_matrix, NUM_CITIES * NUM_CITIES * sizeof(float), cudaMemcpyHostToDevice);
int threads_per_block = 256;
int num_blocks = (POPULATION_SIZE + threads_per_block - 1) / threads_per_block;
float best_fit = FLT_MAX;
int best_gen = 0;
vector<int> best_individual(SIZE_INDIVIDUAL);
//setup kernel
unsigned long random_seed = generate_random_seed();
setup_kernel << <num_blocks, threads_per_block >> > (d_states, random_seed);
for (int generation = 0; generation < NUM_GENERATIONS; ++generation) {
cout << "Generation: " << generation << endl;
tournament_selection << <num_blocks, threads_per_block >> > (d_population, d_selected_individ, d_fitness_scores, POPULATION_SIZE, SIZE_INDIVIDUAL, NUM_CITIES, TOURNAMENT_SIZE, d_states);
simple_crossover << <num_blocks, threads_per_block >> > (d_selected_individ, d_offspring, POPULATION_SIZE, SIZE_INDIVIDUAL, NUM_CITIES, d_states);
simple_mutation << <num_blocks, threads_per_block >> > (d_offspring, d_population, POPULATION_SIZE, SIZE_INDIVIDUAL, NUM_CITIES, MUTATION_RATE, d_states);
evaluate_population << <num_blocks, threads_per_block >> > (d_population, d_fitness_scores, d_distance_matrix, POPULATION_SIZE, SIZE_INDIVIDUAL, NUM_CITIES);
cudaMemcpy(flat_population.data(), d_population, POPULATION_SIZE * SIZE_INDIVIDUAL * sizeof(int), cudaMemcpyDeviceToHost);
vector<float> fitness_scores(POPULATION_SIZE);
cudaMemcpy(fitness_scores.data(), d_fitness_scores, POPULATION_SIZE * sizeof(float), cudaMemcpyDeviceToHost);
auto min_it = min_element(fitness_scores.begin(), fitness_scores.end());
int best_idx = distance(fitness_scores.begin(), min_it);
float current_best_fit = *min_it;
if (current_best_fit < best_fit) {
best_fit = current_best_fit;
best_gen = generation;
for (int i = 0; i < SIZE_INDIVIDUAL; i++) {
best_individual[i] = flat_population[best_idx * SIZE_INDIVIDUAL + i];
}
}
cout << "Best individual: [ ";
for (int gene : best_individual) {
cout << gene << " ";
}
cout << "] found at gen: " << best_gen << endl << "Fitness: " << best_fit << endl;
}
cudaFree(d_population);
cudaFree(d_offspring);
cudaFree(d_selected_individ);
cudaFree(d_fitness_scores);
cudaFree(d_distance_matrix);
cudaFree(d_states);
return 0;
}