-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsemi_randomizer.py
160 lines (144 loc) · 5.35 KB
/
semi_randomizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import math
import random
# TODO: RENAME THIS FILE
class SemiRandomizer:
    """Draw random masses that follow the distribution of a measured data set.

    The masses read from the input file are binned into equally wide slices
    spanning the lowest to the highest mass.  A draw selects a slice with a
    probability proportional to the number of data points that fell into it
    and returns the mass at the centre of that slice.

    Attributes:
        amount_of_slices: Number of slices the mass range is divided into.
        slice_index_and_occurrences: Maps a slice index to the number of
            masses from the input file that fall into that slice.  Only
            slices containing at least one mass are present.
        highest_mass: Largest mass found in the input file.
        lowest_mass: Smallest mass found in the input file.
        amount_of_data_points: Number of masses read from the input file.
        slice_size: Width of a single slice.
    """

    def __init__(self, input_file_path, amount_of_slices):
        """Read the masses from a file and count how many fall in each slice.

        Args:
            input_file_path: Path to a text file holding one mass per line.
                Blank (empty or whitespace-only) lines mark incomplete data
                and are ignored.
            amount_of_slices: Number of slices to divide the mass range into;
                must be greater than zero.

        Raises:
            ValueError: If ``amount_of_slices`` is not positive, if the file
                contains no masses, or if a non-blank line is not a number.
        """
        if amount_of_slices <= 0:
            raise ValueError('amount_of_slices must be greater than zero')

        self.amount_of_slices = amount_of_slices
        self.slice_index_and_occurrences = {}

        masses = self._read_masses(input_file_path)
        if not masses:
            raise ValueError(
                'no masses found in {}'.format(input_file_path))

        self.highest_mass = max(masses)
        self.lowest_mass = min(masses)
        self.amount_of_data_points = len(masses)

        # Calculate the slice size
        self.slice_size = (self.highest_mass - self.lowest_mass) / self.amount_of_slices

        # The highest mass sits exactly on the upper edge of the range, so the
        # plain floor division would put it in a slice one past the last one.
        # Clamp it so it is counted in the last slice instead.
        last_slice_index = math.ceil(self.amount_of_slices) - 1

        # Determine how often masses in a certain slice occur
        for mass in masses:
            if self.slice_size == 0:
                # All masses are identical: everything lives in one slice.
                slice_index = 0
            else:
                slice_index = min(
                    math.floor((mass - self.lowest_mass) / self.slice_size),
                    last_slice_index,
                )
            self.slice_index_and_occurrences[slice_index] = (
                self.slice_index_and_occurrences.get(slice_index, 0) + 1
            )

    @staticmethod
    def _read_masses(input_file_path):
        """Return the masses listed in the file, skipping blank lines."""
        masses = []
        with open(input_file_path) as file:
            for line in file:
                # Some of our data may be incomplete; incomplete data shows up
                # as a blank line in the input file and is simply ignored.
                if line.strip():
                    masses.append(float(line))
        return masses

    def get_semi_random_slice_index(self):
        """Return a slice index, weighted by how many masses the slice holds."""
        random_number = random.randrange(0, self.amount_of_data_points)
        counter = 0
        # Walk the cumulative occurrence counts until the drawn number falls
        # inside the current slice's share of the data points.
        for slice_index, occurrences in self.slice_index_and_occurrences.items():
            counter += occurrences
            if random_number < counter:
                return slice_index

    def get_mass_from_index(self, slice_index):
        """Return the mass at the centre of the slice with the given index."""
        return self.lowest_mass + (slice_index + 0.5) * self.slice_size

    def get_semi_random_mass(self):
        """Return the centre mass of a randomly drawn, occurrence-weighted slice."""
        slice_index = self.get_semi_random_slice_index()
        return self.get_mass_from_index(slice_index)
if __name__ == '__main__':
    # Manual check: draw many masses and print how often each one came up.
    randomizer = SemiRandomizer('input/exoplanet_masses_kg.txt', 100)
    occurrences_per_mass = {}
    for _ in range(100000):
        drawn_mass = randomizer.get_semi_random_mass()
        occurrences_per_mass[drawn_mass] = occurrences_per_mass.get(drawn_mass, 0) + 1
    print(occurrences_per_mass)
# def get_lowest_mass(masses):
# current_lowest = 10e99
# for mass in masses:
# if mass < current_lowest:
# current_lowest = mass
# return current_lowest
#
#
# def get_highest_mass(masses):
# current_highest = -10e99
# for mass in masses:
# if mass > current_highest:
# current_highest = mass
# return current_highest
#
#
# def f(file_path, amount_of_slices=1000):
# slice_index_and_occurrences = {}
# # Init dictionary
# # for i in range(amount_of_slices):
# # slice_index_and_occurrences[i] = 0
# masses = []
# with open(file_path) as file:
# lines = file.readlines()
# for line in lines:
# # Some of our data may be incomplete, if so we want to simply ignore this data point
# if not line == '\n':
# masses.append(float(line))
#
# lowest_mass = get_lowest_mass(masses)
# highest_mass = get_highest_mass(masses)
# # print('highest_mass: {}'.format(highest_mass))
# # print('lowest_mass: {}'.format(lowest_mass))
# slice_size = (highest_mass - lowest_mass) / amount_of_slices
# # print('slice_size: {}'.format(slice_size))
# for mass in masses:
# # print((mass - lowest_mass) / slice_size)
# slice_index = math.floor((mass - lowest_mass) / slice_size)
# # print('{}-{}'.format(mass, slice_index))
# if slice_index in slice_index_and_occurrences:
# slice_index_and_occurrences[slice_index] += 1
# else:
# slice_index_and_occurrences[slice_index] = 1
#
# return slice_index_and_occurrences
#
#
# def get_semi_random_index(dict):
# amount_of_data_points = 0
# for key, value in dict.items():
# amount_of_data_points += value
#
# for i in range(0, 1):
# random_number = random.randrange(0, amount_of_data_points)
# counter = 0
# # print(random_number)
# for key, value in dict.items():
# counter += value
# if random_number < counter:
# return key
#
# if __name__ == '__main__':
# # dict = f('input/exoplanet_masses.txt')
# dict = f('input/semi_randomizer_test_file.txt', 3)
#
# temp_dict = {}
# for i in range(10000):
# j = get_semi_random_index(dict)
# if j not in temp_dict:
# temp_dict[j] = 1
# else:
# temp_dict[j] += 1
# print(temp_dict)
# amount_of_data_points = 0
# for key, value in dict.items():
# amount_of_data_points += value
# for i in range(0, 1):
# random_number = random.randrange(0, amount_of_data_points)
# print(random_number)
# for key, value in dict.items():
# if random_number < value:
# print(key)
# break
# print(dict)