-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_part3.py
64 lines (52 loc) · 2.19 KB
/
run_part3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
File name: run_part3.py
Author: Patrick Cummings
Date created: 11/18/2019
Date last modified: 11/24/2019
Python Version: 3.7
"""
import csv
import json
from pathlib import Path
import pandas as pd
from models.adaboost import AdaBoostClassifier
train_set = pd.read_csv('data/pa3_train.csv')
validation_set = pd.read_csv('data/pa3_val.csv')
test_set = pd.read_csv('data/pa3_test.csv')
# Drop 'veil-type_p', feature has value 1 for all instances.
train_set = train_set.drop('veil-type_p', axis=1)
validation_set = validation_set.drop('veil-type_p', axis=1)
test_set = test_set.drop('veil-type_p', axis=1)
# Run AdaBoost with varied number of base classifiers (L).
for L in [1, 2, 5, 10, 15]:
boosted_trees = AdaBoostClassifier(train_set, validation_set, test_set, label='class', n_classifiers=L, max_depth=1)
results = boosted_trees.train()
# Save output for learned model to .json file.
output_folder = Path('model_output/part3')
output_path = Path(__file__).parent.resolve().joinpath(output_folder)
training_file = output_path.joinpath(Path('boosted_L_' + str(L) + '.json'))
# Create output directory if doesn't exist.
if not Path(output_path).exists():
Path(output_path).mkdir()
with open(training_file, 'w') as f:
json.dump(results, f, indent=4)
# Run AdaBoost with L = 6 and max depth = 2.
L = 6
boosted_trees = AdaBoostClassifier(train_set, validation_set, test_set, label='class', n_classifiers=L, max_depth=2)
results = boosted_trees.train()
# Save output for learned model to .json file.
output_folder = Path('model_output/part3')
output_path = Path(__file__).parent.resolve().joinpath(output_folder)
training_file = output_path.joinpath(Path('boosted_L_' + str(L) + '_depth_2' + '.json'))
# Create output directory if doesn't exist.
if not Path(output_path).exists():
Path(output_path).mkdir()
with open(training_file, 'w') as f:
json.dump(results, f, indent=4)
# Write test set predictions to csv.
test_predictions = results['test_predictions']
prediction_file = output_path.joinpath(Path('pa3label.csv'))
with open(prediction_file, 'w') as fp:
writer = csv.writer(fp)
for i in test_predictions:
writer.writerows([[i]])