-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconstants.py
68 lines (48 loc) · 2.1 KB
/
constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File: constants.py
# ---------------------------------------------------------------------------
# File-related constants
# ---------------------------------------------------------------------------

# Name of the folder that holds the data.
data_path = "data"

# Names of the data files.
bus_file = "business.csv"
users_file = "users.csv"
review_file = "train_reviews.csv"
validate_data_file = "validate_queries.csv"
test_data_file = "test_queries.csv"

# Business and user dictionaries saved as CSV: the features of interest,
# filled in / cleaned, for each business id and user id.
bus_dict_file = "bus_dict.csv"
users_dict_file = "users_dict.csv"

# Files produced by preprocessing.
huge_train_data_file = "cleaned_train_review.csv"
cleaned_validate_queries = "cleaned_validate_queries.csv"
cleaned_test_queries = "cleaned_test_queries.csv"

# File produced by running project.py.
submission_file = "submission.csv"
# ---------------------------------------------------------------------------
# Feature-related constants (business)
# ---------------------------------------------------------------------------

# Identifier column of a business.
bus_features_id = ["business_id"]

# Numerical business features.
bus_features_numerical = [
    "stars",
    "review_count",
    "attributes_RestaurantsPriceRange2",
]

# Boolean business features.
bus_features_bool = [
    "attributes_BikeParking",
    "attributes_BusinessAcceptsCreditCards",
    "attributes_GoodForKids",
    "attributes_HasTV",
    "attributes_OutdoorSeating",
    "attributes_RestaurantsDelivery",
    "attributes_RestaurantsGoodForGroups",
    "attributes_RestaurantsReservations",
    "attributes_RestaurantsTableService",
    "attributes_RestaurantsTakeOut",
    "attributes_WheelchairAccessible",
]

# Categorical business features.
bus_features_cat = ["attributes_NoiseLevel"]

# Features to keep.
bus_features_keep = [
    "attributes_BusinessAcceptsCreditCards",
    "attributes_RestaurantsGoodForGroups",
    "attributes_RestaurantsTakeOut",
    "attributes_HasTV",
    "attributes_BikeParking",
]

# Features to drop if wanted: start from a copy of every boolean feature
# (so bus_features_bool itself is left untouched) and strike out each entry
# of bus_features_keep; whatever remains is the drop list.
bus_features_drop = list(bus_features_bool)
for feature in bus_features_keep:
    try:
        bus_features_drop.remove(feature)
    except ValueError:
        # This kept feature is not in the drop list; nothing to remove.
        continue
# ---------------------------------------------------------------------------
# Feature-related constants (users and reviews)
# ---------------------------------------------------------------------------

# Identifier column of a user.
user_features_id = ["user_id"]

# Every user feature we want is numerical.
user_features_numerical = [
    "average_stars",
    "review_count",
    "useful",
]

# Review features: the user id, the business id and the star rating.
review_features = [
    "user_id",
    "business_id",
    "stars",
]

# Scale or not.
# NOTE(review): presumably toggles feature scaling in the code that imports
# this module -- nothing in this file reads it; confirm against its users.
scale = False