-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuploadToMongoDB.py
83 lines (64 loc) · 2.81 KB
/
uploadToMongoDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import pandas as pd
import pymongo
def find_csv_file(directory):
    """Return the path of the first CSV file in *directory*, or ``None``.

    Entries are examined in sorted name order, because ``os.listdir`` makes
    no ordering guarantee; sorting means the same file is picked on every
    run when the directory holds several CSV files. Only regular files
    count, so a sub-directory whose name happens to end in ``.csv`` is
    skipped.

    Args:
        directory: Path of the directory to search (not searched recursively).

    Returns:
        ``directory`` joined with the matching file name, or ``None`` when
        the directory contains no ``.csv`` file.

    Raises:
        OSError: If *directory* cannot be listed (for example, it does not
            exist).
    """
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if name.endswith(".csv") and os.path.isfile(path):
            return path
    return None
def hash_year(year, cutoff_year):
    """Return the partition label for *year* relative to *cutoff_year*.

    Years strictly below the cutoff map to ``'before'``; the cutoff year
    itself and every later year map to ``'after'``.
    """
    return 'before' if year < cutoff_year else 'after'


def upload_based_on_year(directory, collection_name_prefix, database_name_prefix, mongo_uri, cutoff_year=2020):
    """Upload each row of a CSV file to MongoDB, split into two databases by year.

    The CSV file located by ``find_csv_file(directory)`` is loaded with
    pandas. Every row is inserted as one document into the collection named
    *collection_name_prefix* of either
    ``{database_name_prefix}_before_{cutoff_year}`` or
    ``{database_name_prefix}_after_{cutoff_year}``, chosen by comparing the
    row's ``Year`` column with *cutoff_year* (see ``hash_year``).

    Args:
        directory: Directory that is searched for the CSV file.
        collection_name_prefix: Name of the target collection; it is used
            unchanged in both databases.
        database_name_prefix: Leading part of the two database names.
        mongo_uri: MongoDB connection URI.
        cutoff_year: Rows with ``Year`` below this value go to the "before"
            database, all others to the "after" database.

    Returns:
        None. When no CSV file is found a message is printed and nothing is
        uploaded.

    Raises:
        KeyError: If the CSV has no ``Year`` column.
        ValueError: If a ``Year`` value cannot be converted to ``int``.
    """
    csv_file_path = find_csv_file(directory)
    if not csv_file_path:
        print(f"No CSV file found in directory: {directory}")
        return

    df = pd.read_csv(csv_file_path)
    collection_name = collection_name_prefix

    # The context manager closes the client's connections even when an
    # insert fails or a malformed row raises part-way through the upload.
    with pymongo.MongoClient(mongo_uri) as client:
        for _, row in df.iterrows():
            suffix = hash_year(int(row['Year']), cutoff_year)
            database_name = f"{database_name_prefix}_{suffix}_{cutoff_year}"
            client[database_name][collection_name].insert_one(row.to_dict())
            print(f"Data uploaded successfully to {collection_name} in {database_name}.")


if __name__ == '__main__':
    # MongoDB URI for local instance
    mongo_uri = "mongodb://localhost:27017"
    # Replace with the directory that contains your CSV file
    directory_path = "Electric_Vehicle_Population_Data_Cleaned"
    database_name_prefix = "Electric_Vehicles"
    collection_name_prefix = "dataCleaned"
    cutoff_year = 2020

    upload_based_on_year(directory_path, collection_name_prefix, database_name_prefix, mongo_uri, cutoff_year)