pipeline_config.py
from dotenv import load_dotenv
from os import environ
# load environment variables from the .env file
load_dotenv()
# path to the folder containing the raw data
RAW_DATA_PATH = "./data/raw/"
# path to the folder to save the parsed data
PARSED_DATA_PATH = "./data/parsed/"
# path to the folder to save the calculated load profiles
LOADPROFILES_PATH = "./data/loadprofiles/"
# path to the folder containing metadata
METADATA_PATH = "./data/metadata/datasets/"
# path to the folder to save the generated metadata
GENERATED_METADATA_PATH = "./data/"
# path to the folder to save the calculated consumption data
CONSUMPTION_DATA_PATH = "./data/"
# path to the folder to save the cleaned raw data (devices removed) used for training
TRAINING_DATA_CLEANED_FOLDER = "./data/training_data/raw/"
# path to the folder containing the trained model
MODEL_PATH = "./data/trained_models/InceptionTime/"
# path to the labels generated by generate_training_data.py
LABELS_PATH = "./data/training_data/labels.pkl"
# path to the folder to save the predicted appliances
PREDICTED_APPLIANCES_PATH = "./data/"
# endpoint of the knowledge graph into which the data will be inserted
KNOWLEDGE_GRAPH_ENDPOINT = "http://193.2.205.14:7200/repositories/Electricity_Graph"
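# A minimal defensive sketch (an assumption, not part of the original file):
# fail fast with a clear error if the credentials needed for the PostgreSQL
# URL below are missing from the environment, instead of a bare KeyError.
for _var in ("DATABASE_USER", "DATABASE_PASSWORD"):
    if _var not in environ:
        raise RuntimeError(f"missing required environment variable: {_var}")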
# PostgreSQL URL used to store the data
POSTGRES_URL = f"postgresql://{environ['DATABASE_USER']}:{environ['DATABASE_PASSWORD']}@193.2.205.14:5432/Energy"
# steps to be executed (see the pipeline figure: https://github.com/sensorlab/energy-knowledge-graph/blob/main/images/pipeline.png); a dispatcher sketch follows the STEPS list
STEPS = [
"parse", # this step will parse the raw data and generate the uniform data format(step 1 in the figure)
"loadprofiles", # this step will generate the loadprofiles from the parsed data(step 2 in the figure)
"metadata", # this step will generate the metadata for the datasets(step 3 in the figure)
"consumption-data", # this step will generate the consumption data for the datasets(average daily consumption in kWh per appliance and average on/off event consumption per appliance)(step 2 in the figure)
"db-reset", # this step will reset the database and populate it with the metadata (step 4 in the figure)
"generate-links", # this step will generate links between the KG and Wikidata and DBpedia (step 7 in the figure)
"predict-devices", # this step will predict the devices for datasets with only aggregate data(step c) in the figure)
"add-predicted-devices" # this step will add the predicted devices to the KG(step c) and step 6 in the figure)
]
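# A minimal sketch (not part of the original file) of how a runner script
# could dispatch the configured steps; the handler mapping below is
# hypothetical, since the pipeline's real entry point is not shown here.
def _run_pipeline(handlers: dict) -> None:
    """Run each configured step in order by calling its handler."""
    for step in STEPS:
        handlers[step]()
# e.g. _run_pipeline({"parse": parse.run, "loadprofiles": loadprofiles.run, ...})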
# list of datasets to preprocess
DATASETS = [
"REFIT",
"ECO",
"HES",
"UKDALE",
"HUE",
"LERTA",
"UCIML",
"DRED",
"REDD",
"IAWE",
"DEKN",
"SUST1",
"SUST2",
"HEART",
"ENERTALK",
"DEDDIAG",
"IDEAL",
"ECDUY",
"PRECON",
"EEUD"
]
# datasets on which to predict appliances (for the pretrained model, the datasets must have a sampling rate of at least 8 s); a sanity check follows the list
PREDICT_DATASETS = [
"IDEAL",
"LERTA"
]
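# Sanity check (an illustrative addition, not in the original config): every
# dataset selected for prediction should also be in the preprocessing list.
assert set(PREDICT_DATASETS) <= set(DATASETS), \
    "PREDICT_DATASETS must be a subset of DATASETS"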