pipeline_config.py
from dotenv import load_dotenv
from os import environ
# load environment variables from the .env file
load_dotenv()
# path to the folder containing the raw data
RAW_DATA_PATH = "./data/raw/"
# path to the folder to save the parsed data
PARSED_DATA_PATH = "./data/parsed/"
# path to the folder to save the calculated load profiles
LOADPROFILES_PATH = "./data/loadprofiles/"
# path to the folder containing metadata
METADATA_PATH = "./data/metadata/datasets/"
# path to the folder to save the generated metadata
GENERATED_METADATA_PATH = "./data/"
# path to the folder to save the calculated consumption data
CONSUMPTION_DATA_PATH = "./data/"
# path to the folder to save the cleaned raw data (devices removed) used for training
TRAINING_DATA_CLEANED_FOLDER = "./data/training_data/raw/"
# path to the folder containing the trained model
MODEL_PATH = "./data/trained_models/InceptionTime/"
# path to the labels generated by generate_training_data.py
LABELS_PATH = "./data/training_data/labels.pkl"
# path to the folder to save the predicted appliances
PREDICTED_APPLIANCES_PATH = "./data/"
# endpoint of the knowledge graph into which the data will be inserted
KNOWLEDGE_GRAPH_ENDPOINT = "http://193.2.205.14:7200/repositories/Electricity_Graph"
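# A minimal defensive sketch (an assumption, not part of the original file):
# fail fast with a clear error if the credentials needed for the PostgreSQL
# URL below are missing from the environment, instead of a bare KeyError.
for _var in ("DATABASE_USER", "DATABASE_PASSWORD"):
    if _var not in environ:
        raise RuntimeError(f"missing required environment variable: {_var}")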
# PostgreSQL URL used to store the data
POSTGRES_URL = f"postgresql://{environ['DATABASE_USER']}:{environ['DATABASE_PASSWORD']}@193.2.205.14:5432/Energy"
# steps to be executed (see the pipeline figure: https://github.com/sensorlab/energy-knowledge-graph/blob/main/images/pipeline.png); a dispatcher sketch follows the STEPS list
STEPS = [
"parse", # this step will parse the raw data and generate the uniform data format(step 1 in the figure)
"loadprofiles", # this step will generate the loadprofiles from the parsed data(step 2 in the figure)
"metadata", # this step will generate the metadata for the datasets(step 3 in the figure)
"consumption-data", # this step will generate the consumption data for the datasets(average daily consumption in kWh per appliance and average on/off event consumption per appliance)(step 2 in the figure)
"db-reset", # this step will reset the database and populate it with the metadata (step 4 in the figure)
"generate-links", # this step will generate links between the KG and Wikidata and DBpedia (step 7 in the figure)
"predict-devices", # this step will predict the devices for datasets with only aggregate data(step c) in the figure)
"add-predicted-devices" # this step will add the predicted devices to the KG(step c) and step 6 in the figure)
]
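# A minimal sketch (not part of the original file) of how a runner script
# could dispatch the configured steps; the handler mapping below is
# hypothetical, since the pipeline's real entry point is not shown here.
def _run_pipeline(handlers: dict) -> None:
    """Run each configured step in order by calling its handler."""
    for step in STEPS:
        handlers[step]()
# e.g. _run_pipeline({"parse": parse.run, "loadprofiles": loadprofiles.run, ...})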
# list of datasets to preprocess
DATASETS = [
"REFIT",
"ECO",
"HES",
"UKDALE",
"HUE",
"LERTA",
"UCIML",
"DRED",
"REDD",
"IAWE",
"DEKN",
"SUST1",
"SUST2",
"HEART",
"ENERTALK",
"DEDDIAG",
"IDEAL",
"ECDUY",
"PRECON",
"EEUD"
]
# datasets on which to predict appliances (for the pretrained model, the datasets must have a sampling rate of at least 8 s); a sanity check follows the list
PREDICT_DATASETS = [
"IDEAL",
"LERTA"
]
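# Sanity check (an illustrative addition, not in the original config): every
# dataset selected for prediction should also be in the preprocessing list.
assert set(PREDICT_DATASETS) <= set(DATASETS), \
    "PREDICT_DATASETS must be a subset of DATASETS"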