From 6d33373372aecf2db5ed008e957c231eeec40463 Mon Sep 17 00:00:00 2001 From: Dimitris Papagiannis Date: Fri, 21 Jul 2023 17:35:39 +0200 Subject: [PATCH] WIP on cleanup --- .env_sample | 13 +- .gitignore | 6 +- README.md | 39 ++--- backend/__init__.py | 0 backend/api.py | 55 +++---- backend/db.py | 58 ++++--- backend/dqm_extractor.py | 226 +++++++++++++++------------- backend/extra.py | 17 ++- backend/get_token.py | 2 +- backend/nginx/conf.d/hdqm-test.conf | 23 +++ backend/nginx/conf.d/hdqm.conf | 23 +++ backend/nginx/nginx.conf | 90 +++++++++++ backend/oms_extractor.py | 17 ++- backend/rr_extractor.py | 2 +- backend/run.sh | 36 ----- deployment/hdqm-extract.service | 2 +- deployment/hdqm2.service | 2 +- requirements.txt | 6 +- run.sh | 52 +++++++ tests/__init__.py | 0 tests/test_rr_data_fetching.py | 2 +- deployment/update.sh => update.sh | 19 ++- 22 files changed, 448 insertions(+), 242 deletions(-) create mode 100644 backend/__init__.py create mode 100644 backend/nginx/conf.d/hdqm-test.conf create mode 100644 backend/nginx/conf.d/hdqm.conf create mode 100644 backend/nginx/nginx.conf delete mode 100755 backend/run.sh create mode 100755 run.sh create mode 100644 tests/__init__.py rename deployment/update.sh => update.sh (58%) diff --git a/.env_sample b/.env_sample index d4f1fbb..3f998d2 100644 --- a/.env_sample +++ b/.env_sample @@ -1,7 +1,10 @@ - -HDQM2_DB_PATH= - - # For accessing OMS and RR CLIENT_SECRET= -CLIENT_ID= \ No newline at end of file +CLIENT_ID= + +# Postgres configuration +DB_HOST= +DB_PORT= +DB_USERNAME= +DB_PASSWORD= +DB_NAME= \ No newline at end of file diff --git a/.gitignore b/.gitignore index b3a06dc..8f2974f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,9 @@ *.pyc */__pycache__/* */private/* +venv/ connection_string.txt -*.env \ No newline at end of file +.env +.env_prod +.env_local +*.log \ No newline at end of file diff --git a/README.md b/README.md index 3d494de..746536c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +> **Note** +> The latest up-to-date instructions can be found in [the wiki](./wiki/). + # Table of contents - [Table of contents](#table-of-contents) @@ -38,7 +41,7 @@ The code is running on a `vocms0231` machine. # Usage instructions -## How to run locally +## How to run on LXPLUS The following instruction are completely copy-pastable. This will start a complete HDQM stack on your local (lxplus) environment. This is perfect for testing new plots before adding them. Instructions are are made for bash shell. @@ -120,10 +123,9 @@ python3 -m http.server 8000 &>/dev/null & Main HDQM commands are the following: -1. `hdqmextract.py` -2. `calculate.py` +1. `dqm_extractor.py` -### `hdqmextract.py` +### `dqm_extractor.py` This tool is responsible for extracting DQM monitor elements from ROOT files and storing them as binary data in the database. This is separated from HDQM value calculation to ensure that values can be recalculated quickly, without relying on a file system every time. @@ -136,7 +138,7 @@ This tool is responsible for extracting DQM monitor elements from ROOT files and Default EOS directory for `-f` argument is this: `/eos/cms/store/group/comm_dqm/DQMGUI_data/*/*/*/DQM*.root` -### `calculate.py` + ### Other tools When new runs appear in the database, OMS and RR APIs need to be queried to find out if new runs need to be filtered out or not. 
For this, the following tools need to be executed, in this specific order:
 
 ``` bash
-./oms_extractor.py
-./rr_extractor.py
+bash /data/hdqm2/current/run.sh extract
 ```
 
-If a big chuck of new data was recently extracted, there is a tool to prewarm the database for initial queries be fast:
-
-`./db_prewarm.py`
-
 ### Summary
 
 To summarize, by default HDQM will be done on all data available in EOS and all configuration files available in `cfg/`. The parameters to HDQM tools can be helpful to limit this scope and only extract the required subset of the data or your own data altogether.
@@ -437,22 +434,26 @@ Production API server is running on internal port 5000 and test API service on 5
 Code is located in `/data/hdqm/` directory.
 
-EOS and CVMFS file systems need to be accessible in order for the service to work. ROOT input files are coming EOS, and CMSSW release is comming from CVMFS.
+EOS and CVMFS file systems need to be accessible in order for the service to work. ROOT input files are coming from EOS, and the CMSSW release is coming from CVMFS.
 
-Nginx configuration for a reverse proxy can be found here: `/etc/nginx/conf.d/`
+Nginx configurations (production and test) for the reverse proxy can be found here: `/etc/nginx/conf.d/`
 
-Systemctl service for an API server can be found here: `/etc/systemd/system/hdqm.service`
+Systemd service for the API server can be found here: `/etc/systemd/system/hdqm2.service`
+Systemd service for the test API server can be found here: `/etc/systemd/system/hdqm2-test.service`
 
 Starting reverse proxy (nginx):
 `sudo systemctl start nginx.service`
 
-Starting an API service:
-`sudo systemctl start hdqm.service`
+Starting the API service:
+`sudo systemctl start hdqm2.service`
+
+Starting the extractor service:
+`sudo systemctl start hdqm-extract.service`
 
-Packages are installed locally in `backend/.python_packages/python2` and `backend/.python_packages/python3` directories, for different python versions. Extractor and calculator are using python 2 as they rely on ROOT but an API Flask service is running on python 3. Make sure an appropriate python path is set before using the tools by hand. For example (running from `backend` directory):
+Packages are installed locally in `venv/`. Make sure the virtual environment is activated before using the tools by hand:
 
 ```bash
-export PYTHONPATH="${PYTHONPATH}:$(pwd)/.python_packages/python2"
+source venv/bin/activate
 ```
 
 If nginx complains that it can't bind to port, make sure to request the ports to be opened in puppet:
diff --git a/backend/__init__.py b/backend/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/api.py b/backend/api.py
index 1ef9dea..6ae895c 100644
--- a/backend/api.py
+++ b/backend/api.py
@@ -1,17 +1,29 @@
+import os
 import sys
+import re
+import logging
 from collections import defaultdict
 
 from flask import Flask, jsonify, request, redirect
 from flask_cors import CORS
 
+logger = logging.getLogger(__name__)
 app = Flask(__name__)
+PDPATTERN = re.compile(r"DQM_V\d+_R\d+__(.+__.+__.+)[.]root")  # PD inside the file name
 CORS(app)
 
-import db
+from . import db
+
+app.config["SQLALCHEMY_DATABASE_URI"] = db.get_formatted_db_uri(
+    username=os.environ.get("DB_USERNAME", "postgres"),
+    password=os.environ.get("DB_PASSWORD", "postgres"),
+    host=os.environ.get("DB_HOST", "127.0.0.1"),
+    port=os.environ.get("DB_PORT", 5432),
+    db_name=os.environ.get("DB_NAME", "hdqm"),
+)
 
-###
 @app.route("/api/data", methods=["GET"])
 def get_data(json=True):
     if not json:
@@ -95,7 +107,7 @@ def get_data(json=True):
     else:
         runs = db.session.query(db.Run).order_by(db.Run.id.desc()).limit(latest).all()
 
-    print([run.id for run in runs])
+    logger.debug(f"{[run.id for run in runs]}")
     ### datasets
     dataset = (
         db.session.query(db.Dataset)
@@ -163,10 +175,8 @@ def get_data(json=True):
     return result
 
-###
 @app.route("/api/selection", methods=["GET"])
 def get_selections(json=True):
-    # try:
     subsystems = db.session.query(db.Config.subsystem).distinct().all()
     datasets = (
         db.session.query(db.Dataset.id, db.Dataset.stream, db.Dataset.reco_path)
@@ -196,11 +206,6 @@ def get_selections(json=True):
     return obj
 
-# except:
-# pass
-
-
-###
 @app.route("/api/plot_selection", methods=["GET"])
 def plot_selection(json=True):
     # try:
@@ -244,13 +249,9 @@ def plot_selection(json=True):
     return obj
 
-# except:
-# pass
-
-
-###
 @app.route("/api/runs", methods=["GET"])
 def get_runs(json=True):
     runs = [r.id for r in db.session.query(db.Run.id).order_by(db.Run.id.asc())]
     if json:
         return jsonify(runs)
@@ -258,9 +259,6 @@ def get_runs(json=True):
 
 ###
-import re
-
-PDPATTERN = re.compile("DQM_V\d+_R\d+__(.+__.+__.+)[.]root")  # PD inside the file name
 
 
 @app.route("/api/expand_url", methods=["GET"])
@@ -415,19 +413,22 @@ def do_tests():
     pass
 
 
-if __name__ == "__main__":
+def create_app():
+    """
+    Entrypoint for gunicorn (see run.sh).
+    """
     # do_tests()
     # exit()
     from dotenv import load_dotenv
 
-    from extra import *
-
     load_dotenv()
-    db_path = os.environ.get("HDQM2_DB_PATH")
+    db_path = db.get_formatted_db_uri(
+        username=os.environ.get("DB_USERNAME", "postgres"),
+        password=os.environ.get("DB_PASSWORD", "postgres"),
+        host=os.environ.get("DB_HOST", "127.0.0.1"),
+        port=os.environ.get("DB_PORT", 5432),
+        db_name=os.environ.get("DB_NAME", "hdqm"),
+    )
     db.create_session(db_path)
-
-    port = 5000
-    if len(sys.argv) >= 2:
-        port = int(sys.argv[1])
-    app.run(host="127.0.0.1", port=port)
+    return app
diff --git a/backend/db.py b/backend/db.py
index 83b227f..567bb47 100755
--- a/backend/db.py
+++ b/backend/db.py
@@ -1,6 +1,7 @@
 # P.S.~Mandrik, IHEP, 2022, https://github.com/pmandrik
 
 import os
+import logging
 import sqlalchemy
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy import (
@@ -19,6 +20,8 @@
 from sqlalchemy.orm import sessionmaker, relationship
 from sqlalchemy.sql import select
 
+logger = logging.getLogger(__name__)
+
 Base = declarative_base()
 
 # SQLite will be used if no production DB credentials will be found
@@ -26,6 +29,19 @@
 engine = None
 
 
+def get_formatted_db_uri(
+    username: str = "postgres",
+    password: str = "postgres",
+    host: str = "postgres",
+    port: int = 5432,
+    db_name: str = "postgres",
+) -> str:
+    """
+    Helper function to format the DB URI for SQLAlchemy
+    """
+    return f"postgresql://{username}:{password}@{host}:{port}/{db_name}"
+
+
 def create_session(db_string=None):
     global session
     global engine
@@ -115,12 +131,12 @@ def get_runs():
     return runs
 
 
-def add_run(run_number, log):
-    log.info('Add new run "%s" to the DB ...' % (run_number))
+def add_run(run_number):
+    logger.info('Add new run "%s" to the DB ...'
% (run_number)) run = Run(id=run_number) session.add(run) session.commit() - log.info("Add new run ... ok") + logger.info("Add new run ... ok") return run @@ -129,8 +145,8 @@ def get_configs(): return configs -def add_configs(configs, log): - log.info("Add new configs to the DB ...") +def add_configs(configs): + logger.info("Add new configs to the DB ...") try: configs_to_add = [] @@ -157,18 +173,18 @@ def add_configs(configs, log): # apply changes session.commit() except Exception as error_log: - log.warning( + logger.warning( "failed to add config to the DB %s/%s, skip" % (config.name, config.cfg_path) ) - log.warning("Error ... %s " % error_log) + logger.warning("Error ... %s " % error_log) return 1 return 0 -def update_configs(configs, log): - log.info("Update configs in the DB ...") +def update_configs(configs): + logger.info("Update configs in the DB ...") try: for config_new, config_old in configs: attributes = [ @@ -188,17 +204,17 @@ def update_configs(configs, log): # apply changes session.commit() except Exception as error_log: - log.warning( + logger.warning( "failed to add config to the DB %s/%s, skip" % (config.name, config.cfg_path) ) - log.warning("Error ... %s " % error_log) + logger.warning("Error ... %s " % error_log) return 1 return 0 -def get_dataset(stream, reco_path, log): +def get_dataset(stream, reco_path): dataset = ( session.query(Dataset) .where(Dataset.stream == stream, Dataset.reco_path == reco_path) @@ -212,19 +228,19 @@ def get_trends(dataset): return trends -def add_dataset(stream, reco_path, log): - log.info('Add new dataset ("%s", "%s") to the DB ...' % (stream, reco_path)) +def add_dataset(stream, reco_path): + logger.info('Add new dataset ("%s", "%s") to the DB ...' % (stream, reco_path)) dataset = Dataset() dataset.stream = stream dataset.reco_path = reco_path session.add(dataset) session.commit() - log.info("Add new dataset ... ok") + logger.info("Add new dataset ... ok") return dataset -def add_trends(dataset, trend_cfgs, log): - log.info( +def add_trends(dataset, trend_cfgs): + logger.info( 'Add new trends of dataset ("%s", "%s") to the DB ...' % (dataset.stream, dataset.reco_path) ) @@ -238,18 +254,18 @@ def add_trends(dataset, trend_cfgs, log): trends_to_add += [trend] session.bulk_save_objects(trends_to_add) session.commit() - log.info("Add new trends ... ok") + logger.info("Add new trends ... ok") -def add_gui_file(file, log): - log.info('Add processed gui file ("%s") to the DB ...' % (file.path)) +def add_gui_file(file): + logger.info('Add processed gui file ("%s") to the DB ...' % (file.path)) f = GUIFile(path=file.path) attributes = ["short_name", "version", "run", "stream", "reco_path"] for attr in attributes: setattr(f, attr, getattr(file, attr)) session.add(f) session.commit() - log.info("Add gui file ... ok") + logger.info("Add gui file ... 
ok") def check_gui_file(file): diff --git a/backend/dqm_extractor.py b/backend/dqm_extractor.py index afd8d7c..d7e23df 100644 --- a/backend/dqm_extractor.py +++ b/backend/dqm_extractor.py @@ -3,17 +3,18 @@ # python3 -m pip install -r requirements.txt -t .python_packages/python3 # export PYTHONPATH=$PYTHONPATH:.python_packages/python3/ import os - -from configparser import RawConfigParser -from glob import glob +import db import re +import sys import ROOT +import math +import logging +from logging.handlers import TimedRotatingFileHandler import argparse -import os, sys, math +from configparser import RawConfigParser +from glob import glob from collections import defaultdict -import logging -import db from extra import ( TrendCfg, DQMFile, @@ -22,14 +23,16 @@ get_plot_path, nostdout, ) -import oms_extractor -import rr_extractor + +sys.path.append(os.path.dirname(os.path.realpath("./backend"))) +from backend import oms_extractor +from backend import rr_extractor from metrics import basic, fits, muon_metrics, L1T_metrics, hcal_metrics -LOGLEVEL = logging.INFO -LOGPATH = "./logs_extractor.txt" -CFGFILES = "cfg/*/*.ini" +LOGLEVEL = logging.DEBUG +LOGPATH = "./dqm_extractor.log" +CFGFILES = "backend/cfg/*/*.ini" NLOGS = 10 GUIDATADIR = "/eos/cms/store/group/comm_dqm/DQMGUI_data" GUIDATADIR = "/eos/cms/store/group/comm_dqm/DQMGUI_data/Run2022/MinimumBias/0003557xx/" @@ -48,12 +51,16 @@ "hcal_metrics": hcal_metrics, } +logger = logging.getLogger(__name__) -def read_cfgs(cfg_files, log): + +def read_cfgs(cfg_files): + logger.info(f"Reading configs from {len(cfg_files)} files.") check_dic = {} mes_set = set() trend_cfgs = [] for cfg_file in cfg_files: + logger.debug(f"Reading file {cfg_file}") try: parser = RawConfigParser() parser.read(cfg_file) @@ -64,13 +71,13 @@ def read_cfgs(cfg_files, log): for section in parser: if not section.startswith("plot:"): if section != "DEFAULT": - log.info( + logger.info( "Invalid configuration section: %s:%s, skipping." % (cfg_file, section) ) continue if not PLOTNAMEPATTERN.match(section.lstrip("plot:")): - log.info( + logger.info( "Invalid plot name: '%s:%s' Plot names can contain only: [a-zA-Z0-9_+-]" % (cfg_file, section.lstrip("plot:")) ) @@ -81,11 +88,11 @@ def read_cfgs(cfg_files, log): pars = ["metric", "relativePath", "yTitle"] # mandatory parameters for par in pars: if par not in trend.data: - log.info( + logger.info( "Invalid configuration section: %s:%s, skipping." % (cfg_file, section) ) - log.info("Parameter not available %s" % par) + logger.info("Parameter not available %s" % par) trend = None break if not trend: @@ -93,7 +100,7 @@ def read_cfgs(cfg_files, log): duplicate_trend = check_dic.get(subsystem + "@" + section, None) if duplicate_trend: - log.info( + logger.info( 'Duplicate of plot name "%s" found in "%s" and in "%s". Skip' % (section, cfg_file, duplicate_trend.cfg_path) ) @@ -106,59 +113,59 @@ def read_cfgs(cfg_files, log): mes_set.update(trend.GetMEs()) except Exception as error_log: - log.info("Could not read %s, skipping..." % cfg_file) - log.info("Error ... %s " % error_log) + logger.info("Could not read %s, skipping..." % cfg_file) + logger.info("Error ... 
%s " % error_log) return trend_cfgs, mes_set -def process_gui_root(file, trend_cfgs, mes, log): - log.info('Process "%s"' % file.path) +def process_gui_root(file, trend_cfgs, mes): + logger.info('Process "%s"' % file.path) ### get dataset & trends for type of root file name - dataset = db.get_dataset(file.stream, file.reco_path, log) + dataset = db.get_dataset(file.stream, file.reco_path) if not dataset: - dataset = db.add_dataset(file.stream, file.reco_path, log) + dataset = db.add_dataset(file.stream, file.reco_path) - log.info( + logger.info( 'Process Dataset %s "%s" "%s"' % (dataset.id, dataset.stream, dataset.reco_path) ) trends = db.get_trends(dataset) if not trends: - db.add_trends(dataset, trend_cfgs, log) + db.add_trends(dataset, trend_cfgs) trends = db.get_trends(dataset) trends = sorted(trends, key=lambda x: x.config_id) ### read all MEs from root file - log.info('Extract MEs from "%s"' % file.path) + logger.info('Extract MEs from "%s"' % file.path) try: tdirectory = ROOT.TFile.Open(file.path) if tdirectory == None: - log.warning("Unable to open file: '%s', skip" % file.path) + logger.warning("Unable to open file: '%s', skip" % file.path) return 1 except Exception as error_log: - log.warning("Unable to read file: '%s', skip" % file.path) - log.warning("Error ... %s " % error_log) + logger.warning("Unable to read file: '%s', skip" % file.path) + logger.warning("Error ... %s " % error_log) return 1 me_dic = {} for me in mes: plot = tdirectory.Get(get_plot_path(me, file.run)) if not plot: - log.debug('No MEs "%s" available in "%s"' % (me, file.path)) + logger.debug('No MEs "%s" available in "%s"' % (me, file.path)) continue me_dic[me] = plot - log.info("Available/Requested MEs = %s/%s" % (len(me_dic), len(mes))) + logger.info("Available/Requested MEs = %s/%s" % (len(me_dic), len(mes))) ### calculate all trends based on MEs from root file - log.info('Calculating trends for "%s"' % file.path) + logger.info('Calculating trends for "%s"' % file.path) processed_trends = [] for trend_cfg, trend in zip(trend_cfgs, trends): metric_func = trend_cfg.metric_func if not metric_func: - log.info( + logger.info( "no metric for trend/cfg %s/%s, skip" % (trend_cfg.name, trend_cfg.cfg_path) ) @@ -191,7 +198,7 @@ def process_gui_root(file, trend_cfgs, mes, log): is_ok = False if not is_ok: - log.debug( + logger.debug( "Unable to get an monitor element for trend/cfg %s/%s, skip" % (trend_cfg.name, trend_cfg.cfg_path) ) @@ -202,22 +209,22 @@ def process_gui_root(file, trend_cfgs, mes, log): with nostdout(): # supress metrics stdout & stderr value, error = metric_func.calculate(main_hist) except Exception as error_log: - log.warning( + logger.warning( "Unable to calculate the metric for trend/cfg %s/%s, skip" % (trend_cfg.name, trend_cfg.cfg_path) ) - log.warning("Error ... %s " % error_log) + logger.warning("Error ... 
%s " % error_log)
             continue
 
         if math.isinf(value):
-            log.warning(
+            logger.warning(
                 "Inf metric responce value for trend/cfg %s/%s, skip"
                 % (trend_cfg.name, trend_cfg.cfg_path)
             )
             continue
 
         if math.isinf(error):
-            log.warning(
+            logger.warning(
                 "Inf metric responce error for trend/cfg %s/%s, skip"
                 % (trend_cfg.name, trend_cfg.cfg_path)
             )
@@ -225,14 +232,14 @@
         # add new [run , value, error ] point to the trend
         # if we reprocessing the same run as in DB we updating points values
-        add_point_to_trend(trend, dataset, trend_cfg, file.run, value, error, log)
+        add_point_to_trend(trend, dataset, trend_cfg, file.run, value, error)
         processed_trends += [trend]
 
-    log.info(
+    logger.info(
         "Processed/Requested Trends = %s/%s" % (len(processed_trends), len(trend_cfgs))
     )
 
-    log.info("Updating trends in the DB ...")
+    logger.info("Updating trends in the DB ...")
     db.session.commit()
     tdirectory.Close()
@@ -246,12 +253,12 @@
     load_dotenv()
     parser = argparse.ArgumentParser(description="HDQM trend calculation.")
     log = logging.getLogger(__file__)
-    log.setLevel(LOGLEVEL)
+    logger.setLevel(LOGLEVEL)
 
     formatter = logging.Formatter(
         fmt="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
     )
-    handler = logging.handlers.TimedRotatingFileHandler(
+    handler = TimedRotatingFileHandler(
         LOGPATH, when="h", interval=24, backupCount=int(NLOGS)
     )
     handler.setFormatter(formatter)
@@ -261,34 +268,41 @@
     handler2.setFormatter(formatter)
     handler2.setLevel(LOGLEVEL)
 
-    log.addHandler(handler)
-    log.addHandler(handler2)
+    logger.addHandler(handler)
+    logger.addHandler(handler2)
 
-    log.info("Start " + str(__file__))
-    log.info("Create %s log file" % LOGPATH)
+    logger.info("Start " + str(__file__))
+    logger.info("Create %s log file" % LOGPATH)
 
     ### some metrics pop up canvases -_-
     ROOT.gROOT.SetBatch(True)
 
     ### get path to the db
-    db_path = os.environ.get("HDQM2_DB_PATH")
+    db_path = db.get_formatted_db_uri(
+        username=os.environ.get("DB_USERNAME", "postgres"),
+        password=os.environ.get("DB_PASSWORD", "postgres"),
+        host=os.environ.get("DB_HOST", "127.0.0.1"),
+        port=os.environ.get("DB_PORT", 5432),
+        db_name=os.environ.get("DB_NAME", "hdqm"),
+    )
     db.create_session(db_path)
 
     ### read configs
-    log.info("Glob confing files from %s" % CFGFILES)
+    logger.info("Glob config files from %s" % CFGFILES)
     config_files = glob(CFGFILES)
     for cfg in config_files:
-        if cfg.count("/") != 2 or not cfg.startswith("cfg/"):
-            log.info("Invalid configuration file: %s" % cfg)
-            log.info("Configuration files must come from here: cfg/*/*.ini")
+        # Configs are expected to live in backend/cfg/<subsystem>/<name>.ini
+        if cfg.count("/") != 3 or not cfg.startswith("backend/cfg/"):
+            logger.info("Invalid configuration file: %s" % cfg)
+            logger.info("Configuration files must come from here: backend/cfg/*/*.ini")
             exit()
 
-    log.info("Count " + str(len(config_files)) + " cfg files")
+    logger.info("Count " + str(len(config_files)) + " cfg files")
 
     ### get Monitoring elements (MEs)
-    log.info("Read configuration files")
-    trend_cfgs, mes = read_cfgs(config_files, log)
-    log.info("Count " + str(len(trend_cfgs)) + " Trends")
-    log.info("Count " + str(len(mes)) + " MEs")
+    logger.info("Read configuration files")
+    trend_cfgs, mes = read_cfgs(config_files)
+    logger.info("Count " + str(len(trend_cfgs)) + " Trends")
+    logger.info("Count " + str(len(mes)) + " MEs")
 
     trend_dic = defaultdict(list)
     try:
@@ -300,8 +314,8 @@
             subsystem = item.relative_path.split("/")[1]
             trend_dic[subsystem] += [item]
     except Exception as error_log:
-        log.info("Could not count Trends per subsystem ...")
-        log.info("Error ... %s " % error_log)
+        logger.info("Could not count Trends per subsystem ...")
+        logger.info("Error ... %s " % error_log)
         exit()
 
     mes_dic = defaultdict(list)
@@ -313,25 +327,25 @@
             subsystem = item.split("/")[1]
             mes_dic[subsystem] += [item]
     except Exception as error_log:
-        log.info("Could not count MEs per subsystem ...")
-        log.info("Error ... %s " % error_log)
+        logger.info("Could not count MEs per subsystem ...")
+        logger.info("Error ... %s " % error_log)
         exit()
 
-    log.info("--------------------Trends stat:")
+    logger.info("--------------------Trends stat:")
     for key, val in trend_dic.items():
-        log.info(key + " : " + str(len(val)))
+        logger.info(key + " : " + str(len(val)))
 
-    log.info("--------------------MEs stat:")
+    logger.info("--------------------MEs stat:")
     for key, val in trend_dic.items():
-        log.info(key + " : " + str(len(val)))
+        logger.info(key + " : " + str(len(val)))
 
     for key, val in mes_dic.items():
         if key in trend_dic:
             continue
-        log.info(key + " : " + str(None) + " " + str(len(val)))
+        logger.info(key + " : " + str(None) + " " + str(len(val)))
 
     ### setup config metrics
-    log.info("Eval config metrics ...")
+    logger.info("Eval config metrics ...")
     metric_dic = defaultdict(int)
     for trend in trend_cfgs:
         try:
@@ -340,31 +354,31 @@
             metric_str = trend.data["metric"]
             metric_dic[metric_str.split("(")[0]] += 1
         except Exception as error_log:
-            log.info(
+            logger.info(
                 'Could not setup metric "%s" for cfg "%s", trend "%s"'
                 % (trend.data["metric"], trend.cfg_path, trend.name)
             )
-            log.info("Error ... %s " % error_log)
+            logger.info("Error ... %s " % error_log)
             exit()
 
-    log.info("-------------------- Metrics stat:")
+    logger.info("-------------------- Metrics stat:")
     for key, val in metric_dic.items():
-        log.info(key + " : " + str(val))
+        logger.info(key + " : " + str(val))
 
     ### check trend configs in DB
     # - 1. add new configs or update old config
     # we are not removing old configs or trends here, but they are not updated with new run information
     # even if we updating the metric or reference in CFG used for calculation we do not change the DB trends
-    log.info("Get configs from the DB ...")
+    logger.info("Get configs from the DB ...")
     try:
         db_cfgs = db.get_configs()
     except Exception as error_log:
-        log.info("Could not extract configs from the DB, exit")
-        log.info("Error ... %s " % error_log)
+        logger.info("Could not extract configs from the DB, exit")
+        logger.info("Error ...
%s " % error_log) exit() - log.info("Found %s configs in the DB " % len(db_cfgs)) + logger.info("Found %s configs in the DB " % len(db_cfgs)) - log.info("Search for new configs to add or update ...") + logger.info("Search for new configs to add or update ...") list_to_update = [] list_to_add = [] dic_overlap = {} @@ -383,16 +397,16 @@ def process_gui_root(file, trend_cfgs, mes, log): continue list_to_update += [[new_config, old_config]] - log.info( + logger.info( "Found %s new trend configs to add & %s configs to update in DB" % (len(list_to_add), len(list_to_update)) ) - bad_configs_add = db.add_configs(list_to_add, log) - bad_configs_update = db.update_configs(list_to_update, log) + bad_configs_add = db.add_configs(list_to_add) + bad_configs_update = db.update_configs(list_to_update) bad_configs = bad_configs_add + bad_configs_update if bad_configs: - log.warning( + logger.warning( "Failed to add new trend configs (%s) or update configs (%s) to DB, abort" % ((bad_configs_add), (bad_configs_update)) ) @@ -402,27 +416,27 @@ def process_gui_root(file, trend_cfgs, mes, log): filter(lambda x: x.db_id, trend_cfgs) ) ## 100% sure we have db_id if len(trend_cfgs_filtered) != len(trend_cfgs): - log.warning( + logger.warning( "Failed to add new trend configs or update configs, no ID returned by DB, abort" ) exit() trend_cfgs = sorted(trend_cfgs_filtered, key=lambda x: x.db_id) ## and sorted ### get known runs from the DB - log.info("Get known runs from the DB ...") + logger.info("Get known runs from the DB ...") db_runs = db.get_runs() db_runs_dic = {run.id: run for run in db_runs} - log.info("Found %s known runs ..." % len(db_runs)) + logger.info("Found %s known runs ..." % len(db_runs)) ### get list if root files root_files = [] - log.info("The process was restarted, so, we need to check all new files") - log.info("Find all GUI ROOT files on EOS (several minutes ...)") + logger.info("The process was restarted, so, we need to check all new files") + logger.info("Find all GUI ROOT files on EOS (several minutes ...)") import subprocess command = "find " + GUIDATADIR + " -name " + GUIDATAPATTERN - log.info('Execute "%s"' % command) + logger.info('Execute "%s"' % command) try: output = subprocess.check_output(command, shell=True) output = output.decode("utf-8").split("\n") @@ -430,16 +444,16 @@ def process_gui_root(file, trend_cfgs, mes, log): if len(root_files) and not root_files[-1]: root_files.pop() # remove last empty entries from find return except Exception as error_log: - log.info('find failed with error "%s"' % error_log) + logger.info('find failed with error "%s"' % error_log) exit() - log.info("Find %s GUI files" % len(root_files)) + logger.info("Find %s GUI files" % len(root_files)) if not root_files: - log.info("No GUI files, check if EOS is down. Aborting.") + logger.info("No GUI files, check if EOS is down. 
Aborting.") exit() ### get good and latest dqm files - log.info("Filtering GUI files") + logger.info("Filtering GUI files") dqm_files = defaultdict(dict) for path in root_files: @@ -465,50 +479,52 @@ def process_gui_root(file, trend_cfgs, mes, log): # print( list(dqm_files.values()) ) # nfiles = len( sum( list(dqm_files.items()) ) ) - # log.info('Number of GUI files after filtering %s' % nfiles ) + # logger.info('Number of GUI files after filtering %s' % nfiles ) ### process files - log.info("Start extraction of MEs from files") + logger.info("Start extraction of MEs from files") good_files = 0 for subdir, files in dqm_files.items(): - log.info('Process directory "%s" with %s files' % (subdir, len(files.values()))) + logger.info( + 'Process directory "%s" with %s files' % (subdir, len(files.values())) + ) files_tot = len(files.values()) for n, file in enumerate(files.values()): - log.info("File number = %s/%s" % (n, files_tot)) + logger.info("File number = %s/%s" % (n, files_tot)) if db.check_gui_file(file): - log.info("Skip file already in the DB %s" % file.path) + logger.info("Skip file already in the DB %s" % file.path) continue - result = process_gui_root(file, trend_cfgs, mes, log) + result = process_gui_root(file, trend_cfgs, mes) if not result: good_files += 1 # add file as procecced to the DB - db.add_gui_file(file, log) + db.add_gui_file(file) # if run is not known - update OMS and RR data db_run = db_runs_dic.get(file.run, None) if not db_run: - db_run = db.add_run(file.run, log) + db_run = db.add_run(file.run) db_runs_dic[file.run] = db_run if not db_run.oms_data: - oms_data = oms_extractor.get_oms_run(db_run.id, log) + oms_data = oms_extractor.get_oms_run(db_run.id) if oms_data: - log.info("Update oms run info for run %s" % db_run.id) + logger.info("Update oms run info for run %s" % db_run.id) db_run.oms_data = str(oms_data) db.session.commit() else: - log.info("No OMS data for run %s" % db_run.id) + logger.info("No OMS data for run %s" % db_run.id) if not db_run.rr_run_class: - rr_data = rr_extractor.get_rr_run(file.run, log) + rr_data = rr_extractor.get_rr_run(file.run) if rr_data: - log.info("Update RR run info for run %s" % db_run.id) + logger.info("Update RR run info for run %s" % db_run.id) db_run.rr_run_class = str(rr_data["rr_run_class"]) db_run.rr_significant = bool(rr_data["rr_significant"]) db.session.commit() else: - log.info("No RR data for run %s" % db_run.id) + logger.info("No RR data for run %s" % db_run.id) - log.info("Number of processed GUI files %s" % good_files) + logger.info("Number of processed GUI files %s" % good_files) diff --git a/backend/extra.py b/backend/extra.py index c5cb643..e73ccf9 100644 --- a/backend/extra.py +++ b/backend/extra.py @@ -1,8 +1,11 @@ # P.S.~Mandrik, IHEP, 2022, https://github.com/pmandrik -import sys, os +import sys +import logging import contextlib +logger = logging.getLogger(__name__) + ### DQM classes class TrendCfg: @@ -100,21 +103,21 @@ def get_plot_path(path, run): ### update trends informations with data points -import bisect +# import bisect -def add_point_to_trend(trend, dataset, trend_cfg, run, value, error, log): +def add_point_to_trend(trend, dataset, trend_cfg, run, value, error): trend.points.replace("inf", "0") try: points = eval(trend.points) - # log.info('Run %s duplicate point in trend "%s" dataset "%s" "%s" config "%s" "%s"' % (str(run), trend.subsystem, dataset.stream, dataset.reco_path, trend_cfg.name, trend_cfg.plot_title) ) + # logger.info('Run %s duplicate point in trend "%s" dataset "%s" "%s" config 
"%s" "%s"' % (str(run), trend.subsystem, dataset.stream, dataset.reco_path, trend_cfg.name, trend_cfg.plot_title) ) points[run] = [value, error] except Exception as error_log: - log.info( + logger.info( "Failed to add point to trend %s/%s" % (trend_cfg.name, trend_cfg.cfg_path) ) - log.info("Trend points %s" % (str(trend.points))) - log.info("Error ... %s " % error_log) + logger.info("Trend points %s" % (str(trend.points))) + logger.info("Error ... %s " % error_log) return False trend.points = str(points) return True diff --git a/backend/get_token.py b/backend/get_token.py index c040f68..507a493 100644 --- a/backend/get_token.py +++ b/backend/get_token.py @@ -3,7 +3,7 @@ import logging import cernrequests from cachetools import cached, TTLCache -from .extra import * +from backend.extra import * headers = {"content-type": "application/x-www-form-urlencoded"} diff --git a/backend/nginx/conf.d/hdqm-test.conf b/backend/nginx/conf.d/hdqm-test.conf new file mode 100644 index 0000000..f686a7d --- /dev/null +++ b/backend/nginx/conf.d/hdqm-test.conf @@ -0,0 +1,23 @@ +server { + listen 81 default_server; + listen [::]:81 default_server; + server_name _; + + gzip on; + gzip_types application/json; + + # This is definitely overkill. + # TODO: Just serve the Static site through flask! + location / { + add_header 'Access-Control-Allow-Origin' '*'; + root /data/hdqmTest/CentralHDQM/frontend; + } + + location /api { + proxy_pass http://127.0.0.1:5001; + proxy_hide_header 'Access-Control-Allow-Origin'; + add_header 'Access-Control-Allow-Origin' '*'; + root /data/hdqmTest/CentralHDQM/backend/api; + proxy_read_timeout 90; + } +} diff --git a/backend/nginx/conf.d/hdqm.conf b/backend/nginx/conf.d/hdqm.conf new file mode 100644 index 0000000..cf6778b --- /dev/null +++ b/backend/nginx/conf.d/hdqm.conf @@ -0,0 +1,23 @@ +server { + listen 80 default_server; + listen [::]:80 default_server; + server_name _; + + gzip on; + gzip_types application/json; + + # This is definitely overkill. + # TODO: Just serve the Static site through flask! + location / { + add_header 'Access-Control-Allow-Origin' '*'; + root /data/hdqm2/current/frontend; + } + + location /api { + proxy_pass http://127.0.0.1:5000; + proxy_hide_header 'Access-Control-Allow-Origin'; + add_header 'Access-Control-Allow-Origin' '*'; + root /data/hdqm2/current/backend; + proxy_read_timeout 90; + } +} diff --git a/backend/nginx/nginx.conf b/backend/nginx/nginx.conf new file mode 100644 index 0000000..bcef02e --- /dev/null +++ b/backend/nginx/nginx.conf @@ -0,0 +1,90 @@ +# For more information on configuration, see: +# * Official English Documentation: http://nginx.org/en/docs/ +# * Official Russian Documentation: http://nginx.org/ru/docs/ + +user nginx; +worker_processes auto; +error_log /var/log/nginx/error.log; +pid /run/nginx.pid; + +# Load dynamic modules. See /usr/share/nginx/README.dynamic. +include /usr/share/nginx/modules/*.conf; + +events { + worker_connections 1024; +} + +http { + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Load modular configuration files from the /etc/nginx/conf.d directory. + # See http://nginx.org/en/docs/ngx_core_module.html#include + # for more information. 
+ include /etc/nginx/conf.d/*.conf; + +# server { +# listen 80 default_server; +# listen [::]:80 default_server; +# server_name _; +# root /usr/share/nginx/html; + + # Load configuration files for the default server block. +# include /etc/nginx/default.d/*.conf; + +# location / { +# } + +# error_page 404 /404.html; +# location = /40x.html { +# } + +# error_page 500 502 503 504 /50x.html; +# location = /50x.html { +# } +# } + +# Settings for a TLS enabled server. +# +# server { +# listen 443 ssl http2 default_server; +# listen [::]:443 ssl http2 default_server; +# server_name _; +# root /usr/share/nginx/html; +# +# ssl_certificate "/etc/pki/nginx/server.crt"; +# ssl_certificate_key "/etc/pki/nginx/private/server.key"; +# ssl_session_cache shared:SSL:1m; +# ssl_session_timeout 10m; +# ssl_ciphers HIGH:!aNULL:!MD5; +# ssl_prefer_server_ciphers on; +# +# # Load configuration files for the default server block. +# include /etc/nginx/default.d/*.conf; +# +# location / { +# } +# +# error_page 404 /404.html; +# location = /40x.html { +# } +# +# error_page 500 502 503 504 /50x.html; +# location = /50x.html { +# } +# } + +} + diff --git a/backend/oms_extractor.py b/backend/oms_extractor.py index c6ad7f5..7543a99 100644 --- a/backend/oms_extractor.py +++ b/backend/oms_extractor.py @@ -2,11 +2,14 @@ import os import urllib3 import requests +import logging from dotenv import load_dotenv -from .get_token import get_token +from backend.get_token import get_token load_dotenv() +logger = logging.getLogger(__name__) + CLIENT_ID = os.environ.get("CLIENT_ID") CLIENT_SECRET = os.environ.get("CLIENT_SECRET") AUDIENCE = "cmsoms-prod" @@ -20,8 +23,8 @@ HEADERS = None -def get_oms_run(run, log): - log.info("Get OMS data for run %s ..." % run) +def get_oms_run(run): + logger.info("Get OMS data for run %s ..." % run) url = OMS_URL % run oms_data = "" @@ -46,10 +49,10 @@ def get_oms_run(run, log): try: oms_attributes_meta = oms_runs_json["data"][0]["meta"]["row"] except Exception as error_log: - log.warning( + logger.warning( "Failed to get OMS meta for run %s ..., skip without units" % run ) - log.warning("Error ... %s " % error_log) + logger.warning("Error ... %s " % error_log) units = "0" if oms_attributes_meta: @@ -68,8 +71,8 @@ def get_oms_run(run, log): oms_data = oms_attributes except Exception as error_log: - log.warning("Failed to get OMS data for run %s ..." % run) - log.warning("Error ... %s " % error_log) + logger.warning("Failed to get OMS data for run %s ..." % run) + logger.warning("Error ... %s " % error_log) return 1 return oms_data diff --git a/backend/rr_extractor.py b/backend/rr_extractor.py index 700790b..09ed76e 100644 --- a/backend/rr_extractor.py +++ b/backend/rr_extractor.py @@ -4,7 +4,7 @@ import logging import requests from dotenv import load_dotenv -from .get_token import get_token +from backend.get_token import get_token load_dotenv() diff --git a/backend/run.sh b/backend/run.sh deleted file mode 100755 index e532db5..0000000 --- a/backend/run.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -if [ "$1" == "-h" ] || [ "$1" == "--help" ]; -then - echo "Runs the app. Default port is 5000 but different port can be passed as a first argument." 
-    exit
-fi
-
-source .env
-
-if [ "$1" = "api" ]; then
-    export PYTHONPATH=$(cd ../; pwd)/.python_packages/python3:$PYTHONPATH
-    python3 api.py
-fi
-
-if [ "$1" = "extract" ]; then
-    export RELEASE=/cvmfs/cms.cern.ch/slc7_amd64_gcc10/cms/cmssw/CMSSW_12_4_5
-    source /cvmfs/cms.cern.ch/cmsset_default.sh
-    cd $RELEASE
-    eval `scramv1 runtime -sh`
-    cd -
-    export PYTHONPATH=$(cd ../; pwd)/.python_packages/python3:$PYTHONPATH
-
-    while true ; do
-        kinit -kt /data/hdqm/.keytab cmsdqm
-        /usr/bin/eosfusebind -g
-
-        db_name="$(mktemp --dry-run hdqm_XXXXXXXX.db)"
-        echo $db_name
-        # cp hdqm_v3.db $db_name
-        python3 dqm_extractor.py
-        sleep 3d
-        #exit
-    done
-fi
-
diff --git a/deployment/hdqm-extract.service b/deployment/hdqm-extract.service
index 6ad2ec8..96a3dad 100644
--- a/deployment/hdqm-extract.service
+++ b/deployment/hdqm-extract.service
@@ -1,5 +1,5 @@
 [Unit]
-Description=An HDQM Extract
+Description=HDQM Extractor service
 
 [Service]
 WorkingDirectory=/data/hdqm2/current/backend
diff --git a/deployment/hdqm2.service b/deployment/hdqm2.service
index 0fa1f22..abd7e11 100644
--- a/deployment/hdqm2.service
+++ b/deployment/hdqm2.service
@@ -1,5 +1,5 @@
 [Unit]
-Description=An HDQM API server
+Description=HDQM API server
 
 [Service]
 WorkingDirectory=/data/hdqm2/current/backend
diff --git a/requirements.txt b/requirements.txt
index ddbe6ae..2cf3f9a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,10 @@
-Flask<2.0.0
+Flask<3.0.0
+MarkupSafe==2.0.1 # Newer versions have issues with Jinja2
 requests<3.0.0
 SQLAlchemy<2.0.0
 psycopg2-binary
 cachetools<4.0.0
 python-dotenv
 cernrequests>=0.5.0
-Flask-Cors
\ No newline at end of file
+Flask-Cors
+gunicorn
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..8eb1fda
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Bash script for running HDQM's services (API server and extractor).
+
+
+if [ "$1" == "-h" ] || [ "$1" == "--help" ];
+then
+    echo "Runs HDQM's various services."
+    echo ""
+    echo "  run.sh api: Runs the API server. Default port is 5000. Select another one by passing it"
+    echo "              as an argument after the command (e.g. run.sh api 6000)"
+    echo ""
+    echo "  run.sh extract: Runs the dqm_extractor.py script."
+    exit
+fi
+
+source .env
+
+# Make sure the venv created by update.sh exists
+if [ ! -d venv ]; then
+    echo "Virtual environment not found! Run update.sh first."
+    exit
+fi
+
+
+if [ "$1" = "api" ]; then
+    source venv/bin/activate
+
+    PORT=5000
+    if [ "$2" ]; then
+        PORT=$2
+    fi
+    # No need to bind to 0.0.0.0, nginx takes care of exposing the port.
+    gunicorn --workers=$(nproc) 'backend.api:create_app()' --bind=127.0.0.1:$PORT
+elif [ "$1" = "extract" ]; then
+    source venv/bin/activate
+
+    # Source ROOT activation script from CVMFS
+    . /cvmfs/sft.cern.ch/lcg/app/releases/ROOT/6.24.08/x86_64-centos7-gcc48-opt/bin/thisroot.sh
+
+    # Keep authenticating with kerberos to access EOS
+    while true ; do
+        kinit -kt /data/hdqm/.keytab cmsdqm
+        /usr/bin/eosfusebind -g
+
+        python3 backend/dqm_extractor.py
+        sleep 3d
+        #exit
+    done
+fi
+
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_rr_data_fetching.py b/tests/test_rr_data_fetching.py
index be21847..aa208b8 100644
--- a/tests/test_rr_data_fetching.py
+++ b/tests/test_rr_data_fetching.py
@@ -4,7 +4,7 @@
 sys.path.append(os.path.dirname(os.path.realpath("./backend")))
 
 from backend.rr_extractor import get_rr_run, is_significant
-from test_data import RUN_REGISTRY_RUN_355555_DATA, RUN_REGISTRY_RUN_355556_DATA
+from tests.test_data import RUN_REGISTRY_RUN_355555_DATA, RUN_REGISTRY_RUN_355556_DATA
 
 
 def test_get_rr_run():
diff --git a/deployment/update.sh b/update.sh
similarity index 58%
rename from deployment/update.sh
rename to update.sh
index 076a67a..3f8c141 100644
--- a/deployment/update.sh
+++ b/update.sh
@@ -8,28 +8,33 @@
 if [ $USER != "cmsdqm" ]; then
     exit -1
 fi
 
+# Creates a new directory to clone the source files into.
 src=$(date +%Y_%m_%d-%H_%M_%S)$(echo "_src")
 git clone https://github.com/cms-dqm/CentralHDQM $src
 cd $src
-git checkout hdqm2
+git checkout master
 
-echo "export CLIENT_ID=FIXME" > backend/.env
-echo "export CLIENT_SECRET=FIXME" >> backend/.env
-echo "export AUDIENCE=FIXME" >> backend/.env
-echo "export HDQM2_DB_PATH=FIXME" >> backend/.env
+# Copy the .env file
+cp ../.env ./backend/
 
+# TODO: Why is CMSSW needed?
 export RELEASE=/cvmfs/cms.cern.ch/slc7_amd64_gcc10/cms/cmssw/CMSSW_12_4_5
 source /cvmfs/cms.cern.ch/cmsset_default.sh
 cd $RELEASE
 eval `scramv1 runtime -sh`
-cd -
+cd - # Go back to $src
 
-python3 -m pip install -r requirements.txt -t .python_packages/python3 --no-cache-dir
+# Create a venv, activate it, install requirements.
+PYTHON=$(which python3)
+$PYTHON -m venv venv
+source venv/bin/activate
+python3 -m pip install -r requirements.txt -U --no-cache-dir # venv's python3, so packages land inside the venv
 
 # Get back to main dir to switch the link
 cd $dir
+# Softlink latest source files to a link named "current"
 ln -s -f -n $src current
 
 echo "New version started! Don't forget to restart: sudo systemctl restart hdqm2.service"
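---

Deployment notes — quick verification sketches for this change; these are illustrative, with assumptions flagged inline, not part of the patch itself.

Since this patch moves all configuration into environment variables, a first sanity check is that a local `.env` actually defines every key the code now reads. The key names below are taken from the updated `.env_sample`; `python-dotenv` is already pinned in `requirements.txt`. A minimal sketch:

```python
# Check a local .env against the keys the patched code expects.
from dotenv import dotenv_values

REQUIRED = [
    "CLIENT_ID", "CLIENT_SECRET",                                   # OMS / RR access
    "DB_HOST", "DB_PORT", "DB_USERNAME", "DB_PASSWORD", "DB_NAME",  # Postgres
]

cfg = dotenv_values(".env")  # parses the file without touching os.environ
missing = [key for key in REQUIRED if not cfg.get(key)]
if missing:
    raise SystemExit(f"missing .env keys: {missing}")
print("all required .env keys are present")
```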
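The new `db.get_formatted_db_uri()` helper is plain string interpolation into a `postgresql://` URI. A standalone sketch of the same construction, validated with SQLAlchemy's URL parser (SQLAlchemy 1.x, as pinned); the function body mirrors `backend/db.py`, while the defaults here match the fallbacks used at the call sites:

```python
# Build the DB URI the way backend/db.py does, then validate it.
import os
from sqlalchemy.engine.url import make_url  # SQLAlchemy 1.x API

def get_formatted_db_uri(username="postgres", password="postgres",
                         host="127.0.0.1", port=5432, db_name="hdqm"):
    # Same format string as the helper added in backend/db.py.
    return f"postgresql://{username}:{password}@{host}:{port}/{db_name}"

uri = get_formatted_db_uri(
    username=os.environ.get("DB_USERNAME", "postgres"),
    password=os.environ.get("DB_PASSWORD", "postgres"),
    host=os.environ.get("DB_HOST", "127.0.0.1"),
    port=os.environ.get("DB_PORT", 5432),
    db_name=os.environ.get("DB_NAME", "hdqm"),
)
url = make_url(uri)  # raises if the URI is malformed
print(url.host, url.port, url.database)
```

One caveat worth noting: the format string does not URL-escape its inputs, so a password containing `@`, `:` or `/` would produce a broken URI.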
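Once `run.sh api` has started gunicorn, the service can be smoke-tested against `/api/runs`, an endpoint defined in `backend/api.py`. This assumes the default `127.0.0.1:5000` bind from `run.sh`; behind nginx the same request would go through port 80 under `/api`:

```python
# Smoke test for the refactored API served by gunicorn.
import requests  # pinned as requests<3.0.0 in requirements.txt

resp = requests.get("http://127.0.0.1:5000/api/runs", timeout=10)
resp.raise_for_status()
runs = resp.json()  # /api/runs returns a JSON list of run numbers, ascending
print(f"API up; {len(runs)} runs known; latest: {runs[-1] if runs else 'n/a'}")
```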