diff --git a/Configuration/PyReleaseValidation/python/MatrixUtil.py b/Configuration/PyReleaseValidation/python/MatrixUtil.py index 8947d607ad5a5..d088cafc44b13 100644 --- a/Configuration/PyReleaseValidation/python/MatrixUtil.py +++ b/Configuration/PyReleaseValidation/python/MatrixUtil.py @@ -133,9 +133,9 @@ def das(self, das_options, dataset): command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0]) elif self.skimEvents: from os import getenv - if getenv("CMSSW_USE_IBEOS","false")=="true": + if getenv("JENKINS_PREFIX") is not None: # to be assured that whatever happens the files are only those at CERN - command = "das-up-to-nevents.py -d %s -e %d -s T2_CH_CERN"%(dataset,self.events) + command = "das-up-to-nevents.py -d %s -e %d -pc"%(dataset,self.events) else: command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events) # Run filter on DAS output diff --git a/Configuration/PyReleaseValidation/python/relval_data_highstats.py b/Configuration/PyReleaseValidation/python/relval_data_highstats.py index 9d676ae1386ff..76bd25ffd54d8 100644 --- a/Configuration/PyReleaseValidation/python/relval_data_highstats.py +++ b/Configuration/PyReleaseValidation/python/relval_data_highstats.py @@ -16,7 +16,7 @@ for e_n,era in enumerate(eras_2024): for p_n,pd in enumerate(pds_2024): for e_key,evs in event_steps_dict.items(): - if "50k" == e_key: # already defined in relval_standard + if "10k" == e_key: # already defined in relval_standard continue wf_number = base_wf_number_2024 wf_number = wf_number + offset_era * e_n diff --git a/Configuration/PyReleaseValidation/python/relval_highstats.py b/Configuration/PyReleaseValidation/python/relval_highstats.py index 6069eb9117384..5808f63f0ebaa 100644 --- a/Configuration/PyReleaseValidation/python/relval_highstats.py +++ b/Configuration/PyReleaseValidation/python/relval_highstats.py @@ -86,29 +86,3 @@ workflows[134.99601] = ['',['RunJetHT2015HLHS','HLTDR2_25ns','RECODR2_25nsreHLT_HIPM','HARVESTDR2']] workflows[134.99602] = ['',['RunZeroBias2015HLHS','HLTDR2_25ns','RECODR2_25nsreHLT_HIPM','HARVESTDR2']] workflows[134.99603] = ['',['RunSingleMu2015HLHS','HLTDR2_25ns','RECODR2_25nsreHLT_HIPM','HARVESTDR2']] - - - -## 2024 Data Higher Stats Workflows -## with 150k, 250k, 500k or 1M events each - -base_wf_number_2024 = 2024.0 -offset_era = 0.1 # less than 10 eras -offset_pd = 0.001 # less than 100 pds -offset_events = 0.0001 # less than 10 event setups (50k,150k,250k,500k) - -for e_n,era in enumerate(eras_2024): - for p_n,pd in enumerate(pds_2024): - for e_key,evs in event_steps_dict.items(): - if "50k" in e_key: # already defined in relval_standard - continue - wf_number = base_wf_number_2024 - wf_number = wf_number + offset_era * e_n - wf_number = wf_number + offset_pd * p_n - wf_number = wf_number + offset_events * evs - wf_number = round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key - workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']] - - - diff --git a/Configuration/PyReleaseValidation/python/relval_standard.py b/Configuration/PyReleaseValidation/python/relval_standard.py index 31a82e1dbc468..0f2075227dbd2 100644 --- a/Configuration/PyReleaseValidation/python/relval_standard.py +++ b/Configuration/PyReleaseValidation/python/relval_standard.py @@ -559,19 +559,21 @@ workflows[142.901] = ['',['RunUPC2023','RECODR3_2024_UPC','HARVESTDPROMPTR3']] workflows[142.902] = ['',['RunUPC2023','RECODR3_2024_HIN','HARVESTDPROMPTR3']] -## 2024 Data Workflows +## 2024 Data Workflows +# for a limited set of eras and PDs not to overflow the IB matrices +# base_wf_number_2024 = 2024.0 offset_era = 0.1 # less than 10 eras offset_pd = 0.001 # less than 100 pds -for e_n,era in enumerate(eras_2024): - for p_n,pd in enumerate(pds_2024): +for e_n,era in enumerate(['Run2024D','Run2024C']): + for p_n,pd in enumerate(['JetMET0','ZeroBias']): wf_number = base_wf_number_2024 wf_number = wf_number + offset_era * e_n wf_number = wf_number + offset_pd * p_n - wf_number = wf_number + 0.0001 * 0.05 + wf_number = wf_number + 0.0001 * 0.01 wf_number = round(wf_number,6) - step_name = "Run" + pd + era.split("Run")[1] + "_50k" + step_name = "Run" + pd + era.split("Run")[1] + "_10k" workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']] ### fastsim ### diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py index 316e2dd54712b..5cc607c1d42db 100644 --- a/Configuration/PyReleaseValidation/python/relval_steps.py +++ b/Configuration/PyReleaseValidation/python/relval_steps.py @@ -45,8 +45,8 @@ steps = Steps() #### Event to runs -event_steps = [0.05,0.15,0.25,0.5,1] #in millions -event_steps_k = ["50k","150k","250k","500k","1M"] +event_steps = [0.01,0.05,0.15,0.25,0.5,1] #in millions +event_steps_k = ["10k","50k","150k","250k","500k","1M"] event_steps_dict = dict(zip(event_steps_k,event_steps)) #### Production test section #### steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults]) diff --git a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py index 04b942e834ca6..1af66830c13bf 100755 --- a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py +++ b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py @@ -40,10 +40,9 @@ def das_file_site(dataset, site): def das_file_data(dataset,opt=""): cmd = "dasgoclient --query='file dataset=%s %s| grep file.name, file.nevents'"%(dataset,opt) - out = das_do_command(cmd) out = [np.array(r.split(" "))[[0,3]] for r in out if len(r) > 0] - + df = pd.DataFrame(out,columns=["file","events"]) df.events = df.events.values.astype(int) @@ -59,6 +58,28 @@ def das_lumi_data(dataset,opt=""): return df +def das_run_events_data(dataset,run,opt=""): + cmd = "dasgoclient --query='file dataset=%s run=%s %s | sum(file.nevents) '"%(dataset,run,opt) + out = das_do_command(cmd)[0] + + out = [o for o in out.split(" ") if "sum" not in o] + out = int([r.split(" ") for r in out if len(r)>0][0][0]) + + return out + +def das_run_data(dataset,opt=""): + cmd = "dasgoclient --query='run dataset=%s %s '"%(dataset,opt) + out = das_do_command(cmd) + + return out + +def no_intersection(): + print("No intersection between:") + print(" - json : ", best_json) + print(" - dataset: ", dataset) + print("Exiting.") + sys.exit(1) + if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -69,6 +90,7 @@ def das_lumi_data(dataset,opt=""): parser.add_argument('--pandas', '-pd',action='store_true',help="Store the whole dataset (no event or threshold cut) in a csv") parser.add_argument('--proxy','-p', help='Allow to parse a x509 proxy if needed', type=str, default=None) parser.add_argument('--site','-s', help='Only data at specific site', type=str, default=None) + parser.add_argument('--precheck','-pc', action='store_true', help='Check run per run before building the dataframes, to avoid huge caching.') args = parser.parse_args() if args.proxy is not None: @@ -77,6 +99,8 @@ def das_lumi_data(dataset,opt=""): print("No X509 proxy set. Exiting.") sys.exit(1) + ## Check if we are in the cms-bot "environment" + testing = "JENKINS_PREFIX" in os.environ dataset = args.dataset events = args.events threshold = args.threshold @@ -97,6 +121,7 @@ def das_lumi_data(dataset,opt=""): cert_path = base_cert_path + cert_type + "/" web_fallback = False + ## if we have access to eos we get from there ... if os.path.isdir(cert_path): json_list = os.listdir(cert_path) if len(json_list) == 0: @@ -105,7 +130,7 @@ def das_lumi_data(dataset,opt=""): json_list = [c for c in json_list if c.startswith("Cert_C") and c.endswith("json")] else: web_fallback = True - + ## ... if not we go to the website if web_fallback: cert_url = base_cert_url + cert_type + "/" json_list = get_url_clean(cert_url).split("\n") @@ -132,12 +157,39 @@ def das_lumi_data(dataset,opt=""): R = R + [f for f in range(r[0],r[1]+1)] golden_flat[k] = R + # let's just check there's an intersection between the + # dataset and the json + data_runs = das_run_data(dataset) + golden_data_runs = [r for r in data_runs if r in golden_flat] + + if (len(golden_data_runs)==0): + no_intersection() + # building the dataframe, cleaning for bad lumis - df = das_lumi_data(dataset).merge(das_file_data(dataset),on="file",how="inner") # merge file informations with run and lumis - df = df[df["run"].isin(list(golden.keys()))] # skim for golden runs + golden_data_runs_tocheck = golden_data_runs + das_opt = "" + if testing or args.precheck: + golden_data_runs_tocheck = [] + # Here we check run per run. + # This implies more dasgoclient queries, but smaller outputs + # useful when running the IB/PR tests not to have huge + # query results that have to be cached. + + sum_events = 0 + + for r in golden_data_runs: + sum_events = sum_events + int(das_run_events_data(dataset,r)) + golden_data_runs_tocheck.append(r) + if events > 0 and sum_events > events: + break + + das_opt = "run in %s"%(str([int(g) for g in golden_data_runs_tocheck])) + + df = das_lumi_data(dataset,opt=das_opt).merge(das_file_data(dataset,opt=das_opt),on="file",how="inner") # merge file informations with run and lumis + df["lumis"] = [[int(ff) for ff in f.replace("[","").replace("]","").split(",")] for f in df.lumis.values] df_rs = [] - for r in golden_flat: + for r in golden_data_runs_tocheck: cut = (df["run"] == r) if not any(cut): continue @@ -152,12 +204,8 @@ def das_lumi_data(dataset,opt=""): n_lumis = np.array([len(l) for l in df_r.lumis]) df_rs.append(df_r[good_lumis==n_lumis]) - if len(df_rs) == 0: - print("No intersection between:") - print(" - json : ", best_json) - print(" - dataset: ", dataset) - print("Exiting.") - sys.exit(1) + if (len(df_rs)==0): + no_intersection() df = pd.concat(df_rs) df.loc[:,"min_lumi"] = [min(f) for f in df.lumis]